From 6c9567e0850be2f0f94ab64fa6512413fd1a1eb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 14:13:56 +0100 Subject: [PATCH 0001/2924] KVM: s390: Don't use %pK through tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restricted pointers ("%pK") are not meant to be used through TP_format(). It can unintentionally expose security sensitive, raw pointer values. Use regular pointer formatting instead. Link: https://lore.kernel.org/lkml/20250113171731-dc10e3c1-da64-4af0-b767-7c7070468023@linutronix.de/ Signed-off-by: Thomas Weißschuh Reviewed-by: Michael Mueller Link: https://lore.kernel.org/r/20250217-restricted-pointers-s390-v1-1-0e4ace75d8aa@linutronix.de Signed-off-by: Janosch Frank Message-ID: <20250217-restricted-pointers-s390-v1-1-0e4ace75d8aa@linutronix.de> --- arch/s390/kvm/trace-s390.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 9ac92dbf680dbb..9e28f165c114ca 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + TP_printk("create cpu %d at 0x%p, sie block at 0x%p", __entry->id, __entry->vcpu, __entry->sie_block) ); @@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %pK)\n", + TP_printk("enabling channel I/O support (kvm @ %p)\n", __entry->kvm) ); From 0c7fbae5bc782429c97d68dc40fb126748d7e352 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 14:13:57 +0100 Subject: [PATCH 0002/2924] KVM: s390: Don't use %pK through debug printing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restricted pointers ("%pK") are only meant to be used when directly printing to a file from task context. Otherwise it can unintentionally expose security sensitive, raw pointer values. Use regular pointer formatting instead. Link: https://lore.kernel.org/lkml/20250113171731-dc10e3c1-da64-4af0-b767-7c7070468023@linutronix.de/ Signed-off-by: Thomas Weißschuh Reviewed-by: Michael Mueller Tested-by: Michael Mueller Link: https://lore.kernel.org/r/20250217-restricted-pointers-s390-v1-2-0e4ace75d8aa@linutronix.de Signed-off-by: Janosch Frank Message-ID: <20250217-restricted-pointers-s390-v1-2-0e4ace75d8aa@linutronix.de> --- arch/s390/kvm/intercept.c | 2 +- arch/s390/kvm/interrupt.c | 8 ++++---- arch/s390/kvm/kvm-s390.c | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 610dd44a948b22..a06a000f196ce0 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -95,7 +95,7 @@ static int handle_validity(struct kvm_vcpu *vcpu) vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy, + KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy, current->pid, vcpu->kvm); /* do not warn on invalid runtime instrumentation mode */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 07ff0e10cb7f5c..c0558f05400732 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3161,7 +3161,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm) if (!gi->origin) return; gisa_clear_ipm(gi->origin); - VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin); } void kvm_s390_gisa_init(struct kvm *kvm) @@ -3178,7 +3178,7 @@ void kvm_s390_gisa_init(struct kvm *kvm) gi->timer.function = gisa_vcpu_kicker; memset(gi->origin, 0, sizeof(struct kvm_s390_gisa)); gi->origin->next_alert = (u32)virt_to_phys(gi->origin); - VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin); } void kvm_s390_gisa_enable(struct kvm *kvm) @@ -3219,7 +3219,7 @@ void kvm_s390_gisa_destroy(struct kvm *kvm) process_gib_alert_list(); hrtimer_cancel(&gi->timer); gi->origin = NULL; - VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa); + VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa); } void kvm_s390_gisa_disable(struct kvm *kvm) @@ -3468,7 +3468,7 @@ int __init kvm_s390_gib_init(u8 nisc) } } - KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc); + KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc); goto out; out_unreg_gal: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ebecb96bacce7d..9e427ba3aed42e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1020,7 +1020,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } mutex_unlock(&kvm->lock); VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); - VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + VM_EVENT(kvm, 3, "New guest asce: 0x%p", (void *) kvm->arch.gmap->asce); break; } @@ -3464,7 +3464,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_gisa_init(kvm); INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); kvm->arch.pv.set_aside = NULL; - KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); return 0; out_err: @@ -3527,7 +3527,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); kvm_s390_vsie_destroy(kvm); - KVM_EVENT(3, "vm 0x%pK destroyed", kvm); + KVM_EVENT(3, "vm 0x%p destroyed", kvm); } /* Section: vcpu related */ @@ -3648,7 +3648,7 @@ static int sca_switch_to_extended(struct kvm *kvm) free_page((unsigned long)old_sca); - VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)", old_sca, kvm->arch.sca); return 0; } @@ -4025,7 +4025,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) goto out_free_sie_block; } - VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", + VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p", vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); From 8aa580cd92843b60d4d6331f3b0a9e8409bb70eb Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Wed, 12 Mar 2025 17:51:34 +0800 Subject: [PATCH 0003/2924] scsi: hisi_sas: Enable force phy when SATA disk directly connected when a SATA disk is directly connected the SAS controller determines the disk to which I/Os are delivered based on the port ID in the DQ entry. When many phys are disconnected and reconnect, the port ID of phys were changed and used by other link, resulting in I/O being sent to incorrect disk. Data inconsistency on the SATA disk may occur during I/O retries using the old port ID. So enable force phy, then force the command to be executed in a certain phy, and if the actual phy ID of the port does not match the phy configured in the command, the chip will stop delivering the I/O to disk. Fixes: ce60689e12dd ("scsi: hisi_sas: add v3 code to send ATA frame") Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20250312095135.3048379-2-yangxingui@huawei.com Reviewed-by: Yihang Li Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 9 +++++++-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 14 ++++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index 04ee02797ca3fe..be2c0b628595c6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -2501,6 +2501,7 @@ static void prep_ata_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_port *port = to_hisi_sas_port(sas_port); struct sas_ata_task *ata_task = &task->ata_task; struct sas_tmf_task *tmf = slot->tmf; + int phy_id; u8 *buf_cmd; int has_data = 0, hdr_tag = 0; u32 dw0, dw1 = 0, dw2 = 0; @@ -2508,10 +2509,14 @@ static void prep_ata_v2_hw(struct hisi_hba *hisi_hba, /* create header */ /* dw0 */ dw0 = port->id << CMD_HDR_PORT_OFF; - if (parent_dev && dev_is_expander(parent_dev->dev_type)) + if (parent_dev && dev_is_expander(parent_dev->dev_type)) { dw0 |= 3 << CMD_HDR_CMD_OFF; - else + } else { + phy_id = device->phy->identify.phy_identifier; + dw0 |= (1U << phy_id) << CMD_HDR_PHY_ID_OFF; + dw0 |= CMD_HDR_FORCE_PHY_MSK; dw0 |= 4 << CMD_HDR_CMD_OFF; + } if (tmf && ata_task->force_phy) { dw0 |= CMD_HDR_FORCE_PHY_MSK; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 095bbf80c34efb..6a0656f3b596cc 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -359,6 +359,10 @@ #define CMD_HDR_RESP_REPORT_MSK (0x1 << CMD_HDR_RESP_REPORT_OFF) #define CMD_HDR_TLR_CTRL_OFF 6 #define CMD_HDR_TLR_CTRL_MSK (0x3 << CMD_HDR_TLR_CTRL_OFF) +#define CMD_HDR_PHY_ID_OFF 8 +#define CMD_HDR_PHY_ID_MSK (0x1ff << CMD_HDR_PHY_ID_OFF) +#define CMD_HDR_FORCE_PHY_OFF 17 +#define CMD_HDR_FORCE_PHY_MSK (0x1U << CMD_HDR_FORCE_PHY_OFF) #define CMD_HDR_PORT_OFF 18 #define CMD_HDR_PORT_MSK (0xf << CMD_HDR_PORT_OFF) #define CMD_HDR_PRIORITY_OFF 27 @@ -1429,15 +1433,21 @@ static void prep_ata_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_cmd_hdr *hdr = slot->cmd_hdr; struct asd_sas_port *sas_port = device->port; struct hisi_sas_port *port = to_hisi_sas_port(sas_port); + int phy_id; u8 *buf_cmd; int has_data = 0, hdr_tag = 0; u32 dw1 = 0, dw2 = 0; hdr->dw0 = cpu_to_le32(port->id << CMD_HDR_PORT_OFF); - if (parent_dev && dev_is_expander(parent_dev->dev_type)) + if (parent_dev && dev_is_expander(parent_dev->dev_type)) { hdr->dw0 |= cpu_to_le32(3 << CMD_HDR_CMD_OFF); - else + } else { + phy_id = device->phy->identify.phy_identifier; + hdr->dw0 |= cpu_to_le32((1U << phy_id) + << CMD_HDR_PHY_ID_OFF); + hdr->dw0 |= CMD_HDR_FORCE_PHY_MSK; hdr->dw0 |= cpu_to_le32(4U << CMD_HDR_CMD_OFF); + } switch (task->data_dir) { case DMA_TO_DEVICE: From daff37f00c7506ca322ccfce95d342022f06ec58 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Wed, 12 Mar 2025 17:51:35 +0800 Subject: [PATCH 0004/2924] scsi: hisi_sas: Fix I/O errors caused by hardware port ID changes The hw port ID of phy may change when inserting disks in batches, causing the port ID in hisi_sas_port and itct to be inconsistent with the hardware, resulting in I/O errors. The solution is to set the device state to gone to intercept I/O sent to the device, and then execute linkreset to discard and find the disk to re-update its information. Signed-off-by: Xingui Yang Link: https://lore.kernel.org/r/20250312095135.3048379-3-yangxingui@huawei.com Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 3596414d970b24..7a484ad0f9abe6 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -935,8 +935,28 @@ static void hisi_sas_phyup_work_common(struct work_struct *work, container_of(work, typeof(*phy), works[event]); struct hisi_hba *hisi_hba = phy->hisi_hba; struct asd_sas_phy *sas_phy = &phy->sas_phy; + struct asd_sas_port *sas_port = sas_phy->port; + struct hisi_sas_port *port = phy->port; + struct device *dev = hisi_hba->dev; + struct domain_device *port_dev; int phy_no = sas_phy->id; + if (!test_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags) && + sas_port && port && (port->id != phy->port_id)) { + dev_info(dev, "phy%d's hw port id changed from %d to %llu\n", + phy_no, port->id, phy->port_id); + port_dev = sas_port->port_dev; + if (port_dev && !dev_is_expander(port_dev->dev_type)) { + /* + * Set the device state to gone to block + * sending IO to the device. + */ + set_bit(SAS_DEV_GONE, &port_dev->state); + hisi_sas_notify_phy_event(phy, HISI_PHYE_LINK_RESET); + return; + } + } + phy->wait_phyup_cnt = 0; if (phy->identify.target_port_protocols == SAS_PROTOCOL_SSP) hisi_hba->hw->sl_notify_ssp(hisi_hba, phy_no); From 20b97acc4cafa2be8ac91a777de135110e58a90b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 14 Mar 2025 15:51:50 -0700 Subject: [PATCH 0005/2924] scsi: ufs: core: Fix a race condition related to device commands There is a TOCTOU race in ufshcd_compl_one_cqe(): hba->dev_cmd.complete may be cleared from another thread after it has been checked and before it is used. Fix this race by moving the device command completion from the stack of the device command submitter into struct ufs_hba. This patch fixes the following kernel crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: _raw_spin_lock_irqsave+0x34/0x80 complete+0x24/0xb8 ufshcd_compl_one_cqe+0x13c/0x4f0 ufshcd_mcq_poll_cqe_lock+0xb4/0x108 ufshcd_intr+0x2f4/0x444 __handle_irq_event_percpu+0xbc/0x250 handle_irq_event+0x48/0xb0 Fixes: 5a0b0cb9bee7 ("[SCSI] ufs: Add support for sending NOP OUT UPIU") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250314225206.1487838-1-bvanassche@acm.org Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 25 ++++++------------------- include/ufs/ufshcd.h | 2 +- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 4e1e214fc5a25c..3288a7da73dc70 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -3176,16 +3176,10 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, int err; retry: - time_left = wait_for_completion_timeout(hba->dev_cmd.complete, + time_left = wait_for_completion_timeout(&hba->dev_cmd.complete, time_left); if (likely(time_left)) { - /* - * The completion handler called complete() and the caller of - * this function still owns the @lrbp tag so the code below does - * not trigger any race conditions. - */ - hba->dev_cmd.complete = NULL; err = ufshcd_get_tr_ocs(lrbp, NULL); if (!err) err = ufshcd_dev_cmd_completion(hba, lrbp); @@ -3199,7 +3193,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, /* successfully cleared the command, retry if needed */ if (ufshcd_clear_cmd(hba, lrbp->task_tag) == 0) err = -EAGAIN; - hba->dev_cmd.complete = NULL; return err; } @@ -3215,11 +3208,9 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, spin_lock_irqsave(&hba->outstanding_lock, flags); pending = test_bit(lrbp->task_tag, &hba->outstanding_reqs); - if (pending) { - hba->dev_cmd.complete = NULL; + if (pending) __clear_bit(lrbp->task_tag, &hba->outstanding_reqs); - } spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (!pending) { @@ -3237,8 +3228,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, spin_lock_irqsave(&hba->outstanding_lock, flags); pending = test_bit(lrbp->task_tag, &hba->outstanding_reqs); - if (pending) - hba->dev_cmd.complete = NULL; spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (!pending) { @@ -3272,13 +3261,9 @@ static void ufshcd_dev_man_unlock(struct ufs_hba *hba) static int ufshcd_issue_dev_cmd(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, const u32 tag, int timeout) { - DECLARE_COMPLETION_ONSTACK(wait); int err; - hba->dev_cmd.complete = &wait; - ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr); - ufshcd_send_command(hba, tag, hba->dev_cmd_queue); err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout); @@ -5585,12 +5570,12 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, ufshcd_release_scsi_cmd(hba, lrbp); /* Do not touch lrbp after scsi done */ scsi_done(cmd); - } else if (hba->dev_cmd.complete) { + } else { if (cqe) { ocs = le32_to_cpu(cqe->status) & MASK_OCS; lrbp->utr_descriptor_ptr->header.ocs = ocs; } - complete(hba->dev_cmd.complete); + complete(&hba->dev_cmd.complete); } } @@ -10475,6 +10460,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) */ spin_lock_init(&hba->clk_gating.lock); + init_completion(&hba->dev_cmd.complete); + err = ufshcd_hba_init(hba); if (err) goto out_error; diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index e3909cc691b2a8..f56050ce944598 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -246,7 +246,7 @@ struct ufs_query { struct ufs_dev_cmd { enum dev_cmd_type type; struct mutex lock; - struct completion *complete; + struct completion complete; struct ufs_query query; }; From 021ba7f1babd029e714d13a6bf2571b08af96d0f Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Fri, 21 Mar 2025 11:41:26 -0500 Subject: [PATCH 0006/2924] udmabuf: fix a buf size overflow issue during udmabuf creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit by casting size_limit_mb to u64 when calculate pglimit. Signed-off-by: Xiaogang Chen Link: https://patchwork.freedesktop.org/patch/msgid/20250321164126.329638-1-xiaogang.chen@amd.com Signed-off-by: Christian König --- drivers/dma-buf/udmabuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index cc7398cc17d67f..e74e36a8ecda21 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -393,7 +393,7 @@ static long udmabuf_create(struct miscdevice *device, if (!ubuf) return -ENOMEM; - pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT; + pglimit = ((u64)size_limit_mb * 1024 * 1024) >> PAGE_SHIFT; for (i = 0; i < head->count; i++) { pgoff_t subpgcnt; From 4ba2abe154ef68f9612eee9d6fbfe53a1736b064 Mon Sep 17 00:00:00 2001 From: Brendan King Date: Tue, 18 Mar 2025 14:53:13 +0000 Subject: [PATCH 0007/2924] drm/imagination: take paired job reference For paired jobs, have the fragment job take a reference on the geometry job, so that the geometry job cannot be freed until the fragment job has finished with it. The geometry job structure is accessed when the fragment job is being prepared by the GPU scheduler. Taking the reference prevents the geometry job being freed until the fragment job no longer requires it. Fixes a use after free bug detected by KASAN: [ 124.256386] BUG: KASAN: slab-use-after-free in pvr_queue_prepare_job+0x108/0x868 [powervr] [ 124.264893] Read of size 1 at addr ffff0000084cb960 by task kworker/u16:4/63 Cc: stable@vger.kernel.org Fixes: eaf01ee5ba28 ("drm/imagination: Implement job submission and scheduling") Signed-off-by: Brendan King Reviewed-by: Matt Coster Link: https://lore.kernel.org/r/20250318-ddkopsrc-1337-use-after-free-in-pvr_queue_prepare_job-v1-1-80fb30d044a6@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_job.c | 7 +++++++ drivers/gpu/drm/imagination/pvr_queue.c | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/imagination/pvr_job.c b/drivers/gpu/drm/imagination/pvr_job.c index 618503a212a7d3..aad183a5737183 100644 --- a/drivers/gpu/drm/imagination/pvr_job.c +++ b/drivers/gpu/drm/imagination/pvr_job.c @@ -677,6 +677,13 @@ pvr_jobs_link_geom_frag(struct pvr_job_data *job_data, u32 *job_count) geom_job->paired_job = frag_job; frag_job->paired_job = geom_job; + /* The geometry job pvr_job structure is used when the fragment + * job is being prepared by the GPU scheduler. Have the fragment + * job hold a reference on the geometry job to prevent it being + * freed until the fragment job has finished with it. + */ + pvr_job_get(geom_job); + /* Skip the fragment job we just paired to the geometry job. */ i++; } diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index 43411be930a214..fa1afe1193e1d3 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -866,6 +866,10 @@ static void pvr_queue_free_job(struct drm_sched_job *sched_job) struct pvr_job *job = container_of(sched_job, struct pvr_job, base); drm_sched_job_cleanup(sched_job); + + if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) + pvr_job_put(job->paired_job); + job->paired_job = NULL; pvr_job_put(job); } From a5b230e7f3a55bd8bd8d012eec75a4b7baa671d5 Mon Sep 17 00:00:00 2001 From: Brendan King Date: Tue, 18 Mar 2025 14:55:55 +0000 Subject: [PATCH 0008/2924] drm/imagination: fix firmware memory leaks Free the memory used to hold the results of firmware image processing when the module is unloaded. Fix the related issue of the same memory being leaked if processing of the firmware image fails during module load. Ensure all firmware GEM objects are destroyed if firmware image processing fails. Fixes memory leaks on powervr module unload detected by Kmemleak: unreferenced object 0xffff000042e20000 (size 94208): comm "modprobe", pid 470, jiffies 4295277154 hex dump (first 32 bytes): 02 ae 7f ed bf 45 84 00 3c 5b 1f ed 9f 45 45 05 .....E..<[...EE. d5 4f 5d 14 6c 00 3d 23 30 d0 3a 4a 66 0e 48 c8 .O].l.=#0.:Jf.H. backtrace (crc dd329dec): kmemleak_alloc+0x30/0x40 ___kmalloc_large_node+0x140/0x188 __kmalloc_large_node_noprof+0x2c/0x13c __kmalloc_noprof+0x48/0x4c0 pvr_fw_init+0xaa4/0x1f50 [powervr] unreferenced object 0xffff000042d20000 (size 20480): comm "modprobe", pid 470, jiffies 4295277154 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 09 00 00 00 0b 00 00 00 ................ 00 00 00 00 00 00 00 00 07 00 00 00 08 00 00 00 ................ backtrace (crc 395b02e3): kmemleak_alloc+0x30/0x40 ___kmalloc_large_node+0x140/0x188 __kmalloc_large_node_noprof+0x2c/0x13c __kmalloc_noprof+0x48/0x4c0 pvr_fw_init+0xb0c/0x1f50 [powervr] Cc: stable@vger.kernel.org Fixes: cc1aeedb98ad ("drm/imagination: Implement firmware infrastructure and META FW support") Signed-off-by: Brendan King Reviewed-by: Matt Coster Link: https://lore.kernel.org/r/20250318-ddkopsrc-1339-firmware-related-memory-leak-on-module-unload-v1-1-155337c57bb4@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/pvr_fw.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/imagination/pvr_fw.c b/drivers/gpu/drm/imagination/pvr_fw.c index 3debc9870a82ae..d09c4c68411627 100644 --- a/drivers/gpu/drm/imagination/pvr_fw.c +++ b/drivers/gpu/drm/imagination/pvr_fw.c @@ -732,7 +732,7 @@ pvr_fw_process(struct pvr_device *pvr_dev) fw_mem->core_data, fw_mem->core_code_alloc_size); if (err) - goto err_free_fw_core_data_obj; + goto err_free_kdata; memcpy(fw_code_ptr, fw_mem->code, fw_mem->code_alloc_size); memcpy(fw_data_ptr, fw_mem->data, fw_mem->data_alloc_size); @@ -742,10 +742,14 @@ pvr_fw_process(struct pvr_device *pvr_dev) memcpy(fw_core_data_ptr, fw_mem->core_data, fw_mem->core_data_alloc_size); /* We're finished with the firmware section memory on the CPU, unmap. */ - if (fw_core_data_ptr) + if (fw_core_data_ptr) { pvr_fw_object_vunmap(fw_mem->core_data_obj); - if (fw_core_code_ptr) + fw_core_data_ptr = NULL; + } + if (fw_core_code_ptr) { pvr_fw_object_vunmap(fw_mem->core_code_obj); + fw_core_code_ptr = NULL; + } pvr_fw_object_vunmap(fw_mem->data_obj); fw_data_ptr = NULL; pvr_fw_object_vunmap(fw_mem->code_obj); @@ -753,7 +757,7 @@ pvr_fw_process(struct pvr_device *pvr_dev) err = pvr_fw_create_fwif_connection_ctl(pvr_dev); if (err) - goto err_free_fw_core_data_obj; + goto err_free_kdata; return 0; @@ -763,13 +767,16 @@ pvr_fw_process(struct pvr_device *pvr_dev) kfree(fw_mem->data); kfree(fw_mem->code); -err_free_fw_core_data_obj: if (fw_core_data_ptr) - pvr_fw_object_unmap_and_destroy(fw_mem->core_data_obj); + pvr_fw_object_vunmap(fw_mem->core_data_obj); + if (fw_mem->core_data_obj) + pvr_fw_object_destroy(fw_mem->core_data_obj); err_free_fw_core_code_obj: if (fw_core_code_ptr) - pvr_fw_object_unmap_and_destroy(fw_mem->core_code_obj); + pvr_fw_object_vunmap(fw_mem->core_code_obj); + if (fw_mem->core_code_obj) + pvr_fw_object_destroy(fw_mem->core_code_obj); err_free_fw_data_obj: if (fw_data_ptr) @@ -836,6 +843,12 @@ pvr_fw_cleanup(struct pvr_device *pvr_dev) struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem; pvr_fw_fini_fwif_connection_ctl(pvr_dev); + + kfree(fw_mem->core_data); + kfree(fw_mem->core_code); + kfree(fw_mem->data); + kfree(fw_mem->code); + if (fw_mem->core_code_obj) pvr_fw_object_destroy(fw_mem->core_code_obj); if (fw_mem->core_data_obj) From 3d50e61a17b642af060566acb0eabe3c0eb3ef1f Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Tue, 25 Mar 2025 13:10:21 -0700 Subject: [PATCH 0009/2924] drm/virtio: Fix flickering issue seen with imported dmabufs We need to save the reservation object pointer associated with the imported dmabuf in the newly created GEM object to allow drm_gem_plane_helper_prepare_fb() to extract the exclusive fence from it and attach it to the plane state during prepare phase. This is needed to ensure that drm_atomic_helper_wait_for_fences() correctly waits for the relevant fences (move, etc) associated with the reservation object, thereby implementing proper synchronization. Otherwise, artifacts or slight flickering can be seen when apps are dragged across the screen when running Gnome (Wayland). This problem is mostly seen with dGPUs in the case where the FBs are allocated in VRAM but need to be migrated to System RAM as they are shared with virtio-gpu. Fixes: ca77f27a2665 ("drm/virtio: Import prime buffers from other devices as guest blobs") Cc: Gerd Hoffmann Cc: Dmitry Osipenko Cc: Gurchetan Singh Cc: Chia-I Wu Signed-off-by: Vivek Kasireddy Reviewed-by: Dmitry Osipenko Signed-off-by: Dmitry Osipenko [dmitry.osipenko@collabora.com: Moved assignment before object_init()] Link: https://patchwork.freedesktop.org/patch/msgid/20250325201021.1315080-1-vivek.kasireddy@intel.com --- drivers/gpu/drm/virtio/virtgpu_prime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index f92133a01195a9..d28d1c45a703b5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@ -319,6 +319,7 @@ struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev, return ERR_PTR(-ENOMEM); obj = &bo->base.base; + obj->resv = buf->resv; obj->funcs = &virtgpu_gem_dma_buf_funcs; drm_gem_private_object_init(dev, obj, buf->size); From 8ec0fbb28d049273bfd4f1e7a5ae4c74884beed3 Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Wed, 26 Mar 2025 12:52:10 +0000 Subject: [PATCH 0010/2924] drm/nouveau: prime: fix ttm_bo_delayed_delete oops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix an oops in ttm_bo_delayed_delete which results from dererencing a dangling pointer: Oops: general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6b7b: 0000 [#1] PREEMPT SMP CPU: 4 UID: 0 PID: 1082 Comm: kworker/u65:2 Not tainted 6.14.0-rc4-00267-g505460b44513-dirty #216 Hardware name: LENOVO 82N6/LNVNB161216, BIOS GKCN65WW 01/16/2024 Workqueue: ttm ttm_bo_delayed_delete [ttm] RIP: 0010:dma_resv_iter_first_unlocked+0x55/0x290 Code: 31 f6 48 c7 c7 00 2b fa aa e8 97 bd 52 ff e8 a2 c1 53 00 5a 85 c0 74 48 e9 88 01 00 00 4c 89 63 20 4d 85 e4 0f 84 30 01 00 00 <41> 8b 44 24 10 c6 43 2c 01 48 89 df 89 43 28 e8 97 fd ff ff 4c 8b RSP: 0018:ffffbf9383473d60 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ffffbf9383473d88 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffbf9383473d78 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 6b6b6b6b6b6b6b6b R13: ffffa003bbf78580 R14: ffffa003a6728040 R15: 00000000000383cc FS: 0000000000000000(0000) GS:ffffa00991c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000758348024dd0 CR3: 000000012c259000 CR4: 0000000000f50ef0 PKRU: 55555554 Call Trace: ? __die_body.cold+0x19/0x26 ? die_addr+0x3d/0x70 ? exc_general_protection+0x159/0x460 ? asm_exc_general_protection+0x27/0x30 ? dma_resv_iter_first_unlocked+0x55/0x290 dma_resv_wait_timeout+0x56/0x100 ttm_bo_delayed_delete+0x69/0xb0 [ttm] process_one_work+0x217/0x5c0 worker_thread+0x1c8/0x3d0 ? apply_wqattrs_cleanup.part.0+0xc0/0xc0 kthread+0x10b/0x240 ? kthreads_online_cpu+0x140/0x140 ret_from_fork+0x40/0x70 ? kthreads_online_cpu+0x140/0x140 ret_from_fork_asm+0x11/0x20 The cause of this is: - drm_prime_gem_destroy calls dma_buf_put(dma_buf) which releases the reference to the shared dma_buf. The reference count is 0, so the dma_buf is destroyed, which in turn decrements the corresponding amdgpu_bo reference count to 0, and the amdgpu_bo is destroyed - calling drm_gem_object_release then dma_resv_fini (which destroys the reservation object), then finally freeing the amdgpu_bo. - nouveau_bo obj->bo.base.resv is now a dangling pointer to the memory formerly allocated to the amdgpu_bo. - nouveau_gem_object_del calls ttm_bo_put(&nvbo->bo) which calls ttm_bo_release, which schedules ttm_bo_delayed_delete. - ttm_bo_delayed_delete runs and dereferences the dangling resv pointer, resulting in a general protection fault. Fix this by moving the drm_prime_gem_destroy call from nouveau_gem_object_del to nouveau_bo_del_ttm. This ensures that it will be run after ttm_bo_delayed_delete. Signed-off-by: Chris Bainbridge Suggested-by: Christian König Fixes: 22b33e8ed0e3 ("nouveau: add PRIME support") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3937 Cc: Stable@vger.kernel.org Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/Z-P4epVK8k7tFZ7C@debian.local --- drivers/gpu/drm/nouveau/nouveau_bo.c | 3 +++ drivers/gpu/drm/nouveau/nouveau_gem.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index db961eade2257f..2016c1e7242fe3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -144,6 +144,9 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) nouveau_bo_del_io_reserve_lru(bo); nv10_bo_put_tile_region(dev, nvbo->tile, NULL); + if (bo->base.import_attach) + drm_prime_gem_destroy(&bo->base, bo->sg); + /* * If nouveau_bo_new() allocated this buffer, the GEM object was never * initialized, so don't attempt to release it. diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 9ae2cee1c7c580..67e3c99de73ae6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -87,9 +87,6 @@ nouveau_gem_object_del(struct drm_gem_object *gem) return; } - if (gem->import_attach) - drm_prime_gem_destroy(gem, nvbo->bo.sg); - ttm_bo_put(&nvbo->bo); pm_runtime_mark_last_busy(dev); From 6b4568b675b14cf890c0c21779773c3e08e80ce5 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 25 Mar 2025 12:42:19 +0100 Subject: [PATCH 0011/2924] accel/ivpu: Fix warning in ivpu_ipc_send_receive_internal() Warn if device is suspended only when runtime PM is enabled. Runtime PM is disabled during reset/recovery and it is not an error to use ivpu_ipc_send_receive_internal() in such cases. Fixes: 5eaa49741119 ("accel/ivpu: Prevent recovery invocation during probe and resume") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Maciej Falkowski Reviewed-by: Lizhi Hou Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250325114219.3739951-1-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_ipc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 01ebf88fe6ef0a..5daaf07fc1a712 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -302,7 +302,8 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req struct ivpu_ipc_consumer cons; int ret; - drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); + drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev) && + pm_runtime_enabled(vdev->drm.dev)); ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); From 9a6f56762d23a1f3af15e67901493c927caaf882 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 25 Mar 2025 12:43:05 +0100 Subject: [PATCH 0012/2924] accel/ivpu: Fix deadlock in ivpu_ms_cleanup() Fix deadlock in ivpu_ms_cleanup() by preventing runtime resume after file_priv->ms_lock is acquired. During a failure in runtime resume, a cold boot is executed, which calls ivpu_ms_cleanup_all(). This function calls ivpu_ms_cleanup() that acquires file_priv->ms_lock and causes the deadlock. Fixes: cdfad4db7756 ("accel/ivpu: Add NPU profiling support") Cc: stable@vger.kernel.org # v6.11+ Signed-off-by: Maciej Falkowski Reviewed-by: Lizhi Hou Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250325114306.3740022-2-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_ms.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c index ffe7b10f8a767b..eb485cf15ad64f 100644 --- a/drivers/accel/ivpu/ivpu_ms.c +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -4,6 +4,7 @@ */ #include +#include #include "ivpu_drv.h" #include "ivpu_gem.h" @@ -281,6 +282,9 @@ int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file * void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) { struct ivpu_ms_instance *ms, *tmp; + struct ivpu_device *vdev = file_priv->vdev; + + pm_runtime_get_sync(vdev->drm.dev); mutex_lock(&file_priv->ms_lock); @@ -293,6 +297,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) free_instance(file_priv, ms); mutex_unlock(&file_priv->ms_lock); + + pm_runtime_put_autosuspend(vdev->drm.dev); } void ivpu_ms_cleanup_all(struct ivpu_device *vdev) From d893da85e06edf54737bb80648bb58ba8fd56d9f Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Tue, 25 Mar 2025 12:43:06 +0100 Subject: [PATCH 0013/2924] accel/ivpu: Fix PM related deadlocks in MS IOCTLs Prevent runtime resume/suspend while MS IOCTLs are in progress. Failed suspend will call ivpu_ms_cleanup() that would try to acquire file_priv->ms_lock, which is already held by the IOCTLs. Fixes: cdfad4db7756 ("accel/ivpu: Add NPU profiling support") Cc: stable@vger.kernel.org # v6.11+ Signed-off-by: Maciej Falkowski Reviewed-by: Lizhi Hou Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250325114306.3740022-3-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_debugfs.c | 4 ++-- drivers/accel/ivpu/ivpu_ms.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 8180b95ed69dc7..093a2e93b0b394 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -331,7 +331,7 @@ ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t si return -EINVAL; ret = ivpu_rpm_get(vdev); - if (ret) + if (ret < 0) return ret; ivpu_pm_trigger_recovery(vdev, "debugfs"); @@ -382,7 +382,7 @@ static int dct_active_set(void *data, u64 active_percent) return -EINVAL; ret = ivpu_rpm_get(vdev); - if (ret) + if (ret < 0) return ret; if (active_percent) diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c index eb485cf15ad64f..2a043baf10ca17 100644 --- a/drivers/accel/ivpu/ivpu_ms.c +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -45,6 +45,10 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); if (get_instance_by_mask(file_priv, args->metric_group_mask)) { @@ -97,6 +101,8 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil kfree(ms); unlock: mutex_unlock(&file_priv->ms_lock); + + ivpu_rpm_put(vdev); return ret; } @@ -161,6 +167,10 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file * if (!args->metric_group_mask) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); ms = get_instance_by_mask(file_priv, args->metric_group_mask); @@ -188,6 +198,7 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file * unlock: mutex_unlock(&file_priv->ms_lock); + ivpu_rpm_put(vdev); return ret; } @@ -205,11 +216,17 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file { struct ivpu_file_priv *file_priv = file->driver_priv; struct drm_ivpu_metric_streamer_stop *args = data; + struct ivpu_device *vdev = file_priv->vdev; struct ivpu_ms_instance *ms; + int ret; if (!args->metric_group_mask) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); ms = get_instance_by_mask(file_priv, args->metric_group_mask); @@ -218,6 +235,7 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file mutex_unlock(&file_priv->ms_lock); + ivpu_rpm_put(vdev); return ms ? 0 : -EINVAL; } From 8e587ab43cb92a9e57f99ea8d6c069ee65863707 Mon Sep 17 00:00:00 2001 From: Sidong Yang Date: Wed, 19 Mar 2025 11:24:01 +0000 Subject: [PATCH 0014/2924] btrfs: ioctl: don't free iov when btrfs_encoded_read() returns -EAGAIN Fix a bug in encoded read that mistakenly frees the iov in case btrfs_encoded_read() returns -EAGAIN assuming the structure will be reused. This can happen when when receiving requests concurrently, the io_uring subsystem does not reset the data, and the last free will happen in btrfs_uring_read_finished(). Handle the -EAGAIN error and skip freeing iov. CC: stable@vger.kernel.org # 6.13+ Signed-off-by: Sidong Yang Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a13d81bb56a089..63aeacc5494574 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4902,6 +4902,8 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue ret = btrfs_encoded_read(&kiocb, &data->iter, &data->args, &cached_state, &disk_bytenr, &disk_io_size); + if (ret == -EAGAIN) + goto out_acct; if (ret < 0 && ret != -EIOCBQUEUED) goto out_free; From dc08c58696f8555e4a802f1f23c894a330d80ab7 Mon Sep 17 00:00:00 2001 From: Johannes Kimmel Date: Wed, 19 Mar 2025 22:49:00 +0100 Subject: [PATCH 0015/2924] btrfs: correctly escape subvol in btrfs_show_options() Currently, displaying the btrfs subvol mount option doesn't escape ','. This makes parsing /proc/self/mounts and /proc/self/mountinfo ambiguous for subvolume names that contain commas. The text after the comma could be mistaken for another option (think "subvol=foo,ro", where ro is actually part of the subvolumes name). Replace the manual escape characters list with a call to seq_show_option(). Thanks to Calvin Walton for suggesting this approach. Fixes: c8d3fe028f64 ("Btrfs: show subvol= and subvolid= in /proc/mounts") CC: stable@vger.kernel.org # 5.4+ Suggested-by: Calvin Walton Signed-off-by: Johannes Kimmel Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 40709e2a44fcec..7121d8c7a318ec 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1139,8 +1139,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) subvol_name = btrfs_get_subvol_name_from_objectid(info, btrfs_root_id(BTRFS_I(d_inode(dentry))->root)); if (!IS_ERR(subvol_name)) { - seq_puts(seq, ",subvol="); - seq_escape(seq, subvol_name, " \t\n\\"); + seq_show_option(seq, "subvol", subvol_name); kfree(subvol_name); } return 0; From 65f2a3b2323edde7c5de3a44e67fec00873b4217 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Mar 2025 07:59:12 +1030 Subject: [PATCH 0016/2924] btrfs: remove folio order ASSERT()s in super block writeback path [BUG] There is a syzbot report that the ASSERT() inside write_dev_supers() got triggered: assertion failed: folio_order(folio) == 0, in fs/btrfs/disk-io.c:3858 ------------[ cut here ]------------ kernel BUG at fs/btrfs/disk-io.c:3858! Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI CPU: 0 UID: 0 PID: 6730 Comm: syz-executor378 Not tainted 6.14.0-syzkaller-03565-gf6e0150b2003 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:write_dev_supers fs/btrfs/disk-io.c:3858 [inline] RIP: 0010:write_all_supers+0x400f/0x4090 fs/btrfs/disk-io.c:4155 Call Trace: btrfs_commit_transaction+0x1eda/0x3750 fs/btrfs/transaction.c:2528 btrfs_quota_enable+0xfcc/0x21a0 fs/btrfs/qgroup.c:1226 btrfs_ioctl_quota_ctl+0x144/0x1c0 fs/btrfs/ioctl.c:3677 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl+0xf1/0x160 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f5ad1f20289 ---[ end trace 0000000000000000 ]--- [CAUSE] Since commit f93ee0df5139 ("btrfs: convert super block writes to folio in write_dev_supers()") and commit c94b7349b859 ("btrfs: convert super block writes to folio in wait_dev_supers()"), the super block writeback path is converted to use folio. Since the original code is using page based interfaces, we have an "ASSERT(folio_order(folio) == 0);" added to make sure everything is not changed. But the folio here is not from any btrfs inode, but from the block device, and we have no control on the folio order in bdev, the device can choose whatever folio size they want/need. E.g. the bdev may even have a block size of multiple pages. So the ASSERT() is triggered. [FIX] The super block writeback path has taken larger folios into consideration, so there is no need for the ASSERT(). And since commit bc00965dbff7 ("btrfs: count super block write errors in device instead of tracking folio error state"), the wait path no longer checks the folio status but only wait for the folio writeback to finish, there is nothing requiring the ASSERT() either. So we can remove both ASSERT()s safely now. Reported-by: syzbot+34122898a11ab689518a@syzkaller.appspotmail.com Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1a916716cefebe..d0da9988ea48c8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3853,7 +3853,6 @@ static int write_dev_supers(struct btrfs_device *device, atomic_inc(&device->sb_write_errors); continue; } - ASSERT(folio_order(folio) == 0); offset = offset_in_folio(folio, bytenr); disk_super = folio_address(folio) + offset; @@ -3926,7 +3925,6 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) /* If the folio has been removed, then we know it completed. */ if (IS_ERR(folio)) continue; - ASSERT(folio_order(folio) == 0); /* Folio will be unlocked once the write completes. */ folio_wait_locked(folio); From 668e041662e92ab3ebcb9eb606d3ec01884546ab Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:40 -0400 Subject: [PATCH 0017/2924] cgroup/cpuset: Fix incorrect isolated_cpus update in update_parent_effective_cpumask() Before commit f0af1bfc27b5 ("cgroup/cpuset: Relax constraints to partition & cpus changes"), a cpuset partition cannot be enabled if not all the requested CPUs can be granted from the parent cpuset. After that commit, a cpuset partition can be created even if the requested exclusive CPUs contain CPUs not allowed its parent. The delmask containing exclusive CPUs to be removed from its parent wasn't adjusted accordingly. That is not a problem until the introduction of a new isolated_cpus mask in commit 11e5f407b64a ("cgroup/cpuset: Keep track of CPUs in isolated partitions") as the CPUs in the delmask may be added directly into isolated_cpus. As a result, isolated_cpus may incorrectly contain CPUs that are not isolated leading to incorrect data reporting. Fix this by adjusting the delmask to reflect the actual exclusive CPUs for the creation of the partition. Fixes: 11e5f407b64a ("cgroup/cpuset: Keep track of CPUs in isolated partitions") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 39c1fc643d770d..2c49d80f2a0139 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1679,9 +1679,9 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, if (nocpu) return PERR_NOCPUS; - cpumask_copy(tmp->delmask, xcpus); - deleting = true; - subparts_delta++; + deleting = cpumask_and(tmp->delmask, xcpus, parent->effective_xcpus); + if (deleting) + subparts_delta++; new_prs = (cmd == partcmd_enable) ? PRS_ROOT : PRS_ISOLATED; } else if (cmd == partcmd_disable) { /* From 8bf450f3aec3d1bbd725d179502c64b8992588e4 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:41 -0400 Subject: [PATCH 0018/2924] cgroup/cpuset: Fix error handling in remote_partition_disable() When remote_partition_disable() is called to disable a remote partition, it always sets the partition to an invalid partition state. It should only do so if an error code (prs_err) has been set. Correct that and add proper error code in places where remote_partition_disable() is called due to error. Fixes: 181c8e091aae ("cgroup/cpuset: Introduce remote partition") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 2c49d80f2a0139..e457b4b1db5d15 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1406,6 +1406,7 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, list_add(&cs->remote_sibling, &remote_children); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); + cs->prs_err = 0; /* * Propagate changes in top_cpuset's effective_cpus down the hierarchy. @@ -1436,9 +1437,11 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) list_del_init(&cs->remote_sibling); isolcpus_updated = partition_xcpus_del(cs->partition_root_state, NULL, tmp->new_cpus); - cs->partition_root_state = -cs->partition_root_state; - if (!cs->prs_err) - cs->prs_err = PERR_INVCPUS; + if (cs->prs_err) + cs->partition_root_state = -cs->partition_root_state; + else + cs->partition_root_state = PRS_MEMBER; + reset_partition_data(cs); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); @@ -1471,8 +1474,10 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus)); - if (cpumask_empty(newmask)) + if (cpumask_empty(newmask)) { + cs->prs_err = PERR_CPUSEMPTY; goto invalidate; + } adding = cpumask_andnot(tmp->addmask, newmask, cs->effective_xcpus); deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, newmask); @@ -1482,10 +1487,15 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, * not allocated to other partitions and there are effective_cpus * left in the top cpuset. */ - if (adding && (!capable(CAP_SYS_ADMIN) || - cpumask_intersects(tmp->addmask, subpartitions_cpus) || - cpumask_subset(top_cpuset.effective_cpus, tmp->addmask))) - goto invalidate; + if (adding) { + if (!capable(CAP_SYS_ADMIN)) + cs->prs_err = PERR_ACCESS; + else if (cpumask_intersects(tmp->addmask, subpartitions_cpus) || + cpumask_subset(top_cpuset.effective_cpus, tmp->addmask)) + cs->prs_err = PERR_NOCPUS; + if (cs->prs_err) + goto invalidate; + } spin_lock_irq(&callback_lock); if (adding) @@ -1601,7 +1611,7 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus) * The partcmd_update command is used by update_cpumasks_hier() with newmask * NULL and update_cpumask() with newmask set. The partcmd_invalidate is used * by update_cpumask() with NULL newmask. In both cases, the callers won't - * check for error and so partition_root_state and prs_error will be updated + * check for error and so partition_root_state and prs_err will be updated * directly. */ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, @@ -3739,6 +3749,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) if (remote && cpumask_empty(&new_cpus) && partition_is_populated(cs, NULL)) { + cs->prs_err = PERR_HOTPLUG; remote_partition_disable(cs, tmp); compute_effective_cpumask(&new_cpus, cs, parent); remote = false; From f62a5d39368e34a966c8df63e1f05eed7fe9c5de Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:42 -0400 Subject: [PATCH 0019/2924] cgroup/cpuset: Remove remote_partition_check() & make update_cpumasks_hier() handle remote partition Currently, changes in exclusive CPUs are being handled in remote_partition_check() by disabling conflicting remote partitions. However, that may lead to results unexpected by the users. Fix this problem by removing remote_partition_check() and making update_cpumasks_hier() handle changes in descendant remote partitions properly. The compute_effective_exclusive_cpumask() function is enhanced to check the exclusive_cpus and effective_xcpus from siblings and excluded them in its effective exclusive CPUs computation and return a value to show if there is any sibling conflicts. This is somewhat like the cpu_exclusive flag check in validate_change(). This is the initial step to enable us to retire the use of cpu_exclusive flag in cgroup v2 in the future. One of the tests in the TEST_MATRIX of the test_cpuset_prs.sh script has to be updated due to changes in the way a child remote partition root is being handled (updated instead of invalidation) in update_cpumasks_hier(). Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 258 ++++++++++-------- .../selftests/cgroup/test_cpuset_prs.sh | 4 +- 2 files changed, 143 insertions(+), 119 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index e457b4b1db5d15..0ab63f5974ba82 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -86,7 +86,6 @@ static struct list_head remote_children; * A flag to force sched domain rebuild at the end of an operation. * It can be set in * - update_partition_sd_lb() - * - remote_partition_check() * - update_cpumasks_hier() * - cpuset_update_flag() * - cpuset_hotplug_update_tasks() @@ -1340,20 +1339,57 @@ EXPORT_SYMBOL_GPL(cpuset_cpu_is_isolated); * compute_effective_exclusive_cpumask - compute effective exclusive CPUs * @cs: cpuset * @xcpus: effective exclusive CPUs value to be set - * Return: true if xcpus is not empty, false otherwise. + * @real_cs: the real cpuset (can be NULL) + * Return: 0 if there is no sibling conflict, > 0 otherwise * - * Starting with exclusive_cpus (cpus_allowed if exclusive_cpus is not set), - * it must be a subset of parent's effective_xcpus. + * If exclusive_cpus isn't explicitly set or a real_cs is provided, we have to + * scan the sibling cpusets and exclude their exclusive_cpus or effective_xcpus + * as well. The provision of real_cs means that a cpumask is being changed and + * the given cs is a trial one. */ -static bool compute_effective_exclusive_cpumask(struct cpuset *cs, - struct cpumask *xcpus) +static int compute_effective_exclusive_cpumask(struct cpuset *cs, + struct cpumask *xcpus, + struct cpuset *real_cs) { + struct cgroup_subsys_state *css; struct cpuset *parent = parent_cs(cs); + struct cpuset *sibling; + int retval = 0; if (!xcpus) xcpus = cs->effective_xcpus; - return cpumask_and(xcpus, user_xcpus(cs), parent->effective_xcpus); + cpumask_and(xcpus, user_xcpus(cs), parent->effective_xcpus); + + if (!real_cs) { + if (!cpumask_empty(cs->exclusive_cpus)) + return 0; + } else { + cs = real_cs; + } + + /* + * Exclude exclusive CPUs from siblings + */ + rcu_read_lock(); + cpuset_for_each_child(sibling, css, parent) { + if (sibling == cs) + continue; + + if (!cpumask_empty(sibling->exclusive_cpus) && + cpumask_intersects(xcpus, sibling->exclusive_cpus)) { + cpumask_andnot(xcpus, xcpus, sibling->exclusive_cpus); + retval++; + continue; + } + if (!cpumask_empty(sibling->effective_xcpus) && + cpumask_intersects(xcpus, sibling->effective_xcpus)) { + cpumask_andnot(xcpus, xcpus, sibling->effective_xcpus); + retval++; + } + } + rcu_read_unlock(); + return retval; } static inline bool is_remote_partition(struct cpuset *cs) @@ -1395,7 +1431,7 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, * remote partition root underneath it, its exclusive_cpus must * have overlapped with subpartitions_cpus. */ - compute_effective_exclusive_cpumask(cs, tmp->new_cpus); + compute_effective_exclusive_cpumask(cs, tmp->new_cpus, NULL); if (cpumask_empty(tmp->new_cpus) || cpumask_intersects(tmp->new_cpus, subpartitions_cpus) || cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus)) @@ -1404,8 +1440,10 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, spin_lock_irq(&callback_lock); isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus); list_add(&cs->remote_sibling, &remote_children); + cpumask_copy(cs->effective_xcpus, tmp->new_cpus); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); + cpuset_force_rebuild(); cs->prs_err = 0; /* @@ -1429,22 +1467,24 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) { bool isolcpus_updated; - compute_effective_exclusive_cpumask(cs, tmp->new_cpus); WARN_ON_ONCE(!is_remote_partition(cs)); - WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus)); + WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus)); spin_lock_irq(&callback_lock); list_del_init(&cs->remote_sibling); isolcpus_updated = partition_xcpus_del(cs->partition_root_state, - NULL, tmp->new_cpus); + NULL, cs->effective_xcpus); if (cs->prs_err) cs->partition_root_state = -cs->partition_root_state; else cs->partition_root_state = PRS_MEMBER; + /* effective_xcpus may need to be changed */ + compute_effective_exclusive_cpumask(cs, NULL, NULL); reset_partition_data(cs); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); + cpuset_force_rebuild(); /* * Propagate changes in top_cpuset's effective_cpus down the hierarchy. @@ -1456,14 +1496,15 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) /* * remote_cpus_update - cpus_exclusive change of remote partition * @cs: the cpuset to be updated - * @newmask: the new effective_xcpus mask + * @xcpus: the new exclusive_cpus mask, if non-NULL + * @excpus: the new effective_xcpus mask * @tmp: temporary masks * * top_cpuset and subpartitions_cpus will be updated or partition can be * invalidated. */ -static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, - struct tmpmasks *tmp) +static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus, + struct cpumask *excpus, struct tmpmasks *tmp) { bool adding, deleting; int prs = cs->partition_root_state; @@ -1474,13 +1515,13 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus)); - if (cpumask_empty(newmask)) { + if (cpumask_empty(excpus)) { cs->prs_err = PERR_CPUSEMPTY; goto invalidate; } - adding = cpumask_andnot(tmp->addmask, newmask, cs->effective_xcpus); - deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, newmask); + adding = cpumask_andnot(tmp->addmask, excpus, cs->effective_xcpus); + deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, excpus); /* * Additions of remote CPUs is only allowed if those CPUs are @@ -1502,8 +1543,17 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask); if (deleting) isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask); + /* + * Need to update effective_xcpus and exclusive_cpus now as + * update_sibling_cpumasks() below may iterate back to the same cs. + */ + cpumask_copy(cs->effective_xcpus, excpus); + if (xcpus) + cpumask_copy(cs->exclusive_cpus, xcpus); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); + if (adding || deleting) + cpuset_force_rebuild(); /* * Propagate changes in top_cpuset's effective_cpus down the hierarchy. @@ -1516,47 +1566,6 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, remote_partition_disable(cs, tmp); } -/* - * remote_partition_check - check if a child remote partition needs update - * @cs: the cpuset to be updated - * @newmask: the new effective_xcpus mask - * @delmask: temporary mask for deletion (not in tmp) - * @tmp: temporary masks - * - * This should be called before the given cs has updated its cpus_allowed - * and/or effective_xcpus. - */ -static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask, - struct cpumask *delmask, struct tmpmasks *tmp) -{ - struct cpuset *child, *next; - int disable_cnt = 0; - - /* - * Compute the effective exclusive CPUs that will be deleted. - */ - if (!cpumask_andnot(delmask, cs->effective_xcpus, newmask) || - !cpumask_intersects(delmask, subpartitions_cpus)) - return; /* No deletion of exclusive CPUs in partitions */ - - /* - * Searching the remote children list to look for those that will - * be impacted by the deletion of exclusive CPUs. - * - * Since a cpuset must be removed from the remote children list - * before it can go offline and holding cpuset_mutex will prevent - * any change in cpuset status. RCU read lock isn't needed. - */ - lockdep_assert_held(&cpuset_mutex); - list_for_each_entry_safe(child, next, &remote_children, remote_sibling) - if (cpumask_intersects(child->effective_cpus, delmask)) { - remote_partition_disable(child, tmp); - disable_cnt++; - } - if (disable_cnt) - cpuset_force_rebuild(); -} - /* * prstate_housekeeping_conflict - check for partition & housekeeping conflicts * @prstate: partition root state to be checked @@ -1629,6 +1638,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, bool nocpu; lockdep_assert_held(&cpuset_mutex); + WARN_ON_ONCE(is_remote_partition(cs)); /* * new_prs will only be changed for the partcmd_update and @@ -1670,13 +1680,20 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, nocpu = tasks_nocpu_error(parent, cs, xcpus); if ((cmd == partcmd_enable) || (cmd == partcmd_enablei)) { + /* + * Need to call compute_effective_exclusive_cpumask() in case + * exclusive_cpus not set. Sibling conflict should only happen + * if exclusive_cpus isn't set. + */ + xcpus = tmp->new_cpus; + if (compute_effective_exclusive_cpumask(cs, xcpus, NULL)) + WARN_ON_ONCE(!cpumask_empty(cs->exclusive_cpus)); + /* * Enabling partition root is not allowed if its - * effective_xcpus is empty or doesn't overlap with - * parent's effective_xcpus. + * effective_xcpus is empty. */ - if (cpumask_empty(xcpus) || - !cpumask_intersects(xcpus, parent->effective_xcpus)) + if (cpumask_empty(xcpus)) return PERR_INVCPUS; if (prstate_housekeeping_conflict(new_prs, xcpus)) @@ -1695,13 +1712,16 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, new_prs = (cmd == partcmd_enable) ? PRS_ROOT : PRS_ISOLATED; } else if (cmd == partcmd_disable) { /* - * May need to add cpus to parent's effective_cpus for - * valid partition root. + * May need to add cpus back to parent's effective_cpus + * (and maybe removed from subpartitions_cpus/isolated_cpus) + * for valid partition root. xcpus may contain CPUs that + * shouldn't be removed from the two global cpumasks. */ - adding = !is_prs_invalid(old_prs) && - cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); - if (adding) + if (is_partition_valid(cs)) { + cpumask_copy(tmp->addmask, cs->effective_xcpus); + adding = true; subparts_delta--; + } new_prs = PRS_MEMBER; } else if (newmask) { /* @@ -1711,6 +1731,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, part_error = PERR_CPUSEMPTY; goto write_error; } + /* Check newmask again, whether cpus are available for parent/cs */ nocpu |= tasks_nocpu_error(parent, cs, newmask); @@ -1927,7 +1948,7 @@ static void compute_partition_effective_cpumask(struct cpuset *cs, * 2) All the effective_cpus will be used up and cp * has tasks */ - compute_effective_exclusive_cpumask(cs, new_ecpus); + compute_effective_exclusive_cpumask(cs, new_ecpus, NULL); cpumask_and(new_ecpus, new_ecpus, cpu_active_mask); rcu_read_lock(); @@ -1935,6 +1956,11 @@ static void compute_partition_effective_cpumask(struct cpuset *cs, if (!is_partition_valid(child)) continue; + /* + * There shouldn't be a remote partition underneath another + * partition root. + */ + WARN_ON_ONCE(is_remote_partition(child)); child->prs_err = 0; if (!cpumask_subset(child->effective_xcpus, cs->effective_xcpus)) @@ -1990,32 +2016,39 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, bool remote = is_remote_partition(cp); bool update_parent = false; + old_prs = new_prs = cp->partition_root_state; + /* - * Skip descendent remote partition that acquires CPUs - * directly from top cpuset unless it is cs. + * For child remote partition root (!= cs), we need to call + * remote_cpus_update() if effective_xcpus will be changed. + * Otherwise, we can skip the whole subtree. + * + * remote_cpus_update() will reuse tmp->new_cpus only after + * its value is being processed. */ if (remote && (cp != cs)) { - pos_css = css_rightmost_descendant(pos_css); - continue; - } + compute_effective_exclusive_cpumask(cp, tmp->new_cpus, NULL); + if (cpumask_equal(cp->effective_xcpus, tmp->new_cpus)) { + pos_css = css_rightmost_descendant(pos_css); + continue; + } + rcu_read_unlock(); + remote_cpus_update(cp, NULL, tmp->new_cpus, tmp); + rcu_read_lock(); - /* - * Update effective_xcpus if exclusive_cpus set. - * The case when exclusive_cpus isn't set is handled later. - */ - if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs)) { - spin_lock_irq(&callback_lock); - compute_effective_exclusive_cpumask(cp, NULL); - spin_unlock_irq(&callback_lock); + /* Remote partition may be invalidated */ + new_prs = cp->partition_root_state; + remote = (new_prs == old_prs); } - old_prs = new_prs = cp->partition_root_state; - if (remote || (is_partition_valid(parent) && - is_partition_valid(cp))) + if (remote || (is_partition_valid(parent) && is_partition_valid(cp))) compute_partition_effective_cpumask(cp, tmp->new_cpus); else compute_effective_cpumask(tmp->new_cpus, cp, parent); + if (remote) + goto get_css; /* Ready to update cpuset data */ + /* * A partition with no effective_cpus is allowed as long as * there is no task associated with it. Call @@ -2035,9 +2068,6 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, if (is_in_v2_mode() && !remote && cpumask_empty(tmp->new_cpus)) cpumask_copy(tmp->new_cpus, parent->effective_cpus); - if (remote) - goto get_css; - /* * Skip the whole subtree if * 1) the cpumask remains the same, @@ -2098,6 +2128,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, spin_lock_irq(&callback_lock); cpumask_copy(cp->effective_cpus, tmp->new_cpus); cp->partition_root_state = new_prs; + if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs)) + compute_effective_exclusive_cpumask(cp, NULL, NULL); + /* * Make sure effective_xcpus is properly set for a valid * partition root. @@ -2184,7 +2217,14 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, parent); if (cpumask_equal(tmp->new_cpus, sibling->effective_cpus)) continue; + } else if (is_remote_partition(sibling)) { + /* + * Change in a sibling cpuset won't affect a remote + * partition root. + */ + continue; } + if (!css_tryget_online(&sibling->css)) continue; @@ -2241,8 +2281,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * trialcs->effective_xcpus is used as a temporary cpumask * for checking validity of the partition root. */ + trialcs->partition_root_state = PRS_MEMBER; if (!cpumask_empty(trialcs->exclusive_cpus) || is_partition_valid(cs)) - compute_effective_exclusive_cpumask(trialcs, NULL); + compute_effective_exclusive_cpumask(trialcs, NULL, cs); } /* Nothing to do if the cpus didn't change */ @@ -2315,19 +2356,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * Call remote_cpus_update() to handle valid remote partition */ if (is_remote_partition(cs)) - remote_cpus_update(cs, xcpus, &tmp); + remote_cpus_update(cs, NULL, xcpus, &tmp); else if (invalidate) update_parent_effective_cpumask(cs, partcmd_invalidate, NULL, &tmp); else update_parent_effective_cpumask(cs, partcmd_update, xcpus, &tmp); - } else if (!cpumask_empty(cs->exclusive_cpus)) { - /* - * Use trialcs->effective_cpus as a temp cpumask - */ - remote_partition_check(cs, trialcs->effective_xcpus, - trialcs->effective_cpus, &tmp); } spin_lock_irq(&callback_lock); @@ -2379,8 +2414,15 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (cpumask_equal(cs->exclusive_cpus, trialcs->exclusive_cpus)) return 0; - if (*buf) - compute_effective_exclusive_cpumask(trialcs, NULL); + if (*buf) { + trialcs->partition_root_state = PRS_MEMBER; + /* + * Reject the change if there is exclusive CPUs conflict with + * the siblings. + */ + if (compute_effective_exclusive_cpumask(trialcs, NULL, cs)) + return -EINVAL; + } /* * Check all the descendants in update_cpumasks_hier() if @@ -2411,8 +2453,8 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (invalidate) remote_partition_disable(cs, &tmp); else - remote_cpus_update(cs, trialcs->effective_xcpus, - &tmp); + remote_cpus_update(cs, trialcs->exclusive_cpus, + trialcs->effective_xcpus, &tmp); } else if (invalidate) { update_parent_effective_cpumask(cs, partcmd_invalidate, NULL, &tmp); @@ -2420,12 +2462,6 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, update_parent_effective_cpumask(cs, partcmd_update, trialcs->effective_xcpus, &tmp); } - } else if (!cpumask_empty(trialcs->exclusive_cpus)) { - /* - * Use trialcs->effective_cpus as a temp cpumask - */ - remote_partition_check(cs, trialcs->effective_xcpus, - trialcs->effective_cpus, &tmp); } spin_lock_irq(&callback_lock); cpumask_copy(cs->exclusive_cpus, trialcs->exclusive_cpus); @@ -2806,17 +2842,6 @@ static int update_prstate(struct cpuset *cs, int new_prs) if (alloc_cpumasks(NULL, &tmpmask)) return -ENOMEM; - /* - * Setup effective_xcpus if not properly set yet, it will be cleared - * later if partition becomes invalid. - */ - if ((new_prs > 0) && cpumask_empty(cs->exclusive_cpus)) { - spin_lock_irq(&callback_lock); - cpumask_and(cs->effective_xcpus, - cs->cpus_allowed, parent->effective_xcpus); - spin_unlock_irq(&callback_lock); - } - err = update_partition_exclusive(cs, new_prs); if (err) goto out; @@ -3753,7 +3778,6 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) remote_partition_disable(cs, tmp); compute_effective_cpumask(&new_cpus, cs, parent); remote = false; - cpuset_force_rebuild(); } /* diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 400a696a0d212e..4e3fabed52da0e 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -326,8 +326,8 @@ TEST_MATRIX=( . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ A1:P0,A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ - A1:P0,A3:P-2" + . . X3 . . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3,XA3:3 \ + A1:P0,A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \ A1:P0,A3:P-2" From 6da580ec656a5ed135db2cdf574b47635611a4d7 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:43 -0400 Subject: [PATCH 0020/2924] cgroup/cpuset: Don't allow creation of local partition over a remote one Currently, we don't allow the creation of a remote partition underneath another local or remote partition. However, it is currently possible to create a new local partition with an existing remote partition underneath it if top_cpuset is the parent. However, the current cpuset code does not set the effective exclusive CPUs correctly to account for those that are taken by the remote partition. Changing the code to properly account for those remote partition CPUs under all possible circumstances can be complex. It is much easier to not allow such a configuration which is not that useful. So forbid that by making sure that exclusive_cpus mask doesn't overlap with subpartitions_cpus and invalidate the partition if that happens. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset-internal.h | 1 + kernel/cgroup/cpuset.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h index 976a8bc3ff6031..383963e28ac69c 100644 --- a/kernel/cgroup/cpuset-internal.h +++ b/kernel/cgroup/cpuset-internal.h @@ -33,6 +33,7 @@ enum prs_errcode { PERR_CPUSEMPTY, PERR_HKEEPING, PERR_ACCESS, + PERR_REMOTE, }; /* bits in struct cpuset flags field */ diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 0ab63f5974ba82..d973e098797447 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -61,6 +61,7 @@ static const char * const perr_strings[] = { [PERR_CPUSEMPTY] = "cpuset.cpus and cpuset.cpus.exclusive are empty", [PERR_HKEEPING] = "partition config conflicts with housekeeping setup", [PERR_ACCESS] = "Enable partition not permitted", + [PERR_REMOTE] = "Have remote partition underneath", }; /* @@ -2855,6 +2856,19 @@ static int update_prstate(struct cpuset *cs, int new_prs) goto out; } + /* + * We don't support the creation of a new local partition with + * a remote partition underneath it. This unsupported + * setting can happen only if parent is the top_cpuset because + * a remote partition cannot be created underneath an existing + * local or remote partition. + */ + if ((parent == &top_cpuset) && + cpumask_intersects(cs->exclusive_cpus, subpartitions_cpus)) { + err = PERR_REMOTE; + goto out; + } + /* * If parent is valid partition, enable local partiion. * Otherwise, enable a remote partition. From f0a0bd3d23a44a2c5f628e8ca8ad882498ca5aae Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:44 -0400 Subject: [PATCH 0021/2924] cgroup/cpuset: Code cleanup and comment update Rename partition_xcpus_newstate() to isolated_cpus_update(), update_partition_exclusive() to update_partition_exclusive_flag() and the new_xcpus_state variable to isolcpus_updated to make their meanings more explicit. Also add some comments to further clarify the code. No functional change is expected. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 61 ++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index d973e098797447..6a72d7003875c5 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -65,7 +65,13 @@ static const char * const perr_strings[] = { }; /* - * Exclusive CPUs distributed out to sub-partitions of top_cpuset + * For local partitions, update to subpartitions_cpus & isolated_cpus is done + * in update_parent_effective_cpumask(). For remote partitions, it is done in + * the remote_partition_*() and remote_cpus_update() helpers. + */ +/* + * Exclusive CPUs distributed out to local or remote sub-partitions of + * top_cpuset */ static cpumask_var_t subpartitions_cpus; @@ -1089,9 +1095,14 @@ void cpuset_reset_sched_domains(void) * * Iterate through each task of @cs updating its cpus_allowed to the * effective cpuset's. As this function is called with cpuset_mutex held, - * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask() - * is used instead of effective_cpus to make sure all offline CPUs are also - * included as hotplug code won't update cpumasks for tasks in top_cpuset. + * cpuset membership stays stable. + * + * For top_cpuset, task_cpu_possible_mask() is used instead of effective_cpus + * to make sure all offline CPUs are also included as hotplug code won't + * update cpumasks for tasks in top_cpuset. + * + * As task_cpu_possible_mask() can be task dependent in arm64, we have to + * do cpu masking per task instead of doing it once for all. */ void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) { @@ -1151,7 +1162,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, * * Return: 0 if successful, an error code otherwise */ -static int update_partition_exclusive(struct cpuset *cs, int new_prs) +static int update_partition_exclusive_flag(struct cpuset *cs, int new_prs) { bool exclusive = (new_prs > PRS_MEMBER); @@ -1234,12 +1245,12 @@ static void reset_partition_data(struct cpuset *cs) } /* - * partition_xcpus_newstate - Exclusive CPUs state change + * isolated_cpus_update - Update the isolated_cpus mask * @old_prs: old partition_root_state * @new_prs: new partition_root_state * @xcpus: exclusive CPUs with state change */ -static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *xcpus) +static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus) { WARN_ON_ONCE(old_prs == new_prs); if (new_prs == PRS_ISOLATED) @@ -1273,8 +1284,8 @@ static bool partition_xcpus_add(int new_prs, struct cpuset *parent, isolcpus_updated = (new_prs != parent->partition_root_state); if (isolcpus_updated) - partition_xcpus_newstate(parent->partition_root_state, new_prs, - xcpus); + isolated_cpus_update(parent->partition_root_state, new_prs, + xcpus); cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus); return isolcpus_updated; @@ -1304,8 +1315,8 @@ static bool partition_xcpus_del(int old_prs, struct cpuset *parent, isolcpus_updated = (old_prs != parent->partition_root_state); if (isolcpus_updated) - partition_xcpus_newstate(old_prs, parent->partition_root_state, - xcpus); + isolated_cpus_update(old_prs, parent->partition_root_state, + xcpus); cpumask_and(xcpus, xcpus, cpu_active_mask); cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus); @@ -1634,8 +1645,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, int old_prs, new_prs; int part_error = PERR_NONE; /* Partition error? */ int subparts_delta = 0; - struct cpumask *xcpus; /* cs effective_xcpus */ int isolcpus_updated = 0; + struct cpumask *xcpus = user_xcpus(cs); bool nocpu; lockdep_assert_held(&cpuset_mutex); @@ -1647,7 +1658,6 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, */ adding = deleting = false; old_prs = new_prs = cs->partition_root_state; - xcpus = user_xcpus(cs); if (cmd == partcmd_invalidate) { if (is_prs_invalid(old_prs)) @@ -1861,7 +1871,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, * CPU lists in cs haven't been updated yet. So defer it to later. */ if ((old_prs != new_prs) && (cmd != partcmd_update)) { - int err = update_partition_exclusive(cs, new_prs); + int err = update_partition_exclusive_flag(cs, new_prs); if (err) return err; @@ -1899,7 +1909,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, update_unbound_workqueue_cpumask(isolcpus_updated); if ((old_prs != new_prs) && (cmd == partcmd_update)) - update_partition_exclusive(cs, new_prs); + update_partition_exclusive_flag(cs, new_prs); if (adding || deleting) { cpuset_update_tasks_cpumask(parent, tmp->addmask); @@ -2829,7 +2839,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) int err = PERR_NONE, old_prs = cs->partition_root_state; struct cpuset *parent = parent_cs(cs); struct tmpmasks tmpmask; - bool new_xcpus_state = false; + bool isolcpus_updated = false; if (old_prs == new_prs) return 0; @@ -2843,7 +2853,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) if (alloc_cpumasks(NULL, &tmpmask)) return -ENOMEM; - err = update_partition_exclusive(cs, new_prs); + err = update_partition_exclusive_flag(cs, new_prs); if (err) goto out; @@ -2884,8 +2894,9 @@ static int update_prstate(struct cpuset *cs, int new_prs) } else if (old_prs && new_prs) { /* * A change in load balance state only, no change in cpumasks. + * Need to update isolated_cpus. */ - new_xcpus_state = true; + isolcpus_updated = true; } else { /* * Switching back to member is always allowed even if it @@ -2909,7 +2920,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) */ if (err) { new_prs = -new_prs; - update_partition_exclusive(cs, new_prs); + update_partition_exclusive_flag(cs, new_prs); } spin_lock_irq(&callback_lock); @@ -2917,14 +2928,18 @@ static int update_prstate(struct cpuset *cs, int new_prs) WRITE_ONCE(cs->prs_err, err); if (!is_partition_valid(cs)) reset_partition_data(cs); - else if (new_xcpus_state) - partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus); + else if (isolcpus_updated) + isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus); spin_unlock_irq(&callback_lock); - update_unbound_workqueue_cpumask(new_xcpus_state); + update_unbound_workqueue_cpumask(isolcpus_updated); - /* Force update if switching back to member */ + /* Force update if switching back to member & update effective_xcpus */ update_cpumasks_hier(cs, &tmpmask, !new_prs); + /* A newly created partition must have effective_xcpus set */ + WARN_ON_ONCE(!old_prs && (new_prs > 0) + && cpumask_empty(cs->effective_xcpus)); + /* Update sched domains and load balance flag */ update_partition_sd_lb(cs, old_prs); From 52e039f9e2557f46b083d5d8ca94793ddea44a07 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:45 -0400 Subject: [PATCH 0022/2924] cgroup/cpuset: Remove unneeded goto in sched_partition_write() and rename it The goto statement in sched_partition_write() is not needed. Remove it and rename sched_partition_write()/sched_partition_show() to cpuset_partition_write()/cpuset_partition_show(). Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 6a72d7003875c5..4921919c87042c 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3272,7 +3272,7 @@ int cpuset_common_seq_show(struct seq_file *sf, void *v) return ret; } -static int sched_partition_show(struct seq_file *seq, void *v) +static int cpuset_partition_show(struct seq_file *seq, void *v) { struct cpuset *cs = css_cs(seq_css(seq)); const char *err, *type = NULL; @@ -3303,7 +3303,7 @@ static int sched_partition_show(struct seq_file *seq, void *v) return 0; } -static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, +static ssize_t cpuset_partition_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cpuset *cs = css_cs(of_css(of)); @@ -3324,11 +3324,8 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, css_get(&cs->css); cpus_read_lock(); mutex_lock(&cpuset_mutex); - if (!is_cpuset_online(cs)) - goto out_unlock; - - retval = update_prstate(cs, val); -out_unlock: + if (is_cpuset_online(cs)) + retval = update_prstate(cs, val); mutex_unlock(&cpuset_mutex); cpus_read_unlock(); css_put(&cs->css); @@ -3372,8 +3369,8 @@ static struct cftype dfl_files[] = { { .name = "cpus.partition", - .seq_show = sched_partition_show, - .write = sched_partition_write, + .seq_show = cpuset_partition_show, + .write = cpuset_partition_write, .private = FILE_PARTITION_ROOT, .flags = CFTYPE_NOT_ON_ROOT, .file_offset = offsetof(struct cpuset, partition_file), From 65046b5e0ad71990b5a0256710cf050d2d2ab3dd Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:46 -0400 Subject: [PATCH 0023/2924] selftest/cgroup: Update test_cpuset_prs.sh to use | as effective CPUs and state separator Currently, ',' is used as the cgroup separator of the expected effective CPUs and partition root states in the test matrix. However, ',' can be part of the output of the cpuset.cpus*.effective and cpuset.cpus.isolated files. Change the separator to '|' so that ',' can appear as part of the expected values. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- .../selftests/cgroup/test_cpuset_prs.sh | 236 +++++++++--------- 1 file changed, 118 insertions(+), 118 deletions(-) diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 4e3fabed52da0e..f11f347129d871 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -207,130 +207,130 @@ TEST_MATRIX=( " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" " C0-1 . . C2-3:P1 . . . C2 0 " " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0" - "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2" - "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1" - "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3" + "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" + "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" + "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" + "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # CPU offlining cases: - " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3" + "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1|A2:3|A3:3 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1|A2:2|A3:2 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # # Remote partition and cpuset.cpus.exclusive tests # - " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2" " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5" - " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3" - " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5" + " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3" + " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3" # Nested remote/local partition tests - " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P0,B1:P1" - " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ - A1:P0,A2:P2,A3:P1 2-4,2-3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \ - A1:P0,A2:P2,A3:P1 2" + " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P0|B1:P1" + " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \ + A1:P0|A2:P2|A3:P1 2-4|2-3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \ + A1:P0|A2:P2|A3:P1 2" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ - A1:P0,A2:P-2,A3:P-1" + . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \ + A1:P0|A2:P-2|A3:P-1" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \ - A1:P0,A2:P2,A3:P-1 2-4" + . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \ + A1:P0|A2:P2|A3:P-1 2-4" # Remote partition offline tests - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3," + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" # An invalidated remote partition cannot self-recover from hotplug - " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" + " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2" # cpus.exclusive.effective clearing test - " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:" + " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" # Invalid to valid remote partition transition test - " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2" + " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2" " C0-3:S+ C1-3:X3:P2 - . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3" + . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" # Invalid to valid local partition direct transition tests - " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3" - " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" - " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0" - " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3" - " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3" + " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" + " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" + " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0" + " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" + " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3|B1:4-5 A1:P2|B1:P0 0-3" # Local partition invalidation tests " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3" + . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" # Local partition CPU change tests - " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2" - " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3" + " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2" + " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3" # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ - A1:P0,A3:P-2" + . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \ + A1:P0|A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ - A1:P0,A3:P-2" + . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \ + A1:P0|A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P2 3" + . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \ + A1:P0|A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . . X3 . . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3,XA3:3 \ - A1:P0,A3:P2 3" + . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \ + A1:P0|A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \ - A1:P0,A3:P-2" + . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \ + A1:P0|A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- @@ -339,68 +339,68 @@ TEST_MATRIX=( # # Adding CPUs to partition root that are not in parent's # cpuset.cpus is allowed, but those extra CPUs are ignored. - "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1" # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. - "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1" - " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1" + " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # Changing a partition root to member makes child partitions invalid - "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1" - "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1" + "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1" + "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1" # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long # as they overlap. - "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1" # Deletion of CPUs distributed to child cgroup is allowed. - "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5" + "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5" # To become a valid partition root, cpuset.cpus must overlap parent's # cpuset.cpus. - " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1" + " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1" # Enabling partition with child cpusets is allowed - " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1" + " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1" - # A partition root with non-partition root parent is invalid, but it + # A partition root with non-partition root parent is invalid| but it # can be made valid if its parent becomes a partition root too. - " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2" - " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2" + " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" + " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2" # A non-exclusive cpuset.cpus change will invalidate partition and its siblings - " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0" - " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" - " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" + " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0" + " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1" + " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1" # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it - " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5" # Child partition root that try to take all CPUs from parent partition # with tasks will remain invalid. - " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" - " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1" - " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" + " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" + " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1" + " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't # affect cpuset.cpus.exclusive.effective. - " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3" + " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: # A task cannot be added to a partition with no cpu - "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1" # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected - " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5" # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive - " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5" ) # @@ -567,12 +567,12 @@ dump_states() # # Check effective cpus -# $1 - check string, format: :[,:]* +# $1 - check string, format: :[|:]* # check_effective_cpus() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 @@ -593,12 +593,12 @@ check_effective_cpus() # # Check cgroup states -# $1 - check string, format: :[,:]* +# $1 - check string, format: :[|:]* # check_cgroup_states() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 @@ -674,9 +674,9 @@ check_isolcpus() then EXPECT_VAL= EXPECT_VAL2= - elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]] + elif [[ $(expr $EXPECT_VAL : ".*|.*") > 0 ]] then - set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g") + set -- $(echo $EXPECT_VAL | sed -e "s/|/ /g") EXPECT_VAL=$1 EXPECT_VAL2=$2 else From b2b2b4d058b776be0168b4ea46ed84cfb0f884e9 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:47 -0400 Subject: [PATCH 0024/2924] selftest/cgroup: Clean up and restructure test_cpuset_prs.sh Cleaning up the test_cpuset_prs.sh script and restructure some of the functions so that a new test matrix with a different cgroup directory structure can be added in the next patch. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- .../selftests/cgroup/test_cpuset_prs.sh | 257 +++++++++++------- 1 file changed, 156 insertions(+), 101 deletions(-) diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index f11f347129d871..d99412e7d1965f 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -88,22 +88,30 @@ echo "" > test/cpuset.cpus # If isolated CPUs have been reserved at boot time (as shown in # cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8 # that will be used by this script for testing purpose. If not, some of -# the tests may fail incorrectly. These pre-isolated CPUs should stay in -# an isolated state throughout the testing process for now. +# the tests may fail incorrectly. Wait a bit and retry again just in case +# these isolated CPUs are leftover from previous run and have just been +# cleaned up earlier in this script. +# +# These pre-isolated CPUs should stay in an isolated state throughout the +# testing process for now. # BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +[[ -n "$BOOT_ISOLCPUS" ]] && { + sleep 0.5 + BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +} if [[ -n "$BOOT_ISOLCPUS" ]] then [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] && skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" fi + cleanup() { online_cpus cd $CGROUP2 - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - rmdir test > /dev/null 2>&1 + rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1 [[ -n "$SCHED_DEBUG" ]] && echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose } @@ -173,14 +181,22 @@ test_add_proc() # # Cgroup test hierarchy # -# root -- A1 -- A2 -- A3 -# +- B1 +# root +# | +# +------+------+ +# | | +# A1 B1 +# | +# A2 +# | +# A3 # # P = set cpus.partition (0:member, 1:root, 2:isolated) # C = add cpu-list to cpuset.cpus # X = add cpu-list to cpuset.cpus.exclusive # S

= use prefix in subtree_control # T = put a task into cgroup +# CX = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive # O= = Write to CPU online file of # # ECPUs - effective CPUs of cpusets @@ -453,25 +469,26 @@ set_ctrl_state() PFILE=$CGRP/cpuset.cpus.partition CFILE=$CGRP/cpuset.cpus XFILE=$CGRP/cpuset.cpus.exclusive - S=$(expr substr $CMD 1 1) - if [[ $S = S ]] - then - PREFIX=${CMD#?} + case $CMD in + S*) PREFIX=${CMD#?} COMM="echo ${PREFIX}${CTRL} > $SFILE" eval $COMM $REDIRECT - elif [[ $S = X ]] - then + ;; + X*) CPUS=${CMD#?} COMM="echo $CPUS > $XFILE" eval $COMM $REDIRECT - elif [[ $S = C ]] - then - CPUS=${CMD#?} + ;; + CX*) + CPUS=${CMD#??} + COMM="echo $CPUS > $CFILE; echo $CPUS > $XFILE" + eval $COMM $REDIRECT + ;; + C*) CPUS=${CMD#?} COMM="echo $CPUS > $CFILE" eval $COMM $REDIRECT - elif [[ $S = P ]] - then - VAL=${CMD#?} + ;; + P*) VAL=${CMD#?} case $VAL in 0) VAL=member ;; @@ -486,15 +503,17 @@ set_ctrl_state() esac COMM="echo $VAL > $PFILE" eval $COMM $REDIRECT - elif [[ $S = O ]] - then - VAL=${CMD#?} + ;; + O*) VAL=${CMD#?} write_cpu_online $VAL - elif [[ $S = T ]] - then - COMM="echo 0 > $TFILE" + ;; + T*) COMM="echo 0 > $TFILE" eval $COMM $REDIRECT - fi + ;; + *) echo "Unknown command: $CMD" + exit 1 + ;; + esac RET=$? [[ $RET -ne 0 ]] && { [[ -n "$SHOWERR" ]] && { @@ -532,21 +551,18 @@ online_cpus() } # -# Return 1 if the list of effective cpus isn't the same as the initial list. +# Remove all the test cgroup directories # reset_cgroup_states() { echo 0 > $CGROUP2/cgroup.procs online_cpus - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - pause 0.02 - set_ctrl_state . R- - pause 0.01 + rmdir $RESET_LIST > /dev/null 2>&1 } dump_states() { - for DIR in . A1 A1/A2 A1/A2/A3 B1 + for DIR in $CGROUP_LIST do CPUS=$DIR/cpuset.cpus ECPUS=$DIR/cpuset.cpus.effective @@ -565,6 +581,21 @@ dump_states() done } +# +# Set the actual cgroup directory into $CGRP_DIR +# $1 - cgroup name +# +set_cgroup_dir() +{ + CGRP_DIR=$1 + [[ $CGRP_DIR = A2 ]] && CGRP_DIR=A1/A2 + [[ $CGRP_DIR = A3 ]] && CGRP_DIR=A1/A2/A3 + [[ $CGRP_DIR = c11 ]] && CGRP_DIR=p1/c11 + [[ $CGRP_DIR = c12 ]] && CGRP_DIR=p1/c12 + [[ $CGRP_DIR = c21 ]] && CGRP_DIR=p2/c21 + [[ $CGRP_DIR = c22 ]] && CGRP_DIR=p2/c22 +} + # # Check effective cpus # $1 - check string, format: :[|:]* @@ -576,7 +607,8 @@ check_effective_cpus() do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CPUS=$2 + EXPECTED_CPUS=$2 + ACTUAL_CPUS= if [[ $CGRP = X* ]] then CGRP=${CGRP#X} @@ -584,10 +616,10 @@ check_effective_cpus() else FILE=cpuset.cpus.effective fi - [[ $CGRP = A2 ]] && CGRP=A1/A2 - [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 - [[ -e $CGRP/$FILE ]] || return 1 - [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1 + set_cgroup_dir $CGRP + [[ -e $CGRP_DIR/$FILE ]] || return 1 + ACTUAL_CPUS=$(cat $CGRP_DIR/$FILE) + [[ $EXPECTED_CPUS = $ACTUAL_CPUS ]] || return 1 done } @@ -602,23 +634,21 @@ check_cgroup_states() do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CGRP_DIR=$CGRP - STATE=$2 + EXPECTED_STATE=$2 FILE= - EVAL=$(expr substr $STATE 2 2) - [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2 - [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3 + EVAL=$(expr substr $EXPECTED_STATE 2 2) - case $STATE in + set_cgroup_dir $CGRP + case $EXPECTED_STATE in P*) FILE=$CGRP_DIR/cpuset.cpus.partition ;; - *) echo "Unknown state: $STATE!" + *) echo "Unknown state: $EXPECTED_STATE!" exit 1 ;; esac - VAL=$(cat $FILE) + ACTUAL_STATE=$(cat $FILE) - case "$VAL" in + case "$ACTUAL_STATE" in member) VAL=0 ;; root) VAL=1 @@ -642,7 +672,7 @@ check_cgroup_states() [[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && { DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective) [[ -n "$DOMS" ]] && - echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE + echo " [$CGRP_DIR] sched-domain: $DOMS" > $CONSOLE } done return 0 @@ -665,22 +695,22 @@ check_cgroup_states() # check_isolcpus() { - EXPECT_VAL=$1 - ISOLCPUS= + EXPECTED_ISOLCPUS=$1 + ISCPUS=${CGROUP2}/cpuset.cpus.isolated + ISOLCPUS=$(cat $ISCPUS) LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains - ISCPUS=${CGROUP2}/cpuset.cpus.isolated - if [[ $EXPECT_VAL = . ]] + if [[ $EXPECTED_ISOLCPUS = . ]] then - EXPECT_VAL= - EXPECT_VAL2= - elif [[ $(expr $EXPECT_VAL : ".*|.*") > 0 ]] + EXPECTED_ISOLCPUS= + EXPECTED_SDOMAIN= + elif [[ $(expr $EXPECTED_ISOLCPUS : ".*|.*") > 0 ]] then - set -- $(echo $EXPECT_VAL | sed -e "s/|/ /g") - EXPECT_VAL=$1 - EXPECT_VAL2=$2 + set -- $(echo $EXPECTED_ISOLCPUS | sed -e "s/|/ /g") + EXPECTED_ISOLCPUS=$2 + EXPECTED_SDOMAIN=$1 else - EXPECT_VAL2=$EXPECT_VAL + EXPECTED_SDOMAIN=$EXPECTED_ISOLCPUS fi # @@ -689,20 +719,21 @@ check_isolcpus() # to make appending those CPUs easier. # [[ -n "$BOOT_ISOLCPUS" ]] && { - EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS} - EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS} + EXPECTED_ISOLCPUS=${EXPECTED_ISOLCPUS:+${EXPECTED_ISOLCPUS},}${BOOT_ISOLCPUS} + EXPECTED_SDOMAIN=${EXPECTED_SDOMAIN:+${EXPECTED_SDOMAIN},}${BOOT_ISOLCPUS} } # # Check cpuset.cpus.isolated cpumask # - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && { # Take a 50ms pause and try again pause 0.05 ISOLCPUS=$(cat $ISCPUS) } - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && return 1 ISOLCPUS= + EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN # # Use the sched domain in debugfs to check isolated CPUs, if available @@ -736,7 +767,7 @@ check_isolcpus() done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU - [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] + [[ "$EXPECTED_SDOMAIN" = "$ISOLCPUS" ]] } test_fail() @@ -773,6 +804,63 @@ null_isolcpus_check() exit 1 } +# +# Check state transition test result +# $1 - Test number +# $2 - Expected effective CPU values +# $3 - Expected partition states +# $4 - Expected isolated CPUs +# +check_test_results() +{ + _NR=$1 + _ECPUS="$2" + _PSTATES="$3" + _ISOLCPUS="$4" + + [[ -n "$_ECPUS" && "$_ECPUS" != . ]] && { + check_effective_cpus $_ECPUS + [[ $? -ne 0 ]] && test_fail $_NR "effective CPU" \ + "Cgroup $CGRP: expected $EXPECTED_CPUS, got $ACTUAL_CPUS" + } + + [[ -n "$_PSTATES" && "$_PSTATES" != . ]] && { + check_cgroup_states $_PSTATES + [[ $? -ne 0 ]] && test_fail $_NR states \ + "Cgroup $CGRP: expected $EXPECTED_STATE, got $ACTUAL_STATE" + } + + # Compare the expected isolated CPUs with the actual ones, + # if available + [[ -n "$_ISOLCPUS" ]] && { + check_isolcpus $_ISOLCPUS + [[ $? -ne 0 ]] && { + [[ -n "$BOOT_ISOLCPUS" ]] && _ISOLCPUS=${_ISOLCPUS},${BOOT_ISOLCPUS} + test_fail $_NR "isolated CPU" \ + "Expect $_ISOLCPUS, get $ISOLCPUS instead" + } + } + reset_cgroup_states + # + # Check to see if effective cpu list changes + # + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + RETRY=0 + while [[ $_NEWLIST != $CPULIST && $RETRY -lt 8 ]] + do + # Wait a bit longer & recheck a few times + pause 0.02 + ((RETRY++)) + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + done + [[ $_NEWLIST != $CPULIST ]] && { + echo "Effective cpus changed to $_NEWLIST after test $_NR!" + exit 1 + } + null_isolcpus_check + [[ $VERBOSE -gt 0 ]] && echo "Test $I done." +} + # # Run cpuset state transition test # $1 - test matrix name @@ -785,6 +873,8 @@ run_state_test() { TEST=$1 CONTROLLER=cpuset + CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1" + RESET_LIST="A1/A2/A3 A1/A2 A1 B1" I=0 eval CNT="\${#$TEST[@]}" @@ -824,45 +914,7 @@ run_state_test() [[ $RETVAL -ne $RESULT ]] && test_fail $I result - [[ -n "$ECPUS" && "$ECPUS" != . ]] && { - check_effective_cpus $ECPUS - [[ $? -ne 0 ]] && test_fail $I "effective CPU" - } - - [[ -n "$STATES" && "$STATES" != . ]] && { - check_cgroup_states $STATES - [[ $? -ne 0 ]] && test_fail $I states - } - - # Compare the expected isolated CPUs with the actual ones, - # if available - [[ -n "$ICPUS" ]] && { - check_isolcpus $ICPUS - [[ $? -ne 0 ]] && { - [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS} - test_fail $I "isolated CPU" \ - "Expect $ICPUS, get $ISOLCPUS instead" - } - } - reset_cgroup_states - # - # Check to see if effective cpu list changes - # - NEWLIST=$(cat cpuset.cpus.effective) - RETRY=0 - while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] - do - # Wait a bit longer & recheck a few times - pause 0.02 - ((RETRY++)) - NEWLIST=$(cat cpuset.cpus.effective) - done - [[ $NEWLIST != $CPULIST ]] && { - echo "Effective cpus changed to $NEWLIST after test $I!" - exit 1 - } - null_isolcpus_check - [[ $VERBOSE -gt 0 ]] && echo "Test $I done." + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" ((I++)) done echo "All $I tests of $TEST PASSED." @@ -932,6 +984,7 @@ test_isolated() echo $$ > $CGROUP2/cgroup.procs [[ -d A1 ]] && rmdir A1 null_isolcpus_check + pause 0.05 } # @@ -997,6 +1050,8 @@ test_inotify() else echo "Inotify test PASSED" fi + echo member > cpuset.cpus.partition + echo "" > cpuset.cpus } trap cleanup 0 2 3 6 From e8a457b73569d7096ff46c307c37dbba55dd7a9c Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:48 -0400 Subject: [PATCH 0025/2924] selftest/cgroup: Add a remote partition transition test to test_cpuset_prs.sh The current cgroup directory layout for running the partition state transition tests is mainly suitable for testing local partitions as well as with a mix of local and remote partitions. It is not that suitable for doing extensive remote partition and nested remote/local partition testing. Add a new set of remote partition tests REMOTE_TEST_MATRIX with another cgroup directory structure more tailored for remote partition testing to provide better code coverage. Also add a few new test cases as well as adjusting existig ones for the original TEST_MATRIX. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- .../selftests/cgroup/test_cpuset_prs.sh | 154 ++++++++++++++++-- 1 file changed, 143 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index d99412e7d1965f..a17256d9f88a87 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -112,6 +112,8 @@ cleanup() online_cpus cd $CGROUP2 rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1 + rmdir rtest/p1/c11 rtest/p1/c12 rtest/p2/c21 \ + rtest/p2/c22 rtest/p1 rtest/p2 rtest > /dev/null 2>&1 [[ -n "$SCHED_DEBUG" ]] && echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose } @@ -223,9 +225,9 @@ TEST_MATRIX=( " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" " C0-1 . . C2-3:P1 . . . C2 0 " " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3" - "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3 A1:P1|A2:P1" + "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3" + "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3" + "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1" "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" @@ -291,7 +293,7 @@ TEST_MATRIX=( A1:P0|A2:P2|A3:P1 2" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \ - A1:P0|A2:P-2|A3:P-1" + A1:P0|A2:P-2|A3:P-1 ." " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \ A1:P0|A2:P2|A3:P-1 2-4" @@ -303,13 +305,13 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" # An invalidated remote partition cannot self-recover from hotplug - " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2" + " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ." # cpus.exclusive.effective clearing test " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" # Invalid to valid remote partition transition test - " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2" + " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ." " C0-3:S+ C1-3:X3:P2 . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" @@ -318,7 +320,6 @@ TEST_MATRIX=( " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0" " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" - " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3|B1:4-5 A1:P2|B1:P0 0-3" # Local partition invalidation tests " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ @@ -334,10 +335,10 @@ TEST_MATRIX=( # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \ - A1:P0|A3:P-2" + A1:P0|A3:P-2 ." " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \ - A1:P0|A3:P-2" + A1:P0|A3:P-2 ." " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \ A1:P0|A3:P2 3" @@ -385,7 +386,7 @@ TEST_MATRIX=( # A partition root with non-partition root parent is invalid| but it # can be made valid if its parent becomes a partition root too. " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" - " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2" + " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" # A non-exclusive cpuset.cpus change will invalidate partition and its siblings " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0" @@ -405,6 +406,17 @@ TEST_MATRIX=( # affect cpuset.cpus.exclusive.effective. " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3" + # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive + # but creating a local partition out of it is not allowed. Similarly and change + # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will + # invalidate it. + " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \ + A1:P0|A2:P2:B1:P1 2-4" + " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" + " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" + # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: @@ -419,6 +431,54 @@ TEST_MATRIX=( " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5" ) +# +# Cpuset controller remote partition test matrix. +# +# Cgroup test hierarchy +# +# root +# | +# rtest (cpuset.cpus.exclusive=1-7) +# | +# +------+------+ +# | | +# p1 p2 +# +--+--+ +--+--+ +# | | | | +# c11 c12 c21 c22 +# +# REMOTE_TEST_MATRIX uses the same notational convention as TEST_MATRIX. +# Only CPUs 1-7 should be used. +# +REMOTE_TEST_MATRIX=( + # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22 + # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS + # ------ ------ ------- ------- ------- ------- ----- ------ -------- + " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \ + . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \ + c11:P2|c12:P2|c21:P2|c22:P2 1-6" + " CX1-4:S+ . X1-2:P2 C3 . . \ + . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \ + p1:P0|c11:P2|c12:P0 1-2" + " CX1-4:S+ . X1-2:P2 . . . \ + X2-4 . . . . . p1:1,3-4|c11:2 \ + p1:P0|c11:P2 2" + " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \ + X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + . . X2 . . . p1:1|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + # p1 as member, will get its effective CPUs from its parent rtest + " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \ + p1:P0|c11:P2|c12:P1 1" + " CX1-4:S+ X5-6:P1:S+ . . . . \ + . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \ + p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \ + 1-2,4-6|1-2,5-6" +) + # # Write to the cpu online file # $1 - = where = cpu number, value to be written @@ -902,10 +962,11 @@ run_state_test() STATES=${11} ICPUS=${12} - set_ctrl_state_noerr B1 $OLD_B1 set_ctrl_state_noerr A1 $OLD_A1 set_ctrl_state_noerr A1/A2 $OLD_A2 set_ctrl_state_noerr A1/A2/A3 $OLD_A3 + set_ctrl_state_noerr B1 $OLD_B1 + RETVAL=0 set_ctrl_state A1 $NEW_A1; ((RETVAL += $?)) set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?)) @@ -920,6 +981,76 @@ run_state_test() echo "All $I tests of $TEST PASSED." } +# +# Run cpuset remote partition state transition test +# $1 - test matrix name +# +run_remote_state_test() +{ + TEST=$1 + CONTROLLER=cpuset + [[ -d rtest ]] || mkdir rtest + cd rtest + echo +cpuset > cgroup.subtree_control + echo "1-7" > cpuset.cpus + echo "1-7" > cpuset.cpus.exclusive + CGROUP_LIST=".. . p1 p2 p1/c11 p1/c12 p2/c21 p2/c22" + RESET_LIST="p1/c11 p1/c12 p2/c21 p2/c22 p1 p2" + I=0 + eval CNT="\${#$TEST[@]}" + + reset_cgroup_states + console_msg "Running remote partition state transition test ..." + + while [[ $I -lt $CNT ]] + do + echo "Running test $I ..." > $CONSOLE + [[ $VERBOSE -gt 1 ]] && { + echo "" + eval echo \${$TEST[$I]} + } + eval set -- "\${$TEST[$I]}" + OLD_p1=$1 + OLD_p2=$2 + OLD_c11=$3 + OLD_c12=$4 + OLD_c21=$5 + OLD_c22=$6 + NEW_p1=$7 + NEW_p2=$8 + NEW_c11=$9 + NEW_c12=${10} + NEW_c21=${11} + NEW_c22=${12} + ECPUS=${13} + STATES=${14} + ICPUS=${15} + + set_ctrl_state_noerr p1 $OLD_p1 + set_ctrl_state_noerr p2 $OLD_p2 + set_ctrl_state_noerr p1/c11 $OLD_c11 + set_ctrl_state_noerr p1/c12 $OLD_c12 + set_ctrl_state_noerr p2/c21 $OLD_c21 + set_ctrl_state_noerr p2/c22 $OLD_c22 + + RETVAL=0 + set_ctrl_state p1 $NEW_p1 ; ((RETVAL += $?)) + set_ctrl_state p2 $NEW_p2 ; ((RETVAL += $?)) + set_ctrl_state p1/c11 $NEW_c11; ((RETVAL += $?)) + set_ctrl_state p1/c12 $NEW_c12; ((RETVAL += $?)) + set_ctrl_state p2/c21 $NEW_c21; ((RETVAL += $?)) + set_ctrl_state p2/c22 $NEW_c22; ((RETVAL += $?)) + + [[ $RETVAL -ne 0 ]] && test_fail $I result + + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" + ((I++)) + done + cd .. + rmdir rtest + echo "All $I tests of $TEST PASSED." +} + # # Testing the new "isolated" partition root type # @@ -1056,6 +1187,7 @@ test_inotify() trap cleanup 0 2 3 6 run_state_test TEST_MATRIX +run_remote_state_test REMOTE_TEST_MATRIX test_isolated test_inotify echo "All tests PASSED." From acfcaf90db1fa833236d9f8249b6099cf638e5d1 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 27 Mar 2025 09:36:15 -0700 Subject: [PATCH 0026/2924] smccc: kvm_guest: Align with DISCOVER_IMPL_CPUS ABI The ABI of the hypercall requires that R2 and R3 are 0. Explicitly pass 0 for these parameters. Cc: Shameer Kolothum Fixes: 86edf6bdcf05 ("smccc/kvm_guest: Enable errata based on implementation CPUs") Reviewed-by: Shameer Kolothum Link: https://lore.kernel.org/r/20250327163613.2516073-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- drivers/firmware/smccc/kvm_guest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/smccc/kvm_guest.c b/drivers/firmware/smccc/kvm_guest.c index 5767aed25cdc05..ac2d3cf8a776a9 100644 --- a/drivers/firmware/smccc/kvm_guest.c +++ b/drivers/firmware/smccc/kvm_guest.c @@ -95,7 +95,7 @@ void __init kvm_arm_target_impl_cpu_init(void) for (i = 0; i < max_cpus; i++) { arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_CPUS_FUNC_ID, - i, &res); + i, 0, 0, &res); if (res.a0 != SMCCC_RET_SUCCESS) { pr_warn("Discovering target implementation CPUs failed\n"); goto mem_free; From 7d6c63c3191427a69ffd1383146df01f695d6195 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Tue, 1 Apr 2025 10:09:12 -0700 Subject: [PATCH 0027/2924] cgroup: rstat: call cgroup_rstat_updated_list with cgroup_rstat_lock The commit 093c8812de2d ("cgroup: rstat: Cleanup flushing functions and locking") during cleanup accidentally changed the code to call cgroup_rstat_updated_list() without cgroup_rstat_lock which is required. Fix it. Fixes: 093c8812de2d ("cgroup: rstat: Cleanup flushing functions and locking") Reported-by: Jakub Kicinski Reported-by: Breno Leitao Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/all/6564c3d6-9372-4352-9847-1eb3aea07ca4@linux.ibm.com/ Signed-off-by: Shakeel Butt Signed-off-by: Tejun Heo --- kernel/cgroup/rstat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 4bb587d5d34f97..b2239156b7def9 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -318,10 +318,11 @@ __bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp) might_sleep(); for_each_possible_cpu(cpu) { - struct cgroup *pos = cgroup_rstat_updated_list(cgrp, cpu); + struct cgroup *pos; /* Reacquire for each CPU to avoid disabling IRQs too long */ __cgroup_rstat_lock(cgrp, cpu); + pos = cgroup_rstat_updated_list(cgrp, cpu); for (; pos; pos = pos->rstat_flush_next) { struct cgroup_subsys_state *css; From 7cf6dd467e87664f5b3f4ca7be324569464edf0b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 1 Apr 2025 15:38:41 +0300 Subject: [PATCH 0028/2924] drm/virtio: Don't attach GEM to a non-created context in gem_object_open() The vfpriv->ctx_id is always initialized to a non-zero value. Check whether context was created before attaching GEM to this context ID. This left unnoticed previously because host silently skips attachment if context doesn't exist, still we shouldn't do that for consistency. Fixes: 086b9f27f0ab ("drm/virtio: Don't create a context with default param if context_init is supported") Cc: # v6.14+ Signed-off-by: Dmitry Osipenko Reviewed-by: Rob Clark Link: https://lore.kernel.org/r/20250401123842.2232205-1-dmitry.osipenko@collabora.com --- drivers/gpu/drm/virtio/virtgpu_gem.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index 5aab588fc400e7..3d6aa26fdb5343 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -115,13 +115,14 @@ int virtio_gpu_gem_object_open(struct drm_gem_object *obj, if (!vgdev->has_context_init) virtio_gpu_create_context(obj->dev, file); - objs = virtio_gpu_array_alloc(1); - if (!objs) - return -ENOMEM; - virtio_gpu_array_add_obj(objs, obj); + if (vfpriv->context_created) { + objs = virtio_gpu_array_alloc(1); + if (!objs) + return -ENOMEM; + virtio_gpu_array_add_obj(objs, obj); - if (vfpriv->ctx_id) virtio_gpu_cmd_context_attach_resource(vgdev, vfpriv->ctx_id, objs); + } out_notify: virtio_gpu_notify(vgdev); From 395cc80051f8da267b27496a4029dd931a198855 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 1 Apr 2025 15:38:42 +0300 Subject: [PATCH 0029/2924] drm/virtio: Fix missed dmabuf unpinning in error path of prepare_fb() Correct error handling in prepare_fb() to fix leaking resources when error happens. Fixes: 4a696a2ee646 ("drm/virtio: Add prepare and cleanup routines for imported dmabuf obj") Cc: # v6.14+ Acked-by: Vivek Kasireddy Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20250401123842.2232205-2-dmitry.osipenko@collabora.com --- drivers/gpu/drm/virtio/virtgpu_plane.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index 42aa554eca9fef..26abe3d1b122b4 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -322,12 +322,6 @@ static int virtio_gpu_plane_prepare_fb(struct drm_plane *plane, return 0; obj = new_state->fb->obj[0]; - if (obj->import_attach) { - ret = virtio_gpu_prepare_imported_obj(plane, new_state, obj); - if (ret) - return ret; - } - if (bo->dumb || obj->import_attach) { vgplane_st->fence = virtio_gpu_fence_alloc(vgdev, vgdev->fence_drv.context, @@ -336,7 +330,21 @@ static int virtio_gpu_plane_prepare_fb(struct drm_plane *plane, return -ENOMEM; } + if (obj->import_attach) { + ret = virtio_gpu_prepare_imported_obj(plane, new_state, obj); + if (ret) + goto err_fence; + } + return 0; + +err_fence: + if (vgplane_st->fence) { + dma_fence_put(&vgplane_st->fence->f); + vgplane_st->fence = NULL; + } + + return ret; } static void virtio_gpu_cleanup_imported_obj(struct drm_gem_object *obj) From a22b3d54de94f82ca057cc2ebf9496fa91ebf698 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 30 Mar 2025 17:52:39 -0400 Subject: [PATCH 0030/2924] cgroup/cpuset: Fix race between newly created partition and dying one There is a possible race between removing a cgroup diectory that is a partition root and the creation of a new partition. The partition to be removed can be dying but still online, it doesn't not currently participate in checking for exclusive CPUs conflict, but the exclusive CPUs are still there in subpartitions_cpus and isolated_cpus. These two cpumasks are global states that affect the operation of cpuset partitions. The exclusive CPUs in dying cpusets will only be removed when cpuset_css_offline() function is called after an RCU delay. As a result, it is possible that a new partition can be created with exclusive CPUs that overlap with those of a dying one. When that dying partition is finally offlined, it removes those overlapping exclusive CPUs from subpartitions_cpus and maybe isolated_cpus resulting in an incorrect CPU configuration. This bug was found when a warning was triggered in remote_partition_disable() during testing because the subpartitions_cpus mask was empty. One possible way to fix this is to iterate the dying cpusets as well and avoid using the exclusive CPUs in those dying cpusets. However, this can still cause random partition creation failures or other anomalies due to racing. A better way to fix this race is to reset the partition state at the moment when a cpuset is being killed. Introduce a new css_killed() CSS function pointer and call it, if defined, before setting CSS_DYING flag in kill_css(). Also update the css_is_dying() helper to use the CSS_DYING flag introduced by commit 33c35aa48178 ("cgroup: Prevent kill_css() from being called more than once") for proper synchronization. Add a new cpuset_css_killed() function to reset the partition state of a valid partition root if it is being killed. Fixes: ee8dde0cd2ce ("cpuset: Add new v2 cpuset.sched.partition flag") Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 1 + include/linux/cgroup.h | 2 +- kernel/cgroup/cgroup.c | 6 ++++++ kernel/cgroup/cpuset.c | 20 +++++++++++++++++--- 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 485b651869d9b5..5bc8f55c8cca14 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -710,6 +710,7 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_killed)(struct cgroup_subsys_state *css); void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); int (*css_extra_stat_show)(struct seq_file *seq, struct cgroup_subsys_state *css); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 28e999f2c64213..e7da3c3b098b3a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -344,7 +344,7 @@ static inline u64 cgroup_id(const struct cgroup *cgrp) */ static inline bool css_is_dying(struct cgroup_subsys_state *css) { - return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); + return css->flags & CSS_DYING; } static inline void cgroup_get(struct cgroup *cgrp) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f231fe3a0744f1..49d6222059977a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5909,6 +5909,12 @@ static void kill_css(struct cgroup_subsys_state *css) if (css->flags & CSS_DYING) return; + /* + * Call css_killed(), if defined, before setting the CSS_DYING flag + */ + if (css->ss->css_killed) + css->ss->css_killed(css); + css->flags |= CSS_DYING; /* diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 4921919c87042c..306b6043009145 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3536,9 +3536,6 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpus_read_lock(); mutex_lock(&cpuset_mutex); - if (is_partition_valid(cs)) - update_prstate(cs, 0); - if (!cpuset_v2() && is_sched_load_balance(cs)) cpuset_update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); @@ -3549,6 +3546,22 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpus_read_unlock(); } +static void cpuset_css_killed(struct cgroup_subsys_state *css) +{ + struct cpuset *cs = css_cs(css); + + cpus_read_lock(); + mutex_lock(&cpuset_mutex); + + /* Reset valid partition back to member */ + if (is_partition_valid(cs)) + update_prstate(cs, PRS_MEMBER); + + mutex_unlock(&cpuset_mutex); + cpus_read_unlock(); + +} + static void cpuset_css_free(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); @@ -3670,6 +3683,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .css_alloc = cpuset_css_alloc, .css_online = cpuset_css_online, .css_offline = cpuset_css_offline, + .css_killed = cpuset_css_killed, .css_free = cpuset_css_free, .can_attach = cpuset_can_attach, .cancel_attach = cpuset_cancel_attach, From 99f201a9a7d59d95da75695c41b3baf72c39efd3 Mon Sep 17 00:00:00 2001 From: Yedidya Benshimol Date: Tue, 1 Apr 2025 06:45:54 +0300 Subject: [PATCH 0031/2924] wifi: iwlwifi: mld: reduce scope for uninitialized variable After resuming from D3, keeping the connection or disconnecting isn't relevant for the case of netdetect. Reduce the scope of the keep_connection indicator to wowlan only. Fixes: d1e879ec600f9 ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Yedidya Benshimol Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250401064530.769f76a9ad6e.I69e8f194997eb3a20e40d27fdc31002d5753d905@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/d3.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/d3.c b/drivers/net/wireless/intel/iwlwifi/mld/d3.c index 5a7207accd8677..2c6e8ecd93b7ba 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/d3.c @@ -1895,7 +1895,6 @@ int iwl_mld_wowlan_resume(struct iwl_mld *mld) int link_id; int ret; bool fw_err = false; - bool keep_connection; lockdep_assert_wiphy(mld->wiphy); @@ -1965,7 +1964,7 @@ int iwl_mld_wowlan_resume(struct iwl_mld *mld) iwl_mld_process_netdetect_res(mld, bss_vif, &resume_data); mld->netdetect = false; } else { - keep_connection = + bool keep_connection = iwl_mld_process_wowlan_status(mld, bss_vif, resume_data.wowlan_status); @@ -1973,11 +1972,10 @@ int iwl_mld_wowlan_resume(struct iwl_mld *mld) if (keep_connection) iwl_mld_unblock_emlsr(mld, bss_vif, IWL_MLD_EMLSR_BLOCKED_WOWLAN); + else + ieee80211_resume_disconnect(bss_vif); } - if (!mld->netdetect && !keep_connection) - ieee80211_resume_disconnect(bss_vif); - goto out; err: From 676b902db4766aaec049d6ca4800dfa093599005 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Apr 2025 06:45:55 +0300 Subject: [PATCH 0032/2924] wifi: iwlwifi: mld: fix PM_SLEEP -Wundef warning Config symbols are not defined if turned off, so need to use #ifdef, not #if. Fixes: d1e879ec600f9 ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Johannes Berg Reviewed-by: Yedidya Ben Shimol Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250401064530.612020bcdaad.I4e885e6646576e29fb236250a1b5038d3f14b08e@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/iface.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.h b/drivers/net/wireless/intel/iwlwifi/mld/iface.h index d1d56b081bf634..ec14d0736cee6e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.h @@ -166,7 +166,7 @@ struct iwl_mld_vif { struct iwl_mld_emlsr emlsr; -#if CONFIG_PM_SLEEP +#ifdef CONFIG_PM_SLEEP struct iwl_mld_wowlan_data wowlan_data; #endif #ifdef CONFIG_IWLWIFI_DEBUGFS From 44605365f93518eacfc500665d965e88db4791c2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Mar 2025 09:43:30 +0100 Subject: [PATCH 0033/2924] iwlwifi: mld: fix building with CONFIG_PM_SLEEP disabled The newly added driver causes multiple build problems when CONFIG_PM_SLEEP is disabled: drivers/net/wireless/intel/iwlwifi/mld/mac80211.c:1982:12: error: 'iwl_mld_resume' defined but not used [-Werror=unused-function] 1982 | static int iwl_mld_resume(struct ieee80211_hw *hw) | ^~~~~~~~~~~~~~ drivers/net/wireless/intel/iwlwifi/mld/mac80211.c:1960:1: error: 'iwl_mld_suspend' defined but not used [-Werror=unused-function] 1960 | iwl_mld_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) | ^~~~~~~~~~~~~~~ drivers/net/wireless/intel/iwlwifi/mld/mac80211.c:1946:13: error: 'iwl_mld_set_wakeup' defined but not used [-Werror=unused-function] 1946 | static void iwl_mld_set_wakeup(struct ieee80211_hw *hw, bool enabled) | ^~~~~~~~~~~~~~~~~~ drivers/net/wireless/intel/iwlwifi/mld/mac80211.c: In function 'iwl_mld_mac80211_start': drivers/net/wireless/intel/iwlwifi/mld/mac80211.c:504:20: error: 'ret' is used uninitialized [-Werror=uninitialized] 504 | if (!in_d3 || ret) { | ^~ drivers/net/wireless/intel/iwlwifi/mld/mac80211.c:478:13: note: 'ret' was declared here 478 | int ret; | ^~~ Hide the unused functions and make sure the 'ret' variable is not used before the initialization. Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20250325084340.378724-1-arnd@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/mac80211.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 6851064b82da18..0b5bc5abb82da5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -501,7 +501,7 @@ int iwl_mld_mac80211_start(struct ieee80211_hw *hw) iwl_mld_restart_cleanup(mld); } - if (!in_d3 || ret) { + if (!in_d3) { ret = iwl_mld_start_fw(mld); if (ret) goto error; @@ -537,7 +537,8 @@ void iwl_mld_mac80211_stop(struct ieee80211_hw *hw, bool suspend) /* if the suspend flow fails the fw is in error. Stop it here, and it * will be started upon wakeup */ - if (!suspend || iwl_mld_no_wowlan_suspend(mld)) + if (!suspend || + (IS_ENABLED(CONFIG_PM_SLEEP) && iwl_mld_no_wowlan_suspend(mld))) iwl_mld_stop_fw(mld); /* HW is stopped, no more coming RX. OTOH, the worker can't run as the @@ -1943,6 +1944,7 @@ static void iwl_mld_sta_rc_update(struct ieee80211_hw *hw, } } +#ifdef CONFIG_PM_SLEEP static void iwl_mld_set_wakeup(struct ieee80211_hw *hw, bool enabled) { struct iwl_mld *mld = IWL_MAC80211_GET_MLD(hw); @@ -1994,6 +1996,7 @@ static int iwl_mld_resume(struct ieee80211_hw *hw) return 0; } +#endif static int iwl_mld_alloc_ptk_pn(struct iwl_mld *mld, struct iwl_mld_sta *mld_sta, From 9bb8deae8aec180f8127708ee484c3eedab2b86f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 29 Mar 2025 22:01:34 +0100 Subject: [PATCH 0034/2924] wifi: add wireless list to MAINTAINERS Add the wireless list to MAINTAINERS entries for all those drivers that are no longer covered by the entry now. Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20250329220135.8bfaffbad97d.I946354c2395f4a30b8c435857a92553b1b58df5b@changeid Signed-off-by: Johannes Berg --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1cd25139cc58a0..cbbc5098069000 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6239,6 +6239,7 @@ F: Documentation/process/cve.rst CW1200 WLAN driver S: Orphan +L: linux-wireless@vger.kernel.org F: drivers/net/wireless/st/ F: include/linux/platform_data/net-cw1200.h @@ -14069,6 +14070,7 @@ S: Odd fixes F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER +L: linux-wireless@vger.kernel.org L: libertas-dev@lists.infradead.org S: Orphan F: drivers/net/wireless/marvell/libertas/ @@ -19470,6 +19472,7 @@ F: drivers/media/tuners/qt1010* QUALCOMM ATH12K WIRELESS DRIVER M: Jeff Johnson +L: linux-wireless@vger.kernel.org L: ath12k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath12k @@ -19479,6 +19482,7 @@ N: ath12k QUALCOMM ATHEROS ATH10K WIRELESS DRIVER M: Jeff Johnson +L: linux-wireless@vger.kernel.org L: ath10k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k @@ -19488,6 +19492,7 @@ N: ath10k QUALCOMM ATHEROS ATH11K WIRELESS DRIVER M: Jeff Johnson +L: linux-wireless@vger.kernel.org L: ath11k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k @@ -21832,6 +21837,7 @@ F: drivers/platform/x86/touchscreen_dmi.c SILICON LABS WIRELESS DRIVERS (for WFxxx series) M: Jérôme Pouiller +L: linux-wireless@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml F: drivers/net/wireless/silabs/ From 27c7e63b3cb1a20bb78ed4a36c561ea4579fd7da Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Sun, 30 Mar 2025 16:01:10 +0530 Subject: [PATCH 0035/2924] wifi: at76c50x: fix use after free access in at76_disconnect The memory pointed to by priv is freed at the end of at76_delete_device function (using ieee80211_free_hw). But the code then accesses the udev field of the freed object to put the USB device. This may also lead to a memory leak of the usb device. Fix this by using udev from interface. Fixes: 29e20aa6c6af ("at76c50x-usb: fix use after free on failure path in at76_probe()") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20250330103110.44080-1-abdun.nihaal@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/atmel/at76c50x-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c index 504e05ea30f298..97ea7ab0f49102 100644 --- a/drivers/net/wireless/atmel/at76c50x-usb.c +++ b/drivers/net/wireless/atmel/at76c50x-usb.c @@ -2552,7 +2552,7 @@ static void at76_disconnect(struct usb_interface *interface) wiphy_info(priv->hw->wiphy, "disconnecting\n"); at76_delete_device(priv); - usb_put_dev(priv->udev); + usb_put_dev(interface_to_usbdev(interface)); dev_info(&interface->dev, "disconnected\n"); } From a104042e2bf6528199adb6ca901efe7b60c2c27f Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Mon, 24 Mar 2025 17:28:20 +0100 Subject: [PATCH 0036/2924] wifi: mac80211: Update skb's control block key in ieee80211_tx_dequeue() The ieee80211 skb control block key (set when skb was queued) could have been removed before ieee80211_tx_dequeue() call. ieee80211_tx_dequeue() already called ieee80211_tx_h_select_key() to get the current key, but the latter do not update the key in skb control block in case it is NULL. Because some drivers actually use this key in their TX callbacks (e.g. ath1{1,2}k_mac_op_tx()) this could lead to the use after free below: BUG: KASAN: slab-use-after-free in ath11k_mac_op_tx+0x590/0x61c Read of size 4 at addr ffffff803083c248 by task kworker/u16:4/1440 CPU: 3 UID: 0 PID: 1440 Comm: kworker/u16:4 Not tainted 6.13.0-ge128f627f404 #2 Hardware name: HW (DT) Workqueue: bat_events batadv_send_outstanding_bcast_packet Call trace: show_stack+0x14/0x1c (C) dump_stack_lvl+0x58/0x74 print_report+0x164/0x4c0 kasan_report+0xac/0xe8 __asan_report_load4_noabort+0x1c/0x24 ath11k_mac_op_tx+0x590/0x61c ieee80211_handle_wake_tx_queue+0x12c/0x1c8 ieee80211_queue_skb+0xdcc/0x1b4c ieee80211_tx+0x1ec/0x2bc ieee80211_xmit+0x224/0x324 __ieee80211_subif_start_xmit+0x85c/0xcf8 ieee80211_subif_start_xmit+0xc0/0xec4 dev_hard_start_xmit+0xf4/0x28c __dev_queue_xmit+0x6ac/0x318c batadv_send_skb_packet+0x38c/0x4b0 batadv_send_outstanding_bcast_packet+0x110/0x328 process_one_work+0x578/0xc10 worker_thread+0x4bc/0xc7c kthread+0x2f8/0x380 ret_from_fork+0x10/0x20 Allocated by task 1906: kasan_save_stack+0x28/0x4c kasan_save_track+0x1c/0x40 kasan_save_alloc_info+0x3c/0x4c __kasan_kmalloc+0xac/0xb0 __kmalloc_noprof+0x1b4/0x380 ieee80211_key_alloc+0x3c/0xb64 ieee80211_add_key+0x1b4/0x71c nl80211_new_key+0x2b4/0x5d8 genl_family_rcv_msg_doit+0x198/0x240 <...> Freed by task 1494: kasan_save_stack+0x28/0x4c kasan_save_track+0x1c/0x40 kasan_save_free_info+0x48/0x94 __kasan_slab_free+0x48/0x60 kfree+0xc8/0x31c kfree_sensitive+0x70/0x80 ieee80211_key_free_common+0x10c/0x174 ieee80211_free_keys+0x188/0x46c ieee80211_stop_mesh+0x70/0x2cc ieee80211_leave_mesh+0x1c/0x60 cfg80211_leave_mesh+0xe0/0x280 cfg80211_leave+0x1e0/0x244 <...> Reset SKB control block key before calling ieee80211_tx_h_select_key() to avoid that. Fixes: bb42f2d13ffc ("mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue") Signed-off-by: Remi Pommarel Link: https://patch.msgid.link/06aa507b853ca385ceded81c18b0a6dd0f081bc8.1742833382.git.repk@triplefau.lt Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 20179db88c4a6c..34f229a6eab01d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3894,6 +3894,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, * The key can be removed while the packet was queued, so need to call * this here to get the current key. */ + info->control.hw_key = NULL; r = ieee80211_tx_h_select_key(&tx); if (r != TX_CONTINUE) { ieee80211_free_txskb(&local->hw, skb); From 378677eb8f44621ecc9ce659f7af61e5baa94d81 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Mon, 24 Mar 2025 17:28:21 +0100 Subject: [PATCH 0037/2924] wifi: mac80211: Purge vif txq in ieee80211_do_stop() After ieee80211_do_stop() SKB from vif's txq could still be processed. Indeed another concurrent vif schedule_and_wake_txq call could cause those packets to be dequeued (see ieee80211_handle_wake_tx_queue()) without checking the sdata current state. Because vif.drv_priv is now cleared in this function, this could lead to driver crash. For example in ath12k, ahvif is store in vif.drv_priv. Thus if ath12k_mac_op_tx() is called after ieee80211_do_stop(), ahvif->ah can be NULL, leading the ath12k_warn(ahvif->ah,...) call in this function to trigger the NULL deref below. Unable to handle kernel paging request at virtual address dfffffc000000001 KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] batman_adv: bat0: Interface deactivated: brbh1337 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [dfffffc000000001] address between user and kernel address ranges Internal error: Oops: 0000000096000004 [#1] SMP CPU: 1 UID: 0 PID: 978 Comm: lbd Not tainted 6.13.0-g633f875b8f1e #114 Hardware name: HW (DT) pstate: 10000005 (nzcV daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : ath12k_mac_op_tx+0x6cc/0x29b8 [ath12k] lr : ath12k_mac_op_tx+0x174/0x29b8 [ath12k] sp : ffffffc086ace450 x29: ffffffc086ace450 x28: 0000000000000000 x27: 1ffffff810d59ca4 x26: ffffff801d05f7c0 x25: 0000000000000000 x24: 000000004000001e x23: ffffff8009ce4926 x22: ffffff801f9c0800 x21: ffffff801d05f7f0 x20: ffffff8034a19f40 x19: 0000000000000000 x18: ffffff801f9c0958 x17: ffffff800bc0a504 x16: dfffffc000000000 x15: ffffffc086ace4f8 x14: ffffff801d05f83c x13: 0000000000000000 x12: ffffffb003a0bf03 x11: 0000000000000000 x10: ffffffb003a0bf02 x9 : ffffff8034a19f40 x8 : ffffff801d05f818 x7 : 1ffffff0069433dc x6 : ffffff8034a19ee0 x5 : ffffff801d05f7f0 x4 : 0000000000000000 x3 : 0000000000000001 x2 : 0000000000000000 x1 : dfffffc000000000 x0 : 0000000000000008 Call trace: ath12k_mac_op_tx+0x6cc/0x29b8 [ath12k] (P) ieee80211_handle_wake_tx_queue+0x16c/0x260 ieee80211_queue_skb+0xeec/0x1d20 ieee80211_tx+0x200/0x2c8 ieee80211_xmit+0x22c/0x338 __ieee80211_subif_start_xmit+0x7e8/0xc60 ieee80211_subif_start_xmit+0xc4/0xee0 __ieee80211_subif_start_xmit_8023.isra.0+0x854/0x17a0 ieee80211_subif_start_xmit_8023+0x124/0x488 dev_hard_start_xmit+0x160/0x5a8 __dev_queue_xmit+0x6f8/0x3120 br_dev_queue_push_xmit+0x120/0x4a8 __br_forward+0xe4/0x2b0 deliver_clone+0x5c/0xd0 br_flood+0x398/0x580 br_dev_xmit+0x454/0x9f8 dev_hard_start_xmit+0x160/0x5a8 __dev_queue_xmit+0x6f8/0x3120 ip6_finish_output2+0xc28/0x1b60 __ip6_finish_output+0x38c/0x638 ip6_output+0x1b4/0x338 ip6_local_out+0x7c/0xa8 ip6_send_skb+0x7c/0x1b0 ip6_push_pending_frames+0x94/0xd0 rawv6_sendmsg+0x1a98/0x2898 inet_sendmsg+0x94/0xe0 __sys_sendto+0x1e4/0x308 __arm64_sys_sendto+0xc4/0x140 do_el0_svc+0x110/0x280 el0_svc+0x20/0x60 el0t_64_sync_handler+0x104/0x138 el0t_64_sync+0x154/0x158 To avoid that, empty vif's txq at ieee80211_do_stop() so no packet could be dequeued after ieee80211_do_stop() (new packets cannot be queued because SDATA_STATE_RUNNING is cleared at this point). Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Signed-off-by: Remi Pommarel Link: https://patch.msgid.link/ff7849e268562456274213c0476e09481a48f489.1742833382.git.repk@triplefau.lt Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b0423046028c57..183b63235000a7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -659,6 +659,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ieee80211_txq_remove_vlan(local, sdata); + if (sdata->vif.txq) + ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + sdata->bss = NULL; if (local->open_count == 0) From ff4ec537e48cfb84400f52ad102f6d82fe934580 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Mar 2025 17:36:00 +0300 Subject: [PATCH 0038/2924] wifi: iwlwifi: mld: silence uninitialized variable warning The "resp_len" isn't initialized if iwl_dhc_resp_data() fails. Fixes: b611cf6b57a8 ("wifi: iwlwifi: mld: add support for DHC_TOOLS_UMAC_GET_TAS_STATUS command") Signed-off-by: Dan Carpenter Acked-by: Miri Korenblit Link: https://patch.msgid.link/add9c9e2-3b44-4e0a-a4aa-7326f6425baf@stanley.mountain [fix typo in commit message] Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c index 453ce2ba39d1f9..89d95e9b4f301a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c @@ -396,8 +396,8 @@ static ssize_t iwl_dbgfs_tas_get_status_read(struct iwl_mld *mld, char *buf, .data[0] = &cmd, }; struct iwl_dhc_tas_status_resp *resp = NULL; + u32 resp_len = 0; ssize_t pos = 0; - u32 resp_len; u32 status; int ret; From 9e935c0fe3f806ff700a804c00832f0f340b6061 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Sun, 30 Mar 2025 16:04:24 +0530 Subject: [PATCH 0039/2924] wifi: brcmfmac: fix memory leak in brcmf_get_module_param The memory allocated for settings is not freed when brcmf_of_probe fails. Fix that by freeing settings before returning in error path. Fixes: 0ff0843310b7 ("wifi: brcmfmac: Add optional lpo clock enable support") Signed-off-by: Abdun Nihaal Acked-by: Arend van Spriel Link: https://patch.msgid.link/20250330103425.44197-1-abdun.nihaal@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index cfcf01eb0daa54..f26e4679e4ff02 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -561,8 +561,10 @@ struct brcmf_mp_device *brcmf_get_module_param(struct device *dev, if (!found) { /* No platform data for this device, try OF and DMI data */ brcmf_dmi_probe(settings, chip, chiprev); - if (brcmf_of_probe(dev, bus_type, settings) == -EPROBE_DEFER) + if (brcmf_of_probe(dev, bus_type, settings) == -EPROBE_DEFER) { + kfree(settings); return ERR_PTR(-EPROBE_DEFER); + } brcmf_acpi_probe(dev, bus_type, settings); } return settings; From a0f0dc96de03ffeefc2a177b7f8acde565cb77f4 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Sun, 30 Mar 2025 16:15:32 +0530 Subject: [PATCH 0040/2924] wifi: wl1251: fix memory leak in wl1251_tx_work The skb dequeued from tx_queue is lost when wl1251_ps_elp_wakeup fails with a -ETIMEDOUT error. Fix that by queueing the skb back to tx_queue. Fixes: c5483b719363 ("wl12xx: check if elp wakeup failed") Signed-off-by: Abdun Nihaal Reviewed-by: Michael Nemanov Link: https://patch.msgid.link/20250330104532.44935-1-abdun.nihaal@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ti/wl1251/tx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c index 474b603c121cba..adb4840b048932 100644 --- a/drivers/net/wireless/ti/wl1251/tx.c +++ b/drivers/net/wireless/ti/wl1251/tx.c @@ -342,8 +342,10 @@ void wl1251_tx_work(struct work_struct *work) while ((skb = skb_dequeue(&wl->tx_queue))) { if (!woken_up) { ret = wl1251_ps_elp_wakeup(wl); - if (ret < 0) + if (ret < 0) { + skb_queue_head(&wl->tx_queue, skb); goto out; + } woken_up = true; } From d24fa977eec53399a9a49a2e1dc592430ea0a607 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 30 Mar 2025 12:34:47 +0900 Subject: [PATCH 0041/2924] tracing: fprobe: Fix to lock module while registering fprobe Since register_fprobe() does not get the module reference count while registering fgraph filter, if the target functions (symbols) are in modules, those modules can be unloaded when registering fprobe to fgraph. To avoid this issue, get the reference counter of module for each symbol, and put it after register the fprobe. Link: https://lore.kernel.org/all/174330568792.459674.16874380163991113156.stgit@devnote2/ Reported-by: Steven Rostedt Closes: https://lore.kernel.org/all/20250325130628.3a9e234c@gandalf.local.home/ Fixes: 4346ba160409 ("fprobe: Rewrite fprobe on function-graph tracer") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/fprobe.c | 67 +++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 33082c4e8154ea..cb86f90d4b1edd 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -445,6 +445,7 @@ struct filter_match_data { size_t index; size_t size; unsigned long *addrs; + struct module **mods; }; static int filter_match_callback(void *data, const char *name, unsigned long addr) @@ -458,30 +459,47 @@ static int filter_match_callback(void *data, const char *name, unsigned long add if (!ftrace_location(addr)) return 0; - if (match->addrs) - match->addrs[match->index] = addr; + if (match->addrs) { + struct module *mod = __module_text_address(addr); + + if (mod && !try_module_get(mod)) + return 0; + match->mods[match->index] = mod; + match->addrs[match->index] = addr; + } match->index++; return match->index == match->size; } /* * Make IP list from the filter/no-filter glob patterns. - * Return the number of matched symbols, or -ENOENT. + * Return the number of matched symbols, or errno. + * If @addrs == NULL, this just counts the number of matched symbols. If @addrs + * is passed with an array, we need to pass the an @mods array of the same size + * to increment the module refcount for each symbol. + * This means we also need to call `module_put` for each element of @mods after + * using the @addrs. */ -static int ip_list_from_filter(const char *filter, const char *notfilter, - unsigned long *addrs, size_t size) +static int get_ips_from_filter(const char *filter, const char *notfilter, + unsigned long *addrs, struct module **mods, + size_t size) { struct filter_match_data match = { .filter = filter, .notfilter = notfilter, - .index = 0, .size = size, .addrs = addrs}; + .index = 0, .size = size, .addrs = addrs, .mods = mods}; int ret; + if (addrs && !mods) + return -EINVAL; + ret = kallsyms_on_each_symbol(filter_match_callback, &match); if (ret < 0) return ret; - ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match); - if (ret < 0) - return ret; + if (IS_ENABLED(CONFIG_MODULES)) { + ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match); + if (ret < 0) + return ret; + } return match.index ?: -ENOENT; } @@ -543,24 +561,35 @@ static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num) */ int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter) { - unsigned long *addrs; - int ret; + unsigned long *addrs __free(kfree) = NULL; + struct module **mods __free(kfree) = NULL; + int ret, num; if (!fp || !filter) return -EINVAL; - ret = ip_list_from_filter(filter, notfilter, NULL, FPROBE_IPS_MAX); - if (ret < 0) - return ret; + num = get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX); + if (num < 0) + return num; - addrs = kcalloc(ret, sizeof(unsigned long), GFP_KERNEL); + addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL); if (!addrs) return -ENOMEM; - ret = ip_list_from_filter(filter, notfilter, addrs, ret); - if (ret > 0) - ret = register_fprobe_ips(fp, addrs, ret); - kfree(addrs); + mods = kcalloc(num, sizeof(*mods), GFP_KERNEL); + if (!mods) + return -ENOMEM; + + ret = get_ips_from_filter(filter, notfilter, addrs, mods, num); + if (ret < 0) + return ret; + + ret = register_fprobe_ips(fp, addrs, ret); + + for (int i = 0; i < num; i++) { + if (mods[i]) + module_put(mods[i]); + } return ret; } EXPORT_SYMBOL_GPL(register_fprobe); From dd941507a9486252d6fcf11814387666792020f3 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 31 Mar 2025 23:05:07 +0900 Subject: [PATCH 0042/2924] tracing: fprobe events: Fix possible UAF on modules Commit ac91052f0ae5 ("tracing: tprobe-events: Fix leakage of module refcount") moved try_module_get() from __find_tracepoint_module_cb() to find_tracepoint() caller, but that introduced a possible UAF because the module can be unloaded before try_module_get(). In this case, the module object should be freed too. Thus, try_module_get() does not only fail but may access to the freed object. To avoid that, try_module_get() in __find_tracepoint_module_cb() again. Link: https://lore.kernel.org/all/174342990779.781946.9138388479067729366.stgit@devnote2/ Fixes: ac91052f0ae5 ("tracing: tprobe-events: Fix leakage of module refcount") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_fprobe.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 985ff98272da8f..2cd9ff1049f180 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -919,9 +919,15 @@ static void __find_tracepoint_module_cb(struct tracepoint *tp, struct module *mo struct __find_tracepoint_cb_data *data = priv; if (!data->tpoint && !strcmp(data->tp_name, tp->name)) { - data->tpoint = tp; - if (!data->mod) + /* If module is not specified, try getting module refcount. */ + if (!data->mod && mod) { + /* If failed to get refcount, ignore this tracepoint. */ + if (!try_module_get(mod)) + return; + data->mod = mod; + } + data->tpoint = tp; } } @@ -933,7 +939,11 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv) data->tpoint = tp; } -/* Find a tracepoint from kernel and module. */ +/* + * Find a tracepoint from kernel and module. If the tracepoint is on the module, + * the module's refcount is incremented and returned as *@tp_mod. Thus, if it is + * not NULL, caller must call module_put(*tp_mod) after used the tracepoint. + */ static struct tracepoint *find_tracepoint(const char *tp_name, struct module **tp_mod) { @@ -962,7 +972,10 @@ static void reenable_trace_fprobe(struct trace_fprobe *tf) } } -/* Find a tracepoint from specified module. */ +/* + * Find a tracepoint from specified module. In this case, this does not get the + * module's refcount. The caller must ensure the module is not freed. + */ static struct tracepoint *find_tracepoint_in_module(struct module *mod, const char *tp_name) { @@ -1169,11 +1182,6 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], if (is_tracepoint) { ctx->flags |= TPARG_FL_TPOINT; tpoint = find_tracepoint(symbol, &tp_mod); - /* lock module until register this tprobe. */ - if (tp_mod && !try_module_get(tp_mod)) { - tpoint = NULL; - tp_mod = NULL; - } if (tpoint) { ctx->funcname = kallsyms_lookup( (unsigned long)tpoint->probestub, From 7ca59947b5fcf94e7ea4029d1bd0f7c41500a161 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 1 Apr 2025 12:28:59 +0200 Subject: [PATCH 0043/2924] pwm: mediatek: Prevent divide-by-zero in pwm_mediatek_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With CONFIG_COMPILE_TEST && !CONFIG_HAVE_CLK, pwm_mediatek_config() has a divide-by-zero in the following line: do_div(resolution, clk_get_rate(pc->clk_pwms[pwm->hwpwm])); due to the fact that the !CONFIG_HAVE_CLK version of clk_get_rate() returns zero. This is presumably just a theoretical problem: COMPILE_TEST overrides the dependency on RALINK which would select COMMON_CLK. Regardless it's a good idea to check for the error explicitly to avoid divide-by-zero. Fixes the following warning: drivers/pwm/pwm-mediatek.o: warning: objtool: .text: unexpected end of section Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/fb56444939325cc173e752ba199abd7aeae3bf12.1742852847.git.jpoimboe@kernel.org [ukleinek: s/CONFIG_CLK/CONFIG_HAVE_CLK/] Fixes: caf065f8fd58 ("pwm: Add MediaTek PWM support") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/9e78a0796acba3435553ed7db1c7965dcffa6215.1743501688.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-mediatek.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 01dfa0fab80a44..7eaab58314995c 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -121,21 +121,25 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); u32 clkdiv = 0, cnt_period, cnt_duty, reg_width = PWMDWIDTH, reg_thres = PWMTHRES; + unsigned long clk_rate; u64 resolution; int ret; ret = pwm_mediatek_clk_enable(chip, pwm); - if (ret < 0) return ret; + clk_rate = clk_get_rate(pc->clk_pwms[pwm->hwpwm]); + if (!clk_rate) + return -EINVAL; + /* Make sure we use the bus clock and not the 26MHz clock */ if (pc->soc->has_ck_26m_sel) writel(0, pc->regs + PWM_CK_26M_SEL); /* Using resolution in picosecond gets accuracy higher */ resolution = (u64)NSEC_PER_SEC * 1000; - do_div(resolution, clk_get_rate(pc->clk_pwms[pwm->hwpwm])); + do_div(resolution, clk_rate); cnt_period = DIV_ROUND_CLOSEST_ULL((u64)period_ns * 1000, resolution); while (cnt_period > 8191) { From 1f5bdd3b0c7000156d99faeed19bd522615b38e3 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Wed, 26 Mar 2025 12:06:59 +0800 Subject: [PATCH 0044/2924] smccc: kvm_guest: Remove unneeded semicolon Remove unnecessary semicolons reported by Coccinelle/coccicheck and the semantic patch at scripts/coccinelle/misc/semicolon.cocci. Signed-off-by: Chen Ni Link: https://lore.kernel.org/r/20250326040659.1190696-1-nichen@iscas.ac.cn Signed-off-by: Oliver Upton --- drivers/firmware/smccc/kvm_guest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/smccc/kvm_guest.c b/drivers/firmware/smccc/kvm_guest.c index ac2d3cf8a776a9..a123c05cbc9e66 100644 --- a/drivers/firmware/smccc/kvm_guest.c +++ b/drivers/firmware/smccc/kvm_guest.c @@ -103,7 +103,7 @@ void __init kvm_arm_target_impl_cpu_init(void) target[i].midr = res.a1; target[i].revidr = res.a2; target[i].aidr = res.a3; - }; + } if (!cpu_errata_set_target_impl(max_cpus, target)) { pr_warn("Failed to set target implementation CPUs\n"); From fb8a3eba9c812b67f9cf5531e5b55d13a51e938e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 2 Apr 2025 13:17:23 -0700 Subject: [PATCH 0045/2924] KVM: arm64: Only read HPFAR_EL2 when value is architecturally valid KVM's logic for deciding when HPFAR_EL2 is UNKNOWN doesn't align with the architecture. Most notably, KVM assumes HPFAR_EL2 contains the faulting IPA even in the case of an SEA. Align the logic with the architecture rather than attempting to paraphrase it. Additionally, take the opportunity to improve the language around ARM erratum #834220 such that it actually describes the bug. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250402201725.2963645-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/esr.h | 22 ++++++++++-- arch/arm64/kvm/hyp/include/hyp/fault.h | 46 ++++++++++++++++---------- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d1b1a33f9a8b0d..92fb26e9084057 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -121,6 +121,15 @@ #define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) #define ESR_ELx_FSC_SECC (0x18) #define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) +#define ESR_ELx_FSC_ADDRSZ (0x00) + +/* + * Annoyingly, the negative levels for Address size faults aren't laid out + * contiguously (or in the desired order) + */ +#define ESR_ELx_FSC_ADDRSZ_nL(n) ((n) == -1 ? 0x25 : 0x2C) +#define ESR_ELx_FSC_ADDRSZ_L(n) ((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \ + (ESR_ELx_FSC_ADDRSZ + (n))) /* Status codes for individual page table levels */ #define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n)) @@ -161,8 +170,6 @@ #define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) /* ISS field definitions for exceptions taken in to Hyp */ -#define ESR_ELx_FSC_ADDRSZ (0x00) -#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n)) #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) @@ -464,6 +471,17 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) (esr == ESR_ELx_FSC_ACCESS_L(0)); } +static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr) +{ + esr &= ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(2)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(1)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(0)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(-1)); +} + /* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ static inline bool esr_iss_is_eretax(unsigned long esr) { diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 17df94570f03a5..59409685c14f7d 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -44,31 +44,41 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) return true; } +/* + * Checks for the conditions when HPFAR_EL2 is written, per ARM ARM R_FKLWR. + */ +static inline bool __hpfar_valid(u64 esr) +{ + /* + * CPUs affected by ARM erratum #834220 may incorrectly report a + * stage-2 translation fault when a stage-1 permission fault occurs. + * + * Re-walk the page tables to determine if a stage-1 fault actually + * occurred. + */ + if (cpus_have_final_cap(ARM64_WORKAROUND_834220) && + esr_fsc_is_translation_fault(esr)) + return false; + + if (esr_fsc_is_translation_fault(esr) || esr_fsc_is_access_flag_fault(esr)) + return true; + + if ((esr & ESR_ELx_S1PTW) && esr_fsc_is_permission_fault(esr)) + return true; + + return esr_fsc_is_addr_sz_fault(esr); +} + static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) { u64 hpfar, far; far = read_sysreg_el2(SYS_FAR); - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 - * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. - */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - esr_fsc_is_permission_fault(esr))) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { + if (__hpfar_valid(esr)) hpfar = read_sysreg(hpfar_el2); - } + else if (!__translate_far_to_hpfar(far, &hpfar)) + return false; fault->far_el2 = far; fault->hpfar_el2 = hpfar; From 1cf3e126f1528cdcaf77524f48e54ccbcb029473 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 2 Apr 2025 13:17:24 -0700 Subject: [PATCH 0046/2924] arm64: Convert HPFAR_EL2 to sysreg table Switch over to the typical sysreg table for HPFAR_EL2 as we're about to start using more fields in the register. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250402201725.2963645-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_emulate.h | 4 +++- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- arch/arm64/tools/sysreg | 7 +++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d7cf66573acaff..44e3fc6483c8d0 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -305,7 +305,9 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) { - return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; + u64 hpfar = vcpu->arch.fault.hpfar_el2; + + return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12; } static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index f34f11c720d707..5ce2230054d984 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -578,7 +578,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) return; } - addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; + addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12; ret = host_stage2_idmap(addr); BUG_ON(ret && ret != -EAGAIN); } diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 2c63662c1a4898..31ad9ce2b91c51 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -3433,3 +3433,10 @@ Field 5 F Field 4 P Field 3:0 Align EndSysreg + +Sysreg HPFAR_EL2 3 4 6 0 4 +Field 63 NS +Res0 62:48 +Field 47:4 FIPA +Res0 3:0 +EndSysreg From 26fbdf36922711f285fd185ad644f0acdf15959f Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 2 Apr 2025 13:17:25 -0700 Subject: [PATCH 0047/2924] KVM: arm64: Don't translate FAR if invalid/unsafe Don't re-walk the page tables if an SEA occurred during the faulting page table walk to avoid taking a fatal exception in the hyp. Additionally, check that FAR_EL2 is valid for SEAs not taken on PTW as the architecture doesn't guarantee it contains the fault VA. Finally, fix up the rest of the abort path by checking for SEAs early and bugging the VM if we get further along with an UNKNOWN fault IPA. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250402201725.2963645-4-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/esr.h | 22 ++++++++++++++++++ arch/arm64/include/asm/kvm_emulate.h | 3 +++ arch/arm64/include/asm/kvm_ras.h | 2 +- arch/arm64/kvm/hyp/include/hyp/fault.h | 26 ++++++++++++++++----- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 7 ++++++ arch/arm64/kvm/mmu.c | 31 ++++++++++++++++---------- 6 files changed, 73 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 92fb26e9084057..e4f77757937e65 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -482,6 +482,28 @@ static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr) (esr == ESR_ELx_FSC_ADDRSZ_L(-1)); } +static inline bool esr_fsc_is_sea_ttw(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_SEA_TTW(3)) || + (esr == ESR_ELx_FSC_SEA_TTW(2)) || + (esr == ESR_ELx_FSC_SEA_TTW(1)) || + (esr == ESR_ELx_FSC_SEA_TTW(0)) || + (esr == ESR_ELx_FSC_SEA_TTW(-1)); +} + +static inline bool esr_fsc_is_secc_ttw(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_SECC_TTW(3)) || + (esr == ESR_ELx_FSC_SECC_TTW(2)) || + (esr == ESR_ELx_FSC_SECC_TTW(1)) || + (esr == ESR_ELx_FSC_SECC_TTW(0)) || + (esr == ESR_ELx_FSC_SECC_TTW(-1)); +} + /* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ static inline bool esr_iss_is_eretax(unsigned long esr) { diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 44e3fc6483c8d0..bd020fc28aa9ca 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -307,6 +307,9 @@ static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu { u64 hpfar = vcpu->arch.fault.hpfar_el2; + if (unlikely(!(hpfar & HPFAR_EL2_NS))) + return INVALID_GPA; + return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12; } diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h index 87e10d9a635b55..9398ade632aaf9 100644 --- a/arch/arm64/include/asm/kvm_ras.h +++ b/arch/arm64/include/asm/kvm_ras.h @@ -14,7 +14,7 @@ * Was this synchronous external abort a RAS notification? * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. */ -static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr) +static inline int kvm_handle_guest_sea(void) { /* apei_claim_sea(NULL) expects to mask interrupts itself */ lockdep_assert_irqs_enabled(); diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 59409685c14f7d..fc573fc767b0e7 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -12,6 +12,16 @@ #include #include +static inline bool __fault_safe_to_translate(u64 esr) +{ + u64 fsc = esr & ESR_ELx_FSC; + + if (esr_fsc_is_sea_ttw(esr) || esr_fsc_is_secc_ttw(esr)) + return false; + + return !(fsc == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV)); +} + static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) { int ret; @@ -71,17 +81,23 @@ static inline bool __hpfar_valid(u64 esr) static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) { - u64 hpfar, far; + u64 hpfar; - far = read_sysreg_el2(SYS_FAR); + fault->far_el2 = read_sysreg_el2(SYS_FAR); + fault->hpfar_el2 = 0; if (__hpfar_valid(esr)) hpfar = read_sysreg(hpfar_el2); - else if (!__translate_far_to_hpfar(far, &hpfar)) + else if (unlikely(!__fault_safe_to_translate(esr))) + return true; + else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar)) return false; - fault->far_el2 = far; - fault->hpfar_el2 = hpfar; + /* + * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid + * HPFAR value. + */ + fault->hpfar_el2 = hpfar | HPFAR_EL2_NS; return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 5ce2230054d984..2a5284f749b427 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -578,7 +578,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) return; } + + /* + * Yikes, we couldn't resolve the fault IPA. This should reinject an + * abort into the host when we figure out how to do that. + */ + BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS)); addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12; + ret = host_stage2_idmap(addr); BUG_ON(ret && ret != -EAGAIN); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2feb6c6b63af6c..754f2fe0cc6738 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1794,9 +1794,28 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) gfn_t gfn; int ret, idx; + /* Synchronous External Abort? */ + if (kvm_vcpu_abt_issea(vcpu)) { + /* + * For RAS the host kernel may handle this abort. + * There is no need to pass the error into the guest. + */ + if (kvm_handle_guest_sea()) + kvm_inject_vabt(vcpu); + + return 1; + } + esr = kvm_vcpu_get_esr(vcpu); + /* + * The fault IPA should be reliable at this point as we're not dealing + * with an SEA. + */ ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + if (KVM_BUG_ON(ipa == INVALID_GPA, vcpu->kvm)) + return -EFAULT; + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); if (esr_fsc_is_translation_fault(esr)) { @@ -1818,18 +1837,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) } } - /* Synchronous External Abort? */ - if (kvm_vcpu_abt_issea(vcpu)) { - /* - * For RAS the host kernel may handle this abort. - * There is no need to pass the error into the guest. - */ - if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) - kvm_inject_vabt(vcpu); - - return 1; - } - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); From 72eea84a1092b50a10eeecfeba4b28ac9f1312ab Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 18 Mar 2025 17:43:43 +0800 Subject: [PATCH 0048/2924] scsi: iscsi: Fix missing scsi_host_put() in error path Add goto to ensure scsi_host_put() is called in all error paths of iscsi_set_host_param() function. This fixes a potential memory leak when strlen() check fails. Fixes: ce51c8170084 ("scsi: iscsi: Add strlen() check in iscsi_if_set{_host}_param()") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20250318094344.91776-1-linmq006@gmail.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 9c347c64c315f8..0b8c91bf793fcb 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -3182,11 +3182,14 @@ iscsi_set_host_param(struct iscsi_transport *transport, } /* see similar check in iscsi_if_set_param() */ - if (strlen(data) > ev->u.set_host_param.len) - return -EINVAL; + if (strlen(data) > ev->u.set_host_param.len) { + err = -EINVAL; + goto out; + } err = transport->set_host_param(shost, ev->u.set_host_param.param, data, ev->u.set_host_param.len); +out: scsi_host_put(shost); return err; } From 3d101165e72316775947d71321d97194f03dfef3 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 19 Mar 2025 15:30:18 +0000 Subject: [PATCH 0049/2924] scsi: ufs: exynos: Ensure pre_link() executes before exynos_ufs_phy_init() Ensure clocks are enabled before configuring unipro. Additionally move the pre_link() hook before the exynos_ufs_phy_init() calls. This means the register write sequence more closely resembles the ordering of the downstream driver. Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250319-exynos-ufs-stability-fixes-v2-1-96722cc2ba1b@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-exynos.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 13dd5dfc03eb38..cd750786187cde 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1049,9 +1049,14 @@ static int exynos_ufs_pre_link(struct ufs_hba *hba) exynos_ufs_config_intr(ufs, DFES_DEF_L4_ERRS, UNIPRO_L4); exynos_ufs_set_unipro_pclk_div(ufs); + exynos_ufs_setup_clocks(hba, true, PRE_CHANGE); + /* unipro */ exynos_ufs_config_unipro(ufs); + if (ufs->drv_data->pre_link) + ufs->drv_data->pre_link(ufs); + /* m-phy */ exynos_ufs_phy_init(ufs); if (!(ufs->opts & EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR)) { @@ -1059,11 +1064,6 @@ static int exynos_ufs_pre_link(struct ufs_hba *hba) exynos_ufs_config_phy_cap_attr(ufs); } - exynos_ufs_setup_clocks(hba, true, PRE_CHANGE); - - if (ufs->drv_data->pre_link) - ufs->drv_data->pre_link(ufs); - return 0; } From 68f5ef7eebf0f41df4d38ea55a54c2462af1e3d6 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 19 Mar 2025 15:30:19 +0000 Subject: [PATCH 0050/2924] scsi: ufs: exynos: Move UFS shareability value to drvdata gs101 I/O coherency shareability bits differ from exynosauto SoC. To support both SoCs move this info the SoC drvdata. Currently both the value and mask are the same for both gs101 and exynosauto, thus we use the same value. Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250319-exynos-ufs-stability-fixes-v2-2-96722cc2ba1b@linaro.org Fixes: d11e0a318df8 ("scsi: ufs: exynos: Add support for Tensor gs101 SoC") Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-exynos.c | 20 ++++++++++++++------ drivers/ufs/host/ufs-exynos.h | 2 ++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index cd750786187cde..1d4603f7622d18 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -92,11 +92,16 @@ UIC_TRANSPORT_NO_CONNECTION_RX |\ UIC_TRANSPORT_BAD_TC) -/* FSYS UFS Shareability */ -#define UFS_WR_SHARABLE BIT(2) -#define UFS_RD_SHARABLE BIT(1) -#define UFS_SHARABLE (UFS_WR_SHARABLE | UFS_RD_SHARABLE) -#define UFS_SHAREABILITY_OFFSET 0x710 +/* UFS Shareability */ +#define UFS_EXYNOSAUTO_WR_SHARABLE BIT(2) +#define UFS_EXYNOSAUTO_RD_SHARABLE BIT(1) +#define UFS_EXYNOSAUTO_SHARABLE (UFS_EXYNOSAUTO_WR_SHARABLE | \ + UFS_EXYNOSAUTO_RD_SHARABLE) +#define UFS_GS101_WR_SHARABLE BIT(1) +#define UFS_GS101_RD_SHARABLE BIT(0) +#define UFS_GS101_SHARABLE (UFS_GS101_WR_SHARABLE | \ + UFS_GS101_RD_SHARABLE) +#define UFS_SHAREABILITY_OFFSET 0x710 /* Multi-host registers */ #define MHCTRL 0xC4 @@ -210,7 +215,7 @@ static int exynos_ufs_shareability(struct exynos_ufs *ufs) if (ufs->sysreg) { return regmap_update_bits(ufs->sysreg, ufs->shareability_reg_offset, - UFS_SHARABLE, UFS_SHARABLE); + ufs->iocc_mask, ufs->iocc_mask); } return 0; @@ -1174,6 +1179,7 @@ static int exynos_ufs_parse_dt(struct device *dev, struct exynos_ufs *ufs) } } + ufs->iocc_mask = ufs->drv_data->iocc_mask; ufs->pclk_avail_min = PCLK_AVAIL_MIN; ufs->pclk_avail_max = PCLK_AVAIL_MAX; @@ -2034,6 +2040,7 @@ static const struct exynos_ufs_drv_data exynosauto_ufs_drvs = { .opts = EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL | EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR | EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX, + .iocc_mask = UFS_EXYNOSAUTO_SHARABLE, .drv_init = exynosauto_ufs_drv_init, .post_hce_enable = exynosauto_ufs_post_hce_enable, .pre_link = exynosauto_ufs_pre_link, @@ -2135,6 +2142,7 @@ static const struct exynos_ufs_drv_data gs101_ufs_drvs = { .opts = EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR | EXYNOS_UFS_OPT_UFSPR_SECURE | EXYNOS_UFS_OPT_TIMER_TICK_SELECT, + .iocc_mask = UFS_GS101_SHARABLE, .drv_init = gs101_ufs_drv_init, .pre_link = gs101_ufs_pre_link, .post_link = gs101_ufs_post_link, diff --git a/drivers/ufs/host/ufs-exynos.h b/drivers/ufs/host/ufs-exynos.h index 9670dc138d1e49..ad49d9cdd5c122 100644 --- a/drivers/ufs/host/ufs-exynos.h +++ b/drivers/ufs/host/ufs-exynos.h @@ -181,6 +181,7 @@ struct exynos_ufs_drv_data { struct exynos_ufs_uic_attr *uic_attr; unsigned int quirks; unsigned int opts; + u32 iocc_mask; /* SoC's specific operations */ int (*drv_init)(struct exynos_ufs *ufs); int (*pre_link)(struct exynos_ufs *ufs); @@ -231,6 +232,7 @@ struct exynos_ufs { const struct exynos_ufs_drv_data *drv_data; struct regmap *sysreg; u32 shareability_reg_offset; + u32 iocc_mask; u32 opts; #define EXYNOS_UFS_OPT_HAS_APB_CLK_CTRL BIT(0) From f92bb7436802f8eb7ee72dc911a33c8897fde366 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 19 Mar 2025 15:30:20 +0000 Subject: [PATCH 0051/2924] scsi: ufs: exynos: Disable iocc if dma-coherent property isn't set If dma-coherent property isn't set then descriptors are non-cacheable and the iocc shareability bits should be disabled. Without this UFS can end up in an incompatible configuration and suffer from random cache related stability issues. Suggested-by: Bart Van Assche Fixes: cc52e15397cc ("scsi: ufs: ufs-exynos: Support ExynosAuto v9 UFS") Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250319-exynos-ufs-stability-fixes-v2-3-96722cc2ba1b@linaro.org Cc: Chanho Park Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-exynos.c | 17 +++++++++++++---- drivers/ufs/host/ufs-exynos.h | 3 ++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 1d4603f7622d18..533904a4c1a728 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -214,8 +214,8 @@ static int exynos_ufs_shareability(struct exynos_ufs *ufs) /* IO Coherency setting */ if (ufs->sysreg) { return regmap_update_bits(ufs->sysreg, - ufs->shareability_reg_offset, - ufs->iocc_mask, ufs->iocc_mask); + ufs->iocc_offset, + ufs->iocc_mask, ufs->iocc_val); } return 0; @@ -1173,13 +1173,22 @@ static int exynos_ufs_parse_dt(struct device *dev, struct exynos_ufs *ufs) ufs->sysreg = NULL; else { if (of_property_read_u32_index(np, "samsung,sysreg", 1, - &ufs->shareability_reg_offset)) { + &ufs->iocc_offset)) { dev_warn(dev, "can't get an offset from sysreg. Set to default value\n"); - ufs->shareability_reg_offset = UFS_SHAREABILITY_OFFSET; + ufs->iocc_offset = UFS_SHAREABILITY_OFFSET; } } ufs->iocc_mask = ufs->drv_data->iocc_mask; + /* + * no 'dma-coherent' property means the descriptors are + * non-cacheable so iocc shareability should be disabled. + */ + if (of_dma_is_coherent(dev->of_node)) + ufs->iocc_val = ufs->iocc_mask; + else + ufs->iocc_val = 0; + ufs->pclk_avail_min = PCLK_AVAIL_MIN; ufs->pclk_avail_max = PCLK_AVAIL_MAX; diff --git a/drivers/ufs/host/ufs-exynos.h b/drivers/ufs/host/ufs-exynos.h index ad49d9cdd5c122..d0b3df221503c6 100644 --- a/drivers/ufs/host/ufs-exynos.h +++ b/drivers/ufs/host/ufs-exynos.h @@ -231,8 +231,9 @@ struct exynos_ufs { ktime_t entry_hibern8_t; const struct exynos_ufs_drv_data *drv_data; struct regmap *sysreg; - u32 shareability_reg_offset; + u32 iocc_offset; u32 iocc_mask; + u32 iocc_val; u32 opts; #define EXYNOS_UFS_OPT_HAS_APB_CLK_CTRL BIT(0) From 7f05fd9a3b6fb3a9abc5a748307d11831c03175f Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 19 Mar 2025 15:30:21 +0000 Subject: [PATCH 0052/2924] scsi: ufs: exynos: Ensure consistent phy reference counts ufshcd_link_startup() can call ufshcd_vops_link_startup_notify() multiple times when retrying. This causes the phy reference count to keep increasing and the phy to not properly re-initialize. If the phy has already been previously powered on, first issue a phy_power_off() and phy_exit(), before re-initializing and powering on again. Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250319-exynos-ufs-stability-fixes-v2-4-96722cc2ba1b@linaro.org Fixes: 3d73b200f989 ("scsi: ufs: ufs-exynos: Change ufs phy control sequence") Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-exynos.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 533904a4c1a728..e77b3c63e69811 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -962,6 +962,12 @@ static int exynos_ufs_phy_init(struct exynos_ufs *ufs) } phy_set_bus_width(generic_phy, ufs->avail_ln_rx); + + if (generic_phy->power_count) { + phy_power_off(generic_phy); + phy_exit(generic_phy); + } + ret = phy_init(generic_phy); if (ret) { dev_err(hba->dev, "%s: phy init failed, ret = %d\n", From deac9ad496ec17e1ec06848964ecc635bdaca703 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 19 Mar 2025 15:30:22 +0000 Subject: [PATCH 0053/2924] scsi: ufs: exynos: Enable PRDT pre-fetching with UFSHCD_CAP_CRYPTO PRDT_PREFETCH_ENABLE[31] bit should be set when desctype field of fmpsecurity0 register is type2 (double file encryption) or type3 (support for file and disk encryption). Setting this bit enables PRDT pre-fetching on both TXPRDT and RXPRDT. Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250319-exynos-ufs-stability-fixes-v2-5-96722cc2ba1b@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-exynos.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index e77b3c63e69811..ac71b878d872ff 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -34,7 +34,7 @@ * Exynos's Vendor specific registers for UFSHCI */ #define HCI_TXPRDT_ENTRY_SIZE 0x00 -#define PRDT_PREFECT_EN BIT(31) +#define PRDT_PREFETCH_EN BIT(31) #define HCI_RXPRDT_ENTRY_SIZE 0x04 #define HCI_1US_TO_CNT_VAL 0x0C #define CNT_VAL_1US_MASK 0x3FF @@ -1098,12 +1098,17 @@ static int exynos_ufs_post_link(struct ufs_hba *hba) struct exynos_ufs *ufs = ufshcd_get_variant(hba); struct phy *generic_phy = ufs->phy; struct exynos_ufs_uic_attr *attr = ufs->drv_data->uic_attr; + u32 val = ilog2(DATA_UNIT_SIZE); exynos_ufs_establish_connt(ufs); exynos_ufs_fit_aggr_timeout(ufs); hci_writel(ufs, 0xa, HCI_DATA_REORDER); - hci_writel(ufs, ilog2(DATA_UNIT_SIZE), HCI_TXPRDT_ENTRY_SIZE); + + if (hba->caps & UFSHCD_CAP_CRYPTO) + val |= PRDT_PREFETCH_EN; + hci_writel(ufs, val, HCI_TXPRDT_ENTRY_SIZE); + hci_writel(ufs, ilog2(DATA_UNIT_SIZE), HCI_RXPRDT_ENTRY_SIZE); hci_writel(ufs, (1 << hba->nutrs) - 1, HCI_UTRL_NEXUS_TYPE); hci_writel(ufs, (1 << hba->nutmrs) - 1, HCI_UTMRL_NEXUS_TYPE); From 67e4085015c33bf2fb552af1f171c58b81ef0616 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 19 Mar 2025 15:30:23 +0000 Subject: [PATCH 0054/2924] scsi: ufs: exynos: Move phy calls to .exit() callback ufshcd_pltfrm_remove() calls ufshcd_remove(hba) which in turn calls ufshcd_hba_exit(). By moving the phy_power_off() and phy_exit() calls to the newly created .exit callback they get called by ufshcd_variant_hba_exit() before ufshcd_hba_exit() turns off the regulators. This is also similar flow to the ufs-qcom driver. Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250319-exynos-ufs-stability-fixes-v2-6-96722cc2ba1b@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-exynos.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index ac71b878d872ff..b9fbc78be74ee8 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1522,6 +1522,14 @@ static int exynos_ufs_init(struct ufs_hba *hba) return ret; } +static void exynos_ufs_exit(struct ufs_hba *hba) +{ + struct exynos_ufs *ufs = ufshcd_get_variant(hba); + + phy_power_off(ufs->phy); + phy_exit(ufs->phy); +} + static int exynos_ufs_host_reset(struct ufs_hba *hba) { struct exynos_ufs *ufs = ufshcd_get_variant(hba); @@ -1977,6 +1985,7 @@ static int gs101_ufs_pre_pwr_change(struct exynos_ufs *ufs, static const struct ufs_hba_variant_ops ufs_hba_exynos_ops = { .name = "exynos_ufs", .init = exynos_ufs_init, + .exit = exynos_ufs_exit, .hce_enable_notify = exynos_ufs_hce_enable_notify, .link_startup_notify = exynos_ufs_link_startup_notify, .pwr_change_notify = exynos_ufs_pwr_change_notify, @@ -2015,13 +2024,7 @@ static int exynos_ufs_probe(struct platform_device *pdev) static void exynos_ufs_remove(struct platform_device *pdev) { - struct ufs_hba *hba = platform_get_drvdata(pdev); - struct exynos_ufs *ufs = ufshcd_get_variant(hba); - ufshcd_pltfrm_remove(pdev); - - phy_power_off(ufs->phy); - phy_exit(ufs->phy); } static struct exynos_ufs_uic_attr exynos7_uic_attr = { From cd4c0025069f16fc666c6ffc56c49c9b1154841f Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 19 Mar 2025 15:30:24 +0000 Subject: [PATCH 0055/2924] scsi: ufs: exynos: gs101: Put UFS device in reset on .suspend() GPIO_OUT[0] is connected to the reset pin of embedded UFS device. Before powering off the phy assert the reset signal. This is added as a gs101 specific suspend hook so as not to have any unintended consequences for other SoCs supported by this driver. Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250319-exynos-ufs-stability-fixes-v2-7-96722cc2ba1b@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-exynos.c | 10 ++++++++++ drivers/ufs/host/ufs-exynos.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index b9fbc78be74ee8..2436b9454480b0 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1700,6 +1700,12 @@ static void exynos_ufs_hibern8_notify(struct ufs_hba *hba, } } +static int gs101_ufs_suspend(struct exynos_ufs *ufs) +{ + hci_writel(ufs, 0 << 0, HCI_GPIO_OUT); + return 0; +} + static int exynos_ufs_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, enum ufs_notify_change_status status) { @@ -1708,6 +1714,9 @@ static int exynos_ufs_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, if (status == PRE_CHANGE) return 0; + if (ufs->drv_data->suspend) + ufs->drv_data->suspend(ufs); + if (!ufshcd_is_link_active(hba)) phy_power_off(ufs->phy); @@ -2170,6 +2179,7 @@ static const struct exynos_ufs_drv_data gs101_ufs_drvs = { .pre_link = gs101_ufs_pre_link, .post_link = gs101_ufs_post_link, .pre_pwr_change = gs101_ufs_pre_pwr_change, + .suspend = gs101_ufs_suspend, }; static const struct of_device_id exynos_ufs_of_match[] = { diff --git a/drivers/ufs/host/ufs-exynos.h b/drivers/ufs/host/ufs-exynos.h index d0b3df221503c6..3c6fe5132190ab 100644 --- a/drivers/ufs/host/ufs-exynos.h +++ b/drivers/ufs/host/ufs-exynos.h @@ -192,6 +192,7 @@ struct exynos_ufs_drv_data { struct ufs_pa_layer_attr *pwr); int (*pre_hce_enable)(struct exynos_ufs *ufs); int (*post_hce_enable)(struct exynos_ufs *ufs); + int (*suspend)(struct exynos_ufs *ufs); }; struct ufs_phy_time_cfg { From f7b705c238d1483f0a766e2b20010f176e5c0fb7 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Wed, 19 Mar 2025 23:03:05 +0000 Subject: [PATCH 0056/2924] scsi: pm80xx: Set phy_attached to zero when device is gone When a fatal error occurs, a phy down event may not be received to set phy->phy_attached to zero. Signed-off-by: Igor Pylypiv Signed-off-by: Salomon Dushimirimana Link: https://lore.kernel.org/r/20250319230305.3172920-1-salomondush@google.com Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_sas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 183ce00aa671ea..f7067878b34f3b 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -766,6 +766,7 @@ static void pm8001_dev_gone_notify(struct domain_device *dev) spin_lock_irqsave(&pm8001_ha->lock, flags); } PM8001_CHIP_DISP->dereg_dev_req(pm8001_ha, device_id); + pm8001_ha->phy[pm8001_dev->attached_phy].phy_attached = 0; pm8001_free_dev(pm8001_dev); } else { pm8001_dbg(pm8001_ha, DISC, "Found dev has gone.\n"); From a2d5a0072235a69749ceb04c1a26dc75df66a31a Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Fri, 21 Mar 2025 23:33:19 +0100 Subject: [PATCH 0057/2924] scsi: smartpqi: Use is_kdump_kernel() to check for kdump The smartpqi driver checks the reset_devices variable to determine whether special adjustments need to be made for kdump. This has the effect that after a regular kexec reboot, some driver parameters such as max_transfer_size are much lower than usual. More importantly, kexec reboot tests have revealed memory corruption caused by the driver log being written to system memory after a kexec. Fix this by testing is_kdump_kernel() rather than reset_devices where appropriate. Fixes: 058311b72f54 ("scsi: smartpqi: Add fw log to kdump") Signed-off-by: Martin Wilck Link: https://lore.kernel.org/r/20250321223319.109250-1-mwilck@suse.com Cc: Randy Wright Acked-by: Don Brace Tested-by: Don Brace Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 0da7be40c92580..e790b5d4e3c70a 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -5246,7 +5247,7 @@ static void pqi_calculate_io_resources(struct pqi_ctrl_info *ctrl_info) ctrl_info->error_buffer_length = ctrl_info->max_io_slots * PQI_ERROR_BUFFER_ELEMENT_LENGTH; - if (reset_devices) + if (is_kdump_kernel()) max_transfer_size = min(ctrl_info->max_transfer_size, PQI_MAX_TRANSFER_SIZE_KDUMP); else @@ -5275,7 +5276,7 @@ static void pqi_calculate_queue_resources(struct pqi_ctrl_info *ctrl_info) u16 num_elements_per_iq; u16 num_elements_per_oq; - if (reset_devices) { + if (is_kdump_kernel()) { num_queue_groups = 1; } else { int num_cpus; @@ -8288,12 +8289,12 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) u32 product_id; if (reset_devices) { - if (pqi_is_fw_triage_supported(ctrl_info)) { + if (is_kdump_kernel() && pqi_is_fw_triage_supported(ctrl_info)) { rc = sis_wait_for_fw_triage_completion(ctrl_info); if (rc) return rc; } - if (sis_is_ctrl_logging_supported(ctrl_info)) { + if (is_kdump_kernel() && sis_is_ctrl_logging_supported(ctrl_info)) { sis_notify_kdump(ctrl_info); rc = sis_wait_for_ctrl_logging_completion(ctrl_info); if (rc) @@ -8344,7 +8345,7 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) ctrl_info->product_id = (u8)product_id; ctrl_info->product_revision = (u8)(product_id >> 8); - if (reset_devices) { + if (is_kdump_kernel()) { if (ctrl_info->max_outstanding_requests > PQI_MAX_OUTSTANDING_REQUESTS_KDUMP) ctrl_info->max_outstanding_requests = @@ -8480,7 +8481,7 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) if (rc) return rc; - if (ctrl_info->ctrl_logging_supported && !reset_devices) { + if (ctrl_info->ctrl_logging_supported && !is_kdump_kernel()) { pqi_host_setup_buffer(ctrl_info, &ctrl_info->ctrl_log_memory, PQI_CTRL_LOG_TOTAL_SIZE, PQI_CTRL_LOG_MIN_SIZE); pqi_host_memory_update(ctrl_info, &ctrl_info->ctrl_log_memory, PQI_VENDOR_GENERAL_CTRL_LOG_MEMORY_UPDATE); } From bdab40480b146e2f37f4c7164cb47f526e77ee6d Mon Sep 17 00:00:00 2001 From: "Bao D. Nguyen" Date: Fri, 28 Mar 2025 13:17:11 -0700 Subject: [PATCH 0058/2924] scsi: ufs: core: Rename ufshcd_wb_presrv_usrspc_keep_vcc_on() The ufshcd_wb_presrv_usrspc_keep_vcc_on() function has deviated from its original implementation. The "_keep_vcc_on" part of the function name is misleading. Rename the function to ufshcd_wb_curr_buff_threshold_check() to improve the readability. Also, updated the comments in the function. There is no change to the functionality. Signed-off-by: Bao D. Nguyen Link: https://lore.kernel.org/r/02ae5e133f6ebf23b54d943e6d1d9de2544eb80e.1743192926.git.quic_nguyenb@quicinc.com Reviewed-by: Avri Altman Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 3288a7da73dc70..f317232433f939 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6068,7 +6068,7 @@ int ufshcd_wb_toggle_buf_flush(struct ufs_hba *hba, bool enable) return ret; } -static bool ufshcd_wb_presrv_usrspc_keep_vcc_on(struct ufs_hba *hba, +static bool ufshcd_wb_curr_buff_threshold_check(struct ufs_hba *hba, u32 avail_buf) { u32 cur_buf; @@ -6150,15 +6150,13 @@ static bool ufshcd_wb_need_flush(struct ufs_hba *hba) } /* - * The ufs device needs the vcc to be ON to flush. * With user-space reduction enabled, it's enough to enable flush * by checking only the available buffer. The threshold * defined here is > 90% full. * With user-space preserved enabled, the current-buffer * should be checked too because the wb buffer size can reduce * when disk tends to be full. This info is provided by current - * buffer (dCurrentWriteBoosterBufferSize). There's no point in - * keeping vcc on when current buffer is empty. + * buffer (dCurrentWriteBoosterBufferSize). */ index = ufshcd_wb_get_query_index(hba); ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, @@ -6173,7 +6171,7 @@ static bool ufshcd_wb_need_flush(struct ufs_hba *hba) if (!hba->dev_info.b_presrv_uspc_en) return avail_buf <= UFS_WB_BUF_REMAIN_PERCENT(10); - return ufshcd_wb_presrv_usrspc_keep_vcc_on(hba, avail_buf); + return ufshcd_wb_curr_buff_threshold_check(hba, avail_buf); } static void ufshcd_rpm_dev_flush_recheck_work(struct work_struct *work) From 1fd2e77b889761d9bde0c580518689d1d8e83117 Mon Sep 17 00:00:00 2001 From: "Bao D. Nguyen" Date: Fri, 28 Mar 2025 14:46:13 -0700 Subject: [PATCH 0059/2924] scsi: ufs: core: Add device level exception support The ufs device JEDEC specification version 4.1 adds support for the device level exception events. To support this new device level exception feature, expose two new sysfs nodes below to provide the user space access to the device level exception information. /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_count /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_id The device_lvl_exception_count sysfs node reports the number of device level exceptions that have occurred since the last time this variable is reset. Writing a value of 0 will reset it. The device_lvl_exception_id reports the exception ID which is the qDeviceLevelExceptionID attribute of the device JEDEC specifications version 4.1 and later. The user space application can query these sysfs nodes to get more information about the device level exception. Signed-off-by: Bao D. Nguyen Link: https://lore.kernel.org/r/6278d7c125b2f0cf5056f4a647a4b9c1fdd24fc7.1743198325.git.quic_nguyenb@quicinc.com Reviewed-by: Peter Wang Reviewed-by: Bart Van Assche Reviewed-by: Arthur Simchaev Signed-off-by: Martin K. Petersen --- Documentation/ABI/testing/sysfs-driver-ufs | 32 ++++++++++++ drivers/ufs/core/ufs-sysfs.c | 54 ++++++++++++++++++++ drivers/ufs/core/ufshcd-priv.h | 1 + drivers/ufs/core/ufshcd.c | 59 ++++++++++++++++++++++ include/ufs/ufs.h | 5 +- include/ufs/ufshcd.h | 5 ++ 6 files changed, 155 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs index ae0191295d29b8..e36d2de16cbdad 100644 --- a/Documentation/ABI/testing/sysfs-driver-ufs +++ b/Documentation/ABI/testing/sysfs-driver-ufs @@ -1604,3 +1604,35 @@ Description: prevent the UFS from frequently performing clock gating/ungating. The attribute is read/write. + +What: /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_count +What: /sys/bus/platform/devices/*.ufs/device_lvl_exception_count +Date: March 2025 +Contact: Bao D. Nguyen +Description: + This attribute is applicable to ufs devices compliant to the + JEDEC specifications version 4.1 or later. The + device_lvl_exception_count is a counter indicating the number of + times the device level exceptions have occurred since the last + time this variable is reset. Writing a 0 value to this + attribute will reset the device_lvl_exception_count. If the + device_lvl_exception_count reads a positive value, the user + application should read the device_lvl_exception_id attribute to + know more information about the exception. + + The attribute is read/write. + +What: /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_id +What: /sys/bus/platform/devices/*.ufs/device_lvl_exception_id +Date: March 2025 +Contact: Bao D. Nguyen +Description: + Reading the device_lvl_exception_id returns the + qDeviceLevelExceptionID attribute of the ufs device JEDEC + specification version 4.1. The definition of the + qDeviceLevelExceptionID is the ufs device vendor specific + implementation. Refer to the device manufacturer datasheet for + more information on the meaning of the qDeviceLevelExceptionID + attribute value. + + The attribute is read only. diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 90b5ab60f5ae4a..634cf163f4cb10 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -466,6 +466,56 @@ static ssize_t critical_health_show(struct device *dev, return sysfs_emit(buf, "%d\n", hba->critical_health_count); } +static ssize_t device_lvl_exception_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + if (hba->dev_info.wspecversion < 0x410) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", atomic_read(&hba->dev_lvl_exception_count)); +} + +static ssize_t device_lvl_exception_count_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + unsigned int value; + + if (kstrtouint(buf, 0, &value)) + return -EINVAL; + + /* the only supported usecase is to reset the dev_lvl_exception_count */ + if (value) + return -EINVAL; + + atomic_set(&hba->dev_lvl_exception_count, 0); + + return count; +} + +static ssize_t device_lvl_exception_id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + u64 exception_id; + int err; + + ufshcd_rpm_get_sync(hba); + err = ufshcd_read_device_lvl_exception_id(hba, &exception_id); + ufshcd_rpm_put_sync(hba); + + if (err) + return err; + + hba->dev_lvl_exception_id = exception_id; + return sysfs_emit(buf, "%llu\n", exception_id); +} + static DEVICE_ATTR_RW(rpm_lvl); static DEVICE_ATTR_RO(rpm_target_dev_state); static DEVICE_ATTR_RO(rpm_target_link_state); @@ -479,6 +529,8 @@ static DEVICE_ATTR_RW(wb_flush_threshold); static DEVICE_ATTR_RW(rtc_update_ms); static DEVICE_ATTR_RW(pm_qos_enable); static DEVICE_ATTR_RO(critical_health); +static DEVICE_ATTR_RW(device_lvl_exception_count); +static DEVICE_ATTR_RO(device_lvl_exception_id); static struct attribute *ufs_sysfs_ufshcd_attrs[] = { &dev_attr_rpm_lvl.attr, @@ -494,6 +546,8 @@ static struct attribute *ufs_sysfs_ufshcd_attrs[] = { &dev_attr_rtc_update_ms.attr, &dev_attr_pm_qos_enable.attr, &dev_attr_critical_health.attr, + &dev_attr_device_lvl_exception_count.attr, + &dev_attr_device_lvl_exception_id.attr, NULL }; diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index 10b4a19a70f10e..d0a2c963a27d37 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -94,6 +94,7 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba, enum query_opcode desc_op); int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable); +int ufshcd_read_device_lvl_exception_id(struct ufs_hba *hba, u64 *exception_id); /* Wrapper functions for safely calling variant operations */ static inline const char *ufshcd_get_var_name(struct ufs_hba *hba) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f317232433f939..4c1a92218b269b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5998,6 +5998,42 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) __func__, err); } +int ufshcd_read_device_lvl_exception_id(struct ufs_hba *hba, u64 *exception_id) +{ + struct utp_upiu_query_v4_0 *upiu_resp; + struct ufs_query_req *request = NULL; + struct ufs_query_res *response = NULL; + int err; + + if (hba->dev_info.wspecversion < 0x410) + return -EOPNOTSUPP; + + ufshcd_hold(hba); + mutex_lock(&hba->dev_cmd.lock); + + ufshcd_init_query(hba, &request, &response, + UPIU_QUERY_OPCODE_READ_ATTR, + QUERY_ATTR_IDN_DEV_LVL_EXCEPTION_ID, 0, 0); + + request->query_func = UPIU_QUERY_FUNC_STANDARD_READ_REQUEST; + + err = ufshcd_exec_dev_cmd(hba, DEV_CMD_TYPE_QUERY, QUERY_REQ_TIMEOUT); + + if (err) { + dev_err(hba->dev, "%s: failed to read device level exception %d\n", + __func__, err); + goto out; + } + + upiu_resp = (struct utp_upiu_query_v4_0 *)response; + *exception_id = get_unaligned_be64(&upiu_resp->osf3); +out: + mutex_unlock(&hba->dev_cmd.lock); + ufshcd_release(hba); + + return err; +} + static int __ufshcd_wb_toggle(struct ufs_hba *hba, bool set, enum flag_idn idn) { u8 index; @@ -6223,6 +6259,11 @@ static void ufshcd_exception_event_handler(struct work_struct *work) sysfs_notify(&hba->dev->kobj, NULL, "critical_health"); } + if (status & hba->ee_drv_mask & MASK_EE_DEV_LVL_EXCEPTION) { + atomic_inc(&hba->dev_lvl_exception_count); + sysfs_notify(&hba->dev->kobj, NULL, "device_lvl_exception_count"); + } + ufs_debugfs_exception_event(hba, status); } @@ -8122,6 +8163,22 @@ static void ufshcd_temp_notif_probe(struct ufs_hba *hba, const u8 *desc_buf) } } +static void ufshcd_device_lvl_exception_probe(struct ufs_hba *hba, u8 *desc_buf) +{ + u32 ext_ufs_feature; + + if (hba->dev_info.wspecversion < 0x410) + return; + + ext_ufs_feature = get_unaligned_be32(desc_buf + + DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP); + if (!(ext_ufs_feature & UFS_DEV_LVL_EXCEPTION_SUP)) + return; + + atomic_set(&hba->dev_lvl_exception_count, 0); + ufshcd_enable_ee(hba, MASK_EE_DEV_LVL_EXCEPTION); +} + static void ufshcd_set_rtt(struct ufs_hba *hba) { struct ufs_dev_info *dev_info = &hba->dev_info; @@ -8322,6 +8379,8 @@ static int ufs_get_device_desc(struct ufs_hba *hba) ufs_init_rtc(hba, desc_buf); + ufshcd_device_lvl_exception_probe(hba, desc_buf); + /* * ufshcd_read_string_desc returns size of the string * reset the error value diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 8a24ed59ec46f0..1c47136d8715f3 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -180,7 +180,8 @@ enum attr_idn { QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE = 0x1D, QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST = 0x1E, QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE = 0x1F, - QUERY_ATTR_IDN_TIMESTAMP = 0x30 + QUERY_ATTR_IDN_TIMESTAMP = 0x30, + QUERY_ATTR_IDN_DEV_LVL_EXCEPTION_ID = 0x34, }; /* Descriptor idn for Query requests */ @@ -390,6 +391,7 @@ enum { UFS_DEV_EXT_TEMP_NOTIF = BIT(6), UFS_DEV_HPB_SUPPORT = BIT(7), UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), + UFS_DEV_LVL_EXCEPTION_SUP = BIT(12), }; #define UFS_DEV_HPB_SUPPORT_VERSION 0x310 @@ -419,6 +421,7 @@ enum { MASK_EE_TOO_LOW_TEMP = BIT(4), MASK_EE_WRITEBOOSTER_EVENT = BIT(5), MASK_EE_PERFORMANCE_THROTTLING = BIT(6), + MASK_EE_DEV_LVL_EXCEPTION = BIT(7), MASK_EE_HEALTH_CRITICAL = BIT(9), }; #define MASK_EE_URGENT_TEMP (MASK_EE_TOO_HIGH_TEMP | MASK_EE_TOO_LOW_TEMP) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index f56050ce944598..e928ed0265ffc8 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -968,6 +968,9 @@ enum ufshcd_mcq_opr { * @pm_qos_req: PM QoS request handle * @pm_qos_enabled: flag to check if pm qos is enabled * @critical_health_count: count of critical health exceptions + * @dev_lvl_exception_count: count of device level exceptions since last reset + * @dev_lvl_exception_id: vendor specific information about the + * device level exception event. */ struct ufs_hba { void __iomem *mmio_base; @@ -1138,6 +1141,8 @@ struct ufs_hba { bool pm_qos_enabled; int critical_health_count; + atomic_t dev_lvl_exception_count; + u64 dev_lvl_exception_id; }; /** From a63b69f05f999acae91b0b50d7c5fe4fb241dbaf Mon Sep 17 00:00:00 2001 From: Li Haoran Date: Mon, 31 Mar 2025 15:55:11 +0800 Subject: [PATCH 0060/2924] scsi: scsi_transport_srp: Replace min/max nesting with clamp() This patch replaces min(a, max(b, c)) by clamp(val, lo, hi) in the SRP transport layer. The clamp() macro explicitly expresses the intent of constraining a value within bounds, improving code readability. Signed-off-by: Li Haoran Signed-off-by: Shao Mingyin Link: https://lore.kernel.org/r/202503311555115618U8Md16mKpRYOIy2TOmB6@zte.com.cn Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_srp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 64f6b22e8cc0c9..aeb58a9e6b7f1d 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -388,7 +388,7 @@ static void srp_reconnect_work(struct work_struct *work) "reconnect attempt %d failed (%d)\n", ++rport->failed_reconnects, res); delay = rport->reconnect_delay * - min(100, max(1, rport->failed_reconnects - 10)); + clamp(rport->failed_reconnects - 10, 1, 100); if (delay > 0) queue_delayed_work(system_long_wq, &rport->reconnect_work, delay * HZ); From aad9945623ab4029ae7789609fb6166c97976c62 Mon Sep 17 00:00:00 2001 From: Chandrakanth Patil Date: Thu, 3 Apr 2025 01:07:34 +0530 Subject: [PATCH 0061/2924] scsi: megaraid_sas: Block zero-length ATA VPD inquiry A firmware bug was observed where ATA VPD inquiry commands with a zero-length data payload were not handled and failed with a non-standard status code of 0xf0. Avoid sending ATA VPD inquiry commands without data payload by setting the device no_vpd_size flag to 1. In addition, if the firmware returns a status code of 0xf0, set scsi_cmnd->result to CHECK_CONDITION to facilitate proper error handling. Suggested-by: Martin K. Petersen Signed-off-by: Chandrakanth Patil Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250402193735.5098-1-chandrakanth.patil@broadcom.com Tested-by: Ryan Lahfa Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 9 +++++++-- drivers/scsi/megaraid/megaraid_sas_fusion.c | 5 ++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 28c75865967af3..d793924547f80e 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2103,6 +2103,9 @@ static int megasas_sdev_configure(struct scsi_device *sdev, /* This sdev property may change post OCR */ megasas_set_dynamic_target_properties(sdev, lim, is_target_prop); + if (!MEGASAS_IS_LOGICAL(sdev)) + sdev->no_vpd_size = 1; + mutex_unlock(&instance->reset_mutex); return 0; @@ -3662,8 +3665,10 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, case MFI_STAT_SCSI_IO_FAILED: case MFI_STAT_LD_INIT_IN_PROGRESS: - cmd->scmd->result = - (DID_ERROR << 16) | hdr->scsi_status; + if (hdr->scsi_status == 0xf0) + cmd->scmd->result = (DID_ERROR << 16) | SAM_STAT_CHECK_CONDITION; + else + cmd->scmd->result = (DID_ERROR << 16) | hdr->scsi_status; break; case MFI_STAT_SCSI_DONE_WITH_ERROR: diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 1eec23da28e2d6..1eea4df9e47d35 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2043,7 +2043,10 @@ map_cmd_status(struct fusion_context *fusion, case MFI_STAT_SCSI_IO_FAILED: case MFI_STAT_LD_INIT_IN_PROGRESS: - scmd->result = (DID_ERROR << 16) | ext_status; + if (ext_status == 0xf0) + scmd->result = (DID_ERROR << 16) | SAM_STAT_CHECK_CONDITION; + else + scmd->result = (DID_ERROR << 16) | ext_status; break; case MFI_STAT_SCSI_DONE_WITH_ERROR: From 1b4902f0a4f20aaea14d51a378368fa697467901 Mon Sep 17 00:00:00 2001 From: Chandrakanth Patil Date: Thu, 3 Apr 2025 01:07:35 +0530 Subject: [PATCH 0062/2924] scsi: megaraid_sas: Driver version update to 07.734.00.00-rc1 Signed-off-by: Chandrakanth Patil Link: https://lore.kernel.org/r/20250402193735.5098-2-chandrakanth.patil@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 088cc40ae866a0..8ee2bfe475715a 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -23,8 +23,8 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "07.727.03.00-rc1" -#define MEGASAS_RELDATE "Oct 03, 2023" +#define MEGASAS_VERSION "07.734.00.00-rc1" +#define MEGASAS_RELDATE "Apr 03, 2025" #define MEGASAS_MSIX_NAME_LEN 32 From 7fb6afa9125fc111478615e24231943c4f76cc2e Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Wed, 15 Jan 2025 09:58:59 +0100 Subject: [PATCH 0063/2924] drm/sti: remove duplicate object names When merging 2 drivers common object files were not deduplicated. Fixes: dcec16efd677 ("drm/sti: Build monolithic driver") Cc: stable@kernel.org Signed-off-by: Rolf Eike Beer Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/1920148.tdWV9SEqCh@devpool47.emlix.com Signed-off-by: Raphael Gallais-Pou --- drivers/gpu/drm/sti/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/sti/Makefile b/drivers/gpu/drm/sti/Makefile index f203ac5514ae0b..f778a4eee7c9cf 100644 --- a/drivers/gpu/drm/sti/Makefile +++ b/drivers/gpu/drm/sti/Makefile @@ -7,8 +7,6 @@ sti-drm-y := \ sti_compositor.o \ sti_crtc.o \ sti_plane.o \ - sti_crtc.o \ - sti_plane.o \ sti_hdmi.o \ sti_hdmi_tx3g4c28phy.o \ sti_dvo.o \ From aed06d36ba4e7fe90f2d4a85835cee7c80ea72a7 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 22 Feb 2025 01:35:30 +0000 Subject: [PATCH 0064/2924] ceph: Remove osd_client deadcode osd_req_op_extent_osd_data_pagelist() was added in 2013 as part of commit a4ce40a9a7c1 ("libceph: combine initializing and setting osd data") but never used. The last use of osd_req_op_cls_request_data_pagelist() was removed in 2017's commit ecd4a68a26a2 ("rbd: switch rbd_obj_method_sync() to ceph_osdc_call()") Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 6 ------ net/ceph/osd_client.c | 23 ----------------------- 2 files changed, 29 deletions(-) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d55b30057a4555..50b14a5661c7a4 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -490,9 +490,6 @@ extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); -extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -509,9 +506,6 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, unsigned int which, struct iov_iter *iter); -extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, unsigned int which, struct page **pages, u64 length, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b24afec241382b..6664ea73ccf81b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -220,16 +220,6 @@ void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages); -void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req, - unsigned int which, struct ceph_pagelist *pagelist) -{ - struct ceph_osd_data *osd_data; - - osd_data = osd_req_op_data(osd_req, which, extent, osd_data); - ceph_osd_data_pagelist_init(osd_data, pagelist); -} -EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist); - #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -297,19 +287,6 @@ static void osd_req_op_cls_request_info_pagelist( ceph_osd_data_pagelist_init(osd_data, pagelist); } -void osd_req_op_cls_request_data_pagelist( - struct ceph_osd_request *osd_req, - unsigned int which, struct ceph_pagelist *pagelist) -{ - struct ceph_osd_data *osd_data; - - osd_data = osd_req_op_data(osd_req, which, cls, request_data); - ceph_osd_data_pagelist_init(osd_data, pagelist); - osd_req->r_ops[which].cls.indata_len += pagelist->length; - osd_req->r_ops[which].indata_len += pagelist->length; -} -EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist); - void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) From f452a2204614fc10e2c3b85904c4bd300c2789dc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Mar 2025 10:47:11 +0000 Subject: [PATCH 0065/2924] ceph: Fix incorrect flush end position calculation In ceph, in fill_fscrypt_truncate(), the end flush position is calculated by: loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1; but that's using the block shift not the block size. Fix this to use the block size instead. Fixes: 5c64737d2536 ("ceph: add truncate size handling support for fscrypt") Signed-off-by: David Howells Reviewed-by: Viacheslav Dubeyko Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7dd6c2275085b9..e3ab07797c8508 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2362,7 +2362,7 @@ static int fill_fscrypt_truncate(struct inode *inode, /* Try to writeback the dirty pagecaches */ if (issued & (CEPH_CAP_FILE_BUFFER)) { - loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1; + loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1; ret = filemap_write_and_wait_range(inode->i_mapping, orig_pos, lend); From 72070e57b0a518ec8e562a2b68fdfc796ef5c040 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 4 Apr 2025 08:18:49 +0800 Subject: [PATCH 0066/2924] selftests: ublk: fix test_stripe_04 Commit 57ed58c13256 ("selftests: ublk: enable zero copy for stripe target") added test entry of test_stripe_04, but forgot to add the test script. So fix the test by adding the script file. Reported-by: Uday Shankar Signed-off-by: Ming Lei Reviewed-by: Uday Shankar Link: https://lore.kernel.org/r/20250404001849.1443064-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- .../testing/selftests/ublk/test_stripe_04.sh | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100755 tools/testing/selftests/ublk/test_stripe_04.sh diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh new file mode 100755 index 00000000000000..1f2b642381d179 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_04.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_04" +ERR_CODE=0 + +_prep_test "stripe" "mkfs & mount & umount on zero copy" + +backfile_0=$(_create_backfile 256M) +backfile_1=$(_create_backfile 256M) +dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1") +_check_add_dev $TID $? "$backfile_0" "$backfile_1" + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "stripe" + +_remove_backfile "$backfile_0" +_remove_backfile "$backfile_1" + +_show_result $TID $ERR_CODE From e7327c193014a4d8666e9c1cda09cf2c060518e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 1 Apr 2025 12:29:00 +0200 Subject: [PATCH 0067/2924] pwm: rcar: Improve register calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There were several issues in the function rcar_pwm_set_counter(): - The u64 values period_ns and duty_ns were cast to int on function call which might loose bits on 32 bit architectures. Fix: Make parameters to rcar_pwm_set_counter() u64 - The algorithm divided by the result of a division which looses precision. Fix: Make use of mul_u64_u64_div_u64() - The calculated values were just masked to fit the respective register fields which again might loose bits. Fix: Explicitly check for overlow Implement the respective fixes. A side effect of fixing the 2nd issue is that there is no division by 0 if clk_get_rate() returns 0. Fixes: ed6c1476bf7f ("pwm: Add support for R-Car PWM Timer") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/ab3dac794b2216cc1cc56d65c93dd164f8bd461b.1743501688.git.u.kleine-koenig@baylibre.com [ukleinek: Added an explicit #include to please the 0day build bot] Link: https://lore.kernel.org/oe-kbuild-all/202504031354.VJtxScP5-lkp@intel.com/ Reviewed-by: Geert Uytterhoeven Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-rcar.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 2261789cc27dae..578dbdd2d5a721 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -8,6 +8,7 @@ * - The hardware cannot generate a 0% duty cycle. */ +#include #include #include #include @@ -102,23 +103,24 @@ static void rcar_pwm_set_clock_control(struct rcar_pwm_chip *rp, rcar_pwm_write(rp, value, RCAR_PWMCR); } -static int rcar_pwm_set_counter(struct rcar_pwm_chip *rp, int div, int duty_ns, - int period_ns) +static int rcar_pwm_set_counter(struct rcar_pwm_chip *rp, int div, u64 duty_ns, + u64 period_ns) { - unsigned long long one_cycle, tmp; /* 0.01 nanoseconds */ + unsigned long long tmp; unsigned long clk_rate = clk_get_rate(rp->clk); u32 cyc, ph; - one_cycle = NSEC_PER_SEC * 100ULL << div; - do_div(one_cycle, clk_rate); + /* div <= 24 == RCAR_PWM_MAX_DIVISION, so the shift doesn't overflow. */ + tmp = mul_u64_u64_div_u64(period_ns, clk_rate, (u64)NSEC_PER_SEC << div); + if (tmp > FIELD_MAX(RCAR_PWMCNT_CYC0_MASK)) + tmp = FIELD_MAX(RCAR_PWMCNT_CYC0_MASK); - tmp = period_ns * 100ULL; - do_div(tmp, one_cycle); - cyc = (tmp << RCAR_PWMCNT_CYC0_SHIFT) & RCAR_PWMCNT_CYC0_MASK; + cyc = FIELD_PREP(RCAR_PWMCNT_CYC0_MASK, tmp); - tmp = duty_ns * 100ULL; - do_div(tmp, one_cycle); - ph = tmp & RCAR_PWMCNT_PH0_MASK; + tmp = mul_u64_u64_div_u64(duty_ns, clk_rate, (u64)NSEC_PER_SEC << div); + if (tmp > FIELD_MAX(RCAR_PWMCNT_PH0_MASK)) + tmp = FIELD_MAX(RCAR_PWMCNT_PH0_MASK); + ph = FIELD_PREP(RCAR_PWMCNT_PH0_MASK, tmp); /* Avoid prohibited setting */ if (cyc == 0 || ph == 0) From 928446a5302eee30ebb32075c0db5dda5a138fb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 1 Apr 2025 12:29:01 +0200 Subject: [PATCH 0068/2924] pwm: fsl-ftm: Handle clk_get_rate() returning 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Considering that the driver doesn't enable the used clocks (and also that clk_get_rate() returns 0 if CONFIG_HAVE_CLK is unset) better check the return value of clk_get_rate() for being non-zero before dividing by it. Fixes: 3479bbd1e1f8 ("pwm: fsl-ftm: More relaxed permissions for updating period") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/b68351a51017035651bc62ad3146afcb706874f0.1743501688.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-fsl-ftm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index 2510c10ca47303..c45a5fca4cbbd2 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -118,6 +118,9 @@ static unsigned int fsl_pwm_ticks_to_ns(struct fsl_pwm_chip *fpc, unsigned long long exval; rate = clk_get_rate(fpc->clk[fpc->period.clk_select]); + if (rate >> fpc->period.clk_ps == 0) + return 0; + exval = ticks; exval *= 1000000000UL; do_div(exval, rate >> fpc->period.clk_ps); @@ -190,6 +193,9 @@ static unsigned int fsl_pwm_calculate_duty(struct fsl_pwm_chip *fpc, unsigned int period = fpc->period.mod_period + 1; unsigned int period_ns = fsl_pwm_ticks_to_ns(fpc, period); + if (!period_ns) + return 0; + duty = (unsigned long long)duty_ns * period; do_div(duty, period_ns); From 80fd663590cf4c6a7baaa405cd65060469c95eca Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 20 Mar 2025 11:42:55 -0400 Subject: [PATCH 0069/2924] selftests: kvm: revamp MONITOR/MWAIT tests Run each testcase in a separate VMs to cover more possibilities; move WRMSR close to MONITOR/MWAIT to test updating CPUID bits while in the VM. Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86/monitor_mwait_test.c | 108 +++++++++--------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 2b550eff35f1b3..390ae2d874932b 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -7,6 +7,7 @@ #include "kvm_util.h" #include "processor.h" +#include "kselftest.h" #define CPUID_MWAIT (1u << 3) @@ -14,6 +15,8 @@ enum monitor_mwait_testcases { MWAIT_QUIRK_DISABLED = BIT(0), MISC_ENABLES_QUIRK_DISABLED = BIT(1), MWAIT_DISABLED = BIT(2), + CPUID_DISABLED = BIT(3), + TEST_MAX = CPUID_DISABLED * 2 - 1, }; /* @@ -35,11 +38,19 @@ do { \ testcase, vector); \ } while (0) -static void guest_monitor_wait(int testcase) +static void guest_monitor_wait(void *arg) { + int testcase = (int) (long) arg; u8 vector; - GUEST_SYNC(testcase); + u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT; + if (!(testcase & MWAIT_DISABLED)) + val |= MSR_IA32_MISC_ENABLE_MWAIT; + wrmsr(MSR_IA32_MISC_ENABLE, val); + + __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED), + "Expected CPUID.MWAIT %s\n", + (testcase & MWAIT_DISABLED) ? "cleared" : "set"); /* * Arbitrarily MONITOR this function, SVM performs fault checks before @@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase) vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); -} - -static void guest_code(void) -{ - guest_monitor_wait(MWAIT_DISABLED); - - guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); GUEST_DONE(); } @@ -74,56 +72,64 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; int testcase; + char test[80]; - TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + ksft_print_header(); + ksft_set_plan(12); + for (testcase = 0; testcase <= TEST_MAX; testcase++) { + vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait); + vcpu_args_set(vcpu, 1, (void *)(long)testcase); + + disabled_quirks = 0; + if (testcase & MWAIT_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; + strcpy(test, "MWAIT can fault"); + } else { + strcpy(test, "MWAIT never faults"); + } + if (testcase & MISC_ENABLES_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; + strcat(test, ", MISC_ENABLE updates CPUID"); + } else { + strcat(test, ", no CPUID updates"); + } + + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); + + if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) && + (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED))) + continue; + + if (testcase & CPUID_DISABLED) { + strcat(test, ", CPUID clear"); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } else { + strcat(test, ", CPUID set"); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } + + if (testcase & MWAIT_DISABLED) + strcat(test, ", MWAIT disabled"); - while (1) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - testcase = uc.args[1]; - break; case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - goto done; + /* Detected in vcpu_run */ + break; case UCALL_DONE: - goto done; + ksft_test_result_pass("%s\n", test); + break; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); - goto done; - } - - disabled_quirks = 0; - if (testcase & MWAIT_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; - if (testcase & MISC_ENABLES_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); - - /* - * If the MISC_ENABLES quirk (KVM neglects to update CPUID to - * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT - * bit in MISC_ENABLES accordingly. If the quirk is enabled, - * the only valid configuration is MWAIT disabled, as CPUID - * can't be manually changed after running the vCPU. - */ - if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { - TEST_ASSERT(testcase & MWAIT_DISABLED, - "Can't toggle CPUID features after running vCPU"); - continue; + break; } - - vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, - (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT); + kvm_vm_free(vm); } + ksft_finished(); -done: - kvm_vm_free(vm); return 0; } From 11934771e7e79dcf4528803f9e3299b214c36f30 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2025 00:18:51 +0200 Subject: [PATCH 0070/2924] selftests: kvm: bring list of exit reasons up to date Signed-off-by: Paolo Bonzini Message-ID: <20250331221851.614582-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 279ad8946040cb..815bc45dd8dc68 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2019,9 +2019,8 @@ static struct exit_reason { KVM_EXIT_STRING(RISCV_SBI), KVM_EXIT_STRING(RISCV_CSR), KVM_EXIT_STRING(NOTIFY), -#ifdef KVM_EXIT_MEMORY_NOT_PRESENT - KVM_EXIT_STRING(MEMORY_NOT_PRESENT), -#endif + KVM_EXIT_STRING(LOONGARCH_IOCSR), + KVM_EXIT_STRING(MEMORY_FAULT), }; /* From c57047f6f37906cc4f6a4fec1683f87731f25248 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2025 16:13:27 +0200 Subject: [PATCH 0071/2924] selftests: kvm: list once tests that are valid on all architectures Several tests cover infrastructure from virt/kvm/ and userspace APIs that have only minimal requirements from architecture-specific code. As such, they are available on all architectures that have libkvm support, and this presumably will apply also in the future (for example if loongarch gets selftests support). Put them in a separate variable and list them only once. Signed-off-by: Paolo Bonzini Message-ID: <20250401141327.785520-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile.kvm | 45 ++++++++---------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index f773f8f9924945..f62b0a5aba35a0 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -50,8 +50,18 @@ LIBKVM_riscv += lib/riscv/ucall.c # Non-compiled test targets TEST_PROGS_x86 += x86/nx_huge_pages_test.sh +# Compiled test targets valid on all architectures with libkvm support +TEST_GEN_PROGS_COMMON = demand_paging_test +TEST_GEN_PROGS_COMMON += dirty_log_test +TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += kvm_binary_stats_test +TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus +TEST_GEN_PROGS_COMMON += kvm_page_table_test +TEST_GEN_PROGS_COMMON += set_memory_region_test + # Compiled test targets -TEST_GEN_PROGS_x86 = x86/cpuid_test +TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_x86 += x86/cpuid_test TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test TEST_GEN_PROGS_x86 += x86/feature_msrs_test @@ -119,27 +129,21 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test -TEST_GEN_PROGS_x86 += demand_paging_test -TEST_GEN_PROGS_x86 += dirty_log_test TEST_GEN_PROGS_x86 += dirty_log_perf_test TEST_GEN_PROGS_x86 += guest_memfd_test -TEST_GEN_PROGS_x86 += guest_print_test TEST_GEN_PROGS_x86 += hardware_disable_test -TEST_GEN_PROGS_x86 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86 += kvm_page_table_test TEST_GEN_PROGS_x86 += memslot_modification_stress_test TEST_GEN_PROGS_x86 += memslot_perf_test TEST_GEN_PROGS_x86 += mmu_stress_test TEST_GEN_PROGS_x86 += rseq_test -TEST_GEN_PROGS_x86 += set_memory_region_test TEST_GEN_PROGS_x86 += steal_time -TEST_GEN_PROGS_x86 += kvm_binary_stats_test TEST_GEN_PROGS_x86 += system_counter_offset_test TEST_GEN_PROGS_x86 += pre_fault_memory_test # Compiled outputs used by test targets TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test +TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions @@ -158,22 +162,16 @@ TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 TEST_GEN_PROGS_arm64 += access_tracking_perf_test TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test -TEST_GEN_PROGS_arm64 += demand_paging_test -TEST_GEN_PROGS_arm64 += dirty_log_test TEST_GEN_PROGS_arm64 += dirty_log_perf_test -TEST_GEN_PROGS_arm64 += guest_print_test TEST_GEN_PROGS_arm64 += get-reg-list -TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus -TEST_GEN_PROGS_arm64 += kvm_page_table_test TEST_GEN_PROGS_arm64 += memslot_modification_stress_test TEST_GEN_PROGS_arm64 += memslot_perf_test TEST_GEN_PROGS_arm64 += mmu_stress_test TEST_GEN_PROGS_arm64 += rseq_test -TEST_GEN_PROGS_arm64 += set_memory_region_test TEST_GEN_PROGS_arm64 += steal_time -TEST_GEN_PROGS_arm64 += kvm_binary_stats_test -TEST_GEN_PROGS_s390 = s390/memop +TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_s390 += s390/memop TEST_GEN_PROGS_s390 += s390/resets TEST_GEN_PROGS_s390 += s390/sync_regs_test TEST_GEN_PROGS_s390 += s390/tprot @@ -182,27 +180,14 @@ TEST_GEN_PROGS_s390 += s390/debug_test TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test TEST_GEN_PROGS_s390 += s390/shared_zeropage_test TEST_GEN_PROGS_s390 += s390/ucontrol_test -TEST_GEN_PROGS_s390 += demand_paging_test -TEST_GEN_PROGS_s390 += dirty_log_test -TEST_GEN_PROGS_s390 += guest_print_test -TEST_GEN_PROGS_s390 += kvm_create_max_vcpus -TEST_GEN_PROGS_s390 += kvm_page_table_test TEST_GEN_PROGS_s390 += rseq_test -TEST_GEN_PROGS_s390 += set_memory_region_test -TEST_GEN_PROGS_s390 += kvm_binary_stats_test +TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += coalesced_io_test -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test TEST_GEN_PROGS_riscv += get-reg-list -TEST_GEN_PROGS_riscv += guest_print_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test TEST_GEN_PROGS_riscv += steal_time SPLIT_TESTS += arch_timer From f3e555ba45da361f5286b35921c7ca8afbef6384 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 31 Mar 2025 17:05:50 +0200 Subject: [PATCH 0072/2924] Documentation: KVM: KVM_GET_SUPPORTED_CPUID now exposes TSC_DEADLINE TSC_DEADLINE is now advertised unconditionally by KVM_GET_SUPPORTED_CPUID, since commit 9be4ec35d668 ("KVM: x86: Advertise TSC_DEADLINE_TIMER in KVM_GET_SUPPORTED_CPUID", 2024-12-18). Adjust the documentation to reflect the new behavior. Signed-off-by: Paolo Bonzini Reviewed-by: Sean Christopherson Message-ID: <20250331150550.510320-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1f8625b7646a2f..0ec40765ccd4ec 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -9076,9 +9076,10 @@ the local APIC. The same is true for the ``KVM_FEATURE_PV_UNHALT`` paravirtualized feature. -CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``. -It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel -has enabled in-kernel emulation of the local APIC. +On older versions of Linux, CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by +``KVM_GET_SUPPORTED_CPUID``, but it can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` +is present and the kernel has enabled in-kernel emulation of the local APIC. +On newer versions, ``KVM_GET_SUPPORTED_CPUID`` does report the bit as available. CPU topology ~~~~~~~~~~~~ From 0405d4b63d082861f4eaff9d39c78ee9dc34f845 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Fri, 4 Apr 2025 13:31:29 +0800 Subject: [PATCH 0073/2924] isofs: Prevent the use of too small fid syzbot reported a slab-out-of-bounds Read in isofs_fh_to_parent. [1] The handle_bytes value passed in by the reproducing program is equal to 12. In handle_to_path(), only 12 bytes of memory are allocated for the structure file_handle->f_handle member, which causes an out-of-bounds access when accessing the member parent_block of the structure isofs_fid in isofs, because accessing parent_block requires at least 16 bytes of f_handle. Here, fh_len is used to indirectly confirm that the value of handle_bytes is greater than 3 before accessing parent_block. [1] BUG: KASAN: slab-out-of-bounds in isofs_fh_to_parent+0x1b8/0x210 fs/isofs/export.c:183 Read of size 4 at addr ffff0000cc030d94 by task syz-executor215/6466 CPU: 1 UID: 0 PID: 6466 Comm: syz-executor215 Not tainted 6.14.0-rc7-syzkaller-ga2392f333575 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call trace: show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:466 (C) __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0xe4/0x150 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0x198/0x550 mm/kasan/report.c:521 kasan_report+0xd8/0x138 mm/kasan/report.c:634 __asan_report_load4_noabort+0x20/0x2c mm/kasan/report_generic.c:380 isofs_fh_to_parent+0x1b8/0x210 fs/isofs/export.c:183 exportfs_decode_fh_raw+0x2dc/0x608 fs/exportfs/expfs.c:523 do_handle_to_path+0xa0/0x198 fs/fhandle.c:257 handle_to_path fs/fhandle.c:385 [inline] do_handle_open+0x8cc/0xb8c fs/fhandle.c:403 __do_sys_open_by_handle_at fs/fhandle.c:443 [inline] __se_sys_open_by_handle_at fs/fhandle.c:434 [inline] __arm64_sys_open_by_handle_at+0x80/0x94 fs/fhandle.c:434 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:744 el0t_64_sync_handler+0x84/0x108 arch/arm64/kernel/entry-common.c:762 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Allocated by task 6466: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_alloc_info+0x40/0x50 mm/kasan/generic.c:562 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0xac/0xc4 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4294 [inline] __kmalloc_noprof+0x32c/0x54c mm/slub.c:4306 kmalloc_noprof include/linux/slab.h:905 [inline] handle_to_path fs/fhandle.c:357 [inline] do_handle_open+0x5a4/0xb8c fs/fhandle.c:403 __do_sys_open_by_handle_at fs/fhandle.c:443 [inline] __se_sys_open_by_handle_at fs/fhandle.c:434 [inline] __arm64_sys_open_by_handle_at+0x80/0x94 fs/fhandle.c:434 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:744 el0t_64_sync_handler+0x84/0x108 arch/arm64/kernel/entry-common.c:762 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Reported-by: syzbot+4d7cd7dd0ce1aa8d5c65@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=4d7cd7dd0ce1aa8d5c65 Tested-by: syzbot+4d7cd7dd0ce1aa8d5c65@syzkaller.appspotmail.com CC: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Edward Adam Davis Signed-off-by: Jan Kara Link: https://patch.msgid.link/tencent_9C8CB8A7E7C6C512C7065DC98B6EDF6EC606@qq.com --- fs/isofs/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 35768a63fb1d23..421d247fae5230 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -180,7 +180,7 @@ static struct dentry *isofs_fh_to_parent(struct super_block *sb, return NULL; return isofs_export_iget(sb, - fh_len > 2 ? ifid->parent_block : 0, + fh_len > 3 ? ifid->parent_block : 0, ifid->parent_offset, fh_len > 4 ? ifid->parent_generation : 0); } From 26cb30f22f9cb9d964f7ae4b3233c0b9d2cddb2f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2025 14:50:11 +0200 Subject: [PATCH 0074/2924] Documentation: kvm: give correct name for KVM_CAP_SPAPR_MULTITCE The capability is incorrectly called KVM_CAP_PPC_MULTITCE in the documentation. Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 0ec40765ccd4ec..d2580cb9ab82f5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8847,10 +8847,9 @@ clearing the PVCLOCK_TSC_STABLE_BIT flag in Xen pvclock sources. This will be done when the KVM_CAP_XEN_HVM ioctl sets the KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag. -8.31 KVM_CAP_PPC_MULTITCE -------------------------- +8.31 KVM_CAP_SPAPR_MULTITCE +--------------------------- -:Capability: KVM_CAP_PPC_MULTITCE :Architectures: ppc :Type: vm From 2f313018de0fce3f97f6cd49925c8c0cb1a37c67 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2025 14:50:43 +0200 Subject: [PATCH 0075/2924] Documentation: kvm: drop "Capability" heading from capabilities It is redundant, and sometimes wrong. Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index d2580cb9ab82f5..eec775f04df60f 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7966,7 +7966,6 @@ See Documentation/arch/x86/sgx.rst for more details. 7.26 KVM_CAP_PPC_RPT_INVALIDATE ------------------------------- -:Capability: KVM_CAP_PPC_RPT_INVALIDATE :Architectures: ppc :Type: vm @@ -8041,7 +8040,6 @@ upgrading the VMM process without interrupting the guest. 7.30 KVM_CAP_PPC_AIL_MODE_3 ------------------------------- -:Capability: KVM_CAP_PPC_AIL_MODE_3 :Architectures: ppc :Type: vm @@ -8055,7 +8053,6 @@ handling interrupts and system calls. 7.31 KVM_CAP_DISABLE_QUIRKS2 ---------------------------- -:Capability: KVM_CAP_DISABLE_QUIRKS2 :Parameters: args[0] - set of KVM quirks to disable :Architectures: x86 :Type: vm @@ -8895,7 +8892,6 @@ leaf. 8.34 KVM_CAP_EXIT_HYPERCALL --------------------------- -:Capability: KVM_CAP_EXIT_HYPERCALL :Architectures: x86 :Type: vm @@ -8914,7 +8910,6 @@ ENOSYS for the others. 8.35 KVM_CAP_PMU_CAPABILITY --------------------------- -:Capability: KVM_CAP_PMU_CAPABILITY :Architectures: x86 :Type: vm :Parameters: arg[0] is bitmask of PMU virtualization capabilities. @@ -8936,7 +8931,6 @@ should adjust CPUID leaf 0xA to reflect that the PMU is disabled. 8.36 KVM_CAP_ARM_SYSTEM_SUSPEND ------------------------------- -:Capability: KVM_CAP_ARM_SYSTEM_SUSPEND :Architectures: arm64 :Type: vm @@ -8946,7 +8940,6 @@ type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request. 8.37 KVM_CAP_S390_PROTECTED_DUMP -------------------------------- -:Capability: KVM_CAP_S390_PROTECTED_DUMP :Architectures: s390 :Type: vm @@ -8959,7 +8952,6 @@ available and supports the `KVM_PV_DUMP_CPU` subcommand. 8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES ------------------------------------- -:Capability: KVM_CAP_VM_DISABLE_NX_HUGE_PAGES :Architectures: x86 :Type: vm :Parameters: arg[0] must be 0. @@ -8976,7 +8968,6 @@ This capability may only be set before any vCPUs are created. 8.39 KVM_CAP_S390_CPU_TOPOLOGY ------------------------------ -:Capability: KVM_CAP_S390_CPU_TOPOLOGY :Architectures: s390 :Type: vm @@ -9001,7 +8992,6 @@ must point to a byte where the value will be stored or retrieved from. 8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE --------------------------------------- -:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE :Architectures: arm64 :Type: vm :Parameters: arg[0] is the new split chunk size. @@ -9028,7 +9018,6 @@ block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a 8.41 KVM_CAP_VM_TYPES --------------------- -:Capability: KVM_CAP_MEMORY_ATTRIBUTES :Architectures: x86 :Type: system ioctl From ed7974fd592bc0d3649a42725f5e0b13ea466010 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2025 14:54:40 +0200 Subject: [PATCH 0076/2924] Documentation: kvm: fix some definition lists Ensure that they have a ":" in front of the defined item. Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index eec775f04df60f..2d8920d1d594ac 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7927,10 +7927,10 @@ by POWER10 processor. 7.24 KVM_CAP_VM_COPY_ENC_CONTEXT_FROM ------------------------------------- -Architectures: x86 SEV enabled -Type: vm -Parameters: args[0] is the fd of the source vm -Returns: 0 on success; ENOTTY on error +:Architectures: x86 SEV enabled +:Type: vm +:Parameters: args[0] is the fd of the source vm +:Returns: 0 on success; ENOTTY on error This capability enables userspace to copy encryption context from the vm indicated by the fd to the vm this is called on. @@ -8647,7 +8647,7 @@ limit the attack surface on KVM's MSR emulation code. 8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID ------------------------------------- -Architectures: x86 +:Architectures: x86 When enabled, KVM will disable paravirtual features provided to the guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf @@ -8881,7 +8881,7 @@ available to the guest on migration. 8.33 KVM_CAP_HYPERV_ENFORCE_CPUID --------------------------------- -Architectures: x86 +:Architectures: x86 When enabled, KVM will disable emulated Hyper-V features provided to the guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all From af339282e203fb3fa99790c6c48ced3c27cc7bd9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2025 14:57:39 +0200 Subject: [PATCH 0077/2924] Documentation: kvm: organize capabilities in the right section Categorize the capabilities correctly. Section 6 is for enabled vCPU capabilities; section 7 is for enabled VM capabilities; section 8 is for informational ones. Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 750 +++++++++++++++++---------------- 1 file changed, 376 insertions(+), 374 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 2d8920d1d594ac..3a8605f88dc545 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7447,6 +7447,75 @@ Unused bitfields in the bitarrays must be set to zero. This capability connects the vcpu to an in-kernel XIVE device. +6.76 KVM_CAP_HYPERV_SYNIC +------------------------- + +:Architectures: x86 +:Target: vcpu + +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that the kernel has an implementation of the +Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is +used to support Windows Hyper-V based guest paravirt drivers(VMBus). + +In order to use SynIC, it has to be activated by setting this +capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this +will disable the use of APIC hardware virtualization even if supported +by the CPU, as it's incompatible with SynIC auto-EOI behavior. + +6.77 KVM_CAP_HYPERV_SYNIC2 +-------------------------- + +:Architectures: x86 +:Target: vcpu + +This capability enables a newer version of Hyper-V Synthetic interrupt +controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM +doesn't clear SynIC message and event flags pages when they are enabled by +writing to the respective MSRs. + +6.78 KVM_CAP_HYPERV_DIRECT_TLBFLUSH +----------------------------------- + +:Architectures: x86 +:Target: vcpu + +This capability indicates that KVM running on top of Hyper-V hypervisor +enables Direct TLB flush for its guests meaning that TLB flush +hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM. +Due to the different ABI for hypercall parameters between Hyper-V and +KVM, enabling this capability effectively disables all hypercall +handling by KVM (as some KVM hypercall may be mistakenly treated as TLB +flush hypercalls by Hyper-V) so userspace should disable KVM identification +in CPUID and only exposes Hyper-V identification. In this case, guest +thinks it's running on Hyper-V and only use Hyper-V hypercalls. + +6.79 KVM_CAP_HYPERV_ENFORCE_CPUID +--------------------------------- + +:Architectures: x86 +:Target: vcpu + +When enabled, KVM will disable emulated Hyper-V features provided to the +guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all +currently implemented Hyper-V features are provided unconditionally when +Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001) +leaf. + +6.80 KVM_CAP_ENFORCE_PV_FEATURE_CPUID +------------------------------------- + +:Architectures: x86 +:Target: vcpu + +When enabled, KVM will disable paravirtual features provided to the +guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf +(0x40000001). Otherwise, a guest may use the paravirtual features +regardless of what has actually been exposed through the CPUID leaf. + +.. _KVM_CAP_DIRTY_LOG_RING: + + .. _cap_enable_vm: 7. Capabilities that can be enabled on VMs @@ -7963,23 +8032,6 @@ default. See Documentation/arch/x86/sgx.rst for more details. -7.26 KVM_CAP_PPC_RPT_INVALIDATE -------------------------------- - -:Architectures: ppc -:Type: vm - -This capability indicates that the kernel is capable of handling -H_RPT_INVALIDATE hcall. - -In order to enable the use of H_RPT_INVALIDATE in the guest, -user space might have to advertise it for the guest. For example, -IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is -present in the "ibm,hypertas-functions" device-tree property. - -This capability is enabled for hypervisors on platforms like POWER9 -that support radix MMU. - 7.27 KVM_CAP_EXIT_ON_EMULATION_FAILURE -------------------------------------- @@ -8037,19 +8089,6 @@ indicated by the fd to the VM this is called on. This is intended to support intra-host migration of VMs between userspace VMMs, upgrading the VMM process without interrupting the guest. -7.30 KVM_CAP_PPC_AIL_MODE_3 -------------------------------- - -:Architectures: ppc -:Type: vm - -This capability indicates that the kernel supports the mode 3 setting for the -"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location" -resource that is controlled with the H_SET_MODE hypercall. - -This capability allows a guest kernel to use a better-performance mode for -handling interrupts and system calls. - 7.31 KVM_CAP_DISABLE_QUIRKS2 ---------------------------- @@ -8207,27 +8246,6 @@ This capability is aimed to mitigate the threat that malicious VMs can cause CPU stuck (due to event windows don't open up) and make the CPU unavailable to host or other VMs. -7.34 KVM_CAP_MEMORY_FAULT_INFO ------------------------------- - -:Architectures: x86 -:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP. - -The presence of this capability indicates that KVM_RUN will fill -kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if -there is a valid memslot but no backing VMA for the corresponding host virtual -address. - -The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns -an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set -to KVM_EXIT_MEMORY_FAULT. - -Note: Userspaces which attempt to resolve memory faults so that they can retry -KVM_RUN are encouraged to guard against repeatedly receiving the same -error/annotated fault. - -See KVM_EXIT_MEMORY_FAULT for more information. - 7.35 KVM_CAP_X86_APIC_BUS_CYCLES_NS ----------------------------------- @@ -8245,86 +8263,272 @@ by KVM_CHECK_EXTENSION. Note: Userspace is responsible for correctly configuring CPUID 0x15, a.k.a. the core crystal clock frequency, if a non-zero CPUID 0x15 is exposed to the guest. -7.36 KVM_CAP_X86_GUEST_MODE ------------------------------- - -:Architectures: x86 -:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP. - -The presence of this capability indicates that KVM_RUN will update the -KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the -vCPU was executing nested guest code when it exited. +7.36 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL +---------------------------------------------------------- -KVM exits with the register state of either the L1 or L2 guest -depending on which executed at the time of an exit. Userspace must -take care to differentiate between these cases. +:Architectures: x86, arm64 +:Type: vm +:Parameters: args[0] - size of the dirty log ring -7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS -------------------------------------- +KVM is capable of tracking dirty memory using ring buffers that are +mmapped into userspace; there is one dirty ring per vcpu. -:Architectures: arm64 -:Target: VM -:Parameters: None -:Returns: 0 on success, -EINVAL if vCPUs have been created before enabling this - capability. +The dirty ring is available to userspace as an array of +``struct kvm_dirty_gfn``. Each dirty entry is defined as:: -This capability changes the behavior of the registers that identify a PE -implementation of the Arm architecture: MIDR_EL1, REVIDR_EL1, and AIDR_EL1. -By default, these registers are visible to userspace but treated as invariant. + struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; /* as_id | slot_id */ + __u64 offset; + }; -When this capability is enabled, KVM allows userspace to change the -aforementioned registers before the first KVM_RUN. These registers are VM -scoped, meaning that the same set of values are presented on all vCPUs in a -given VM. +The following values are defined for the flags field to define the +current state of the entry:: -8. Other capabilities. -====================== + #define KVM_DIRTY_GFN_F_DIRTY BIT(0) + #define KVM_DIRTY_GFN_F_RESET BIT(1) + #define KVM_DIRTY_GFN_F_MASK 0x3 -This section lists capabilities that give information about other -features of the KVM implementation. +Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM +ioctl to enable this capability for the new guest and set the size of +the rings. Enabling the capability is only allowed before creating any +vCPU, and the size of the ring must be a power of two. The larger the +ring buffer, the less likely the ring is full and the VM is forced to +exit to userspace. The optimal size depends on the workload, but it is +recommended that it be at least 64 KiB (4096 entries). -8.1 KVM_CAP_PPC_HWRNG ---------------------- +Just like for dirty page bitmaps, the buffer tracks writes to +all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was +set in KVM_SET_USER_MEMORY_REGION. Once a memory region is registered +with the flag set, userspace can start harvesting dirty pages from the +ring buffer. -:Architectures: ppc +An entry in the ring buffer can be unused (flag bits ``00``), +dirty (flag bits ``01``) or harvested (flag bits ``1X``). The +state machine for the entry is as follows:: -This capability, if KVM_CHECK_EXTENSION indicates that it is -available, means that the kernel has an implementation of the -H_RANDOM hypercall backed by a hardware random-number generator. -If present, the kernel H_RANDOM handler can be enabled for guest use -with the KVM_CAP_PPC_ENABLE_HCALL capability. + dirtied harvested reset + 00 -----------> 01 -------------> 1X -------+ + ^ | + | | + +------------------------------------------+ -8.2 KVM_CAP_HYPERV_SYNIC ------------------------- +To harvest the dirty pages, userspace accesses the mmapped ring buffer +to read the dirty GFNs. If the flags has the DIRTY bit set (at this stage +the RESET bit must be cleared), then it means this GFN is a dirty GFN. +The userspace should harvest this GFN and mark the flags from state +``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set +to show that this GFN is harvested and waiting for a reset), and move +on to the next GFN. The userspace should continue to do this until the +flags of a GFN have the DIRTY bit cleared, meaning that it has harvested +all the dirty GFNs that were available. -:Architectures: x86 +Note that on weakly ordered architectures, userspace accesses to the +ring buffer (and more specifically the 'flags' field) must be ordered, +using load-acquire/store-release accessors when available, or any +other memory barrier that will ensure this ordering. -This capability, if KVM_CHECK_EXTENSION indicates that it is -available, means that the kernel has an implementation of the -Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is -used to support Windows Hyper-V based guest paravirt drivers(VMBus). +It's not necessary for userspace to harvest the all dirty GFNs at once. +However it must collect the dirty GFNs in sequence, i.e., the userspace +program cannot skip one dirty GFN to collect the one next to it. -In order to use SynIC, it has to be activated by setting this -capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this -will disable the use of APIC hardware virtualization even if supported -by the CPU, as it's incompatible with SynIC auto-EOI behavior. +After processing one or more entries in the ring buffer, userspace +calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about +it, so that the kernel will reprotect those collected GFNs. +Therefore, the ioctl must be called *before* reading the content of +the dirty pages. -8.3 KVM_CAP_PPC_MMU_RADIX -------------------------- +The dirty ring can get full. When it happens, the KVM_RUN of the +vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL. -:Architectures: ppc +The dirty ring interface has a major difference comparing to the +KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from +userspace, it's still possible that the kernel has not yet flushed the +processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the +flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one +needs to kick the vcpu out of KVM_RUN using a signal. The resulting +vmexit ensures that all dirty GFNs are flushed to the dirty rings. -This capability, if KVM_CHECK_EXTENSION indicates that it is -available, means that the kernel can support guests using the -radix MMU defined in Power ISA V3.00 (as implemented in the POWER9 -processor). +NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that +should be exposed by weakly ordered architecture, in order to indicate +the additional memory ordering requirements imposed on userspace when +reading the state of an entry and mutating it from DIRTY to HARVESTED. +Architecture with TSO-like ordering (such as x86) are allowed to +expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL +to userspace. -8.4 KVM_CAP_PPC_MMU_HASH_V3 ---------------------------- +After enabling the dirty rings, the userspace needs to detect the +capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the +ring structures can be backed by per-slot bitmaps. With this capability +advertised, it means the architecture can dirty guest pages without +vcpu/ring context, so that some of the dirty information will still be +maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP +can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL +hasn't been enabled, or any memslot has been existing. -:Architectures: ppc +Note that the bitmap here is only a backup of the ring structure. The +use of the ring and bitmap combination is only beneficial if there is +only a very small amount of memory that is dirtied out of vcpu/ring +context. Otherwise, the stand-alone per-slot bitmap mechanism needs to +be considered. -This capability, if KVM_CHECK_EXTENSION indicates that it is +To collect dirty bits in the backup bitmap, userspace can use the same +KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all +the generation of the dirty bits is done in a single pass. Collecting +the dirty bitmap should be the very last thing that the VMM does before +considering the state as complete. VMM needs to ensure that the dirty +state is final and avoid missing dirty pages from another ioctl ordered +after the bitmap collection. + +NOTE: Multiple examples of using the backup bitmap: (1) save vgic/its +tables through command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} on +KVM device "kvm-arm-vgic-its". (2) restore vgic/its tables through +command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_RESTORE_TABLES} on KVM device +"kvm-arm-vgic-its". VGICv3 LPI pending status is restored. (3) save +vgic3 pending table through KVM_DEV_ARM_VGIC_{GRP_CTRL, SAVE_PENDING_TABLES} +command on KVM device "kvm-arm-vgic-v3". + +7.37 KVM_CAP_PMU_CAPABILITY +--------------------------- + +:Architectures: x86 +:Type: vm +:Parameters: arg[0] is bitmask of PMU virtualization capabilities. +:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits + +This capability alters PMU virtualization in KVM. + +Calling KVM_CHECK_EXTENSION for this capability returns a bitmask of +PMU virtualization capabilities that can be adjusted on a VM. + +The argument to KVM_ENABLE_CAP is also a bitmask and selects specific +PMU virtualization capabilities to be applied to the VM. This can +only be invoked on a VM prior to the creation of VCPUs. + +At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting +this capability will disable PMU virtualization for that VM. Usermode +should adjust CPUID leaf 0xA to reflect that the PMU is disabled. + +7.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES +------------------------------------- + +:Architectures: x86 +:Type: vm +:Parameters: arg[0] must be 0. +:Returns: 0 on success, -EPERM if the userspace process does not + have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been + created. + +This capability disables the NX huge pages mitigation for iTLB MULTIHIT. + +The capability has no effect if the nx_huge_pages module parameter is not set. + +This capability may only be set before any vCPUs are created. + +7.39 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE +--------------------------------------- + +:Architectures: arm64 +:Type: vm +:Parameters: arg[0] is the new split chunk size. +:Returns: 0 on success, -EINVAL if any memslot was already created. + +This capability sets the chunk size used in Eager Page Splitting. + +Eager Page Splitting improves the performance of dirty-logging (used +in live migrations) when guest memory is backed by huge-pages. It +avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing +it eagerly when enabling dirty logging (with the +KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using +KVM_CLEAR_DIRTY_LOG. + +The chunk size specifies how many pages to break at a time, using a +single allocation for each chunk. Bigger the chunk size, more pages +need to be allocated ahead of time. + +The chunk size needs to be a valid block size. The list of acceptable +block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a +64-bit bitmap (each bit describing a block size). The default value is +0, to disable the eager page splitting. + +7.40 KVM_CAP_EXIT_HYPERCALL +--------------------------- + +:Architectures: x86 +:Type: vm + +This capability, if enabled, will cause KVM to exit to userspace +with KVM_EXIT_HYPERCALL exit reason to process some hypercalls. + +Calling KVM_CHECK_EXTENSION for this capability will return a bitmask +of hypercalls that can be configured to exit to userspace. +Right now, the only such hypercall is KVM_HC_MAP_GPA_RANGE. + +The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset +of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace +the hypercalls whose corresponding bit is in the argument, and return +ENOSYS for the others. + +7.41 KVM_CAP_ARM_SYSTEM_SUSPEND +------------------------------- + +:Architectures: arm64 +:Type: vm + +When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of +type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request. + +7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS +------------------------------------- + +:Architectures: arm64 +:Target: VM +:Parameters: None +:Returns: 0 on success, -EINVAL if vCPUs have been created before enabling this + capability. + +This capability changes the behavior of the registers that identify a PE +implementation of the Arm architecture: MIDR_EL1, REVIDR_EL1, and AIDR_EL1. +By default, these registers are visible to userspace but treated as invariant. + +When this capability is enabled, KVM allows userspace to change the +aforementioned registers before the first KVM_RUN. These registers are VM +scoped, meaning that the same set of values are presented on all vCPUs in a +given VM. + +8. Other capabilities. +====================== + +This section lists capabilities that give information about other +features of the KVM implementation. + +8.1 KVM_CAP_PPC_HWRNG +--------------------- + +:Architectures: ppc + +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that the kernel has an implementation of the +H_RANDOM hypercall backed by a hardware random-number generator. +If present, the kernel H_RANDOM handler can be enabled for guest use +with the KVM_CAP_PPC_ENABLE_HCALL capability. + +8.3 KVM_CAP_PPC_MMU_RADIX +------------------------- + +:Architectures: ppc + +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that the kernel can support guests using the +radix MMU defined in Power ISA V3.00 (as implemented in the POWER9 +processor). + +8.4 KVM_CAP_PPC_MMU_HASH_V3 +--------------------------- + +:Architectures: ppc + +This capability, if KVM_CHECK_EXTENSION indicates that it is available, means that the kernel can support guests using the hashed page table MMU defined in Power ISA V3.00 (as implemented in the POWER9 processor), including in-memory segment tables. @@ -8454,16 +8658,6 @@ virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N (counting from the right) is set, then a virtual SMT mode of 2^N is available. -8.11 KVM_CAP_HYPERV_SYNIC2 --------------------------- - -:Architectures: x86 - -This capability enables a newer version of Hyper-V Synthetic interrupt -controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM -doesn't clear SynIC message and event flags pages when they are enabled by -writing to the respective MSRs. - 8.12 KVM_CAP_HYPERV_VP_INDEX ---------------------------- @@ -8478,7 +8672,6 @@ capability is absent, userspace can still query this msr's value. ------------------------------- :Architectures: s390 -:Parameters: none This capability indicates if the flic device will be able to get/set the AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows @@ -8552,21 +8745,6 @@ This capability indicates that KVM supports paravirtualized Hyper-V IPI send hypercalls: HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx. -8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH ------------------------------------ - -:Architectures: x86 - -This capability indicates that KVM running on top of Hyper-V hypervisor -enables Direct TLB flush for its guests meaning that TLB flush -hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM. -Due to the different ABI for hypercall parameters between Hyper-V and -KVM, enabling this capability effectively disables all hypercall -handling by KVM (as some KVM hypercall may be mistakenly treated as TLB -flush hypercalls by Hyper-V) so userspace should disable KVM identification -in CPUID and only exposes Hyper-V identification. In this case, guest -thinks it's running on Hyper-V and only use Hyper-V hypercalls. - 8.22 KVM_CAP_S390_VCPU_RESETS ----------------------------- @@ -8644,142 +8822,6 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to trap and emulate MSRs that are outside of the scope of KVM as well as limit the attack surface on KVM's MSR emulation code. -8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID -------------------------------------- - -:Architectures: x86 - -When enabled, KVM will disable paravirtual features provided to the -guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf -(0x40000001). Otherwise, a guest may use the paravirtual features -regardless of what has actually been exposed through the CPUID leaf. - -.. _KVM_CAP_DIRTY_LOG_RING: - -8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL ----------------------------------------------------------- - -:Architectures: x86, arm64 -:Parameters: args[0] - size of the dirty log ring - -KVM is capable of tracking dirty memory using ring buffers that are -mmapped into userspace; there is one dirty ring per vcpu. - -The dirty ring is available to userspace as an array of -``struct kvm_dirty_gfn``. Each dirty entry is defined as:: - - struct kvm_dirty_gfn { - __u32 flags; - __u32 slot; /* as_id | slot_id */ - __u64 offset; - }; - -The following values are defined for the flags field to define the -current state of the entry:: - - #define KVM_DIRTY_GFN_F_DIRTY BIT(0) - #define KVM_DIRTY_GFN_F_RESET BIT(1) - #define KVM_DIRTY_GFN_F_MASK 0x3 - -Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM -ioctl to enable this capability for the new guest and set the size of -the rings. Enabling the capability is only allowed before creating any -vCPU, and the size of the ring must be a power of two. The larger the -ring buffer, the less likely the ring is full and the VM is forced to -exit to userspace. The optimal size depends on the workload, but it is -recommended that it be at least 64 KiB (4096 entries). - -Just like for dirty page bitmaps, the buffer tracks writes to -all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was -set in KVM_SET_USER_MEMORY_REGION. Once a memory region is registered -with the flag set, userspace can start harvesting dirty pages from the -ring buffer. - -An entry in the ring buffer can be unused (flag bits ``00``), -dirty (flag bits ``01``) or harvested (flag bits ``1X``). The -state machine for the entry is as follows:: - - dirtied harvested reset - 00 -----------> 01 -------------> 1X -------+ - ^ | - | | - +------------------------------------------+ - -To harvest the dirty pages, userspace accesses the mmapped ring buffer -to read the dirty GFNs. If the flags has the DIRTY bit set (at this stage -the RESET bit must be cleared), then it means this GFN is a dirty GFN. -The userspace should harvest this GFN and mark the flags from state -``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set -to show that this GFN is harvested and waiting for a reset), and move -on to the next GFN. The userspace should continue to do this until the -flags of a GFN have the DIRTY bit cleared, meaning that it has harvested -all the dirty GFNs that were available. - -Note that on weakly ordered architectures, userspace accesses to the -ring buffer (and more specifically the 'flags' field) must be ordered, -using load-acquire/store-release accessors when available, or any -other memory barrier that will ensure this ordering. - -It's not necessary for userspace to harvest the all dirty GFNs at once. -However it must collect the dirty GFNs in sequence, i.e., the userspace -program cannot skip one dirty GFN to collect the one next to it. - -After processing one or more entries in the ring buffer, userspace -calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about -it, so that the kernel will reprotect those collected GFNs. -Therefore, the ioctl must be called *before* reading the content of -the dirty pages. - -The dirty ring can get full. When it happens, the KVM_RUN of the -vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL. - -The dirty ring interface has a major difference comparing to the -KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from -userspace, it's still possible that the kernel has not yet flushed the -processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the -flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one -needs to kick the vcpu out of KVM_RUN using a signal. The resulting -vmexit ensures that all dirty GFNs are flushed to the dirty rings. - -NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that -should be exposed by weakly ordered architecture, in order to indicate -the additional memory ordering requirements imposed on userspace when -reading the state of an entry and mutating it from DIRTY to HARVESTED. -Architecture with TSO-like ordering (such as x86) are allowed to -expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL -to userspace. - -After enabling the dirty rings, the userspace needs to detect the -capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the -ring structures can be backed by per-slot bitmaps. With this capability -advertised, it means the architecture can dirty guest pages without -vcpu/ring context, so that some of the dirty information will still be -maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP -can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL -hasn't been enabled, or any memslot has been existing. - -Note that the bitmap here is only a backup of the ring structure. The -use of the ring and bitmap combination is only beneficial if there is -only a very small amount of memory that is dirtied out of vcpu/ring -context. Otherwise, the stand-alone per-slot bitmap mechanism needs to -be considered. - -To collect dirty bits in the backup bitmap, userspace can use the same -KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all -the generation of the dirty bits is done in a single pass. Collecting -the dirty bitmap should be the very last thing that the VMM does before -considering the state as complete. VMM needs to ensure that the dirty -state is final and avoid missing dirty pages from another ioctl ordered -after the bitmap collection. - -NOTE: Multiple examples of using the backup bitmap: (1) save vgic/its -tables through command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} on -KVM device "kvm-arm-vgic-its". (2) restore vgic/its tables through -command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_RESTORE_TABLES} on KVM device -"kvm-arm-vgic-its". VGICv3 LPI pending status is restored. (3) save -vgic3 pending table through KVM_DEV_ARM_VGIC_{GRP_CTRL, SAVE_PENDING_TABLES} -command on KVM device "kvm-arm-vgic-v3". - 8.30 KVM_CAP_XEN_HVM -------------------- @@ -8878,65 +8920,6 @@ This capability indicates that the KVM virtual PTP service is supported in the host. A VMM can check whether the service is available to the guest on migration. -8.33 KVM_CAP_HYPERV_ENFORCE_CPUID ---------------------------------- - -:Architectures: x86 - -When enabled, KVM will disable emulated Hyper-V features provided to the -guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all -currently implemented Hyper-V features are provided unconditionally when -Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001) -leaf. - -8.34 KVM_CAP_EXIT_HYPERCALL ---------------------------- - -:Architectures: x86 -:Type: vm - -This capability, if enabled, will cause KVM to exit to userspace -with KVM_EXIT_HYPERCALL exit reason to process some hypercalls. - -Calling KVM_CHECK_EXTENSION for this capability will return a bitmask -of hypercalls that can be configured to exit to userspace. -Right now, the only such hypercall is KVM_HC_MAP_GPA_RANGE. - -The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset -of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace -the hypercalls whose corresponding bit is in the argument, and return -ENOSYS for the others. - -8.35 KVM_CAP_PMU_CAPABILITY ---------------------------- - -:Architectures: x86 -:Type: vm -:Parameters: arg[0] is bitmask of PMU virtualization capabilities. -:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits - -This capability alters PMU virtualization in KVM. - -Calling KVM_CHECK_EXTENSION for this capability returns a bitmask of -PMU virtualization capabilities that can be adjusted on a VM. - -The argument to KVM_ENABLE_CAP is also a bitmask and selects specific -PMU virtualization capabilities to be applied to the VM. This can -only be invoked on a VM prior to the creation of VCPUs. - -At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting -this capability will disable PMU virtualization for that VM. Usermode -should adjust CPUID leaf 0xA to reflect that the PMU is disabled. - -8.36 KVM_CAP_ARM_SYSTEM_SUSPEND -------------------------------- - -:Architectures: arm64 -:Type: vm - -When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of -type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request. - 8.37 KVM_CAP_S390_PROTECTED_DUMP -------------------------------- @@ -8949,22 +8932,6 @@ PV guests. The `KVM_PV_DUMP` command is available for the dump related UV data. Also the vcpu ioctl `KVM_S390_PV_CPU_COMMAND` is available and supports the `KVM_PV_DUMP_CPU` subcommand. -8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES -------------------------------------- - -:Architectures: x86 -:Type: vm -:Parameters: arg[0] must be 0. -:Returns: 0 on success, -EPERM if the userspace process does not - have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been - created. - -This capability disables the NX huge pages mitigation for iTLB MULTIHIT. - -The capability has no effect if the nx_huge_pages module parameter is not set. - -This capability may only be set before any vCPUs are created. - 8.39 KVM_CAP_S390_CPU_TOPOLOGY ------------------------------ @@ -8989,32 +8956,6 @@ structure. When getting the Modified Change Topology Report value, the attr->addr must point to a byte where the value will be stored or retrieved from. -8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE ---------------------------------------- - -:Architectures: arm64 -:Type: vm -:Parameters: arg[0] is the new split chunk size. -:Returns: 0 on success, -EINVAL if any memslot was already created. - -This capability sets the chunk size used in Eager Page Splitting. - -Eager Page Splitting improves the performance of dirty-logging (used -in live migrations) when guest memory is backed by huge-pages. It -avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing -it eagerly when enabling dirty logging (with the -KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using -KVM_CLEAR_DIRTY_LOG. - -The chunk size specifies how many pages to break at a time, using a -single allocation for each chunk. Bigger the chunk size, more pages -need to be allocated ahead of time. - -The chunk size needs to be a valid block size. The list of acceptable -block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a -64-bit bitmap (each bit describing a block size). The default value is -0, to disable the eager page splitting. - 8.41 KVM_CAP_VM_TYPES --------------------- @@ -9034,6 +8975,67 @@ Do not use KVM_X86_SW_PROTECTED_VM for "real" VMs, and especially not in production. The behavior and effective ABI for software-protected VMs is unstable. +8.42 KVM_CAP_PPC_RPT_INVALIDATE +------------------------------- + +:Architectures: ppc + +This capability indicates that the kernel is capable of handling +H_RPT_INVALIDATE hcall. + +In order to enable the use of H_RPT_INVALIDATE in the guest, +user space might have to advertise it for the guest. For example, +IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is +present in the "ibm,hypertas-functions" device-tree property. + +This capability is enabled for hypervisors on platforms like POWER9 +that support radix MMU. + +8.43 KVM_CAP_PPC_AIL_MODE_3 +--------------------------- + +:Architectures: ppc + +This capability indicates that the kernel supports the mode 3 setting for the +"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location" +resource that is controlled with the H_SET_MODE hypercall. + +This capability allows a guest kernel to use a better-performance mode for +handling interrupts and system calls. + +8.44 KVM_CAP_MEMORY_FAULT_INFO +------------------------------ + +:Architectures: x86 + +The presence of this capability indicates that KVM_RUN will fill +kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if +there is a valid memslot but no backing VMA for the corresponding host virtual +address. + +The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns +an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set +to KVM_EXIT_MEMORY_FAULT. + +Note: Userspaces which attempt to resolve memory faults so that they can retry +KVM_RUN are encouraged to guard against repeatedly receiving the same +error/annotated fault. + +See KVM_EXIT_MEMORY_FAULT for more information. + +8.45 KVM_CAP_X86_GUEST_MODE +--------------------------- + +:Architectures: x86 + +The presence of this capability indicates that KVM_RUN will update the +KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the +vCPU was executing nested guest code when it exited. + +KVM exits with the register state of either the L1 or L2 guest +depending on which executed at the time of an exit. Userspace must +take care to differentiate between these cases. + 9. Known KVM API problems ========================= From 269a2c3663c6d19526347e7c537b36e74e4df3e6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2025 15:04:06 +0200 Subject: [PATCH 0078/2924] Documentation: kvm: remove KVM_CAP_MIPS_TE Trap and emulate virtualization is not available anymore for MIPS. Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 3a8605f88dc545..47c7c3f92314e5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8563,20 +8563,6 @@ may be incompatible with the MIPS VZ ASE. virtualization, including standard guest virtual memory segments. == ========================================================================== -8.6 KVM_CAP_MIPS_TE -------------------- - -:Architectures: mips - -This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that -it is available, means that the trap & emulate implementation is available to -run guest code in user mode, even if KVM_CAP_MIPS_VZ indicates that hardware -assisted virtualisation is also available. KVM_VM_MIPS_TE (0) must be passed -to KVM_CREATE_VM to create a VM which utilises it. - -If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is -available, it means that the VM is using trap & emulate. - 8.7 KVM_CAP_MIPS_64BIT ---------------------- From ef01cac401f18647d62720cf773d7bb0541827da Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Apr 2025 08:05:04 -0700 Subject: [PATCH 0079/2924] KVM: x86: Acquire SRCU in KVM_GET_MP_STATE to protect guest memory accesses Acquire a lock on kvm->srcu when userspace is getting MP state to handle a rather extreme edge case where "accepting" APIC events, i.e. processing pending INIT or SIPI, can trigger accesses to guest memory. If the vCPU is in L2 with INIT *and* a TRIPLE_FAULT request pending, then getting MP state will trigger a nested VM-Exit by way of ->check_nested_events(), and emuating the nested VM-Exit can access guest memory. The splat was originally hit by syzkaller on a Google-internal kernel, and reproduced on an upstream kernel by hacking the triple_fault_event_test selftest to stuff a pending INIT, store an MSR on VM-Exit (to generate a memory access on VMX), and do vcpu_mp_state_get() to trigger the scenario. ============================= WARNING: suspicious RCU usage 6.14.0-rc3-b112d356288b-vmx/pi_lockdep_false_pos-lock #3 Not tainted ----------------------------- include/linux/kvm_host.h:1058 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by triple_fault_ev/1256: #0: ffff88810df5a330 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0x8b/0x9a0 [kvm] stack backtrace: CPU: 11 UID: 1000 PID: 1256 Comm: triple_fault_ev Not tainted 6.14.0-rc3-b112d356288b-vmx #3 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack_lvl+0x7f/0x90 lockdep_rcu_suspicious+0x144/0x190 kvm_vcpu_gfn_to_memslot+0x156/0x180 [kvm] kvm_vcpu_read_guest+0x3e/0x90 [kvm] read_and_check_msr_entry+0x2e/0x180 [kvm_intel] __nested_vmx_vmexit+0x550/0xde0 [kvm_intel] kvm_check_nested_events+0x1b/0x30 [kvm] kvm_apic_accept_events+0x33/0x100 [kvm] kvm_arch_vcpu_ioctl_get_mpstate+0x30/0x1d0 [kvm] kvm_vcpu_ioctl+0x33e/0x9a0 [kvm] __x64_sys_ioctl+0x8b/0xb0 do_syscall_64+0x6c/0x170 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20250401150504.829812-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c841817a914a34..3712dde0bf9d1f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11786,6 +11786,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, if (kvm_mpx_supported()) kvm_load_guest_fpu(vcpu); + kvm_vcpu_srcu_read_lock(vcpu); + r = kvm_apic_accept_events(vcpu); if (r < 0) goto out; @@ -11799,6 +11801,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, mp_state->mp_state = vcpu->arch.mp_state; out: + kvm_vcpu_srcu_read_unlock(vcpu); + if (kvm_mpx_supported()) kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); From 0297cdc12a87629ad904ac8c0630f7702f9a2d48 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Apr 2025 07:22:38 -0700 Subject: [PATCH 0080/2924] KVM: selftests: Add option to rseq test to override /dev/cpu_dma_latency Add a "-l " param to the rseq test so that the user can override /dev/cpu_dma_latency, as described by the test's suggested workaround for not being able to complete enough migrations. cpu_dma_latency is not a normal file, even as far as procfs files go. Writes to cpu_dma_latency only persist so long as the file is open, e.g. so that the kernel automatically reverts back to a power-optimized state once the sensitive workload completes. Provide the necessary functionality instead of effectively forcing the user to write a non-obvious wrapper. Cc: Dongsheng Zhang Cc: Zide Chen Signed-off-by: Sean Christopherson Message-ID: <20250401142238.819487-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/rseq_test.c | 31 ++++++++++++++++++++----- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index e5898678bfab48..1375fca80bcdbe 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -196,25 +196,27 @@ static void calc_min_max_cpu(void) static void help(const char *name) { puts(""); - printf("usage: %s [-h] [-u]\n", name); + printf("usage: %s [-h] [-u] [-l latency]\n", name); printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); + printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n"); puts(""); exit(0); } int main(int argc, char *argv[]) { + int r, i, snapshot, opt, fd = -1, latency = -1; bool skip_sanity_check = false; - int r, i, snapshot; struct kvm_vm *vm; struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - int opt; - while ((opt = getopt(argc, argv, "hu")) != -1) { + while ((opt = getopt(argc, argv, "hl:u")) != -1) { switch (opt) { case 'u': skip_sanity_check = true; + case 'l': + latency = atoi_paranoid(optarg); break; case 'h': default: @@ -243,6 +245,20 @@ int main(int argc, char *argv[]) pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); + if (latency >= 0) { + /* + * Writes to cpu_dma_latency persist only while the file is + * open, i.e. it allows userspace to provide guaranteed latency + * while running a workload. Keep the file open until the test + * completes, otherwise writing cpu_dma_latency is meaningless. + */ + fd = open("/dev/cpu_dma_latency", O_RDWR); + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd)); + + r = write(fd, &latency, 4); + TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency"); + } + for (i = 0; !done; i++) { vcpu_run(vcpu); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, @@ -278,6 +294,9 @@ int main(int argc, char *argv[]) "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } + if (fd > 0) + close(fd); + /* * Sanity check that the test was able to enter the guest a reasonable * number of times, e.g. didn't get stalled too often/long waiting for @@ -293,8 +312,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), "Only performed %d KVM_RUNs, task stalled too much?\n\n" " Try disabling deep sleep states to reduce CPU wakeup latency,\n" - " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n" - " or run with -u to disable this sanity check.", i); + " e.g. via cpuidle.off=1 or via -l , or run with -u to\n" + " disable this sanity check.", i); pthread_join(migration_thread, NULL); From 81d480fdf8b7d9b13fe87e1f0516f89794708094 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 14 Mar 2025 19:34:48 -0700 Subject: [PATCH 0081/2924] KVM: x86/mmu: Wrap sanity check on number of TDP MMU pages with KVM_PROVE_MMU Wrap the TDP MMU page counter in CONFIG_KVM_PROVE_MMU so that the sanity check is omitted from production builds, and more importantly to remove the atomic accesses to account pages. A one-off memory leak in production is relatively uninteresting, and a WARN_ON won't help mitigate a systemic issue; it's as much about helping triage memory leaks as it is about detecting them in the first place, and doesn't magically stop the leaks. I.e. production environments will be quite sad if a severe KVM bug escapes, regardless of whether or not KVM WARNs. Signed-off-by: Sean Christopherson Message-ID: <20250315023448.2358456-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 7 ++++++- arch/x86/kvm/mmu/tdp_mmu.c | 8 +++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a884ab544335e7..3bdae454a95976 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1472,8 +1472,13 @@ struct kvm_arch { struct once nx_once; #ifdef CONFIG_X86_64 - /* The number of TDP MMU pages across all roots. */ +#ifdef CONFIG_KVM_PROVE_MMU + /* + * The number of TDP MMU pages across all roots. Used only to sanity + * check that KVM isn't leaking TDP MMU pages. + */ atomic64_t tdp_mmu_pages; +#endif /* * List of struct kvm_mmu_pages being used as roots. diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 7cc0564f5f97e5..21a3b816624239 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -40,7 +40,9 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) kvm_tdp_mmu_invalidate_roots(kvm, KVM_VALID_ROOTS); kvm_tdp_mmu_zap_invalidated_roots(kvm, false); - WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); +#ifdef CONFIG_KVM_PROVE_MMU + KVM_MMU_WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); +#endif WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* @@ -325,13 +327,17 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_account_pgtable_pages((void *)sp->spt, +1); +#ifdef CONFIG_KVM_PROVE_MMU atomic64_inc(&kvm->arch.tdp_mmu_pages); +#endif } static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_account_pgtable_pages((void *)sp->spt, -1); +#ifdef CONFIG_KVM_PROVE_MMU atomic64_dec(&kvm->arch.tdp_mmu_pages); +#endif } /** From 459a35111b0a890172a78d51c01b204e13a34a18 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 14 Mar 2025 19:46:23 -0700 Subject: [PATCH 0082/2924] KVM: Allow building irqbypass.ko as as module when kvm.ko is a module Convert HAVE_KVM_IRQ_BYPASS into a tristate so that selecting IRQ_BYPASS_MANAGER follows KVM={m,y}, i.e. doesn't force irqbypass.ko to be built-in. Note, PPC allows building KVM as a module, but selects HAVE_KVM_IRQ_BYPASS from a boolean Kconfig, i.e. KVM PPC unnecessarily forces irqbpass.ko to be built-in. But that flaw is a longstanding PPC specific issue. Fixes: 61df71ee992d ("kvm: move "select IRQ_BYPASS_MANAGER" to common code") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20250315024623.2363994-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- virt/kvm/Kconfig | 2 +- virt/kvm/eventfd.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5438a1b446a6b0..291d49b9bf0549 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2382,7 +2382,7 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot) struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 746e1f466aa647..727b542074e7ef 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -75,7 +75,7 @@ config KVM_COMPAT depends on KVM && COMPAT && !(S390 || ARM64 || RISCV) config HAVE_KVM_IRQ_BYPASS - bool + tristate select IRQ_BYPASS_MANAGER config HAVE_KVM_VCPU_ASYNC_IOCTL diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 249ba5b72e9b09..11e5d1e3f12eae 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -149,7 +149,7 @@ irqfd_shutdown(struct work_struct *work) /* * It is now safe to release the object's resources */ -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) irq_bypass_unregister_consumer(&irqfd->consumer); #endif eventfd_ctx_put(irqfd->eventfd); @@ -274,7 +274,7 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) write_seqcount_end(&irqfd->irq_entry_sc); } -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) void __attribute__((weak)) kvm_arch_irq_bypass_stop( struct irq_bypass_consumer *cons) { @@ -424,7 +424,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) if (events & EPOLLIN) schedule_work(&irqfd->inject); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) if (kvm_arch_has_irq_bypass()) { irqfd->consumer.token = (void *)irqfd->eventfd; irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; @@ -609,14 +609,14 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_lock_irq(&kvm->irqfds.lock); list_for_each_entry(irqfd, &kvm->irqfds.items, list) { -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) /* Under irqfds.lock, so can read irq_entry safely */ struct kvm_kernel_irq_routing_entry old = irqfd->irq_entry; #endif irqfd_update(kvm, irqfd); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) if (irqfd->producer && kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) { int ret = kvm_arch_update_irqfd_routing( From bc52ae0a708cb6fa3926d11c88e3c55e1171b4a1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 14 Mar 2025 19:41:02 -0700 Subject: [PATCH 0083/2924] KVM: x86: Explicitly zero-initialize on-stack CPUID unions Explicitly zero/empty-initialize the unions used for PMU related CPUID entries, instead of manually zeroing all fields (hopefully), or in the case of 0x80000022, relying on the compiler to clobber the uninitialized bitfields. Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Message-ID: <20250315024102.2361628-1-seanjc@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 5e4d4934c0d3c2..571c906ffcbfe2 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1427,8 +1427,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) } break; case 0xa: { /* Architectural Performance Monitoring */ - union cpuid10_eax eax; - union cpuid10_edx edx; + union cpuid10_eax eax = { }; + union cpuid10_edx edx = { }; if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; @@ -1444,8 +1444,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) if (kvm_pmu_cap.version) edx.split.anythread_deprecated = 1; - edx.split.reserved1 = 0; - edx.split.reserved2 = 0; entry->eax = eax.full; entry->ebx = kvm_pmu_cap.events_mask; @@ -1763,7 +1761,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) break; /* AMD Extended Performance Monitoring and Debug */ case 0x80000022: { - union cpuid_0x80000022_ebx ebx; + union cpuid_0x80000022_ebx ebx = { }; entry->ecx = entry->edx = 0; if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) { From 6bad6ecc63b75af294ff3f56f54d6b857c8964a5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Apr 2025 08:47:26 -0700 Subject: [PATCH 0084/2924] KVM: VMX: Assert that IRQs are disabled when putting vCPU on PI wakeup list Assert that IRQs are already disabled when putting a vCPU on a CPU's PI wakeup list, as opposed to saving/disabling+restoring IRQs. KVM relies on IRQs being disabled until the vCPU task is fully scheduled out, i.e. until the scheduler has dropped all of its per-CPU locks (e.g. for the runqueue), as attempting to wake the task while it's being scheduled out could lead to deadlock. Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Reviewed-by: Yan Zhao Message-ID: <20250401154727.835231-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/posted_intr.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index ec08fa3caf43ce..840d435229a872 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -148,9 +148,8 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu) struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); struct pi_desc old, new; - unsigned long flags; - local_irq_save(flags); + lockdep_assert_irqs_disabled(); raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); list_add_tail(&vmx->pi_wakeup_list, @@ -176,8 +175,6 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu) */ if (pi_test_on(&new)) __apic_send_IPI_self(POSTED_INTR_WAKEUP_VECTOR); - - local_irq_restore(flags); } static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu) From c0b8dcabb2cddc98c265548632c39e97422f61b6 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Tue, 1 Apr 2025 08:47:27 -0700 Subject: [PATCH 0085/2924] KVM: VMX: Use separate subclasses for PI wakeup lock to squash false positive Use a separate subclass when acquiring KVM's per-CPU posted interrupts wakeup lock in the scheduled out path, i.e. when adding a vCPU on the list of vCPUs to wake, to workaround a false positive deadlock. Chain exists of: &p->pi_lock --> &rq->__lock --> &per_cpu(wakeup_vcpus_on_cpu_lock, cpu) Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu)); lock(&rq->__lock); lock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu)); lock(&p->pi_lock); *** DEADLOCK *** In the wakeup handler, the callchain is *always*: sysvec_kvm_posted_intr_wakeup_ipi() | --> pi_wakeup_handler() | --> kvm_vcpu_wake_up() | --> try_to_wake_up(), and the lock order is: &per_cpu(wakeup_vcpus_on_cpu_lock, cpu) --> &p->pi_lock. For the schedule out path, the callchain is always (for all intents and purposes; if the kernel is preemptible, kvm_sched_out() can be called from something other than schedule(), but the beginning of the callchain will be the same point in vcpu_block()): vcpu_block() | --> schedule() | --> kvm_sched_out() | --> vmx_vcpu_put() | --> vmx_vcpu_pi_put() | --> pi_enable_wakeup_handler() and the lock order is: &rq->__lock --> &per_cpu(wakeup_vcpus_on_cpu_lock, cpu) I.e. lockdep sees AB+BC ordering for schedule out, and CA ordering for wakeup, and complains about the A=>C versus C=>A inversion. In practice, deadlock can't occur between schedule out and the wakeup handler as they are mutually exclusive. The entirely of the schedule out code that runs with the problematic scheduler locks held, does so with IRQs disabled, i.e. can't run concurrently with the wakeup handler. Use a subclass instead disabling lockdep entirely, and tell lockdep that both subclasses are being acquired when loading a vCPU, as the sched_out and sched_in paths are NOT mutually exclusive, e.g. CPU 0 CPU 1 --------------- --------------- vCPU0 sched_out vCPU1 sched_in vCPU1 sched_out vCPU 0 sched_in where vCPU0's sched_in may race with vCPU1's sched_out, on CPU 0's wakeup list+lock. Signed-off-by: Yan Zhao Signed-off-by: Sean Christopherson Reviewed-by: Maxim Levitsky Message-ID: <20250401154727.835231-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/posted_intr.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 840d435229a872..51116fe69a5001 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -31,6 +31,8 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu); */ static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock); +#define PI_LOCK_SCHED_OUT SINGLE_DEPTH_NESTING + static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) { return &(to_vmx(vcpu)->pi_desc); @@ -89,9 +91,20 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) * current pCPU if the task was migrated. */ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) { - raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); + raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu); + + /* + * In addition to taking the wakeup lock for the regular/IRQ + * context, tell lockdep it is being taken for the "sched out" + * context as well. vCPU loads happens in task context, and + * this is taking the lock of the *previous* CPU, i.e. can race + * with both the scheduler and the wakeup handler. + */ + raw_spin_lock(spinlock); + spin_acquire(&spinlock->dep_map, PI_LOCK_SCHED_OUT, 0, _RET_IP_); list_del(&vmx->pi_wakeup_list); - raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); + spin_release(&spinlock->dep_map, _RET_IP_); + raw_spin_unlock(spinlock); } dest = cpu_physical_id(cpu); @@ -151,7 +164,20 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu) lockdep_assert_irqs_disabled(); - raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); + /* + * Acquire the wakeup lock using the "sched out" context to workaround + * a lockdep false positive. When this is called, schedule() holds + * various per-CPU scheduler locks. When the wakeup handler runs, it + * holds this CPU's wakeup lock while calling try_to_wake_up(), which + * can eventually take the aforementioned scheduler locks, which causes + * lockdep to assume there is deadlock. + * + * Deadlock can't actually occur because IRQs are disabled for the + * entirety of the sched_out critical section, i.e. the wakeup handler + * can't run while the scheduler locks are held. + */ + raw_spin_lock_nested(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu), + PI_LOCK_SCHED_OUT); list_add_tail(&vmx->pi_wakeup_list, &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu)); raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); From ab6005f3912fff07330297aba08922d2456dcede Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 4 Apr 2025 15:46:34 +0100 Subject: [PATCH 0086/2924] io_uring: don't post tag CQEs on file/buffer registration failure Buffer / file table registration is all or nothing, if it fails all resources we might have partially registered are dropped and the table is killed. If that happens, it doesn't make sense to post any rsrc tag CQEs. That would be confusing to the application, which should not need to handle that case. Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov Fixes: 7029acd8a9503 ("io_uring/rsrc: get rid of per-ring io_rsrc_node list") Link: https://lore.kernel.org/r/c514446a8dcb0197cddd5d4ba8f6511da081cf1f.1743777957.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 5e64a8bb30a451..b36c8825550eff 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -175,6 +175,18 @@ void io_rsrc_cache_free(struct io_ring_ctx *ctx) io_alloc_cache_free(&ctx->imu_cache, kfree); } +static void io_clear_table_tags(struct io_rsrc_data *data) +{ + int i; + + for (i = 0; i < data->nr; i++) { + struct io_rsrc_node *node = data->nodes[i]; + + if (node) + node->tag = 0; + } +} + __cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data) { @@ -583,6 +595,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, io_file_table_set_alloc_range(ctx, 0, ctx->file_table.data.nr); return 0; fail: + io_clear_table_tags(&ctx->file_table.data); io_sqe_files_unregister(ctx); return ret; } @@ -902,8 +915,10 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, } ctx->buf_table = data; - if (ret) + if (ret) { + io_clear_table_tags(&ctx->buf_table); io_sqe_buffers_unregister(ctx); + } return ret; } From 9ad19171b6d6fa5dcdd8a1d5d1b82dbdeaf65ab0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 15:15:54 -0700 Subject: [PATCH 0087/2924] lib/crc: remove unnecessary prompt for CONFIG_CRC32 and drop 'default y' All modules that need CONFIG_CRC32 already select it, so there is no need to bother users about the option, nor to default it to y. Reviewed-by: Christoph Hellwig Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250401221600.24878-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/sh/configs/edosk7705_defconfig | 1 - arch/sh/configs/kfr2r09-romimage_defconfig | 1 - arch/sh/configs/sh7724_generic_defconfig | 1 - arch/sh/configs/sh7770_generic_defconfig | 1 - lib/Kconfig | 8 +------- 5 files changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig index 296ed768cbbb7f..ee3f6db7d8da1f 100644 --- a/arch/sh/configs/edosk7705_defconfig +++ b/arch/sh/configs/edosk7705_defconfig @@ -33,4 +33,3 @@ CONFIG_CMDLINE_FROM_BOOTLOADER=y # CONFIG_PROC_FS is not set # CONFIG_SYSFS is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_CRC32 is not set diff --git a/arch/sh/configs/kfr2r09-romimage_defconfig b/arch/sh/configs/kfr2r09-romimage_defconfig index 42bf34181a3e09..88fbb65cb9f9e0 100644 --- a/arch/sh/configs/kfr2r09-romimage_defconfig +++ b/arch/sh/configs/kfr2r09-romimage_defconfig @@ -49,4 +49,3 @@ CONFIG_TMPFS=y # CONFIG_NETWORK_FILESYSTEMS is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y -# CONFIG_CRC32 is not set diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig index 5440bd0ca4ed5b..e6298f22623a9c 100644 --- a/arch/sh/configs/sh7724_generic_defconfig +++ b/arch/sh/configs/sh7724_generic_defconfig @@ -39,4 +39,3 @@ CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_SYSFS is not set # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_CRC32 is not set diff --git a/arch/sh/configs/sh7770_generic_defconfig b/arch/sh/configs/sh7770_generic_defconfig index 4338af8d02d036..2e2b46980b58bb 100644 --- a/arch/sh/configs/sh7770_generic_defconfig +++ b/arch/sh/configs/sh7770_generic_defconfig @@ -41,4 +41,3 @@ CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_SYSFS is not set # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_CRC32 is not set diff --git a/lib/Kconfig b/lib/Kconfig index 61cce0686b53eb..c91de83b3e5a2d 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -177,14 +177,8 @@ config CRC_ITU_T functions require M here. config CRC32 - tristate "CRC32/CRC32c functions" - default y + tristate select BITREVERSE - help - This option is provided for the case where no in-kernel-tree - modules require CRC32/CRC32c functions, but a module built outside - the kernel tree does. Such modules that use library CRC32/CRC32c - functions require M here. config ARCH_HAS_CRC32 bool From 7939da264bcc91ced4d01014241a581e79d414d9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 15:15:55 -0700 Subject: [PATCH 0088/2924] lib/crc: remove unnecessary prompt for CONFIG_CRC_CCITT All modules that need CONFIG_CRC_CCITT already select it, so there is no need to bother users about the option. Reviewed-by: Christoph Hellwig Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250401221600.24878-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm/configs/at91_dt_defconfig | 1 - arch/arm/configs/collie_defconfig | 1 - arch/arm/configs/dove_defconfig | 1 - arch/arm/configs/exynos_defconfig | 1 - arch/arm/configs/imx_v6_v7_defconfig | 1 - arch/arm/configs/lpc32xx_defconfig | 1 - arch/arm/configs/milbeaut_m10v_defconfig | 1 - arch/arm/configs/mmp2_defconfig | 1 - arch/arm/configs/multi_v4t_defconfig | 1 - arch/arm/configs/multi_v5_defconfig | 1 - arch/arm/configs/mvebu_v5_defconfig | 1 - arch/arm/configs/omap2plus_defconfig | 1 - arch/arm/configs/pxa168_defconfig | 1 - arch/arm/configs/pxa910_defconfig | 1 - arch/arm/configs/pxa_defconfig | 1 - arch/arm/configs/s5pv210_defconfig | 1 - arch/arm/configs/sama7_defconfig | 1 - arch/arm/configs/spitz_defconfig | 1 - arch/arm/configs/wpcm450_defconfig | 1 - arch/hexagon/configs/comet_defconfig | 1 - arch/mips/configs/fuloong2e_defconfig | 1 - arch/parisc/configs/generic-32bit_defconfig | 1 - arch/parisc/configs/generic-64bit_defconfig | 1 - arch/powerpc/configs/44x/warp_defconfig | 1 - arch/powerpc/configs/85xx/ge_imp3a_defconfig | 1 - arch/powerpc/configs/85xx/stx_gp3_defconfig | 1 - arch/powerpc/configs/gamecube_defconfig | 1 - arch/powerpc/configs/linkstation_defconfig | 1 - arch/powerpc/configs/mpc866_ads_defconfig | 1 - arch/powerpc/configs/mvme5100_defconfig | 1 - arch/powerpc/configs/pasemi_defconfig | 1 - arch/powerpc/configs/ps3_defconfig | 1 - arch/powerpc/configs/wii_defconfig | 1 - arch/sh/configs/magicpanelr2_defconfig | 1 - arch/sh/configs/se7206_defconfig | 1 - arch/sh/configs/se7712_defconfig | 1 - arch/sh/configs/se7721_defconfig | 1 - arch/sh/configs/sh03_defconfig | 1 - arch/sh/configs/sh2007_defconfig | 1 - lib/Kconfig | 7 +------ 40 files changed, 1 insertion(+), 45 deletions(-) diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig index f2596a1b2f7d9a..ff13e1ecf4bb95 100644 --- a/arch/arm/configs/at91_dt_defconfig +++ b/arch/arm/configs/at91_dt_defconfig @@ -232,7 +232,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_DEV_ATMEL_AES=y CONFIG_CRYPTO_DEV_ATMEL_TDES=y CONFIG_CRYPTO_DEV_ATMEL_SHA=y -CONFIG_CRC_CCITT=y CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_ACORN_8x8=y diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig index 42cb1c8541188c..578c6a4af620d7 100644 --- a/arch/arm/configs/collie_defconfig +++ b/arch/arm/configs/collie_defconfig @@ -78,7 +78,6 @@ CONFIG_ROMFS_FS=y CONFIG_NLS_DEFAULT="cp437" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=y CONFIG_FONTS=y CONFIG_FONT_MINI_4x6=y # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index b382a2e175fbc6..d76eb12d29a759 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -128,7 +128,6 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_MARVELL_CESA=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 7ad48fdda1dac6..e81a5d6c1c2085 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -370,7 +370,6 @@ CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_CHACHA20_NEON=m CONFIG_CRYPTO_DEV_EXYNOS_RNG=y CONFIG_CRYPTO_DEV_S5P=y -CONFIG_CRC_CCITT=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=96 CONFIG_FONTS=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 297c6a7b978a69..9c2f8a3b30cb22 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -481,7 +481,6 @@ CONFIG_SECURITYFS=y CONFIG_CRYPTO_DEV_FSL_CAAM=y CONFIG_CRYPTO_DEV_SAHARA=y CONFIG_CRYPTO_DEV_MXS_DCP=y -CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_FONTS=y diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig index 98e267213b214f..9afccd76446b6b 100644 --- a/arch/arm/configs/lpc32xx_defconfig +++ b/arch/arm/configs/lpc32xx_defconfig @@ -179,7 +179,6 @@ CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y CONFIG_CRYPTO_ANSI_CPRNG=y # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig index acd16204f8d7f8..bb35cc0c469e9c 100644 --- a/arch/arm/configs/milbeaut_m10v_defconfig +++ b/arch/arm/configs/milbeaut_m10v_defconfig @@ -108,7 +108,6 @@ CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_AES_ARM_CE=m CONFIG_CRYPTO_CHACHA20_NEON=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=m CONFIG_CRC_ITU_T=m CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=64 diff --git a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig index f6f9e135353ed4..842a989baa277d 100644 --- a/arch/arm/configs/mmp2_defconfig +++ b/arch/arm/configs/mmp2_defconfig @@ -67,7 +67,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/multi_v4t_defconfig b/arch/arm/configs/multi_v4t_defconfig index 27d650635d9b83..1a86dc30552361 100644 --- a/arch/arm/configs/multi_v4t_defconfig +++ b/arch/arm/configs/multi_v4t_defconfig @@ -91,6 +91,5 @@ CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_CRAMFS=y CONFIG_MINIX_FS=y -CONFIG_CRC_CCITT=y # CONFIG_FTRACE is not set CONFIG_DEBUG_USER=y diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index db81862bdb9374..cf6180b4296e98 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -289,7 +289,6 @@ CONFIG_NLS_UTF8=y CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_DEV_MARVELL_CESA=y -CONFIG_CRC_CCITT=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig index a518d4a2581e08..23dbb80fcc2eec 100644 --- a/arch/arm/configs/mvebu_v5_defconfig +++ b/arch/arm/configs/mvebu_v5_defconfig @@ -187,7 +187,6 @@ CONFIG_NLS_UTF8=y CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_DEV_MARVELL_CESA=y -CONFIG_CRC_CCITT=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 113d6dfe52435e..c5238581a8f6d2 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -706,7 +706,6 @@ CONFIG_CRYPTO_DEV_OMAP=m CONFIG_CRYPTO_DEV_OMAP_SHAM=m CONFIG_CRYPTO_DEV_OMAP_AES=m CONFIG_CRYPTO_DEV_OMAP_DES=m -CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_CRC_ITU_T=y CONFIG_DMA_CMA=y diff --git a/arch/arm/configs/pxa168_defconfig b/arch/arm/configs/pxa168_defconfig index ce10fe2104bf1e..4748c7d33cb8a9 100644 --- a/arch/arm/configs/pxa168_defconfig +++ b/arch/arm/configs/pxa168_defconfig @@ -41,7 +41,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/pxa910_defconfig b/arch/arm/configs/pxa910_defconfig index 1f28aea860146f..49b59c600ae1c2 100644 --- a/arch/arm/configs/pxa910_defconfig +++ b/arch/arm/configs/pxa910_defconfig @@ -50,7 +50,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index de0ac8f521d761..c21ea1d72f7cc5 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -663,7 +663,6 @@ CONFIG_CRYPTO_SHA1_ARM=m CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m -CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=m CONFIG_FONTS=y CONFIG_FONT_8x8=y diff --git a/arch/arm/configs/s5pv210_defconfig b/arch/arm/configs/s5pv210_defconfig index 5dbe85c263de3c..02121eec365819 100644 --- a/arch/arm/configs/s5pv210_defconfig +++ b/arch/arm/configs/s5pv210_defconfig @@ -113,7 +113,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_CRC_CCITT=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig index ea7ddf640ba73e..d850f92a73eafb 100644 --- a/arch/arm/configs/sama7_defconfig +++ b/arch/arm/configs/sama7_defconfig @@ -227,7 +227,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_DEV_ATMEL_AES=y CONFIG_CRYPTO_DEV_ATMEL_TDES=y CONFIG_CRYPTO_DEV_ATMEL_SHA=y -CONFIG_CRC_CCITT=y CONFIG_CRC_ITU_T=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=32 diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index ac5b7a5aaff680..ffec59e3f49c1d 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -234,7 +234,6 @@ CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m -CONFIG_CRC_CCITT=y CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y diff --git a/arch/arm/configs/wpcm450_defconfig b/arch/arm/configs/wpcm450_defconfig index 5e4397f7f828cf..60e38f5b7dae06 100644 --- a/arch/arm/configs/wpcm450_defconfig +++ b/arch/arm/configs/wpcm450_defconfig @@ -191,7 +191,6 @@ CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y CONFIG_PKCS7_MESSAGE_PARSER=y CONFIG_SYSTEM_TRUSTED_KEYRING=y -CONFIG_CRC_CCITT=y CONFIG_CRC_ITU_T=m CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index 469c025297c699..fed4a64b36fb39 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -72,7 +72,6 @@ CONFIG_INET=y CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_T10DIF=y CONFIG_FRAME_WARN=0 diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 5ab149cd3178e8..114fcd67898d47 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -218,4 +218,3 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=y diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index f5fffc24c3bc5b..68c45d39e5b1f0 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -264,7 +264,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y CONFIG_FONTS=y CONFIG_PRINTK_TIME=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index 2487765b7be362..ecc9ffcc11cd71 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -292,7 +292,6 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_DEFLATE=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=m CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_STRIP_ASM_SYMS=y diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 20891c413149ca..633dba199d6274 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -85,7 +85,6 @@ CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_NLS_UTF8=y -CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index 6f58ee1edf1fc6..3cf48f1f729fc3 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -221,7 +221,6 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=y -CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_CRYPTO_CBC=y diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig index e7080497048d5c..ef7c511a6930fd 100644 --- a/arch/powerpc/configs/85xx/stx_gp3_defconfig +++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig @@ -60,7 +60,6 @@ CONFIG_CRAMFS=m CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NLS=y -CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=m CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig index d77eeb525366a3..cdd99657b71b0f 100644 --- a/arch/powerpc/configs/gamecube_defconfig +++ b/arch/powerpc/configs/gamecube_defconfig @@ -82,7 +82,6 @@ CONFIG_ROOT_NFS=y CONFIG_CIFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index fa707de761bed4..8be99d4d5289a9 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -125,7 +125,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_932=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index a0d27c59ea788b..dfbdd5e8e10879 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -38,4 +38,3 @@ CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_CRC_CCITT=y diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index d1c7fd5bf34b31..dd7b0f8e50aee3 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -107,7 +107,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_932=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 61993944db40ad..8bbf51b38480ec 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -159,7 +159,6 @@ CONFIG_NFSD=y CONFIG_NFSD_V4=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 2b175ddf82f0bc..62376b8c13e947 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -148,7 +148,6 @@ CONFIG_NLS_ISO8859_1=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_LZO=m -CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index 5017a697b67bc3..7c714a19221e10 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -114,7 +114,6 @@ CONFIG_ROOT_NFS=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SPINLOCK=y diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index 8d443749550ec0..a1109762c8ec65 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -86,5 +86,4 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_KOBJECT=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_POINTER=y -CONFIG_CRC_CCITT=m CONFIG_CRC16=m diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 472fdf365cad0e..bdd29e817ff717 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -101,6 +101,5 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_ITU_T=y diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 20f07aee5bde72..6b31be19b94fd5 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -97,4 +97,3 @@ CONFIG_FRAME_POINTER=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_CCITT=y diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index 00862d3c030d24..35f500699b1d70 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -123,4 +123,3 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_POINTER=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_CCITT=y diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 48f38ec236b6b0..4d75c92cac10ed 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -120,6 +120,5 @@ CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_DEFLATE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_CCITT=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index 1b1174a07e3617..61bc391d443c35 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -193,5 +193,4 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=y CONFIG_CRC16=y diff --git a/lib/Kconfig b/lib/Kconfig index c91de83b3e5a2d..81d8ff429dca01 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -139,12 +139,7 @@ config TRACE_MMIO_ACCESS source "lib/crypto/Kconfig" config CRC_CCITT - tristate "CRC-CCITT functions" - help - This option is provided for the case where no in-kernel-tree - modules require CRC-CCITT functions, but a module built outside - the kernel tree does. Such modules that use library CRC-CCITT - functions require M here. + tristate config CRC16 tristate "CRC16 functions" From 2038af8edae281283ee10e220a3df19dcad3b61c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 15:15:56 -0700 Subject: [PATCH 0089/2924] lib/crc: remove unnecessary prompt for CONFIG_CRC16 All modules that need CONFIG_CRC16 already select it, so there is no need to bother users about the option. Reviewed-by: Christoph Hellwig Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250401221600.24878-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/hexagon/configs/comet_defconfig | 1 - arch/m68k/configs/amcore_defconfig | 1 - arch/mips/configs/omega2p_defconfig | 1 - arch/mips/configs/rb532_defconfig | 1 - arch/mips/configs/sb1250_swarm_defconfig | 1 - arch/mips/configs/vocore2_defconfig | 1 - arch/powerpc/configs/skiroot_defconfig | 1 - arch/sh/configs/hp6xx_defconfig | 1 - arch/sh/configs/magicpanelr2_defconfig | 1 - arch/sh/configs/se7206_defconfig | 1 - arch/sh/configs/sh2007_defconfig | 1 - arch/sh/configs/titan_defconfig | 1 - arch/sparc/configs/sparc64_defconfig | 1 - lib/Kconfig | 7 +------ 14 files changed, 1 insertion(+), 19 deletions(-) diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index fed4a64b36fb39..902cf30cf54b16 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -72,7 +72,6 @@ CONFIG_INET=y CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=y CONFIG_CRC_T10DIF=y CONFIG_FRAME_WARN=0 CONFIG_MAGIC_SYSRQ=y diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig index 67a0d157122daa..110279a64aa447 100644 --- a/arch/m68k/configs/amcore_defconfig +++ b/arch/m68k/configs/amcore_defconfig @@ -89,4 +89,3 @@ CONFIG_PANIC_ON_OOPS=y # CONFIG_CRYPTO_ECHAINIV is not set CONFIG_CRYPTO_ANSI_CPRNG=y # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=y diff --git a/arch/mips/configs/omega2p_defconfig b/arch/mips/configs/omega2p_defconfig index 128f9abab7fcc7..e2bcdfd290a1d8 100644 --- a/arch/mips/configs/omega2p_defconfig +++ b/arch/mips/configs/omega2p_defconfig @@ -111,7 +111,6 @@ CONFIG_NLS_KOI8_U=y CONFIG_NLS_UTF8=y CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y -CONFIG_CRC16=y CONFIG_XZ_DEC=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig index 0261969a6e45e2..42b161d587c778 100644 --- a/arch/mips/configs/rb532_defconfig +++ b/arch/mips/configs/rb532_defconfig @@ -155,5 +155,4 @@ CONFIG_JFFS2_COMPRESSION_OPTIONS=y CONFIG_SQUASHFS=y CONFIG_CRYPTO_TEST=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=m CONFIG_STRIP_ASM_SYMS=y diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig index ce855b644bb03d..ae2afff00e01a0 100644 --- a/arch/mips/configs/sb1250_swarm_defconfig +++ b/arch/mips/configs/sb1250_swarm_defconfig @@ -99,4 +99,3 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=m diff --git a/arch/mips/configs/vocore2_defconfig b/arch/mips/configs/vocore2_defconfig index 917967fed45fe6..2a9a9b12847d3a 100644 --- a/arch/mips/configs/vocore2_defconfig +++ b/arch/mips/configs/vocore2_defconfig @@ -111,7 +111,6 @@ CONFIG_NLS_KOI8_U=y CONFIG_NLS_UTF8=y CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y -CONFIG_CRC16=y CONFIG_XZ_DEC=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 1eb446452fc040..6c0517961eeeac 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -279,7 +279,6 @@ CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y # CONFIG_INTEGRITY is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=y CONFIG_CRC_ITU_T=y # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_IA64 is not set diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig index 77e3185f63e47a..3b7525510f7c2d 100644 --- a/arch/sh/configs/hp6xx_defconfig +++ b/arch/sh/configs/hp6xx_defconfig @@ -56,5 +56,4 @@ CONFIG_CRYPTO_PCBC=y CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=y CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index a1109762c8ec65..93b9aa32dc7c74 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -86,4 +86,3 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_KOBJECT=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_POINTER=y -CONFIG_CRC16=m diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index bdd29e817ff717..d881fccbe6f0d4 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -101,5 +101,4 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=y CONFIG_CRC_ITU_T=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index 61bc391d443c35..cc6292b3235af9 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -193,4 +193,3 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 11ff5fd510de10..f3fad19b3059a0 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -265,4 +265,3 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC16=m diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 01b2bdfbf9a892..f1ba0fefe1f94a 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -229,7 +229,6 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC16=m CONFIG_VCC=m CONFIG_PATA_CMD64X=y CONFIG_IP_PNP=y diff --git a/lib/Kconfig b/lib/Kconfig index 81d8ff429dca01..372e07c5e31d94 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -142,12 +142,7 @@ config CRC_CCITT tristate config CRC16 - tristate "CRC16 functions" - help - This option is provided for the case where no in-kernel-tree - modules require CRC16 functions, but a module built outside - the kernel tree does. Such modules that use library CRC16 - functions require M here. + tristate config CRC_T10DIF tristate "CRC calculation for the T10 Data Integrity Field" From a6d0dbba950880e269d433222ca6d516ebe8a6ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 15:15:57 -0700 Subject: [PATCH 0090/2924] lib/crc: remove unnecessary prompt for CONFIG_CRC_T10DIF All modules that need CONFIG_CRC_T10DIF already select it, so there is no need to bother users about the option. Reviewed-by: Christoph Hellwig Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250401221600.24878-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm/configs/davinci_all_defconfig | 1 - arch/arm/configs/imx_v6_v7_defconfig | 1 - arch/arm/configs/omap2plus_defconfig | 1 - arch/arm/configs/orion5x_defconfig | 1 - arch/arm/configs/pxa_defconfig | 1 - arch/hexagon/configs/comet_defconfig | 1 - arch/mips/configs/bigsur_defconfig | 1 - arch/mips/configs/ip22_defconfig | 1 - arch/mips/configs/ip27_defconfig | 1 - arch/mips/configs/ip30_defconfig | 1 - arch/mips/configs/ip32_defconfig | 1 - arch/parisc/configs/generic-32bit_defconfig | 1 - arch/powerpc/configs/44x/sam440ep_defconfig | 1 - arch/powerpc/configs/44x/warp_defconfig | 1 - arch/powerpc/configs/83xx/mpc832x_rdb_defconfig | 1 - arch/powerpc/configs/83xx/mpc834x_itx_defconfig | 1 - arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig | 1 - arch/powerpc/configs/83xx/mpc837x_rdb_defconfig | 1 - arch/powerpc/configs/85xx/ge_imp3a_defconfig | 1 - arch/powerpc/configs/85xx/stx_gp3_defconfig | 1 - arch/powerpc/configs/85xx/xes_mpc85xx_defconfig | 1 - arch/powerpc/configs/86xx-hw.config | 1 - arch/powerpc/configs/amigaone_defconfig | 1 - arch/powerpc/configs/chrp32_defconfig | 1 - arch/powerpc/configs/fsl-emb-nonhw.config | 1 - arch/powerpc/configs/g5_defconfig | 1 - arch/powerpc/configs/linkstation_defconfig | 1 - arch/powerpc/configs/mpc83xx_defconfig | 1 - arch/powerpc/configs/mvme5100_defconfig | 1 - arch/powerpc/configs/pmac32_defconfig | 1 - arch/powerpc/configs/ppc44x_defconfig | 1 - arch/powerpc/configs/ppc64e_defconfig | 1 - arch/powerpc/configs/ps3_defconfig | 1 - arch/powerpc/configs/storcenter_defconfig | 1 - arch/sh/configs/ap325rxa_defconfig | 1 - arch/sh/configs/ecovec24_defconfig | 1 - arch/sh/configs/espt_defconfig | 1 - arch/sh/configs/hp6xx_defconfig | 1 - arch/sh/configs/landisk_defconfig | 1 - arch/sh/configs/lboxre2_defconfig | 1 - arch/sh/configs/migor_defconfig | 1 - arch/sh/configs/r7780mp_defconfig | 1 - arch/sh/configs/r7785rp_defconfig | 1 - arch/sh/configs/rts7751r2d1_defconfig | 1 - arch/sh/configs/rts7751r2dplus_defconfig | 1 - arch/sh/configs/sdk7780_defconfig | 1 - arch/sh/configs/se7724_defconfig | 1 - arch/sh/configs/sh7763rdp_defconfig | 1 - lib/Kconfig | 6 +----- tools/testing/selftests/bpf/config.x86_64 | 1 - tools/testing/selftests/hid/config.common | 1 - 51 files changed, 1 insertion(+), 55 deletions(-) diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index 3474e475373ae8..70b8c78386f405 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -249,7 +249,6 @@ CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=m CONFIG_DMA_CMA=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_RT_MUTEXES=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 9c2f8a3b30cb22..062c1eb8dd6012 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -481,7 +481,6 @@ CONFIG_SECURITYFS=y CONFIG_CRYPTO_DEV_FSL_CAAM=y CONFIG_CRYPTO_DEV_SAHARA=y CONFIG_CRYPTO_DEV_MXS_DCP=y -CONFIG_CRC_T10DIF=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_FONTS=y CONFIG_FONT_8x8=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index c5238581a8f6d2..d8a35c44b64f74 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -706,7 +706,6 @@ CONFIG_CRYPTO_DEV_OMAP=m CONFIG_CRYPTO_DEV_OMAP_SHAM=m CONFIG_CRYPTO_DEV_OMAP_AES=m CONFIG_CRYPTO_DEV_OMAP_DES=m -CONFIG_CRC_T10DIF=y CONFIG_CRC_ITU_T=y CONFIG_DMA_CMA=y CONFIG_FONTS=y diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index 0629b088a584a5..62b9c61027898f 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -136,7 +136,6 @@ CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_DEV_MARVELL_CESA=y -CONFIG_CRC_T10DIF=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index c21ea1d72f7cc5..24fca8608554cd 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -663,7 +663,6 @@ CONFIG_CRYPTO_SHA1_ARM=m CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m -CONFIG_CRC_T10DIF=m CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index 902cf30cf54b16..c6108f00028876 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -72,7 +72,6 @@ CONFIG_INET=y CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_FRAME_WARN=0 CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index fe282630b51cb9..8f7c36868204a3 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -238,7 +238,6 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m -CONFIG_CRC_T10DIF=m CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 31ca93d3acc59d..f1a8ccf2c45928 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -326,5 +326,4 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=m CONFIG_DEBUG_MEMORY_INIT=y diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index b8907b3d7a331f..5d079941fd207e 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -317,4 +317,3 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m -CONFIG_CRC_T10DIF=m diff --git a/arch/mips/configs/ip30_defconfig b/arch/mips/configs/ip30_defconfig index 270181a7320a4e..a4524e78546947 100644 --- a/arch/mips/configs/ip30_defconfig +++ b/arch/mips/configs/ip30_defconfig @@ -179,4 +179,3 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_LZO=m -CONFIG_CRC_T10DIF=m diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index 121e7e48fa7713..d8ac11427f69b0 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -177,7 +177,6 @@ CONFIG_CRYPTO_SERPENT=y CONFIG_CRYPTO_TEA=y CONFIG_CRYPTO_TWOFISH=y CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRC_T10DIF=y CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 68c45d39e5b1f0..5b65c98596131a 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -264,7 +264,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRC_T10DIF=y CONFIG_FONTS=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 2479ab62d12fa6..98221bda380d0b 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -91,5 +91,4 @@ CONFIG_AFFS_FS=m # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 633dba199d6274..5757625469c4ce 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -85,7 +85,6 @@ CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig index 1715ff5474427a..b99caba8724a72 100644 --- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig @@ -73,6 +73,5 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_8=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig index e65c0057147f7d..11163052fdbacf 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig @@ -80,5 +80,4 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig index 17714bf0ed40b9..312d39e4242c3f 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig @@ -72,5 +72,4 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig index 58fae5131fa7fc..ac27f99faab86e 100644 --- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig @@ -75,6 +75,5 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index 3cf48f1f729fc3..7beb36a41d4589 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -221,7 +221,6 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_MD5=y diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig index ef7c511a6930fd..0a42072fa23c75 100644 --- a/arch/powerpc/configs/85xx/stx_gp3_defconfig +++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig @@ -60,7 +60,6 @@ CONFIG_CRAMFS=m CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NLS=y -CONFIG_CRC_T10DIF=m CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_BDI_SWITCH=y diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index 3a6381aa9fdc69..488d03ae6d6c28 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -132,7 +132,6 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_T10DIF=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_CRYPTO_HMAC=y diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config index 0cb24b33c88e9d..e7bd265fae5a4a 100644 --- a/arch/powerpc/configs/86xx-hw.config +++ b/arch/powerpc/configs/86xx-hw.config @@ -5,7 +5,6 @@ CONFIG_BROADCOM_PHY=y # CONFIG_CARDBUS is not set CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_ST=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_HMAC=y CONFIG_DS1682=y CONFIG_EEPROM_LEGACY=y diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig index 200bb1ecb56044..69ef3dc31c4b65 100644 --- a/arch/powerpc/configs/amigaone_defconfig +++ b/arch/powerpc/configs/amigaone_defconfig @@ -106,7 +106,6 @@ CONFIG_TMPFS=y CONFIG_AFFS_FS=m CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=m -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index fb314f75ad4b5e..b799c95480ae6d 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ -110,7 +110,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=m -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index d6d2a458847bbf..2f81bc2d819e15 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -15,7 +15,6 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_SCHED=y CONFIG_CGROUPS=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_CRC_T10DIF=y CONFIG_CPUSETS=y CONFIG_CRAMFS=y CONFIG_CRYPTO_MD4=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 9215bed532919d..7e58f3e6c9870c 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -231,7 +231,6 @@ CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index 8be99d4d5289a9..b564f9e33a0dfa 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -125,7 +125,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_932=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index 83c4710017e949..a815d9e5e3e8cd 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -97,7 +97,6 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA512=y diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index dd7b0f8e50aee3..fa2b3b9c594528 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -107,7 +107,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_932=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index e8b3f67bf3f56b..1bc3466bc909ed 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -276,7 +276,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index 8b595f67068c21..41c930f74ed41e 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -90,7 +90,6 @@ CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m -CONFIG_CRC_T10DIF=m CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 4c05f4e4d50560..d2e659a2d8cb97 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -207,7 +207,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 62376b8c13e947..0b48d2b776c443 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -148,7 +148,6 @@ CONFIG_NLS_ISO8859_1=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_LZO=m -CONFIG_CRC_T10DIF=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index 7a978d39699119..e415222bd83980 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -75,4 +75,3 @@ CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index 4464a2ad42ed0f..b6f36c938f1d5c 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -99,4 +99,3 @@ CONFIG_NLS_ISO8859_1=y CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index ee1b36682155dd..e76694aace2572 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -128,4 +128,3 @@ CONFIG_DEBUG_FS=y CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index 67716a44463eb9..da176f100e004b 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -110,4 +110,3 @@ CONFIG_NLS_UTF8=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig index 3b7525510f7c2d..3582af15ad8609 100644 --- a/arch/sh/configs/hp6xx_defconfig +++ b/arch/sh/configs/hp6xx_defconfig @@ -56,4 +56,3 @@ CONFIG_CRYPTO_PCBC=y CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index d871623955c59b..924bb3233b0bc4 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -111,4 +111,3 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_SH_STANDARD_BIOS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 6a234761bfd777..0307bb2be79f35 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -58,4 +58,3 @@ CONFIG_ROMFS_FS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_SH_STANDARD_BIOS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index 2d1e65cad2398a..fc2010c241fbb0 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -90,4 +90,3 @@ CONFIG_DEBUG_FS=y CONFIG_CRYPTO_MANAGER=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index 6bd6c0ae85d7c9..f28b8c4181c24a 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -105,4 +105,3 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index cde668569cc1dd..3a4239f20ff13b 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -103,4 +103,3 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig index c863a11c759207..69568cc4039608 100644 --- a/arch/sh/configs/rts7751r2d1_defconfig +++ b/arch/sh/configs/rts7751r2d1_defconfig @@ -87,4 +87,3 @@ CONFIG_MINIX_FS=y CONFIG_NLS_CODEPAGE_932=y CONFIG_DEBUG_FS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig index 7e4f710d46c737..ecb4bdb5bb58fa 100644 --- a/arch/sh/configs/rts7751r2dplus_defconfig +++ b/arch/sh/configs/rts7751r2dplus_defconfig @@ -92,4 +92,3 @@ CONFIG_MINIX_FS=y CONFIG_NLS_CODEPAGE_932=y CONFIG_DEBUG_FS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index cd24cf08210e14..9870d16d971103 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -136,4 +136,3 @@ CONFIG_SH_STANDARD_BIOS=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index 96521271758c9d..9501e69eb88644 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -128,4 +128,3 @@ CONFIG_NLS_ISO8859_1=y CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index 57923c3296cc66..b77b3313157e20 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -112,4 +112,3 @@ CONFIG_NLS_UTF8=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/lib/Kconfig b/lib/Kconfig index 372e07c5e31d94..4301bfd08feb76 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -145,11 +145,7 @@ config CRC16 tristate config CRC_T10DIF - tristate "CRC calculation for the T10 Data Integrity Field" - help - This option is only needed if a module that's not in the - kernel tree needs to calculate CRC checks for use with the - SCSI data integrity subsystem. + tristate config ARCH_HAS_CRC_T10DIF bool diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 5680befae8c6d1..5e713ef7caa307 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 45b5570441ce81..b1f40857307da6 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y From a0d55dd740db75acca2afbd84a2f54f644dbc268 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 15:15:58 -0700 Subject: [PATCH 0091/2924] lib/crc: remove unnecessary prompt for CONFIG_CRC_ITU_T All modules that need CONFIG_CRC_ITU_T already select it, so there is no need to bother users about the option. Reviewed-by: Christoph Hellwig Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250401221600.24878-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm/configs/lpc18xx_defconfig | 1 - arch/arm/configs/milbeaut_m10v_defconfig | 1 - arch/arm/configs/mxs_defconfig | 1 - arch/arm/configs/omap2plus_defconfig | 1 - arch/arm/configs/sama7_defconfig | 1 - arch/arm/configs/stm32_defconfig | 1 - arch/arm/configs/wpcm450_defconfig | 1 - arch/mips/configs/ath79_defconfig | 1 - arch/mips/configs/rt305x_defconfig | 1 - arch/mips/configs/xway_defconfig | 1 - arch/powerpc/configs/skiroot_defconfig | 1 - arch/sh/configs/se7206_defconfig | 1 - lib/Kconfig | 7 +------ 13 files changed, 1 insertion(+), 18 deletions(-) diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig index 2aa2ac8c6507d8..2d489186e945f3 100644 --- a/arch/arm/configs/lpc18xx_defconfig +++ b/arch/arm/configs/lpc18xx_defconfig @@ -147,7 +147,6 @@ CONFIG_EXT2_FS=y # CONFIG_INOTIFY_USER is not set CONFIG_JFFS2_FS=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_CRC_ITU_T=y CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig index bb35cc0c469e9c..275ddf7a3a14df 100644 --- a/arch/arm/configs/milbeaut_m10v_defconfig +++ b/arch/arm/configs/milbeaut_m10v_defconfig @@ -108,7 +108,6 @@ CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_AES_ARM_CE=m CONFIG_CRYPTO_CHACHA20_NEON=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_ITU_T=m CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_PRINTK_TIME=y diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index d8a6e43c401ef2..c76d66135abb19 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -160,7 +160,6 @@ CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_CRYPTO_DEV_MXS_DCP=y -CONFIG_CRC_ITU_T=m CONFIG_FONTS=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index d8a35c44b64f74..75b326bc7830ce 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -706,7 +706,6 @@ CONFIG_CRYPTO_DEV_OMAP=m CONFIG_CRYPTO_DEV_OMAP_SHAM=m CONFIG_CRYPTO_DEV_OMAP_AES=m CONFIG_CRYPTO_DEV_OMAP_DES=m -CONFIG_CRC_ITU_T=y CONFIG_DMA_CMA=y CONFIG_FONTS=y CONFIG_FONT_8x8=y diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig index d850f92a73eafb..e14720a9a5ac47 100644 --- a/arch/arm/configs/sama7_defconfig +++ b/arch/arm/configs/sama7_defconfig @@ -227,7 +227,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_DEV_ATMEL_AES=y CONFIG_CRYPTO_DEV_ATMEL_TDES=y CONFIG_CRYPTO_DEV_ATMEL_SHA=y -CONFIG_CRC_ITU_T=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=32 CONFIG_CMA_ALIGNMENT=9 diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index 423bb41c4225ff..dcd9c316072ead 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -74,7 +74,6 @@ CONFIG_EXT3_FS=y # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set CONFIG_NLS=y -CONFIG_CRC_ITU_T=y CONFIG_PRINTK_TIME=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/wpcm450_defconfig b/arch/arm/configs/wpcm450_defconfig index 60e38f5b7dae06..cd4b3e70ff688b 100644 --- a/arch/arm/configs/wpcm450_defconfig +++ b/arch/arm/configs/wpcm450_defconfig @@ -191,7 +191,6 @@ CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y CONFIG_PKCS7_MESSAGE_PARSER=y CONFIG_SYSTEM_TRUSTED_KEYRING=y -CONFIG_CRC_ITU_T=m CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig index 8caa03a41327d9..cba0b85c6707cb 100644 --- a/arch/mips/configs/ath79_defconfig +++ b/arch/mips/configs/ath79_defconfig @@ -82,7 +82,6 @@ CONFIG_LEDS_GPIO=y # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_PROC_PAGE_MONITOR is not set -CONFIG_CRC_ITU_T=m CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig index 8404e0a9d8b22f..8f9701efef1984 100644 --- a/arch/mips/configs/rt305x_defconfig +++ b/arch/mips/configs/rt305x_defconfig @@ -128,7 +128,6 @@ CONFIG_SQUASHFS=y # CONFIG_SQUASHFS_ZLIB is not set CONFIG_SQUASHFS_XZ=y CONFIG_CRYPTO_ARC4=m -CONFIG_CRC_ITU_T=m # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_POWERPC is not set # CONFIG_XZ_DEC_IA64 is not set diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig index 7b91edfe3e0753..aae8497b687285 100644 --- a/arch/mips/configs/xway_defconfig +++ b/arch/mips/configs/xway_defconfig @@ -140,7 +140,6 @@ CONFIG_SQUASHFS=y # CONFIG_SQUASHFS_ZLIB is not set CONFIG_SQUASHFS_XZ=y CONFIG_CRYPTO_ARC4=m -CONFIG_CRC_ITU_T=m CONFIG_PRINTK_TIME=y CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 6c0517961eeeac..6f436cb7d0c1ec 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -279,7 +279,6 @@ CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y # CONFIG_INTEGRITY is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_ITU_T=y # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_IA64 is not set # CONFIG_XZ_DEC_ARM is not set diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index d881fccbe6f0d4..64f9308ee5865d 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -101,4 +101,3 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_ITU_T=y diff --git a/lib/Kconfig b/lib/Kconfig index 4301bfd08feb76..89470bb245193b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -155,12 +155,7 @@ config CRC_T10DIF_ARCH default CRC_T10DIF if ARCH_HAS_CRC_T10DIF && CRC_OPTIMIZATIONS config CRC_ITU_T - tristate "CRC ITU-T V.41 functions" - help - This option is provided for the case where no in-kernel-tree - modules require CRC ITU-T V.41 functions, but a module built outside - the kernel tree does. Such modules that use library CRC ITU-T V.41 - functions require M here. + tristate config CRC32 tristate From 31ab49a99f0572da6a62f121878e2155b04904e5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 15:15:59 -0700 Subject: [PATCH 0092/2924] lib/crc: document all the CRC library kconfig options Previous commits removed all the original CRC kconfig help text, since it was oriented towards people configuring the kernel, and the options are no longer user-selectable. However, it's still useful for there to be help text for kernel developers. Add this. Reviewed-by: Christoph Hellwig Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250401221600.24878-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/Kconfig | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lib/Kconfig b/lib/Kconfig index 89470bb245193b..4e796eaea2f4f6 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -140,12 +140,21 @@ source "lib/crypto/Kconfig" config CRC_CCITT tristate + help + The CRC-CCITT library functions. Select this if your module uses any + of the functions from . config CRC16 tristate + help + The CRC16 library functions. Select this if your module uses any of + the functions from . config CRC_T10DIF tristate + help + The CRC-T10DIF library functions. Select this if your module uses + any of the functions from . config ARCH_HAS_CRC_T10DIF bool @@ -156,10 +165,16 @@ config CRC_T10DIF_ARCH config CRC_ITU_T tristate + help + The CRC-ITU-T library functions. Select this if your module uses + any of the functions from . config CRC32 tristate select BITREVERSE + help + The CRC32 library functions. Select this if your module uses any of + the functions from or . config ARCH_HAS_CRC32 bool @@ -170,6 +185,9 @@ config CRC32_ARCH config CRC64 tristate + help + The CRC64 library functions. Select this if your module uses any of + the functions from . config ARCH_HAS_CRC64 bool @@ -180,9 +198,15 @@ config CRC64_ARCH config CRC4 tristate + help + The CRC4 library functions. Select this if your module uses any of + the functions from . config CRC7 tristate + help + The CRC7 library functions. Select this if your module uses any of + the functions from . config LIBCRC32C tristate @@ -193,6 +217,9 @@ config LIBCRC32C config CRC8 tristate + help + The CRC8 library functions. Select this if your module uses any of + the functions from . config CRC_OPTIMIZATIONS bool "Enable optimized CRC implementations" if EXPERT From b261d2222063a9a8b9ec284244c285f2998ee01e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 15:16:00 -0700 Subject: [PATCH 0093/2924] lib/crc: remove CONFIG_LIBCRC32C Now that LIBCRC32C does nothing besides select CRC32, make every option that selects LIBCRC32C instead select CRC32 directly. Then remove LIBCRC32C. Reviewed-by: Christoph Hellwig Reviewed-by: "Martin K. Petersen" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250401221600.24878-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- drivers/block/Kconfig | 2 +- drivers/block/drbd/Kconfig | 2 +- drivers/md/Kconfig | 2 +- drivers/md/persistent-data/Kconfig | 2 +- drivers/net/ethernet/broadcom/Kconfig | 4 ++-- drivers/net/ethernet/cavium/Kconfig | 2 +- fs/bcachefs/Kconfig | 2 +- fs/btrfs/Kconfig | 2 +- fs/ceph/Kconfig | 2 +- fs/erofs/Kconfig | 2 +- fs/gfs2/Kconfig | 1 - fs/xfs/Kconfig | 2 +- lib/Kconfig | 7 ------- net/batman-adv/Kconfig | 2 +- net/ceph/Kconfig | 2 +- net/netfilter/Kconfig | 4 ++-- net/netfilter/ipvs/Kconfig | 2 +- net/openvswitch/Kconfig | 2 +- net/sched/Kconfig | 2 +- net/sctp/Kconfig | 2 +- 20 files changed, 20 insertions(+), 28 deletions(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index a97f2c40c640dd..2551ebf88dda14 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -367,7 +367,7 @@ config BLK_DEV_RBD tristate "Rados block device (RBD)" depends on INET && BLOCK select CEPH_LIB - select LIBCRC32C + select CRC32 select CRYPTO_AES select CRYPTO help diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index 6fb4e38fca88c2..495a72da04c6c1 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -10,7 +10,7 @@ config BLK_DEV_DRBD tristate "DRBD Distributed Replicated Block Device support" depends on PROC_FS && INET select LRU_CACHE - select LIBCRC32C + select CRC32 help NOTE: In order to authenticate connections you have to select diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 0b1870a09e1fdc..2c26a02391cded 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -139,7 +139,7 @@ config MD_RAID456 tristate "RAID-4/RAID-5/RAID-6 mode" depends on BLK_DEV_MD select RAID6_PQ - select LIBCRC32C + select CRC32 select ASYNC_MEMCPY select ASYNC_XOR select ASYNC_PQ diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig index f4f948b0e17310..dbb97a7233ab95 100644 --- a/drivers/md/persistent-data/Kconfig +++ b/drivers/md/persistent-data/Kconfig @@ -2,7 +2,7 @@ config DM_PERSISTENT_DATA tristate depends on BLK_DEV_DM - select LIBCRC32C + select CRC32 select DM_BUFIO help Library providing immutable on-disk data structure support for diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index eeec8bf17cf458..1bd4313215d715 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -143,7 +143,7 @@ config BNX2X depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER select ZLIB_INFLATE - select LIBCRC32C + select CRC32 select MDIO help This driver supports Broadcom NetXtremeII 10 gigabit Ethernet cards. @@ -207,7 +207,7 @@ config BNXT depends on PCI depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER - select LIBCRC32C + select CRC32 select NET_DEVLINK select PAGE_POOL select DIMLIB diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index ca742cc146d798..7dae5aad3689d6 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -70,8 +70,8 @@ config LIQUIDIO depends on 64BIT && PCI depends on PCI depends on PTP_1588_CLOCK_OPTIONAL + select CRC32 select FW_LOADER - select LIBCRC32C select LIQUIDIO_CORE select NET_DEVLINK help diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index c9798750202d38..ea668dedb2608a 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -4,7 +4,7 @@ config BCACHEFS_FS depends on BLOCK select EXPORTFS select CLOSURES - select LIBCRC32C + select CRC32 select CRC64 select FS_POSIX_ACL select LZ4_COMPRESS diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index fa8515598341ec..73a2dfb854c55e 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -3,9 +3,9 @@ config BTRFS_FS tristate "Btrfs filesystem support" select BLK_CGROUP_PUNT_BIO + select CRC32 select CRYPTO select CRYPTO_CRC32C - select LIBCRC32C select CRYPTO_XXHASH select CRYPTO_SHA256 select CRYPTO_BLAKE2B diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 7249d70e1a43fa..3e7def3d31c16f 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -3,7 +3,7 @@ config CEPH_FS tristate "Ceph distributed file system" depends on INET select CEPH_LIB - select LIBCRC32C + select CRC32 select CRYPTO_AES select CRYPTO select NETFS_SUPPORT diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 331e49cd1b8d9d..8f68ec49ad897b 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -3,8 +3,8 @@ config EROFS_FS tristate "EROFS filesystem support" depends on BLOCK + select CRC32 select FS_IOMAP - select LIBCRC32C help EROFS (Enhanced Read-Only File System) is a lightweight read-only file system with modern designs (e.g. no buffer heads, inline diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index be7f87a8e11ae1..7bd231d16d4a07 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -4,7 +4,6 @@ config GFS2_FS select BUFFER_HEAD select FS_POSIX_ACL select CRC32 - select LIBCRC32C select QUOTACTL select FS_IOMAP help diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index fffd6fffdce0f0..ae0ca68584963a 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -3,7 +3,7 @@ config XFS_FS tristate "XFS filesystem support" depends on BLOCK select EXPORTFS - select LIBCRC32C + select CRC32 select FS_IOMAP help XFS is a high performance journaling filesystem which originated diff --git a/lib/Kconfig b/lib/Kconfig index 4e796eaea2f4f6..6c1b8f1842678c 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -208,13 +208,6 @@ config CRC7 The CRC7 library functions. Select this if your module uses any of the functions from . -config LIBCRC32C - tristate - select CRC32 - help - This option just selects CRC32 and is provided for compatibility - purposes until the users are updated to select CRC32 directly. - config CRC8 tristate help diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 860a0786bc1e40..20b316207f9aa6 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -9,7 +9,7 @@ config BATMAN_ADV tristate "B.A.T.M.A.N. Advanced Meshing Protocol" - select LIBCRC32C + select CRC32 help B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is a routing protocol for multi-hop ad-hoc mesh networks. The diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index c5c4eef3a9ff13..0aa21fcbf6ece5 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -2,7 +2,7 @@ config CEPH_LIB tristate "Ceph core library" depends on INET - select LIBCRC32C + select CRC32 select CRYPTO_AES select CRYPTO_CBC select CRYPTO_GCM diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index df2dc21304efbe..047ba81865edff 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -212,7 +212,7 @@ config NF_CT_PROTO_SCTP bool 'SCTP protocol connection tracking support' depends on NETFILTER_ADVANCED default y - select LIBCRC32C + select CRC32 help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on SCTP connections. @@ -475,7 +475,7 @@ endif # NF_CONNTRACK config NF_TABLES select NETFILTER_NETLINK - select LIBCRC32C + select CRC32 tristate "Netfilter nf_tables support" help nftables is the new packet classification framework that intends to diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 2a3017b9c001b3..8c5b1fe12d0782 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -105,7 +105,7 @@ config IP_VS_PROTO_AH config IP_VS_PROTO_SCTP bool "SCTP load balancing support" - select LIBCRC32C + select CRC32 help This option enables support for load balancing SCTP transport protocol. Say Y if unsure. diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 2535f3f9f4623b..5481bd561eb414 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -11,7 +11,7 @@ config OPENVSWITCH (!NF_NAT || NF_NAT) && \ (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT))) depends on PSAMPLE || !PSAMPLE - select LIBCRC32C + select CRC32 select MPLS select NET_MPLS_GSO select DST_CACHE diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 8180d0c12fceaf..a800127effcd73 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -784,7 +784,7 @@ config NET_ACT_SKBEDIT config NET_ACT_CSUM tristate "Checksum Updating" depends on NET_CLS_ACT && INET - select LIBCRC32C + select CRC32 help Say Y here to update some common checksum after some direct packet alterations. diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 5da599ff84a90f..d18a72df3654eb 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -7,10 +7,10 @@ menuconfig IP_SCTP tristate "The SCTP Protocol" depends on INET depends on IPV6 || IPV6=n + select CRC32 select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 - select LIBCRC32C select NET_UDP_TUNNEL help Stream Control Transmission Protocol From 9bae8f4f21689b96a4b4fc505740dd97b9142c41 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 3 Apr 2025 15:08:41 -0700 Subject: [PATCH 0094/2924] selftests/bpf: Make res_spin_lock test less verbose Currently, the res_spin_lock test is too chatty as it constantly prints the test_run results for each iteration in each thread, so in case verbose output is requested or things go wrong, it will flood the logs of CI and other systems with repeated messages that offer no valuable insight. Reduce this by doing assertions when the condition actually flips, and proceed to break out and exit the threads. We still assert to mark the test as failed and print the expected and reported values. Suggested-by: Alexei Starovoitov Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250403220841.66654-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/res_spin_lock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c index 115287ba441bfd..0703e987df8997 100644 --- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c @@ -25,8 +25,11 @@ static void *spin_lock_thread(void *arg) while (!READ_ONCE(skip)) { err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_OK(topts.retval, "test_run retval"); + if (err || topts.retval) { + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + break; + } } pthread_exit(arg); } From d05af90d6218e9c8f1c2026990c3f53c1b41bfb0 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 25 Mar 2025 09:57:46 +0800 Subject: [PATCH 0095/2924] md/raid10: fix missing discard IO accounting md_account_bio() is not called from raid10_handle_discard(), now that we handle bitmap inside md_account_bio(), also fix missing bitmap_startwrite for discard. Test whole disk discard for 20G raid10: Before: Device d/s dMB/s drqm/s %drqm d_await dareq-sz md0 48.00 16.00 0.00 0.00 5.42 341.33 After: Device d/s dMB/s drqm/s %drqm d_await dareq-sz md0 68.00 20462.00 0.00 0.00 2.65 308133.65 Link: https://lore.kernel.org/linux-raid/20250325015746.3195035-1-yukuai1@huaweicloud.com Fixes: 528bc2cf2fcc ("md/raid10: enable io accounting") Signed-off-by: Yu Kuai Acked-by: Coly Li --- drivers/md/raid10.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 846c5f29486e3f..ba32bac975b8d6 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1735,6 +1735,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) * The discard bio returns only first r10bio finishes */ if (first_copy) { + md_account_bio(mddev, &bio); r10_bio->master_bio = bio; set_bit(R10BIO_Discard, &r10_bio->state); first_copy = false; From 6ec1f0239485028445d213d91cfee5242f3211ba Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Thu, 3 Apr 2025 09:53:22 +0800 Subject: [PATCH 0096/2924] md/md-bitmap: fix stats collection for external bitmaps The bitmap_get_stats() function incorrectly returns -ENOENT for external bitmaps. Remove the external bitmap check as the statistics should be available regardless of bitmap storage location. Return -EINVAL only for invalid bitmap with no storage (neither in superblock nor in external file). Note: "bitmap_info.external" here refers to a bitmap stored in a separate file (bitmap_file), not to external metadata. Fixes: 8d28d0ddb986 ("md/md-bitmap: Synchronize bitmap_get_stats() with bitmap lifetime") Signed-off-by: Zheng Qixing Link: https://lore.kernel.org/linux-raid/20250403015322.2873369-1-zhengqixing@huaweicloud.com Signed-off-by: Yu Kuai --- drivers/md/md-bitmap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 44ec9b17cfd336..37b08f26c62f5d 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2357,9 +2357,8 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) if (!bitmap) return -ENOENT; - if (bitmap->mddev->bitmap_info.external) - return -ENOENT; - if (!bitmap->storage.sb_page) /* no superblock */ + if (!bitmap->mddev->bitmap_info.external && + !bitmap->storage.sb_page) return -EINVAL; sb = kmap_local_page(bitmap->storage.sb_page); stats->sync_size = le64_to_cpu(sb->sync_size); From d8d78398e550039295e0237eafb703e2d21f7d57 Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Sat, 5 Apr 2025 00:10:41 +0000 Subject: [PATCH 0097/2924] KVM: arm64: selftests: Introduce and use hardware-definition macros The kvm selftest library for arm64 currently configures the hardware fields, such as shift and mask in the page-table entries and registers, directly with numbers. While it add comments at places, it's better to rewrite them with appropriate macros to improve the readability and reduce the risk of errors. Hence, introduce macros to define the hardware fields and use them in the arm64 processor library. Most of the definitions are primary copied from the Linux's header, arch/arm64/include/asm/pgtable-hwdef.h. No functional change intended. Suggested-by: Oliver Upton Signed-off-by: Raghavendra Rao Ananta Link: https://lore.kernel.org/r/20250405001042.1470552-2-rananta@google.com Signed-off-by: Oliver Upton --- .../selftests/kvm/arm64/page_fault_test.c | 2 +- .../selftests/kvm/include/arm64/processor.h | 66 +++++++++++++++++-- .../selftests/kvm/lib/arm64/processor.c | 57 ++++++++-------- 3 files changed, 92 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index ec33a8f9c908c8..dc6559dad9d863 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -199,7 +199,7 @@ static bool guest_set_ha(void) if (hadbs == 0) return false; - tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + tcr = read_sysreg(tcr_el1) | TCR_HA; write_sysreg(tcr, tcr_el1); isb(); diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 1e8d0d531fbd39..7d88ff22013ab4 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -62,6 +62,66 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) +/* TCR_EL1 specific flags */ +#define TCR_T0SZ_OFFSET 0 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) + +#define TCR_IRGN0_SHIFT 8 +#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) + +#define TCR_ORGN0_SHIFT 10 +#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) + +#define TCR_SH0_SHIFT 12 +#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) +#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) + +#define TCR_TG0_SHIFT 14 +#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) +#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) +#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) +#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) + +#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) +#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT) +#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT) +#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT) +#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT) + +#define TCR_HA (UL(1) << 39) +#define TCR_DS (UL(1) << 59) + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PTE_ATTRINDX(t) ((t) << 2) +#define PTE_ATTRINDX_MASK GENMASK(4, 2) +#define PTE_ATTRINDX_SHIFT 2 + +#define PTE_VALID BIT(0) +#define PGD_TYPE_TABLE BIT(1) +#define PUD_TYPE_TABLE BIT(1) +#define PMD_TYPE_TABLE BIT(1) +#define PTE_TYPE_PAGE BIT(1) + +#define PTE_AF BIT(10) + +#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) +#define PTE_ADDR_51_48 GENMASK(15, 12) +#define PTE_ADDR_51_48_SHIFT 12 +#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift)) +#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8) +#define PTE_ADDR_51_50_LPA2_SHIFT 8 + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code); @@ -102,12 +162,6 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -/* Access flag */ -#define PTE_AF (1ULL << 10) - -/* Access flag update enable/disable */ -#define TCR_EL1_HA (1ULL << 39) - void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, uint32_t *ipa16k, uint32_t *ipa64k); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 7ba3aa3755f35f..da5802c8a59c1f 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -72,13 +72,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) uint64_t pte; if (use_lpa2_pte_format(vm)) { - pte = pa & GENMASK(49, vm->page_shift); - pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; - attrs &= ~GENMASK(9, 8); + pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT; + attrs &= ~PTE_ADDR_51_50_LPA2; } else { - pte = pa & GENMASK(47, vm->page_shift); + pte = pa & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT; } pte |= attrs; @@ -90,12 +90,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) uint64_t pa; if (use_lpa2_pte_format(vm)) { - pa = pte & GENMASK(49, vm->page_shift); - pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); + pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50; } else { - pa = pte & GENMASK(47, vm->page_shift); + pa = pte & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48; } return pa; @@ -128,7 +128,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t flags) { - uint8_t attr_idx = flags & 7; + uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + uint64_t pg_attr; uint64_t *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -147,18 +148,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PGD_TYPE_TABLE | PTE_VALID); switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; @@ -167,7 +171,8 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ + pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + *ptep = addr_pte(vm, paddr, pg_attr); } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -293,20 +298,20 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P48V48_64K: case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= TCR_TG0_64K; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + tcr_el1 |= TCR_TG0_16K; break; case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= TCR_TG0_4K; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); @@ -319,35 +324,35 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P52V48_4K: case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + tcr_el1 |= TCR_IPS_52_BITS; ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + tcr_el1 |= TCR_IPS_48_BITS; break; case VM_MODE_P40V48_4K: case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + tcr_el1 |= TCR_IPS_40_BITS; break; case VM_MODE_P36V48_4K: case VM_MODE_P36V48_16K: case VM_MODE_P36V48_64K: case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + tcr_el1 |= TCR_IPS_36_BITS; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I; + + tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; + tcr_el1 |= TCR_T0SZ(vm->va_bits); if (use_lpa2_pte_format(vm)) - tcr_el1 |= (1ul << 59) /* DS */; + tcr_el1 |= TCR_DS; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); From c8631ea59b6523035ffb607634eef7bacc8947fe Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Sat, 5 Apr 2025 00:10:42 +0000 Subject: [PATCH 0098/2924] KVM: arm64: selftests: Explicitly set the page attrs to Inner-Shareable Atomic instructions such as 'ldset' in the guest have been observed to cause an EL1 data abort with FSC 0x35 (IMPLEMENTATION DEFINED fault (Unsupported Exclusive or Atomic access)) on Neoverse-N3. Per DDI0487L.a B2.2.6, atomic instructions are only architecturally guaranteed for Inner/Outer Shareable Normal Write-Back memory. For anything else the behavior is IMPLEMENTATION DEFINED and can lose atomicity, or, in this case, generate an abort. It would appear that selftests sets up the stage-1 mappings as Non Shareable, leading to the observed abort. Explicitly set the Shareability field to Inner Shareable for non-LPA2 page tables. Note that for the LPA2 page table format, translations for cacheable memory inherit the shareability attribute of the PTW, i.e. TCR_ELx.SH{0,1}. Suggested-by: Oliver Upton Signed-off-by: Raghavendra Rao Ananta Link: https://lore.kernel.org/r/20250405001042.1470552-3-rananta@google.com [oliver: Rephrase changelog] Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/include/arm64/processor.h | 1 + tools/testing/selftests/kvm/lib/arm64/processor.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 7d88ff22013ab4..b0fc0f945766fe 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -113,6 +113,7 @@ #define PMD_TYPE_TABLE BIT(1) #define PTE_TYPE_PAGE BIT(1) +#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */ #define PTE_AF BIT(10) #define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index da5802c8a59c1f..9d69904cb6084a 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -172,6 +172,9 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + if (!use_lpa2_pte_format(vm)) + pg_attr |= PTE_SHARED; + *ptep = addr_pte(vm, paddr, pg_attr); } From 0ba3a4ab76fd3367b9cb680cad70182c896c795c Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Sat, 5 Apr 2025 16:30:36 -0400 Subject: [PATCH 0099/2924] perf/core: Fix WARN_ON(!ctx) in __free_event() for partial init Move the get_ctx(child_ctx) call and the child_event->ctx assignment to occur immediately after the child event is allocated. Ensure that child_event->ctx is non-NULL before any subsequent error path within inherit_event calls free_event(), satisfying the assumptions of the cleanup code. Details: There's no clear Fixes tag, because this bug is a side-effect of multiple interacting commits over time (up to 15 years old), not a single regression. The code initially incremented refcount then assigned context immediately after the child_event was created. Later, an early validity check for child_event was added before the refcount/assignment. Even later, a WARN_ON_ONCE() cleanup check was added, assuming event->ctx is valid if the pmu_ctx is valid. The problem is that the WARN_ON_ONCE() could trigger after the initial check passed but before child_event->ctx was assigned, violating its precondition. The solution is to assign child_event->ctx right after its initial validation. This ensures the context exists for any subsequent checks or cleanup routines, resolving the WARN_ON_ONCE(). To resolve it, defer the refcount update and child_event->ctx assignment directly after child_event->pmu_ctx is set but before checking if the parent event is orphaned. The cleanup routine depends on event->pmu_ctx being non-NULL before it verifies event->ctx is non-NULL. This also maintains the author's original intent of passing in child_ctx to find_get_pmu_context before its refcount/assignment. [ mingo: Expanded the changelog from another email by Gabriel Shahrouzi. ] Reported-by: syzbot+ff3aa851d46ab82953a3@syzkaller.appspotmail.com Signed-off-by: Gabriel Shahrouzi Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Kan Liang Cc: Oleg Nesterov Cc: Alexander Shishkin Link: https://lore.kernel.org/r/20250405203036.582721-1-gshahrouzi@gmail.com Closes: https://syzkaller.appspot.com/bug?extid=ff3aa851d46ab82953a3 --- kernel/events/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 128db74e9eab8c..9af9726ef5f5ad 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -14016,6 +14016,9 @@ inherit_event(struct perf_event *parent_event, if (IS_ERR(child_event)) return child_event; + get_ctx(child_ctx); + child_event->ctx = child_ctx; + pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event); if (IS_ERR(pmu_ctx)) { free_event(child_event); @@ -14037,8 +14040,6 @@ inherit_event(struct perf_event *parent_event, return NULL; } - get_ctx(child_ctx); - /* * Make the child state follow the state of the parent event, * not its attr.disabled bit. We hold the parent's mutex, @@ -14059,7 +14060,6 @@ inherit_event(struct perf_event *parent_event, local64_set(&hwc->period_left, sample_period); } - child_event->ctx = child_ctx; child_event->overflow_handler = parent_event->overflow_handler; child_event->overflow_handler_context = parent_event->overflow_handler_context; From d7bff1415e85b889dc8908be6aedba8807ae5e37 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 3 Apr 2025 17:02:08 +0100 Subject: [PATCH 0100/2924] ASoC: codecs:lpass-wsa-macro: Fix vi feedback rate Currently the VI feedback rate is set to fixed 8K, fix this by getting the correct rate from params_rate. Without this patch incorrect rate will be set on the VI feedback recording resulting in rate miss match and audio artifacts. Fixes: 2c4066e5d428 ("ASoC: codecs: lpass-wsa-macro: add dapm widgets and route") Cc: stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Reviewed-by: Dmitry Baryshkov Link: https://patch.msgid.link/20250403160209.21613-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-wsa-macro.c | 39 +++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index b261fa373e657a..83e9a27ca3c05b 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -63,6 +63,10 @@ #define CDC_WSA_TX_SPKR_PROT_CLK_DISABLE 0 #define CDC_WSA_TX_SPKR_PROT_PCM_RATE_MASK GENMASK(3, 0) #define CDC_WSA_TX_SPKR_PROT_PCM_RATE_8K 0 +#define CDC_WSA_TX_SPKR_PROT_PCM_RATE_16K 1 +#define CDC_WSA_TX_SPKR_PROT_PCM_RATE_24K 2 +#define CDC_WSA_TX_SPKR_PROT_PCM_RATE_32K 3 +#define CDC_WSA_TX_SPKR_PROT_PCM_RATE_48K 4 #define CDC_WSA_TX0_SPKR_PROT_PATH_CFG0 (0x0248) #define CDC_WSA_TX1_SPKR_PROT_PATH_CTL (0x0264) #define CDC_WSA_TX1_SPKR_PROT_PATH_CFG0 (0x0268) @@ -407,6 +411,7 @@ struct wsa_macro { int ear_spkr_gain; int spkr_gain_offset; int spkr_mode; + u32 pcm_rate_vi; int is_softclip_on[WSA_MACRO_SOFTCLIP_MAX]; int softclip_clk_users[WSA_MACRO_SOFTCLIP_MAX]; struct regmap *regmap; @@ -1280,6 +1285,7 @@ static int wsa_macro_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_component *component = dai->component; + struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); int ret; switch (substream->stream) { @@ -1291,6 +1297,11 @@ static int wsa_macro_hw_params(struct snd_pcm_substream *substream, __func__, params_rate(params)); return ret; } + break; + case SNDRV_PCM_STREAM_CAPTURE: + if (dai->id == WSA_MACRO_AIF_VI) + wsa->pcm_rate_vi = params_rate(params); + break; default: break; @@ -1465,6 +1476,28 @@ static int wsa_macro_enable_vi_feedback(struct snd_soc_dapm_widget *w, struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); u32 tx_reg0, tx_reg1; + u32 rate_val; + + switch (wsa->pcm_rate_vi) { + case 8000: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_8K; + break; + case 16000: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_16K; + break; + case 24000: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_24K; + break; + case 32000: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_32K; + break; + case 48000: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_48K; + break; + default: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_8K; + break; + } if (test_bit(WSA_MACRO_TX0, &wsa->active_ch_mask[WSA_MACRO_AIF_VI])) { tx_reg0 = CDC_WSA_TX0_SPKR_PROT_PATH_CTL; @@ -1476,7 +1509,7 @@ static int wsa_macro_enable_vi_feedback(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMU: - /* Enable V&I sensing */ + /* Enable V&I sensing */ snd_soc_component_update_bits(component, tx_reg0, CDC_WSA_TX_SPKR_PROT_RESET_MASK, CDC_WSA_TX_SPKR_PROT_RESET); @@ -1485,10 +1518,10 @@ static int wsa_macro_enable_vi_feedback(struct snd_soc_dapm_widget *w, CDC_WSA_TX_SPKR_PROT_RESET); snd_soc_component_update_bits(component, tx_reg0, CDC_WSA_TX_SPKR_PROT_PCM_RATE_MASK, - CDC_WSA_TX_SPKR_PROT_PCM_RATE_8K); + rate_val); snd_soc_component_update_bits(component, tx_reg1, CDC_WSA_TX_SPKR_PROT_PCM_RATE_MASK, - CDC_WSA_TX_SPKR_PROT_PCM_RATE_8K); + rate_val); snd_soc_component_update_bits(component, tx_reg0, CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, CDC_WSA_TX_SPKR_PROT_CLK_ENABLE); From 7648beb65600220996ebb2da207610b1ff9b735e Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 3 Apr 2025 17:02:09 +0100 Subject: [PATCH 0101/2924] ASoC: codecs:lpass-wsa-macro: Fix logic of enabling vi channels Existing code only configures one of WSA_MACRO_TX0 or WSA_MACRO_TX1 paths eventhough we enable both of them. Fix this bug by adding proper checks and rearranging some of the common code to able to allow setting both TX0 and TX1 paths Without this patch only one channel gets enabled in VI path instead of 2 channels. End result would be 1 channel recording instead of 2. Fixes: 2c4066e5d428 ("ASoC: codecs: lpass-wsa-macro: add dapm widgets and route") Cc: stable@vger.kernel.org Co-developed-by: Manikantan R Signed-off-by: Manikantan R Signed-off-by: Srinivas Kandagatla Reviewed-by: Dmitry Baryshkov Link: https://patch.msgid.link/20250403160209.21613-3-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-wsa-macro.c | 108 +++++++++++++++++------------ 1 file changed, 63 insertions(+), 45 deletions(-) diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 83e9a27ca3c05b..c1fb71cfb5d077 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1459,6 +1459,67 @@ static void wsa_macro_mclk_enable(struct wsa_macro *wsa, bool mclk_enable) } } +static void wsa_macro_enable_disable_vi_sense(struct snd_soc_component *component, bool enable, + u32 tx_reg0, u32 tx_reg1, u32 val) +{ + if (enable) { + /* Enable V&I sensing */ + snd_soc_component_update_bits(component, tx_reg0, + CDC_WSA_TX_SPKR_PROT_RESET_MASK, + CDC_WSA_TX_SPKR_PROT_RESET); + snd_soc_component_update_bits(component, tx_reg1, + CDC_WSA_TX_SPKR_PROT_RESET_MASK, + CDC_WSA_TX_SPKR_PROT_RESET); + snd_soc_component_update_bits(component, tx_reg0, + CDC_WSA_TX_SPKR_PROT_PCM_RATE_MASK, + val); + snd_soc_component_update_bits(component, tx_reg1, + CDC_WSA_TX_SPKR_PROT_PCM_RATE_MASK, + val); + snd_soc_component_update_bits(component, tx_reg0, + CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, + CDC_WSA_TX_SPKR_PROT_CLK_ENABLE); + snd_soc_component_update_bits(component, tx_reg1, + CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, + CDC_WSA_TX_SPKR_PROT_CLK_ENABLE); + snd_soc_component_update_bits(component, tx_reg0, + CDC_WSA_TX_SPKR_PROT_RESET_MASK, + CDC_WSA_TX_SPKR_PROT_NO_RESET); + snd_soc_component_update_bits(component, tx_reg1, + CDC_WSA_TX_SPKR_PROT_RESET_MASK, + CDC_WSA_TX_SPKR_PROT_NO_RESET); + } else { + snd_soc_component_update_bits(component, tx_reg0, + CDC_WSA_TX_SPKR_PROT_RESET_MASK, + CDC_WSA_TX_SPKR_PROT_RESET); + snd_soc_component_update_bits(component, tx_reg1, + CDC_WSA_TX_SPKR_PROT_RESET_MASK, + CDC_WSA_TX_SPKR_PROT_RESET); + snd_soc_component_update_bits(component, tx_reg0, + CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, + CDC_WSA_TX_SPKR_PROT_CLK_DISABLE); + snd_soc_component_update_bits(component, tx_reg1, + CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, + CDC_WSA_TX_SPKR_PROT_CLK_DISABLE); + } +} + +static void wsa_macro_enable_disable_vi_feedback(struct snd_soc_component *component, + bool enable, u32 rate) +{ + struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); + + if (test_bit(WSA_MACRO_TX0, &wsa->active_ch_mask[WSA_MACRO_AIF_VI])) + wsa_macro_enable_disable_vi_sense(component, enable, + CDC_WSA_TX0_SPKR_PROT_PATH_CTL, + CDC_WSA_TX1_SPKR_PROT_PATH_CTL, rate); + + if (test_bit(WSA_MACRO_TX1, &wsa->active_ch_mask[WSA_MACRO_AIF_VI])) + wsa_macro_enable_disable_vi_sense(component, enable, + CDC_WSA_TX2_SPKR_PROT_PATH_CTL, + CDC_WSA_TX3_SPKR_PROT_PATH_CTL, rate); +} + static int wsa_macro_mclk_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -1475,7 +1536,6 @@ static int wsa_macro_enable_vi_feedback(struct snd_soc_dapm_widget *w, { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); - u32 tx_reg0, tx_reg1; u32 rate_val; switch (wsa->pcm_rate_vi) { @@ -1499,56 +1559,14 @@ static int wsa_macro_enable_vi_feedback(struct snd_soc_dapm_widget *w, break; } - if (test_bit(WSA_MACRO_TX0, &wsa->active_ch_mask[WSA_MACRO_AIF_VI])) { - tx_reg0 = CDC_WSA_TX0_SPKR_PROT_PATH_CTL; - tx_reg1 = CDC_WSA_TX1_SPKR_PROT_PATH_CTL; - } else if (test_bit(WSA_MACRO_TX1, &wsa->active_ch_mask[WSA_MACRO_AIF_VI])) { - tx_reg0 = CDC_WSA_TX2_SPKR_PROT_PATH_CTL; - tx_reg1 = CDC_WSA_TX3_SPKR_PROT_PATH_CTL; - } - switch (event) { case SND_SOC_DAPM_POST_PMU: /* Enable V&I sensing */ - snd_soc_component_update_bits(component, tx_reg0, - CDC_WSA_TX_SPKR_PROT_RESET_MASK, - CDC_WSA_TX_SPKR_PROT_RESET); - snd_soc_component_update_bits(component, tx_reg1, - CDC_WSA_TX_SPKR_PROT_RESET_MASK, - CDC_WSA_TX_SPKR_PROT_RESET); - snd_soc_component_update_bits(component, tx_reg0, - CDC_WSA_TX_SPKR_PROT_PCM_RATE_MASK, - rate_val); - snd_soc_component_update_bits(component, tx_reg1, - CDC_WSA_TX_SPKR_PROT_PCM_RATE_MASK, - rate_val); - snd_soc_component_update_bits(component, tx_reg0, - CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, - CDC_WSA_TX_SPKR_PROT_CLK_ENABLE); - snd_soc_component_update_bits(component, tx_reg1, - CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, - CDC_WSA_TX_SPKR_PROT_CLK_ENABLE); - snd_soc_component_update_bits(component, tx_reg0, - CDC_WSA_TX_SPKR_PROT_RESET_MASK, - CDC_WSA_TX_SPKR_PROT_NO_RESET); - snd_soc_component_update_bits(component, tx_reg1, - CDC_WSA_TX_SPKR_PROT_RESET_MASK, - CDC_WSA_TX_SPKR_PROT_NO_RESET); + wsa_macro_enable_disable_vi_feedback(component, true, rate_val); break; case SND_SOC_DAPM_POST_PMD: /* Disable V&I sensing */ - snd_soc_component_update_bits(component, tx_reg0, - CDC_WSA_TX_SPKR_PROT_RESET_MASK, - CDC_WSA_TX_SPKR_PROT_RESET); - snd_soc_component_update_bits(component, tx_reg1, - CDC_WSA_TX_SPKR_PROT_RESET_MASK, - CDC_WSA_TX_SPKR_PROT_RESET); - snd_soc_component_update_bits(component, tx_reg0, - CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, - CDC_WSA_TX_SPKR_PROT_CLK_DISABLE); - snd_soc_component_update_bits(component, tx_reg1, - CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, - CDC_WSA_TX_SPKR_PROT_CLK_DISABLE); + wsa_macro_enable_disable_vi_feedback(component, false, rate_val); break; } From dfcf3dde45df383f2695c3d3475fec153d2c7dbe Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 4 Apr 2025 16:32:13 +0300 Subject: [PATCH 0102/2924] ASoC: Intel: sof_sdw: Add quirk for Asus Zenbook S16 Asus laptops with sound PCI subsystem ID 1043:1f43 have the DMICs connected to the host instead of the CS42L43 so need the SOC_SDW_CODEC_MIC quirk. Link: https://github.com/thesofproject/sof/issues/9930 Fixes: 084344970808 ("ASoC: Intel: sof_sdw: Add quirk for Asus Zenbook S14") Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Simon Trimmer Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250404133213.4658-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 90dafa810b2ec0..095d08b3fc8249 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -764,6 +764,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = { SND_PCI_QUIRK(0x1043, 0x1e13, "ASUS Zenbook S14", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x1043, 0x1f43, "ASUS Zenbook S16", SOC_SDW_CODEC_MIC), {} }; From 2b727b3f8a04fe52f55316ccb8792cfd9b2dd05d Mon Sep 17 00:00:00 2001 From: Brady Norander Date: Sun, 30 Mar 2025 09:08:54 -0400 Subject: [PATCH 0103/2924] ASoC: dwc: always enable/disable i2s irqs Commit a42e988 ("ASoC: dwc: add DMA handshake control") changed the behavior of the driver to not enable or disable i2s irqs if using DMA. This breaks platforms such as AMD ACP. Audio playback appears to work but no audio can be heard. Revert to the old behavior by always enabling and disabling i2s irqs while keeping DMA handshake control. Fixes: a42e988b626 ("ASoC: dwc: add DMA handshake control") Signed-off-by: Brady Norander Link: https://patch.msgid.link/20250330130852.37881-3-bradynorander@gmail.com Signed-off-by: Mark Brown --- sound/soc/dwc/dwc-i2s.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c index 4c4171bb3fbbe4..28001e9857d9dc 100644 --- a/sound/soc/dwc/dwc-i2s.c +++ b/sound/soc/dwc/dwc-i2s.c @@ -199,12 +199,10 @@ static void i2s_start(struct dw_i2s_dev *dev, else i2s_write_reg(dev->i2s_base, IRER, 1); - /* I2S needs to enable IRQ to make a handshake with DMAC on the JH7110 SoC */ - if (dev->use_pio || dev->is_jh7110) - i2s_enable_irqs(dev, substream->stream, config->chan_nr); - else + if (!(dev->use_pio || dev->is_jh7110)) i2s_enable_dma(dev, substream->stream); + i2s_enable_irqs(dev, substream->stream, config->chan_nr); i2s_write_reg(dev->i2s_base, CER, 1); } @@ -218,11 +216,12 @@ static void i2s_stop(struct dw_i2s_dev *dev, else i2s_write_reg(dev->i2s_base, IRER, 0); - if (dev->use_pio || dev->is_jh7110) - i2s_disable_irqs(dev, substream->stream, 8); - else + if (!(dev->use_pio || dev->is_jh7110)) i2s_disable_dma(dev, substream->stream); + i2s_disable_irqs(dev, substream->stream, 8); + + if (!dev->active) { i2s_write_reg(dev->i2s_base, CER, 0); i2s_write_reg(dev->i2s_base, IER, 0); From a31a4934b31faea76e735bab17e63d02fcd8e029 Mon Sep 17 00:00:00 2001 From: Evgeny Pimenov Date: Tue, 1 Apr 2025 23:40:58 +0300 Subject: [PATCH 0104/2924] ASoC: qcom: Fix sc7280 lpass potential buffer overflow Case values introduced in commit 5f78e1fb7a3e ("ASoC: qcom: Add driver support for audioreach solution") cause out of bounds access in arrays of sc7280 driver data (e.g. in case of RX_CODEC_DMA_RX_0 in sc7280_snd_hw_params()). Redefine LPASS_MAX_PORTS to consider the maximum possible port id for q6dsp as sc7280 driver utilizes some of those values. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 77d0ffef793d ("ASoC: qcom: Add macro for lpass DAI id's max limit") Cc: stable@vger.kernel.org # v6.0+ Suggested-by: Mikhail Kobuk Suggested-by: Alexey Khoroshilov Signed-off-by: Evgeny Pimenov Link: https://patch.msgid.link/20250401204058.32261-1-pimenoveu12@gmail.com Signed-off-by: Mark Brown --- sound/soc/qcom/lpass.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/qcom/lpass.h b/sound/soc/qcom/lpass.h index 27a2bf9a661393..de3ec6f594c11c 100644 --- a/sound/soc/qcom/lpass.h +++ b/sound/soc/qcom/lpass.h @@ -13,10 +13,11 @@ #include #include #include +#include #include "lpass-hdmi.h" #define LPASS_AHBIX_CLOCK_FREQUENCY 131072000 -#define LPASS_MAX_PORTS (LPASS_CDC_DMA_VA_TX8 + 1) +#define LPASS_MAX_PORTS (DISPLAY_PORT_RX_7 + 1) #define LPASS_MAX_MI2S_PORTS (8) #define LPASS_MAX_DMA_CHANNELS (8) #define LPASS_MAX_HDMI_DMA_CHANNELS (4) From ef5c23ae9ab380fa756f257411024a9b4518d1b9 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Wed, 19 Mar 2025 11:35:04 +0800 Subject: [PATCH 0105/2924] ASoC: fsl_asrc_dma: get codec or cpu dai from backend With audio graph card, original cpu dai is changed to codec device in backend, so if cpu dai is dummy device in backend, get the codec dai device, which is the real hardware device connected. The specific case is ASRC->SAI->AMIX->CODEC. Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20250319033504.2898605-1-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc_dma.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c index f501f47242fb0b..1bba48318e2ddf 100644 --- a/sound/soc/fsl/fsl_asrc_dma.c +++ b/sound/soc/fsl/fsl_asrc_dma.c @@ -156,11 +156,24 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component, for_each_dpcm_be(rtd, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; struct snd_pcm_substream *substream_be; - struct snd_soc_dai *dai = snd_soc_rtd_to_cpu(be, 0); + struct snd_soc_dai *dai_cpu = snd_soc_rtd_to_cpu(be, 0); + struct snd_soc_dai *dai_codec = snd_soc_rtd_to_codec(be, 0); + struct snd_soc_dai *dai; if (dpcm->fe != rtd) continue; + /* + * With audio graph card, original cpu dai is changed to codec + * device in backend, so if cpu dai is dummy device in backend, + * get the codec dai device, which is the real hardware device + * connected. + */ + if (!snd_soc_dai_is_dummy(dai_cpu)) + dai = dai_cpu; + else + dai = dai_codec; + substream_be = snd_soc_dpcm_get_substream(be, stream); dma_params_be = snd_soc_dai_get_dma_data(dai, substream_be); dev_be = dai->dev; From 95f723cf141b95e3b3a5b92cf2ea98a863fe7275 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Wed, 2 Apr 2025 22:14:11 +0800 Subject: [PATCH 0106/2924] ASoC: Intel: avs: Fix null-ptr-deref in avs_component_probe() devm_kasprintf() returns NULL when memory allocation fails. Currently, avs_component_probe() does not check for this case, which results in a NULL pointer dereference. Fixes: 739c031110da ("ASoC: Intel: avs: Provide support for fallback topology") Reviewed-by: Cezary Rojewski Reviewed-by: Ethan Carter Edwards Signed-off-by: Henry Martin Link: https://patch.msgid.link/20250402141411.44972-1-bsdhenrymartin@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/pcm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index dac463390da135..7072bcf4e56f10 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -927,7 +927,8 @@ static int avs_component_probe(struct snd_soc_component *component) else mach->tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL, "hda-generic-tplg.bin"); - + if (!mach->tplg_filename) + return -ENOMEM; filename = kasprintf(GFP_KERNEL, "%s/%s", component->driver->topology_name_prefix, mach->tplg_filename); if (!filename) From f5cd27ec714628b522af8c588d3907671404385e Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Thu, 3 Apr 2025 21:28:20 -0400 Subject: [PATCH 0107/2924] bcachefs: Fix escape sequence in prt_printf Remove backslash before format specifier. Ensure correct output. Signed-off-by: Gabriel Shahrouzi Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index de02ebf847ec94..b211c97238aba9 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -607,7 +607,7 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update prt_newline(out); printbuf_indent_add(out, 2); bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); - prt_printf(out, "read_done:\t\%u\n", m->read_done); + prt_printf(out, "read_done:\t%u\n", m->read_done); bch2_write_op_to_text(out, &m->op); printbuf_indent_sub(out, 2); } From afc5444e4d862c805798016a37cf301f5f30e1e6 Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Thu, 3 Apr 2025 21:28:21 -0400 Subject: [PATCH 0108/2924] bcachefs: Fix type for parameter in journal_advance_devs_to_next_bucket Replace u64 with __le64 to match the expected parameter type. Ensure consistency both in function calls and within the function itself. Signed-off-by: Gabriel Shahrouzi Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 1b7961f4f609e3..2a54ac79189bf1 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1460,7 +1460,7 @@ int bch2_journal_read(struct bch_fs *c, static void journal_advance_devs_to_next_bucket(struct journal *j, struct dev_alloc_list *devs, - unsigned sectors, u64 seq) + unsigned sectors, __le64 seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); From 4a22a7332341f49e60b804811fee7edf87909e9e Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Thu, 3 Apr 2025 21:28:22 -0400 Subject: [PATCH 0109/2924] bcachefs: Use cpu_to_le16 for dirent lengths Prevent incorrect byte ordering for big-endian systems. Signed-off-by: Gabriel Shahrouzi Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index bf53a029f3562c..8488a757811514 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -287,8 +287,8 @@ static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent, EBUG_ON(!dirent->v.d_casefold); EBUG_ON(!cf_name->len); - dirent->v.d_cf_name_block.d_name_len = name->len; - dirent->v.d_cf_name_block.d_cf_name_len = cf_name->len; + dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len); + dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_name->len); memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len); memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len); memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0, From 34b47e3d73a21ef992905746cdb044ce02d3b29a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 5 Apr 2025 12:26:43 -0400 Subject: [PATCH 0110/2924] bcachefs: Fix UAF in bchfs_read() Commit 3ba0240a8789 fixed a bug in the read retry path in __bch2_read(), and changed bchfs_read() to match - to avoid a landmine if bch2_read_extent() ever starts returning transaction restarts. But that was incorrect, because bchfs_read() doesn't use a separate stack allocated bvec_iter, it uses the one in the rbio being submitted. Add a comment explaining the issue, and revert the buggy change. Fixes: 3ba0240a8789 ("bcachefs: Fix silent short reads in data read retry path") Reported-by: syzbot+2deb10b8dc9aae6fab67@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-buffered.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 19d4599918dc59..e3a75dcca60c81 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -225,11 +225,26 @@ static void bchfs_read(struct btree_trans *trans, bch2_read_extent(trans, rbio, iter.pos, data_btree, k, offset_into_extent, flags); - swap(rbio->bio.bi_iter.bi_size, bytes); + /* + * Careful there's a landmine here if bch2_read_extent() ever + * starts returning transaction restarts here. + * + * We've changed rbio->bi_iter.bi_size to be "bytes we can read + * from this extent" with the swap call, and we restore it + * below. That restore needs to come before checking for + * errors. + * + * But unlike __bch2_read(), we use the rbio bvec iter, not one + * on the stack, so we can't do the restore right after the + * bch2_read_extent() call: we don't own that iterator anymore + * if BCH_READ_last_fragment is set, since we may have submitted + * that rbio instead of cloning it. + */ if (flags & BCH_READ_last_fragment) break; + swap(rbio->bio.bi_iter.bi_size, bytes); bio_advance(&rbio->bio, bytes); err: if (ret && From 1ec94a9f6dd870a4d9c799d26e440c8f6b6268d2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 6 Apr 2025 13:38:11 -0400 Subject: [PATCH 0111/2924] bcachefs: Fix duplicate "ro,read_only" in opts at startup Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index a58edde43bee3b..16ebea1214af5f 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1002,12 +1002,6 @@ static void print_mount_opts(struct bch_fs *c) prt_str(&p, "starting version "); bch2_version_to_text(&p, c->sb.version); - if (c->opts.read_only) { - prt_str(&p, " opts="); - first = false; - prt_printf(&p, "ro"); - } - for (i = 0; i < bch2_opts_nr; i++) { const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); From 4bf4b5046de0ef7f9dc50f3a9ef8a6dcda178a6d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 21:33:33 -0700 Subject: [PATCH 0112/2924] bcachefs: use library APIs for ChaCha20 and Poly1305 Just use the ChaCha20 and Poly1305 libraries instead of the clunky crypto API. This is much simpler. It is also slightly faster, since the libraries provide more direct access to the same architecture-optimized ChaCha20 and Poly1305 code. I've tested that existing encrypted bcachefs filesystems can be continue to be accessed with this patch applied. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 5 +- fs/bcachefs/bcachefs.h | 4 +- fs/bcachefs/btree_node_scan.c | 2 +- fs/bcachefs/checksum.c | 247 ++++++++-------------------------- fs/bcachefs/checksum.h | 3 +- fs/bcachefs/io_read.c | 3 +- fs/bcachefs/super.c | 3 - 7 files changed, 65 insertions(+), 202 deletions(-) diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index bf1c94e51dd064..508c0cfdf7c96d 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -15,10 +15,9 @@ config BCACHEFS_FS select ZLIB_INFLATE select ZSTD_COMPRESS select ZSTD_DECOMPRESS - select CRYPTO select CRYPTO_LIB_SHA256 - select CRYPTO_CHACHA20 - select CRYPTO_POLY1305 + select CRYPTO_LIB_CHACHA + select CRYPTO_LIB_POLY1305 select KEYS select RAID6_PQ select XOR_BLOCKS diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 5d9f208a1bb744..5cb0fc384ac020 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -981,8 +981,8 @@ struct bch_fs { mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR]; size_t zstd_workspace_size; - struct crypto_sync_skcipher *chacha20; - struct crypto_shash *poly1305; + struct bch_key chacha20_key; + bool chacha20_key_set; atomic64_t key_version; diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 8c9fdb7263fea5..f520fbcd045540 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -183,7 +183,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, return; if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) { - if (!c->chacha20) + if (!c->chacha20_key_set) return; struct nonce nonce = btree_nonce(&bn->keys, 0); diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index 3726689093e301..d0a34a097b809e 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -7,17 +7,12 @@ #include "super-io.h" #include -#include #include #include #include #include -#include -#include #include -#include #include -#include #include /* @@ -96,116 +91,40 @@ static void bch2_checksum_update(struct bch2_checksum_state *state, const void * } } -static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm, - struct nonce nonce, - struct scatterlist *sg, size_t len) +static void bch2_chacha20_init(u32 state[CHACHA_STATE_WORDS], + const struct bch_key *key, struct nonce nonce) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); + u32 key_words[CHACHA_KEY_SIZE / sizeof(u32)]; - skcipher_request_set_sync_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, len, nonce.d); + BUILD_BUG_ON(sizeof(key_words) != sizeof(*key)); + memcpy(key_words, key, sizeof(key_words)); + le32_to_cpu_array(key_words, ARRAY_SIZE(key_words)); - int ret = crypto_skcipher_encrypt(req); - if (ret) - pr_err("got error %i from crypto_skcipher_encrypt()", ret); - - return ret; -} - -static inline int do_encrypt(struct crypto_sync_skcipher *tfm, - struct nonce nonce, - void *buf, size_t len) -{ - if (!is_vmalloc_addr(buf)) { - struct scatterlist sg = {}; - - sg_mark_end(&sg); - sg_set_page(&sg, virt_to_page(buf), len, offset_in_page(buf)); - return do_encrypt_sg(tfm, nonce, &sg, len); - } else { - DARRAY_PREALLOCATED(struct scatterlist, 4) sgl; - size_t sgl_len = 0; - int ret; - - darray_init(&sgl); - - while (len) { - unsigned offset = offset_in_page(buf); - struct scatterlist sg = { - .page_link = (unsigned long) vmalloc_to_page(buf), - .offset = offset, - .length = min(len, PAGE_SIZE - offset), - }; + BUILD_BUG_ON(sizeof(nonce) != CHACHA_IV_SIZE); + chacha_init(state, key_words, (const u8 *)nonce.d); - if (darray_push(&sgl, sg)) { - sg_mark_end(&darray_last(sgl)); - ret = do_encrypt_sg(tfm, nonce, sgl.data, sgl_len); - if (ret) - goto err; - - nonce = nonce_add(nonce, sgl_len); - sgl_len = 0; - sgl.nr = 0; - BUG_ON(darray_push(&sgl, sg)); - } - - buf += sg.length; - len -= sg.length; - sgl_len += sg.length; - } - - sg_mark_end(&darray_last(sgl)); - ret = do_encrypt_sg(tfm, nonce, sgl.data, sgl_len); -err: - darray_exit(&sgl); - return ret; - } + memzero_explicit(key_words, sizeof(key_words)); } -int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, - void *buf, size_t len) +static void bch2_chacha20(const struct bch_key *key, struct nonce nonce, + void *data, size_t len) { - struct crypto_sync_skcipher *chacha20 = - crypto_alloc_sync_skcipher("chacha20", 0, 0); - int ret; - - ret = PTR_ERR_OR_ZERO(chacha20); - if (ret) { - pr_err("error requesting chacha20 cipher: %s", bch2_err_str(ret)); - return ret; - } - - ret = crypto_skcipher_setkey(&chacha20->base, - (void *) key, sizeof(*key)); - if (ret) { - pr_err("error from crypto_skcipher_setkey(): %s", bch2_err_str(ret)); - goto err; - } + u32 state[CHACHA_STATE_WORDS]; - ret = do_encrypt(chacha20, nonce, buf, len); -err: - crypto_free_sync_skcipher(chacha20); - return ret; + bch2_chacha20_init(state, key, nonce); + chacha20_crypt(state, data, data, len); + memzero_explicit(state, sizeof(state)); } -static int gen_poly_key(struct bch_fs *c, struct shash_desc *desc, - struct nonce nonce) +static void bch2_poly1305_init(struct poly1305_desc_ctx *desc, + struct bch_fs *c, struct nonce nonce) { - u8 key[POLY1305_KEY_SIZE]; - int ret; + u8 key[POLY1305_KEY_SIZE] = { 0 }; nonce.d[3] ^= BCH_NONCE_POLY; - memset(key, 0, sizeof(key)); - ret = do_encrypt(c->chacha20, nonce, key, sizeof(key)); - if (ret) - return ret; - - desc->tfm = c->poly1305; - crypto_shash_init(desc); - crypto_shash_update(desc, key, sizeof(key)); - return 0; + bch2_chacha20(&c->chacha20_key, nonce, key, sizeof(key)); + poly1305_init(desc, key); } struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, @@ -230,14 +149,13 @@ struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, case BCH_CSUM_chacha20_poly1305_80: case BCH_CSUM_chacha20_poly1305_128: { - SHASH_DESC_ON_STACK(desc, c->poly1305); + struct poly1305_desc_ctx dctx; u8 digest[POLY1305_DIGEST_SIZE]; struct bch_csum ret = { 0 }; - gen_poly_key(c, desc, nonce); - - crypto_shash_update(desc, data, len); - crypto_shash_final(desc, digest); + bch2_poly1305_init(&dctx, c, nonce); + poly1305_update(&dctx, data, len); + poly1305_final(&dctx, digest); memcpy(&ret, digest, bch_crc_bytes[type]); return ret; @@ -253,11 +171,12 @@ int bch2_encrypt(struct bch_fs *c, unsigned type, if (!bch2_csum_type_is_encryption(type)) return 0; - if (bch2_fs_inconsistent_on(!c->chacha20, + if (bch2_fs_inconsistent_on(!c->chacha20_key_set, c, "attempting to encrypt without encryption key")) return -BCH_ERR_no_encryption_key; - return do_encrypt(c->chacha20, nonce, data, len); + bch2_chacha20(&c->chacha20_key, nonce, data, len); + return 0; } static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, @@ -296,26 +215,26 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, case BCH_CSUM_chacha20_poly1305_80: case BCH_CSUM_chacha20_poly1305_128: { - SHASH_DESC_ON_STACK(desc, c->poly1305); + struct poly1305_desc_ctx dctx; u8 digest[POLY1305_DIGEST_SIZE]; struct bch_csum ret = { 0 }; - gen_poly_key(c, desc, nonce); + bch2_poly1305_init(&dctx, c, nonce); #ifdef CONFIG_HIGHMEM __bio_for_each_segment(bv, bio, *iter, *iter) { void *p = kmap_local_page(bv.bv_page) + bv.bv_offset; - crypto_shash_update(desc, p, bv.bv_len); + poly1305_update(&dctx, p, bv.bv_len); kunmap_local(p); } #else __bio_for_each_bvec(bv, bio, *iter, *iter) - crypto_shash_update(desc, + poly1305_update(&dctx, page_address(bv.bv_page) + bv.bv_offset, bv.bv_len); #endif - crypto_shash_final(desc, digest); + poly1305_final(&dctx, digest); memcpy(&ret, digest, bch_crc_bytes[type]); return ret; @@ -338,43 +257,33 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type, { struct bio_vec bv; struct bvec_iter iter; - DARRAY_PREALLOCATED(struct scatterlist, 4) sgl; - size_t sgl_len = 0; + u32 chacha_state[CHACHA_STATE_WORDS]; int ret = 0; - if (bch2_fs_inconsistent_on(!c->chacha20, + if (bch2_fs_inconsistent_on(!c->chacha20_key_set, c, "attempting to encrypt without encryption key")) return -BCH_ERR_no_encryption_key; - darray_init(&sgl); + bch2_chacha20_init(chacha_state, &c->chacha20_key, nonce); bio_for_each_segment(bv, bio, iter) { - struct scatterlist sg = { - .page_link = (unsigned long) bv.bv_page, - .offset = bv.bv_offset, - .length = bv.bv_len, - }; - - if (darray_push(&sgl, sg)) { - sg_mark_end(&darray_last(sgl)); - ret = do_encrypt_sg(c->chacha20, nonce, sgl.data, sgl_len); - if (ret) - goto err; - - nonce = nonce_add(nonce, sgl_len); - sgl_len = 0; - sgl.nr = 0; - - BUG_ON(darray_push(&sgl, sg)); + void *p; + + /* + * chacha_crypt() assumes that the length is a multiple of + * CHACHA_BLOCK_SIZE on any non-final call. + */ + if (!IS_ALIGNED(bv.bv_len, CHACHA_BLOCK_SIZE)) { + bch_err_ratelimited(c, "bio not aligned for encryption"); + ret = -EIO; + break; } - sgl_len += sg.length; + p = bvec_kmap_local(&bv); + chacha20_crypt(chacha_state, p, p, bv.bv_len); + kunmap_local(p); } - - sg_mark_end(&darray_last(sgl)); - ret = do_encrypt_sg(c->chacha20, nonce, sgl.data, sgl_len); -err: - darray_exit(&sgl); + memzero_explicit(chacha_state, sizeof(chacha_state)); return ret; } @@ -650,10 +559,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c, } /* decrypt real key: */ - ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c), - &sb_key, sizeof(sb_key)); - if (ret) - goto err; + bch2_chacha20(&user_key, bch2_sb_key_nonce(c), &sb_key, sizeof(sb_key)); if (bch2_key_is_encrypted(&sb_key)) { bch_err(c, "incorrect encryption key"); @@ -668,31 +574,6 @@ int bch2_decrypt_sb_key(struct bch_fs *c, return ret; } -static int bch2_alloc_ciphers(struct bch_fs *c) -{ - if (c->chacha20) - return 0; - - struct crypto_sync_skcipher *chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); - int ret = PTR_ERR_OR_ZERO(chacha20); - if (ret) { - bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret)); - return ret; - } - - struct crypto_shash *poly1305 = crypto_alloc_shash("poly1305", 0, 0); - ret = PTR_ERR_OR_ZERO(poly1305); - if (ret) { - bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret)); - crypto_free_sync_skcipher(chacha20); - return ret; - } - - c->chacha20 = chacha20; - c->poly1305 = poly1305; - return 0; -} - #if 0 /* @@ -797,35 +678,21 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) void bch2_fs_encryption_exit(struct bch_fs *c) { - if (c->poly1305) - crypto_free_shash(c->poly1305); - if (c->chacha20) - crypto_free_sync_skcipher(c->chacha20); + memzero_explicit(&c->chacha20_key, sizeof(c->chacha20_key)); } int bch2_fs_encryption_init(struct bch_fs *c) { struct bch_sb_field_crypt *crypt; - struct bch_key key; - int ret = 0; + int ret; crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); if (!crypt) - goto out; + return 0; - ret = bch2_alloc_ciphers(c); + ret = bch2_decrypt_sb_key(c, crypt, &c->chacha20_key); if (ret) - goto out; - - ret = bch2_decrypt_sb_key(c, crypt, &key); - if (ret) - goto out; - - ret = crypto_skcipher_setkey(&c->chacha20->base, - (void *) &key.key, sizeof(key.key)); - if (ret) - goto out; -out: - memzero_explicit(&key, sizeof(key)); - return ret; + return ret; + c->chacha20_key_set = true; + return 0; } diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h index 4ac251c8fcd839..1310782d3ae930 100644 --- a/fs/bcachefs/checksum.h +++ b/fs/bcachefs/checksum.h @@ -69,7 +69,6 @@ static inline void bch2_csum_err_msg(struct printbuf *out, bch2_csum_to_text(out, type, expected); } -int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); int bch2_request_key(struct bch_sb *, struct bch_key *); #ifndef __KERNEL__ int bch2_revoke_key(struct bch_sb *); @@ -156,7 +155,7 @@ static inline bool bch2_checksum_type_valid(const struct bch_fs *c, if (type >= BCH_CSUM_NR) return false; - if (bch2_csum_type_is_encryption(type) && !c->chacha20) + if (bch2_csum_type_is_encryption(type) && !c->chacha20_key_set) return false; return true; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 417bb0c7bbfa65..fd627c8d105364 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -977,7 +977,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, goto err; } - if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) { + if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && + !c->chacha20_key_set) { struct printbuf buf = PRINTBUF; bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 16ebea1214af5f..956f51d563b49f 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -70,13 +70,10 @@ #include #include #include -#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kent Overstreet "); MODULE_DESCRIPTION("bcachefs filesystem"); -MODULE_SOFTDEP("pre: chacha20"); -MODULE_SOFTDEP("pre: poly1305"); MODULE_SOFTDEP("pre: xxhash"); const char * const bch2_fs_flag_strs[] = { From c92896ffb7fc0c49ca031d9e98e76008597bbab0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 2 Apr 2025 09:23:38 -0700 Subject: [PATCH 0113/2924] bcachefs: Remove unnecessary softdep on xxhash As with the other algorithms, bcachefs does not access xxhash through the crypto API. So there is no need to use a module softdep to ensure that it is loaded. Signed-off-by: Eric Biggers Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 956f51d563b49f..b79e80a435e09a 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -74,7 +74,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kent Overstreet "); MODULE_DESCRIPTION("bcachefs filesystem"); -MODULE_SOFTDEP("pre: xxhash"); const char * const bch2_fs_flag_strs[] = { #define x(n) #n, From 55fd97fbc4744a43fb2d134908e481266cf33bda Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Mar 2025 11:44:30 -0400 Subject: [PATCH 0114/2924] bcachefs: Use sort_nonatomic() instead of sort() Fixes "task out to lunch" warnings during recovery on large machines with lots of dirty data in the journal. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_journal_iter.c | 5 ++--- fs/bcachefs/btree_node_scan.c | 4 ++-- fs/bcachefs/btree_write_buffer.c | 8 ++++---- fs/bcachefs/recovery.c | 6 +++--- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index d1ad1a7613c9f7..7d6c971db23c33 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -644,8 +644,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, */ static int journal_sort_key_cmp(const void *_l, const void *_r) { - cond_resched(); - const struct journal_key *l = _l; const struct journal_key *r = _r; @@ -689,7 +687,8 @@ void bch2_journal_keys_put(struct bch_fs *c) static void __journal_keys_sort(struct journal_keys *keys) { - sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL); + sort_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]), + journal_sort_key_cmp, NULL); cond_resched(); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index f520fbcd045540..86acf037590ccc 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -398,7 +398,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) bch2_print_string_as_lines(KERN_INFO, buf.buf); } - sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL); + sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL); dst = 0; darray_for_each(f->nodes, i) { @@ -418,7 +418,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) } f->nodes.nr = dst; - sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); + sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); if (0 && c->opts.verbose) { printbuf_reset(&buf); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index adbe576ec77ee3..0941fb2c026d54 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -428,10 +428,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) */ trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr); - sort(wb->flushing.keys.data, - wb->flushing.keys.nr, - sizeof(wb->flushing.keys.data[0]), - wb_key_seq_cmp, NULL); + sort_nonatomic(wb->flushing.keys.data, + wb->flushing.keys.nr, + sizeof(wb->flushing.keys.data[0]), + wb_key_seq_cmp, NULL); darray_for_each(wb->flushing.keys, i) { if (!i->journal_seq) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 79fd18a5a07c43..d2b07f602da95a 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -389,9 +389,9 @@ int bch2_journal_replay(struct bch_fs *c) * Now, replay any remaining keys in the order in which they appear in * the journal, unpinning those journal entries as we go: */ - sort(keys_sorted.data, keys_sorted.nr, - sizeof(keys_sorted.data[0]), - journal_sort_seq_cmp, NULL); + sort_nonatomic(keys_sorted.data, keys_sorted.nr, + sizeof(keys_sorted.data[0]), + journal_sort_seq_cmp, NULL); darray_for_each(keys_sorted, kp) { cond_resched(); From 1ddaff40c08abb926be5ba713c5efc412d0836c5 Mon Sep 17 00:00:00 2001 From: Akhil R Date: Wed, 26 Mar 2025 20:21:10 +0530 Subject: [PATCH 0115/2924] crypto: tegra - Fix IV usage for AES ECB Modifying the crypto_request turns out to be not the right way to handle the stale value issue with the IV. Though the IV is not used for AES ECB, it eventually get used in algorithms like LRW in the next step after AES ECB encryption/decryption. Setting req->iv to NULL breaks the implementation of such algorithms. Hence modify only the local reqctx to check for IV. Fixes: bde558220866 ("crypto: tegra - Set IV to NULL explicitly for AES ECB") Signed-off-by: Akhil R Signed-off-by: Herbert Xu --- drivers/crypto/tegra/tegra-se-aes.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c index ca9d0cca1f748e..0e07d0523291a5 100644 --- a/drivers/crypto/tegra/tegra-se-aes.c +++ b/drivers/crypto/tegra/tegra-se-aes.c @@ -269,7 +269,7 @@ static int tegra_aes_do_one_req(struct crypto_engine *engine, void *areq) unsigned int cmdlen, key1_id, key2_id; int ret; - rctx->iv = (u32 *)req->iv; + rctx->iv = (ctx->alg == SE_ALG_ECB) ? NULL : (u32 *)req->iv; rctx->len = req->cryptlen; key1_id = ctx->key1_id; key2_id = ctx->key2_id; @@ -498,9 +498,6 @@ static int tegra_aes_crypt(struct skcipher_request *req, bool encrypt) if (!req->cryptlen) return 0; - if (ctx->alg == SE_ALG_ECB) - req->iv = NULL; - rctx->encrypt = encrypt; return crypto_transfer_skcipher_request_to_engine(ctx->se->engine, req); From 261ffd53cc8e91e6484a3170a1ddf59a16696667 Mon Sep 17 00:00:00 2001 From: Nuno Das Neves Date: Tue, 1 Apr 2025 10:32:17 -0700 Subject: [PATCH 0116/2924] Drivers: hv: Fix bad pointer dereference in hv_get_partition_id 'output' is already a pointer to the output argument, it should be passed directly to hv_do_hypercall() without the '&' operator. Fixes: e96204e5e96e ("hyperv: Move hv_current_partition_id to arch-generic code") Signed-off-by: Nuno Das Neves Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1743528737-20310-1-git-send-email-nunodasneves@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1743528737-20310-1-git-send-email-nunodasneves@linux.microsoft.com> --- drivers/hv/hv_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index b3b11be1165009..a7d7494feaca13 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -307,7 +307,7 @@ void __init hv_get_partition_id(void) local_irq_save(flags); output = *this_cpu_ptr(hyperv_pcpu_input_arg); - status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, &output); + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output); pt_id = output->partition_id; local_irq_restore(flags); From 2908ffa53f8ed487ab4285635c79d4911cced93c Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Mon, 24 Mar 2025 12:35:41 +0000 Subject: [PATCH 0117/2924] firmware: exynos-acpm: check saved RX before bailing out on empty RX queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we're polling for responses and get a response that corresponds to another request, we save the RX data in order to drain the RX queue. If the response for the current request is not found in the request's iteration of the queue, or if the queue is empty, we must check whether the RX data was saved by a previous request when it drained the RX queue. We failed to check for already saved responses when the queue was empty, and requests could time out. Check saved RX before bailing out on empty RX queue. Fixes: a88927b534ba ("firmware: add Exynos ACPM protocol driver") Reported-by: André Draszik Signed-off-by: Tudor Ambarus Reviewed-by: André Draszik Tested-by: André Draszik Link: https://lore.kernel.org/r/20250324-acpm-drained-rx-queue-v1-1-577774335151@linaro.org Signed-off-by: Krzysztof Kozlowski --- drivers/firmware/samsung/exynos-acpm.c | 44 ++++++++++++++++++-------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c index a85b2dbdd9f0d7..15e991b99f5a38 100644 --- a/drivers/firmware/samsung/exynos-acpm.c +++ b/drivers/firmware/samsung/exynos-acpm.c @@ -184,6 +184,29 @@ struct acpm_match_data { #define client_to_acpm_chan(c) container_of(c, struct acpm_chan, cl) #define handle_to_acpm_info(h) container_of(h, struct acpm_info, handle) +/** + * acpm_get_saved_rx() - get the response if it was already saved. + * @achan: ACPM channel info. + * @xfer: reference to the transfer to get response for. + * @tx_seqnum: xfer TX sequence number. + */ +static void acpm_get_saved_rx(struct acpm_chan *achan, + const struct acpm_xfer *xfer, u32 tx_seqnum) +{ + const struct acpm_rx_data *rx_data = &achan->rx_data[tx_seqnum - 1]; + u32 rx_seqnum; + + if (!rx_data->response) + return; + + rx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, rx_data->cmd[0]); + + if (rx_seqnum == tx_seqnum) { + memcpy(xfer->rxd, rx_data->cmd, xfer->rxlen); + clear_bit(rx_seqnum - 1, achan->bitmap_seqnum); + } +} + /** * acpm_get_rx() - get response from RX queue. * @achan: ACPM channel info. @@ -204,15 +227,16 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) rx_front = readl(achan->rx.front); i = readl(achan->rx.rear); - /* Bail out if RX is empty. */ - if (i == rx_front) + tx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]); + + if (i == rx_front) { + acpm_get_saved_rx(achan, xfer, tx_seqnum); return 0; + } base = achan->rx.base; mlen = achan->mlen; - tx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]); - /* Drain RX queue. */ do { /* Read RX seqnum. */ @@ -259,16 +283,8 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) * If the response was not in this iteration of the queue, check if the * RX data was previously saved. */ - rx_data = &achan->rx_data[tx_seqnum - 1]; - if (!rx_set && rx_data->response) { - rx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, - rx_data->cmd[0]); - - if (rx_seqnum == tx_seqnum) { - memcpy(xfer->rxd, rx_data->cmd, xfer->rxlen); - clear_bit(rx_seqnum - 1, achan->bitmap_seqnum); - } - } + if (!rx_set) + acpm_get_saved_rx(achan, xfer, tx_seqnum); return 0; } From 6f8a394aa952257575910d57cf0a63627fa949a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 5 Apr 2025 19:51:07 +0200 Subject: [PATCH 0118/2924] cifs: Ensure that all non-client-specific reparse points are processed by the server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix regression in mounts to e.g. onedrive shares. Generally, reparse points are processed by the SMB server during the SMB OPEN request, but there are few reparse points which do not have OPEN-like meaning for the SMB server and has to be processed by the SMB client. Those are symlinks and special files (fifo, socket, block, char). For Linux SMB client, it is required to process also name surrogate reparse points as they represent another entity on the SMB server system. Linux client will mark them as separate mount points. Examples of name surrogate reparse points are NTFS junction points (e.g. created by the "mklink" tool on Windows servers). So after processing the name surrogate reparse points, clear the -EOPNOTSUPP error code returned from the parse_reparse_point() to let SMB server to process reparse points. And remove printing misleading error message "unhandled reparse tag:" as reparse points are handled by SMB server and hence unhandled fact is normal operation. Fixes: cad3fc0a4c8c ("cifs: Throw -EOPNOTSUPP error on unsupported reparse point type from parse_reparse_point()") Fixes: b587fd128660 ("cifs: Treat unhandled directory name surrogate reparse points as mount directory nodes") Cc: stable@vger.kernel.org Reported-by: Junwen Sun Tested-by: Junwen Sun Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/inode.c | 10 ++++++++++ fs/smb/client/reparse.c | 4 ---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index a00a9d91d0da3c..9b56198f723031 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1228,6 +1228,16 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, cifs_create_junction_fattr(fattr, sb); goto out; } + /* + * If the reparse point is unsupported by the Linux SMB + * client then let it process by the SMB server. So mask + * the -EOPNOTSUPP error code. This will allow Linux SMB + * client to send SMB OPEN request to server. If server + * does not support this reparse point too then server + * will return error during open the path. + */ + if (rc == -EOPNOTSUPP) + rc = 0; } if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK && !rc) { diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 2b9e9885dc4258..f85dd40f34aff7 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -1062,8 +1062,6 @@ int parse_reparse_point(struct reparse_data_buffer *buf, const char *full_path, struct cifs_open_info_data *data) { - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); - data->reparse.buf = buf; /* See MS-FSCC 2.1.2 */ @@ -1090,8 +1088,6 @@ int parse_reparse_point(struct reparse_data_buffer *buf, } return 0; default: - cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n", - le32_to_cpu(buf->ReparseTag)); return -EOPNOTSUPP; } } From b365b9d404b7376c60c91cd079218bfef11b7822 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 6 Apr 2025 14:09:19 -0500 Subject: [PATCH 0119/2924] smb311 client: fix missing tcon check when mounting with linux/posix extensions When mounting the same share twice, once with the "linux" mount parameter (or equivalently "posix") and then once without (or e.g. with "nolinux"), we were incorrectly reusing the same tree connection for both mounts. This meant that the first mount of the share on the client, would cause subsequent mounts of that same share on the same client to ignore that mount parm ("linux" vs. "nolinux") and incorrectly reuse the same tcon. Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/connect.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index f298e86a3c1fdb..4a0b2d220fe84e 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2556,6 +2556,8 @@ static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) return 0; if (tcon->nodelete != ctx->nodelete) return 0; + if (tcon->posix_extensions != ctx->linux_ext) + return 0; return 1; } From 549d8994447f2f628c6cedd139d53926bdfee881 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 22:38:55 +0100 Subject: [PATCH 0120/2924] media: vivid: fix FB dependency It's not enough to have a dependency on CONFIG_FB, as that can be in a loadable module when vivid itself is builtin: drivers/media/test-drivers/vivid/vivid-osd.o: in function `vivid_fb_init': vivid-osd.c:(.text+0xdc0): undefined reference to `fb_alloc_cmap' vivid-osd.c:(.text+0xe26): undefined reference to `register_framebuffer' Change the dependency to only allow configurations that can be built, but change the FB to FB_CORE so this is also possible when using DRM with FB compatibility rather than full fbdev. Fixes: 20889ddede38 ("media: vivid: Introduce VIDEO_VIVID_OSD") Signed-off-by: Arnd Bergmann Reviewed-by: Ricardo Ribalda Signed-off-by: Hans Verkuil --- drivers/media/test-drivers/vivid/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/test-drivers/vivid/Kconfig b/drivers/media/test-drivers/vivid/Kconfig index e95edc0f22bfb9..cc470070a7a5ea 100644 --- a/drivers/media/test-drivers/vivid/Kconfig +++ b/drivers/media/test-drivers/vivid/Kconfig @@ -32,7 +32,8 @@ config VIDEO_VIVID_CEC config VIDEO_VIVID_OSD bool "Enable Framebuffer for testing Output Overlay" - depends on VIDEO_VIVID && FB + depends on VIDEO_VIVID && FB_CORE + depends on VIDEO_VIVID=m || FB_CORE=y default y select FB_IOMEM_HELPERS help From 9df181c8de1b6b285556f80bfd02584f3457f32e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 16:46:30 +0100 Subject: [PATCH 0121/2924] media: i2c: lt6911uxe: Fix Kconfig dependencies: The new driver fails to build if I2C is disabled: drivers/media/i2c/lt6911uxe.c:703:1: error: data definition has no type or storage class [-Werror] 703 | module_i2c_driver(lt6911uxe_i2c_driver); or if I2C is on but V4L2_CCI_I2C is not: ERROR: modpost: "cci_write" [drivers/media/i2c/lt6911uxe.ko] undefined! ERROR: modpost: "cci_read" [drivers/media/i2c/lt6911uxe.ko] undefined! For both by adding a dependency on I2C and selecting V4L2_CCI_I2C, which follows the common practice for these. Fixes: e49563c3be09 ("media: i2c: add lt6911uxe hdmi bridge driver") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil --- drivers/media/i2c/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index e576b213084d23..b06365d02ef1ef 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -1149,8 +1149,9 @@ config VIDEO_ISL7998X config VIDEO_LT6911UXE tristate "Lontium LT6911UXE decoder" - depends on ACPI && VIDEO_DEV + depends on ACPI && VIDEO_DEV && I2C select V4L2_FWNODE + select V4L2_CCI_I2C help This is a Video4Linux2 sensor-level driver for the Lontium LT6911UXE HDMI to MIPI CSI-2 bridge. From 0dce5b44bd38af20b0383ae4cabeead37b4b9a9a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 18 Mar 2025 15:03:27 +0100 Subject: [PATCH 0122/2924] media: platform: synopsys: VIDEO_SYNOPSYS_HDMIRX should depend on ARCH_ROCKCHIP For now, the Synopsys HDMI HDMI RX Controller is only supported on Rockchip RK3588 SoCs. Hence add a dependency on ARCH_ROCKCHIP, to prevent asking the user about this driver when configuring a kernel without Rockchip SoC support. Fixes: 7b59b132ad4398f9 ("media: platform: synopsys: Add support for HDMI input driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Shreeya Patel Signed-off-by: Hans Verkuil --- drivers/media/platform/synopsys/hdmirx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/synopsys/hdmirx/Kconfig b/drivers/media/platform/synopsys/hdmirx/Kconfig index 27e6706f84a373..4321f985f63206 100644 --- a/drivers/media/platform/synopsys/hdmirx/Kconfig +++ b/drivers/media/platform/synopsys/hdmirx/Kconfig @@ -2,6 +2,7 @@ config VIDEO_SYNOPSYS_HDMIRX tristate "Synopsys DesignWare HDMI Receiver driver" + depends on ARCH_ROCKCHIP || COMPILE_TEST depends on VIDEO_DEV select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API From 118b34092e37da1d3c4808e8cd1dd0246ac3f97e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 19 Mar 2025 11:32:56 +0100 Subject: [PATCH 0123/2924] media: i2c: lt6911uxe: add two selects to Kconfig In order to get the v4l2_subdev functions you need to select MEDIA_CONTROLLER and VIDEO_V4L2_SUBDEV_API. Signed-off-by: Hans Verkuil Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503151002.HacBN2LO-lkp@intel.com/ Fixes: e49563c3be09 ("media: i2c: add lt6911uxe hdmi bridge driver") --- drivers/media/i2c/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index b06365d02ef1ef..e45ba127069fc0 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -1152,6 +1152,8 @@ config VIDEO_LT6911UXE depends on ACPI && VIDEO_DEV && I2C select V4L2_FWNODE select V4L2_CCI_I2C + select MEDIA_CONTROLLER + select VIDEO_V4L2_SUBDEV_API help This is a Video4Linux2 sensor-level driver for the Lontium LT6911UXE HDMI to MIPI CSI-2 bridge. From d51adf038ebe59b592005166209b70218b1da849 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 27 Feb 2025 15:02:46 +0100 Subject: [PATCH 0124/2924] media: cec: tda9950: add back i2c dependency drivers/media/cec/i2c/tda9950.c: In function 'tda9950_write_range': drivers/media/cec/i2c/tda9950.c:92:15: error: implicit declaration of function 'i2c_transfer' [-Wimplicit-function-declaration] 92 | ret = i2c_transfer(client->adapter, &msg, 1); | ^~~~~~~~~~~~ drivers/media/cec/i2c/tda9950.c: In function 'tda9950_probe': drivers/media/cec/i2c/tda9950.c:391:14: error: implicit declaration of function 'i2c_check_functionality' [-Wimplicit-function-declaration] 391 | if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { | ^~~~~~~~~~~~~~~~~~~~~~~ Fixes: caa6f4a75e9f ("media: cec: move driver for TDA9950 from drm/i2c") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil --- drivers/media/cec/i2c/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/cec/i2c/Kconfig b/drivers/media/cec/i2c/Kconfig index b9d21643eef189..c31abc26f60204 100644 --- a/drivers/media/cec/i2c/Kconfig +++ b/drivers/media/cec/i2c/Kconfig @@ -16,6 +16,7 @@ config CEC_CH7322 config CEC_NXP_TDA9950 tristate "NXP Semiconductors TDA9950/TDA998X HDMI CEC" + depends on I2C select CEC_NOTIFIER select CEC_CORE default DRM_I2C_NXP_TDA998X From 06eaa824fd239edd1eab2754f29b2d03da313003 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 18 Mar 2025 07:19:46 +0000 Subject: [PATCH 0125/2924] mm/memblock: pass size instead of end to memblock_set_node() The second parameter of memblock_set_node() is size instead of end. Since it iterates from lower address to higher address, finally the node id is correct. But during the process, some of them are wrong. Pass size instead of end. Fixes: 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") Signed-off-by: Wei Yang CC: Mike Rapoport CC: Yajun Deng CC: stable@vger.kernel.org Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20250318071948.23854-2-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index 0a53db4d9f7beb..9639f04b4fdf9f 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2196,7 +2196,7 @@ static void __init memmap_init_reserved_pages(void) if (memblock_is_nomap(region)) reserve_bootmem_region(start, end, nid); - memblock_set_node(start, end, &memblock.reserved, nid); + memblock_set_node(start, region->size, &memblock.reserved, nid); } /* From eac8ea8736ccc09513152d970eb2a42ed78e87e8 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 18 Mar 2025 07:19:47 +0000 Subject: [PATCH 0126/2924] mm/memblock: repeat setting reserved region nid if array is doubled Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") introduce a way to set nid to all reserved region. But there is a corner case it will leave some region with invalid nid. When memblock_set_node() doubles the array of memblock.reserved, it may lead to a new reserved region before current position. The new region will be left with an invalid node id. Repeat the process when detecting it. Fixes: 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") Signed-off-by: Wei Yang CC: Mike Rapoport CC: Yajun Deng CC: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250318071948.23854-3-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/memblock.c b/mm/memblock.c index 9639f04b4fdf9f..d3509414b8c3a1 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2183,11 +2183,14 @@ static void __init memmap_init_reserved_pages(void) struct memblock_region *region; phys_addr_t start, end; int nid; + unsigned long max_reserved; /* * set nid on all reserved pages and also treat struct * pages for the NOMAP regions as PageReserved */ +repeat: + max_reserved = memblock.reserved.max; for_each_mem_region(region) { nid = memblock_get_region_node(region); start = region->base; @@ -2198,6 +2201,13 @@ static void __init memmap_init_reserved_pages(void) memblock_set_node(start, region->size, &memblock.reserved, nid); } + /* + * 'max' is changed means memblock.reserved has been doubled its + * array, which may result a new reserved region before current + * 'start'. Now we should repeat the procedure to set its node id. + */ + if (max_reserved != memblock.reserved.max) + goto repeat; /* * initialize struct pages for reserved regions that don't have From ed471e1984939a500eea179bc16e1c2aadf00db5 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 7 Apr 2025 10:43:51 +0900 Subject: [PATCH 0127/2924] memblock tests: Fix mutex related build error Fix mutex and free_reserved_area() related build errors which have been introduced by commit 74e2498ccf7b ("mm/memblock: Add reserved memory release function"). Fixes: 74e2498ccf7b ("mm/memblock: Add reserved memory release function") Reported-by: Wei Yang Closes: https://lore.kernel.org/all/20250405023018.g2ae52nrz2757b3n@master/ Signed-off-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/174399023133.47537.7375975856054461445.stgit@devnote2 Signed-off-by: Mike Rapoport (Microsoft) --- tools/testing/memblock/internal.h | 6 ++++++ tools/testing/memblock/linux/mutex.h | 14 ++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tools/testing/memblock/linux/mutex.h diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index 1cf82acb2a3e4d..0ab4b53bb4f32a 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -24,4 +24,10 @@ static inline void accept_memory(phys_addr_t start, unsigned long size) { } +static inline unsigned long free_reserved_area(void *start, void *end, + int poison, const char *s) +{ + return 0; +} + #endif diff --git a/tools/testing/memblock/linux/mutex.h b/tools/testing/memblock/linux/mutex.h new file mode 100644 index 00000000000000..ae3f497165d6a8 --- /dev/null +++ b/tools/testing/memblock/linux/mutex.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MUTEX_H +#define _MUTEX_H + +#define DEFINE_MUTEX(name) int name + +static inline void dummy_mutex_guard(int *name) +{ +} + +#define guard(mutex) \ + dummy_##mutex##_guard + +#endif /* _MUTEX_H */ \ No newline at end of file From 3b394dff15e14550a26b133fc7b556b5b526f6a5 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 18 Mar 2025 07:19:48 +0000 Subject: [PATCH 0128/2924] memblock tests: add test for memblock_set_node Add a test to check memblock_set_node() behavior. And create a corner case in which the memblock.reserved array is doubled during memblock_set_node(). And finally make sure all regions in memblock.reserved are with valid node id. Signed-off-by: Wei Yang CC: Mike Rapoport CC: Yajun Deng Link: https://lore.kernel.org/r/20250318071948.23854-4-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- tools/testing/memblock/tests/basic_api.c | 102 +++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 67503089e6a0ad..01e836fba48884 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -2434,6 +2434,107 @@ static int memblock_overlaps_region_checks(void) return 0; } +#ifdef CONFIG_NUMA +static int memblock_set_node_check(void) +{ + unsigned long i, max_reserved; + struct memblock_region *rgn; + void *orig_region; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + orig_region = memblock.reserved.regions; + + /* Equally Split range to node 0 and 1*/ + memblock_set_node(memblock_start_of_DRAM(), + memblock_phys_mem_size() / 2, &memblock.memory, 0); + memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2, + memblock_phys_mem_size() / 2, &memblock.memory, 1); + + ASSERT_EQ(memblock.memory.cnt, 2); + rgn = &memblock.memory.regions[0]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 0); + rgn = &memblock.memory.regions[1]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 1); + + /* Reserve 126 regions with the last one across node boundary */ + for (i = 0; i < 125; i++) + memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8); + + memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8, + SZ_16); + + /* + * Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") + * do following process to set nid to each memblock.reserved region. + * But it may miss some region if memblock_set_node() double the + * array. + * + * By checking 'max', we make sure all region nid is set properly. + */ +repeat: + max_reserved = memblock.reserved.max; + for_each_mem_region(rgn) { + int nid = memblock_get_region_node(rgn); + + memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid); + } + if (max_reserved != memblock.reserved.max) + goto repeat; + + /* Confirm each region has valid node set */ + for_each_reserved_mem_region(rgn) { + ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn))); + if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1)) + ASSERT_EQ(1, memblock_get_region_node(rgn)); + else + ASSERT_EQ(0, memblock_get_region_node(rgn)); + } + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + + test_pass_pop(); + + return 0; +} + +static int memblock_set_node_checks(void) +{ + prefix_reset(); + prefix_push("memblock_set_node"); + test_print("Running memblock_set_node tests...\n"); + + memblock_set_node_check(); + + prefix_pop(); + + return 0; +} +#else +static int memblock_set_node_checks(void) +{ + return 0; +} +#endif + int memblock_basic_checks(void) { memblock_initialization_check(); @@ -2444,6 +2545,7 @@ int memblock_basic_checks(void) memblock_bottom_up_checks(); memblock_trim_memory_checks(); memblock_overlaps_region_checks(); + memblock_set_node_checks(); return 0; } From b40c54648158f63557ae3bb594802a199632afaf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Mar 2025 12:32:59 +0100 Subject: [PATCH 0129/2924] xenbus: add module description Modules without a description now cause a warning: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/xen/xenbus/xenbus_probe_frontend.o Signed-off-by: Arnd Bergmann Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250328113302.2632353-1-arnd@kernel.org> --- drivers/xen/xenbus/xenbus_probe_frontend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index fcb335bb7b1878..6d1819269cbe53 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -513,4 +513,5 @@ static int __init boot_wait_for_devices(void) late_initcall(boot_wait_for_devices); #endif +MODULE_DESCRIPTION("Xen PV-device frontend support"); MODULE_LICENSE("GPL"); From 8323f3a69de6f6e96bf22f32dd8e2920766050c2 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Thu, 27 Mar 2025 11:23:49 +0800 Subject: [PATCH 0130/2924] gpio: tegra186: fix resource handling in ACPI probe path When the Tegra186 GPIO controller is probed through ACPI matching, the driver emits two error messages during probing: "tegra186-gpio NVDA0508:00: invalid resource (null)" "tegra186-gpio NVDA0508:00: invalid resource (null)" Fix this by getting resource first and then do the ioremap. Fixes: 2606e7c9f5fc ("gpio: tegra186: Add ACPI support") Cc: stable@vger.kernel.org Signed-off-by: Guixin Liu Link: https://lore.kernel.org/r/20250327032349.78809-1-kanie@linux.alibaba.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tegra186.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index 6895b65c86aff5..d27bfac6c9f53d 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -823,6 +823,7 @@ static int tegra186_gpio_probe(struct platform_device *pdev) struct gpio_irq_chip *irq; struct tegra_gpio *gpio; struct device_node *np; + struct resource *res; char **names; int err; @@ -842,19 +843,19 @@ static int tegra186_gpio_probe(struct platform_device *pdev) gpio->num_banks++; /* get register apertures */ - gpio->secure = devm_platform_ioremap_resource_byname(pdev, "security"); - if (IS_ERR(gpio->secure)) { - gpio->secure = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(gpio->secure)) - return PTR_ERR(gpio->secure); - } - - gpio->base = devm_platform_ioremap_resource_byname(pdev, "gpio"); - if (IS_ERR(gpio->base)) { - gpio->base = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(gpio->base)) - return PTR_ERR(gpio->base); - } + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "security"); + if (!res) + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + gpio->secure = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(gpio->secure)) + return PTR_ERR(gpio->secure); + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "gpio"); + if (!res) + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + gpio->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(gpio->base)) + return PTR_ERR(gpio->base); err = platform_irq_count(pdev); if (err < 0) From 36c6468724aa98d33fea9a1d7e07ddda6302f5d4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 28 Mar 2025 09:24:01 +0100 Subject: [PATCH 0131/2924] mtd: nand: Drop explicit test for built-in CONFIG_SPI_QPIC_SNAND If CONFIG_SPI_QPIC_SNAND=m, but CONFIG_MTD_NAND_QCOM=n: ERROR: modpost: "qcom_nandc_unalloc" [drivers/spi/spi-qpic-snand.ko] undefined! ... Fix this by dropping the explicit test for a built-in CONFIG_SPI_QPIC_SNAND completely. Kbuild handles multiple and mixed obj-y/obj-m rules for the same object file fine. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503280759.XhwLcV7m-lkp@intel.com/ Fixes: 7304d1909080ef0c ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Geert Uytterhoeven Signed-off-by: Miquel Raynal --- drivers/mtd/nand/Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index db516a45f0c526..44913ff1bf12cc 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -3,11 +3,8 @@ nandcore-objs := core.o bbt.o obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o obj-$(CONFIG_MTD_NAND_ECC_MEDIATEK) += ecc-mtk.o -ifeq ($(CONFIG_SPI_QPIC_SNAND),y) obj-$(CONFIG_SPI_QPIC_SNAND) += qpic_common.o -else obj-$(CONFIG_MTD_NAND_QCOM) += qpic_common.o -endif obj-y += onenand/ obj-y += raw/ obj-y += spi/ From d027951dc85cb2e15924c980dc22a6754d100c7c Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Wed, 2 Apr 2025 11:16:43 +0800 Subject: [PATCH 0132/2924] mtd: inftlcore: Add error check for inftl_read_oob() In INFTL_findwriteunit(), the return value of inftl_read_oob() need to be checked. A proper implementation can be found in INFTL_deleteblock(). The status will be set as SECTOR_IGNORE to break from the while-loop correctly if the inftl_read_oob() fails. Fixes: 8593fbc68b0d ("[MTD] Rework the out of band handling completely") Cc: stable@vger.kernel.org # v2.6+ Signed-off-by: Wentao Liang Signed-off-by: Miquel Raynal --- drivers/mtd/inftlcore.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c index 9739387cff8c91..58c6e1743f5c65 100644 --- a/drivers/mtd/inftlcore.c +++ b/drivers/mtd/inftlcore.c @@ -482,10 +482,11 @@ static inline u16 INFTL_findwriteunit(struct INFTLrecord *inftl, unsigned block) silly = MAX_LOOPS; while (thisEUN <= inftl->lastEUN) { - inftl_read_oob(mtd, (thisEUN * inftl->EraseSize) + - blockofs, 8, &retlen, (char *)&bci); - - status = bci.Status | bci.Status1; + if (inftl_read_oob(mtd, (thisEUN * inftl->EraseSize) + + blockofs, 8, &retlen, (char *)&bci) < 0) + status = SECTOR_IGNORE; + else + status = bci.Status | bci.Status1; pr_debug("INFTL: status of block %d in EUN %d is %x\n", block , writeEUN, status); From b79fe1829975556854665258cf4d2476784a89db Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Wed, 2 Apr 2025 15:56:23 +0800 Subject: [PATCH 0133/2924] mtd: rawnand: Add status chack in r852_ready() In r852_ready(), the dev get from r852_get_dev() need to be checked. An unstable device should not be ready. A proper implementation can be found in r852_read_byte(). Add a status check and return 0 when it is unstable. Fixes: 50a487e7719c ("mtd: rawnand: Pass a nand_chip object to chip->dev_ready()") Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Wentao Liang Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/r852.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c index b07c2f8b40350d..918974d088cf65 100644 --- a/drivers/mtd/nand/raw/r852.c +++ b/drivers/mtd/nand/raw/r852.c @@ -387,6 +387,9 @@ static int r852_wait(struct nand_chip *chip) static int r852_ready(struct nand_chip *chip) { struct r852_device *dev = r852_get_dev(nand_to_mtd(chip)); + if (dev->card_unstable) + return 0; + return !(r852_read_reg(dev, R852_CARD_STA) & R852_CARD_STA_BUSY); } From 1c1fd374a2fe72b8a6dde62d3c3a9fd153e7581c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 1 Apr 2025 15:36:37 +0200 Subject: [PATCH 0134/2924] mtd: spinand: Fix build with gcc < 7.5 __VA_OPT__ is a macro that is useful when some arguments can be present or not to entirely skip some part of a definition. Unfortunately, it is a too recent addition that some of the still supported old GCC versions do not know about, and is anyway not part of C11 that is the version used in the kernel. Find a trick to remove this macro, typically '__VA_ARGS__ + 0' is a workaround used in netlink.h which works very well here, as we either expect: - 0 - A positive value - No value, which means the field should be 0. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503181330.YcDXGy7F-lkp@intel.com/ Fixes: 7ce0d16d5802 ("mtd: spinand: Add an optional frequency to read from cache macros") Cc: stable@vger.kernel.org Tested-by: Jean Delvare Signed-off-by: Miquel Raynal --- include/linux/mtd/spinand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 1e748958dad404..311f145eb4e843 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -67,7 +67,7 @@ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1), \ - __VA_OPT__(SPI_MEM_OP_MAX_FREQ(__VA_ARGS__))) + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_FAST_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ From fdc7bd909a5f38793468e9cf9b6a9063d96c6234 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Sat, 29 Mar 2025 09:50:17 -0700 Subject: [PATCH 0135/2924] arm64: dts: rockchip: Allow Turing RK1 cooling fan to spin down The RK3588 thermal sensor driver only receives interrupts when a higher-temperature threshold is crossed; it cannot notify when the sensor cools back off. As a result, the driver must poll for temperature changes to detect when the conditions for a thermal trip are no longer met. However, it only does so if the DT enables polling. Before this patch, the RK1 DT did not enable polling, causing the fan to continue running at the speed corresponding to the highest temperature reached. Follow suit with similar RK3588 boards by setting a polling-delay of 1000ms, enabling the driver to detect when the sensor cools back off, allowing the fan speed to decrease as appropriate. Fixes: 7c8ec5e6b9d6 ("arm64: dts: rockchip: Enable automatic fan control on Turing RK1") Cc: stable@kernel.org # v6.13+ Signed-off-by: Sam Edwards Reviewed-by: Dragan Simic Link: https://lore.kernel.org/r/20250329165017.3885-1-CFSworks@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi index 711ac4f2c7cb66..60ad272982ad51 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi @@ -214,6 +214,8 @@ }; &package_thermal { + polling-delay = <1000>; + trips { package_active1: trip-active1 { temperature = <45000>; From e0bd7ecf6b2dc71215af699dffbf14bf0bc3d978 Mon Sep 17 00:00:00 2001 From: Dragan Simic Date: Mon, 24 Mar 2025 12:00:43 +0100 Subject: [PATCH 0136/2924] arm64: dts: rockchip: Remove overdrive-mode OPPs from RK3588J SoC dtsi The differences in the vendor-approved CPU and GPU OPPs for the standard Rockchip RK3588 variant [1] and the industrial Rockchip RK3588J variant [2] come from the latter, presumably, supporting an extended temperature range that's usually associated with industrial applications, despite the two SoC variant datasheets specifying the same upper limit for the allowed ambient temperature for both variants. However, the lower temperature limit is specified much lower for the RK3588J variant. [1][2] To be on the safe side and to ensure maximum longevity of the RK3588J SoCs, only the CPU and GPU OPPs that are declared by the vendor to be always safe for this SoC variant may be provided. As explained by the vendor [3] and according to the RK3588J datasheet, [2] higher-frequency/higher-voltage CPU and GPU OPPs can be used as well, but at the risk of reducing the SoC lifetime expectancy. Presumably, using the higher OPPs may be safe only when not enjoying the assumed extended temperature range that the RK3588J, as an SoC variant targeted specifically at higher-temperature, industrial applications, is made (or binned) for. Anyone able to keep their RK3588J-based board outside the above-presumed extended temperature range at all times, and willing to take the associated risk of possibly reducing the SoC lifetime expectancy, is free to apply a DT overlay that adds the higher CPU and GPU OPPs. With all this and the downstream RK3588(J) DT definitions [4][5] in mind, let's delete the RK3588J CPU and GPU OPPs that are not considered belonging to the normal operation mode for this SoC variant. To quote the RK3588J datasheet [2], "normal mode means the chipset works under safety voltage and frequency; for the industrial environment, highly recommend to keep in normal mode, the lifetime is reasonably guaranteed", while "overdrive mode brings higher frequency, and the voltage will increase accordingly; under the overdrive mode for a long time, the chipset may shorten the lifetime, especially in high-temperature condition". To sum the RK3588J datasheet [2] and the vendor-provided DTs up, [4][5] the maximum allowed CPU core, GPU and NPU frequencies are as follows: IP core | Normal mode | Overdrive mode ------------+-------------+---------------- Cortex-A55 | 1,296 MHz | 1,704 MHz Cortex-A76 | 1,608 MHz | 2,016 MHz GPU | 700 MHz | 850 MHz NPU | 800 MHz | 950 MHz Unfortunately, when it comes to the actual voltages for the RK3588J CPU and GPU OPPs, there's a discrepancy between the RK3588J datasheet [2] and the downstream kernel code. [4][5] The RK3588J datasheet states that "the max. working voltage of CPU/GPU/NPU is 0.75 V under the normal mode", while the downstream kernel code actually allows voltage ranges that go up to 0.95 V, which is still within the voltage range allowed by the datasheet. However, the RK3588J datasheet also tells us to "strictly refer to the software configuration of SDK and the hardware reference design", so let's embrace the voltage ranges provided by the downstream kernel code, which also prevents the undesirable theoretical outcome of ending up with no usable OPPs on a particular board, as a result of the board's voltage regulator(s) being unable to deliver the exact voltages, for whatever reason. The above-described voltage ranges for the RK3588J CPU OPPs remain taken from the downstream kernel code [4][5] by picking the highest, worst-bin values, which ensure that all RK3588J bins will work reliably. Yes, with some power inevitably wasted as unnecessarily generated heat, but the reliability is paramount, together with the longevity. This deficiency may be revisited separately at some point in the future. The provided RK3588J CPU OPPs follow the slightly debatable "provide only the highest-frequency OPP from the same-voltage group" approach that's been established earlier, [6] as a result of the "same-voltage, lower-frequency" OPPs being considered inefficient from the IPA governor's standpoint, which may also be revisited separately at some point in the future. [1] https://wiki.friendlyelec.com/wiki/images/e/ee/Rockchip_RK3588_Datasheet_V1.6-20231016.pdf [2] https://wmsc.lcsc.com/wmsc/upload/file/pdf/v2/lcsc/2403201054_Rockchip-RK3588J_C22364189.pdf [3] https://lore.kernel.org/linux-rockchip/e55125ed-64fb-455e-b1e4-cebe2cf006e4@cherry.de/T/#u [4] https://raw.githubusercontent.com/rockchip-linux/kernel/604cec4004abe5a96c734f2fab7b74809d2d742f/arch/arm64/boot/dts/rockchip/rk3588s.dtsi [5] https://raw.githubusercontent.com/rockchip-linux/kernel/604cec4004abe5a96c734f2fab7b74809d2d742f/arch/arm64/boot/dts/rockchip/rk3588j.dtsi [6] https://lore.kernel.org/all/20240229-rk-dts-additions-v3-5-6afe8473a631@gmail.com/ Fixes: 667885a68658 ("arm64: dts: rockchip: Add OPP data for CPU cores on RK3588j") Fixes: a7b2070505a2 ("arm64: dts: rockchip: Split GPU OPPs of RK3588 and RK3588j") Cc: stable@vger.kernel.org Cc: Heiko Stuebner Cc: Alexey Charkov Helped-by: Quentin Schulz Reviewed-by: Quentin Schulz Signed-off-by: Dragan Simic Link: https://lore.kernel.org/r/eeec0d30d79b019d111b3f0aa2456e69896b2caa.1742813866.git.dsimic@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588j.dtsi | 53 ++++++++--------------- 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi index bce72bac4503b5..3045cb3bd68c63 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi @@ -11,20 +11,15 @@ compatible = "operating-points-v2"; opp-shared; - opp-1416000000 { - opp-hz = /bits/ 64 <1416000000>; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <750000 750000 950000>; clock-latency-ns = <40000>; opp-suspend; }; - opp-1608000000 { - opp-hz = /bits/ 64 <1608000000>; - opp-microvolt = <887500 887500 950000>; - clock-latency-ns = <40000>; - }; - opp-1704000000 { - opp-hz = /bits/ 64 <1704000000>; - opp-microvolt = <937500 937500 950000>; + opp-1296000000 { + opp-hz = /bits/ 64 <1296000000>; + opp-microvolt = <775000 775000 950000>; clock-latency-ns = <40000>; }; }; @@ -33,9 +28,14 @@ compatible = "operating-points-v2"; opp-shared; + opp-1200000000{ + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <750000 750000 950000>; + clock-latency-ns = <40000>; + }; opp-1416000000 { opp-hz = /bits/ 64 <1416000000>; - opp-microvolt = <750000 750000 950000>; + opp-microvolt = <762500 762500 950000>; clock-latency-ns = <40000>; }; opp-1608000000 { @@ -43,25 +43,20 @@ opp-microvolt = <787500 787500 950000>; clock-latency-ns = <40000>; }; - opp-1800000000 { - opp-hz = /bits/ 64 <1800000000>; - opp-microvolt = <875000 875000 950000>; - clock-latency-ns = <40000>; - }; - opp-2016000000 { - opp-hz = /bits/ 64 <2016000000>; - opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; }; cluster2_opp_table: opp-table-cluster2 { compatible = "operating-points-v2"; opp-shared; + opp-1200000000{ + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <750000 750000 950000>; + clock-latency-ns = <40000>; + }; opp-1416000000 { opp-hz = /bits/ 64 <1416000000>; - opp-microvolt = <750000 750000 950000>; + opp-microvolt = <762500 762500 950000>; clock-latency-ns = <40000>; }; opp-1608000000 { @@ -69,16 +64,6 @@ opp-microvolt = <787500 787500 950000>; clock-latency-ns = <40000>; }; - opp-1800000000 { - opp-hz = /bits/ 64 <1800000000>; - opp-microvolt = <875000 875000 950000>; - clock-latency-ns = <40000>; - }; - opp-2016000000 { - opp-hz = /bits/ 64 <2016000000>; - opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; }; gpu_opp_table: opp-table { @@ -104,10 +89,6 @@ opp-hz = /bits/ 64 <700000000>; opp-microvolt = <750000 750000 850000>; }; - opp-850000000 { - opp-hz = /bits/ 64 <800000000>; - opp-microvolt = <787500 787500 850000>; - }; }; }; From 7fc027e894fae7c8661b52b1fde223004b2a8e0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 19 Mar 2025 12:31:38 +0100 Subject: [PATCH 0137/2924] arm64: dts: rockchip: Add pinmuxing for eMMC on QNAP TS433 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now the emmc worked when booting because the bootrom set up the pin config correctly to load the initial bootloader from it. So when the kernel started it "just" reused this setup but never made sure it was actually correct. This then breaks when the system is started via some other means, like downloading the initial bootloader via the bootrom usb download. With actual emmc pin-config added, barebox is able to access the eMMC even when booted via USB. Fixes: 9da1c0327d58 ("arm64: dts: rockchip: Add basic support for QNAP TS-433") Signed-off-by: Uwe Kleine-König [refined commit message to explain that we're currently just running on bootom-goodwill] Link: https://lore.kernel.org/r/20250319113138.125192-2-uwe@kleine-koenig.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts b/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts index 7bd32d230ad2f1..b80d628c426b71 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts @@ -619,6 +619,8 @@ bus-width = <8>; max-frequency = <200000000>; non-removable; + pinctrl-names = "default"; + pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd &emmc_datastrobe>; status = "okay"; }; From 120305ab9017ee76ac0bed4b72b349faeb1deb1c Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Mon, 7 Apr 2025 14:19:39 +0800 Subject: [PATCH 0138/2924] ALSA: hda/tas2781: Remove unnecessary NULL check before release_firmware() release_firmware() checks for NULL pointers internally. Remove unneeded NULL check for fmw here. Signed-off-by: Chen Ni Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250407061939.2771803-1-nichen@iscas.ac.cn Signed-off-by: Takashi Iwai --- sound/pci/hda/tas2781_hda_spi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/pci/hda/tas2781_hda_spi.c b/sound/pci/hda/tas2781_hda_spi.c index 399f2e4c3b62b4..25175ff4b3aaa5 100644 --- a/sound/pci/hda/tas2781_hda_spi.c +++ b/sound/pci/hda/tas2781_hda_spi.c @@ -1003,8 +1003,7 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) */ out: - if (fmw) - release_firmware(fmw); + release_firmware(fmw); pm_runtime_mark_last_busy(tas_hda->priv->dev); pm_runtime_put_autosuspend(tas_hda->priv->dev); } From 75f8c87555e6ddeff2c49bd47460a71a940edc48 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 6 Mar 2025 09:45:52 +0100 Subject: [PATCH 0139/2924] irqchip/davinci: Remove leftover header Commit fa8dede4d0a0 ("irqchip: remove davinci aintc driver") removed the davinci aintc driver but left behind the associated header. Remove it now. Fixes: fa8dede4d0a0 ("irqchip: remove davinci aintc driver") Signed-off-by: Bartosz Golaszewski Signed-off-by: Thomas Gleixner Acked-by: Arnd Bergmann Link: https://lore.kernel.org/all/20250306084552.15894-1-brgl@bgdev.pl --- include/linux/irqchip/irq-davinci-aintc.h | 27 ----------------------- 1 file changed, 27 deletions(-) delete mode 100644 include/linux/irqchip/irq-davinci-aintc.h diff --git a/include/linux/irqchip/irq-davinci-aintc.h b/include/linux/irqchip/irq-davinci-aintc.h deleted file mode 100644 index ea4e087fac9833..00000000000000 --- a/include/linux/irqchip/irq-davinci-aintc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2019 Texas Instruments - */ - -#ifndef _LINUX_IRQ_DAVINCI_AINTC_ -#define _LINUX_IRQ_DAVINCI_AINTC_ - -#include - -/** - * struct davinci_aintc_config - configuration data for davinci-aintc driver. - * - * @reg: register range to map - * @num_irqs: number of HW interrupts supported by the controller - * @prios: an array of size num_irqs containing priority settings for - * each interrupt - */ -struct davinci_aintc_config { - struct resource reg; - unsigned int num_irqs; - u8 *prios; -}; - -void davinci_aintc_init(const struct davinci_aintc_config *config); - -#endif /* _LINUX_IRQ_DAVINCI_AINTC_ */ From 305825d09b15586d2e4311e0c12f10f2a0c18ac5 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Sat, 5 Apr 2025 13:56:24 +0800 Subject: [PATCH 0140/2924] irqchip/sg2042-msi: Add missing chip flags The sg2042-msi driver uses the fallback callbacks set by msi_lib_init_dev_msi_info(). commit 1c000dcaad2b ("irqchip/irq-msi-lib: Optionally set default irq_eoi()/irq_ack()") changed the behavior of the fallback mechanism by making it opt-in. The sg2042-msi was not fixed up for this, which causes a NULL pointer dereference due to the missing irq_ack() callback. Add the missing chip flag to msi_parent_ops. Fixes: c66741549424 ("irqchip: Add the Sophgo SG2042 MSI interrupt controller") Signed-off-by: Inochi Amaoto Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250217085657.789309-3-apatel@ventanamicro.com Link: https://lore.kernel.org/all/20250405055625.1530180-1-inochiama@gmail.com --- drivers/irqchip/irq-sg2042-msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-sg2042-msi.c b/drivers/irqchip/irq-sg2042-msi.c index ee682e87eb8be2..375b55aa0acd8d 100644 --- a/drivers/irqchip/irq-sg2042-msi.c +++ b/drivers/irqchip/irq-sg2042-msi.c @@ -151,6 +151,7 @@ static const struct irq_domain_ops sg2042_msi_middle_domain_ops = { static const struct msi_parent_ops sg2042_msi_parent_ops = { .required_flags = SG2042_MSI_FLAGS_REQUIRED, .supported_flags = SG2042_MSI_FLAGS_SUPPORTED, + .chip_flags = MSI_CHIP_FLAG_SET_ACK, .bus_select_mask = MATCH_PCI_MSI, .bus_select_token = DOMAIN_BUS_NEXUS, .prefix = "SG2042-", From ed583d008edcb021c30ecad2e9d5c868d9ed5862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 7 Feb 2025 23:54:04 +0200 Subject: [PATCH 0141/2924] drm/i915: Fix scanline_offset for LNL+ and BMG+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out LNL+ and BMG+ no longer have the weird extra scanline offset for HDMI outputs. Fix intel_crtc_scanline_offset() accordingly so that scanline evasion/etc. works correctly on HDMI outputs on these new platforms. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250207215406.19348-2-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar (cherry picked from commit fede97b72b957b46260ca98fc924ba2b916e50d7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_vblank.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 4efd4f7d497abb..7b240ce681a01a 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -222,7 +222,9 @@ int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) * However if queried just before the start of vblank we'll get an * answer that's slightly in the future. */ - if (DISPLAY_VER(display) == 2) + if (DISPLAY_VER(display) >= 20 || display->platform.battlemage) + return 1; + else if (DISPLAY_VER(display) == 2) return -1; else if (HAS_DDI(display) && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return 2; From 9d3d9776bd3bd9c32d460dfe6c3363134de578bc Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Mon, 10 Mar 2025 20:58:21 +0530 Subject: [PATCH 0142/2924] drm/i915: Disable RPG during live selftest The Forcewake timeout issue has been observed on Gen 12.0 and above. To address this, disable Render Power-Gating (RPG) during live self-tests for these generations. The temporary workaround 'drm/i915/mtl: do not enable render power-gating on MTL' disables RPG globally, which is unnecessary since the issues were only seen during self-tests. v2: take runtime pm wakeref Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/9413 Fixes: 25e7976db86b ("drm/i915/mtl: do not enable render power-gating on MTL") Cc: Rodrigo Vivi Cc: Andi Shyti Cc: Andrzej Hajda Signed-off-by: Badal Nilawar Signed-off-by: Sk Anirban Reviewed-by: Karthik Poosa Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20250310152821.2931678-1-sk.anirban@intel.com (cherry picked from commit 0a4ae87706c6d15d14648e428c3a76351f823e48) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_rc6.c | 19 ++++--------------- .../gpu/drm/i915/selftests/i915_selftest.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 9378d5901c4939..9ca42589da4dad 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -117,21 +117,10 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - /* - * BSpec 52698 - Render powergating must be off. - * FIXME BSpec is outdated, disabling powergating for MTL is just - * temporary wa and should be removed after fixing real cause - * of forcewake timeouts. - */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) - pg_enable = - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; - else - pg_enable = - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) { for (i = 0; i < I915_MAX_VCS; i++) diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index fee76c1d2f4500..889281819c5b13 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -23,7 +23,9 @@ #include +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_regs.h" #include "gt/uc/intel_gsc_fw.h" #include "i915_driver.h" @@ -253,11 +255,27 @@ int i915_mock_selftests(void) int i915_live_selftests(struct pci_dev *pdev) { struct drm_i915_private *i915 = pdev_to_i915(pdev); + struct intel_uncore *uncore = &i915->uncore; int err; + u32 pg_enable; + intel_wakeref_t wakeref; if (!i915_selftest.live) return 0; + /* + * FIXME Disable render powergating, this is temporary wa and should be removed + * after fixing real cause of forcewake timeouts. + */ + with_intel_runtime_pm(uncore->rpm, wakeref) { + if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 00), IP_VER(12, 74))) { + pg_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); + if (pg_enable & GEN9_RENDER_PG_ENABLE) + intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, + pg_enable & ~GEN9_RENDER_PG_ENABLE); + } + } + __wait_gsc_proxy_completed(i915); __wait_gsc_huc_load_completed(i915); From 2e43ae7dd71cd9bb0d1bce1d3306bf77523feb81 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 27 Mar 2025 14:47:39 +0200 Subject: [PATCH 0143/2924] drm/i915/gvt: fix unterminated-string-initialization warning Initializing const char opregion_signature[16] = OPREGION_SIGNATURE (which is "IntelGraphicsMem") drops the NUL termination of the string. This is intentional, but the compiler doesn't know this. Switch to initializing header->signature directly from the string litaral, with sizeof destination rather than source. We don't treat the signature as a string other than for initialization; it's really just a blob of binary data. Add a static assert for good measure to cross-check the sizes. Reported-by: Kees Cook Closes: https://lore.kernel.org/r/20250310222355.work.417-kees@kernel.org Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13934 Tested-by: Nicolas Chauvet Tested-by: Damian Tometzki Cc: stable@vger.kernel.org Reviewed-by: Zhenyu Wang Link: https://lore.kernel.org/r/20250327124739.2609656-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 4f8207469094bd04aad952258ceb9ff4c77b6bfa) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gvt/opregion.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 509f9ccae3a9f0..dbad4d853d3ade 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -222,7 +222,6 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu) u8 *buf; struct opregion_header *header; struct vbt v; - const char opregion_signature[16] = OPREGION_SIGNATURE; gvt_dbg_core("init vgpu%d opregion\n", vgpu->id); vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_KERNEL | @@ -236,8 +235,10 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu) /* emulated opregion with VBT mailbox only */ buf = (u8 *)vgpu_opregion(vgpu)->va; header = (struct opregion_header *)buf; - memcpy(header->signature, opregion_signature, - sizeof(opregion_signature)); + + static_assert(sizeof(header->signature) == sizeof(OPREGION_SIGNATURE) - 1); + memcpy(header->signature, OPREGION_SIGNATURE, sizeof(header->signature)); + header->size = 0x8; header->opregion_ver = 0x02000000; header->mboxes = MBOX_VBT; From 584cf613c24a4250d9be4819efc841aa2624d5b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 6 Mar 2025 23:07:40 +0200 Subject: [PATCH 0144/2924] drm/i915/dp: Reject HBR3 when sink doesn't support TPS4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the DP spec TPS4 is mandatory for HBR3. We have however seen some broken eDP sinks that violate this and declare support for HBR3 without TPS4 support. At least in the case of the icl Dell XPS 13 7390 this results in an unstable output. Reject HBR3 when TPS4 supports is unavailable on the sink. v2: Leave breadcrumbs in dmesg to avoid head scratching (Jani) Cc: stable@vger.kernel.org Cc: Jani Nikula Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/5969 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250306210740.11886-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 38188a7f575dacba1120a59fd5d62c7f3313c0fa) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 49 +++++++++++++++++++++---- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index a236b5fc7a3d7b..9476aaa9190056 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -172,10 +172,28 @@ int intel_dp_link_symbol_clock(int rate) static int max_dprx_rate(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + int max_rate; + if (intel_dp_tunnel_bw_alloc_is_enabled(intel_dp)) - return drm_dp_tunnel_max_dprx_rate(intel_dp->tunnel); + max_rate = drm_dp_tunnel_max_dprx_rate(intel_dp->tunnel); + else + max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); - return drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); + /* + * Some broken eDP sinks illegally declare support for + * HBR3 without TPS4, and are unable to produce a stable + * output. Reject HBR3 when TPS4 is not available. + */ + if (max_rate >= 810000 && !drm_dp_tps4_supported(intel_dp->dpcd)) { + drm_dbg_kms(display->drm, + "[ENCODER:%d:%s] Rejecting HBR3 due to missing TPS4 support\n", + encoder->base.base.id, encoder->base.name); + max_rate = 540000; + } + + return max_rate; } static int max_dprx_lane_count(struct intel_dp *intel_dp) @@ -4170,6 +4188,9 @@ static void intel_edp_mso_init(struct intel_dp *intel_dp) static void intel_edp_set_sink_rates(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + intel_dp->num_sink_rates = 0; if (intel_dp->edp_dpcd[0] >= DP_EDP_14) { @@ -4180,10 +4201,7 @@ intel_edp_set_sink_rates(struct intel_dp *intel_dp) sink_rates, sizeof(sink_rates)); for (i = 0; i < ARRAY_SIZE(sink_rates); i++) { - int val = le16_to_cpu(sink_rates[i]); - - if (val == 0) - break; + int rate; /* Value read multiplied by 200kHz gives the per-lane * link rate in kHz. The source rates are, however, @@ -4191,7 +4209,24 @@ intel_edp_set_sink_rates(struct intel_dp *intel_dp) * back to symbols is * (val * 200kHz)*(8/10 ch. encoding)*(1/8 bit to Byte) */ - intel_dp->sink_rates[i] = (val * 200) / 10; + rate = le16_to_cpu(sink_rates[i]) * 200 / 10; + + if (rate == 0) + break; + + /* + * Some broken eDP sinks illegally declare support for + * HBR3 without TPS4, and are unable to produce a stable + * output. Reject HBR3 when TPS4 is not available. + */ + if (rate >= 810000 && !drm_dp_tps4_supported(intel_dp->dpcd)) { + drm_dbg_kms(display->drm, + "[ENCODER:%d:%s] Rejecting HBR3 due to missing TPS4 support\n", + encoder->base.base.id, encoder->base.name); + break; + } + + intel_dp->sink_rates[i] = rate; } intel_dp->num_sink_rates = i; } From bc1feb8174b7e46c1806a6f684d89a47508f3a53 Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Mon, 24 Mar 2025 10:22:33 -0700 Subject: [PATCH 0145/2924] drm/i915/xe2hpd: Identify the memory type for SKUs with GDDR + ECC Some SKUs of Xe2_HPD platforms (such as BMG) have GDDR memory type with ECC enabled. We need to identify this scenario and add a new case in xelpdp_get_dram_info() to handle it. In addition, the derating value needs to be adjusted accordingly to compensate for the limited bandwidth. Bspec: 64602 Cc: Matt Roper Fixes: 3adcf970dc7e ("drm/xe/bmg: Drop force_probe requirement") Cc: stable@vger.kernel.org Signed-off-by: Vivek Kasireddy Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250324-tip-v2-1-38397de319f8@intel.com (cherry picked from commit 327e30123cafcb45c0fc5843da0367b90332999d) Signed-off-by: Lucas De Marchi Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_bw.c | 14 +++++++++++++- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/soc/intel_dram.c | 4 ++++ drivers/gpu/drm/xe/xe_device_types.h | 1 + 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 048be287224774..98b898a1de8ffc 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -244,6 +244,7 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, qi->deinterleave = 4; break; case INTEL_DRAM_GDDR: + case INTEL_DRAM_GDDR_ECC: qi->channel_width = 32; break; default: @@ -398,6 +399,12 @@ static const struct intel_sa_info xe2_hpd_sa_info = { /* Other values not used by simplified algorithm */ }; +static const struct intel_sa_info xe2_hpd_ecc_sa_info = { + .derating = 45, + .deprogbwlimit = 53, + /* Other values not used by simplified algorithm */ +}; + static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) { struct intel_qgv_info qi = {}; @@ -740,10 +747,15 @@ static unsigned int icl_qgv_bw(struct drm_i915_private *i915, void intel_bw_init_hw(struct drm_i915_private *dev_priv) { + const struct dram_info *dram_info = &dev_priv->dram_info; + if (!HAS_DISPLAY(dev_priv)) return; - if (DISPLAY_VERx100(dev_priv) >= 1401 && IS_DGFX(dev_priv)) + if (DISPLAY_VERx100(dev_priv) >= 1401 && IS_DGFX(dev_priv) && + dram_info->type == INTEL_DRAM_GDDR_ECC) + xe2_hpd_get_bw_info(dev_priv, &xe2_hpd_ecc_sa_info); + else if (DISPLAY_VERx100(dev_priv) >= 1401 && IS_DGFX(dev_priv)) xe2_hpd_get_bw_info(dev_priv, &xe2_hpd_sa_info); else if (DISPLAY_VER(dev_priv) >= 14) tgl_get_bw_info(dev_priv, &mtl_sa_info); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ffc346379cc2c2..54538b6f85df5a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -305,6 +305,7 @@ struct drm_i915_private { INTEL_DRAM_DDR5, INTEL_DRAM_LPDDR5, INTEL_DRAM_GDDR, + INTEL_DRAM_GDDR_ECC, } type; u8 num_qgv_points; u8 num_psf_gv_points; diff --git a/drivers/gpu/drm/i915/soc/intel_dram.c b/drivers/gpu/drm/i915/soc/intel_dram.c index 9e310f4099f423..f60eedb0e92cfe 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.c +++ b/drivers/gpu/drm/i915/soc/intel_dram.c @@ -687,6 +687,10 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915) drm_WARN_ON(&i915->drm, !IS_DGFX(i915)); dram_info->type = INTEL_DRAM_GDDR; break; + case 9: + drm_WARN_ON(&i915->drm, !IS_DGFX(i915)); + dram_info->type = INTEL_DRAM_GDDR_ECC; + break; default: MISSING_CASE(val); return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 72ef0b6fc4250f..9f8667ebba853e 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -585,6 +585,7 @@ struct xe_device { INTEL_DRAM_DDR5, INTEL_DRAM_LPDDR5, INTEL_DRAM_GDDR, + INTEL_DRAM_GDDR_ECC, } type; u8 num_qgv_points; u8 num_psf_gv_points; From 8578b2f7e1fb79d4b92b62fbbe913548bb363654 Mon Sep 17 00:00:00 2001 From: Will Pierce Date: Wed, 2 Apr 2025 01:14:26 -0700 Subject: [PATCH 0146/2924] riscv: Use kvmalloc_array on relocation_hashtable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The number of relocations may be a huge value that is unallocatable by kmalloc. Use kvmalloc instead so that it does not fail. Fixes: 8fd6c5142395 ("riscv: Add remaining module relocations") Suggested-by: Clément Léger Reviewed-by: Alexandre Ghiti Signed-off-by: Will Pierce Link: https://lore.kernel.org/r/20250402081426.5197-1-wgpierce17@gmail.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/module.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 47d0ebeec93c23..0ae34d79b87bd9 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -648,7 +648,7 @@ process_accumulated_relocations(struct module *me, kfree(bucket_iter); } - kfree(*relocation_hashtable); + kvfree(*relocation_hashtable); } static int add_relocation_to_accumulate(struct module *me, int type, @@ -752,9 +752,10 @@ initialize_relocation_hashtable(unsigned int num_relocations, hashtable_size <<= should_double_size; - *relocation_hashtable = kmalloc_array(hashtable_size, - sizeof(**relocation_hashtable), - GFP_KERNEL); + /* Number of relocations may be large, so kvmalloc it */ + *relocation_hashtable = kvmalloc_array(hashtable_size, + sizeof(**relocation_hashtable), + GFP_KERNEL); if (!*relocation_hashtable) return 0; From 70fc03cd76311a06c8c84deb70b2e16837497774 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 25 Sep 2024 16:25:32 +0200 Subject: [PATCH 0147/2924] Documentation: riscv: Fix typo MIMPLID -> MIMPID The macro that is really defined is RISCV_HWPROBE_KEY_MIMPID, not RISCV_HWPROBE_KEY_MIMPLID (difference is the 'L'). Also, the riscv privileged specification names the register "mimpid", not "mimplid". Correct these typos. Signed-off-by: Nam Cao Reviewed-by: Conor Dooley Reviewed-by: Andrew Jones Reviewed-by: Evan Green Link: https://lore.kernel.org/r/20240925142532.31808-1-namcao@linutronix.de Signed-off-by: Alexandre Ghiti --- Documentation/arch/riscv/hwprobe.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 53607d962653b6..f60bf599175597 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -51,7 +51,7 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_KEY_MARCHID`: Contains the value of ``marchid``, as defined by the RISC-V privileged architecture specification. -* :c:macro:`RISCV_HWPROBE_KEY_MIMPLID`: Contains the value of ``mimplid``, as +* :c:macro:`RISCV_HWPROBE_KEY_MIMPID`: Contains the value of ``mimpid``, as defined by the RISC-V privileged architecture specification. * :c:macro:`RISCV_HWPROBE_KEY_BASE_BEHAVIOR`: A bitmask containing the base From 0f2946bb172632e122d4033e0b03f85230a29510 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Mon, 31 Mar 2025 13:29:12 -0400 Subject: [PATCH 0148/2924] xen: Change xen-acpi-processor dom0 dependency xen-acpi-processor functions under a PVH dom0 with only a xen_initial_domain() runtime check. Change the Kconfig dependency from PV dom0 to generic dom0 to reflect that. Suggested-by: Jan Beulich Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Tested-by: Jan Beulich Signed-off-by: Juergen Gross Message-ID: <20250331172913.51240-1-jason.andryuk@amd.com> --- drivers/xen/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index f7d6f47971fdf8..24f485827e0399 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -278,7 +278,7 @@ config XEN_PRIVCMD_EVENTFD config XEN_ACPI_PROCESSOR tristate "Xen ACPI processor" - depends on XEN && XEN_PV_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ + depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ default m help This ACPI processor uploads Power Management information to the Xen From eb3a04a8516ee9b5174379306f94279fc90424c4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 10 Feb 2025 15:11:22 +0100 Subject: [PATCH 0149/2924] ovl: don't allow datadir only In theory overlayfs could support upper layer directly referring to a data layer, but there's no current use case for this. Originally, when data-only layers were introduced, this wasn't allowed, only introduced by the "datadir+" feature, but without actually handling this case, resulting in an Oops. Fix by disallowing datadir without lowerdir. Reported-by: Giuseppe Scrivano Fixes: 24e16e385f22 ("ovl: add support for appending lowerdirs one by one") Cc: # v6.7 Reviewed-by: Amir Goldstein Reviewed-by: Alexander Larsson Reviewed-by: Christian Brauner Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b63474d1b06476..e19940d649ca3a 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1138,6 +1138,11 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, return ERR_PTR(-EINVAL); } + if (ctx->nr == ctx->nr_data) { + pr_err("at least one non-data lowerdir is required\n"); + return ERR_PTR(-EINVAL); + } + err = -EINVAL; for (i = 0; i < ctx->nr; i++) { l = &ctx->lower[i]; From a6eb9a4a69cc360b930dad9dc8513f8fd9b3577f Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 10 Feb 2025 13:07:55 +0100 Subject: [PATCH 0150/2924] ovl: remove unused forward declaration The ovl_get_verity_xattr() function was never added, only its declaration. Signed-off-by: Giuseppe Scrivano Fixes: 184996e92e86 ("ovl: Validate verity xattr when resolving lowerdata") Reviewed-by: Amir Goldstein Reviewed-by: Alexander Larsson Reviewed-by: Christian Brauner Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 6f2f8f4cfbbc17..aef942a758cea5 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -541,8 +541,6 @@ int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, bool ovl_is_metacopy_dentry(struct dentry *dentry); char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding); int ovl_ensure_verity_loaded(struct path *path); -int ovl_get_verity_xattr(struct ovl_fs *ofs, const struct path *path, - u8 *digest_buf, int *buf_length); int ovl_validate_verity(struct ovl_fs *ofs, struct path *metapath, struct path *datapath); From 87af633689ce16ddb166c80f32b120e50b1295de Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 7 Apr 2025 10:28:37 +0200 Subject: [PATCH 0151/2924] x86/xen: fix balloon target initialization for PVH dom0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PVH dom0 re-uses logic from PV dom0, in which RAM ranges not assigned to dom0 are re-used as scratch memory to map foreign and grant pages. Such logic relies on reporting those unpopulated ranges as RAM to Linux, and mark them as reserved. This way Linux creates the underlying page structures required for metadata management. Such approach works fine on PV because the initial balloon target is calculated using specific Xen data, that doesn't take into account the memory type changes described above. However on HVM and PVH the initial balloon target is calculated using get_num_physpages(), and that function does take into account the unpopulated RAM regions used as scratch space for remote domain mappings. This leads to PVH dom0 having an incorrect initial balloon target, which causes malfunction (excessive memory freeing) of the balloon driver if the dom0 memory target is later adjusted from the toolstack. Fix this by using xen_released_pages to account for any pages that are part of the memory map, but are already unpopulated when the balloon driver is initialized. This accounts for any regions used for scratch remote mappings. Note on x86 xen_released_pages definition is moved to enlighten.c so it's uniformly available for all Xen-enabled builds. Take the opportunity to unify PV with PVH/HVM guests regarding the usage of get_num_physpages(), as that avoids having to add different logic for PV vs PVH in both balloon_add_regions() and arch_xen_unpopulated_init(). Much like a6aa4eb994ee, the code in this changeset should have been part of 38620fc4e893. Fixes: a6aa4eb994ee ('xen/x86: add extra pages to unpopulated-alloc if available') Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Message-ID: <20250407082838.65495-1-roger.pau@citrix.com> --- arch/x86/xen/enlighten.c | 10 ++++++++++ arch/x86/xen/setup.c | 3 --- drivers/xen/balloon.c | 34 ++++++++++++++++++++++++---------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 43dcd8c7badc08..1b7710bd0d0511 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -70,6 +70,9 @@ EXPORT_SYMBOL(xen_start_flags); */ struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; +/* Number of pages released from the initial allocation. */ +unsigned long xen_released_pages; + static __ref void xen_get_vendor(void) { init_cpu_devs(); @@ -466,6 +469,13 @@ int __init arch_xen_unpopulated_init(struct resource **res) xen_free_unpopulated_pages(1, &pg); } + /* + * Account for the region being in the physmap but unpopulated. + * The value in xen_released_pages is used by the balloon + * driver to know how much of the physmap is unpopulated and + * set an accurate initial memory target. + */ + xen_released_pages += xen_extra_mem[i].n_pfns; /* Zero so region is not also added to the balloon driver. */ xen_extra_mem[i].n_pfns = 0; } diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index c3db71d96c434a..3823e52aef523c 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -37,9 +37,6 @@ #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024) -/* Number of pages released from the initial allocation. */ -unsigned long xen_released_pages; - /* Memory map would allow PCI passthrough. */ bool xen_pv_pci_possible; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 65d4e7fa1eb8df..8c852807ba1c10 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -679,7 +679,7 @@ void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages) } EXPORT_SYMBOL(xen_free_ballooned_pages); -static void __init balloon_add_regions(void) +static int __init balloon_add_regions(void) { unsigned long start_pfn, pages; unsigned long pfn, extra_pfn_end; @@ -702,26 +702,38 @@ static void __init balloon_add_regions(void) for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) balloon_append(pfn_to_page(pfn)); - balloon_stats.total_pages += extra_pfn_end - start_pfn; + /* + * Extra regions are accounted for in the physmap, but need + * decreasing from current_pages to balloon down the initial + * allocation, because they are already accounted for in + * total_pages. + */ + if (extra_pfn_end - start_pfn >= balloon_stats.current_pages) { + WARN(1, "Extra pages underflow current target"); + return -ERANGE; + } + balloon_stats.current_pages -= extra_pfn_end - start_pfn; } + + return 0; } static int __init balloon_init(void) { struct task_struct *task; + int rc; if (!xen_domain()) return -ENODEV; pr_info("Initialising balloon driver\n"); -#ifdef CONFIG_XEN_PV - balloon_stats.current_pages = xen_pv_domain() - ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn) - : get_num_physpages(); -#else - balloon_stats.current_pages = get_num_physpages(); -#endif + if (xen_released_pages >= get_num_physpages()) { + WARN(1, "Released pages underflow current target"); + return -ERANGE; + } + + balloon_stats.current_pages = get_num_physpages() - xen_released_pages; balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; @@ -738,7 +750,9 @@ static int __init balloon_init(void) register_sysctl_init("xen/balloon", balloon_table); #endif - balloon_add_regions(); + rc = balloon_add_regions(); + if (rc) + return rc; task = kthread_run(balloon_thread, NULL, "xen-balloon"); if (IS_ERR(task)) { From d8455a63f731b4f585acc4d49fd7ad78db63b3d0 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Thu, 13 Mar 2025 16:55:26 +0800 Subject: [PATCH 0152/2924] platform/x86: intel_pmc_ipc: add option to build without ACPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a configuration option that allows users to build the intel_pmc_ipc driver without ACPI support. This is useful for systems where ACPI is not available or desired. Based on the discussion from the patch [1], it was necessary to provide this option to accommodate specific use cases. Link: https://patchwork.kernel.org/project/netdevbpf/patch/20250227121522.1802832-6-yong.liang.choong@linux.intel.com/#26280764 [1] Signed-off-by: David E. Box Co-developed-by: Choong Yong Liang Signed-off-by: Choong Yong Liang Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250313085526.1439092-1-yong.liang.choong@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/intel_pmc_ipc.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/platform_data/x86/intel_pmc_ipc.h b/include/linux/platform_data/x86/intel_pmc_ipc.h index 6e603a8c075f90..1d34435b70016a 100644 --- a/include/linux/platform_data/x86/intel_pmc_ipc.h +++ b/include/linux/platform_data/x86/intel_pmc_ipc.h @@ -36,6 +36,7 @@ struct pmc_ipc_rbuf { */ static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf *rbuf) { +#ifdef CONFIG_ACPI struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object params[PMC_IPCS_PARAM_COUNT] = { {.type = ACPI_TYPE_INTEGER,}, @@ -89,6 +90,9 @@ static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf } return 0; +#else + return -ENODEV; +#endif /* CONFIG_ACPI */ } #endif /* INTEL_PMC_IPC_H */ From 6c44e5354d4d16d9d891a419ca3f57abfe18ce7a Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 1 Apr 2025 20:49:00 +0000 Subject: [PATCH 0153/2924] RAS/AMD/ATL: Include row[13] bit in row retirement Based on feedback from hardware folks, row[13] is part of the variable bits within a physical row (along with all column bits). Only half the physical addresses affected by a row are calculated if this bit is not included. Add the row[13] bit to the row retirement flow. Fixes: 3b566b30b414 ("RAS/AMD/ATL: Add MI300 row retirement support") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250401-fix-fmpm-extra-records-v1-1-840bcf7a8ac5@amd.com --- drivers/ras/amd/atl/umc.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index dc8aa12f63c811..cb8ace3d4e42d0 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -320,7 +320,7 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats. */ #define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL)) -static void retire_row_mi300(struct atl_err *a_err) +static void _retire_row_mi300(struct atl_err *a_err) { unsigned long addr; struct page *p; @@ -351,6 +351,23 @@ static void retire_row_mi300(struct atl_err *a_err) } } +/* + * In addition to the column bits, the row[13] bit should also be included when + * calculating addresses affected by a physical row. + * + * Instead of running through another loop over a single bit, just run through + * the column bits twice and flip the row[13] bit in-between. + * + * See MI300_UMC_MCA_ROW for the row bits in MCA_ADDR_UMC value. + */ +#define MI300_UMC_MCA_ROW13 BIT(23) +static void retire_row_mi300(struct atl_err *a_err) +{ + _retire_row_mi300(a_err); + a_err->addr ^= MI300_UMC_MCA_ROW13; + _retire_row_mi300(a_err); +} + void amd_retire_dram_row(struct atl_err *a_err) { if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) From 00e53d0f4baedd72196b65f00698b2a5a537dc2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 5 Apr 2025 11:27:12 +0200 Subject: [PATCH 0154/2924] pwm: Let pwm_set_waveform() succeed even if lowlevel driver rounded up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Waveform parameters are supposed to be rounded down to the next value possible for the hardware. However when a requested value is too small, .round_waveform_tohw() is supposed to pick the next bigger value and return 1. Let pwm_set_waveform() behave in the same way. This creates consistency between pwm_set_waveform_might_sleep() with exact=false and pwm_round_waveform_might_sleep() + pwm_set_waveform_might_sleep() with exact=true. The PWM_DEBUG rounding check has to be adapted to only trigger if no uprounding happend. Signed-off-by: Uwe Kleine-König Tested-by: Trevor Gamblin Link: https://lore.kernel.org/r/353dc6ae31be815e41fd3df89c257127ca0d1a09.1743844730.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/core.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index a40c511e009652..0387bd838487b1 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -322,7 +322,7 @@ static int __pwm_set_waveform(struct pwm_device *pwm, const struct pwm_ops *ops = chip->ops; char wfhw[WFHWSIZE]; struct pwm_waveform wf_rounded; - int err; + int err, ret_tohw; BUG_ON(WFHWSIZE < ops->sizeof_wfhw); @@ -332,16 +332,16 @@ static int __pwm_set_waveform(struct pwm_device *pwm, if (!pwm_wf_valid(wf)) return -EINVAL; - err = __pwm_round_waveform_tohw(chip, pwm, wf, &wfhw); - if (err) - return err; + ret_tohw = __pwm_round_waveform_tohw(chip, pwm, wf, &wfhw); + if (ret_tohw < 0) + return ret_tohw; if ((IS_ENABLED(CONFIG_PWM_DEBUG) || exact) && wf->period_length_ns) { err = __pwm_round_waveform_fromhw(chip, pwm, &wfhw, &wf_rounded); if (err) return err; - if (IS_ENABLED(CONFIG_PWM_DEBUG) && !pwm_check_rounding(wf, &wf_rounded)) + if (IS_ENABLED(CONFIG_PWM_DEBUG) && ret_tohw == 0 && !pwm_check_rounding(wf, &wf_rounded)) dev_err(&chip->dev, "Wrong rounding: requested %llu/%llu [+%llu], result %llu/%llu [+%llu]\n", wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns, wf_rounded.duty_length_ns, wf_rounded.period_length_ns, wf_rounded.duty_offset_ns); @@ -382,7 +382,8 @@ static int __pwm_set_waveform(struct pwm_device *pwm, wf_rounded.duty_length_ns, wf_rounded.period_length_ns, wf_rounded.duty_offset_ns, wf_set.duty_length_ns, wf_set.period_length_ns, wf_set.duty_offset_ns); } - return 0; + + return ret_tohw; } /** From fda6e0034e9da64e1cec31f4539b6c7abd9ed8be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 5 Apr 2025 11:27:13 +0200 Subject: [PATCH 0155/2924] pwm: stm32: Search an appropriate duty_cycle if period cannot be modified MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If another channel is already enabled period must not be modified. If the requested period is smaller than this unchangable period the driver is still supposed to search a duty_cycle according to the usual rounding rules. So don't set the duty_cycle to 0 but continue to determine an appropriate value for ccr. Fixes: deaba9cff809 ("pwm: stm32: Implementation of the waveform callbacks") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/f0c50df31daa3d6069bfa8d7fb3e71fae241b026.1743844730.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-stm32.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index a59de4de18b6e9..ec2c05c9ee7a67 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -103,22 +103,16 @@ static int stm32_pwm_round_waveform_tohw(struct pwm_chip *chip, if (ret) goto out; - /* - * calculate the best value for ARR for the given PSC, refuse if - * the resulting period gets bigger than the requested one. - */ arr = mul_u64_u64_div_u64(wf->period_length_ns, rate, (u64)NSEC_PER_SEC * (wfhw->psc + 1)); if (arr <= wfhw->arr) { /* - * requested period is small than the currently + * requested period is smaller than the currently * configured and unchangable period, report back the smallest - * possible period, i.e. the current state; Initialize - * ccr to anything valid. + * possible period, i.e. the current state and return 1 + * to indicate the wrong rounding direction. */ - wfhw->ccr = 0; ret = 1; - goto out; } } else { From a85e08a05bf77d5d03b4ac0c59768a606a1b640b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 5 Apr 2025 11:27:16 +0200 Subject: [PATCH 0156/2924] pwm: axi-pwmgen: Let .round_waveform_tohw() signal when request was rounded up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .round_waveform_tohw() is supposed to return 1 if the requested waveform cannot be implemented by rounding down all parameters. Also adapt the corresponding comment to better describe why the implemented procedure is right. Signed-off-by: Uwe Kleine-König Tested-by: Trevor Gamblin Link: https://lore.kernel.org/r/ba451573f0218d76645f068cec78bd97802cf010.1743844730.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-axi-pwmgen.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pwm/pwm-axi-pwmgen.c b/drivers/pwm/pwm-axi-pwmgen.c index 4259a0db9ff458..4337c8f5acf055 100644 --- a/drivers/pwm/pwm-axi-pwmgen.c +++ b/drivers/pwm/pwm-axi-pwmgen.c @@ -75,6 +75,7 @@ static int axi_pwmgen_round_waveform_tohw(struct pwm_chip *chip, { struct axi_pwmgen_waveform *wfhw = _wfhw; struct axi_pwmgen_ddata *ddata = axi_pwmgen_ddata_from_chip(chip); + int ret = 0; if (wf->period_length_ns == 0) { *wfhw = (struct axi_pwmgen_waveform){ @@ -91,12 +92,15 @@ static int axi_pwmgen_round_waveform_tohw(struct pwm_chip *chip, if (wfhw->period_cnt == 0) { /* * The specified period is too short for the hardware. - * Let's round .duty_cycle down to 0 to get a (somewhat) - * valid result. + * So round up .period_cnt to 1 (i.e. the smallest + * possible period). With .duty_cycle and .duty_offset + * being less than or equal to .period, their rounded + * value must be 0. */ wfhw->period_cnt = 1; wfhw->duty_cycle_cnt = 0; wfhw->duty_offset_cnt = 0; + ret = 1; } else { wfhw->duty_cycle_cnt = min_t(u64, mul_u64_u32_div(wf->duty_length_ns, ddata->clk_rate_hz, NSEC_PER_SEC), @@ -111,7 +115,7 @@ static int axi_pwmgen_round_waveform_tohw(struct pwm_chip *chip, pwm->hwpwm, wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns, ddata->clk_rate_hz, wfhw->period_cnt, wfhw->duty_cycle_cnt, wfhw->duty_offset_cnt); - return 0; + return ret; } static int axi_pwmgen_round_waveform_fromhw(struct pwm_chip *chip, struct pwm_device *pwm, From cfa5f336bdbde49cf0102ab55007b34361988fd1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Apr 2025 13:17:54 +0100 Subject: [PATCH 0157/2924] devpts: Fix type for uid and gid params Fix devpts to parse uid and gid params using the correct type so that they get interpreted in the context of the user namespace. Fixes: cc0876f817d6 ("vfs: Convert devpts to use the new mount API") Reported-by: Debarshi Ray Closes: https://github.com/containers/podman/issues/25751 Signed-off-by: Giuseppe Scrivano Signed-off-by: David Howells Link: https://lore.kernel.org/r/759134.1743596274@warthog.procyon.org.uk cc: Eric Sandeen cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/devpts/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 42e4d6eeb29f5b..9c20d78e41f6b4 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -89,12 +89,12 @@ enum { }; static const struct fs_parameter_spec devpts_param_specs[] = { - fsparam_u32 ("gid", Opt_gid), + fsparam_gid ("gid", Opt_gid), fsparam_s32 ("max", Opt_max), fsparam_u32oct ("mode", Opt_mode), fsparam_flag ("newinstance", Opt_newinstance), fsparam_u32oct ("ptmxmode", Opt_ptmxmode), - fsparam_u32 ("uid", Opt_uid), + fsparam_uid ("uid", Opt_uid), {} }; From a94fd938df2b1628da66b498aa0eeb89593bc7a2 Mon Sep 17 00:00:00 2001 From: Xiangsheng Hou Date: Mon, 7 Apr 2025 19:50:49 +0800 Subject: [PATCH 0158/2924] virtiofs: add filesystem context source name check In certain scenarios, for example, during fuzz testing, the source name may be NULL, which could lead to a kernel panic. Therefore, an extra check for the source name should be added. Fixes: a62a8ef9d97d ("virtio-fs: add virtiofs filesystem") Cc: # all LTS kernels Signed-off-by: Xiangsheng Hou Link: https://lore.kernel.org/20250407115111.25535-1-xiangsheng.hou@mediatek.com Signed-off-by: Christian Brauner --- fs/fuse/virtio_fs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 2c7b24cb67adb2..53c2626e90e723 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1669,6 +1669,9 @@ static int virtio_fs_get_tree(struct fs_context *fsc) unsigned int virtqueue_size; int err = -EIO; + if (!fsc->source) + return invalf(fsc, "No source specified"); + /* This gets a reference on virtio_fs object. This ptr gets installed * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() * to drop the reference to this object. From 9b58440a5b2fe78102ce1e9e03946645558d0f55 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 5 Apr 2025 11:17:49 +0100 Subject: [PATCH 0159/2924] io_uring/zcrx: put refill data into separate cache line Refill queue lock and other bits are only used from the allocation path on the rx softirq side, but it shares the cache line with other fields like ctx that are used also in the "syscall" path, which causes cache bouncing when softirq runs on a different CPU. Separate them into different cache lines. The first one now contains constant fields used by both contextx, followed by a line responsible for refill queue data. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6d1f598e27d623c07fc49d6baee13089a9b1216c.1743848241.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/zcrx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 706cc7300780d6..b59c560d5d84a6 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -26,11 +26,11 @@ struct io_zcrx_ifq { struct io_ring_ctx *ctx; struct io_zcrx_area *area; + spinlock_t rq_lock ____cacheline_aligned_in_smp; struct io_uring *rq_ring; struct io_uring_zcrx_rqe *rqes; - u32 rq_entries; u32 cached_rq_head; - spinlock_t rq_lock; + u32 rq_entries; u32 if_rxq; struct device *dev; From 5a17131a5dbd0ebca655bfb65fe3fe643ccc27f3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 5 Apr 2025 11:18:29 +0100 Subject: [PATCH 0160/2924] io_uring/zcrx: separate niov number from pages A preparation patch that separates the number of pages / folios from the number of niovs. They will not match in the future to support huge pages, improved dma mapping and/or larger chunk sizes. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0780ac966ee84200385737f45bb0f2ada052392b.1743848231.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/zcrx.c | 19 ++++++++++--------- io_uring/zcrx.h | 1 + 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 80d4a6f71d2931..0f46e0404c0454 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -181,7 +181,7 @@ static void io_zcrx_free_area(struct io_zcrx_area *area) kvfree(area->nia.niovs); kvfree(area->user_refs); if (area->pages) { - unpin_user_pages(area->pages, area->nia.num_niovs); + unpin_user_pages(area->pages, area->nr_folios); kvfree(area->pages); } kfree(area); @@ -192,7 +192,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, struct io_uring_zcrx_area_reg *area_reg) { struct io_zcrx_area *area; - int i, ret, nr_pages; + int i, ret, nr_pages, nr_iovs; struct iovec iov; if (area_reg->flags || area_reg->rq_area_token) @@ -220,27 +220,28 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, area->pages = NULL; goto err; } - area->nia.num_niovs = nr_pages; + area->nr_folios = nr_iovs = nr_pages; + area->nia.num_niovs = nr_iovs; - area->nia.niovs = kvmalloc_array(nr_pages, sizeof(area->nia.niovs[0]), + area->nia.niovs = kvmalloc_array(nr_iovs, sizeof(area->nia.niovs[0]), GFP_KERNEL | __GFP_ZERO); if (!area->nia.niovs) goto err; - area->freelist = kvmalloc_array(nr_pages, sizeof(area->freelist[0]), + area->freelist = kvmalloc_array(nr_iovs, sizeof(area->freelist[0]), GFP_KERNEL | __GFP_ZERO); if (!area->freelist) goto err; - for (i = 0; i < nr_pages; i++) + for (i = 0; i < nr_iovs; i++) area->freelist[i] = i; - area->user_refs = kvmalloc_array(nr_pages, sizeof(area->user_refs[0]), + area->user_refs = kvmalloc_array(nr_iovs, sizeof(area->user_refs[0]), GFP_KERNEL | __GFP_ZERO); if (!area->user_refs) goto err; - for (i = 0; i < nr_pages; i++) { + for (i = 0; i < nr_iovs; i++) { struct net_iov *niov = &area->nia.niovs[i]; niov->owner = &area->nia; @@ -248,7 +249,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, atomic_set(&area->user_refs[i], 0); } - area->free_count = nr_pages; + area->free_count = nr_iovs; area->ifq = ifq; /* we're only supporting one area per ifq for now */ area->area_id = 0; diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index b59c560d5d84a6..47f1c0e8c197ca 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -15,6 +15,7 @@ struct io_zcrx_area { bool is_mapped; u16 area_id; struct page **pages; + unsigned long nr_folios; /* freelist */ spinlock_t freelist_lock ____cacheline_aligned_in_smp; From cf960726eb65e8d0bfecbcce6cf95f47b1ffa6cc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 7 Apr 2025 07:51:23 -0600 Subject: [PATCH 0161/2924] io_uring/kbuf: reject zero sized provided buffers This isn't fixing a real issue, but there's also zero point in going through group and buffer setup, when the buffers are going to be rejected once attempted to get used. Cc: stable@vger.kernel.org Reported-by: syzbot+58928048fd1416f1457c@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 09810925967138..953d5e74256916 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -504,6 +504,8 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe p->nbufs = tmp; p->addr = READ_ONCE(sqe->addr); p->len = READ_ONCE(sqe->len); + if (!p->len) + return -EINVAL; if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs, &size)) From 56a49e19e1aea1374e9ba58cfd40260587bb7355 Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Mon, 7 Apr 2025 08:19:26 +0000 Subject: [PATCH 0162/2924] cpufreq/amd-pstate: Fix min_limit perf and freq updation for performance governor The min_limit perf and freq values can get disconnected with performance governor, as we only modify the perf value in the special case. Fix that by modifying the perf and freq values together Fixes: 009d1c29a451 ("cpufreq/amd-pstate: Move perf values into a union") Signed-off-by: Dhananjay Ugwekar Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250407081925.850473-1-dhananjay.ugwekar@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 6789eed1bb5ba0..c54c031939c830 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -607,13 +607,16 @@ static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) union perf_cached perf = READ_ONCE(cpudata->perf); perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max); - perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); + WRITE_ONCE(cpudata->max_limit_freq, policy->max); - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf); + WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq)); + } else { + perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); + WRITE_ONCE(cpudata->min_limit_freq, policy->min); + } - WRITE_ONCE(cpudata->max_limit_freq, policy->max); - WRITE_ONCE(cpudata->min_limit_freq, policy->min); WRITE_ONCE(cpudata->perf, perf); } From 9546ad1a9bda7362492114f5866b95b0ac4a100e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 3 Apr 2025 09:19:29 +0200 Subject: [PATCH 0163/2924] nvme: requeue namespace scan on missed AENs Scanning for namespaces can take some time, so if the target is reconfigured while the scan is running we may miss a Attached Namespace Attribute Changed AEN. Check if the NVME_AER_NOTICE_NS_CHANGED bit is set once the scan has finished, and requeue scanning to pick up any missed change. Signed-off-by: Hannes Reinecke Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cc23035148b4ba..dc298de76de172 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4295,6 +4295,10 @@ static void nvme_scan_work(struct work_struct *work) nvme_scan_ns_sequential(ctrl); } mutex_unlock(&ctrl->scan_lock); + + /* Requeue if we have missed AENs */ + if (test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) + nvme_queue_scan(ctrl); } /* From f35508b93a2fc127a8d185da2e5beade2f789977 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Mon, 7 Apr 2025 15:59:18 +0300 Subject: [PATCH 0164/2924] irqchip/irq-bcm2712-mip: Set EOI/ACK flags in msi_parent_ops The recently introduced msi_parent_ops::chip_flags sets irq_eoi()/irq_ack() conditionally, but MIP driver has not been updated. Populate chip_flags with EOI | ACK flags. Fixes: 32c6c054661a ("irqchip: Add Broadcom BCM2712 MSI-X interrupt controller") Signed-off-by: Stanimir Varbanov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250407125918.3021454-1-svarbanov@suse.de --- drivers/irqchip/irq-bcm2712-mip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-bcm2712-mip.c b/drivers/irqchip/irq-bcm2712-mip.c index 49a19db2d1e1b3..4cce24233f0f34 100644 --- a/drivers/irqchip/irq-bcm2712-mip.c +++ b/drivers/irqchip/irq-bcm2712-mip.c @@ -163,6 +163,7 @@ static const struct irq_domain_ops mip_middle_domain_ops = { static const struct msi_parent_ops mip_msi_parent_ops = { .supported_flags = MIP_MSI_FLAGS_SUPPORTED, .required_flags = MIP_MSI_FLAGS_REQUIRED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PCI_MSI, .prefix = "MIP-MSI-", From 1296dcbad2316882f98559762b518eaa6aefcd92 Mon Sep 17 00:00:00 2001 From: Mubin Sayyed Date: Thu, 3 Apr 2025 11:38:36 +0530 Subject: [PATCH 0165/2924] dt-bindings: xilinx: Remove myself from maintainership As I am leaving AMD and will no longer be maintaining these platform drivers, so removing myself from maintainership. Signed-off-by: Mubin Sayyed Acked-by: Damien Le Moal Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20250403060836.2602361-1-mubin.sayyed@amd.com Signed-off-by: Rob Herring (Arm) --- Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml | 1 - .../devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml | 1 - Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml | 1 - Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml | 1 - Documentation/devicetree/bindings/usb/microchip,usb5744.yaml | 1 - Documentation/devicetree/bindings/usb/xlnx,usb2.yaml | 1 - 6 files changed, 6 deletions(-) diff --git a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml index 6ad78429dc7467..c92341888a2880 100644 --- a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml +++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Ceva AHCI SATA Controller maintainers: - - Mubin Sayyed - Radhey Shyam Pandey description: | diff --git a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml index bb93baa888794b..e13e9d6dd148ae 100644 --- a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml +++ b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml @@ -12,7 +12,6 @@ description: PS_MODE). Every pin can be configured as input/output. maintainers: - - Mubin Sayyed - Radhey Shyam Pandey properties: diff --git a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml index 1f1b42dde94d50..1db85fc9966f13 100644 --- a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml +++ b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Zynq UltraScale+ MPSoC and Versal reset maintainers: - - Mubin Sayyed - Radhey Shyam Pandey description: | diff --git a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml index b5843f4d17d879..379dacacb52681 100644 --- a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml +++ b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Xilinx SuperSpeed DWC3 USB SoC controller maintainers: - - Mubin Sayyed - Radhey Shyam Pandey properties: diff --git a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml index e2a72deae77601..c68c04da339982 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml @@ -17,7 +17,6 @@ description: maintainers: - Michal Simek - - Mubin Sayyed - Radhey Shyam Pandey properties: diff --git a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml index a7f75fe366652b..f295aa9d9ee79f 100644 --- a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml +++ b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Xilinx udc controller maintainers: - - Mubin Sayyed - Radhey Shyam Pandey properties: From 8b37357a78d7fa13d88ea822b35b40137da1c85e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Mon, 7 Apr 2025 15:24:27 +0200 Subject: [PATCH 0166/2924] x86/acpi: Don't limit CPUs to 1 for Xen PV guests due to disabled ACPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xen disables ACPI for PV guests in DomU, which causes acpi_mps_check() to return 1 when CONFIG_X86_MPPARSE is not set. As a result, the local APIC is disabled and the guest is later limited to a single vCPU, despite being configured with more. This regression was introduced in version 6.9 in commit 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management"), which added an early check that limits CPUs to 1 if apic_is_disabled. Update the acpi_mps_check() logic to return 0 early when running as a Xen PV guest in DomU, preventing APIC from being disabled in this specific case and restoring correct multi-vCPU behaviour. Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management") Signed-off-by: Petr Vaněk Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250407132445.6732-2-arkamar@atlas.cz --- arch/x86/kernel/acpi/boot.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index dae6a73be40e1c..9fa321a95eb33f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -23,6 +23,8 @@ #include #include +#include + #include #include #include @@ -1729,6 +1731,15 @@ int __init acpi_mps_check(void) { #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE) /* mptable code is not built-in*/ + + /* + * Xen disables ACPI in PV DomU guests but it still emulates APIC and + * supports SMP. Returning early here ensures that APIC is not disabled + * unnecessarily and the guest is not limited to a single vCPU. + */ + if (xen_pv_domain() && !xen_initial_domain()) + return 0; + if (acpi_disabled || acpi_noirq) { pr_warn("MPS support code is not built-in, using acpi=off or acpi=noirq or pci=noacpi may have problem\n"); return 1; From 649b50a82f09fa44c2f7a65618e4584072145ab7 Mon Sep 17 00:00:00 2001 From: Ruslan Piasetskyi Date: Wed, 26 Mar 2025 23:06:38 +0100 Subject: [PATCH 0167/2924] mmc: renesas_sdhi: Fix error handling in renesas_sdhi_probe After moving tmio_mmc_host_probe down, error handling has to be adjusted. Fixes: 74f45de394d9 ("mmc: renesas_sdhi: register irqs before registering controller") Reviewed-by: Ihar Salauyou Signed-off-by: Ruslan Piasetskyi Reviewed-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250326220638.460083-1-ruslan.piasetskyi@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index fa6526be36381d..cea6af5daf9932 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -1243,26 +1243,26 @@ int renesas_sdhi_probe(struct platform_device *pdev, num_irqs = platform_irq_count(pdev); if (num_irqs < 0) { ret = num_irqs; - goto eirq; + goto edisclk; } /* There must be at least one IRQ source */ if (!num_irqs) { ret = -ENXIO; - goto eirq; + goto edisclk; } for (i = 0; i < num_irqs; i++) { irq = platform_get_irq(pdev, i); if (irq < 0) { ret = irq; - goto eirq; + goto edisclk; } ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0, dev_name(&pdev->dev), host); if (ret) - goto eirq; + goto edisclk; } ret = tmio_mmc_host_probe(host); @@ -1274,8 +1274,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, return ret; -eirq: - tmio_mmc_host_remove(host); edisclk: renesas_sdhi_clk_disable(host); efree: From 9078f01fec1275a1974a01a64a5a495d72898c60 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 29 Mar 2025 17:41:26 +0100 Subject: [PATCH 0168/2924] mmc: renesas_sdhi: add regulator dependency The driver started using the regulator subsystem and fails to build without a dependeny on CONFIG_REGULATOR: ERROR: modpost: "rdev_get_drvdata" [drivers/mmc/host/renesas_sdhi_core.ko] undefined! ERROR: modpost: "devm_regulator_register" [drivers/mmc/host/renesas_sdhi_core.ko] undefined! The 'select RESET_CONTROLLER' needs to either go away or get changed to a dependency in order to avoid Kconfig dependency loops here. It also turns out the the superh version needs neither RESET_CONTROLLER nor REGULATOR, and this works because CONFIG_OF is not set there. Change both to a 'depends on', but add '|| !OF' for the superh case. Fixes: fae80a99dc03 ("mmc: renesas_sdhi: Add support for RZ/G3E SoC") Tested-by: Biju Das Signed-off-by: Arnd Bergmann Reviewed-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250329164145.3194284-1-arnd@kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 6824131b69b188..264e11fa58eafb 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -691,8 +691,8 @@ config MMC_TMIO_CORE config MMC_SDHI tristate "Renesas SDHI SD/SDIO controller support" depends on SUPERH || ARCH_RENESAS || COMPILE_TEST + depends on (RESET_CONTROLLER && REGULATOR) || !OF select MMC_TMIO_CORE - select RESET_CONTROLLER if ARCH_RENESAS help This provides support for the SDHI SD/SDIO controller found in Renesas SuperH, ARM and ARM64 based SoCs From 77183db6b8dbd8c352816030b328dd55993dc330 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 31 Mar 2025 00:17:32 +0200 Subject: [PATCH 0169/2924] mmc: renesas_sdhi: disable clocks if registering regulator failed Because the clocks were just enabled, bail out to the proper target if there are problems with the regulator. Fixes: fae80a99dc03 ("mmc: renesas_sdhi: Add support for RZ/G3E SoC") Signed-off-by: Wolfram Sang Reviewed-by: Biju Das Tested-by: Biju Das Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250330221732.56072-2-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index cea6af5daf9932..8c83e203c51670 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -1179,7 +1179,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (IS_ERR(rdev)) { dev_err(dev, "regulator register failed err=%ld", PTR_ERR(rdev)); ret = PTR_ERR(rdev); - goto efree; + goto edisclk; } priv->rdev = rdev; } From 4808595a9922e89726ec5611d7749b63966b7fa8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 7 Apr 2025 12:01:11 -0400 Subject: [PATCH 0170/2924] tracing: Hide get_vm_area() from MMUless builds The function get_vm_area() is not defined for non-MMU builds and causes a build error if it is used. Hide the map_pages() function around a: #ifdef CONFIG_MMU to keep it from being compiled when CONFIG_MMU is not set. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250407120111.2ccc9319@gandalf.local.home Reported-by: Guenter Roeck Tested-by: Guenter Roeck Closes: https://lore.kernel.org/all/4f8ece8b-8862-4f7c-8ede-febd28f8a9fe@roeck-us.net/ Fixes: 394f3f02de531 ("tracing: Use vmap_page_range() to map memmap ring buffer") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b581e388a9d9f7..8ddf6b17215caa 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9806,6 +9806,7 @@ static int instance_mkdir(const char *name) return ret; } +#ifdef CONFIG_MMU static u64 map_pages(unsigned long start, unsigned long size) { unsigned long vmap_start, vmap_end; @@ -9828,6 +9829,12 @@ static u64 map_pages(unsigned long start, unsigned long size) return (u64)vmap_start; } +#else +static inline u64 map_pages(unsigned long start, unsigned long size) +{ + return 0; +} +#endif /** * trace_array_get_by_name - Create/Lookup a trace array, given its name. From f2f29da9f0d4367f6ff35e0d9d021257bb53e273 Mon Sep 17 00:00:00 2001 From: Myrrh Periwinkle Date: Sun, 6 Apr 2025 11:45:22 +0700 Subject: [PATCH 0171/2924] x86/e820: Fix handling of subpage regions when calculating nosave ranges in e820__register_nosave_regions() While debugging kexec/hibernation hangs and crashes, it turned out that the current implementation of e820__register_nosave_regions() suffers from multiple serious issues: - The end of last region is tracked by PFN, causing it to find holes that aren't there if two consecutive subpage regions are present - The nosave PFN ranges derived from holes are rounded out (instead of rounded in) which makes it inconsistent with how explicitly reserved regions are handled Fix this by: - Treating reserved regions as if they were holes, to ensure consistent handling (rounding out nosave PFN ranges is more correct as the kernel does not use partial pages) - Tracking the end of the last RAM region by address instead of pages to detect holes more precisely These bugs appear to have been introduced about ~18 years ago with the very first version of e820_mark_nosave_regions(), and its flawed assumptions were carried forward uninterrupted through various waves of rewrites and renames. [ mingo: Added Git archeology details, for kicks and giggles. ] Fixes: e8eff5ac294e ("[PATCH] Make swsusp avoid memory holes and reserved memory regions on x86_64") Reported-by: Roberto Ricci Tested-by: Roberto Ricci Signed-off-by: Myrrh Periwinkle Signed-off-by: Ingo Molnar Cc: Rafael J. Wysocki Cc: Ard Biesheuvel Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: David Woodhouse Cc: Len Brown Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250406-fix-e820-nosave-v3-1-f3787bc1ee1d@qtmlabs.xyz Closes: https://lore.kernel.org/all/Z4WFjBVHpndct7br@desktop0a/ --- arch/x86/kernel/e820.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 57120f0749cc3c..9d8dd8deb2a702 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len) void __init e820__register_nosave_regions(unsigned long limit_pfn) { int i; - unsigned long pfn = 0; + u64 last_addr = 0; for (i = 0; i < e820_table->nr_entries; i++) { struct e820_entry *entry = &e820_table->entries[i]; - if (pfn < PFN_UP(entry->addr)) - register_nosave_region(pfn, PFN_UP(entry->addr)); - - pfn = PFN_DOWN(entry->addr + entry->size); - if (entry->type != E820_TYPE_RAM) - register_nosave_region(PFN_UP(entry->addr), pfn); + continue; - if (pfn >= limit_pfn) - break; + if (last_addr < entry->addr) + register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr)); + + last_addr = entry->addr + entry->size; } + + register_nosave_region(PFN_DOWN(last_addr), limit_pfn); } #ifdef CONFIG_ACPI From bb5e07cb927724e0b47be371fa081141cfb14414 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Sat, 19 Oct 2024 22:13:03 +0300 Subject: [PATCH 0172/2924] hfs/hfsplus: fix slab-out-of-bounds in hfs_bnode_read_key Syzbot reported an issue in hfs subsystem: BUG: KASAN: slab-out-of-bounds in memcpy_from_page include/linux/highmem.h:423 [inline] BUG: KASAN: slab-out-of-bounds in hfs_bnode_read fs/hfs/bnode.c:35 [inline] BUG: KASAN: slab-out-of-bounds in hfs_bnode_read_key+0x314/0x450 fs/hfs/bnode.c:70 Write of size 94 at addr ffff8880123cd100 by task syz-executor237/5102 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 kasan_check_range+0x282/0x290 mm/kasan/generic.c:189 __asan_memcpy+0x40/0x70 mm/kasan/shadow.c:106 memcpy_from_page include/linux/highmem.h:423 [inline] hfs_bnode_read fs/hfs/bnode.c:35 [inline] hfs_bnode_read_key+0x314/0x450 fs/hfs/bnode.c:70 hfs_brec_insert+0x7f3/0xbd0 fs/hfs/brec.c:159 hfs_cat_create+0x41d/0xa50 fs/hfs/catalog.c:118 hfs_mkdir+0x6c/0xe0 fs/hfs/dir.c:232 vfs_mkdir+0x2f9/0x4f0 fs/namei.c:4257 do_mkdirat+0x264/0x3a0 fs/namei.c:4280 __do_sys_mkdir fs/namei.c:4300 [inline] __se_sys_mkdir fs/namei.c:4298 [inline] __x64_sys_mkdir+0x6c/0x80 fs/namei.c:4298 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fbdd6057a99 Add a check for key length in hfs_bnode_read_key to prevent out-of-bounds memory access. If the key length is invalid, the key buffer is cleared, improving stability and reliability. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+5f3a973ed3dfb85a6683@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5f3a973ed3dfb85a6683 Cc: stable@vger.kernel.org Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/20241019191303.24048-1-kovalev@altlinux.org Reviewed-by: Cengiz Can Signed-off-by: Christian Brauner --- fs/hfs/bnode.c | 6 ++++++ fs/hfsplus/bnode.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 6add6ebfef8967..cb823a8a6ba960 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off) else key_len = tree->max_key_len + 1; + if (key_len > sizeof(hfs_btree_key) || key_len < 1) { + memset(key, 0, sizeof(hfs_btree_key)); + pr_err("hfs: Invalid key length: %d\n", key_len); + return; + } + hfs_bnode_read(node, key, off, key_len); } diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 87974d5e679156..079ea80534f7de 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off) else key_len = tree->max_key_len + 2; + if (key_len > sizeof(hfsplus_btree_key) || key_len < 1) { + memset(key, 0, sizeof(hfsplus_btree_key)); + pr_err("hfsplus: Invalid key length: %d\n", key_len); + return; + } + hfs_bnode_read(node, key, off, key_len); } From a8605b0ed187f53f077a769ce2b52ddb97f3eb42 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 4 Apr 2025 09:50:22 -0500 Subject: [PATCH 0173/2924] ACPI: button: Only send `KEY_POWER` for `ACPI_BUTTON_NOTIFY_STATUS` Commit a7e23ec17feec ("ACPI: button: Install notifier for system events as well") modified the ACPI button behavior to send `ACPI_BUTTON_NOTIFY_WAKE` events. This caused a regression on Dell Optiplex 3040 sending `KEY_POWER` randomly at runtime. Adjust logic so that the `ACPI_BUTTON_NOTIFY_WAKE` event will never send `KEY_POWER`. Fixes: a7e23ec17feec ("ACPI: button: Install notifier for system events as well") Reported-by: Ian Laurie Closes: https://lore.kernel.org/linux-acpi/CAJZ5v0hbA6bqxHupTh4NZR-GVSb9M5RL7JSb2yQgvYYJg+z2aQ@mail.gmail.com/T/#md8071e480212201f23e4929607386750d3b6bc13 Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2357044 Signed-off-by: Mario Limonciello Tested-by: Ian Laurie Link: https://patch.msgid.link/20250404145034.2608574-1-superm1@kernel.org [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/button.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 90b09840536dde..0a702604018825 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -458,7 +458,7 @@ static void acpi_button_notify(acpi_handle handle, u32 event, void *data) acpi_pm_wakeup_event(&device->dev); button = acpi_driver_data(device); - if (button->suspended) + if (button->suspended || event == ACPI_BUTTON_NOTIFY_WAKE) return; input = button->input; From 6b395d31146a3fae775823ea8570a37b922f6685 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 24 Mar 2025 09:39:35 +0530 Subject: [PATCH 0174/2924] RDMA/bnxt_re: Fix budget handling of notification queue The cited commit in Fixes tag introduced a bug which can cause hang of completion queue processing because of notification queue budget goes to zero. Found while doing nfs over rdma mount and umount. Below message is noticed because of the existing bug. kernel: cm_destroy_id_wait_timeout: cm_id=00000000ff6c6cc6 timed out. state 11 -> 0, refcnt=1 Fix to handle this issue - Driver will not change nq->budget upon create and destroy of cq and srq rdma resources. Fixes: cb97b377a135 ("RDMA/bnxt_re: Refurbish CQ to NQ hash calculation") Link: https://patch.msgid.link/r/20250324040935.90182-1-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Kashyap Desai Signed-off-by: Kalesh AP Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 9082b3fd2b4729..e14b05cd089ab0 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1785,8 +1785,6 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); ib_umem_release(srq->umem); atomic_dec(&rdev->stats.res.srq_count); - if (nq) - nq->budget--; return 0; } @@ -1908,8 +1906,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, goto fail; } } - if (nq) - nq->budget++; active_srqs = atomic_inc_return(&rdev->stats.res.srq_count); if (active_srqs > rdev->stats.res.srq_watermark) rdev->stats.res.srq_watermark = active_srqs; @@ -3079,7 +3075,6 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) ib_umem_release(cq->umem); atomic_dec(&rdev->stats.res.cq_count); - nq->budget--; kfree(cq->cql); return 0; } From 62dd71e691109bb44ba8ad7f58b3a2ac6b69d496 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 16:57:15 +0100 Subject: [PATCH 0175/2924] RDMA/ucaps: Avoid format-security warning Passing a non-literal format string to dev_set_name causes a warning: drivers/infiniband/core/ucaps.c:173:33: error: format string is not a string literal (potentially insecure) [-Werror,-Wformat-security] 173 | ret = dev_set_name(&ucap->dev, ucap_names[type]); | ^~~~~~~~~~~~~~~~ drivers/infiniband/core/ucaps.c:173:33: note: treat the string as an argument to avoid this 173 | ret = dev_set_name(&ucap->dev, ucap_names[type]); | ^ | "%s", Turn the name into the %s argument as suggested by gcc. Fixes: 61e51682816d ("RDMA/uverbs: Introduce UCAP (User CAPabilities) API") Link: https://patch.msgid.link/r/20250314155721.264083-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Zhu Yanjun Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucaps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucaps.c b/drivers/infiniband/core/ucaps.c index 6853c6d078f91e..de5cb8bf0a6132 100644 --- a/drivers/infiniband/core/ucaps.c +++ b/drivers/infiniband/core/ucaps.c @@ -170,7 +170,7 @@ int ib_create_ucap(enum rdma_user_cap type) ucap->dev.class = &ucaps_class; ucap->dev.devt = MKDEV(MAJOR(ucaps_base_dev), type); ucap->dev.release = ucap_dev_release; - ret = dev_set_name(&ucap->dev, ucap_names[type]); + ret = dev_set_name(&ucap->dev, "%s", ucap_names[type]); if (ret) goto err_device; From b988685388effd648150aab272533f833a2a70f0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 1 Apr 2025 08:38:51 -0500 Subject: [PATCH 0176/2924] ACPI: EC: Set ec_no_wakeup for Lenovo Go S When AC adapter is unplugged or plugged in EC wakes from HW sleep but APU doesn't enter back into HW sleep. The reason this happens is that, when the APU exits HW sleep, the power rails controlled by the EC will power up the TCON. The TCON has a GPIO that will be toggled at this time. The GPIO is not marked as a wakeup source, but the GPIO controller still has an unserviced interrupt. Unserviced interrupts will block entering HW sleep again. Clearing the GPIO doesn't help as the TCON continues to assert it until it's been initialized by i2c-hid. Fixing this would require TCON F/W changes and it's already broken in the wild on production hardware. To avoid triggering this issue add a quirk to avoid letting EC wake up system at all. The power button still works properly on this system. Reported-by: Antheas Kapenekakis Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3929 Link: https://github.com/bazzite-org/patchwork/commit/95b93b2852718ee1e808c72e6b1836da4a95fc63 Co-developed-by: Antheas Kapenekakis Signed-off-by: Antheas Kapenekakis Signed-off-by: Mario Limonciello Link: https://patch.msgid.link/20250401133858.1892077-1-superm1@kernel.org [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 8db09d81918fbb..3c5f34892734e9 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2301,6 +2301,34 @@ static const struct dmi_system_id acpi_ec_no_wakeup[] = { DMI_MATCH(DMI_PRODUCT_FAMILY, "103C_5336AN HP ZHAN 66 Pro"), }, }, + /* + * Lenovo Legion Go S; touchscreen blocks HW sleep when woken up from EC + * https://gitlab.freedesktop.org/drm/amd/-/issues/3929 + */ + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83L3"), + } + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83N6"), + } + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83Q2"), + } + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83Q3"), + } + }, { }, }; From 216a61d33c0728a8cf1650aaed2c523c6ce16354 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 4 Apr 2025 12:21:25 +0000 Subject: [PATCH 0177/2924] net: ethtool: fix ethtool_ringparam_get_cfg() returns a hds_thresh value always as 0. When hds-thresh is configured, ethnl_set_rings() is called, and it calls ethtool_ringparam_get_cfg() to get ringparameters from .get_ringparam() callback and dev->cfg. Both hds_config and hds_thresh values should be set from dev->cfg, not from .get_ringparam(). But ethtool_ringparam_get_cfg() sets only hds_config from dev->cfg. So, ethtool_ringparam_get_cfg() returns always a hds_thresh as 0. If an input value of hds-thresh is 0, a hds_thresh value from ethtool_ringparam_get_cfg() are same. So ethnl_set_rings() does nothing and returns immediately. It causes a bug that setting a hds-thresh value to 0 is not working. Reproducer: modprobe netdevsim echo 1 > /sys/bus/netdevsim/new_device ethtool -G eth0 hds-thresh 100 ethtool -G eth0 hds-thresh 0 ethtool -g eth0 #hds-thresh value should be 0, but it shows 100. The tools/testing/selftests/drivers/net/hds.py can test it too with applying a following patch for hds.py. Fixes: 928459bbda19 ("net: ethtool: populate the default HDS params in the core") Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250404122126.1555648-2-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/ethtool/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 0cb6da1f692a0a..49bea6b45bd5c1 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -830,6 +830,7 @@ void ethtool_ringparam_get_cfg(struct net_device *dev, /* Driver gives us current state, we want to return current config */ kparam->tcp_data_split = dev->cfg->hds_config; + kparam->hds_thresh = dev->cfg->hds_thresh; } static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info) From 22d3a63d5321326cb05a6dff7d2c488236cf56f2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 4 Apr 2025 12:21:26 +0000 Subject: [PATCH 0178/2924] selftests: drv-net: test random value for hds-thresh hds.py has been testing 0(set_hds_thresh_zero()), MAX(set_hds_thresh_max()), GT(set_hds_thresh_gt()) values for hds-thresh. However if a hds-thresh value was already 0, set_hds_thresh_zero() can't test properly. So, it tests random value first and then tests 0, MAX, GT values. Testing bnxt: TAP version 13 1..13 ok 1 hds.get_hds ok 2 hds.get_hds_thresh ok 3 hds.set_hds_disable # SKIP disabling of HDS not supported by the device ok 4 hds.set_hds_enable ok 5 hds.set_hds_thresh_random ok 6 hds.set_hds_thresh_zero ok 7 hds.set_hds_thresh_max ok 8 hds.set_hds_thresh_gt ok 9 hds.set_xdp ok 10 hds.enabled_set_xdp ok 11 hds.ioctl ok 12 hds.ioctl_set_xdp ok 13 hds.ioctl_enabled_set_xdp # Totals: pass:12 fail:0 xfail:0 xpass:0 skip:1 error:0 Testing lo: TAP version 13 1..13 ok 1 hds.get_hds # SKIP tcp-data-split not supported by device ok 2 hds.get_hds_thresh # SKIP hds-thresh not supported by device ok 3 hds.set_hds_disable # SKIP ring-set not supported by the device ok 4 hds.set_hds_enable # SKIP ring-set not supported by the device ok 5 hds.set_hds_thresh_random # SKIP hds-thresh not supported by device ok 6 hds.set_hds_thresh_zero # SKIP ring-set not supported by the device ok 7 hds.set_hds_thresh_max # SKIP hds-thresh not supported by device ok 8 hds.set_hds_thresh_gt # SKIP hds-thresh not supported by device ok 9 hds.set_xdp # SKIP tcp-data-split not supported by device ok 10 hds.enabled_set_xdp # SKIP tcp-data-split not supported by device ok 11 hds.ioctl # SKIP tcp-data-split not supported by device ok 12 hds.ioctl_set_xdp # SKIP tcp-data-split not supported by device ok 13 hds.ioctl_enabled_set_xdp # SKIP tcp-data-split not supported by device # Totals: pass:0 fail:0 xfail:0 xpass:0 skip:13 error:0 Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250404122126.1555648-3-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hds.py | 33 +++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 8b7f6acad15f13..7c90a040ce45ad 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -6,7 +6,7 @@ from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv -from lib.py import defer, ethtool, ip +from lib.py import defer, ethtool, ip, random def _get_hds_mode(cfg, netnl) -> str: @@ -109,6 +109,36 @@ def set_hds_thresh_zero(cfg, netnl) -> None: ksft_eq(0, rings['hds-thresh']) +def set_hds_thresh_random(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + + if rings['hds-thresh-max'] < 2: + raise KsftSkipEx('hds-thresh-max is too small') + elif rings['hds-thresh-max'] == 2: + hds_thresh = 1 + else: + while True: + hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1) + if hds_thresh != rings['hds-thresh']: + break + + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(hds_thresh, rings['hds-thresh']) + def set_hds_thresh_max(cfg, netnl) -> None: try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) @@ -243,6 +273,7 @@ def main() -> None: get_hds_thresh, set_hds_disable, set_hds_enable, + set_hds_thresh_random, set_hds_thresh_zero, set_hds_thresh_max, set_hds_thresh_gt, From 54f5fafcced113c7d203f6848f4f14840e74e9d1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 5 Apr 2025 20:57:51 -0700 Subject: [PATCH 0179/2924] ipv6: Fix null-ptr-deref in addrconf_add_ifaddr(). The cited commit placed netdev_lock_ops() just after __dev_get_by_index() in addrconf_add_ifaddr(), where dev could be NULL as reported. [0] Let's call netdev_lock_ops() only when dev is not NULL. [0]: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000198: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000cc0-0x0000000000000cc7] CPU: 3 UID: 0 PID: 12032 Comm: syz.0.15 Not tainted 6.14.0-13408-g9f867ba24d36 #1 PREEMPT(full) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:addrconf_add_ifaddr (./include/net/netdev_lock.h:30 ./include/net/netdev_lock.h:41 net/ipv6/addrconf.c:3157) Code: 8b b4 24 94 00 00 00 4c 89 ef e8 7e 4c 2f ff 4c 8d b0 c5 0c 00 00 48 89 c3 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 <0f> b6 04 02 4c 89 f2 83 e2 07 38 d0 7f 08 80 RSP: 0018:ffffc90015b0faa0 EFLAGS: 00010213 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000198 RSI: ffffffff893162f2 RDI: ffff888078cb0338 RBP: ffffc90015b0fbb0 R08: 0000000000000000 R09: fffffbfff20cbbe2 R10: ffffc90015b0faa0 R11: 0000000000000000 R12: 1ffff92002b61f54 R13: ffff888078cb0000 R14: 0000000000000cc5 R15: ffff888078cb0000 FS: 00007f92559ed640(0000) GS:ffff8882a8659000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f92559ecfc8 CR3: 000000001c39e000 CR4: 00000000000006f0 Call Trace: inet6_ioctl (net/ipv6/af_inet6.c:580) sock_do_ioctl (net/socket.c:1196) sock_ioctl (net/socket.c:1314) __x64_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:906 fs/ioctl.c:892 fs/ioctl.c:892) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130 RIP: 0033:0x7f9254b9c62d Code: 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff f8 RSP: 002b:00007f92559ecf98 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f9254d65f80 RCX: 00007f9254b9c62d RDX: 0000000020000040 RSI: 0000000000008916 RDI: 0000000000000003 RBP: 00007f9254c264d3 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f9254d65f80 R15: 00007f92559cd000 Modules linked in: Fixes: 8965c160b8f7 ("net: use netif_disable_lro in ipv6_add_dev") Reported-by: syzkaller Reported-by: Hui Guo Closes: https://lore.kernel.org/netdev/CAHOo4gK+tdU1B14Kh6tg-tNPqnQ1qGLfinONFVC43vmgEPnXXw@mail.gmail.com/ Signed-off-by: Kuniyuki Iwashima Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250406035755.69238-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c3b908fccbc143..9c52ed23ff2315 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3154,12 +3154,13 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) rtnl_net_lock(net); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); - netdev_lock_ops(dev); - if (dev) + if (dev) { + netdev_lock_ops(dev); err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL); - else + netdev_unlock_ops(dev); + } else { err = -ENODEV; - netdev_unlock_ops(dev); + } rtnl_net_unlock(net); return err; } From 95ba3850fed03e01b422ab5d7943aeba130c9723 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 24 Mar 2025 20:31:32 +0800 Subject: [PATCH 0180/2924] RDMA/usnic: Fix passing zero to PTR_ERR in usnic_ib_pci_probe() drivers/infiniband/hw/usnic/usnic_ib_main.c:590 usnic_ib_pci_probe() warn: passing zero to 'PTR_ERR' Make usnic_ib_device_add() return NULL on fail path, also remove useless NULL check for usnic_ib_discover_pf() Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Link: https://patch.msgid.link/r/20250324123132.2392077-1-yuehaibing@huawei.com Signed-off-by: Yue Haibing Reviewed-by: Zhu Yanjun Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 4ddcd5860e0fa4..11eca39b73a93e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -397,7 +397,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); - return ERR_PTR(-EFAULT); + return NULL; } us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); @@ -517,8 +517,8 @@ static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) } us_ibdev = usnic_ib_device_add(parent_pci); - if (IS_ERR_OR_NULL(us_ibdev)) { - us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT); + if (!us_ibdev) { + us_ibdev = ERR_PTR(-EFAULT); goto out; } @@ -586,10 +586,10 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, } pf = usnic_ib_discover_pf(vf->vnic); - if (IS_ERR_OR_NULL(pf)) { - usnic_err("Failed to discover pf of vnic %s with err%ld\n", - pci_name(pdev), PTR_ERR(pf)); - err = pf ? PTR_ERR(pf) : -EFAULT; + if (IS_ERR(pf)) { + err = PTR_ERR(pf); + usnic_err("Failed to discover pf of vnic %s with err%d\n", + pci_name(pdev), err); goto out_clean_vnic; } From 04efcee6ef8d0f01eef495db047e7216d6e6e38f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 4 Apr 2025 09:11:22 -0700 Subject: [PATCH 0181/2924] net: hold instance lock during NETDEV_CHANGE Cosmin reports an issue with ipv6_add_dev being called from NETDEV_CHANGE notifier: [ 3455.008776] ? ipv6_add_dev+0x370/0x620 [ 3455.010097] ipv6_find_idev+0x96/0xe0 [ 3455.010725] addrconf_add_dev+0x1e/0xa0 [ 3455.011382] addrconf_init_auto_addrs+0xb0/0x720 [ 3455.013537] addrconf_notify+0x35f/0x8d0 [ 3455.014214] notifier_call_chain+0x38/0xf0 [ 3455.014903] netdev_state_change+0x65/0x90 [ 3455.015586] linkwatch_do_dev+0x5a/0x70 [ 3455.016238] rtnl_getlink+0x241/0x3e0 [ 3455.019046] rtnetlink_rcv_msg+0x177/0x5e0 Similarly, linkwatch might get to ipv6_add_dev without ops lock: [ 3456.656261] ? ipv6_add_dev+0x370/0x620 [ 3456.660039] ipv6_find_idev+0x96/0xe0 [ 3456.660445] addrconf_add_dev+0x1e/0xa0 [ 3456.660861] addrconf_init_auto_addrs+0xb0/0x720 [ 3456.661803] addrconf_notify+0x35f/0x8d0 [ 3456.662236] notifier_call_chain+0x38/0xf0 [ 3456.662676] netdev_state_change+0x65/0x90 [ 3456.663112] linkwatch_do_dev+0x5a/0x70 [ 3456.663529] __linkwatch_run_queue+0xeb/0x200 [ 3456.663990] linkwatch_event+0x21/0x30 [ 3456.664399] process_one_work+0x211/0x610 [ 3456.664828] worker_thread+0x1cc/0x380 [ 3456.665691] kthread+0xf4/0x210 Reclassify NETDEV_CHANGE as a notifier that consistently runs under the instance lock. Link: https://lore.kernel.org/netdev/aac073de8beec3e531c86c101b274d434741c28e.camel@nvidia.com/ Reported-by: Cosmin Ratiu Tested-by: Cosmin Ratiu Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250404161122.3907628-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- Documentation/networking/netdevices.rst | 10 +++++---- include/linux/netdevice.h | 2 ++ include/linux/rtnetlink.h | 2 +- net/core/dev.c | 11 +--------- net/core/dev_api.c | 16 ++++++++++++++ net/core/link_watch.c | 28 ++++++++++++++++++++----- net/core/lock_debug.c | 2 +- net/core/rtnetlink.c | 15 +++++++------ net/ethtool/ioctl.c | 2 +- net/hsr/hsr_device.c | 6 +++--- 10 files changed, 63 insertions(+), 31 deletions(-) diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 6c2d8945f59798..eab601ab2db0b7 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -338,10 +338,11 @@ operations directly under the netdev instance lock. Devices drivers are encouraged to rely on the instance lock where possible. For the (mostly software) drivers that need to interact with the core stack, -there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g., -``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx`` functions handle -acquiring the instance lock themselves, while the ``netif_xxx`` functions -assume that the driver has already acquired the instance lock. +there are two sets of interfaces: ``dev_xxx``/``netdev_xxx`` and ``netif_xxx`` +(e.g., ``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx``/``netdev_xxx`` +functions handle acquiring the instance lock themselves, while the +``netif_xxx`` functions assume that the driver has already acquired +the instance lock. Notifiers and netdev instance lock ================================== @@ -354,6 +355,7 @@ For devices with locked ops, currently only the following notifiers are running under the lock: * ``NETDEV_REGISTER`` * ``NETDEV_UP`` +* ``NETDEV_CHANGE`` The following notifiers are running without the lock: * ``NETDEV_UNREGISTER`` diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cf3b6445817bb9..2d11d013cabedb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4429,6 +4429,7 @@ void linkwatch_fire_event(struct net_device *dev); * pending work list (if queued). */ void linkwatch_sync_dev(struct net_device *dev); +void __linkwatch_sync_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4974,6 +4975,7 @@ void dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); +void netif_state_change(struct net_device *dev); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ccaaf4c7d5f6a4..ea39dd23a1976e 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -240,6 +240,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } -void netdev_set_operstate(struct net_device *dev, int newstate); +void netif_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 0608605cfc242f..75e104322ad527 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1518,15 +1518,7 @@ void netdev_features_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_features_change); -/** - * netdev_state_change - device changes state - * @dev: device to cause notification - * - * Called to indicate a device has changed state. This function calls - * the notifier chains for netdev_chain and sends a NEWLINK message - * to the routing socket. - */ -void netdev_state_change(struct net_device *dev) +void netif_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { struct netdev_notifier_change_info change_info = { @@ -1538,7 +1530,6 @@ void netdev_state_change(struct net_device *dev) rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL); } } -EXPORT_SYMBOL(netdev_state_change); /** * __netdev_notify_peers - notify network peers about existence of @dev, diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 90bafb0b1b8c3f..90898cd540ced7 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -327,3 +327,19 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) return ret; } EXPORT_SYMBOL_GPL(dev_xdp_propagate); + +/** + * netdev_state_change() - device changes state + * @dev: device to cause notification + * + * Called to indicate a device has changed state. This function calls + * the notifier chains for netdev_chain and sends a NEWLINK message + * to the routing socket. + */ +void netdev_state_change(struct net_device *dev) +{ + netdev_lock_ops(dev); + netif_state_change(dev); + netdev_unlock_ops(dev); +} +EXPORT_SYMBOL(netdev_state_change); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index cb04ef2b9807c9..864f3bbc3a4c50 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -183,7 +183,7 @@ static void linkwatch_do_dev(struct net_device *dev) else dev_deactivate(dev); - netdev_state_change(dev); + netif_state_change(dev); } /* Note: our callers are responsible for calling netdev_tracker_free(). * This is the reason we use __dev_put() instead of dev_put(). @@ -240,7 +240,9 @@ static void __linkwatch_run_queue(int urgent_only) */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); spin_unlock_irq(&lweventlist_lock); + netdev_lock_ops(dev); linkwatch_do_dev(dev); + netdev_unlock_ops(dev); do_dev--; spin_lock_irq(&lweventlist_lock); } @@ -253,25 +255,41 @@ static void __linkwatch_run_queue(int urgent_only) spin_unlock_irq(&lweventlist_lock); } -void linkwatch_sync_dev(struct net_device *dev) +static bool linkwatch_clean_dev(struct net_device *dev) { unsigned long flags; - int clean = 0; + bool clean = false; spin_lock_irqsave(&lweventlist_lock, flags); if (!list_empty(&dev->link_watch_list)) { list_del_init(&dev->link_watch_list); - clean = 1; + clean = true; /* We must release netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); } spin_unlock_irqrestore(&lweventlist_lock, flags); - if (clean) + + return clean; +} + +void __linkwatch_sync_dev(struct net_device *dev) +{ + netdev_ops_assert_locked(dev); + + if (linkwatch_clean_dev(dev)) linkwatch_do_dev(dev); } +void linkwatch_sync_dev(struct net_device *dev) +{ + if (linkwatch_clean_dev(dev)) { + netdev_lock_ops(dev); + linkwatch_do_dev(dev); + netdev_unlock_ops(dev); + } +} /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) diff --git a/net/core/lock_debug.c b/net/core/lock_debug.c index b7f22dc92a6f30..941e26c1343de4 100644 --- a/net/core/lock_debug.c +++ b/net/core/lock_debug.c @@ -20,11 +20,11 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event, switch (cmd) { case NETDEV_REGISTER: case NETDEV_UP: + case NETDEV_CHANGE: netdev_ops_assert_locked(dev); fallthrough; case NETDEV_DOWN: case NETDEV_REBOOT: - case NETDEV_CHANGE: case NETDEV_UNREGISTER: case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c238528350506d..d8d03ff87a3bf5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1043,7 +1043,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, } EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); -void netdev_set_operstate(struct net_device *dev, int newstate) +void netif_set_operstate(struct net_device *dev, int newstate) { unsigned int old = READ_ONCE(dev->operstate); @@ -1052,9 +1052,9 @@ void netdev_set_operstate(struct net_device *dev, int newstate) return; } while (!try_cmpxchg(&dev->operstate, &old, newstate)); - netdev_state_change(dev); + netif_state_change(dev); } -EXPORT_SYMBOL(netdev_set_operstate); +EXPORT_SYMBOL(netif_set_operstate); static void set_operstate(struct net_device *dev, unsigned char transition) { @@ -1080,7 +1080,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition) break; } - netdev_set_operstate(dev, operstate); + netif_set_operstate(dev, operstate); } static unsigned int rtnl_dev_get_flags(const struct net_device *dev) @@ -3396,7 +3396,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, errout: if (status & DO_SETLINK_MODIFIED) { if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY) - netdev_state_change(dev); + netif_state_change(dev); if (err < 0) net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", @@ -3676,8 +3676,11 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, nla_len(tb[IFLA_BROADCAST])); if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_OPERSTATE]) + if (tb[IFLA_OPERSTATE]) { + netdev_lock_ops(dev); set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + netdev_unlock_ops(dev); + } if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); if (tb[IFLA_GROUP]) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 221639407c7245..8262cc10f98db7 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -60,7 +60,7 @@ static struct devlink *netdev_to_devlink_get(struct net_device *dev) u32 ethtool_op_get_link(struct net_device *dev) { /* Synchronize carrier state with link watch, see also rtnl_getlink() */ - linkwatch_sync_dev(dev); + __linkwatch_sync_dev(dev); return netif_carrier_ok(dev) ? 1 : 0; } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 439cfb7ad5d148..1b1b700ec05eac 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -33,14 +33,14 @@ static void hsr_set_operstate(struct hsr_port *master, bool has_carrier) struct net_device *dev = master->dev; if (!is_admin_up(dev)) { - netdev_set_operstate(dev, IF_OPER_DOWN); + netif_set_operstate(dev, IF_OPER_DOWN); return; } if (has_carrier) - netdev_set_operstate(dev, IF_OPER_UP); + netif_set_operstate(dev, IF_OPER_UP); else - netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); + netif_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); } static bool hsr_check_carrier(struct hsr_port *master) From d247667ecd6411ec5bec9a38db7feaa599ce3ee2 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 2 Apr 2025 10:09:44 +0300 Subject: [PATCH 0182/2924] RDMA/mlx5: Fix compilation warning when USER_ACCESS isn't set The cited commit made fs.c always compile, even when INFINIBAND_USER_ACCESS isn't set. This results in a compilation warning about an unused object when compiling with W=1 and USER_ACCESS is unset. Fix this by defining uverbs_destroy_def_handler() even when USER_ACCESS isn't set. Fixes: 36e0d433672f ("RDMA/mlx5: Compile fs.c regardless of INFINIBAND_USER_ACCESS config") Link: https://patch.msgid.link/r/20250402070944.1022093-1-mbloch@nvidia.com Signed-off-by: Mark Bloch Tested-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/fs.c | 2 -- include/rdma/ib_verbs.h | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 251246c73b339b..0ff9f18a71e828 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -3461,7 +3461,6 @@ DECLARE_UVERBS_NAMED_OBJECT( &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY)); const struct uapi_definition mlx5_ib_flow_defs[] = { -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_FLOW_MATCHER), UAPI_DEF_CHAIN_OBJ_TREE( @@ -3472,7 +3471,6 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_STEERING_ANCHOR, UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)), -#endif {}, }; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d42eae69d9a822..901353796fbbf6 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4790,7 +4790,14 @@ void roce_del_all_netdev_gids(struct ib_device *ib_dev, struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); +#else +static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) +{ + return 0; +} +#endif struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, From 0cd575cab10e114e95921321f069a08d45bc412e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 4 Apr 2025 12:48:48 -0700 Subject: [PATCH 0183/2924] uprobes: Avoid false-positive lockdep splat on CONFIG_PREEMPT_RT=y in the ri_timer() uprobe timer callback, use raw_write_seqcount_*() Avoid a false-positive lockdep warning in the CONFIG_PREEMPT_RT=y configuration when using write_seqcount_begin() in the uprobe timer callback by using raw_write_* APIs. Uprobe's use of timer callback is guaranteed to not race with itself for a given uprobe_task, and as such seqcount's insistence on having preemption disabled on the writer side is irrelevant. So switch to raw_ variants of seqcount API instead of disabling preemption unnecessarily. Also, point out in the comments more explicitly why we use seqcount despite our reader side being rather simple and never retrying. We favor well-maintained kernel primitive in favor of open-coding our own memory barriers. Fixes: 8622e45b5da1 ("uprobes: Reuse return_instances between multiple uretprobes within task") Reported-by: Alexei Starovoitov Suggested-by: Sebastian Siewior Signed-off-by: Andrii Nakryiko Signed-off-by: Ingo Molnar Acked-by: Oleg Nesterov Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: stable@kernel.org Link: https://lore.kernel.org/r/20250404194848.2109539-1-andrii@kernel.org --- kernel/events/uprobes.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 615b4e6d22c7b1..8d783b5882b6a3 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1956,6 +1956,9 @@ static void free_ret_instance(struct uprobe_task *utask, * to-be-reused return instances for future uretprobes. If ri_timer() * happens to be running right now, though, we fallback to safety and * just perform RCU-delated freeing of ri. + * Admittedly, this is a rather simple use of seqcount, but it nicely + * abstracts away all the necessary memory barriers, so we use + * a well-supported kernel primitive here. */ if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) { /* immediate reuse of ri without RCU GP is OK */ @@ -2016,12 +2019,20 @@ static void ri_timer(struct timer_list *timer) /* RCU protects return_instance from freeing. */ guard(rcu)(); - write_seqcount_begin(&utask->ri_seqcount); + /* + * See free_ret_instance() for notes on seqcount use. + * We also employ raw API variants to avoid lockdep false-positive + * warning complaining about enabled preemption. The timer can only be + * invoked once for a uprobe_task. Therefore there can only be one + * writer. The reader does not require an even sequence count to make + * progress, so it is OK to remain preemptible on PREEMPT_RT. + */ + raw_write_seqcount_begin(&utask->ri_seqcount); for_each_ret_instance_rcu(ri, utask->return_instances) hprobe_expire(&ri->hprobe, false); - write_seqcount_end(&utask->ri_seqcount); + raw_write_seqcount_end(&utask->ri_seqcount); } static struct uprobe_task *alloc_utask(void) From 1b2fe85f3cf19026a0e9037242bcbf7e736b22e3 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 2 Apr 2025 11:26:57 +0800 Subject: [PATCH 0184/2924] RDMA/rxe: Fix null pointer dereference in ODP MR check The blktests/rnbd reported a null pointer dereference as following. Similar to the mlx5, introduce a is_odp_mr() to check if the odp is enabled in this mr. Workqueue: rxe_wq do_work [rdma_rxe] RIP: 0010:rxe_mr_copy+0x57/0x210 [rdma_rxe] Code: 7c 04 48 89 f3 48 89 d5 41 89 cf 45 89 c4 0f 84 dc 00 00 00 89 ca e8 f8 f8 ff ff 85 c0 0f 85 75 01 00 00 49 8b 86 f0 00 00 00 40 28 02 0f 85 98 01 00 00 41 8b 46 78 41 8b 8e 10 01 00 00 8d RSP: 0018:ffffa0aac02cfcf8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9079cd440024 RCX: 0000000000000000 RDX: 000000000000003c RSI: ffff9079cd440060 RDI: ffff9079cd665600 RBP: ffff9079c0e5e45a R08: 0000000000000000 R09: 0000000000000000 R10: 000000003c000000 R11: 0000000000225510 R12: 0000000000000000 R13: 0000000000000000 R14: ffff9079cd665600 R15: 000000000000003c FS: 0000000000000000(0000) GS:ffff907ccfa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000028 CR3: 0000000119498001 CR4: 00000000001726f0 Call Trace: ? __die_body+0x1e/0x60 ? page_fault_oops+0x14f/0x4c0 ? rxe_mr_copy+0x57/0x210 [rdma_rxe] ? search_bpf_extables+0x5f/0x80 ? exc_page_fault+0x7e/0x180 ? asm_exc_page_fault+0x26/0x30 ? rxe_mr_copy+0x57/0x210 [rdma_rxe] ? rxe_mr_copy+0x48/0x210 [rdma_rxe] ? rxe_pool_get_index+0x50/0x90 [rdma_rxe] rxe_receiver+0x1d98/0x2530 [rdma_rxe] ? psi_task_switch+0x1ff/0x250 ? finish_task_switch+0x92/0x2d0 ? __schedule+0xbdf/0x17c0 do_task+0x65/0x1e0 [rdma_rxe] process_scheduled_works+0xaa/0x3f0 worker_thread+0x117/0x240 Fixes: d03fb5c6599e ("RDMA/rxe: Allow registering MRs for On-Demand Paging") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/r/20250402032657.1762800-1-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Reviewed-by: Daisuke Matsuda Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 6 ++++++ drivers/infiniband/sw/rxe/rxe_mr.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_resp.c | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index feb386d98d1da6..0bc3fbb6554f4d 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -140,6 +140,12 @@ static inline int qp_mtu(struct rxe_qp *qp) return IB_MTU_4096; } +static inline bool is_odp_mr(struct rxe_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_odp; +} + void free_rd_atomic_resource(struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 868d2f0b74e967..432d864c3ce9c7 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -323,7 +323,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, return err; } - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return rxe_odp_mr_copy(mr, iova, addr, length, dir); else return rxe_mr_copy_xarray(mr, iova, addr, length, dir); @@ -536,7 +536,7 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) u64 *va; /* ODP is not supported right now. WIP. */ - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return RESPST_ERR_UNSUPPORTED_OPCODE; /* See IBA oA19-28 */ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 54ba9ee1acc598..5d9174e408db44 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -650,7 +650,7 @@ static enum resp_states process_flush(struct rxe_qp *qp, struct resp_res *res = qp->resp.res; /* ODP is not supported right now. WIP. */ - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return RESPST_ERR_UNSUPPORTED_OPCODE; /* oA19-14, oA19-15 */ @@ -706,7 +706,7 @@ static enum resp_states atomic_reply(struct rxe_qp *qp, if (!res->replay) { u64 iova = qp->resp.va + qp->resp.offset; - if (mr->umem->is_odp) + if (is_odp_mr(mr)) err = rxe_odp_atomic_op(mr, iova, pkt->opcode, atmeth_comp(pkt), atmeth_swap_add(pkt), From 7ab4f0e37a0f4207e742a8de69be03984db6ebf0 Mon Sep 17 00:00:00 2001 From: Jean-Marc Eurin Date: Tue, 1 Apr 2025 17:15:42 -0700 Subject: [PATCH 0185/2924] ACPI PPTT: Fix coding mistakes in a couple of sizeof() calls The end of table checks should be done with the structure size, but 2 of the 3 similar calls use the pointer size. Signed-off-by: Jean-Marc Eurin Link: https://patch.msgid.link/20250402001542.2600671-1-jmeurin@google.com [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pptt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index a35dd0e41c2704..f73ce6e13065dd 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -229,7 +229,7 @@ static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr, node_entry = ACPI_PTR_DIFF(node, table_hdr); entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, sizeof(struct acpi_table_pptt)); - proc_sz = sizeof(struct acpi_pptt_processor *); + proc_sz = sizeof(struct acpi_pptt_processor); while ((unsigned long)entry + proc_sz < table_end) { cpu_node = (struct acpi_pptt_processor *)entry; @@ -270,7 +270,7 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he table_end = (unsigned long)table_hdr + table_hdr->length; entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, sizeof(struct acpi_table_pptt)); - proc_sz = sizeof(struct acpi_pptt_processor *); + proc_sz = sizeof(struct acpi_pptt_processor); /* find the processor structure associated with this cpuid */ while ((unsigned long)entry + proc_sz < table_end) { From 9beb2c91fb86e0be70a5833c6730441fa3c9efa8 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 27 Mar 2025 19:47:24 +0800 Subject: [PATCH 0186/2924] RDMA/hns: Fix wrong maximum DMA segment size Set maximum DMA segment size to 2G instead of UINT_MAX due to HW limit. Fixes: e0477b34d9d1 ("RDMA: Explicitly pass in the dma_device to ib_register_device") Link: https://patch.msgid.link/r/20250327114724.3454268-3-huangjunxian6@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index cf89a8db4f64cd..8d0b63d4b50a6c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -763,7 +763,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) if (ret) return ret; } - dma_set_max_seg_size(dev, UINT_MAX); + dma_set_max_seg_size(dev, SZ_2G); ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); From b71a2bb0ce07f40f92f59ed7f283068e41b10075 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Mar 2025 17:33:49 -0400 Subject: [PATCH 0187/2924] drm/amdgpu/mes11: optimize MES pipe FW version fetching Don't fetch it again if we already have it. It seems the registers don't reliably have the value at resume in some cases. Fixes: 028c3fb37e70 ("drm/amdgpu/mes11: initiate mes v11 support") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4083 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index e65916ada23b32..ef9538fbbf5371 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -894,6 +894,10 @@ static void mes_v11_0_get_fw_version(struct amdgpu_device *adev) { int pipe; + /* return early if we have already fetched these */ + if (adev->mes.sched_version && adev->mes.kiq_version) + return; + /* get MES scheduler/KIQ versions */ mutex_lock(&adev->srbm_mutex); From a755906fb2b8370c43e91ba437ae1b3e228e8b02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 20 Mar 2025 14:46:18 +0100 Subject: [PATCH 0188/2924] drm/amdgpu: immediately use GTT for new allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only use GTT as a fallback if we already have a backing store. This prevents evictions when an application constantly allocates and frees new memory. Partially fixes https://gitlab.freedesktop.org/drm/amd/-/issues/3844#note_2833985. Signed-off-by: Christian König Fixes: 216c1282dde3 ("drm/amdgpu: use GTT only as fallback for VRAM|GTT") Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 80cd6f5273db3a..0b9987781f7622 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -163,8 +163,8 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) * When GTT is just an alternative to VRAM make sure that we * only use it as fallback and still try to fill up VRAM first. */ - if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM && - !(adev->flags & AMD_IS_APU)) + if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) && + domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) places[c].flags |= TTM_PL_FLAG_FALLBACK; c++; } From c0dd8a9253fadfb8e5357217d085f1989da4ef0a Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 7 Apr 2025 15:18:25 +0100 Subject: [PATCH 0189/2924] drm/amdgpu/dma_buf: fix page_link check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The page_link lower bits of the first sg could contain something like SG_END, if we are mapping a single VRAM page or contiguous blob which fits into one sg entry. Rather pull out the struct page, and use that in our check to know if we mapped struct pages vs VRAM. Fixes: f44ffd677fb3 ("drm/amdgpu: add support for exporting VRAM using DMA-buf v3") Signed-off-by: Matthew Auld Cc: Christian König Cc: amd-gfx@lists.freedesktop.org Cc: # v5.8+ Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 9f627caedc3f61..c9842a0e2a1cd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -184,7 +184,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { - if (sgt->sgl->page_link) { + if (sg_page(sgt->sgl)) { dma_unmap_sgtable(attach->dev, sgt, dir, 0); sg_free_table(sgt); kfree(sgt); From 2f6dd741cdcdadb9e125cc66d4fcfbe5ab92d36a Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Wed, 26 Mar 2025 20:06:13 +0800 Subject: [PATCH 0190/2924] drm/amdgpu/ip_discovery: add missing ip_discovery fw Signed-off-by: Flora Cui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index dc2713ec95a5bd..9e738fae2b74f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -120,6 +120,8 @@ MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin"); MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin"); MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin"); MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin"); #define mmIP_DISCOVERY_VERSION 0x16A00 #define mmRCC_CONFIG_MEMSIZE 0xde3 From ba6d8f878d6180d4d0ed0574479fc1e232928184 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Fri, 28 Mar 2025 18:14:17 +0800 Subject: [PATCH 0191/2924] drm/amdkfd: sriov doesn't support per queue reset Disable per queue reset for sriov. Signed-off-by: Emily Deng Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index e477d7509646aa..2c4711c67d8aa8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -2001,7 +2001,8 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; - dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; + if (!amdgpu_sriov_vf(dev->gpu->adev)) + dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; } else { dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; From 69a46ce1f15b4391c128d581f6936750f9bfa052 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Wed, 19 Mar 2025 16:31:31 +0800 Subject: [PATCH 0192/2924] drm/amd/display: Do not enable Replay and PSR while VRR is on in amdgpu_dm_commit_planes() [Why] Replay and PSR will cause some video corruption while VRR is enabled. [How] Do not enable the Replay and PSR while VRR is active in amdgpu_dm_enable_self_refresh(). Fixes: 67edb81d6e9a ("drm/amd/display: Disable replay and psr while VRR is enabled") Reviewed-by: Sun peng Li Signed-off-by: Tom Chung Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 36a830a7440f10..87058271b00cc4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -113,6 +113,7 @@ bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state) * * Panel Replay and PSR SU * - Enable when: + * - VRR is disabled * - vblank counter is disabled * - entry is allowed: usermode demonstrates an adequate number of fast * commits) @@ -131,19 +132,20 @@ static void amdgpu_dm_crtc_set_panel_sr_feature( bool is_sr_active = (link->replay_settings.replay_allow_active || link->psr_settings.psr_allow_active); bool is_crc_window_active = false; + bool vrr_active = amdgpu_dm_crtc_vrr_active_irq(vblank_work->acrtc); #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY is_crc_window_active = amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base); #endif - if (link->replay_settings.replay_feature_enabled && + if (link->replay_settings.replay_feature_enabled && !vrr_active && allow_sr_entry && !is_sr_active && !is_crc_window_active) { amdgpu_dm_replay_enable(vblank_work->stream, true); } else if (vblank_enabled) { if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active) amdgpu_dm_psr_disable(vblank_work->stream, false); - } else if (link->psr_settings.psr_feature_enabled && + } else if (link->psr_settings.psr_feature_enabled && !vrr_active && allow_sr_entry && !is_sr_active && !is_crc_window_active) { struct amdgpu_dm_connector *aconn = From 6d03811d7a99e08d5928f58120acb45b8ba22b08 Mon Sep 17 00:00:00 2001 From: Gustavo Silva Date: Tue, 4 Mar 2025 15:01:02 -0300 Subject: [PATCH 0193/2924] iio: imu: bmi270: fix initial sampling frequency configuration In the bmi270_configure_imu() function, the accelerometer and gyroscope configuration registers are incorrectly written with the mask BMI270_PWR_CONF_ADV_PWR_SAVE_MSK, which is unrelated to these registers. As a result, the accelerometer's sampling frequency is set to 200 Hz instead of the intended 100 Hz. Remove the mask to ensure the correct bits are set in the configuration registers. Fixes: 3ea51548d6b2 ("iio: imu: Add i2c driver for bmi270 imu") Signed-off-by: Gustavo Silva Reviewed-by: Alex Lanzano Link: https://patch.msgid.link/20250304-bmi270-odr-fix-v1-1-384dbcd699fb@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bmi270/bmi270_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c index a86be5af5ccb1f..2e4469f30d538c 100644 --- a/drivers/iio/imu/bmi270/bmi270_core.c +++ b/drivers/iio/imu/bmi270/bmi270_core.c @@ -918,8 +918,7 @@ static int bmi270_configure_imu(struct bmi270_data *data) FIELD_PREP(BMI270_ACC_CONF_ODR_MSK, BMI270_ACC_CONF_ODR_100HZ) | FIELD_PREP(BMI270_ACC_CONF_BWP_MSK, - BMI270_ACC_CONF_BWP_NORMAL_MODE) | - BMI270_PWR_CONF_ADV_PWR_SAVE_MSK); + BMI270_ACC_CONF_BWP_NORMAL_MODE)); if (ret) return dev_err_probe(dev, ret, "Failed to configure accelerometer"); @@ -927,8 +926,7 @@ static int bmi270_configure_imu(struct bmi270_data *data) FIELD_PREP(BMI270_GYR_CONF_ODR_MSK, BMI270_GYR_CONF_ODR_200HZ) | FIELD_PREP(BMI270_GYR_CONF_BWP_MSK, - BMI270_GYR_CONF_BWP_NORMAL_MODE) | - BMI270_PWR_CONF_ADV_PWR_SAVE_MSK); + BMI270_GYR_CONF_BWP_NORMAL_MODE)); if (ret) return dev_err_probe(dev, ret, "Failed to configure gyroscope"); From 38f67d0264929762e54ae5948703a21f841fe706 Mon Sep 17 00:00:00 2001 From: Lothar Rubusch Date: Sun, 9 Mar 2025 19:35:15 +0000 Subject: [PATCH 0194/2924] iio: accel: adxl367: fix setting odr for activity time update Fix setting the odr value to update activity time based on frequency derrived by recent odr, and not by obsolete odr value. The [small] bug: When _adxl367_set_odr() is called with a new odr value, it first writes the new odr value to the hardware register ADXL367_REG_FILTER_CTL. Second, it calls _adxl367_set_act_time_ms(), which calls adxl367_time_ms_to_samples(). Here st->odr still holds the old odr value. This st->odr member is used to derrive a frequency value, which is applied to update ADXL367_REG_TIME_ACT. Hence, the idea is to update activity time, based on possibilities and power consumption by the current ODR rate. Finally, when the function calls return, again in _adxl367_set_odr() the new ODR is assigned to st->odr. The fix: When setting a new ODR value is set to ADXL367_REG_FILTER_CTL, also ADXL367_REG_TIME_ACT should probably be updated with a frequency based on the recent ODR value and not the old one. Changing the location of the assignment to st->odr fixes this. Fixes: cbab791c5e2a5 ("iio: accel: add ADXL367 driver") Signed-off-by: Lothar Rubusch Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/20250309193515.2974-1-l.rubusch@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl367.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index add4053e7a02e7..0c04b2bb7efbf7 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -601,18 +601,14 @@ static int _adxl367_set_odr(struct adxl367_state *st, enum adxl367_odr odr) if (ret) return ret; + st->odr = odr; + /* Activity timers depend on ODR */ ret = _adxl367_set_act_time_ms(st, st->act_time_ms); if (ret) return ret; - ret = _adxl367_set_inact_time_ms(st, st->inact_time_ms); - if (ret) - return ret; - - st->odr = odr; - - return 0; + return _adxl367_set_inact_time_ms(st, st->inact_time_ms); } static int adxl367_set_odr(struct iio_dev *indio_dev, enum adxl367_odr odr) From 159ca7f18129834b6f4c7eae67de48e96c752fc9 Mon Sep 17 00:00:00 2001 From: Silvano Seva Date: Tue, 11 Mar 2025 09:49:47 +0100 Subject: [PATCH 0195/2924] iio: imu: st_lsm6dsx: fix possible lockup in st_lsm6dsx_read_fifo Prevent st_lsm6dsx_read_fifo from falling in an infinite loop in case pattern_len is equal to zero and the device FIFO is not empty. Fixes: 290a6ce11d93 ("iio: imu: add support to lsm6dsx driver") Signed-off-by: Silvano Seva Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250311085030.3593-2-s.seva@4sigma.it Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 0a7cd8c1aa3313..480a9b31065cd3 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -392,6 +392,9 @@ int st_lsm6dsx_read_fifo(struct st_lsm6dsx_hw *hw) if (fifo_status & cpu_to_le16(ST_LSM6DSX_FIFO_EMPTY_MASK)) return 0; + if (!pattern_len) + pattern_len = ST_LSM6DSX_SAMPLE_SIZE; + fifo_len = (le16_to_cpu(fifo_status) & fifo_diff_mask) * ST_LSM6DSX_CHAN_SIZE; fifo_len = (fifo_len / pattern_len) * pattern_len; From 8114ef86e2058e2554111b793596f17bee23fa15 Mon Sep 17 00:00:00 2001 From: Silvano Seva Date: Tue, 11 Mar 2025 09:49:49 +0100 Subject: [PATCH 0196/2924] iio: imu: st_lsm6dsx: fix possible lockup in st_lsm6dsx_read_tagged_fifo Prevent st_lsm6dsx_read_tagged_fifo from falling in an infinite loop in case pattern_len is equal to zero and the device FIFO is not empty. Fixes: 801a6e0af0c6 ("iio: imu: st_lsm6dsx: add support to LSM6DSO") Signed-off-by: Silvano Seva Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250311085030.3593-4-s.seva@4sigma.it Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 480a9b31065cd3..8a9d2593576a2a 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -626,6 +626,9 @@ int st_lsm6dsx_read_tagged_fifo(struct st_lsm6dsx_hw *hw) if (!fifo_len) return 0; + if (!pattern_len) + pattern_len = ST_LSM6DSX_TAGGED_SAMPLE_SIZE; + for (read_len = 0; read_len < fifo_len; read_len += pattern_len) { err = st_lsm6dsx_read_block(hw, ST_LSM6DSX_REG_FIFO_OUT_TAG_ADDR, From 839f81de397019f55161c5982d670ac19d836173 Mon Sep 17 00:00:00 2001 From: Simon Xue Date: Wed, 12 Mar 2025 14:20:16 +0800 Subject: [PATCH 0197/2924] iio: adc: rockchip: Fix clock initialization sequence clock_set_rate should be executed after devm_clk_get_enabled. Fixes: 97ad10bb2901 ("iio: adc: rockchip_saradc: Make use of devm_clk_get_enabled") Signed-off-by: Simon Xue Reviewed-by: Heiko Stuebner Link: https://patch.msgid.link/20250312062016.137821-1-xxm@rock-chips.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rockchip_saradc.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index 9a099df7951891..5e28bd28b81a9a 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -520,15 +520,6 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (info->reset) rockchip_saradc_reset_controller(info->reset); - /* - * Use a default value for the converter clock. - * This may become user-configurable in the future. - */ - ret = clk_set_rate(info->clk, info->data->clk_rate); - if (ret < 0) - return dev_err_probe(&pdev->dev, ret, - "failed to set adc clk rate\n"); - ret = regulator_enable(info->vref); if (ret < 0) return dev_err_probe(&pdev->dev, ret, @@ -555,6 +546,14 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (IS_ERR(info->clk)) return dev_err_probe(&pdev->dev, PTR_ERR(info->clk), "failed to get adc clock\n"); + /* + * Use a default value for the converter clock. + * This may become user-configurable in the future. + */ + ret = clk_set_rate(info->clk, info->data->clk_rate); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "failed to set adc clk rate\n"); platform_set_drvdata(pdev, indio_dev); From 82c51ac74071b80b3199d9e200ae1a5399f4deb0 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 20 Mar 2025 11:21:52 -0500 Subject: [PATCH 0198/2924] iio: adc: ad7380: disable offload before using SPI bus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move disabling of the SPI offload before attempting to use the SPI bus to write a register in ad7380_offload_buffer_predisable(). This caused a crash in the spi_engine_irq() interrupt handler due to being in an invalid state. Fixes: bbeaec81a03e ("iio: ad7380: add support for SPI offload") Signed-off-by: David Lechner Reviewed-by: Nuno Sá Reviewed-by: Angelo Dureghello Link: https://patch.msgid.link/20250320-iio-adc-ad7380-fix-spi-offload-buffer-predisable-v1-1-6912ac8c0ae0@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index 4fcb49fdf56639..a2b41980c942e4 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -1211,6 +1211,9 @@ static int ad7380_offload_buffer_predisable(struct iio_dev *indio_dev) struct ad7380_state *st = iio_priv(indio_dev); int ret; + spi_offload_trigger_disable(st->offload, st->offload_trigger); + spi_unoptimize_message(&st->offload_msg); + if (st->seq) { ret = regmap_update_bits(st->regmap, AD7380_REG_ADDR_CONFIG1, @@ -1222,10 +1225,6 @@ static int ad7380_offload_buffer_predisable(struct iio_dev *indio_dev) st->seq = false; } - spi_offload_trigger_disable(st->offload, st->offload_trigger); - - spi_unoptimize_message(&st->offload_msg); - return 0; } From f063a28002e3350088b4577c5640882bf4ea17ea Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Fri, 21 Mar 2025 19:10:00 +0100 Subject: [PATCH 0199/2924] iio: light: opt3001: fix deadlock due to concurrent flag access The threaded IRQ function in this driver is reading the flag twice: once to lock a mutex and once to unlock it. Even though the code setting the flag is designed to prevent it, there are subtle cases where the flag could be true at the mutex_lock stage and false at the mutex_unlock stage. This results in the mutex not being unlocked, resulting in a deadlock. Fix it by making the opt3001_irq() code generally more robust, reading the flag into a variable and using the variable value at both stages. Fixes: 94a9b7b1809f ("iio: light: add support for TI's opt3001 light sensor") Cc: stable@vger.kernel.org Signed-off-by: Luca Ceresoli Link: https://patch.msgid.link/20250321-opt3001-irq-fix-v1-1-6c520d851562@bootlin.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/opt3001.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/opt3001.c b/drivers/iio/light/opt3001.c index 65b295877b4158..393a3d2fbe1d73 100644 --- a/drivers/iio/light/opt3001.c +++ b/drivers/iio/light/opt3001.c @@ -788,8 +788,9 @@ static irqreturn_t opt3001_irq(int irq, void *_iio) int ret; bool wake_result_ready_queue = false; enum iio_chan_type chan_type = opt->chip_info->chan_type; + bool ok_to_ignore_lock = opt->ok_to_ignore_lock; - if (!opt->ok_to_ignore_lock) + if (!ok_to_ignore_lock) mutex_lock(&opt->lock); ret = i2c_smbus_read_word_swapped(opt->client, OPT3001_CONFIGURATION); @@ -826,7 +827,7 @@ static irqreturn_t opt3001_irq(int irq, void *_iio) } out: - if (!opt->ok_to_ignore_lock) + if (!ok_to_ignore_lock) mutex_unlock(&opt->lock); if (wake_result_ready_queue) From 5257d80e22bf27009d6742e4c174f42cfe54e425 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 18 Mar 2025 17:52:09 -0500 Subject: [PATCH 0200/2924] iio: adc: ad7606: check for NULL before calling sw_mode_config() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check that the sw_mode_config function pointer is not NULL before calling it. Not all buses define this callback, which resulted in a NULL pointer dereference. Fixes: e571c1902116 ("iio: adc: ad7606: move scale_setup as function pointer on chip-info") Reviewed-by: Nuno Sá Signed-off-by: David Lechner Link: https://patch.msgid.link/20250318-iio-adc-ad7606-improvements-v2-1-4b605427774c@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index 1a314fddd7eb98..703556eb7257ea 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -1236,9 +1236,11 @@ static int ad7616_sw_mode_setup(struct iio_dev *indio_dev) st->write_scale = ad7616_write_scale_sw; st->write_os = &ad7616_write_os_sw; - ret = st->bops->sw_mode_config(indio_dev); - if (ret) - return ret; + if (st->bops->sw_mode_config) { + ret = st->bops->sw_mode_config(indio_dev); + if (ret) + return ret; + } /* Activate Burst mode and SEQEN MODE */ return ad7606_write_mask(st, AD7616_CONFIGURATION_REGISTER, @@ -1268,6 +1270,9 @@ static int ad7606b_sw_mode_setup(struct iio_dev *indio_dev) st->write_scale = ad7606_write_scale_sw; st->write_os = &ad7606_write_os_sw; + if (!st->bops->sw_mode_config) + return 0; + return st->bops->sw_mode_config(indio_dev); } From 83ded7cfaccccd2f4041769c313b58b4c9e265ad Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Mon, 31 Mar 2025 13:50:20 +0800 Subject: [PATCH 0201/2924] iio: hid-sensor-prox: Restore lost scale assignments The variables `scale_pre_decml`, `scale_post_decml`, and `scale_precision` were assigned in commit d68c592e02f6 ("iio: hid-sensor-prox: Fix scale not correct issue"), but due to a merge conflict in commit 9c15db92a8e5 ("Merge tag 'iio-for-5.13a' of https://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-next"), these assignments were lost. Add back lost assignments and replace `st->prox_attr` with `st->prox_attr[0]` because commit 596ef5cf654b ("iio: hid-sensor-prox: Add support for more channels") changed `prox_attr` to an array. Cc: stable@vger.kernel.org # 5.13+ Fixes: 9c15db92a8e5 ("Merge tag 'iio-for-5.13a' of https://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio into staging-next") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250331055022.1149736-2-lixu.zhang@intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-prox.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 76b76d12b38822..1dc6fb7cf61469 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -257,6 +257,11 @@ static int prox_parse_report(struct platform_device *pdev, st->num_channels = index; + st->scale_precision = hid_sensor_format_scale(hsdev->usage, + &st->prox_attr[0], + &st->scale_pre_decml, + &st->scale_post_decml); + return 0; } From 8b518cdb03f5f6e06d635cbfd9583d1fdbb39bfd Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Mon, 31 Mar 2025 13:50:21 +0800 Subject: [PATCH 0202/2924] iio: hid-sensor-prox: support multi-channel SCALE calculation With the introduction of multi-channel support in commit 596ef5cf654b ("iio: hid-sensor-prox: Add support for more channels"), each channel requires an independent SCALE calculation, but the existing code only calculates SCALE for a single channel. Addresses the problem by modifying the driver to perform independent SCALE calculations for each channel. Cc: stable@vger.kernel.org Fixes: 596ef5cf654b ("iio: hid-sensor-prox: Add support for more channels") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250331055022.1149736-3-lixu.zhang@intel.com Signed-off-by: Jonathan Cameron --- .../hid-sensors/hid-sensor-attributes.c | 4 ++++ drivers/iio/light/hid-sensor-prox.c | 24 ++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c index ad1882f608c0a2..2055a03cbeb187 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c @@ -66,6 +66,10 @@ static struct { {HID_USAGE_SENSOR_HUMIDITY, 0, 1000, 0}, {HID_USAGE_SENSOR_HINGE, 0, 0, 17453293}, {HID_USAGE_SENSOR_HINGE, HID_USAGE_SENSOR_UNITS_DEGREES, 0, 17453293}, + + {HID_USAGE_SENSOR_HUMAN_PRESENCE, 0, 1, 0}, + {HID_USAGE_SENSOR_HUMAN_PROXIMITY, 0, 1, 0}, + {HID_USAGE_SENSOR_HUMAN_ATTENTION, 0, 1, 0}, }; static void simple_div(int dividend, int divisor, int *whole, diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 1dc6fb7cf61469..941508e58286c9 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -34,9 +34,9 @@ struct prox_state { struct iio_chan_spec channels[MAX_CHANNELS]; u32 channel2usage[MAX_CHANNELS]; u32 human_presence[MAX_CHANNELS]; - int scale_pre_decml; - int scale_post_decml; - int scale_precision; + int scale_pre_decml[MAX_CHANNELS]; + int scale_post_decml[MAX_CHANNELS]; + int scale_precision[MAX_CHANNELS]; unsigned long scan_mask[2]; /* One entry plus one terminator. */ int num_channels; }; @@ -116,9 +116,12 @@ static int prox_read_raw(struct iio_dev *indio_dev, ret_type = IIO_VAL_INT; break; case IIO_CHAN_INFO_SCALE: - *val = prox_state->scale_pre_decml; - *val2 = prox_state->scale_post_decml; - ret_type = prox_state->scale_precision; + if (chan->scan_index >= prox_state->num_channels) + return -EINVAL; + + *val = prox_state->scale_pre_decml[chan->scan_index]; + *val2 = prox_state->scale_post_decml[chan->scan_index]; + ret_type = prox_state->scale_precision[chan->scan_index]; break; case IIO_CHAN_INFO_OFFSET: *val = hid_sensor_convert_exponent( @@ -249,6 +252,10 @@ static int prox_parse_report(struct platform_device *pdev, st->prox_attr[index].size); dev_dbg(&pdev->dev, "prox %x:%x\n", st->prox_attr[index].index, st->prox_attr[index].report_id); + st->scale_precision[index] = + hid_sensor_format_scale(usage_id, &st->prox_attr[index], + &st->scale_pre_decml[index], + &st->scale_post_decml[index]); index++; } @@ -257,11 +264,6 @@ static int prox_parse_report(struct platform_device *pdev, st->num_channels = index; - st->scale_precision = hid_sensor_format_scale(hsdev->usage, - &st->prox_attr[0], - &st->scale_pre_decml, - &st->scale_post_decml); - return 0; } From 79dabbd505210e41c88060806c92c052496dd61c Mon Sep 17 00:00:00 2001 From: Zhang Lixu Date: Mon, 31 Mar 2025 13:50:22 +0800 Subject: [PATCH 0203/2924] iio: hid-sensor-prox: Fix incorrect OFFSET calculation The OFFSET calculation in the prox_read_raw() was incorrectly using the unit exponent, which is intended for SCALE calculations. Remove the incorrect OFFSET calculation and set it to a fixed value of 0. Cc: stable@vger.kernel.org Fixes: 39a3a0138f61 ("iio: hid-sensors: Added Proximity Sensor Driver") Signed-off-by: Zhang Lixu Acked-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250331055022.1149736-4-lixu.zhang@intel.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-prox.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 941508e58286c9..4c65b32d34ce41 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -124,8 +124,7 @@ static int prox_read_raw(struct iio_dev *indio_dev, ret_type = prox_state->scale_precision[chan->scan_index]; break; case IIO_CHAN_INFO_OFFSET: - *val = hid_sensor_convert_exponent( - prox_state->prox_attr[chan->scan_index].unit_expo); + *val = 0; ret_type = IIO_VAL_INT; break; case IIO_CHAN_INFO_SAMP_FREQ: From 2d7b60f33da324abe7824037b4829ff7df70e435 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 2 Apr 2025 18:55:58 -0500 Subject: [PATCH 0204/2924] iio: adc: ad7380: fix event threshold shift Add required bit shift to the event threshold read function to get correct scaling. When alert support was added, the write function correctly included the required shift needed to convert the threshold register value to the same scale as the raw ADC value. However, the shift got missed in the read function. Fixes: 27d1a4dbe1e1 ("iio: adc: ad7380: add alert support") Signed-off-by: David Lechner Reviewed-by: Julien Stephan Link: https://patch.msgid.link/20250402-iio-adc-ad7380-fix-event-threshold-shift-v1-1-ad4975c296b2@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7380.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index a2b41980c942e4..aef85093eb16cb 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -1610,11 +1610,25 @@ static int ad7380_write_event_config(struct iio_dev *indio_dev, return ret; } -static int ad7380_get_alert_th(struct ad7380_state *st, +static int ad7380_get_alert_th(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, enum iio_event_direction dir, int *val) { - int ret, tmp; + struct ad7380_state *st = iio_priv(indio_dev); + const struct iio_scan_type *scan_type; + int ret, tmp, shift; + + scan_type = iio_get_current_scan_type(indio_dev, chan); + if (IS_ERR(scan_type)) + return PTR_ERR(scan_type); + + /* + * The register value is 12-bits and is compared to the most significant + * bits of raw value, therefore a shift is required to convert this to + * the same scale as the raw value. + */ + shift = scan_type->realbits - 12; switch (dir) { case IIO_EV_DIR_RISING: @@ -1624,7 +1638,7 @@ static int ad7380_get_alert_th(struct ad7380_state *st, if (ret) return ret; - *val = FIELD_GET(AD7380_ALERT_HIGH_TH, tmp); + *val = FIELD_GET(AD7380_ALERT_HIGH_TH, tmp) << shift; return IIO_VAL_INT; case IIO_EV_DIR_FALLING: ret = regmap_read(st->regmap, @@ -1633,7 +1647,7 @@ static int ad7380_get_alert_th(struct ad7380_state *st, if (ret) return ret; - *val = FIELD_GET(AD7380_ALERT_LOW_TH, tmp); + *val = FIELD_GET(AD7380_ALERT_LOW_TH, tmp) << shift; return IIO_VAL_INT; default: return -EINVAL; @@ -1647,7 +1661,6 @@ static int ad7380_read_event_value(struct iio_dev *indio_dev, enum iio_event_info info, int *val, int *val2) { - struct ad7380_state *st = iio_priv(indio_dev); int ret; switch (info) { @@ -1655,7 +1668,7 @@ static int ad7380_read_event_value(struct iio_dev *indio_dev, if (!iio_device_claim_direct(indio_dev)) return -EBUSY; - ret = ad7380_get_alert_th(st, dir, val); + ret = ad7380_get_alert_th(indio_dev, chan, dir, val); iio_device_release_direct(indio_dev); return ret; From 4408b59eeacfea777aae397177f49748cadde5ce Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 27 Mar 2025 17:53:32 +0800 Subject: [PATCH 0205/2924] drm/amd/display: Protect FPU in dml21_copy() Commit 7da55c27e76749b9 ("drm/amd/display: Remove incorrect FP context start") removes the FP context protection of dml2_create(), and it said "All the DC_FP_START/END should be used before call anything from DML2". However, dml21_copy() are not protected from their callers, causing such errors: do_fpu invoked from kernel context![#1]: CPU: 0 UID: 0 PID: 240 Comm: kworker/0:5 Not tainted 6.14.0-rc6+ #1 Workqueue: events work_for_cpu_fn pc ffff80000318bd2c ra ffff80000315750c tp 9000000105910000 sp 9000000105913810 a0 0000000000000000 a1 0000000000000002 a2 900000013140d728 a3 900000013140d720 a4 0000000000000000 a5 9000000131592d98 a6 0000000000017ae8 a7 00000000001312d0 t0 9000000130751ff0 t1 ffff800003790000 t2 ffff800003790000 t3 9000000131592e28 t4 000000000004c6a8 t5 00000000001b7740 t6 0000000000023e38 t7 0000000000249f00 t8 0000000000000002 u0 0000000000000000 s9 900000012b010000 s0 9000000131400000 s1 9000000130751fd8 s2 ffff800003408000 s3 9000000130752c78 s4 9000000131592da8 s5 9000000131592120 s6 9000000130751ff0 s7 9000000131592e28 s8 9000000131400008 ra: ffff80000315750c dml2_top_soc15_initialize_instance+0x20c/0x300 [amdgpu] ERA: ffff80000318bd2c mcg_dcn4_build_min_clock_table+0x14c/0x600 [amdgpu] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000f0000 [FPD] (IS= ECode=15 EsubCode=0) PRID: 0014d010 (Loongson-64bit, Loongson-3C6000/S) Process kworker/0:5 (pid: 240, threadinfo=00000000f1700428, task=0000000020d2e962) Stack : 0000000000000000 0000000000000000 0000000000000000 9000000130751fd8 9000000131400000 ffff8000031574e0 9000000130751ff0 0000000000000000 9000000131592e28 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 f9175936df5d7fd2 900000012b00ff08 900000012b000000 ffff800003409000 ffff8000034a1780 90000001019634c0 900000012b000010 90000001307beeb8 90000001306b0000 0000000000000001 ffff8000031942b4 9000000130780000 90000001306c0000 9000000130780000 ffff8000031c276c 900000012b044bd0 ffff800003408000 ... Call Trace: [] mcg_dcn4_build_min_clock_table+0x14c/0x600 [amdgpu] [] dml2_top_soc15_initialize_instance+0x208/0x300 [amdgpu] [] dml21_create_copy+0x30/0x60 [amdgpu] [] dc_state_create_copy+0x68/0xe0 [amdgpu] [] amdgpu_dm_init+0x8c0/0x2060 [amdgpu] [] dm_hw_init+0x18/0x60 [amdgpu] [] amdgpu_device_init+0x1938/0x27e0 [amdgpu] [] amdgpu_driver_load_kms+0x20/0xa0 [amdgpu] [] amdgpu_pci_probe+0x1b0/0x580 [amdgpu] [<9000000003c7eae4>] local_pci_probe+0x44/0xc0 [<90000000032f2b18>] work_for_cpu_fn+0x18/0x40 [<90000000032f5da0>] process_one_work+0x160/0x300 [<90000000032f6718>] worker_thread+0x318/0x440 [<9000000003301b8c>] kthread+0x12c/0x220 [<90000000032b1484>] ret_from_kernel_thread+0x8/0xa4 Unfortunately, protecting dml21_copy() out of DML2 causes "sleeping function called from invalid context", so protect them with DC_FP_START() and DC_FP_END() inside. Fixes: 7da55c27e767 ("drm/amd/display: Remove incorrect FP context start") Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index be54f0e696ce28..01e57267be9830 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -426,8 +426,12 @@ void dml21_copy(struct dml2_context *dst_dml_ctx, dst_dml_ctx->v21.mode_programming.programming = dst_dml2_programming; + DC_FP_START(); + /* need to initialize copied instance for internal references to be correct */ dml2_initialize_instance(&dst_dml_ctx->v21.dml_init); + + DC_FP_END(); } bool dml21_create_copy(struct dml2_context **dst_dml_ctx, From afcdf51d97cd58dd7a2e0aa8acbaea5108fa6826 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 27 Mar 2025 17:53:33 +0800 Subject: [PATCH 0206/2924] drm/amd/display: Protect FPU in dml2_init()/dml21_init() Commit 7da55c27e76749b9 ("drm/amd/display: Remove incorrect FP context start") removes the FP context protection of dml2_create(), and it said "All the DC_FP_START/END should be used before call anything from DML2". However, dml2_init()/dml21_init() are not protected from their callers, causing such errors: do_fpu invoked from kernel context![#1]: CPU: 0 UID: 0 PID: 239 Comm: kworker/0:5 Not tainted 6.14.0-rc6+ #2 Workqueue: events work_for_cpu_fn pc ffff80000319de80 ra ffff80000319de5c tp 900000010575c000 sp 900000010575f840 a0 0000000000000000 a1 900000012f210130 a2 900000012f000000 a3 ffff80000357e268 a4 ffff80000357e260 a5 900000012ea52cf0 a6 0000000400000004 a7 0000012c00001388 t0 00001900000015e0 t1 ffff80000379d000 t2 0000000010624dd3 t3 0000006400000014 t4 00000000000003e8 t5 0000005000000018 t6 0000000000000020 t7 0000000f00000064 t8 000000000000002f u0 5f5e9200f8901912 s9 900000012d380010 s0 900000012ea51fd8 s1 900000012f000000 s2 9000000109296000 s3 0000000000000001 s4 0000000000001fd8 s5 0000000000000001 s6 ffff800003415000 s7 900000012d390000 s8 ffff800003211f80 ra: ffff80000319de5c dml21_apply_soc_bb_overrides+0x3c/0x960 [amdgpu] ERA: ffff80000319de80 dml21_apply_soc_bb_overrides+0x60/0x960 [amdgpu] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000f0000 [FPD] (IS= ECode=15 EsubCode=0) PRID: 0014d010 (Loongson-64bit, Loongson-3C6000/S) Process kworker/0:5 (pid: 239, threadinfo=00000000927eadc6, task=000000008fd31682) Stack : 00040dc000003164 0000000000000001 900000012f210130 900000012eabeeb8 900000012f000000 ffff80000319fe48 900000012f210000 900000012f210130 900000012f000000 900000012eabeeb8 0000000000000001 ffff8000031a0064 900000010575f9f0 900000012f210130 900000012eac0000 900000012ea80000 900000012f000000 ffff8000031cefc4 900000010575f9f0 ffff8000035859c0 ffff800003414000 900000010575fa78 900000012f000000 ffff8000031b4c50 0000000000000000 9000000101c9d700 9000000109c40000 5f5e9200f8901912 900000012d3c4bd0 900000012d3c5000 ffff8000034aed18 900000012d380010 900000012d3c4bd0 ffff800003414000 900000012d380000 ffff800002ea49dc 0000000000000001 900000012d3c6000 00000000ffffe423 0000000000010000 ... Call Trace: [] dml21_apply_soc_bb_overrides+0x60/0x960 [amdgpu] [] dml21_init+0xa4/0x280 [amdgpu] [] dml21_create+0x40/0x80 [amdgpu] [] dc_state_create+0x100/0x160 [amdgpu] [] dc_create+0x44c/0x640 [amdgpu] [] amdgpu_dm_init+0x3f8/0x2060 [amdgpu] [] dm_hw_init+0x18/0x60 [amdgpu] [] amdgpu_device_init+0x1938/0x27e0 [amdgpu] [] amdgpu_driver_load_kms+0x20/0xa0 [amdgpu] [] amdgpu_pci_probe+0x1b0/0x580 [amdgpu] [<900000000448eae4>] local_pci_probe+0x44/0xc0 [<9000000003b02b18>] work_for_cpu_fn+0x18/0x40 [<9000000003b05da0>] process_one_work+0x160/0x300 [<9000000003b06718>] worker_thread+0x318/0x440 [<9000000003b11b8c>] kthread+0x12c/0x220 [<9000000003ac1484>] ret_from_kernel_thread+0x8/0xa4 Unfortunately, protecting dml2_init()/dml21_init() out of DML2 causes "sleeping function called from invalid context", so protect them with DC_FP_START() and DC_FP_END() inside. Fixes: 7da55c27e767 ("drm/amd/display: Remove incorrect FP context start") Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 4 ++++ drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 01e57267be9830..987ce25e4b7b43 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -86,6 +86,8 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co /* Store configuration options */ (*dml_ctx)->config = *config; + DC_FP_START(); + /*Initialize SOCBB and DCNIP params */ dml21_initialize_soc_bb_params(&(*dml_ctx)->v21.dml_init, config, in_dc); dml21_initialize_ip_params(&(*dml_ctx)->v21.dml_init, config, in_dc); @@ -96,6 +98,8 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co /*Initialize DML21 instance */ dml2_initialize_instance(&(*dml_ctx)->v21.dml_init); + + DC_FP_END(); } bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 939ee0708bd23b..6e68a4ecfd2a57 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -779,11 +779,15 @@ static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_op break; } + DC_FP_START(); + initialize_dml2_ip_params(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.ip); initialize_dml2_soc_bbox(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc); initialize_dml2_soc_states(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc, &(*dml2)->v20.dml_core_ctx.states); + + DC_FP_END(); } bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) From 366e77cd4923c3aa45341e15dcaf3377af9b042f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 27 Mar 2025 17:53:34 +0800 Subject: [PATCH 0207/2924] drm/amd/display: Protect FPU in dml2_validate()/dml21_validate() Commit 7da55c27e76749b9 ("drm/amd/display: Remove incorrect FP context start") removes the FP context protection of dml2_create(), and it said "All the DC_FP_START/END should be used before call anything from DML2". However, dml2_validate()/dml21_validate() are not protected from their callers, causing such errors: do_fpu invoked from kernel context![#1]: CPU: 10 UID: 0 PID: 331 Comm: kworker/10:1H Not tainted 6.14.0-rc6+ #4 Workqueue: events_highpri dm_irq_work_func [amdgpu] pc ffff800003191eb0 ra ffff800003191e60 tp 9000000107a94000 sp 9000000107a975b0 a0 9000000140ce4910 a1 0000000000000000 a2 9000000140ce49b0 a3 9000000140ce49a8 a4 9000000140ce49a8 a5 0000000100000000 a6 0000000000000001 a7 9000000107a97660 t0 ffff800003790000 t1 9000000140ce5000 t2 0000000000000001 t3 0000000000000000 t4 0000000000000004 t5 0000000000000000 t6 0000000000000000 t7 0000000000000000 t8 0000000100000000 u0 ffff8000031a3b9c s9 9000000130bc0000 s0 9000000132400000 s1 9000000140ec0000 s2 9000000132400000 s3 9000000140ce0000 s4 90000000057f8b88 s5 9000000140ec0000 s6 9000000140ce4910 s7 0000000000000001 s8 9000000130d45010 ra: ffff800003191e60 dml21_map_dc_state_into_dml_display_cfg+0x40/0x1140 [amdgpu] ERA: ffff800003191eb0 dml21_map_dc_state_into_dml_display_cfg+0x90/0x1140 [amdgpu] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000f0000 [FPD] (IS= ECode=15 EsubCode=0) PRID: 0014d010 (Loongson-64bit, Loongson-3C6000/S) Process kworker/10:1H (pid: 331, threadinfo=000000007bf9ddb0, task=00000000cc4ab9f3) Stack : 0000000100000000 0000043800000780 0000000100000001 0000000100000001 0000000000000000 0000078000000000 0000000000000438 0000078000000000 0000000000000438 0000078000000000 0000000000000438 0000000100000000 0000000100000000 0000000100000000 0000000100000000 0000000100000000 0000000000000001 9000000140ec0000 9000000132400000 9000000132400000 ffff800003408000 ffff800003408000 9000000132400000 9000000140ce0000 9000000140ce0000 ffff800003193850 0000000000000001 9000000140ec0000 9000000132400000 9000000140ec0860 9000000140ec0738 0000000000000001 90000001405e8000 9000000130bc0000 9000000140ec02a8 ffff8000031b5db8 0000000000000000 0000043800000780 0000000000000003 ffff8000031b79cc ... Call Trace: [] dml21_map_dc_state_into_dml_display_cfg+0x90/0x1140 [amdgpu] [] dml21_validate+0xcc/0x520 [amdgpu] [] dc_validate_global_state+0x2e8/0x460 [amdgpu] [] create_validate_stream_for_sink+0x3d4/0x420 [amdgpu] [] amdgpu_dm_connector_mode_valid+0x64/0x240 [amdgpu] [<900000000441d6b8>] drm_connector_mode_valid+0x38/0x80 [<900000000441d824>] __drm_helper_update_and_validate+0x124/0x3e0 [<900000000441ddc0>] drm_helper_probe_single_connector_modes+0x2e0/0x620 [<90000000044050dc>] drm_client_modeset_probe+0x23c/0x1780 [<9000000004420384>] __drm_fb_helper_initial_config_and_unlock+0x44/0x5a0 [<9000000004403acc>] drm_client_dev_hotplug+0xcc/0x140 [] handle_hpd_irq_helper+0x1b0/0x1e0 [amdgpu] [<90000000038f5da0>] process_one_work+0x160/0x300 [<90000000038f6718>] worker_thread+0x318/0x440 [<9000000003901b8c>] kthread+0x12c/0x220 [<90000000038b1484>] ret_from_kernel_thread+0x8/0xa4 Unfortunately, protecting dml2_validate()/dml21_validate() out of DML2 causes "sleeping function called from invalid context", so protect them with DC_FP_START() and DC_FP_END() inside. Fixes: 7da55c27e767 ("drm/amd/display: Remove incorrect FP context start") Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Tested-by: Dongyan Qian Reviewed-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 9 +++++++-- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 5 +++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 987ce25e4b7b43..94e99e540691cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -287,11 +287,16 @@ bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml { bool out = false; + DC_FP_START(); + /* Use dml_validate_only for fast_validate path */ - if (fast_validate) { + if (fast_validate) out = dml21_check_mode_support(in_dc, context, dml_ctx); - } else + else out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); + + DC_FP_END(); + return out; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 6e68a4ecfd2a57..f549a778f6f1a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -732,11 +732,16 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2 return out; } + DC_FP_START(); + /* Use dml_validate_only for fast_validate path */ if (fast_validate) out = dml2_validate_only(context); else out = dml2_validate_and_build_resource(in_dc, context); + + DC_FP_END(); + return out; } From 139e99d58e373bd11f085766e681d21d34d0b097 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 6 Mar 2025 11:29:20 -0600 Subject: [PATCH 0208/2924] drm/amd/display: Add HP Probook 445 and 465 to the quirk list for eDP on DP1 [Why] HP Probook 445 and 465 has DP0 and DP1 swapped. [How] Add HP Probook 445 and 465 to DP0/DP1 swap quirk list. Cc: stable@vger.kernel.org Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3995 Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Roman Li Tested-by: Anson Tsao Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d0d8ad5368c3f3..11dde6f297c744 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1729,6 +1729,20 @@ static const struct dmi_system_id dmi_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"), }, }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 445 14 inch G11 Notebook PC"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 465 16 inch G11 Notebook PC"), + }, + }, {} /* TODO: refactor this from a fixed table to a dynamic option */ }; From 1c5fdef30ed120613e769a3bd2a144cfd4c688d6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 27 Mar 2025 14:07:55 -0500 Subject: [PATCH 0209/2924] drm/amd/display: Add HP Elitebook 645 to the quirk list for eDP on DP1 [Why] HP Elitebook 645 has DP0 and DP1 swapped. [How] Add HP Elitebook 645 to DP0/DP1 swap quirk list. Cc: stable@vger.kernel.org Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3701 Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 11dde6f297c744..9fed4471405f89 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1722,6 +1722,13 @@ static const struct dmi_system_id dmi_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"), }, }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 645 14 inch G11 Notebook PC"), + }, + }, { .callback = edp0_on_dp1_callback, .matches = { From b3862d60b1a8b6face673c820dccdd9c449563cc Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 27 Mar 2025 11:50:42 -0400 Subject: [PATCH 0210/2924] drm/amdkfd: limit sdma queue reset caps flagging for gfx9 ASICs post GFX 9 are being flagged as SDMA per queue reset supported in the KGD but KFD and scheduler FW currently have no support. Limit SDMA queue reset capabilities to GFX 9. Fixes: ceb7114c961b ("drm/amdkfd: flag per-sdma queue reset supported to user space") Signed-off-by: Jonathan Kim Reviewed-by: David Belanger Reviewed-by: Harish Kasiviswanathan Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 2c4711c67d8aa8..9bbee484d57cc4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1983,9 +1983,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; - if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) - dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; - if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) || KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4)) @@ -2003,6 +2000,9 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (!amdgpu_sriov_vf(dev->gpu->adev)) dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; + + if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) + dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; } else { dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; From 3666ed821832f42baaf25f362680dda603cde732 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Fri, 21 Mar 2025 13:19:05 -0500 Subject: [PATCH 0211/2924] drm/amdgpu: Increase KIQ invalidate_tlbs timeout KIQ invalidate_tlbs request has been seen to marginally exceed the configured 100 ms timeout on systems under load. All other KIQ requests in the driver use a 10 second timeout. Use a similar timeout implementation on the invalidate_tlbs path. v2: Poll once before msleep v3: Fix return value Signed-off-by: Jay Cornwall Cc: Kent Russell Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 19 ++++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6d83ccfa42eeb0..2c04ae13384845 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -353,7 +353,6 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; -#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 464625282872aa..ecb74ccf1d9081 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -699,12 +699,10 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, bool all_hub, uint32_t inst) { - u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : - adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; unsigned int ndw; - int r; + int r, cnt = 0; uint32_t seq; /* @@ -761,10 +759,21 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq[inst].ring_lock); - if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) { + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY && + !amdgpu_reset_pending(adev->reset_domain)) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) { dev_err(adev->dev, "timeout waiting for kiq fence\n"); r = -ETIME; - } + } else + r = 0; } error_unlock_reset: From 1b5447d773d461b670f29af7c5a9091cff915259 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 27 Mar 2025 20:51:28 +0100 Subject: [PATCH 0212/2924] drm/amdgpu: Add cgroups implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to xe, enable some simple management of VRAM only. Reviewed-by: Christian König Co-developed-by: Maxime Ripard Signed-off-by: Maxime Ripard Signed-off-by: Maarten Lankhorst Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 6da8994e0469af..2d7f82e98df92c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -24,6 +24,7 @@ #include #include +#include #include "amdgpu.h" #include "amdgpu_vm.h" @@ -907,6 +908,9 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) struct ttm_resource_manager *man = &mgr->manager; int err; + man->cg = drmm_cgroup_register_region(adev_to_drm(adev), "vram", adev->gmc.real_vram_size); + if (IS_ERR(man->cg)) + return PTR_ERR(man->cg); ttm_resource_manager_init(man, &adev->mman.bdev, adev->gmc.real_vram_size); From f5e7fabd1f5c65b2e077efcdb118cfa67eae7311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 9 Jan 2025 11:57:56 -0500 Subject: [PATCH 0213/2924] drm/amdgpu: allow pinning DMA-bufs into VRAM if all importers can do P2P MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Try pinning into VRAM to allow P2P with RDMA NICs without ODP support if all attachments can do P2P. If any attachment can't do P2P just pin into GTT instead. Acked-by: Simona Vetter Signed-off-by: Christian König Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Tested-by: Pak Nin Lui Cc: Simona Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 25 +++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index c9842a0e2a1cd4..667080cc9ae1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -75,11 +75,25 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, */ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { - struct drm_gem_object *obj = attach->dmabuf->priv; - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct dma_buf *dmabuf = attach->dmabuf; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv); + u32 domains = bo->preferred_domains; - /* pin buffer into GTT */ - return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + dma_resv_assert_held(dmabuf->resv); + + /* + * Try pinning into VRAM to allow P2P with RDMA NICs without ODP + * support if all attachments can do P2P. If any attachment can't do + * P2P just pin into GTT instead. + */ + list_for_each_entry(attach, &dmabuf->attachments, node) + if (!attach->peer2peer) + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + + if (domains & AMDGPU_GEM_DOMAIN_VRAM) + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + return amdgpu_bo_pin(bo, domains); } /** @@ -134,9 +148,6 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return ERR_PTR(r); - - } else if (bo->tbo.resource->mem_type != TTM_PL_TT) { - return ERR_PTR(-EBUSY); } switch (bo->tbo.resource->mem_type) { From 5529df92b8e8cbb4b14a226665888f74648260ad Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 25 Mar 2025 15:47:10 -0700 Subject: [PATCH 0214/2924] drm/xe/bmg: Add one additional PCI ID One additional BMG PCI ID has been added to the spec; make sure our driver recognizes devices with this ID properly. Bspec: 68090 Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Clint Taylor Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250325224709.4073080-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper (cherry picked from commit cca9734ebe55f6af11ce8d57ca1afdc4d158c808) Signed-off-by: Lucas De Marchi --- include/drm/intel/pciids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 4736ea525048e4..d212848d07f3fe 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -850,6 +850,7 @@ MACRO__(0xE20C, ## __VA_ARGS__), \ MACRO__(0xE20D, ## __VA_ARGS__), \ MACRO__(0xE210, ## __VA_ARGS__), \ + MACRO__(0xE211, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__), \ MACRO__(0xE215, ## __VA_ARGS__), \ MACRO__(0xE216, ## __VA_ARGS__) From 1d8c0557927e6d69cb6341b7792f882412cfccff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 26 Mar 2025 09:05:48 +0100 Subject: [PATCH 0215/2924] drm/xe/svm: Fix a potential bo UAF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If drm_gpusvm_migrate_to_devmem() succeeds, if a cpu access happens to the range the bo may be freed before xe_bo_unlock(), causing a UAF. Since the reference is transferred, use xe_svm_devmem_release() to release the reference on drm_gpusvm_migrate_to_devmem() failure, and hold a local reference to protect the UAF. Fixes: 2f118c949160 ("drm/xe: Add SVM VRAM migration") Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250326080551.40201-3-thomas.hellstrom@linux.intel.com (cherry picked from commit c9db07cab766b665c8fa1184649cef452f448dc8) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 3e829c87d7b45a..f8c128524d9f33 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -696,11 +696,14 @@ static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, list_for_each_entry(block, blocks, link) block->private = vr; + xe_bo_get(bo); err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base, &bo->devmem_allocation, ctx); - xe_bo_unlock(bo); if (err) - xe_bo_put(bo); /* Creation ref */ + xe_svm_devmem_release(&bo->devmem_allocation); + + xe_bo_unlock(bo); + xe_bo_put(bo); unlock: mmap_read_unlock(mm); From 7bcfeddb36b77f9fe3b010bb0b282b7618420bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 26 Mar 2025 16:16:34 +0100 Subject: [PATCH 0216/2924] drm/xe: Fix an out-of-bounds shift when invalidating TLB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the size of the range invalidated is larger than rounddown_pow_of_two(ULONG_MAX), The function macro roundup_pow_of_two(length) will hit an out-of-bounds shift [1]. Use a full TLB invalidation for such cases. v2: - Use a define for the range size limit over which we use a full TLB invalidation. (Lucas) - Use a better calculation of the limit. [1]: [ 39.202421] ------------[ cut here ]------------ [ 39.202657] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 [ 39.202673] shift exponent 64 is too large for 64-bit type 'long unsigned int' [ 39.202688] CPU: 8 UID: 0 PID: 3129 Comm: xe_exec_system_ Tainted: G U 6.14.0+ #10 [ 39.202690] Tainted: [U]=USER [ 39.202690] Hardware name: ASUS System Product Name/PRIME B560M-A AC, BIOS 2001 02/01/2023 [ 39.202691] Call Trace: [ 39.202692] [ 39.202695] dump_stack_lvl+0x6e/0xa0 [ 39.202699] ubsan_epilogue+0x5/0x30 [ 39.202701] __ubsan_handle_shift_out_of_bounds.cold+0x61/0xe6 [ 39.202705] xe_gt_tlb_invalidation_range.cold+0x1d/0x3a [xe] [ 39.202800] ? find_held_lock+0x2b/0x80 [ 39.202803] ? mark_held_locks+0x40/0x70 [ 39.202806] xe_svm_invalidate+0x459/0x700 [xe] [ 39.202897] drm_gpusvm_notifier_invalidate+0x4d/0x70 [drm_gpusvm] [ 39.202900] __mmu_notifier_release+0x1f5/0x270 [ 39.202905] exit_mmap+0x40e/0x450 [ 39.202912] __mmput+0x45/0x110 [ 39.202914] exit_mm+0xc5/0x130 [ 39.202916] do_exit+0x21c/0x500 [ 39.202918] ? lockdep_hardirqs_on_prepare+0xdb/0x190 [ 39.202920] do_group_exit+0x36/0xa0 [ 39.202922] get_signal+0x8f8/0x900 [ 39.202926] arch_do_signal_or_restart+0x35/0x100 [ 39.202930] syscall_exit_to_user_mode+0x1fc/0x290 [ 39.202932] do_syscall_64+0xa1/0x180 [ 39.202934] ? do_user_addr_fault+0x59f/0x8a0 [ 39.202937] ? lock_release+0xd2/0x2a0 [ 39.202939] ? do_user_addr_fault+0x5a9/0x8a0 [ 39.202942] ? trace_hardirqs_off+0x4b/0xc0 [ 39.202944] ? clear_bhb_loop+0x25/0x80 [ 39.202946] ? clear_bhb_loop+0x25/0x80 [ 39.202947] ? clear_bhb_loop+0x25/0x80 [ 39.202950] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 39.202952] RIP: 0033:0x7fa945e543e1 [ 39.202961] Code: Unable to access opcode bytes at 0x7fa945e543b7. [ 39.202962] RSP: 002b:00007ffca8fb4170 EFLAGS: 00000293 [ 39.202963] RAX: 000000000000003d RBX: 0000000000000000 RCX: 00007fa945e543e3 [ 39.202964] RDX: 0000000000000000 RSI: 00007ffca8fb41ac RDI: 00000000ffffffff [ 39.202964] RBP: 00007ffca8fb4190 R08: 0000000000000000 R09: 00007fa945f600a0 [ 39.202965] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000000 [ 39.202966] R13: 00007fa9460dd310 R14: 00007ffca8fb41ac R15: 0000000000000000 [ 39.202970] [ 39.202970] ---[ end trace ]--- Fixes: 332dd0116c82 ("drm/xe: Add range based TLB invalidations") Cc: Matthew Brost Cc: Rodrigo Vivi Cc: # v6.8+ Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi #v1 Link: https://lore.kernel.org/r/20250326151634.36916-1-thomas.hellstrom@linux.intel.com (cherry picked from commit b88f48f86500bc0b44b4f73ac66d500a40d320ad) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 03072e0949917c..084cbdeba8eaa5 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -322,6 +322,13 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return 0; } +/* + * Ensure that roundup_pow_of_two(length) doesn't overflow. + * Note that roundup_pow_of_two() operates on unsigned long, + * not on u64. + */ +#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) + /** * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an * address range @@ -346,6 +353,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 u32 action[MAX_TLB_INVALIDATION_LEN]; + u64 length = end - start; int len = 0; xe_gt_assert(gt, fence); @@ -358,11 +366,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ - if (!xe->info.has_range_tlb_invalidation) { + if (!xe->info.has_range_tlb_invalidation || + length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { u64 orig_start = start; - u64 length = end - start; u64 align; if (length < SZ_4K) From 262de94a3a7ef23c326534b3d9483602b7af841e Mon Sep 17 00:00:00 2001 From: Niranjana Vishwanathapura Date: Thu, 27 Mar 2025 11:56:04 -0700 Subject: [PATCH 0217/2924] drm/xe: Ensure fixed_slice_mode gets set after ccs_mode change The RCU_MODE_FIXED_SLICE_CCS_MODE setting is not getting invoked in the gt reset path after the ccs_mode setting by the user. Add it to engine register update list (in hw_engine_setup_default_state()) which ensures it gets set in the gt reset and engine reset paths. v2: Add register update to engine list to ensure it gets updated after engine reset also. Fixes: 0d97ecce16bd ("drm/xe: Enable Fixed CCS mode setting") Cc: stable@vger.kernel.org Signed-off-by: Niranjana Vishwanathapura Reviewed-by: Matt Roper Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250327185604.18230-1-niranjana.vishwanathapura@intel.com (cherry picked from commit 12468e519f98e4d93370712e3607fab61df9dae9) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 8c05fd30b7df61..93241fd0a4ba3b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -389,12 +389,6 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) blit_cctl_val, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - /* Use Fixed slice CCS mode */ - { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), - XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), - XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, - RCU_MODE_FIXED_SLICE_CCS_MODE)) - }, /* Disable WMTP if HW doesn't support it */ { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), @@ -461,6 +455,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* Use Fixed slice CCS mode */ + { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), + XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), + XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, + RCU_MODE_FIXED_SLICE_CCS_MODE)) + }, }; xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr); From 00e0ae4f1f872800413c819f8a2a909dc29cdc35 Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Tue, 25 Mar 2025 15:43:05 -0700 Subject: [PATCH 0218/2924] drm/xe/xe3lpg: Apply Wa_14022293748, Wa_22019794406 Extend Wa_14022293748, Wa_22019794406 to Xe3_LPG Signed-off-by: Julia Filipchuk Reviewed-by: Tejas Upadhyay Signed-off-by: John Harrison Link: https://lore.kernel.org/r/20250325224310.1455499-1-julia.filipchuk@intel.com (cherry picked from commit 32af900f2c6b1846fd3ede8ad36dd180d7e4ae70) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 0c738af24f7c53..9b9e176992a837 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -32,8 +32,10 @@ GRAPHICS_VERSION(3001) 14022293748 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3001) 22019794406 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3001) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) From 20659d3150f1a2a258a173fe011013178ff2a197 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 11 Mar 2025 11:29:15 -0700 Subject: [PATCH 0219/2924] drm/xe: Use local fence in error path of xe_migrate_clear The intent of the error path in xe_migrate_clear is to wait on locally generated fence and then return. The code is waiting on m->fence which could be the local fence but this is only stable under the job mutex leading to a possible UAF. Fix code to wait on local fence. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250311182915.3606291-1-matthew.brost@intel.com (cherry picked from commit 762b7e95362170b3e13a8704f38d5e47eca4ba74) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index df4282c71bf0bf..a83bebef3780f7 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1177,7 +1177,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, err_sync: /* Sync partial copies if any. FIXME: job_mutex? */ if (fence) { - dma_fence_wait(m->fence, false); + dma_fence_wait(fence, false); dma_fence_put(fence); } From a5c71fd5b69b9da77e5e0b268e69e256932ba49c Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 27 Mar 2025 17:56:47 +0530 Subject: [PATCH 0220/2924] drm/xe/hw_engine: define sysfs_ops on all directories Sysfs_ops needs to be defined on all directories which can have attr files with set/get method. Add sysfs_ops to even those directories which is currently empty but would have attr files with set/get method in future. Leave .default with default sysfs_ops as it will never have setter method. V2(Himal/Rodrigo): - use single sysfs_ops for all dir and attr with set/get - add default ops as ./default does not need runtime pm at all Fixes: 3f0e14651ab0 ("drm/xe: Runtime PM wake on every sysfs call") Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250327122647.886637-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit 40780b9760b561e093508d07b8b9b06c94ab201e) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 108 +++++++++--------- 1 file changed, 52 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index b53e8d2accdbd7..a440442b4d7270 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -32,14 +32,61 @@ bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max) return timeout >= min && timeout <= max; } -static void kobj_xe_hw_engine_release(struct kobject *kobj) +static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) { kfree(kobj); } +static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->show) { + xe_pm_runtime_get(xe); + ret = kattr->show(kobj, kattr, buf); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->store) { + xe_pm_runtime_get(xe); + ret = kattr->store(kobj, kattr, buf, count); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { + .show = xe_hw_engine_class_sysfs_attr_show, + .store = xe_hw_engine_class_sysfs_attr_store, +}; + static const struct kobj_type kobj_xe_hw_engine_type = { - .release = kobj_xe_hw_engine_release, - .sysfs_ops = &kobj_sysfs_ops + .release = xe_hw_engine_sysfs_kobj_release, + .sysfs_ops = &xe_hw_engine_class_sysfs_ops, +}; + +static const struct kobj_type kobj_xe_hw_engine_type_def = { + .release = xe_hw_engine_sysfs_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, }; static ssize_t job_timeout_max_store(struct kobject *kobj, @@ -543,7 +590,7 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe, if (!kobj) return -ENOMEM; - kobject_init(kobj, &kobj_xe_hw_engine_type); + kobject_init(kobj, &kobj_xe_hw_engine_type_def); err = kobject_add(kobj, parent, "%s", ".defaults"); if (err) goto err_object; @@ -559,57 +606,6 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe, return err; } -static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) -{ - kfree(kobj); -} - -static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct xe_device *xe = kobj_to_xe(kobj); - struct kobj_attribute *kattr; - ssize_t ret = -EIO; - - kattr = container_of(attr, struct kobj_attribute, attr); - if (kattr->show) { - xe_pm_runtime_get(xe); - ret = kattr->show(kobj, kattr, buf); - xe_pm_runtime_put(xe); - } - - return ret; -} - -static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *buf, - size_t count) -{ - struct xe_device *xe = kobj_to_xe(kobj); - struct kobj_attribute *kattr; - ssize_t ret = -EIO; - - kattr = container_of(attr, struct kobj_attribute, attr); - if (kattr->store) { - xe_pm_runtime_get(xe); - ret = kattr->store(kobj, kattr, buf, count); - xe_pm_runtime_put(xe); - } - - return ret; -} - -static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { - .show = xe_hw_engine_class_sysfs_attr_show, - .store = xe_hw_engine_class_sysfs_attr_store, -}; - -static const struct kobj_type xe_hw_engine_sysfs_kobj_type = { - .release = xe_hw_engine_sysfs_kobj_release, - .sysfs_ops = &xe_hw_engine_class_sysfs_ops, -}; static void hw_engine_class_sysfs_fini(void *arg) { @@ -640,7 +636,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) if (!kobj) return -ENOMEM; - kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type); + kobject_init(kobj, &kobj_xe_hw_engine_type); err = kobject_add(kobj, gt->sysfs, "engines"); if (err) From dac2d70bb23f247034dd2e5c1abc6689c684b793 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 22:06:02 +0100 Subject: [PATCH 0221/2924] drm/xe: avoid plain 64-bit division Building the xe driver for i386 results in a link time warning: x86_64-linux-ld: drivers/gpu/drm/xe/xe_migrate.o: in function `xe_migrate_vram': xe_migrate.c:(.text+0x1e15): undefined reference to `__udivdi3' Avoid this by using DIV_U64_ROUND_UP() instead of DIV_ROUND_UP(). The driver is unlikely to be used on 32=bit hardware, so the extra cost here is not too important. Fixes: 9c44fd5f6e8a ("drm/xe: Add migrate layer functions for SVM support") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250324210612.2927194-1-arnd@kernel.org Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi (cherry picked from commit c9092257506af4985c085103714c403812a5bdcb) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index a83bebef3780f7..5a3e89022c3812 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1547,7 +1547,7 @@ void xe_migrate_wait(struct xe_migrate *m) static u32 pte_update_cmd_size(u64 size) { u32 num_dword; - u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE); + u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); /* @@ -1558,7 +1558,7 @@ static u32 pte_update_cmd_size(u64 size) * 2 dword for the page table's physical location * 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); num_dword += entries * 2; return num_dword; From e775278cd75f24a2758c28558c4e41b36c935740 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sun, 30 Mar 2025 12:59:23 -0400 Subject: [PATCH 0222/2924] drm/xe: Invalidate L3 read-only cachelines for geometry streams too Historically, the Vertex Fetcher unit has not been an L3 client. That meant that, when a buffer containing vertex data was written to, it was necessary to issue a PIPE_CONTROL::VF Cache Invalidate to invalidate any VF L2 cachelines associated with that buffer, so the new value would be properly read from memory. Since Tigerlake and later, VERTEX_BUFFER_STATE and 3DSTATE_INDEX_BUFFER have included an "L3 Bypass Enable" bit which userspace drivers can set to request that the vertex fetcher unit snoop L3. However, unlike most true L3 clients, the "VF Cache Invalidate" bit continues to only invalidate the VF L2 cache - and not any associated L3 lines. To handle that, PIPE_CONTROL has a new "L3 Read Only Cache Invalidation Bit", which according to the docs, "controls the invalidation of the Geometry streams cached in L3 cache at the top of the pipe." In other words, the vertex and index buffer data that gets cached in L3 when "L3 Bypass Disable" is set. Mesa always sets L3 Bypass Disable so that the VF unit snoops L3, and whenever it issues a VF Cache Invalidate, it also issues a L3 Read Only Cache Invalidate so that both L2 and L3 vertex data is invalidated. xe is issuing VF cache invalidates too (which handles cases like CPU writes to a buffer between GPU batches). Because userspace may enable L3 snooping, it needs to issue an L3 Read Only Cache Invalidate as well. Fixes significant flickering in Firefox on Meteorlake, which was writing to vertex buffers via the CPU between batches; the missing L3 Read Only invalidates were causing the vertex fetcher to read stale data from L3. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4460 Fixes: 6ef3bb60557d ("drm/xe: enable lite restore") Cc: stable@vger.kernel.org # v6.13+ Signed-off-by: Kenneth Graunke Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250330165923.56410-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 61672806b579dd5a150a042ec9383be2bbc2ae7e) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/instructions/xe_gpu_commands.h | 1 + drivers/gpu/drm/xe/xe_ring_ops.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h index a255946b6f77e7..8cfcd3360896c2 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -41,6 +41,7 @@ #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE BIT(10) /* gen12 */ #define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ #define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 917fc16de866a5..a7582b097ae678 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -137,7 +137,8 @@ emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset, static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, int i) { - u32 flags = PIPE_CONTROL_CS_STALL | + u32 flags0 = 0; + u32 flags1 = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | @@ -148,11 +149,15 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, PIPE_CONTROL_STORE_DATA_INDEX; if (invalidate_tlb) - flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags1 |= PIPE_CONTROL_TLB_INVALIDATE; - flags &= ~mask_flags; + flags1 &= ~mask_flags; - return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); + if (flags1 & PIPE_CONTROL_VF_CACHE_INVALIDATE) + flags0 |= PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE; + + return emit_pipe_control(dw, i, flags0, flags1, + LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); } static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, From 88ecb66b9956a14577d513a6c8c28bb2e7989703 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 28 Mar 2025 14:17:52 -0400 Subject: [PATCH 0223/2924] drm/xe: Restore EIO errno return when GuC PC start fails Commit b4b05e53b550 ("drm/xe/guc_pc: Retry and wait longer for GuC PC start"), leads to the following Smatch static checker warning: drivers/gpu/drm/xe/xe_guc_pc.c:1073 xe_guc_pc_start() warn: missing error code here? '_dev_err()' failed. 'ret' = '0' Fixes: c605acb53f44 ("drm/xe/guc_pc: Retry and wait longer for GuC PC start") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/intel-xe/1454a5f1-ee18-4df1-a6b2-a4a3dddcd1cb@stanley.mountain/ Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250328181752.26677-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 3f2bdccbccdcb53b0d316474eafff2e3462a51ad) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_pc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 85215313976ce4..43b1192ba61cde 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1070,6 +1070,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, SLPC_RESET_EXTENDED_TIMEOUT_MS)) { xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n"); + ret = -EIO; goto out; } From a344e258acb0a7f0e7ed10a795c52d1baf705164 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 7 Apr 2025 16:27:55 +0100 Subject: [PATCH 0224/2924] KVM: arm64: Use acquire/release to communicate FF-A version negotiation The pKVM FF-A proxy rejects FF-A requests other than FFA_VERSION until version negotiation is complete, which is signalled by setting the global 'has_version_negotiated' variable. To avoid excessive locking, this variable is checked directly from kvm_host_ffa_handler() in response to an FF-A call, but this can race against another CPU performing the negotiation and potentially lead to reading a torn value (incredibly unlikely for a 'bool') or problematic re-ordering of the accesses to 'has_version_negotiated' and 'hyp_ffa_version' whereby a stale version number could be read by __do_ffa_mem_xfer(). Use acquire/release primitives when writing 'has_version_negotiated' with the version lock held and when reading without the lock held. Cc: Sebastian Ene Cc: Sudeep Holla Cc: Quentin Perret Cc: Oliver Upton Cc: Marc Zyngier Fixes: c9c012625e12 ("KVM: arm64: Trap FFA_VERSION host call in pKVM") Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20250407152755.1041-1-will@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/ffa.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index e433dfab882aa5..3369dd0c4009f8 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -730,10 +730,10 @@ static void do_ffa_version(struct arm_smccc_res *res, hyp_ffa_version = ffa_req_version; } - if (hyp_ffa_post_init()) + if (hyp_ffa_post_init()) { res->a0 = FFA_RET_NOT_SUPPORTED; - else { - has_version_negotiated = true; + } else { + smp_store_release(&has_version_negotiated, true); res->a0 = hyp_ffa_version; } unlock: @@ -809,7 +809,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) if (!is_ffa_call(func_id)) return false; - if (!has_version_negotiated && func_id != FFA_VERSION) { + if (func_id != FFA_VERSION && + !smp_load_acquire(&has_version_negotiated)) { ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS); goto out_handled; } From a3dc2983ca7b90fd35f978502de6d4664d965cfb Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Apr 2025 00:35:44 +0900 Subject: [PATCH 0225/2924] tracing: fprobe: Cleanup fprobe hash when module unloading Cleanup fprobe address hash table on module unloading because the target symbols will be disappeared when unloading module and not sure the same symbol is mapped on the same address. Note that this is at least disables the fprobes if a part of target symbols on the unloaded modules. Unlike kprobes, fprobe does not re-enable the probe point by itself. To do that, the caller should take care register/unregister fprobe when loading/unloading modules. This simplifies the fprobe state managememt related to the module loading/unloading. Link: https://lore.kernel.org/all/174343534473.843280.13988101014957210732.stgit@devnote2/ Fixes: 4346ba160409 ("fprobe: Rewrite fprobe on function-graph tracer") Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/fprobe.c | 103 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 2 deletions(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index cb86f90d4b1edd..95c6e3473a76b5 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -89,8 +89,11 @@ static bool delete_fprobe_node(struct fprobe_hlist_node *node) { lockdep_assert_held(&fprobe_mutex); - WRITE_ONCE(node->fp, NULL); - hlist_del_rcu(&node->hlist); + /* Avoid double deleting */ + if (READ_ONCE(node->fp) != NULL) { + WRITE_ONCE(node->fp, NULL); + hlist_del_rcu(&node->hlist); + } return !!find_first_fprobe_node(node->addr); } @@ -411,6 +414,102 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num) ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); } +#ifdef CONFIG_MODULES + +#define FPROBE_IPS_BATCH_INIT 8 +/* instruction pointer address list */ +struct fprobe_addr_list { + int index; + int size; + unsigned long *addrs; +}; + +static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long addr) +{ + unsigned long *addrs; + + if (alist->index >= alist->size) + return -ENOMEM; + + alist->addrs[alist->index++] = addr; + if (alist->index < alist->size) + return 0; + + /* Expand the address list */ + addrs = kcalloc(alist->size * 2, sizeof(*addrs), GFP_KERNEL); + if (!addrs) + return -ENOMEM; + + memcpy(addrs, alist->addrs, alist->size * sizeof(*addrs)); + alist->size *= 2; + kfree(alist->addrs); + alist->addrs = addrs; + + return 0; +} + +static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head, + struct fprobe_addr_list *alist) +{ + struct fprobe_hlist_node *node; + int ret = 0; + + hlist_for_each_entry_rcu(node, head, hlist) { + if (!within_module(node->addr, mod)) + continue; + if (delete_fprobe_node(node)) + continue; + /* + * If failed to update alist, just continue to update hlist. + * Therefore, at list user handler will not hit anymore. + */ + if (!ret) + ret = fprobe_addr_list_add(alist, node->addr); + } +} + +/* Handle module unloading to manage fprobe_ip_table. */ +static int fprobe_module_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT}; + struct module *mod = data; + int i; + + if (val != MODULE_STATE_GOING) + return NOTIFY_DONE; + + alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL); + /* If failed to alloc memory, we can not remove ips from hash. */ + if (!alist.addrs) + return NOTIFY_DONE; + + mutex_lock(&fprobe_mutex); + for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++) + fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist); + + if (alist.index < alist.size && alist.index > 0) + ftrace_set_filter_ips(&fprobe_graph_ops.ops, + alist.addrs, alist.index, 1, 0); + mutex_unlock(&fprobe_mutex); + + kfree(alist.addrs); + + return NOTIFY_DONE; +} + +static struct notifier_block fprobe_module_nb = { + .notifier_call = fprobe_module_callback, + .priority = 0, +}; + +static int __init init_fprobe_module(void) +{ + return register_module_notifier(&fprobe_module_nb); +} +early_initcall(init_fprobe_module); +#endif + static int symbols_cmp(const void *a, const void *b) { const char **str_a = (const char **) a; From 71d2143266efe0fca1583994af65c08643548316 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 7 Apr 2025 11:04:42 -0400 Subject: [PATCH 0226/2924] dt-bindings: display: nwl-dsi: Allow 'data-lanes' property for port@1 This controller support scalable data lanes from 1 to 4. Add the 'data-lanes' property to configure the number of MIPI display panel lanes selected for boards. Change $ref of port@1 from 'port' to 'port-base' and add 'endpoint' property referencing video-interfaces.yaml. Allow 'data-lanes' values 1, 2, 3, and 4 for port@1. Fix below CHECK_DTB warnings: arch/arm64/boot/dts/freescale/imx8mq-tqma8mq-mba8mx-lvds-tm070jvhg33.dtb: dsi@30a00000: ports:port@1:endpoint: Unevaluated properties are not allowed ('data-lanes' was unexpected) Reviewed-by: Krzysztof Kozlowski Signed-off-by: Frank Li Reviewed-by: Alexander Stein Link: https://lore.kernel.org/r/20250407150442.2778299-1-Frank.Li@nxp.com Signed-off-by: Rob Herring (Arm) --- .../bindings/display/bridge/nwl-dsi.yaml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml index 350fb8f400f022..5952e6448ed47e 100644 --- a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml +++ b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml @@ -111,11 +111,27 @@ properties: unevaluatedProperties: false port@1: - $ref: /schemas/graph.yaml#/properties/port + $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: DSI output port node to the panel or the next bridge in the chain + properties: + endpoint: + $ref: /schemas/media/video-interfaces.yaml# + unevaluatedProperties: false + + properties: + data-lanes: + description: array of physical DSI data lane indexes. + minItems: 1 + items: + - const: 1 + - const: 2 + - const: 3 + - const: 4 + required: - port@0 - port@1 From 2bd73c7949ea317838da70d7c9c19ef7291cfcdd Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 7 Apr 2025 11:15:52 -0400 Subject: [PATCH 0227/2924] dt-bindings: interrupt-controller: fsl,irqsteer: Add i.MX94 support Add compatible string "fsl,imx94-irqsteer" for the i.MX94 chip, which is backward compatible with "fsl,imx-irqsteer". Acked-by: Conor Dooley Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20250407151552.2779343-1-Frank.Li@nxp.com Signed-off-by: Rob Herring (Arm) --- .../devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml b/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml index 6076ddf56bb5af..c49688be105819 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml @@ -19,6 +19,7 @@ properties: - fsl,imx8mp-irqsteer - fsl,imx8qm-irqsteer - fsl,imx8qxp-irqsteer + - fsl,imx94-irqsteer - const: fsl,imx-irqsteer reg: From f0b12d3f28b19357eb323286a65c951cb9499ed4 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 7 Apr 2025 11:13:39 -0400 Subject: [PATCH 0228/2924] dt-bindings: timer: nxp,sysctr-timer: Add i.MX94 support Add compatible string "nxp,imx94-sysctr-timer" for the i.MX94 chip, which is backward compatible with i.MX95. Set it to fall back to "nxp,imx95-sysctr-timer". Acked-by: Conor Dooley Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20250407151340.2779124-1-Frank.Li@nxp.com Signed-off-by: Rob Herring (Arm) --- .../devicetree/bindings/timer/nxp,sysctr-timer.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml index 891cca00952815..6b80b060672e54 100644 --- a/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml +++ b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml @@ -18,9 +18,14 @@ description: | properties: compatible: - enum: - - nxp,imx95-sysctr-timer - - nxp,sysctr-timer + oneOf: + - enum: + - nxp,imx95-sysctr-timer + - nxp,sysctr-timer + - items: + - enum: + - nxp,imx94-sysctr-timer + - const: nxp,imx95-sysctr-timer reg: maxItems: 1 From f2f847461fb7620e299be873cdd9437ddecd2266 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Mon, 7 Apr 2025 15:08:51 +0200 Subject: [PATCH 0229/2924] ASoC: Intel: avs: Constrain path based on BE capabilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For i2s and DMIC copiers constraint stream capabilities based on available NHLT configuration. This allows topology to provide generic configuration that handles more hardware, while filtering unavailable ones at runtime. Signed-off-by: Amadeusz Sławiński Link: https://patch.msgid.link/20250407130851.1726800-1-amadeuszx.slawinski@linux.intel.com Reviewed-by: Cezary Rojewski Signed-off-by: Mark Brown --- sound/soc/intel/avs/path.c | 72 ++++++++++++++++++++++++++++++++++++++ sound/soc/intel/avs/path.h | 5 +++ sound/soc/intel/avs/pcm.c | 49 +++++++++++++++++++++++++- 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/avs/path.c b/sound/soc/intel/avs/path.c index ef0c1d125d66b8..cafb8c6198bedb 100644 --- a/sound/soc/intel/avs/path.c +++ b/sound/soc/intel/avs/path.c @@ -115,6 +115,78 @@ avs_path_find_variant(struct avs_dev *adev, return NULL; } +static struct acpi_nhlt_config * +avs_nhlt_config_or_default(struct avs_dev *adev, struct avs_tplg_module *t); + +int avs_path_set_constraint(struct avs_dev *adev, struct avs_tplg_path_template *template, + struct snd_pcm_hw_constraint_list *rate_list, + struct snd_pcm_hw_constraint_list *channels_list, + struct snd_pcm_hw_constraint_list *sample_bits_list) +{ + struct avs_tplg_path *path_template; + unsigned int *rlist, *clist, *slist; + size_t i; + + i = 0; + list_for_each_entry(path_template, &template->path_list, node) + i++; + + rlist = kcalloc(i, sizeof(rlist), GFP_KERNEL); + clist = kcalloc(i, sizeof(clist), GFP_KERNEL); + slist = kcalloc(i, sizeof(slist), GFP_KERNEL); + + i = 0; + list_for_each_entry(path_template, &template->path_list, node) { + struct avs_tplg_pipeline *pipeline_template; + + list_for_each_entry(pipeline_template, &path_template->ppl_list, node) { + struct avs_tplg_module *module_template; + + list_for_each_entry(module_template, &pipeline_template->mod_list, node) { + const guid_t *type = &module_template->cfg_ext->type; + struct acpi_nhlt_config *blob; + + if (!guid_equal(type, &AVS_COPIER_MOD_UUID) && + !guid_equal(type, &AVS_WOVHOSTM_MOD_UUID)) + continue; + + switch (module_template->cfg_ext->copier.dma_type) { + case AVS_DMA_DMIC_LINK_INPUT: + case AVS_DMA_I2S_LINK_OUTPUT: + case AVS_DMA_I2S_LINK_INPUT: + break; + default: + continue; + } + + blob = avs_nhlt_config_or_default(adev, module_template); + if (IS_ERR(blob)) + continue; + + rlist[i] = path_template->fe_fmt->sampling_freq; + clist[i] = path_template->fe_fmt->num_channels; + slist[i] = path_template->fe_fmt->bit_depth; + i++; + } + } + } + + if (i) { + rate_list->count = i; + rate_list->list = rlist; + channels_list->count = i; + channels_list->list = clist; + sample_bits_list->count = i; + sample_bits_list->list = slist; + } else { + kfree(rlist); + kfree(clist); + kfree(slist); + } + + return i; +} + static void avs_init_node_id(union avs_connector_node_id *node_id, struct avs_tplg_modcfg_ext *te, u32 dma_id) { diff --git a/sound/soc/intel/avs/path.h b/sound/soc/intel/avs/path.h index 7ed7e94e0a566b..c65ed84aa85305 100644 --- a/sound/soc/intel/avs/path.h +++ b/sound/soc/intel/avs/path.h @@ -69,6 +69,11 @@ int avs_path_reset(struct avs_path *path); int avs_path_pause(struct avs_path *path); int avs_path_run(struct avs_path *path, int trigger); +int avs_path_set_constraint(struct avs_dev *adev, struct avs_tplg_path_template *template, + struct snd_pcm_hw_constraint_list *rate_list, + struct snd_pcm_hw_constraint_list *channels_list, + struct snd_pcm_hw_constraint_list *sample_bits_list); + int avs_peakvol_set_volume(struct avs_dev *adev, struct avs_path_module *mod, struct soc_mixer_control *mc, long *input); int avs_peakvol_set_mute(struct avs_dev *adev, struct avs_path_module *mod, diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index 7072bcf4e56f10..d83ef504643bbb 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -31,6 +31,10 @@ struct avs_dma_data { struct hdac_ext_stream *host_stream; }; + struct snd_pcm_hw_constraint_list rate_list; + struct snd_pcm_hw_constraint_list channels_list; + struct snd_pcm_hw_constraint_list sample_bits_list; + struct work_struct period_elapsed_work; struct snd_pcm_substream *substream; }; @@ -74,6 +78,45 @@ void avs_period_elapsed(struct snd_pcm_substream *substream) schedule_work(&data->period_elapsed_work); } +static int hw_rule_param_size(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule); +static int avs_hw_constraints_init(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); + struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_pcm_hw_constraint_list *r, *c, *s; + struct avs_tplg_path_template *template; + struct avs_dma_data *data; + int ret; + + ret = snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); + if (ret < 0) + return ret; + + data = snd_soc_dai_get_dma_data(dai, substream); + r = &(data->rate_list); + c = &(data->channels_list); + s = &(data->sample_bits_list); + + template = avs_dai_find_path_template(dai, !rtd->dai_link->no_pcm, substream->stream); + ret = avs_path_set_constraint(data->adev, template, r, c, s); + if (ret <= 0) + return ret; + + ret = snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, r); + if (ret < 0) + return ret; + + ret = snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, c); + if (ret < 0) + return ret; + + ret = snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, s); + if (ret < 0) + return ret; + + return 0; +} + static int avs_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); @@ -101,7 +144,7 @@ static int avs_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_d if (rtd->dai_link->ignore_suspend) adev->num_lp_paths++; - return 0; + return avs_hw_constraints_init(substream, dai); } static void avs_dai_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) @@ -114,6 +157,10 @@ static void avs_dai_shutdown(struct snd_pcm_substream *substream, struct snd_soc if (rtd->dai_link->ignore_suspend) data->adev->num_lp_paths--; + kfree(data->rate_list.list); + kfree(data->channels_list.list); + kfree(data->sample_bits_list.list); + snd_soc_dai_set_dma_data(dai, substream, NULL); kfree(data); } From d639e7fd9aa038f46728206bdb23cf7109b3b53b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 7 Apr 2025 22:39:39 +0000 Subject: [PATCH 0230/2924] ASoC: hdmi-codec: use RTD ID instead of DAI ID for ELD entry commit 0ecd24a6d8b2 ("ASoC: hdmi-codec: dump ELD through procfs") adds "eld#%d" entry for sound proc. It is using DAI ID. But it is possible to have duplicate DAI ID on same Sound Card. In such case, we will get below error. To avoid duplicate entry name, use RTD ID instead of DAI ID. proc_dir_entry 'card0/eld#0' already registered WARNING: CPU: 3 PID: 74 at fs/proc/generic.c:377 proc_register+0x11c/0x1a4 Modules linked in: CPU: 3 UID: 0 PID: 74 Comm: kworker/u33:5 Not tainted 6.14.0-rc1-next-20250206-arm64-renesas #174 Hardware name: Renesas Salvator-X 2nd version board based on r8a77951 (DT) Workqueue: events_unbound deferred_probe_work_func pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : proc_register+0x11c/0x1a4 ata1: SATA link down (SStatus 0 SControl 300) lr : proc_register+0x11c/0x1a4 sp : ffff8000847db880 x29: ffff8000847db880 x28: 0000000000000000 x27: ffff0004c3403c98 x26: 0000000000000005 x25: ffff0004c14b03e4 x24: 0000000000000005 x23: ffff0004c361adb8 x22: ffff800082f24860 x21: ffff0004c361ad00 x20: ffff0004c14b0300 x19: ffff0004c14b02c0 x18: 00000000ffffffff x17: 0000000000000000 x16: 00400034b5503510 x15: ffff8001047db447 x14: 0000000000000000 x13: 6465726574736967 x12: ffff800082e66d30 x11: 000000000000028e x10: ffff800082e66d30 x9 : 00000000ffffefff x8 : ffff800082ebed30 x7 : 0000000000017fe8 x6 : 0000000000000000 x5 : 80000000fffff000 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0004c15b3600 Call trace: proc_register+0x11c/0x1a4 (P) proc_create_data+0x3c/0x60 snd_info_register+0xd0/0x130 snd_info_register+0x30/0x130 snd_info_card_register+0x1c/0xbc snd_card_register+0x194/0x1ec snd_soc_bind_card+0x7f8/0xad0 snd_soc_register_card+0xe8/0xfc devm_snd_soc_register_card+0x48/0x98 audio_graph_parse_of+0x1c4/0x1f8 graph_probe+0x6c/0x80 ... Fixes: 0ecd24a6d8b2 ("ASoC: hdmi-codec: dump ELD through procfs") Reported-by: Thuan Nguyen Signed-off-by: Kuninori Morimoto Tested-by: Thuan Nguyen Acked-by: Mark Brown Link: https://patch.msgid.link/87a58roatw.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 17019b1d680b77..bc01ff65bd6f4d 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -842,12 +842,28 @@ static void print_eld_info(struct snd_info_entry *entry, static int hdmi_dai_proc_new(struct hdmi_codec_priv *hcp, struct snd_soc_dai *dai) { + struct snd_soc_component *component = dai->component; + struct snd_soc_card *card = component->card; + struct snd_soc_dai *d; + struct snd_soc_pcm_runtime *rtd; struct snd_info_entry *entry; char name[32]; - int err; + int err, i, id = 0; - snprintf(name, sizeof(name), "eld#%d", dai->id); - err = snd_card_proc_new(dai->component->card->snd_card, name, &entry); + /* + * To avoid duplicate proc entry, find its rtd and use rtd->id + * instead of dai->id + */ + for_each_card_rtds(card, rtd) { + for_each_rtd_dais(rtd, i, d) + if (d == dai) { + id = rtd->id; + goto found; + } + } +found: + snprintf(name, sizeof(name), "eld#%d", id); + err = snd_card_proc_new(card->snd_card, name, &entry); if (err < 0) return err; From ad320e408a8c95a282ab9c05cdf0c9b95e317985 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Fri, 4 Apr 2025 14:14:38 +0800 Subject: [PATCH 0231/2924] ata: pata_pxa: Fix potential NULL pointer dereference in pxa_ata_probe() devm_ioremap() returns NULL on error. Currently, pxa_ata_probe() does not check for this case, which can result in a NULL pointer dereference. Add NULL check after devm_ioremap() to prevent this issue. Fixes: 2dc6c6f15da9 ("[ARM] pata_pxa: DMA-capable PATA driver") Signed-off-by: Henry Martin Signed-off-by: Damien Le Moal --- drivers/ata/pata_pxa.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c index 434f380114af09..03dbaf4a13a75c 100644 --- a/drivers/ata/pata_pxa.c +++ b/drivers/ata/pata_pxa.c @@ -223,10 +223,16 @@ static int pxa_ata_probe(struct platform_device *pdev) ap->ioaddr.cmd_addr = devm_ioremap(&pdev->dev, cmd_res->start, resource_size(cmd_res)); + if (!ap->ioaddr.cmd_addr) + return -ENOMEM; ap->ioaddr.ctl_addr = devm_ioremap(&pdev->dev, ctl_res->start, resource_size(ctl_res)); + if (!ap->ioaddr.ctl_addr) + return -ENOMEM; ap->ioaddr.bmdma_addr = devm_ioremap(&pdev->dev, dma_res->start, resource_size(dma_res)); + if (!ap->ioaddr.bmdma_addr) + return -ENOMEM; /* * Adjust register offsets From 62baf70c327444338c34703c71aa8cc8e4189bd6 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 3 Apr 2025 09:19:30 +0200 Subject: [PATCH 0232/2924] nvme: re-read ANA log page after ns scan completes When scanning for new namespaces we might have missed an ANA AEN. The NVMe base spec (NVMe Base Specification v2.1, Figure 151 'Asynchonous Event Information - Notice': Asymmetric Namespace Access Change) states: A controller shall not send this even if an Attached Namespace Attribute Changed asynchronous event [...] is sent for the same event. so we need to re-read the ANA log page after we rescanned the namespace list to update the ANA states of the new namespaces. Signed-off-by: Hannes Reinecke Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dc298de76de172..b502ac07483ba8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4299,6 +4299,11 @@ static void nvme_scan_work(struct work_struct *work) /* Requeue if we have missed AENs */ if (test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) nvme_queue_scan(ctrl); +#ifdef CONFIG_NVME_MULTIPATH + else + /* Re-read the ANA log page to not miss updates */ + queue_work(nvme_wq, &ctrl->ana_work); +#endif } /* From e3105f54a51554fb1bbf19dcaf93c4411d2d6c8a Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Fri, 4 Apr 2025 14:06:43 -0600 Subject: [PATCH 0233/2924] nvme: multipath: fix return value of nvme_available_path The function returns bool so we should return false, not NULL. No functional changes are expected. Signed-off-by: Uday Shankar Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 6b12ca80aa2730..94152cf423f165 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -427,7 +427,7 @@ static bool nvme_available_path(struct nvme_ns_head *head) struct nvme_ns *ns; if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) - return NULL; + return false; list_for_each_entry_srcu(ns, &head->list, siblings, srcu_read_lock_held(&head->srcu)) { From 14c8a418159e541d70dbf8fc71225d1623beaf0f Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 20 Mar 2025 15:55:57 +0000 Subject: [PATCH 0234/2924] cpufreq: sun50i: prevent out-of-bounds access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A KASAN enabled kernel reports an out-of-bounds access when handling the nvmem cell in the sun50i cpufreq driver: ================================================================== BUG: KASAN: slab-out-of-bounds in sun50i_cpufreq_nvmem_probe+0x180/0x3d4 Read of size 4 at addr ffff000006bf31e0 by task kworker/u16:1/38 This is because the DT specifies the nvmem cell as covering only two bytes, but we use a u32 pointer to read the value. DTs for other SoCs indeed specify 4 bytes, so we cannot just shorten the variable to a u16. Fortunately nvmem_cell_read() allows to return the length of the nvmem cell, in bytes, so we can use that information to only access the valid portion of the data. To cover multiple cell sizes, use memcpy() to copy the information into a zeroed u32 buffer, then also make sure we always read the data in little endian fashion, as this is how the data is stored in the SID efuses. Fixes: 6cc4bcceff9a ("cpufreq: sun50i: Refactor speed bin decoding") Reported-by: Jernej Skrabec Signed-off-by: Andre Przywara Reviewed-by: Jernej Škrabec Signed-off-by: Viresh Kumar --- drivers/cpufreq/sun50i-cpufreq-nvmem.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 47d6840b348994..744312a44279cb 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -194,7 +194,9 @@ static int sun50i_cpufreq_get_efuse(void) struct nvmem_cell *speedbin_nvmem; const struct of_device_id *match; struct device *cpu_dev; - u32 *speedbin; + void *speedbin_ptr; + u32 speedbin = 0; + size_t len; int ret; cpu_dev = get_cpu_device(0); @@ -217,14 +219,18 @@ static int sun50i_cpufreq_get_efuse(void) return dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem), "Could not get nvmem cell\n"); - speedbin = nvmem_cell_read(speedbin_nvmem, NULL); + speedbin_ptr = nvmem_cell_read(speedbin_nvmem, &len); nvmem_cell_put(speedbin_nvmem); - if (IS_ERR(speedbin)) - return PTR_ERR(speedbin); + if (IS_ERR(speedbin_ptr)) + return PTR_ERR(speedbin_ptr); - ret = opp_data->efuse_xlate(*speedbin); + if (len <= 4) + memcpy(&speedbin, speedbin_ptr, len); + speedbin = le32_to_cpu(speedbin); - kfree(speedbin); + ret = opp_data->efuse_xlate(speedbin); + + kfree(speedbin_ptr); return ret; }; From fc5414a4774e14e51a93499a6adfdc45f2de82e0 Mon Sep 17 00:00:00 2001 From: Pengyu Luo Date: Sat, 5 Apr 2025 00:42:19 +0800 Subject: [PATCH 0235/2924] cpufreq: Add SM8650 to cpufreq-dt-platdev blocklist SM8650 have already been supported by qcom-cpufreq-hw driver, but never been added to cpufreq-dt-platdev. This makes noise [ 0.388525] cpufreq-dt cpufreq-dt: failed register driver: -17 [ 0.388537] cpufreq-dt cpufreq-dt: probe with driver cpufreq-dt failed with error -17 So adding it to the cpufreq-dt-platdev driver's blocklist to fix it. Signed-off-by: Pengyu Luo Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 2aa00769cf09da..a010da0f6337f8 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -175,6 +175,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sm8350", }, { .compatible = "qcom,sm8450", }, { .compatible = "qcom,sm8550", }, + { .compatible = "qcom,sm8650", }, { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, From d4f610a9bafdec8e3210789aa19335367da696ea Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 4 Apr 2025 14:40:06 +0200 Subject: [PATCH 0236/2924] cpufreq: Do not enable by default during compile testing Enabling the compile test should not cause automatic enabling of all drivers. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 4f9cb943d945c2..d4d625ded285f0 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ config ARM_BRCMSTB_AVS_CPUFREQ tristate "Broadcom STB AVS CPUfreq driver" depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || COMPILE_TEST - default y + default ARCH_BRCMSTB help Some Broadcom STB SoCs use a co-processor running proprietary firmware ("AVS") to handle voltage and frequency scaling. This driver provides @@ -181,7 +181,7 @@ config ARM_RASPBERRYPI_CPUFREQ config ARM_S3C64XX_CPUFREQ bool "Samsung S3C64XX" depends on CPU_S3C6410 || COMPILE_TEST - default y + default CPU_S3C6410 help This adds the CPUFreq driver for Samsung S3C6410 SoC. @@ -190,7 +190,7 @@ config ARM_S3C64XX_CPUFREQ config ARM_S5PV210_CPUFREQ bool "Samsung S5PV210 and S5PC110" depends on CPU_S5PV210 || COMPILE_TEST - default y + default CPU_S5PV210 help This adds the CPUFreq driver for Samsung S5PV210 and S5PC110 SoCs. @@ -214,7 +214,7 @@ config ARM_SCMI_CPUFREQ config ARM_SPEAR_CPUFREQ bool "SPEAr CPUFreq support" depends on PLAT_SPEAR || COMPILE_TEST - default y + default PLAT_SPEAR help This adds the CPUFreq driver support for SPEAr SOCs. @@ -233,7 +233,7 @@ config ARM_TEGRA20_CPUFREQ tristate "Tegra20/30 CPUFreq support" depends on ARCH_TEGRA || COMPILE_TEST depends on CPUFREQ_DT - default y + default ARCH_TEGRA help This adds the CPUFreq driver support for Tegra20/30 SOCs. @@ -241,7 +241,7 @@ config ARM_TEGRA124_CPUFREQ bool "Tegra124 CPUFreq support" depends on ARCH_TEGRA || COMPILE_TEST depends on CPUFREQ_DT - default y + default ARCH_TEGRA help This adds the CPUFreq driver support for Tegra124 SOCs. @@ -256,14 +256,14 @@ config ARM_TEGRA194_CPUFREQ tristate "Tegra194 CPUFreq support" depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST) depends on TEGRA_BPMP - default y + default ARCH_TEGRA help This adds CPU frequency driver support for Tegra194 SOCs. config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST - default y + default ARCH_OMAP2PLUS || ARCH_K3 help This driver enables valid OPPs on the running platform based on values contained within the SoC in use. Enable this in order to From a8df7d0ef92eca28c610206c6748daf537ac0586 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 00:02:13 -0700 Subject: [PATCH 0237/2924] objtool: Fix INSN_CONTEXT_SWITCH handling in validate_unret() The !CONFIG_IA32_EMULATION version of xen_entry_SYSCALL_compat() ends with a SYSCALL instruction which is classified by objtool as INSN_CONTEXT_SWITCH. Unlike validate_branch(), validate_unret() doesn't consider INSN_CONTEXT_SWITCH in a non-function to be a dead end, so it keeps going past the end of xen_entry_SYSCALL_compat(), resulting in the following warning: vmlinux.o: warning: objtool: xen_reschedule_interrupt+0x2a: RET before UNTRAIN Fix that by adding INSN_CONTEXT_SWITCH handling to validate_unret() to match what validate_branch() is already doing. Fixes: a09a6e2399ba ("objtool: Add entry UNRET validation") Reported-by: Andrew Cooper Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/f5eda46fd09f15b1f5cde3d9ae3b92b958342add.1744095216.git.jpoimboe@kernel.org --- tools/objtool/check.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4a1f6c3169b3b6..c81b070ca4952f 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3886,6 +3886,11 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) WARN_INSN(insn, "RET before UNTRAIN"); return 1; + case INSN_CONTEXT_SWITCH: + if (insn_func(insn)) + break; + return 0; + case INSN_NOP: if (insn->retpoline_safe) return 0; From fe1042b1ef79e4d5df33d5c0f0ce936493714eec Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 00:02:14 -0700 Subject: [PATCH 0238/2924] objtool: Split INSN_CONTEXT_SWITCH into INSN_SYSCALL and INSN_SYSRET INSN_CONTEXT_SWITCH is ambiguous. It can represent both call semantics (SYSCALL, SYSENTER) and return semantics (SYSRET, IRET, RETS, RETU). Those differ significantly: calls preserve control flow whereas returns terminate it. Objtool uses an arbitrary rule for INSN_CONTEXT_SWITCH that almost works by accident: if in a function, keep going; otherwise stop. It should instead be based on the semantics of the underlying instruction. In preparation for improving that, split INSN_CONTEXT_SWITCH into INSN_SYCALL and INSN_SYSRET. No functional change. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/19a76c74d2c051d3bc9a775823cafc65ad267a7a.1744095216.git.jpoimboe@kernel.org --- tools/objtool/arch/x86/decode.c | 18 +++++++++++------- tools/objtool/check.c | 6 ++++-- tools/objtool/include/objtool/arch.h | 3 ++- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 33d861c04ebd8f..3ce7b54003c229 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -522,7 +522,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec case INAT_PFX_REPNE: if (modrm == 0xca) /* eretu/erets */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; break; default: if (modrm == 0xca) @@ -535,11 +535,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec insn->type = INSN_JUMP_CONDITIONAL; - } else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || - op2 == 0x35) { + } else if (op2 == 0x05 || op2 == 0x34) { - /* sysenter, sysret */ - insn->type = INSN_CONTEXT_SWITCH; + /* syscall, sysenter */ + insn->type = INSN_SYSCALL; + + } else if (op2 == 0x07 || op2 == 0x35) { + + /* sysret, sysexit */ + insn->type = INSN_SYSRET; } else if (op2 == 0x0b || op2 == 0xb9) { @@ -676,7 +680,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec case 0xca: /* retf */ case 0xcb: /* retf */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; break; case 0xe0: /* loopne */ @@ -721,7 +725,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec } else if (modrm_reg == 5) { /* jmpf */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; } else if (modrm_reg == 6) { diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c81b070ca4952f..2c703b960420a4 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3684,7 +3684,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; - case INSN_CONTEXT_SWITCH: + case INSN_SYSCALL: + case INSN_SYSRET: if (func) { if (!next_insn || !next_insn->hint) { WARN_INSN(insn, "unsupported instruction in callable function"); @@ -3886,7 +3887,8 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) WARN_INSN(insn, "RET before UNTRAIN"); return 1; - case INSN_CONTEXT_SWITCH: + case INSN_SYSCALL: + case INSN_SYSRET: if (insn_func(insn)) break; return 0; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 089a1acc48a8d0..01ef6f415adf64 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -19,7 +19,8 @@ enum insn_type { INSN_CALL, INSN_CALL_DYNAMIC, INSN_RETURN, - INSN_CONTEXT_SWITCH, + INSN_SYSCALL, + INSN_SYSRET, INSN_BUG, INSN_NOP, INSN_STAC, From 9f9cc012c2cbac4833746a0182e06a8eec940d19 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 00:02:15 -0700 Subject: [PATCH 0239/2924] objtool: Stop UNRET validation on UD2 In preparation for simplifying INSN_SYSCALL, make validate_unret() terminate control flow on UD2 just like validate_branch() already does. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://lore.kernel.org/r/ce841269e7e28c8b7f32064464a9821034d724ff.1744095216.git.jpoimboe@kernel.org --- tools/objtool/check.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2c703b960420a4..2dd89b0f4d5eb0 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3902,6 +3902,9 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) break; } + if (insn->dead_end) + return 0; + if (!next) { WARN_INSN(insn, "teh end!"); return 1; From 2dbbca9be4e5ed68d0972a2bcf4561d9cb85b7b7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 00:02:16 -0700 Subject: [PATCH 0240/2924] objtool, xen: Fix INSN_SYSCALL / INSN_SYSRET semantics Objtool uses an arbitrary rule for INSN_SYSCALL and INSN_SYSRET that almost works by accident: if it's in a function, control flow continues after the instruction, otherwise it terminates. That behavior should instead be based on the semantics of the underlying instruction. Change INSN_SYSCALL to always preserve control flow and INSN_SYSRET to always terminate it. The changed semantic for INSN_SYSCALL requires a tweak to the !CONFIG_IA32_EMULATION version of xen_entry_SYSCALL_compat(). In Xen, SYSCALL is a hypercall which usually returns. But in this case it's a hypercall to IRET which doesn't return. Add UD2 to tell objtool to terminate control flow, and to prevent undefined behavior at runtime. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Juergen Gross # for the Xen part Cc: Linus Torvalds Link: https://lore.kernel.org/r/19453dfe9a0431b7f016e9dc16d031cad3812a50.1744095216.git.jpoimboe@kernel.org --- arch/x86/xen/xen-asm.S | 4 +--- tools/objtool/check.c | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 109af12f764727..461bb1526502a0 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -226,9 +226,7 @@ SYM_CODE_END(xen_early_idt_handler_array) push %rax mov $__HYPERVISOR_iret, %eax syscall /* Do the IRET. */ -#ifdef CONFIG_MITIGATION_SLS - int3 -#endif + ud2 /* The SYSCALL should never return. */ .endm SYM_CODE_START(xen_iret) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2dd89b0f4d5eb0..69f94bc4749985 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3685,14 +3685,19 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_SYSCALL: + if (func && (!next_insn || !next_insn->hint)) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; + } + + break; + case INSN_SYSRET: - if (func) { - if (!next_insn || !next_insn->hint) { - WARN_INSN(insn, "unsupported instruction in callable function"); - return 1; - } - break; + if (func && (!next_insn || !next_insn->hint)) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; } + return 0; case INSN_STAC: @@ -3888,9 +3893,9 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) return 1; case INSN_SYSCALL: + break; + case INSN_SYSRET: - if (insn_func(insn)) - break; return 0; case INSN_NOP: From 69ae94725f4fc9e75219d2d69022029c5b24bc9a Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Thu, 3 Apr 2025 09:24:31 +0000 Subject: [PATCH 0241/2924] tipc: fix memory leak in tipc_link_xmit In case the backlog transmit queue for system-importance messages is overloaded, tipc_link_xmit() returns -ENOBUFS but the skb list is not purged. This leads to memory leak and failure when a skb is allocated. This commit fixes this issue by purging the skb list before tipc_link_xmit() returns. Fixes: 365ad353c256 ("tipc: reduce risk of user starvation during link congestion") Signed-off-by: Tung Nguyen Link: https://patch.msgid.link/20250403092431.514063-1-tung.quang.nguyen@est.tech Signed-off-by: Paolo Abeni --- net/tipc/link.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 50c2e0846ea4df..18be6ff4c3db0a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1046,6 +1046,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { if (imp == TIPC_SYSTEM_IMPORTANCE) { pr_warn("%s<%s>, link overflow", link_rst_msg, l->name); + __skb_queue_purge(list); return -ENOBUFS; } rc = link_schedule_user(l, hdr); From 6deb8435f6bfcc9b6c7efe3b8a941ae2fb731495 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 1 Apr 2025 14:46:42 +0200 Subject: [PATCH 0242/2924] gpio: deprecate the GPIOD_FLAGS_BIT_NONEXCLUSIVE flag The non-exclusive GPIO request flag looks like a functional feature but is in fact a workaround for a corner-case that got out of hand. It should be removed so deprecate it officially so that nobody uses it anymore. Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20250401-gpio-todo-remove-nonexclusive-v2-1-7c1380797b0d@linaro.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/consumer.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 45b651c05b9c87..8adc8e9cb4a7b6 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -31,6 +31,7 @@ struct gpio_descs { #define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) #define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) #define GPIOD_FLAGS_BIT_OPEN_DRAIN BIT(3) +/* GPIOD_FLAGS_BIT_NONEXCLUSIVE is DEPRECATED, don't use in new code. */ #define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4) /** From 686e54ea31f3b7d9d20ac1fa2d0295d649c41f56 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 1 Apr 2025 14:46:43 +0200 Subject: [PATCH 0243/2924] gpio: deprecate devm_gpiod_unhinge() This function was introduced as a workaround for an issue with resource ownership in the regulator subsystem. Rather than passing the ownership of a GPIO, we should make the regulator core be able to deal with resources it didn't request. Deprecate this function so that we don't get more users in the tree. Link: https://lore.kernel.org/r/20250401-gpio-todo-remove-nonexclusive-v2-2-7c1380797b0d@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-devres.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-devres.c b/drivers/gpio/gpiolib-devres.c index 08205f355cebe5..120d1ec5af3bd6 100644 --- a/drivers/gpio/gpiolib-devres.c +++ b/drivers/gpio/gpiolib-devres.c @@ -317,11 +317,15 @@ EXPORT_SYMBOL_GPL(devm_gpiod_put); * @dev: GPIO consumer * @desc: GPIO descriptor to remove resource management from * + * *DEPRECATED* + * This function should not be used. It's been provided as a workaround for + * resource ownership issues in the regulator framework and should be replaced + * with a better solution. + * * Remove resource management from a GPIO descriptor. This is needed when * you want to hand over lifecycle management of a descriptor to another * mechanism. */ - void devm_gpiod_unhinge(struct device *dev, struct gpio_desc *desc) { int ret; From 3af64f175b2405270bd0926153d9856a49b58352 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 1 Apr 2025 14:46:44 +0200 Subject: [PATCH 0244/2924] MAINTAINERS: add more keywords for the GPIO subsystem entry Add GPIOD_FLAGS_BIT_NONEXCLUSIVE and devm_gpiod_unhinge as keywords to the GPIO entry so that we get notified if anybody tries to use these deprecated symbols. We'll drop them from here once we remove them from the kernel. Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20250401-gpio-todo-remove-nonexclusive-v2-3-7c1380797b0d@linaro.org Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 96b82704950184..c59316109e3f8f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10151,6 +10151,8 @@ F: include/linux/gpio.h F: include/linux/gpio/ F: include/linux/of_gpio.h K: (devm_)?gpio_(request|free|direction|get|set) +K: GPIOD_FLAGS_BIT_NONEXCLUSIVE +K: devm_gpiod_unhinge GPIO UAPI M: Bartosz Golaszewski From 2de1cf175c00927c286f8bd72e18602fe072af95 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 1 Apr 2025 14:46:45 +0200 Subject: [PATCH 0245/2924] gpio: TODO: track the removal of regulator-related workarounds The GPIOD_FLAGS_BIT_NONEXCLUSIVE flag and devm_gpiod_unhinge() function should be replaced with a better solution. The pwrseq subsystem is a good candidate. GPIOs themselves should remain a unique resource. Add a task for tracking the removal of these deprecated symbols. Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20250401-gpio-todo-remove-nonexclusive-v2-4-7c1380797b0d@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/TODO | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO index b5f0a7a2e1bf14..4b70cbaa1caacd 100644 --- a/drivers/gpio/TODO +++ b/drivers/gpio/TODO @@ -186,3 +186,37 @@ their hardware offsets within the chip. Encourage users to switch to using them and eventually remove the existing global export/unexport attribues. + +------------------------------------------------------------------------------- + +Remove GPIOD_FLAGS_BIT_NONEXCLUSIVE + +GPIOs in the linux kernel are meant to be an exclusive resource. This means +that the GPIO descriptors (the software representation of the hardware concept) +are not reference counted and - in general - only one user at a time can +request a GPIO line and control its settings. The consumer API is designed +around full control of the line's state as evidenced by the fact that, for +instance, gpiod_set_value() does indeed drive the line as requested, instead +of bumping an enable counter of some sort. + +A problematic use-case for GPIOs is when two consumers want to use the same +descriptor independently. An example of such a user is the regulator subsystem +which may instantiate several struct regulator_dev instances containing +a struct device but using the same enable GPIO line. + +A workaround was introduced in the form of the GPIOD_FLAGS_BIT_NONEXCLUSIVE +flag but its implementation is problematic: it does not provide any +synchronization of usage nor did it introduce any enable count meaning the +non-exclusive users of the same descriptor will in fact "fight" for the +control over it. This flag should be removed and replaced with a better +solution, possibly based on the new power sequencing subsystem. + +------------------------------------------------------------------------------- + +Remove devm_gpiod_unhinge() + +devm_gpiod_unhinge() is provided as a way to transfer the ownership of managed +enable GPIOs to the regulator core. Rather than doing that however, we should +make it possible for the regulator subsystem to deal with GPIO resources the +lifetime of which it doesn't control as logically, a GPIO obtained by a caller +should also be freed by it. From da47605e43af9996eb46c8a060f259a8c34cc3c5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 6 Apr 2025 22:22:44 +0200 Subject: [PATCH 0246/2924] gpio: mpc8xxx: Fix wakeup source leaks on device unbind Device can be unbound, so driver must also release memory for the wakeup source. Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250406202245.53854-1-krzysztof.kozlowski@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mpc8xxx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 0cd4c36ae8aaf0..5415175364899e 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -410,7 +410,9 @@ static int mpc8xxx_probe(struct platform_device *pdev) goto err; } - device_init_wakeup(dev, true); + ret = devm_device_init_wakeup(dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to init wakeup\n"); return 0; err: From c5672e310ad971d408752fce7596ed27adc6008f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 6 Apr 2025 22:22:45 +0200 Subject: [PATCH 0247/2924] gpio: zynq: Fix wakeup source leaks on device unbind Device can be unbound, so driver must also release memory for the wakeup source. Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250406202245.53854-2-krzysztof.kozlowski@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-zynq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c index be81fa2b17abc6..3dae63f3ea2177 100644 --- a/drivers/gpio/gpio-zynq.c +++ b/drivers/gpio/gpio-zynq.c @@ -1011,6 +1011,7 @@ static void zynq_gpio_remove(struct platform_device *pdev) ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) dev_warn(&pdev->dev, "pm_runtime_get_sync() Failed\n"); + device_init_wakeup(&pdev->dev, 0); gpiochip_remove(&gpio->chip); device_set_wakeup_capable(&pdev->dev, 0); pm_runtime_disable(&pdev->dev); From 5ba8b837b522d7051ef81bacf3d95383ff8edce5 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:23 -0700 Subject: [PATCH 0248/2924] sch_htb: make htb_qlen_notify() idempotent htb_qlen_notify() always deactivates the HTB class and in fact could trigger a warning if it is already deactivated. Therefore, it is not idempotent and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-2-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/sch_htb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c31bc5489bddc0..4b9a639b642e1e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1485,6 +1485,8 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; + if (!cl->prio_activity) + return; htb_deactivate(qdisc_priv(sch), cl); } From df008598b3a00be02a8051fde89ca0fbc416bd55 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:24 -0700 Subject: [PATCH 0249/2924] sch_drr: make drr_qlen_notify() idempotent drr_qlen_notify() always deletes the DRR class from its active list with list_del(), therefore, it is not idempotent and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life. Also change other list_del()'s to list_del_init() just to be extra safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-3-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/sch_drr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index c69b999fae171c..e0a81d313aa767 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -105,6 +105,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -ENOBUFS; gnet_stats_basic_sync_init(&cl->bstats); + INIT_LIST_HEAD(&cl->alist); cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -229,7 +230,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) { struct drr_class *cl = (struct drr_class *)arg; - list_del(&cl->alist); + list_del_init(&cl->alist); } static int drr_dump_class(struct Qdisc *sch, unsigned long arg, @@ -390,7 +391,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) if (unlikely(skb == NULL)) goto out; if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del_init(&cl->alist); bstats_update(&cl->bstats, skb); qdisc_bstats_update(sch, skb); @@ -431,7 +432,7 @@ static void drr_reset_qdisc(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen) - list_del(&cl->alist); + list_del_init(&cl->alist); qdisc_reset(cl->qdisc); } } From 51eb3b65544c9efd6a1026889ee5fb5aa62da3bb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:25 -0700 Subject: [PATCH 0250/2924] sch_hfsc: make hfsc_qlen_notify() idempotent hfsc_qlen_notify() is not idempotent either and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life: 1. update_vf() decreases cl->cl_nactive, so we can check whether it is non-zero before calling it. 2. eltree_remove() always removes RB node cl->el_node, but we can use RB_EMPTY_NODE() + RB_CLEAR_NODE() to make it safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-4-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/sch_hfsc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c287bf8423b47b..ce5045eea06568 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -203,7 +203,10 @@ eltree_insert(struct hfsc_class *cl) static inline void eltree_remove(struct hfsc_class *cl) { - rb_erase(&cl->el_node, &cl->sched->eligible); + if (!RB_EMPTY_NODE(&cl->el_node)) { + rb_erase(&cl->el_node, &cl->sched->eligible); + RB_CLEAR_NODE(&cl->el_node); + } } static inline void @@ -1220,7 +1223,8 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from vttree. */ - update_vf(cl, 0, 0); + if (cl->cl_nactive) + update_vf(cl, 0, 0); if (cl->cl_flags & HFSC_RSC) eltree_remove(cl); } From 55f9eca4bfe30a15d8656f915922e8c98b7f0728 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:26 -0700 Subject: [PATCH 0251/2924] sch_qfq: make qfq_qlen_notify() idempotent qfq_qlen_notify() always deletes its class from its active list with list_del_init() _and_ calls qfq_deactivate_agg() when the whole list becomes empty. To make it idempotent, just skip everything when it is not in the active list. Also change other list_del()'s to list_del_init() just to be extra safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211033.166059-5-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/sch_qfq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 2cfbc977fe6d0b..687a932eb9b2f6 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -347,7 +347,7 @@ static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) struct qfq_aggregate *agg = cl->agg; - list_del(&cl->alist); /* remove from RR queue of the aggregate */ + list_del_init(&cl->alist); /* remove from RR queue of the aggregate */ if (list_empty(&agg->active)) /* agg is now inactive */ qfq_deactivate_agg(q, agg); } @@ -474,6 +474,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->deficit = lmax; + INIT_LIST_HEAD(&cl->alist); cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, NULL); @@ -982,7 +983,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, cl->deficit -= (int) len; if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ - list_del(&cl->alist); + list_del_init(&cl->alist); else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); @@ -1415,6 +1416,8 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; + if (list_empty(&cl->alist)) + return; qfq_deactivate_class(q, cl); } From a7a15f39c682ac4268624da2abdb9114bdde96d5 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:10:27 -0700 Subject: [PATCH 0252/2924] sch_ets: make est_qlen_notify() idempotent est_qlen_notify() deletes its class from its active list with list_del() when qlen is 0, therefore, it is not idempotent and not friendly to its callers, like fq_codel_dequeue(). Let's make it idempotent to ease qdisc_tree_reduce_backlog() callers' life. Also change other list_del()'s to list_del_init() just to be extra safe. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250403211033.166059-6-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/sch_ets.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 516038a4416380..c3bdeb14185bea 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -293,7 +293,7 @@ static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg) * to remove them. */ if (!ets_class_is_strict(q, cl) && sch->q.qlen) - list_del(&cl->alist); + list_del_init(&cl->alist); } static int ets_class_dump(struct Qdisc *sch, unsigned long arg, @@ -488,7 +488,7 @@ static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch) if (unlikely(!skb)) goto out; if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del_init(&cl->alist); return ets_qdisc_dequeue_skb(sch, skb); } @@ -657,7 +657,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, } for (i = q->nbands; i < oldbands; i++) { if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) - list_del(&q->classes[i].alist); + list_del_init(&q->classes[i].alist); qdisc_tree_flush_backlog(q->classes[i].qdisc); } WRITE_ONCE(q->nstrict, nstrict); @@ -713,7 +713,7 @@ static void ets_qdisc_reset(struct Qdisc *sch) for (band = q->nstrict; band < q->nbands; band++) { if (q->classes[band].qdisc->q.qlen) - list_del(&q->classes[band].alist); + list_del_init(&q->classes[band].alist); } for (band = 0; band < q->nbands; band++) qdisc_reset(q->classes[band].qdisc); From 342debc12183b51773b3345ba267e9263bdfaaef Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:31 -0700 Subject: [PATCH 0253/2924] codel: remove sch->q.qlen check before qdisc_tree_reduce_backlog() After making all ->qlen_notify() callbacks idempotent, now it is safe to remove the check of qlen!=0 from both fq_codel_dequeue() and codel_qdisc_dequeue(). Reported-by: Gerrard Tai Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM") Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250403211636.166257-1-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/sch_codel.c | 5 +---- net/sched/sch_fq_codel.c | 6 ++---- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 81189d02fee761..12dd71139da396 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -65,10 +65,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) &q->stats, qdisc_pkt_len, codel_get_enqueue_time, drop_func, dequeue_func); - /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, - * or HTB crashes. Defer it for next round. - */ - if (q->stats.drop_count && sch->q.qlen) { + if (q->stats.drop_count) { qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len); q->stats.drop_count = 0; q->stats.drop_len = 0; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 799f5397ad4c17..6c9029f71e88d3 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -315,10 +315,8 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) } qdisc_bstats_update(sch, skb); flow->deficit -= qdisc_pkt_len(skb); - /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, - * or HTB crashes. Defer it for next round. - */ - if (q->cstats.drop_count && sch->q.qlen) { + + if (q->cstats.drop_count) { qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); q->cstats.drop_count = 0; From cbe9588b12d058cbb16735fd468c82ec4b3d1256 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:32 -0700 Subject: [PATCH 0254/2924] selftests/tc-testing: Add a test case for FQ_CODEL with HTB parent Add a test case for FQ_CODEL with HTB parent to verify packet drop behavior when the queue becomes empty. This helps ensure proper notification mechanisms between qdiscs. Note this is best-effort, it is hard to play with those parameters perfectly to always trigger ->qlen_notify(). Cc: Pedro Tammela Signed-off-by: Cong Wang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250403211636.166257-2-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 25454fd955371a..545966b6adc6f9 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -158,5 +158,36 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4bb", + "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "htb" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root htb default 10", + "$TC class add dev $DUMMY parent 1: classid 1:10 htb rate 1kbit", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] From 4cb1837ac5375b9b271cb83b2a43a3f942f4c36e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:33 -0700 Subject: [PATCH 0255/2924] selftests/tc-testing: Add a test case for FQ_CODEL with QFQ parent Add a test case for FQ_CODEL with QFQ parent to verify packet drop behavior when the queue becomes empty. This helps ensure proper notification mechanisms between qdiscs. Note this is best-effort, it is hard to play with those parameters perfectly to always trigger ->qlen_notify(). Cc: Pedro Tammela Signed-off-by: Cong Wang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250403211636.166257-3-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 545966b6adc6f9..695522b00a3c0f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -189,5 +189,36 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4be", + "name": "Test FQ_CODEL with QFQ parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root qfq", + "$TC class add dev $DUMMY parent 1: classid 1:10 qfq weight 1 maxpkt 1000", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 10 -s 1000 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] From 72b05c1bf7ea799bfce1164d6605b27f060191ac Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:34 -0700 Subject: [PATCH 0256/2924] selftests/tc-testing: Add a test case for FQ_CODEL with HFSC parent Add a test case for FQ_CODEL with HFSC parent to verify packet drop behavior when the queue becomes empty. This helps ensure proper notification mechanisms between qdiscs. Note this is best-effort, it is hard to play with those parameters perfectly to always trigger ->qlen_notify(). Cc: Pedro Tammela Signed-off-by: Cong Wang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250403211636.166257-4-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 695522b00a3c0f..0347b207fe6dad 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -220,5 +220,36 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4bf", + "name": "Test FQ_CODEL with HFSC parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root hfsc default 10", + "$TC class add dev $DUMMY parent 1: classid 1:10 hfsc sc rate 1kbit ul rate 1kbit", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] From 0d5c27ecb60c6cc4e394035aa04696d7eb39f072 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:35 -0700 Subject: [PATCH 0257/2924] selftests/tc-testing: Add a test case for FQ_CODEL with DRR parent Add a test case for FQ_CODEL with DRR parent to verify packet drop behavior when the queue becomes empty. This helps ensure proper notification mechanisms between qdiscs. Note this is best-effort, it is hard to play with those parameters perfectly to always trigger ->qlen_notify(). Cc: Pedro Tammela Signed-off-by: Cong Wang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250403211636.166257-5-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 0347b207fe6dad..4a45fedad8764f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -251,5 +251,36 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4c0", + "name": "Test FQ_CODEL with DRR parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root drr", + "$TC class add dev $DUMMY parent 1: classid 1:10 drr quantum 1500", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] From ce94507f5fe04eb7fe1eecfe32a2b29233341ff0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 3 Apr 2025 14:16:36 -0700 Subject: [PATCH 0258/2924] selftests/tc-testing: Add a test case for FQ_CODEL with ETS parent Add a test case for FQ_CODEL with ETS parent to verify packet drop behavior when the queue becomes empty. This helps ensure proper notification mechanisms between qdiscs. Note this is best-effort, it is hard to play with those parameters perfectly to always trigger ->qlen_notify(). Cc: Pedro Tammela Signed-off-by: Cong Wang Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20250403211636.166257-6-xiyou.wangcong@gmail.com Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 4a45fedad8764f..d4ea9cd845a33a 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -282,5 +282,36 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4c1", + "name": "Test FQ_CODEL with ETS parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "ets" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 1", + "$TC class change dev $DUMMY parent 1: classid 1:1 ets", + "$TC qdisc add dev $DUMMY parent 1:1 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] From 2b9c536430126c233552cdcd6ec9d5077454ece4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 2 Apr 2025 15:20:00 +0300 Subject: [PATCH 0259/2924] gpiolib: of: Fix the choice for Ingenic NAND quirk The Ingenic NAND quirk has been added under CONFIG_LCD_HX8357 ifdeffery which sounds quite wrong. Fix the choice for Ingenic NAND quirk by wrapping it into own ifdeffery related to the respective driver. Fixes: 3a7fd473bd5d ("mtd: rawnand: ingenic: move the GPIO quirk to gpiolib-of.c") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250402122058.1517393-2-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index eb667f8f1ead4f..8fff6fdfb4a12a 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -193,6 +193,8 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np, */ { "himax,hx8357", "gpios-reset", false }, { "himax,hx8369", "gpios-reset", false }, +#endif +#if IS_ENABLED(CONFIG_MTD_NAND_JZ4780) /* * The rb-gpios semantics was undocumented and qi,lb60 (along with * the ingenic driver) got it wrong. The active state encodes the From b8c7a1ac884cc267d1031f8de07f1a689a69fbab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 2 Apr 2025 15:20:01 +0300 Subject: [PATCH 0260/2924] gpiolib: of: Move Atmel HSMCI quirk up out of the regulator comment The regulator comment in of_gpio_set_polarity_by_property() made on top of a couple of the cases, while Atmel HSMCI quirk is not related to that. Make it clear by moving Atmel HSMCI quirk up out of the scope of the regulator comment. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250402122058.1517393-3-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 8fff6fdfb4a12a..65f6a7177b78ef 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -268,6 +268,9 @@ static void of_gpio_set_polarity_by_property(const struct device_node *np, { "fsl,imx8qm-fec", "phy-reset-gpios", "phy-reset-active-high" }, { "fsl,s32v234-fec", "phy-reset-gpios", "phy-reset-active-high" }, #endif +#if IS_ENABLED(CONFIG_MMC_ATMELMCI) + { "atmel,hsmci", "cd-gpios", "cd-inverted" }, +#endif #if IS_ENABLED(CONFIG_PCI_IMX6) { "fsl,imx6q-pcie", "reset-gpio", "reset-gpio-active-high" }, { "fsl,imx6sx-pcie", "reset-gpio", "reset-gpio-active-high" }, @@ -293,9 +296,6 @@ static void of_gpio_set_polarity_by_property(const struct device_node *np, #if IS_ENABLED(CONFIG_REGULATOR_GPIO) { "regulator-gpio", "enable-gpio", "enable-active-high" }, { "regulator-gpio", "enable-gpios", "enable-active-high" }, -#endif -#if IS_ENABLED(CONFIG_MMC_ATMELMCI) - { "atmel,hsmci", "cd-gpios", "cd-inverted" }, #endif }; unsigned int i; From 9ca67840c0ddf3f39407339624cef824a4f27599 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 6 Mar 2025 18:54:47 +0000 Subject: [PATCH 0261/2924] firmware: arm_scmi: Balance device refcount when destroying devices Using device_find_child() to lookup the proper SCMI device to destroy causes an unbalance in device refcount, since device_find_child() calls an implicit get_device(): this, in turns, inhibits the call of the provided release methods upon devices destruction. As a consequence, one of the structures that is not freed properly upon destruction is the internal struct device_private dev->p populated by the drivers subsystem core. KMemleak detects this situation since loading/unloding some SCMI driver causes related devices to be created/destroyed without calling any device_release method. unreferenced object 0xffff00000f583800 (size 512): comm "insmod", pid 227, jiffies 4294912190 hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 60 36 1d 8a 00 80 ff ff ........`6...... backtrace (crc 114e2eed): kmemleak_alloc+0xbc/0xd8 __kmalloc_cache_noprof+0x2dc/0x398 device_add+0x954/0x12d0 device_register+0x28/0x40 __scmi_device_create.part.0+0x1bc/0x380 scmi_device_create+0x2d0/0x390 scmi_create_protocol_devices+0x74/0xf8 scmi_device_request_notifier+0x1f8/0x2a8 notifier_call_chain+0x110/0x3b0 blocking_notifier_call_chain+0x70/0xb0 scmi_driver_register+0x350/0x7f0 0xffff80000a3b3038 do_one_initcall+0x12c/0x730 do_init_module+0x1dc/0x640 load_module+0x4b20/0x5b70 init_module_from_file+0xec/0x158 $ ./scripts/faddr2line ./vmlinux device_add+0x954/0x12d0 device_add+0x954/0x12d0: kmalloc_noprof at include/linux/slab.h:901 (inlined by) kzalloc_noprof at include/linux/slab.h:1037 (inlined by) device_private_init at drivers/base/core.c:3510 (inlined by) device_add at drivers/base/core.c:3561 Balance device refcount by issuing a put_device() on devices found via device_find_child(). Reported-by: Alice Ryhl Closes: https://lore.kernel.org/linux-arm-kernel/Z8nK3uFkspy61yjP@arm.com/T/#mc1f73a0ea5e41014fa145147b7b839fc988ada8f CC: Sudeep Holla CC: Catalin Marinas Fixes: d4f9dddd21f3 ("firmware: arm_scmi: Add dynamic scmi devices creation") Signed-off-by: Cristian Marussi Tested-by: Alice Ryhl Message-Id: <20250306185447.2039336-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 7af01664ce7e2f..3a5474015f7dfd 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -255,6 +255,9 @@ static struct scmi_device *scmi_child_dev_find(struct device *parent, if (!dev) return NULL; + /* Drop the refcnt bumped implicitly by device_find_child */ + put_device(dev); + return to_scmi_dev(dev); } From c23c03bf1faa1e76be1eba35bad6da6a2a7c95ee Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 10 Mar 2025 17:58:00 +0000 Subject: [PATCH 0262/2924] firmware: arm_scmi: Fix timeout checks on polling path Polling mode transactions wait for a reply busy-looping without holding a spinlock, but currently the timeout checks are based only on elapsed time: as a result we could hit a false positive whenever our busy-looping thread is pre-empted and scheduled out for a time greater than the polling timeout. Change the checks at the end of the busy-loop to make sure that the polling wasn't indeed successful or an out-of-order reply caused the polling to be forcibly terminated. Fixes: 31d2f803c19c ("firmware: arm_scmi: Add sync_cmds_completed_on_ret transport flag") Reported-by: Huangjie Closes: https://lore.kernel.org/arm-scmi/20250123083323.2363749-1-jackhuang021@gmail.com/ Signed-off-by: Cristian Marussi Cc: stable@vger.kernel.org # 5.18.x Message-Id: <20250310175800.1444293-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 1c75a4c9c37166..0390d5ff195ec0 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1248,7 +1248,8 @@ static void xfer_put(const struct scmi_protocol_handle *ph, } static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo, - struct scmi_xfer *xfer, ktime_t stop) + struct scmi_xfer *xfer, ktime_t stop, + bool *ooo) { struct scmi_info *info = handle_to_scmi_info(cinfo->handle); @@ -1257,7 +1258,7 @@ static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo, * in case of out-of-order receptions of delayed responses */ return info->desc->ops->poll_done(cinfo, xfer) || - try_wait_for_completion(&xfer->done) || + (*ooo = try_wait_for_completion(&xfer->done)) || ktime_after(ktime_get(), stop); } @@ -1274,15 +1275,17 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, * itself to support synchronous commands replies. */ if (!desc->sync_cmds_completed_on_ret) { + bool ooo = false; + /* * Poll on xfer using transport provided .poll_done(); * assumes no completion interrupt was available. */ ktime_t stop = ktime_add_ms(ktime_get(), timeout_ms); - spin_until_cond(scmi_xfer_done_no_timeout(cinfo, - xfer, stop)); - if (ktime_after(ktime_get(), stop)) { + spin_until_cond(scmi_xfer_done_no_timeout(cinfo, xfer, + stop, &ooo)); + if (!ooo && !info->desc->ops->poll_done(cinfo, xfer)) { dev_err(dev, "timed out in resp(caller: %pS) - polling\n", (void *)_RET_IP_); From 4567bdaaaaa1744da3d7da07d9aca2f941f5b4e5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 21 Mar 2025 11:57:00 +0000 Subject: [PATCH 0263/2924] firmware: arm_ffa: Skip Rx buffer ownership release if not acquired MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completion of the FFA_PARTITION_INFO_GET ABI transfers the ownership of the caller’s Rx buffer from the producer(typically partition mnager) to the consumer(this driver/OS). FFA_RX_RELEASE transfers the ownership from the consumer back to the producer. However, when we set the flag to just return the count of partitions deployed in the system corresponding to the specified UUID while invoking FFA_PARTITION_INFO_GET, the Rx buffer ownership shouldn't be transferred to this driver. We must be able to skip transferring back the ownership to the partition manager when we request just to get the count of the partitions as the buffers are not acquired in this case. Firmware may return FFA_RET_DENIED or other error for the ffa_rx_release() in such cases. Fixes: bb1be7498500 ("firmware: arm_ffa: Add v1.1 get_partition_info support") Message-Id: <20250321115700.3525197-1-sudeep.holla@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 19295282de2406..fe55613a8ea993 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -299,7 +299,8 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, import_uuid(&buf->uuid, (u8 *)&rx_buf->uuid); } - ffa_rx_release(); + if (!(flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)) + ffa_rx_release(); mutex_unlock(&drv_info->rx_lock); From f1a69a940de58b16e8249dff26f74c8cc59b32be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= Date: Fri, 4 Apr 2025 16:53:21 +0200 Subject: [PATCH 0264/2924] sctp: detect and prevent references to a freed transport in sendmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sctp_sendmsg() re-uses associations and transports when possible by doing a lookup based on the socket endpoint and the message destination address, and then sctp_sendmsg_to_asoc() sets the selected transport in all the message chunks to be sent. There's a possible race condition if another thread triggers the removal of that selected transport, for instance, by explicitly unbinding an address with setsockopt(SCTP_SOCKOPT_BINDX_REM), after the chunks have been set up and before the message is sent. This can happen if the send buffer is full, during the period when the sender thread temporarily releases the socket lock in sctp_wait_for_sndbuf(). This causes the access to the transport data in sctp_outq_select_transport(), when the association outqueue is flushed, to result in a use-after-free read. This change avoids this scenario by having sctp_transport_free() signal the freeing of the transport, tagging it as "dead". In order to do this, the patch restores the "dead" bit in struct sctp_transport, which was removed in commit 47faa1e4c50e ("sctp: remove the dead field of sctp_transport"). Then, in the scenario where the sender thread has released the socket lock in sctp_wait_for_sndbuf(), the bit is checked again after re-acquiring the socket lock to detect the deletion. This is done while holding a reference to the transport to prevent it from being freed in the process. If the transport was deleted while the socket lock was relinquished, sctp_sendmsg_to_asoc() will return -EAGAIN to let userspace retry the send. The bug was found by a private syzbot instance (see the error report [1] and the C reproducer that triggers it [2]). Link: https://people.igalia.com/rcn/kernel_logs/20250402__KASAN_slab-use-after-free_Read_in_sctp_outq_select_transport.txt [1] Link: https://people.igalia.com/rcn/kernel_logs/20250402__KASAN_slab-use-after-free_Read_in_sctp_outq_select_transport__repro.c [2] Cc: stable@vger.kernel.org Fixes: df132eff4638 ("sctp: clear the transport of some out_chunk_list chunks in sctp_assoc_rm_peer") Suggested-by: Xin Long Signed-off-by: Ricardo Cañuelo Navarro Acked-by: Xin Long Link: https://patch.msgid.link/20250404-kasan_slab-use-after-free_read_in_sctp_outq_select_transport__20250404-v1-1-5ce4a0b78ef2@igalia.com Signed-off-by: Paolo Abeni --- include/net/sctp/structs.h | 3 ++- net/sctp/socket.c | 22 ++++++++++++++-------- net/sctp/transport.c | 2 ++ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 31248cfdfb235f..dcd288fa1bb6fb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -775,6 +775,7 @@ struct sctp_transport { /* Reference counting. */ refcount_t refcnt; + __u32 dead:1, /* RTO-Pending : A flag used to track if one of the DATA * chunks sent to this address is currently being * used to compute a RTT. If this flag is 0, @@ -784,7 +785,7 @@ struct sctp_transport { * calculation completes (i.e. the DATA chunk * is SACK'd) clear this flag. */ - __u32 rto_pending:1, + rto_pending:1, /* * hb_sent : a flag that signals that we have a pending diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 36ee34f483d703..53725ee7ba06d7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -72,8 +72,9 @@ /* Forward declarations for internal helper functions. */ static bool sctp_writeable(const struct sock *sk); static void sctp_wfree(struct sk_buff *skb); -static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len); +static int sctp_wait_for_sndbuf(struct sctp_association *asoc, + struct sctp_transport *transport, + long *timeo_p, size_t msg_len); static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); @@ -1828,7 +1829,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) { timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); + err = sctp_wait_for_sndbuf(asoc, transport, &timeo, msg_len); if (err) goto err; if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) { @@ -9214,8 +9215,9 @@ void sctp_sock_rfree(struct sk_buff *skb) /* Helper function to wait for space in the sndbuf. */ -static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len) +static int sctp_wait_for_sndbuf(struct sctp_association *asoc, + struct sctp_transport *transport, + long *timeo_p, size_t msg_len) { struct sock *sk = asoc->base.sk; long current_timeo = *timeo_p; @@ -9225,7 +9227,9 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, *timeo_p, msg_len); - /* Increment the association's refcnt. */ + /* Increment the transport and association's refcnt. */ + if (transport) + sctp_transport_hold(transport); sctp_association_hold(asoc); /* Wait on the association specific sndbuf space. */ @@ -9234,7 +9238,7 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, TASK_INTERRUPTIBLE); if (asoc->base.dead) goto do_dead; - if (!*timeo_p) + if ((!*timeo_p) || (transport && transport->dead)) goto do_nonblock; if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING) goto do_error; @@ -9259,7 +9263,9 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, out: finish_wait(&asoc->wait, &wait); - /* Release the association's refcnt. */ + /* Release the transport and association's refcnt. */ + if (transport) + sctp_transport_put(transport); sctp_association_put(asoc); return err; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 2abe45af98e7c6..31eca29b6cfbfb 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -117,6 +117,8 @@ struct sctp_transport *sctp_transport_new(struct net *net, */ void sctp_transport_free(struct sctp_transport *transport) { + transport->dead = 1; + /* Try to delete the heartbeat timer. */ if (del_timer(&transport->hb_timer)) sctp_transport_put(transport); From 5071a1e606b30c0c11278d3c6620cd6a24724cf6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 4 Apr 2025 11:03:33 -0700 Subject: [PATCH 0265/2924] net: tls: explicitly disallow disconnect syzbot discovered that it can disconnect a TLS socket and then run into all sort of unexpected corner cases. I have a vague recollection of Eric pointing this out to us a long time ago. Supporting disconnect is really hard, for one thing if offload is enabled we'd need to wait for all packets to be _acked_. Disconnect is not commonly used, disallow it. The immediate problem syzbot run into is the warning in the strp, but that's just the easiest bug to trigger: WARNING: CPU: 0 PID: 5834 at net/tls/tls_strp.c:486 tls_strp_msg_load+0x72e/0xa80 net/tls/tls_strp.c:486 RIP: 0010:tls_strp_msg_load+0x72e/0xa80 net/tls/tls_strp.c:486 Call Trace: tls_rx_rec_wait+0x280/0xa60 net/tls/tls_sw.c:1363 tls_sw_recvmsg+0x85c/0x1c30 net/tls/tls_sw.c:2043 inet6_recvmsg+0x2c9/0x730 net/ipv6/af_inet6.c:678 sock_recvmsg_nosec net/socket.c:1023 [inline] sock_recvmsg+0x109/0x280 net/socket.c:1045 __sys_recvfrom+0x202/0x380 net/socket.c:2237 Fixes: 3c4d7559159b ("tls: kernel TLS support") Reported-by: syzbot+b4cd76826045a1eb93c1@syzkaller.appspotmail.com Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250404180334.3224206-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/tls/tls_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index cb86b0bf9a53e1..a3ccb3135e51ac 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -852,6 +852,11 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, return do_tls_setsockopt(sk, optname, optval, optlen); } +static int tls_disconnect(struct sock *sk, int flags) +{ + return -EOPNOTSUPP; +} + struct tls_context *tls_ctx_create(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -947,6 +952,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_BASE] = *base; prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt; + prot[TLS_BASE][TLS_BASE].disconnect = tls_disconnect; prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close; prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; From a1328a671e1c93a3513c286a05ff0abe6698d891 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 4 Apr 2025 11:03:34 -0700 Subject: [PATCH 0266/2924] selftests: tls: check that disconnect does nothing "Inspired" by syzbot test, pre-queue some data, disconnect() and try to receive(). This used to trigger a warning in TLS's strp. Now we expect the disconnect() to have almost no effect. Link: https://lore.kernel.org/67e6be74.050a0220.2f068f.007e.GAE@google.com Signed-off-by: Jakub Kicinski Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250404180334.3224206-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/tls.c | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 9a85f93c33d86c..5ded3b3a7538a6 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1753,6 +1753,42 @@ TEST_F(tls_basic, rekey_tx) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls_basic, disconnect) +{ + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + struct tls_crypto_info_keys key; + struct sockaddr_in addr; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &key, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len); + ASSERT_EQ(ret, 0); + + /* Pre-queue the data so that setsockopt parses it but doesn't + * dequeue it from the TCP socket. recvmsg would dequeue. + */ + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len); + ASSERT_EQ(ret, 0); + + addr.sin_family = AF_UNSPEC; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + ret = connect(self->cfd, &addr, sizeof(addr)); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); +} + TEST_F(tls, rekey) { char const *test_str_1 = "test_message_before_rekey"; From 080410fe61e6df035960f5cbec9e381ac8b4ced0 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 7 Apr 2025 11:08:29 +0200 Subject: [PATCH 0267/2924] ALSA: azt2320: Replace deprecated strcpy() with strscpy() strcpy() is deprecated, use strscpy() instead. Link: https://github.com/KSPP/linux/issues/88 Cc: linux-hardening@vger.kernel.org Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20250407090832.743255-1-thorsten.blum@linux.dev Signed-off-by: Takashi Iwai --- sound/isa/azt2320.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/isa/azt2320.c b/sound/isa/azt2320.c index b937c9138d1248..588b9f0831d3fe 100644 --- a/sound/isa/azt2320.c +++ b/sound/isa/azt2320.c @@ -189,8 +189,8 @@ static int snd_card_azt2320_probe(int dev, if (error < 0) return error; - strcpy(card->driver, "AZT2320"); - strcpy(card->shortname, "Aztech AZT2320"); + strscpy(card->driver, "AZT2320"); + strscpy(card->shortname, "Aztech AZT2320"); sprintf(card->longname, "%s, WSS at 0x%lx, irq %i, dma %i&%i", card->shortname, chip->port, irq[dev], dma1[dev], dma2[dev]); From b7db94734e785e380b0db0f9295e07024f4d42a0 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Mon, 7 Apr 2025 12:33:41 +0530 Subject: [PATCH 0268/2924] octeontx2-pf: qos: fix VF root node parent queue index The current code configures the Physical Function (PF) root node at TL1 and the Virtual Function (VF) root node at TL2. This ensure at any given point of time PF traffic gets more priority. PF root node TL1 / \ TL2 TL2 VF root node / \ TL3 TL3 / \ TL4 TL4 / \ SMQ SMQ Due to a bug in the current code, the TL2 parent queue index on the VF interface is not being configured, leading to 'SMQ Flush' errors Fixes: 5e6808b4c68d ("octeontx2-pf: Add support for HTB offload") Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250407070341.2765426-1-hkelam@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/qos.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c index 0f844c14485a0e..35acc07bd96489 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c @@ -165,6 +165,11 @@ static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf, otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); } else if (level == NIX_TXSCH_LVL_TL2) { + /* configure parent txschq */ + cfg->reg[num_regs] = NIX_AF_TL2X_PARENT(node->schq); + cfg->regval[num_regs] = (u64)hw->tx_link << 16; + num_regs++; + /* configure link cfg */ if (level == pfvf->qos.link_cfg_lvl) { cfg->reg[num_regs] = NIX_AF_TL3_TL2X_LINKX_CFG(node->schq, hw->tx_link); From 64a66e2c3b3113dc78a6124e14825d68ddc2e188 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 7 Apr 2025 12:18:41 +0200 Subject: [PATCH 0269/2924] x86/xen: disable CPU idle and frequency drivers for PVH dom0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running as a PVH dom0 the ACPI tables exposed to Linux are (mostly) the native ones, thus exposing the C and P states, that can lead to attachment of CPU idle and frequency drivers. However the entity in control of the CPU C and P states is Xen, as dom0 doesn't have a full view of the system load, neither has all CPUs assigned and identity pinned. Like it's done for classic PV guests, prevent Linux from using idle or frequency state drivers when running as a PVH dom0. On an AMD EPYC 7543P system without this fix a Linux PVH dom0 will keep the host CPUs spinning at 100% even when dom0 is completely idle, as it's attempting to use the acpi_idle driver. Signed-off-by: Roger Pau Monné Reviewed-by: Jason Andryuk Signed-off-by: Juergen Gross Message-ID: <20250407101842.67228-1-roger.pau@citrix.com> --- arch/x86/xen/enlighten_pvh.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 0e3d930bcb89e8..9d25d9373945cb 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include #include #include @@ -123,8 +125,23 @@ static void __init pvh_arch_setup(void) { pvh_reserve_extra_memory(); - if (xen_initial_domain()) + if (xen_initial_domain()) { xen_add_preferred_consoles(); + + /* + * Disable usage of CPU idle and frequency drivers: when + * running as hardware domain the exposed native ACPI tables + * causes idle and/or frequency drivers to attach and + * malfunction. It's Xen the entity that controls the idle and + * frequency states. + * + * For unprivileged domains the exposed ACPI tables are + * fabricated and don't contain such data. + */ + disable_cpuidle(); + disable_cpufreq(); + WARN_ON(xen_set_default_idle()); + } } void __init xen_pvh_init(struct boot_params *boot_params) From 13e7d7240a43d8ea528c12ae5a912be1ff7fa29b Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Mon, 7 Apr 2025 18:33:22 +0800 Subject: [PATCH 0270/2924] net: libwx: Fix the wrong Rx descriptor field WX_RXD_IPV6EX was incorrectly defined in Rx ring descriptor. In fact, this field stores the 802.1ad ID from which the packet was received. The wrong definition caused the statistics rx_csum_offload_errors to fail to grow when receiving the 802.1ad packet with incorrect checksum. Fixes: ef4f3c19f912 ("net: wangxun: libwx add rx offload functions") Signed-off-by: Jiawen Wu Reviewed-by: Michal Kubiak Link: https://patch.msgid.link/20250407103322.273241-1-jiawenwu@trustnetic.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 3 ++- drivers/net/ethernet/wangxun/libwx/wx_type.h | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 00b0b318df27e8..6ebefa31ece1c5 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -546,7 +546,8 @@ static void wx_rx_checksum(struct wx_ring *ring, return; /* Hardware can't guarantee csum if IPv6 Dest Header found */ - if (dptype.prot != WX_DEC_PTYPE_PROT_SCTP && WX_RXD_IPV6EX(rx_desc)) + if (dptype.prot != WX_DEC_PTYPE_PROT_SCTP && + wx_test_staterr(rx_desc, WX_RXD_STAT_IPV6EX)) return; /* if L4 checksum error */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 5b230ecbbabb51..4c545b2aa997cb 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -513,6 +513,7 @@ enum WX_MSCA_CMD_value { #define WX_RXD_STAT_L4CS BIT(7) /* L4 xsum calculated */ #define WX_RXD_STAT_IPCS BIT(8) /* IP xsum calculated */ #define WX_RXD_STAT_OUTERIPCS BIT(10) /* Cloud IP xsum calculated*/ +#define WX_RXD_STAT_IPV6EX BIT(12) /* IPv6 Dest Header */ #define WX_RXD_STAT_TS BIT(14) /* IEEE1588 Time Stamp */ #define WX_RXD_ERR_OUTERIPER BIT(26) /* CRC IP Header error */ @@ -589,8 +590,6 @@ enum wx_l2_ptypes { #define WX_RXD_PKTTYPE(_rxd) \ ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 9) & 0xFF) -#define WX_RXD_IPV6EX(_rxd) \ - ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 6) & 0x1) /*********************** Transmit Descriptor Config Masks ****************/ #define WX_TXD_STAT_DD BIT(0) /* Descriptor Done */ #define WX_TXD_DTYP_DATA 0 /* Adv Data Descriptor */ From 369609fc6272c2f6ad666ba4fd913f3baf32908f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 7 Apr 2025 12:55:34 +0200 Subject: [PATCH 0271/2924] tc: Ensure we have enough buffer space when sending filter netlink notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tfilter_notify() and tfilter_del_notify() functions assume that NLMSG_GOODSIZE is always enough to dump the filter chain. This is not always the case, which can lead to silent notify failures (because the return code of tfilter_notify() is not always checked). In particular, this can lead to NLM_F_ECHO not being honoured even though an action succeeds, which forces userspace to create workarounds[0]. Fix this by increasing the message size if dumping the filter chain into the allocated skb fails. Use the size of the incoming skb as a size hint if set, so we can start at a larger value when appropriate. To trigger this, run the following commands: # ip link add type veth # tc qdisc replace dev veth0 root handle 1: fq_codel # tc -echo filter add dev veth0 parent 1: u32 match u32 0 0 $(for i in $(seq 32); do echo action pedit munge ip dport set 22; done) Before this fix, tc just returns: Not a filter(cmd 2) After the fix, we get the correct echo: added filter dev veth0 parent 1: protocol all pref 49152 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 terminal flowid not_in_hw match 00000000/00000000 at 0 action order 1: pedit action pass keys 1 index 1 ref 1 bind 1 key #0 at 20: val 00000016 mask ffff0000 [repeated 32 times] [0] https://github.com/openvswitch/ovs/commit/106ef21860c935e5e0017a88bf42b94025c4e511 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Frode Nordahl Closes: https://bugs.launchpad.net/ubuntu/+source/openvswitch/+bug/2018500 Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250407105542.16601-1-toke@redhat.com Signed-off-by: Paolo Abeni --- net/sched/cls_api.c | 66 ++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4f648af8cfaafe..ecec0a1e1c1a07 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2057,6 +2057,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); + int ret = -EMSGSIZE; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) @@ -2101,11 +2102,45 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, return skb->len; +cls_op_not_supp: + ret = -EOPNOTSUPP; out_nlmsg_trim: nla_put_failure: -cls_op_not_supp: nlmsg_trim(skb, b); - return -1; + return ret; +} + +static struct sk_buff *tfilter_notify_prep(struct net *net, + struct sk_buff *oskb, + struct nlmsghdr *n, + struct tcf_proto *tp, + struct tcf_block *block, + struct Qdisc *q, u32 parent, + void *fh, int event, + u32 portid, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + unsigned int size = oskb ? max(NLMSG_GOODSIZE, oskb->len) : NLMSG_GOODSIZE; + struct sk_buff *skb; + int ret; + +retry: + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOBUFS); + + ret = tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, + n->nlmsg_seq, n->nlmsg_flags, event, false, + rtnl_held, extack); + if (ret <= 0) { + kfree_skb(skb); + if (ret == -EMSGSIZE) { + size += NLMSG_GOODSIZE; + goto retry; + } + return ERR_PTR(-EINVAL); + } + return skb; } static int tfilter_notify(struct net *net, struct sk_buff *oskb, @@ -2121,16 +2156,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return 0; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, event, - false, rtnl_held, extack) <= 0) { - kfree_skb(skb); - return -EINVAL; - } + skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, event, + portid, rtnl_held, extack); + if (IS_ERR(skb)) + return PTR_ERR(skb); if (unicast) err = rtnl_unicast(skb, net, portid); @@ -2153,16 +2182,11 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return tp->ops->delete(tp, fh, last, rtnl_held, extack); - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER, - false, rtnl_held, extack) <= 0) { + skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, + RTM_DELTFILTER, portid, rtnl_held, extack); + if (IS_ERR(skb)) { NL_SET_ERR_MSG(extack, "Failed to build del event notification"); - kfree_skb(skb); - return -EINVAL; + return PTR_ERR(skb); } err = tp->ops->delete(tp, fh, last, rtnl_held, extack); From 4f038a6a02d20859a3479293cbf172b0f14cbdd6 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 7 Apr 2025 15:05:10 +0200 Subject: [PATCH 0272/2924] net: ethtool: Don't call .cleanup_data when prepare_data fails There's a consistent pattern where the .cleanup_data() callback is called when .prepare_data() fails, when it should really be called to clean after a successful .prepare_data() as per the documentation. Rewrite the error-handling paths to make sure we don't cleanup un-prepared data. Fixes: c781ff12a2f3 ("ethtool: Allow network drivers to dump arbitrary EEPROM data") Reviewed-by: Kory Maincent Reviewed-by: Simon Horman Reviewed-by: Michal Kubecek Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20250407130511.75621-1-maxime.chevallier@bootlin.com Signed-off-by: Paolo Abeni --- net/ethtool/netlink.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index a163d40c6431b9..977beeaaa2f991 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -500,7 +500,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) netdev_unlock_ops(req_info->dev); rtnl_unlock(); if (ret < 0) - goto err_cleanup; + goto err_dev; ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; @@ -560,7 +560,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, netdev_unlock_ops(dev); rtnl_unlock(); if (ret < 0) - goto out; + goto out_cancel; ret = ethnl_fill_reply_header(skb, dev, ctx->ops->hdr_attr); if (ret < 0) goto out; @@ -569,6 +569,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, out: if (ctx->ops->cleanup_data) ctx->ops->cleanup_data(ctx->reply_data); +out_cancel: ctx->reply_data->dev = NULL; if (ret < 0) genlmsg_cancel(skb, ehdr); @@ -793,7 +794,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, ethnl_init_reply_data(reply_data, ops, dev); ret = ops->prepare_data(req_info, reply_data, &info); if (ret < 0) - goto err_cleanup; + goto err_rep; ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; @@ -828,6 +829,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, err_cleanup: if (ops->cleanup_data) ops->cleanup_data(reply_data); +err_rep: kfree(reply_data); kfree(req_info); return; From 13c1d5f3a7fa7b55a26e73bb9e95342374a489b2 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 09:34:07 +0200 Subject: [PATCH 0273/2924] drm/tests: helpers: Create kunit helper to destroy a drm_display_mode A number of test suites call functions that expect the returned drm_display_mode to be destroyed eventually. However, none of the tests called drm_mode_destroy, which results in a memory leak. Since drm_mode_destroy takes two pointers as argument, we can't use a kunit wrapper. Let's just create a helper every test suite can use. Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-1-996305a2e75a@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_kunit_helpers.c | 22 ++++++++++++++++++++++ include/drm/drm_kunit_helpers.h | 3 +++ 2 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c index a4eb68f0decca1..6f6616cf496683 100644 --- a/drivers/gpu/drm/tests/drm_kunit_helpers.c +++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c @@ -278,6 +278,28 @@ static void kunit_action_drm_mode_destroy(void *ptr) drm_mode_destroy(NULL, mode); } +/** + * drm_kunit_add_mode_destroy_action() - Add a drm_destroy_mode kunit action + * @test: The test context object + * @mode: The drm_display_mode to destroy eventually + * + * Registers a kunit action that will destroy the drm_display_mode at + * the end of the test. + * + * If an error occurs, the drm_display_mode will be destroyed. + * + * Returns: + * 0 on success, an error code otherwise. + */ +int drm_kunit_add_mode_destroy_action(struct kunit *test, + struct drm_display_mode *mode) +{ + return kunit_add_action_or_reset(test, + kunit_action_drm_mode_destroy, + mode); +} +EXPORT_SYMBOL_GPL(drm_kunit_add_mode_destroy_action); + /** * drm_kunit_display_mode_from_cea_vic() - return a mode for CEA VIC for a KUnit test * @test: The test context object diff --git a/include/drm/drm_kunit_helpers.h b/include/drm/drm_kunit_helpers.h index 11d59ce0bac0bb..1c62d1d4458cae 100644 --- a/include/drm/drm_kunit_helpers.h +++ b/include/drm/drm_kunit_helpers.h @@ -118,6 +118,9 @@ drm_kunit_helper_create_crtc(struct kunit *test, const struct drm_crtc_funcs *funcs, const struct drm_crtc_helper_funcs *helper_funcs); +int drm_kunit_add_mode_destroy_action(struct kunit *test, + struct drm_display_mode *mode); + struct drm_display_mode * drm_kunit_display_mode_from_cea_vic(struct kunit *test, struct drm_device *dev, u8 video_code); From dacafdcc7789cfeb0f0552716db56f210238225d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 09:34:08 +0200 Subject: [PATCH 0274/2924] drm/tests: modeset: Fix drm_display_mode memory leak drm_mode_find_dmt() returns a drm_display_mode that needs to be destroyed later one. The drm_test_pick_cmdline_res_1920_1080_60() test never does however, which leads to a memory leak. Let's make sure it's freed. Reported-by: Philipp Stanner Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/ Fixes: 8fc0380f6ba7 ("drm/client: Add some tests for drm_connector_pick_cmdline_mode()") Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-2-996305a2e75a@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_client_modeset_test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_client_modeset_test.c b/drivers/gpu/drm/tests/drm_client_modeset_test.c index 7516f6cb36e4e3..3e9518d7b8b7eb 100644 --- a/drivers/gpu/drm/tests/drm_client_modeset_test.c +++ b/drivers/gpu/drm/tests/drm_client_modeset_test.c @@ -95,6 +95,9 @@ static void drm_test_pick_cmdline_res_1920_1080_60(struct kunit *test) expected_mode = drm_mode_find_dmt(priv->drm, 1920, 1080, 60, false); KUNIT_ASSERT_NOT_NULL(test, expected_mode); + ret = drm_kunit_add_mode_destroy_action(test, expected_mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_TRUE(test, drm_mode_parse_command_line_for_connector(cmdline, connector, From 9b0827ba821165851abd6c4c086673fb60d5c647 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 09:34:09 +0200 Subject: [PATCH 0275/2924] drm/tests: modeset: Fix drm_display_mode memory leak drm_analog_tv_mode() and its variants return a drm_display_mode that needs to be destroyed later one. The drm_test_pick_cmdline_named() test never does however, which leads to a memory leak. Let's make sure it's freed. Reported-by: Philipp Stanner Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/ Fixes: fedcaf726f54 ("drm/modes: Properly generate a drm_display_mode from a named mode") Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-3-996305a2e75a@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_client_modeset_test.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tests/drm_client_modeset_test.c b/drivers/gpu/drm/tests/drm_client_modeset_test.c index 3e9518d7b8b7eb..b2fdb1a774fe69 100644 --- a/drivers/gpu/drm/tests/drm_client_modeset_test.c +++ b/drivers/gpu/drm/tests/drm_client_modeset_test.c @@ -132,7 +132,8 @@ static void drm_test_pick_cmdline_named(struct kunit *test) struct drm_device *drm = priv->drm; struct drm_connector *connector = &priv->connector; struct drm_cmdline_mode *cmdline_mode = &connector->cmdline_mode; - const struct drm_display_mode *expected_mode, *mode; + const struct drm_display_mode *mode; + struct drm_display_mode *expected_mode; const char *cmdline = params->cmdline; int ret; @@ -152,6 +153,9 @@ static void drm_test_pick_cmdline_named(struct kunit *test) expected_mode = params->func(drm); KUNIT_ASSERT_NOT_NULL(test, expected_mode); + ret = drm_kunit_add_mode_destroy_action(test, expected_mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected_mode, mode)); } From 70f29ca3117a8796cd6bde7612a3ded96d0f2dde Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 09:34:10 +0200 Subject: [PATCH 0276/2924] drm/tests: cmdline: Fix drm_display_mode memory leak drm_analog_tv_mode() and its variants return a drm_display_mode that needs to be destroyed later one. The drm_test_cmdline_tv_options() test never does however, which leads to a memory leak. Let's make sure it's freed. Reported-by: Philipp Stanner Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/ Fixes: e691c9992ae1 ("drm/modes: Introduce the tv_mode property as a command-line option") Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-4-996305a2e75a@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_cmdline_parser_test.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tests/drm_cmdline_parser_test.c b/drivers/gpu/drm/tests/drm_cmdline_parser_test.c index 59c8408c453c2e..1cfcb597b088b4 100644 --- a/drivers/gpu/drm/tests/drm_cmdline_parser_test.c +++ b/drivers/gpu/drm/tests/drm_cmdline_parser_test.c @@ -7,6 +7,7 @@ #include #include +#include #include static const struct drm_connector no_connector = {}; @@ -955,8 +956,15 @@ struct drm_cmdline_tv_option_test { static void drm_test_cmdline_tv_options(struct kunit *test) { const struct drm_cmdline_tv_option_test *params = test->param_value; - const struct drm_display_mode *expected_mode = params->mode_fn(NULL); + struct drm_display_mode *expected_mode; struct drm_cmdline_mode mode = { }; + int ret; + + expected_mode = params->mode_fn(NULL); + KUNIT_ASSERT_NOT_NULL(test, expected_mode); + + ret = drm_kunit_add_mode_destroy_action(test, expected_mode); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_TRUE(test, drm_mode_parse_command_line_for_connector(params->cmdline, &no_connector, &mode)); From d34146340f95cd9bf06d4ce71cca72127dc0b7cd Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 09:34:11 +0200 Subject: [PATCH 0277/2924] drm/tests: modes: Fix drm_display_mode memory leak drm_analog_tv_mode() and its variants return a drm_display_mode that needs to be destroyed later one. The drm_modes_analog_tv tests never do however, which leads to a memory leak. Let's make sure it's freed. Reported-by: Philipp Stanner Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/ Fixes: 4fcd238560ee ("drm/modes: Add a function to generate analog display modes") Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-5-996305a2e75a@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_modes_test.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_modes_test.c b/drivers/gpu/drm/tests/drm_modes_test.c index 6ed51f99e133c9..7ba646d87856f5 100644 --- a/drivers/gpu/drm/tests/drm_modes_test.c +++ b/drivers/gpu/drm/tests/drm_modes_test.c @@ -40,6 +40,7 @@ static void drm_test_modes_analog_tv_ntsc_480i(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *mode; + int ret; mode = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_NTSC, @@ -47,6 +48,9 @@ static void drm_test_modes_analog_tv_ntsc_480i(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_EQ(test, drm_mode_vrefresh(mode), 60); KUNIT_EXPECT_EQ(test, mode->hdisplay, 720); @@ -70,6 +74,7 @@ static void drm_test_modes_analog_tv_ntsc_480i_inlined(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *expected, *mode; + int ret; expected = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_NTSC, @@ -77,9 +82,15 @@ static void drm_test_modes_analog_tv_ntsc_480i_inlined(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, expected); + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); + mode = drm_mode_analog_ntsc_480i(priv->drm); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected, mode)); } @@ -87,6 +98,7 @@ static void drm_test_modes_analog_tv_pal_576i(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *mode; + int ret; mode = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_PAL, @@ -94,6 +106,9 @@ static void drm_test_modes_analog_tv_pal_576i(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_EQ(test, drm_mode_vrefresh(mode), 50); KUNIT_EXPECT_EQ(test, mode->hdisplay, 720); @@ -117,6 +132,7 @@ static void drm_test_modes_analog_tv_pal_576i_inlined(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *expected, *mode; + int ret; expected = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_PAL, @@ -124,9 +140,15 @@ static void drm_test_modes_analog_tv_pal_576i_inlined(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, expected); + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); + mode = drm_mode_analog_pal_576i(priv->drm); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected, mode)); } From f02d3bfcd34645c3245216cf762f09b72ce582fa Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 09:34:12 +0200 Subject: [PATCH 0278/2924] drm/tests: modes: Fix drm_display_mode memory leak drm_analog_tv_mode() and its variants return a drm_display_mode that needs to be destroyed later one. The drm_test_modes_analog_tv_mono_576i() test never does however, which leads to a memory leak. Let's make sure it's freed. Reported-by: Philipp Stanner Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/ Fixes: e31538489908 ("drm/tests: Add tests for the new Monochrome value of tv_mode") Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-6-996305a2e75a@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_modes_test.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_modes_test.c b/drivers/gpu/drm/tests/drm_modes_test.c index 7ba646d87856f5..f5b20f92df8be7 100644 --- a/drivers/gpu/drm/tests/drm_modes_test.c +++ b/drivers/gpu/drm/tests/drm_modes_test.c @@ -156,6 +156,7 @@ static void drm_test_modes_analog_tv_mono_576i(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *mode; + int ret; mode = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_MONOCHROME, @@ -163,6 +164,9 @@ static void drm_test_modes_analog_tv_mono_576i(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_EQ(test, drm_mode_vrefresh(mode), 50); KUNIT_EXPECT_EQ(test, mode->hdisplay, 720); From 8b6f2e28431b2f9f84073bff50353aeaf25559d0 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 09:34:13 +0200 Subject: [PATCH 0279/2924] drm/tests: probe-helper: Fix drm_display_mode memory leak drm_analog_tv_mode() and its variants return a drm_display_mode that needs to be destroyed later one. The drm_test_connector_helper_tv_get_modes_check() test never does however, which leads to a memory leak. Let's make sure it's freed. Reported-by: Philipp Stanner Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/ Fixes: 1e4a91db109f ("drm/probe-helper: Provide a TV get_modes helper") Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250408-drm-kunit-drm-display-mode-memleak-v1-7-996305a2e75a@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_probe_helper_test.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tests/drm_probe_helper_test.c b/drivers/gpu/drm/tests/drm_probe_helper_test.c index bc09ff38aca18e..db0e4f5df275e8 100644 --- a/drivers/gpu/drm/tests/drm_probe_helper_test.c +++ b/drivers/gpu/drm/tests/drm_probe_helper_test.c @@ -98,7 +98,7 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test) struct drm_connector *connector = &priv->connector; struct drm_cmdline_mode *cmdline = &connector->cmdline_mode; struct drm_display_mode *mode; - const struct drm_display_mode *expected; + struct drm_display_mode *expected; size_t len; int ret; @@ -134,6 +134,9 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test) KUNIT_EXPECT_TRUE(test, drm_mode_equal(mode, expected)); KUNIT_EXPECT_TRUE(test, mode->type & DRM_MODE_TYPE_PREFERRED); + + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); } if (params->num_expected_modes >= 2) { @@ -145,6 +148,9 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test) KUNIT_EXPECT_TRUE(test, drm_mode_equal(mode, expected)); KUNIT_EXPECT_FALSE(test, mode->type & DRM_MODE_TYPE_PREFERRED); + + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); } mutex_unlock(&priv->drm->mode_config.mutex); From fe81536af3978f26a1383e4da7f135b973eb4209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Mon, 31 Mar 2025 12:47:07 +0200 Subject: [PATCH 0280/2924] landlock: Remove incorrect warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit landlock_put_hierarchy() can be called when an error occurs in landlock_merge_ruleset() due to insufficient memory. In this case, the domain's audit details might not have been allocated yet, which would cause landlock_free_hierarchy_details() to print a warning (but still safely handle this case). We could keep the WARN_ON_ONCE(!hierarchy) but it's not worth it for this kind of function, so let's remove it entirely. Cc: Paul Moore Reported-by: syzbot+8bca99e91de7e060e4ea@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20250331104709.897062-1-mic@digikod.net Reviewed-by: Günther Noack Signed-off-by: Mickaël Salaün --- security/landlock/domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/landlock/domain.h b/security/landlock/domain.h index ed0d348e214c96..7fb70b25f85a19 100644 --- a/security/landlock/domain.h +++ b/security/landlock/domain.h @@ -130,7 +130,7 @@ int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy); static inline void landlock_free_hierarchy_details(struct landlock_hierarchy *const hierarchy) { - if (WARN_ON_ONCE(!hierarchy || !hierarchy->details)) + if (!hierarchy || !hierarchy->details) return; put_pid(hierarchy->details->pid); From 58029c39cdc54ac4f4dc40b4a9c05eed9f9b808a Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 27 Feb 2025 19:31:32 +0000 Subject: [PATCH 0281/2924] RAS/AMD/FMPM: Get masked address Some operations require checking, or ignoring, specific bits in an address value. For example, this can be comparing address values to identify unique structures. Currently, the full address value is compared when filtering for duplicates. This results in over counting and creation of extra records. This gives the impression that more unique events occurred than did in reality. Mask the address for physical rows on MI300. [ bp: Simplify. ] Fixes: 6f15e617cc99 ("RAS: Introduce a FRU memory poison manager") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org --- drivers/ras/amd/atl/internal.h | 3 +++ drivers/ras/amd/atl/umc.c | 2 -- drivers/ras/amd/fmpm.c | 9 ++++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index f9be26d2534846..d096b58cd0ae97 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -362,4 +362,7 @@ static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx) atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode); } +#define MI300_UMC_MCA_COL GENMASK(5, 1) +#define MI300_UMC_MCA_ROW13 BIT(23) + #endif /* __AMD_ATL_INTERNAL_H__ */ diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index cb8ace3d4e42d0..6e072b7667e98b 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -229,7 +229,6 @@ int get_umc_info_mi300(void) * Additionally, the PC and Bank bits may be hashed. This must be accounted for before * reconstructing the normalized address. */ -#define MI300_UMC_MCA_COL GENMASK(5, 1) #define MI300_UMC_MCA_BANK GENMASK(9, 6) #define MI300_UMC_MCA_ROW GENMASK(24, 10) #define MI300_UMC_MCA_PC BIT(25) @@ -360,7 +359,6 @@ static void _retire_row_mi300(struct atl_err *a_err) * * See MI300_UMC_MCA_ROW for the row bits in MCA_ADDR_UMC value. */ -#define MI300_UMC_MCA_ROW13 BIT(23) static void retire_row_mi300(struct atl_err *a_err) { _retire_row_mi300(a_err); diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 90de737fbc9097..8877c6ff64c468 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -250,6 +250,13 @@ static bool rec_has_valid_entries(struct fru_rec *rec) return true; } +/* + * Row retirement is done on MI300 systems, and some bits are 'don't + * care' for comparing addresses with unique physical rows. This + * includes all column bits and the row[13] bit. + */ +#define MASK_ADDR(addr) ((addr) & ~(MI300_UMC_MCA_ROW13 | MI300_UMC_MCA_COL)) + static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new) { /* @@ -258,7 +265,7 @@ static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_ * * Also, order the checks from most->least likely to fail to shortcut the code. */ - if (old->addr != new->addr) + if (MASK_ADDR(old->addr) != MASK_ADDR(new->addr)) return false; if (old->hw_id != new->hw_id) From 05a2b0011c4b6cbbc9b577f6abebe4e9333b0cf6 Mon Sep 17 00:00:00 2001 From: Lukas Fischer Date: Fri, 4 Apr 2025 14:51:51 +0200 Subject: [PATCH 0282/2924] scripts: generate_rust_analyzer: Add ffi crate Commit d072acda4862 ("rust: use custom FFI integer types") did not update rust-analyzer to include the new crate. To enable rust-analyzer support for these custom ffi types, add the `ffi` crate as a dependency to the `bindings`, `uapi` and `kernel` crates, which all directly depend on it. Fixes: d072acda4862 ("rust: use custom FFI integer types") Signed-off-by: Lukas Fischer Reviewed-by: Tamir Duberstein Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250404125150.85783-2-kernel@o1oo11oo.de Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index cd41bc906fbd61..fe663dd0c43b04 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -112,6 +112,12 @@ def append_sysroot_crate( cfg=["kernel"], ) + append_crate( + "ffi", + srctree / "rust" / "ffi.rs", + ["core", "compiler_builtins"], + ) + def append_crate_with_generated( display_name, deps, @@ -131,9 +137,9 @@ def append_crate_with_generated( "exclude_dirs": [], } - append_crate_with_generated("bindings", ["core"]) - append_crate_with_generated("uapi", ["core"]) - append_crate_with_generated("kernel", ["core", "macros", "build_error", "pin_init", "bindings", "uapi"]) + append_crate_with_generated("bindings", ["core", "ffi"]) + append_crate_with_generated("uapi", ["core", "ffi"]) + append_crate_with_generated("kernel", ["core", "macros", "build_error", "pin_init", "ffi", "bindings", "uapi"]) def is_root_crate(build_file, target): try: From 47068309b5777313b6ac84a77d8d10dc7312260a Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 8 Apr 2025 09:50:42 -0700 Subject: [PATCH 0283/2924] sched_ext: Use kvzalloc for large exit_dump allocation Replace kzalloc with kvzalloc for the exit_dump buffer allocation, which can require large contiguous memory depending on the implementation. This change prevents allocation failures by allowing the system to fall back to vmalloc when contiguous memory allocation fails. Since this buffer is only used for debugging purposes, physical memory contiguity is not required, making vmalloc a suitable alternative. Cc: stable@vger.kernel.org Fixes: 07814a9439a3b0 ("sched_ext: Print debug dump after an error exit") Suggested-by: Rik van Riel Signed-off-by: Breno Leitao Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 66bcd40a28ca1d..db9af6a3c04fd7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4623,7 +4623,7 @@ static void scx_ops_bypass(bool bypass) static void free_exit_info(struct scx_exit_info *ei) { - kfree(ei->dump); + kvfree(ei->dump); kfree(ei->msg); kfree(ei->bt); kfree(ei); @@ -4639,7 +4639,7 @@ static struct scx_exit_info *alloc_exit_info(size_t exit_dump_len) ei->bt = kcalloc(SCX_EXIT_BT_LEN, sizeof(ei->bt[0]), GFP_KERNEL); ei->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL); - ei->dump = kzalloc(exit_dump_len, GFP_KERNEL); + ei->dump = kvzalloc(exit_dump_len, GFP_KERNEL); if (!ei->bt || !ei->msg || !ei->dump) { free_exit_info(ei); From e776b26e3701945e0d20a11dc30ecd4da98e8d67 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Apr 2025 13:20:04 -1000 Subject: [PATCH 0284/2924] sched_ext: Remove cpu.weight / cpu.idle unimplemented warnings sched_ext generates warnings when cpu.weight / cpu.idle are set to non-default values if the BPF scheduler doesn't implement weight support. These warnings don't provide much value while adding constant annoyance. A BPF scheduler may not implement any particular behavior and there's nothing particularly special about missing cgroup weight support. Drop the warnings. Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 41 +---------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index db9af6a3c04fd7..21eaf081d336e4 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3899,35 +3899,6 @@ bool scx_can_stop_tick(struct rq *rq) DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem); static bool scx_cgroup_enabled; -static bool cgroup_warned_missing_weight; -static bool cgroup_warned_missing_idle; - -static void scx_cgroup_warn_missing_weight(struct task_group *tg) -{ - if (scx_ops_enable_state() == SCX_OPS_DISABLED || - cgroup_warned_missing_weight) - return; - - if ((scx_ops.flags & SCX_OPS_HAS_CGROUP_WEIGHT) || !tg->css.parent) - return; - - pr_warn("sched_ext: \"%s\" does not implement cgroup cpu.weight\n", - scx_ops.name); - cgroup_warned_missing_weight = true; -} - -static void scx_cgroup_warn_missing_idle(struct task_group *tg) -{ - if (!scx_cgroup_enabled || cgroup_warned_missing_idle) - return; - - if (!tg->idle) - return; - - pr_warn("sched_ext: \"%s\" does not implement cgroup cpu.idle\n", - scx_ops.name); - cgroup_warned_missing_idle = true; -} int scx_tg_online(struct task_group *tg) { @@ -3937,8 +3908,6 @@ int scx_tg_online(struct task_group *tg) percpu_down_read(&scx_cgroup_rwsem); - scx_cgroup_warn_missing_weight(tg); - if (scx_cgroup_enabled) { if (SCX_HAS_OP(cgroup_init)) { struct scx_cgroup_init_args args = @@ -4076,9 +4045,7 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) void scx_group_set_idle(struct task_group *tg, bool idle) { - percpu_down_read(&scx_cgroup_rwsem); - scx_cgroup_warn_missing_idle(tg); - percpu_up_read(&scx_cgroup_rwsem); + /* TODO: Implement ops->cgroup_set_idle() */ } static void scx_cgroup_lock(void) @@ -4272,9 +4239,6 @@ static int scx_cgroup_init(void) percpu_rwsem_assert_held(&scx_cgroup_rwsem); - cgroup_warned_missing_weight = false; - cgroup_warned_missing_idle = false; - /* * scx_tg_on/offline() are excluded through scx_cgroup_rwsem. If we walk * cgroups and init, all online cgroups are initialized. @@ -4284,9 +4248,6 @@ static int scx_cgroup_init(void) struct task_group *tg = css_tg(css); struct scx_cgroup_init_args args = { .weight = tg->scx_weight }; - scx_cgroup_warn_missing_weight(tg); - scx_cgroup_warn_missing_idle(tg); - if ((tg->scx_flags & (SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE) continue; From bc08b15b54b8aadbc8a8f413271c07a3f4bead87 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Apr 2025 08:31:12 -1000 Subject: [PATCH 0285/2924] sched_ext: Mark SCX_OPS_HAS_CGROUP_WEIGHT for deprecation SCX_OPS_HAS_CGROUP_WEIGHT was only used to suppress the missing cgroup weight support warnings. Now that the warnings are removed, the flag doesn't do anything. Mark it for deprecation and remove its usage from scx_flatcg. v2: Actually include the scx_flatcg update. Signed-off-by: Tejun Heo Suggested-and-reviewed-by: Andrea Righi --- kernel/sched/ext.c | 5 ++++- tools/sched_ext/scx_flatcg.bpf.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 21eaf081d336e4..fdbf249d1c68ec 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -163,7 +163,7 @@ enum scx_ops_flags { /* * CPU cgroup support flags */ - SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* cpu.weight */ + SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */ SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE | SCX_OPS_ENQ_LAST | @@ -5213,6 +5213,9 @@ static int validate_ops(const struct sched_ext_ops *ops) return -EINVAL; } + if (ops->flags & SCX_OPS_HAS_CGROUP_WEIGHT) + pr_warn("SCX_OPS_HAS_CGROUP_WEIGHT is deprecated and a noop\n"); + return 0; } diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 2c720e3ecad593..fdc7170639e604 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops, .cgroup_move = (void *)fcg_cgroup_move, .init = (void *)fcg_init, .exit = (void *)fcg_exit, - .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING, + .flags = SCX_OPS_ENQ_EXITING, .name = "flatcg"); From 56799bc035658738f362acec3e7647bb84e68933 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 4 Mar 2025 14:54:46 +0100 Subject: [PATCH 0286/2924] perf: Fix hang while freeing sigtrap event Perf can hang while freeing a sigtrap event if a related deferred signal hadn't managed to be sent before the file got closed: perf_event_overflow() task_work_add(perf_pending_task) fput() task_work_add(____fput()) task_work_run() ____fput() perf_release() perf_event_release_kernel() _free_event() perf_pending_task_sync() task_work_cancel() -> FAILED rcuwait_wait_event() Once task_work_run() is running, the list of pending callbacks is removed from the task_struct and from this point on task_work_cancel() can't remove any pending and not yet started work items, hence the task_work_cancel() failure and the hang on rcuwait_wait_event(). Task work could be changed to remove one work at a time, so a work running on the current task can always cancel a pending one, however the wait / wake design is still subject to inverted dependencies when remote targets are involved, as pictured by Oleg: T1 T2 fd = perf_event_open(pid => T2->pid); fd = perf_event_open(pid => T1->pid); close(fd) close(fd) perf_event_overflow() perf_event_overflow() task_work_add(perf_pending_task) task_work_add(perf_pending_task) fput() fput() task_work_add(____fput()) task_work_add(____fput()) task_work_run() task_work_run() ____fput() ____fput() perf_release() perf_release() perf_event_release_kernel() perf_event_release_kernel() _free_event() _free_event() perf_pending_task_sync() perf_pending_task_sync() rcuwait_wait_event() rcuwait_wait_event() Therefore the only option left is to acquire the event reference count upon queueing the perf task work and release it from the task work, just like it was done before 3a5465418f5f ("perf: Fix event leak upon exec and file release") but without the leaks it fixed. Some adjustments are necessary to make it work: * A child event might dereference its parent upon freeing. Care must be taken to release the parent last. * Some places assuming the event doesn't have any reference held and therefore can be freed right away must instead put the reference and let the reference counting to its job. Reported-by: "Yi Lai" Closes: https://lore.kernel.org/all/Zx9Losv4YcJowaP%2F@ly-workstation/ Reported-by: syzbot+3c4321e10eea460eb606@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/673adf75.050a0220.87769.0024.GAE@google.com/ Fixes: 3a5465418f5f ("perf: Fix event leak upon exec and file release") Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250304135446.18905-1-frederic@kernel.org --- include/linux/perf_event.h | 1 - kernel/events/core.c | 64 +++++++++++--------------------------- 2 files changed, 18 insertions(+), 47 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5a9bf15d44617b..0069ba6866a48b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -823,7 +823,6 @@ struct perf_event { struct irq_work pending_disable_irq; struct callback_head pending_task; unsigned int pending_work; - struct rcuwait pending_work_wait; atomic_t event_limit; diff --git a/kernel/events/core.c b/kernel/events/core.c index 9af9726ef5f5ad..e93c195659140d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5518,30 +5518,6 @@ static bool exclusive_event_installable(struct perf_event *event, static void perf_free_addr_filters(struct perf_event *event); -static void perf_pending_task_sync(struct perf_event *event) -{ - struct callback_head *head = &event->pending_task; - - if (!event->pending_work) - return; - /* - * If the task is queued to the current task's queue, we - * obviously can't wait for it to complete. Simply cancel it. - */ - if (task_work_cancel(current, head)) { - event->pending_work = 0; - local_dec(&event->ctx->nr_no_switch_fast); - return; - } - - /* - * All accesses related to the event are within the same RCU section in - * perf_pending_task(). The RCU grace period before the event is freed - * will make sure all those accesses are complete by then. - */ - rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE); -} - /* vs perf_event_alloc() error */ static void __free_event(struct perf_event *event) { @@ -5599,7 +5575,6 @@ static void _free_event(struct perf_event *event) { irq_work_sync(&event->pending_irq); irq_work_sync(&event->pending_disable_irq); - perf_pending_task_sync(event); unaccount_event(event); @@ -5692,10 +5667,17 @@ static void perf_remove_from_owner(struct perf_event *event) static void put_event(struct perf_event *event) { + struct perf_event *parent; + if (!atomic_long_dec_and_test(&event->refcount)) return; + parent = event->parent; _free_event(event); + + /* Matches the refcount bump in inherit_event() */ + if (parent) + put_event(parent); } /* @@ -5779,11 +5761,6 @@ int perf_event_release_kernel(struct perf_event *event) if (tmp == child) { perf_remove_from_context(child, DETACH_GROUP); list_move(&child->child_list, &free_list); - /* - * This matches the refcount bump in inherit_event(); - * this can't be the last reference. - */ - put_event(event); } else { var = &ctx->refcount; } @@ -5809,7 +5786,8 @@ int perf_event_release_kernel(struct perf_event *event) void *var = &child->ctx->refcount; list_del(&child->child_list); - free_event(child); + /* Last reference unless ->pending_task work is pending */ + put_event(child); /* * Wake any perf_event_free_task() waiting for this event to be @@ -5820,7 +5798,11 @@ int perf_event_release_kernel(struct perf_event *event) } no_ctx: - put_event(event); /* Must be the 'last' reference */ + /* + * Last reference unless ->pending_task work is pending on this event + * or any of its children. + */ + put_event(event); return 0; } EXPORT_SYMBOL_GPL(perf_event_release_kernel); @@ -7235,12 +7217,6 @@ static void perf_pending_task(struct callback_head *head) struct perf_event *event = container_of(head, struct perf_event, pending_task); int rctx; - /* - * All accesses to the event must belong to the same implicit RCU read-side - * critical section as the ->pending_work reset. See comment in - * perf_pending_task_sync(). - */ - rcu_read_lock(); /* * If we 'fail' here, that's OK, it means recursion is already disabled * and we won't recurse 'further'. @@ -7251,9 +7227,8 @@ static void perf_pending_task(struct callback_head *head) event->pending_work = 0; perf_sigtrap(event); local_dec(&event->ctx->nr_no_switch_fast); - rcuwait_wake_up(&event->pending_work_wait); } - rcu_read_unlock(); + put_event(event); if (rctx >= 0) perf_swevent_put_recursion_context(rctx); @@ -10248,6 +10223,7 @@ static int __perf_event_overflow(struct perf_event *event, !task_work_add(current, &event->pending_task, notify_mode)) { event->pending_work = pending_id; local_inc(&event->ctx->nr_no_switch_fast); + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); event->pending_addr = 0; if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR)) @@ -12610,7 +12586,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, init_irq_work(&event->pending_irq, perf_pending_irq); event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable); init_task_work(&event->pending_task, perf_pending_task); - rcuwait_init(&event->pending_work_wait); mutex_init(&event->mmap_mutex); raw_spin_lock_init(&event->addr_filters.lock); @@ -13747,8 +13722,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx) * Kick perf_poll() for is_event_hup(); */ perf_event_wakeup(parent_event); - free_event(event); - put_event(parent_event); + put_event(event); return; } @@ -13872,13 +13846,11 @@ static void perf_free_event(struct perf_event *event, list_del_init(&event->child_list); mutex_unlock(&parent->child_mutex); - put_event(parent); - raw_spin_lock_irq(&ctx->lock); perf_group_detach(event); list_del_event(event, ctx); raw_spin_unlock_irq(&ctx->lock); - free_event(event); + put_event(event); } /* From 84ffc79bfbf70c779e60218563f2f3ad45288671 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 7 Apr 2025 16:22:12 -0700 Subject: [PATCH 0287/2924] kbuild: Add '-fno-builtin-wcslen' A recent optimization change in LLVM [1] aims to transform certain loop idioms into calls to strlen() or wcslen(). This change transforms the first while loop in UniStrcat() into a call to wcslen(), breaking the build when UniStrcat() gets inlined into alloc_path_with_tree_prefix(): ld.lld: error: undefined symbol: wcslen >>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54) >>> vmlinux.o:(alloc_path_with_tree_prefix) >>> referenced by nls_ucs2_utils.h:54 (fs/smb/client/../../nls/nls_ucs2_utils.h:54) >>> vmlinux.o:(alloc_path_with_tree_prefix) Disable this optimization with '-fno-builtin-wcslen', which prevents the compiler from assuming that wcslen() is available in the kernel's C library. [ More to the point - it's not that we couldn't implement wcslen(), it's that this isn't an optimization at all in the context of the kernel. Replacing a simple inlined loop with a function call to the same loop is just stupid and pointless if you don't have long strings and fancy libraries with vectorization support etc. For the regular 'strlen()' cases, we want the compiler to do this in order to handle the trivial case of constant strings. And we do have optimized versions of 'strlen()' on some architectures. But for wcslen? Just no. - Linus ] Cc: stable@vger.kernel.org Link: https://github.com/llvm/llvm-project/commit/9694844d7e36fd5e01011ab56b64f27b867aa72d [1] Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 38689a0c36052b..f4241855650711 100644 --- a/Makefile +++ b/Makefile @@ -1068,6 +1068,9 @@ ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -fconserve-stack endif +# Ensure compilers do not transform certain loops into calls to wcslen() +KBUILD_CFLAGS += -fno-builtin-wcslen + # change __FILE__ to the relative path to the source directory ifdef building_out_of_srctree KBUILD_CPPFLAGS += $(call cc-option,-ffile-prefix-map=$(srcroot)/=) From 3c75fff196c35c3bbe8c212ad03db94994130b32 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 7 Apr 2025 20:18:21 +0000 Subject: [PATCH 0288/2924] rust: pin-init: alloc: restrict `impl ZeroableOption` for `Box` to `T: Sized` Similar to what was done for `Zeroable>` in commit df27cef15360 ("rust: init: fix `Zeroable` implementation for `Option>` and `Option>`"), the latest Rust documentation [1] says it guarantees that `transmute::<_, Option>([0u8; size_of::()])` is sound and produces `Option::::None` only in some cases. In particular, it says: `Box` (specifically, only `Box`) when `U: Sized` Thus restrict the `impl` to `Sized`, and use similar wording as in that commit too. Link: https://doc.rust-lang.org/stable/std/option/index.html#representation [1] Signed-off-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/pin-init/pull/32/commits/a6007cf555e5946bcbfafe93a6468c329078acd8 Fixes: 9b2299af3b92 ("rust: pin-init: add `std` and `alloc` support from the user-space version") Cc: stable@vger.kernel.org [ Adjust mentioned commit to the one from the kernel. - Benno ] Signed-off-by: Benno Lossin Link: https://lore.kernel.org/r/20250407201755.649153-2-benno.lossin@proton.me Signed-off-by: Miguel Ojeda --- rust/pin-init/src/alloc.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/rust/pin-init/src/alloc.rs b/rust/pin-init/src/alloc.rs index e16baa3b434e08..5017f57442d862 100644 --- a/rust/pin-init/src/alloc.rs +++ b/rust/pin-init/src/alloc.rs @@ -17,11 +17,9 @@ use crate::{ pub extern crate alloc; -// SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee). -// -// In this case we are allowed to use `T: ?Sized`, since all zeros is the `None` variant and there -// is no problem with a VTABLE pointer being null. -unsafe impl ZeroableOption for Box {} +// SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee: +// ). +unsafe impl ZeroableOption for Box {} /// Smart pointer that can initialize memory in-place. pub trait InPlaceInit: Sized { From 193b5a75744af2669ad7c5f7aa4f9219dc73ca69 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 7 Apr 2025 20:18:29 +0000 Subject: [PATCH 0289/2924] rust: pin-init: use Markdown autolinks in Rust comments "Normal" comments in Rust (`//`) are also formatted in Markdown, like the documentation (`///` and `//!`), see Documentation/rust/coding-guidelines.rst Thus use Markdown autolinks for a couple links that were missing it. It also helps to get proper linking in some software like kitty [1]. Suggested-by: Benno Lossin Link: https://github.com/Rust-for-Linux/pin-init/pull/32#discussion_r2023103712 [1] Signed-off-by: Miguel Ojeda Link: https://github.com/Rust-for-Linux/pin-init/pull/32/commits/dd230d61bf0538281072fbff4bb71efc58f3420c Fixes: 84837cf6fa54 ("rust: pin-init: change examples to the user-space version") Cc: stable@vger.kernel.org [ Change case in title. Reworded commit message. - Benno ] Signed-off-by: Benno Lossin Link: https://lore.kernel.org/r/20250407201755.649153-3-benno.lossin@proton.me Signed-off-by: Miguel Ojeda --- rust/pin-init/examples/pthread_mutex.rs | 2 +- rust/pin-init/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/pin-init/examples/pthread_mutex.rs b/rust/pin-init/examples/pthread_mutex.rs index 9164298c44c021..5ac22f1880d2f7 100644 --- a/rust/pin-init/examples/pthread_mutex.rs +++ b/rust/pin-init/examples/pthread_mutex.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// inspired by https://github.com/nbdd0121/pin-init/blob/trunk/examples/pthread_mutex.rs +// inspired by #![allow(clippy::undocumented_unsafe_blocks)] #![cfg_attr(feature = "alloc", feature(allocator_api))] #[cfg(not(windows))] diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 05c44514765ef8..0806c689f693c1 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1447,7 +1447,7 @@ impl_zeroable! { {} UnsafeCell, // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee: - // https://doc.rust-lang.org/stable/std/option/index.html#representation). + // ). Option, Option, Option, Option, Option, Option, Option, Option, Option, Option, From 445e99bdf68d62cc8cd9c129ea177b2b847d654d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 7 Apr 2025 09:42:22 -0700 Subject: [PATCH 0290/2924] rtnetlink: Fix bad unlock balance in do_setlink(). When validate_linkmsg() fails in do_setlink(), we jump to the errout label and calls netdev_unlock_ops() even though we have not called netdev_lock_ops() as reported by syzbot. [0] Let's return an error directly in such a case. [0] WARNING: bad unlock balance detected! 6.14.0-syzkaller-12504-g8bc251e5d874 #0 Not tainted syz-executor814/5834 is trying to release lock (&dev_instance_lock_key) at: [] netdev_unlock include/linux/netdevice.h:2756 [inline] [] netdev_unlock_ops include/net/netdev_lock.h:48 [inline] [] do_setlink+0xc26/0x43a0 net/core/rtnetlink.c:3406 but there are no more locks to release! other info that might help us debug this: 1 lock held by syz-executor814/5834: #0: ffffffff900fc408 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock net/core/rtnetlink.c:80 [inline] #0: ffffffff900fc408 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_nets_lock net/core/rtnetlink.c:341 [inline] #0: ffffffff900fc408 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_newlink+0xd68/0x1fe0 net/core/rtnetlink.c:4064 stack backtrace: CPU: 0 UID: 0 PID: 5834 Comm: syz-executor814 Not tainted 6.14.0-syzkaller-12504-g8bc251e5d874 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_unlock_imbalance_bug+0x185/0x1a0 kernel/locking/lockdep.c:5296 __lock_release kernel/locking/lockdep.c:5535 [inline] lock_release+0x1ed/0x3e0 kernel/locking/lockdep.c:5887 __mutex_unlock_slowpath+0xee/0x800 kernel/locking/mutex.c:907 netdev_unlock include/linux/netdevice.h:2756 [inline] netdev_unlock_ops include/net/netdev_lock.h:48 [inline] do_setlink+0xc26/0x43a0 net/core/rtnetlink.c:3406 rtnl_group_changelink net/core/rtnetlink.c:3783 [inline] __rtnl_newlink net/core/rtnetlink.c:3937 [inline] rtnl_newlink+0x1619/0x1fe0 net/core/rtnetlink.c:4065 rtnetlink_rcv_msg+0x80f/0xd70 net/core/rtnetlink.c:6955 netlink_rcv_skb+0x208/0x480 net/netlink/af_netlink.c:2534 netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] netlink_unicast+0x7f8/0x9a0 net/netlink/af_netlink.c:1339 netlink_sendmsg+0x8c3/0xcd0 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:727 ____sys_sendmsg+0x523/0x860 net/socket.c:2566 ___sys_sendmsg net/socket.c:2620 [inline] __sys_sendmsg+0x271/0x360 net/socket.c:2652 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f8427b614a9 Code: 48 83 c4 28 c3 e8 37 17 00 00 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fff9b59f3a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fff9b59f578 RCX: 00007f8427b614a9 RDX: 0000000000000000 RSI: 0000200000000300 RDI: 0000000000000004 RBP: 00007f8427bd4610 R08: 000000000000000c R09: 00007fff9b59f578 R10: 000000000000001b R11: 0000000000000246 R12: 0000000000000001 R13: Fixes: 4c975fd70002 ("net: hold instance lock during NETDEV_REGISTER/UP") Reported-by: syzbot+45016fe295243a7882d3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=45016fe295243a7882d3 Signed-off-by: Kuniyuki Iwashima Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250407164229.24414-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d8d03ff87a3bf5..39a5b72e861f68 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3027,7 +3027,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, err = validate_linkmsg(dev, tb, extack); if (err < 0) - goto errout; + return err; if (tb[IFLA_IFNAME]) nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); From c59026c0570a2a97ce2e7d5ae5e9c48fc841542b Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Tue, 18 Mar 2025 23:18:16 +0000 Subject: [PATCH 0291/2924] rust: kbuild: Don't export __pfx symbols With CONFIG_PREFIX_SYMBOLS, objtool adds __pfx prefix symbols to claim the compiler emitted call padding bytes. When CONFIG_X86_KERNEL_IBT is not selected, the symbols are added to individual object files and for Rust objects, they end up being exported, resulting in warnings with CONFIG_GENDWARFKSYMS as the symbols have no debugging information: warning: gendwarfksyms: symbol_print_versions: no information for symbol __pfx_rust_helper_put_task_struct warning: gendwarfksyms: symbol_print_versions: no information for symbol __pfx_rust_helper_task_euid warning: gendwarfksyms: symbol_print_versions: no information for symbol __pfx_rust_helper_readq_relaxed ... Filter out the __pfx prefix from exported symbols similarly to the existing __cfi and __odr_asan prefixes. Signed-off-by: Sami Tolvanen Reviewed-by: Alice Ryhl Cc: stable@vger.kernel.org Fixes: ac61506bf2d1 ("rust: Use gendwarfksyms + extended modversions for CONFIG_MODVERSIONS") Link: https://lore.kernel.org/r/20250318231815.917621-2-samitolvanen@google.com Signed-off-by: Miguel Ojeda --- rust/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index fa0eea8a9eca22..3aca903a7d08cf 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -368,7 +368,7 @@ $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ; $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers/helpers.c FORCE $(call if_changed_dep,bindgen) -rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ && $$3!~/__odr_asan/ { printf $(2),$$3 }' +rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__(pfx|cfi|odr_asan)/ { printf $(2),$$3 }' quiet_cmd_exports = EXPORTS $@ cmd_exports = \ From 2d12c6fb78753925f494ca9079e2383529e8ae0e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 01:21:14 -0700 Subject: [PATCH 0292/2924] objtool: Remove ANNOTATE_IGNORE_ALTERNATIVE from CLAC/STAC ANNOTATE_IGNORE_ALTERNATIVE adds additional noise to the code generated by CLAC/STAC alternatives, hurting readability for those whose read uaccess-related code generation on a regular basis. Remove the annotation specifically for the "NOP patched with CLAC/STAC" case in favor of a manual check. Leave the other uses of that annotation in place as they're less common and more difficult to detect. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Acked-by: Linus Torvalds Link: https://lore.kernel.org/r/fc972ba4995d826fcfb8d02733a14be8d670900b.1744098446.git.jpoimboe@kernel.org --- arch/x86/include/asm/smap.h | 12 ++++++------ tools/objtool/check.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 55a5e656e4b9ea..4f84d421d1cf29 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -16,23 +16,23 @@ #ifdef __ASSEMBLER__ #define ASM_CLAC \ - ALTERNATIVE __stringify(ANNOTATE_IGNORE_ALTERNATIVE), "clac", X86_FEATURE_SMAP + ALTERNATIVE "", "clac", X86_FEATURE_SMAP #define ASM_STAC \ - ALTERNATIVE __stringify(ANNOTATE_IGNORE_ALTERNATIVE), "stac", X86_FEATURE_SMAP + ALTERNATIVE "", "stac", X86_FEATURE_SMAP #else /* __ASSEMBLER__ */ static __always_inline void clac(void) { /* Note: a barrier is implicit in alternative() */ - alternative(ANNOTATE_IGNORE_ALTERNATIVE "", "clac", X86_FEATURE_SMAP); + alternative("", "clac", X86_FEATURE_SMAP); } static __always_inline void stac(void) { /* Note: a barrier is implicit in alternative() */ - alternative(ANNOTATE_IGNORE_ALTERNATIVE "", "stac", X86_FEATURE_SMAP); + alternative("", "stac", X86_FEATURE_SMAP); } static __always_inline unsigned long smap_save(void) @@ -59,9 +59,9 @@ static __always_inline void smap_restore(unsigned long flags) /* These macros can be used in asm() statements */ #define ASM_CLAC \ - ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "clac", X86_FEATURE_SMAP) + ALTERNATIVE("", "clac", X86_FEATURE_SMAP) #define ASM_STAC \ - ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "stac", X86_FEATURE_SMAP) + ALTERNATIVE("", "stac", X86_FEATURE_SMAP) #define ASM_CLAC_UNSAFE \ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "clac", X86_FEATURE_SMAP) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 69f94bc4749985..b649049b6a11b2 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3505,6 +3505,34 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file, return next_insn_same_sec(file, alt_group->orig_group->last_insn); } +static bool skip_alt_group(struct instruction *insn) +{ + struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL; + + /* ANNOTATE_IGNORE_ALTERNATIVE */ + if (insn->alt_group && insn->alt_group->ignore) + return true; + + /* + * For NOP patched with CLAC/STAC, only follow the latter to avoid + * impossible code paths combining patched CLAC with unpatched STAC + * or vice versa. + * + * ANNOTATE_IGNORE_ALTERNATIVE could have been used here, but Linus + * requested not to do that to avoid hurting .s file readability + * around CLAC/STAC alternative sites. + */ + + if (!alt_insn) + return false; + + /* Don't override ASM_{CLAC,STAC}_UNSAFE */ + if (alt_insn->alt_group && alt_insn->alt_group->ignore) + return false; + + return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC; +} + /* * Follow the branch starting at the given instruction, and recursively follow * any other branches (jumps). Meanwhile, track the frame pointer state at @@ -3625,7 +3653,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, } } - if (insn->alt_group && insn->alt_group->ignore) + if (skip_alt_group(insn)) return 0; if (handle_insn_ops(insn, next_insn, &state)) From 5cd2950359ef31c9e273588c33f372ff8e546e2e Mon Sep 17 00:00:00 2001 From: Khaled Elnaggar Date: Sun, 26 Jan 2025 21:51:33 +0200 Subject: [PATCH 0293/2924] selftests: tpm2: create a dedicated .gitignore The tpm2 selftests produce two logs: SpaceTest.log and AsyncTest.log. Only SpaceTest.log was listed in selftests/.gitignore, while AsyncTest.log remained untracked. This change creates a dedicated .gitignore in the tpm2/ directory to manage these entries, keeping tpm2-specific patterns isolated from parent .gitignore. Fixed white-space errors during commit Shuah Khan Link: https://lore.kernel.org/r/20250126195147.902608-1-khaledelnaggarlinux@gmail.com Signed-off-by: Khaled Elnaggar Reviewed-by: Jarkko Sakkinen Signed-off-by: Shuah Khan --- tools/testing/selftests/.gitignore | 1 - tools/testing/selftests/tpm2/.gitignore | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/tpm2/.gitignore diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore index cb24124ac5b98e..674aaa02e39664 100644 --- a/tools/testing/selftests/.gitignore +++ b/tools/testing/selftests/.gitignore @@ -4,7 +4,6 @@ gpiogpio-hammer gpioinclude/ gpiolsgpio kselftest_install/ -tpm2/SpaceTest.log # Python bytecode and cache __pycache__/ diff --git a/tools/testing/selftests/tpm2/.gitignore b/tools/testing/selftests/tpm2/.gitignore new file mode 100644 index 00000000000000..6d6165c5e35da6 --- /dev/null +++ b/tools/testing/selftests/tpm2/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +AsyncTest.log +SpaceTest.log From 170ec11935de4acced0ae001562cca3a144acd67 Mon Sep 17 00:00:00 2001 From: Ahmed Salem Date: Wed, 12 Feb 2025 01:16:17 +0200 Subject: [PATCH 0294/2924] selftests: tpm2: test_smoke: use POSIX-conformant expression operator Use POSIX-conformant expression operator symbol '='. The use of the non POSIX-conformant symbol '==' would work in bash, but not in sh where the unexpected operator error would result in test_smoke.sh being skipped. Instead of changing the shebang to use bash, which may not be available on all systems, use the POSIX-conformant expression symbol '=' to test for equality. Without this patch: =================== # make -j8 TARGETS=tpm2 kselftest # selftests: tpm2: test_smoke.sh # ./test_smoke.sh: 9: [: 2: unexpected operator ok 1 selftests: tpm2: test_smoke.sh # SKIP With this patch: ================ # make -j8 TARGETS=tpm2 kselftest # selftests: tpm2: test_smoke.sh # Ran 9 tests in 9.236s ok 1 selftests: tpm2: test_smoke.sh Link: https://lore.kernel.org/r/37ztyakgrrtgvec344mg7mspchwjpxxtsprtjidso3pwkmm4f4@awsa5mzgqmtb Signed-off-by: Ahmed Salem Reviewed-by: Jarkko Sakkinen Signed-off-by: Shuah Khan --- tools/testing/selftests/tpm2/test_smoke.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 168f4b166234af..3a60e6c6f5c949 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -6,6 +6,6 @@ ksft_skip=4 [ -e /dev/tpm0 ] || exit $ksft_skip read tpm_version < /sys/class/tpm/tpm0/tpm_version_major -[ "$tpm_version" == 2 ] || exit $ksft_skip +[ "$tpm_version" = 2 ] || exit $ksft_skip python3 -m unittest -v tpm2_tests.SmokeTest 2>&1 From 14e594a1fc8b879734f8057a870d28c86a889c5f Mon Sep 17 00:00:00 2001 From: Rae Moar Date: Wed, 19 Mar 2025 22:33:51 +0000 Subject: [PATCH 0295/2924] kunit: tool: fix count of tests if late test plan Fix test count with late test plan. For example, TAP version 13 ok 1 test1 1..4 Returns a count of 1 passed, 1 crashed (because it expects tests after the test plan): returning the total count of 2 tests Change this to be 1 passed, 1 error: total count of 1 test Link: https://lore.kernel.org/r/20250319223351.1517262-1-rmoar@google.com Signed-off-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 4 ++++ tools/testing/kunit/kunit_tool_test.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index da53a709773a23..c176487356e6c9 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -809,6 +809,10 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: test.log.extend(parse_diagnostic(lines)) if test.name != "" and not peek_test_name_match(lines, test): test.add_error(printer, 'missing subtest result line!') + elif not lines: + print_log(test.log, printer) + test.status = TestStatus.NO_TESTS + test.add_error(printer, 'No more test results!') else: parse_test_result(lines, test, expected_num, printer) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 5ff4f6ffd8736d..bbba921e0eacb1 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -371,8 +371,8 @@ def test_parse_late_test_plan(self): """ result = kunit_parser.parse_run_tests(output.splitlines(), stdout) # Missing test results after test plan should alert a suspected test crash. - self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) - self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, crashed=1, errors=1)) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2)) def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream: return kunit_parser.LineStream(enumerate(strs, start=1)) From d1be0cf3b8aeae75bc8fff5b7a3e01ebfe276008 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 27 Mar 2025 16:33:01 +0100 Subject: [PATCH 0296/2924] kunit: Spelling s/slowm/slow/ Fix a misspelling of "slow". Link: https://lore.kernel.org/r/1f7ebf98598418914ec9f5b6d5cb8583d24a4bf0.1743089563.git.geert@linux-m68k.org Signed-off-by: Geert Uytterhoeven Reviewed-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/test.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index 0ffb97c785663f..39c768f87dc9f4 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -67,7 +67,7 @@ enum kunit_status { /* * Speed Attribute is stored as an enum and separated into categories of - * speed: very_slowm, slow, and normal. These speeds are relative to + * speed: very_slow, slow, and normal. These speeds are relative to * other KUnit tests. * * Note: unset speed attribute acts as default of KUNIT_SPEED_NORMAL. From 7d50e00fef2832e98d7e06bbfc85c1d66ee110ca Mon Sep 17 00:00:00 2001 From: Edward Liaw Date: Fri, 4 Apr 2025 22:12:20 +0000 Subject: [PATCH 0297/2924] selftests/futex: futex_waitv wouldblock test should fail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Testcase should fail if -EWOULDBLOCK is not returned when expected value differs from actual value from the waiter. Link: https://lore.kernel.org/r/20250404221225.1596324-1-edliaw@google.com Fixes: 9d57f7c79748920636f8293d2f01192d702fe390 ("selftests: futex: Test sys_futex_waitv() wouldblock") Signed-off-by: Edward Liaw Reviewed-by: Thomas Gleixner Reviewed-by: André Almeida Signed-off-by: Shuah Khan --- .../testing/selftests/futex/functional/futex_wait_wouldblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 7d7a6a06cdb75b..2d8230da906429 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); if (!res || errno != EWOULDBLOCK) { - ksft_test_result_pass("futex_waitv returned: %d %s\n", + ksft_test_result_fail("futex_waitv returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); ret = RET_FAIL; From 197c1eaa7ba633a482ed7588eea6fd4aa57e08d4 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 11 Mar 2025 16:09:40 +0800 Subject: [PATCH 0298/2924] selftests/mincore: Allow read-ahead pages to reach the end of the file When running the mincore_selftest on a system with an XFS file system, it failed the "check_file_mmap" test case due to the read-ahead pages reaching the end of the file. The failure log is as below: RUN global.check_file_mmap ... mincore_selftest.c:264:check_file_mmap:Expected i (1024) < vec_size (1024) mincore_selftest.c:265:check_file_mmap:Read-ahead pages reached the end of the file check_file_mmap: Test failed FAIL global.check_file_mmap This is because the read-ahead window size of the XFS file system on this machine is 4 MB, which is larger than the size from the #PF address to the end of the file. As a result, all the pages for this file are populated. blockdev --getra /dev/nvme0n1p5 8192 blockdev --getbsz /dev/nvme0n1p5 512 This issue can be fixed by extending the current FILE_SIZE 4MB to a larger number, but it will still fail if the read-ahead window size of the file system is larger enough. Additionally, in the real world, read-ahead pages reaching the end of the file can happen and is an expected behavior. Therefore, allowing read-ahead pages to reach the end of the file is a better choice for the "check_file_mmap" test case. Link: https://lore.kernel.org/r/20250311080940.21413-1-qiuxu.zhuo@intel.com Reported-by: Yi Lai Signed-off-by: Qiuxu Zhuo Signed-off-by: Shuah Khan --- tools/testing/selftests/mincore/mincore_selftest.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index e949a43a614508..efabfcbe0b498c 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -261,9 +261,6 @@ TEST(check_file_mmap) TH_LOG("No read-ahead pages found in memory"); } - EXPECT_LT(i, vec_size) { - TH_LOG("Read-ahead pages reached the end of the file"); - } /* * End of the readahead window. The rest of the pages shouldn't * be in memory. From 21c02e8272bc95ba0dd44943665c669029b42760 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 7 Apr 2025 20:26:32 +0200 Subject: [PATCH 0299/2924] mptcp: only inc MPJoinAckHMacFailure for HMAC failures Recently, during a debugging session using local MPTCP connections, I noticed MPJoinAckHMacFailure was not zero on the server side. The counter was in fact incremented when the PM rejected new subflows, because the 'subflow' limit was reached. The fix is easy, simply dissociating the two cases: only the HMAC validation check should increase MPTCP_MIB_JOINACKMAC counter. Fixes: 4cf8b7e48a09 ("subflow: introduce and use mptcp_can_accept_new_subflow()") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250407-net-mptcp-hmac-failure-mib-v1-1-3c9ecd0a3a50@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 409bd415ef1d19..24c2de1891bdf3 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -899,13 +899,17 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto dispose_child; } - if (!subflow_hmac_valid(req, &mp_opt) || - !mptcp_can_accept_new_subflow(subflow_req->msk)) { + if (!subflow_hmac_valid(req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; } + if (!mptcp_can_accept_new_subflow(owner)) { + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + goto dispose_child; + } + /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; From 6767698cf9c144c8d4c72925e9a1cd2cbc031d25 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 7 Apr 2025 20:26:33 +0200 Subject: [PATCH 0300/2924] selftests: mptcp: validate MPJoin HMacFailure counters The parent commit fixes an issue around these counters where one of them -- MPJoinAckHMacFailure -- was wrongly incremented in some cases. This makes sure the counter is always 0. It should be incremented only in case of corruption, or a wrong implementation, which should not be the case in these selftests. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250407-net-mptcp-hmac-failure-mib-v1-2-3c9ecd0a3a50@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 13a3b68181ee14..befa66f5a366bb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1441,6 +1441,15 @@ chk_join_nr() fi fi + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "synack HMAC" + fail_test "got $count JOIN[s] synack HMAC failure expected 0" + fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then rc=${KSFT_SKIP} @@ -1450,6 +1459,15 @@ chk_join_nr() fail_test "got $count JOIN[s] ack rx expected $ack_nr" fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "ack HMAC" + fail_test "got $count JOIN[s] ack HMAC failure expected 0" + fi + print_results "join Rx" ${rc} join_syn_tx="${join_syn_tx:-${syn_nr}}" \ From 7f1ff1b38a7c8b872382b796023419d87d78c47e Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Mon, 7 Apr 2025 13:49:52 -0500 Subject: [PATCH 0301/2924] net: libwx: handle page_pool_dev_alloc_pages error page_pool_dev_alloc_pages could return NULL. There was a WARN_ON(!page) but it would still proceed to use the NULL pointer and then crash. This is similar to commit 001ba0902046 ("net: fec: handle page_pool_dev_alloc_pages error"). This is found by our static analysis tool KNighter. Signed-off-by: Chenyuan Yang Fixes: 3c47e8ae113a ("net: libwx: Support to receive packets in NAPI") Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250407184952.2111299-1-chenyuan0y@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 6ebefa31ece1c5..e69eaa65e0de85 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -310,7 +310,8 @@ static bool wx_alloc_mapped_page(struct wx_ring *rx_ring, return true; page = page_pool_dev_alloc_pages(rx_ring->page_pool); - WARN_ON(!page); + if (unlikely(!page)) + return false; dma = page_pool_get_dma_addr(page); bi->page_dma = dma; From c7efac7f1c71470ecd9b1a9a49b1b8164583c7dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 9 Apr 2025 00:29:49 +0200 Subject: [PATCH 0302/2924] cifs: Fix support for WSL-style symlinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MS-FSCC in section 2.1.2.7 LX SYMLINK REPARSE_DATA_BUFFER now contains documentation about WSL symlink reparse point buffers. https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/68337353-9153-4ee1-ac6b-419839c3b7ad Fix the struct reparse_wsl_symlink_data_buffer to reflect buffer fields according to the MS-FSCC documentation. Fix the Linux SMB client to correctly fill the WSL symlink reparse point buffer when creaing new WSL-style symlink. There was a mistake during filling the data part of the reparse point buffer. It should starts with bytes "\x02\x00\x00\x00" (which represents version 2) but this constant was written as number 0x02000000 encoded in little endian, which resulted bytes "\x00\x00\x00\x02". This change is fixing this mistake. Fixes: 4e2043be5c14 ("cifs: Add support for creating WSL-style symlinks") Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/reparse.c | 25 ++++++++++++++++--------- fs/smb/common/smb2pdu.h | 6 +++--- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index f85dd40f34aff7..1416b9ffaca124 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -542,12 +542,12 @@ static int wsl_set_reparse_buf(struct reparse_data_buffer **buf, kfree(symname_utf16); return -ENOMEM; } - /* Flag 0x02000000 is unknown, but all wsl symlinks have this value */ - symlink_buf->Flags = cpu_to_le32(0x02000000); - /* PathBuffer is in UTF-8 but without trailing null-term byte */ + /* Version field must be set to 2 (MS-FSCC 2.1.2.7) */ + symlink_buf->Version = cpu_to_le32(2); + /* Target for Version 2 is in UTF-8 but without trailing null-term byte */ symname_utf8_len = utf16s_to_utf8s((wchar_t *)symname_utf16, symname_utf16_len/2, UTF16_LITTLE_ENDIAN, - symlink_buf->PathBuffer, + symlink_buf->Target, symname_utf8_maxlen); *buf = (struct reparse_data_buffer *)symlink_buf; buf_len = sizeof(struct reparse_wsl_symlink_data_buffer) + symname_utf8_len; @@ -1016,29 +1016,36 @@ static int parse_reparse_wsl_symlink(struct reparse_wsl_symlink_data_buffer *buf struct cifs_open_info_data *data) { int len = le16_to_cpu(buf->ReparseDataLength); + int data_offset = offsetof(typeof(*buf), Target) - offsetof(typeof(*buf), Version); int symname_utf8_len; __le16 *symname_utf16; int symname_utf16_len; - if (len <= sizeof(buf->Flags)) { + if (len <= data_offset) { cifs_dbg(VFS, "srv returned malformed wsl symlink buffer\n"); return -EIO; } - /* PathBuffer is in UTF-8 but without trailing null-term byte */ - symname_utf8_len = len - sizeof(buf->Flags); + /* MS-FSCC 2.1.2.7 defines layout of the Target field only for Version 2. */ + if (le32_to_cpu(buf->Version) != 2) { + cifs_dbg(VFS, "srv returned unsupported wsl symlink version %u\n", le32_to_cpu(buf->Version)); + return -EIO; + } + + /* Target for Version 2 is in UTF-8 but without trailing null-term byte */ + symname_utf8_len = len - data_offset; /* * Check that buffer does not contain null byte * because Linux cannot process symlink with null byte. */ - if (strnlen(buf->PathBuffer, symname_utf8_len) != symname_utf8_len) { + if (strnlen(buf->Target, symname_utf8_len) != symname_utf8_len) { cifs_dbg(VFS, "srv returned null byte in wsl symlink target location\n"); return -EIO; } symname_utf16 = kzalloc(symname_utf8_len * 2, GFP_KERNEL); if (!symname_utf16) return -ENOMEM; - symname_utf16_len = utf8s_to_utf16s(buf->PathBuffer, symname_utf8_len, + symname_utf16_len = utf8s_to_utf16s(buf->Target, symname_utf8_len, UTF16_LITTLE_ENDIAN, (wchar_t *) symname_utf16, symname_utf8_len * 2); if (symname_utf16_len < 0) { diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 764dca80c15cb1..f79a5165a7cc6a 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1567,13 +1567,13 @@ struct reparse_nfs_data_buffer { __u8 DataBuffer[]; } __packed; -/* For IO_REPARSE_TAG_LX_SYMLINK */ +/* For IO_REPARSE_TAG_LX_SYMLINK - see MS-FSCC 2.1.2.7 */ struct reparse_wsl_symlink_data_buffer { __le32 ReparseTag; __le16 ReparseDataLength; __u16 Reserved; - __le32 Flags; - __u8 PathBuffer[]; /* Variable Length UTF-8 string without nul-term */ + __le32 Version; /* Always 2 */ + __u8 Target[]; /* Variable Length UTF-8 string without nul-term */ } __packed; struct validate_negotiate_info_req { From 8d46a27085039158eb5e253ab8a35a0e33b5e864 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Tue, 8 Apr 2025 15:30:01 +0800 Subject: [PATCH 0303/2924] ata: sata_sx4: Add error handling in pdc20621_i2c_read() The function pdc20621_prog_dimm0() calls the function pdc20621_i2c_read() but does not handle the error if the read fails. This could lead to process with invalid data. A proper implementation can be found in /source/drivers/ata/sata_sx4.c, pdc20621_prog_dimm_global(). As mentioned in its commit: bb44e154e25125bef31fa956785e90fccd24610b, the variable spd0 might be used uninitialized when pdc20621_i2c_read() fails. Add error handling to pdc20621_i2c_read(). If a read operation fails, an error message is logged via dev_err(), and return a negative error code. Add error handling to pdc20621_prog_dimm0() in pdc20621_dimm_init(), and return a negative error code if pdc20621_prog_dimm0() fails. Fixes: 4447d3515616 ("libata: convert the remaining SATA drivers to new init model") Signed-off-by: Wentao Liang Reviewed-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/sata_sx4.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index a482741eb181ff..c3042eca6332df 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -1117,9 +1117,14 @@ static int pdc20621_prog_dimm0(struct ata_host *host) mmio += PDC_CHIP0_OFS; for (i = 0; i < ARRAY_SIZE(pdc_i2c_read_data); i++) - pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, - pdc_i2c_read_data[i].reg, - &spd0[pdc_i2c_read_data[i].ofs]); + if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, + pdc_i2c_read_data[i].reg, + &spd0[pdc_i2c_read_data[i].ofs])) { + dev_err(host->dev, + "Failed in i2c read at index %d: device=%#x, reg=%#x\n", + i, PDC_DIMM0_SPD_DEV_ADDRESS, pdc_i2c_read_data[i].reg); + return -EIO; + } data |= (spd0[4] - 8) | ((spd0[21] != 0) << 3) | ((spd0[3]-11) << 4); data |= ((spd0[17] / 4) << 6) | ((spd0[5] / 2) << 7) | @@ -1284,6 +1289,8 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) /* Programming DIMM0 Module Control Register (index_CID0:80h) */ size = pdc20621_prog_dimm0(host); + if (size < 0) + return size; dev_dbg(host->dev, "Local DIMM Size = %dMB\n", size); /* Programming DIMM Module Global Control Register (index_CID0:88h) */ From a421f5033c82990d795f8fcd30d5b835f8975508 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Fri, 4 Apr 2025 13:35:40 +0530 Subject: [PATCH 0304/2924] drm/i915/vrr: Add vrr.vsync_{start, end} in vrr_params_changed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the missing vrr parameters in vrr_params_changed() helper. This ensures that changes in vrr.vsync_{start,end} trigger a call to appropriate helpers to update the VRR registers. Fixes: e8cd188e91bb ("drm/i915/display: Compute vrr_vsync params") Cc: Mitul Golani Cc: Arun R Murthy Cc: Ankit Nautiyal Cc: Jani Nikula Cc: # v6.10+ Signed-off-by: Ankit Nautiyal Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250404080540.2059511-1-ankit.k.nautiyal@intel.com (cherry picked from commit ced5e64f011cb5cd541988442997ceaa7385827e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3afb85fe8536df..3b509c70fb581d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -968,7 +968,9 @@ static bool vrr_params_changed(const struct intel_crtc_state *old_crtc_state, old_crtc_state->vrr.vmin != new_crtc_state->vrr.vmin || old_crtc_state->vrr.vmax != new_crtc_state->vrr.vmax || old_crtc_state->vrr.guardband != new_crtc_state->vrr.guardband || - old_crtc_state->vrr.pipeline_full != new_crtc_state->vrr.pipeline_full; + old_crtc_state->vrr.pipeline_full != new_crtc_state->vrr.pipeline_full || + old_crtc_state->vrr.vsync_start != new_crtc_state->vrr.vsync_start || + old_crtc_state->vrr.vsync_end != new_crtc_state->vrr.vsync_end; } static bool cmrr_params_changed(const struct intel_crtc_state *old_crtc_state, From e3ea2eae70692a455e256787e4f54153fb739b90 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 2 Apr 2025 19:20:57 +0200 Subject: [PATCH 0305/2924] drm/i915/huc: Fix fence not released on early probe errors HuC delayed loading fence, introduced with commit 27536e03271da ("drm/i915/huc: track delayed HuC load with a fence"), is registered with object tracker early on driver probe but unregistered only from driver remove, which is not called on early probe errors. Since its memory is allocated under devres, then released anyway, it may happen to be allocated again to the fence and reused on future driver probes, resulting in kernel warnings that taint the kernel: <4> [309.731371] ------------[ cut here ]------------ <3> [309.731373] ODEBUG: init destroyed (active state 0) object: ffff88813d7dd2e0 object type: i915_sw_fence hint: sw_fence_dummy_notify+0x0/0x20 [i915] <4> [309.731575] WARNING: CPU: 2 PID: 3161 at lib/debugobjects.c:612 debug_print_object+0x93/0xf0 ... <4> [309.731693] CPU: 2 UID: 0 PID: 3161 Comm: i915_module_loa Tainted: G U 6.14.0-CI_DRM_16362-gf0fd77956987+ #1 ... <4> [309.731700] RIP: 0010:debug_print_object+0x93/0xf0 ... <4> [309.731728] Call Trace: <4> [309.731730] ... <4> [309.731949] __debug_object_init+0x17b/0x1c0 <4> [309.731957] debug_object_init+0x34/0x50 <4> [309.732126] __i915_sw_fence_init+0x34/0x60 [i915] <4> [309.732256] intel_huc_init_early+0x4b/0x1d0 [i915] <4> [309.732468] intel_uc_init_early+0x61/0x680 [i915] <4> [309.732667] intel_gt_common_init_early+0x105/0x130 [i915] <4> [309.732804] intel_root_gt_init_early+0x63/0x80 [i915] <4> [309.732938] i915_driver_probe+0x1fa/0xeb0 [i915] <4> [309.733075] i915_pci_probe+0xe6/0x220 [i915] <4> [309.733198] local_pci_probe+0x44/0xb0 <4> [309.733203] pci_device_probe+0xf4/0x270 <4> [309.733209] really_probe+0xee/0x3c0 <4> [309.733215] __driver_probe_device+0x8c/0x180 <4> [309.733219] driver_probe_device+0x24/0xd0 <4> [309.733223] __driver_attach+0x10f/0x220 <4> [309.733230] bus_for_each_dev+0x7d/0xe0 <4> [309.733236] driver_attach+0x1e/0x30 <4> [309.733239] bus_add_driver+0x151/0x290 <4> [309.733244] driver_register+0x5e/0x130 <4> [309.733247] __pci_register_driver+0x7d/0x90 <4> [309.733251] i915_pci_register_driver+0x23/0x30 [i915] <4> [309.733413] i915_init+0x34/0x120 [i915] <4> [309.733655] do_one_initcall+0x62/0x3f0 <4> [309.733667] do_init_module+0x97/0x2a0 <4> [309.733671] load_module+0x25ff/0x2890 <4> [309.733688] init_module_from_file+0x97/0xe0 <4> [309.733701] idempotent_init_module+0x118/0x330 <4> [309.733711] __x64_sys_finit_module+0x77/0x100 <4> [309.733715] x64_sys_call+0x1f37/0x2650 <4> [309.733719] do_syscall_64+0x91/0x180 <4> [309.733763] entry_SYSCALL_64_after_hwframe+0x76/0x7e <4> [309.733792] ... <4> [309.733806] ---[ end trace 0000000000000000 ]--- That scenario is most easily reproducible with igt@i915_module_load@reload-with-fault-injection. Fix the issue by moving the cleanup step to driver release path. Fixes: 27536e03271da ("drm/i915/huc: track delayed HuC load with a fence") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13592 Cc: Daniele Ceraolo Spurio Cc: Alan Previn Signed-off-by: Janusz Krzysztofik Reviewed-by: Daniele Ceraolo Spurio Reviewed-by: Krzysztof Karas Signed-off-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250402172057.209924-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit 795dbde92fe5c6996a02a5b579481de73035e7bf) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 11 +++++------ drivers/gpu/drm/i915/gt/uc/intel_huc.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_uc.c | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d791f9baa11d1f..456d3372eef840 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -317,6 +317,11 @@ void intel_huc_init_early(struct intel_huc *huc) } } +void intel_huc_fini_late(struct intel_huc *huc) +{ + delayed_huc_load_fini(huc); +} + #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") static int check_huc_loading_mode(struct intel_huc *huc) { @@ -414,12 +419,6 @@ int intel_huc_init(struct intel_huc *huc) void intel_huc_fini(struct intel_huc *huc) { - /* - * the fence is initialized in init_early, so we need to clean it up - * even if HuC loading is off. - */ - delayed_huc_load_fini(huc); - if (huc->heci_pkt) i915_vma_unpin_and_release(&huc->heci_pkt, 0); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index d5e441b9e08d63..921ad4b1687f0b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -55,6 +55,7 @@ struct intel_huc { int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); +void intel_huc_fini_late(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 90ba1b0b4c9d25..4a3493e8d4333e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -136,6 +136,7 @@ void intel_uc_init_late(struct intel_uc *uc) void intel_uc_driver_late_release(struct intel_uc *uc) { + intel_huc_fini_late(&uc->huc); } /** From b013b817f32fb8c560b6970e1002e94f1c725923 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Apr 2025 15:40:54 -0700 Subject: [PATCH 0306/2924] nvme-tcp: fix use-after-free of netns by kernel TCP socket. Commit 1be52169c348 ("nvme-tcp: fix selinux denied when calling sock_sendmsg") converted sock_create() in nvme_tcp_alloc_queue() to sock_create_kern(). sock_create_kern() creates a kernel socket, which does not hold a reference to netns. If the code does not manage the netns lifetime properly, use-after-free could happen. Also, TCP kernel socket with sk_net_refcnt 0 has a socket leak problem: it remains FIN_WAIT_1 if it misses FIN after close() because tcp_close() stops all timers. To fix such problems, let's hold netns ref by sk_net_refcnt_upgrade(). We had the same issue in CIFS, SMC, etc, and applied the same solution, see commit ef7134c7fc48 ("smb: client: Fix use-after-free of network namespace.") and commit 9744d2bf1976 ("smc: Fix use-after-free in tcp_write_timer_handler()."). Fixes: 1be52169c348 ("nvme-tcp: fix selinux denied when calling sock_sendmsg") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 26c459f0198d9e..72d260201d8cfe 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1803,6 +1803,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, ret = PTR_ERR(sock_file); goto err_destroy_mutex; } + + sk_net_refcnt_upgrade(queue->sock->sk); nvme_tcp_reclassify_socket(queue->sock); /* Single syn retry */ From 45c2e30bbd64d75559b99f3a5c455129a7a34b06 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 7 Apr 2025 13:46:37 +0100 Subject: [PATCH 0307/2924] x86/resctrl: Fix rdtgroup_mkdir()'s unlocked use of kernfs_node::name Since 741c10b096bc ("kernfs: Use RCU to access kernfs_node::name.") a helper rdt_kn_name() that checks that rdtgroup_mutex is held has been used for all accesses to the kernfs node name. rdtgroup_mkdir() uses the name to determine if a valid monitor group is being created by checking the parent name is "mon_groups". This is done without holding rdtgroup_mutex, and now triggers the following warning: | WARNING: suspicious RCU usage | 6.15.0-rc1 #4465 Tainted: G E | ----------------------------- | arch/x86/kernel/cpu/resctrl/internal.h:408 suspicious rcu_dereference_check() usage! [...] | Call Trace: | | dump_stack_lvl | lockdep_rcu_suspicious.cold | is_mon_groups | rdtgroup_mkdir | kernfs_iop_mkdir | vfs_mkdir | do_mkdirat | __x64_sys_mkdir | do_syscall_64 | entry_SYSCALL_64_after_hwframe Creating a control or monitor group calls mkdir_rdt_prepare(), which uses rdtgroup_kn_lock_live() to take the rdtgroup_mutex. To avoid taking and dropping the lock, move the check for the monitor group name and position into mkdir_rdt_prepare() so that it occurs under rdtgroup_mutex. Hoist is_mon_groups() earlier in the file. [ bp: Massage. ] Fixes: 741c10b096bc ("kernfs: Use RCU to access kernfs_node::name.") Signed-off-by: James Morse Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Acked-by: Ingo Molnar Link: https://lore.kernel.org/r/20250407124637.2433230-1-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 48 +++++++++++++++----------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 93ec829015f134..cc4a54145c83d8 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -3553,6 +3553,22 @@ static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp) free_rmid(rgrp->closid, rgrp->mon.rmid); } +/* + * We allow creating mon groups only with in a directory called "mon_groups" + * which is present in every ctrl_mon group. Check if this is a valid + * "mon_groups" directory. + * + * 1. The directory should be named "mon_groups". + * 2. The mon group itself should "not" be named "mon_groups". + * This makes sure "mon_groups" directory always has a ctrl_mon group + * as parent. + */ +static bool is_mon_groups(struct kernfs_node *kn, const char *name) +{ + return (!strcmp(rdt_kn_name(kn), "mon_groups") && + strcmp(name, "mon_groups")); +} + static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, const char *name, umode_t mode, enum rdt_group_type rtype, struct rdtgroup **r) @@ -3568,6 +3584,15 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, goto out_unlock; } + /* + * Check that the parent directory for a monitor group is a "mon_groups" + * directory. + */ + if (rtype == RDTMON_GROUP && !is_mon_groups(parent_kn, name)) { + ret = -EPERM; + goto out_unlock; + } + if (rtype == RDTMON_GROUP && (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { @@ -3751,22 +3776,6 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, return ret; } -/* - * We allow creating mon groups only with in a directory called "mon_groups" - * which is present in every ctrl_mon group. Check if this is a valid - * "mon_groups" directory. - * - * 1. The directory should be named "mon_groups". - * 2. The mon group itself should "not" be named "mon_groups". - * This makes sure "mon_groups" directory always has a ctrl_mon group - * as parent. - */ -static bool is_mon_groups(struct kernfs_node *kn, const char *name) -{ - return (!strcmp(rdt_kn_name(kn), "mon_groups") && - strcmp(name, "mon_groups")); -} - static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) { @@ -3782,11 +3791,8 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn) return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); - /* - * If RDT monitoring is supported and the parent directory is a valid - * "mon_groups" directory, add a monitoring subdirectory. - */ - if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name)) + /* Else, attempt to add a monitoring subdirectory. */ + if (resctrl_arch_mon_capable()) return rdtgroup_mkdir_mon(parent_kn, name, mode); return -EPERM; From 2ccd42b959aaf490333dbd3b9b102eaf295c036a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 2 Apr 2025 22:36:21 +0200 Subject: [PATCH 0308/2924] s390/virtio_ccw: Don't allocate/assign airqs for non-existing queues If we finds a vq without a name in our input array in virtio_ccw_find_vqs(), we treat it as "non-existing" and set the vq pointer to NULL; we will not call virtio_ccw_setup_vq() to allocate/setup a vq. Consequently, we create only a queue if it actually exists (name != NULL) and assign an incremental queue index to each such existing queue. However, in virtio_ccw_register_adapter_ind()->get_airq_indicator() we will not ignore these "non-existing queues", but instead assign an airq indicator to them. Besides never releasing them in virtio_ccw_drop_indicators() (because there is no virtqueue), the bigger issue seems to be that there will be a disagreement between the device and the Linux guest about the airq indicator to be used for notifying a queue, because the indicator bit for adapter I/O interrupt is derived from the queue index. The virtio spec states under "Setting Up Two-Stage Queue Indicators": ... indicator contains the guest address of an area wherein the indicators for the devices are contained, starting at bit_nr, one bit per virtqueue of the device. And further in "Notification via Adapter I/O Interrupts": For notifying the driver of virtqueue buffers, the device sets the bit in the guest-provided indicator area at the corresponding offset. For example, QEMU uses in virtio_ccw_notify() the queue index (passed as "vector") to select the relevant indicator bit. If a queue does not exist, it does not have a corresponding indicator bit assigned, because it effectively doesn't have a queue index. Using a virtio-balloon-ccw device under QEMU with free-page-hinting disabled ("free-page-hint=off") but free-page-reporting enabled ("free-page-reporting=on") will result in free page reporting not working as expected: in the virtio_balloon driver, we'll be stuck forever in virtballoon_free_page_report()->wait_event(), because the waitqueue will not be woken up as the notification from the device is lost: it would use the wrong indicator bit. Free page reporting stops working and we get splats (when configured to detect hung wqs) like: INFO: task kworker/1:3:463 blocked for more than 61 seconds. Not tainted 6.14.0 #4 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/1:3 [...] Workqueue: events page_reporting_process Call Trace: [<000002f404e6dfb2>] __schedule+0x402/0x1640 [<000002f404e6f22e>] schedule+0x3e/0xe0 [<000002f3846a88fa>] virtballoon_free_page_report+0xaa/0x110 [virtio_balloon] [<000002f40435c8a4>] page_reporting_process+0x2e4/0x740 [<000002f403fd3ee2>] process_one_work+0x1c2/0x400 [<000002f403fd4b96>] worker_thread+0x296/0x420 [<000002f403fe10b4>] kthread+0x124/0x290 [<000002f403f4e0dc>] __ret_from_fork+0x3c/0x60 [<000002f404e77272>] ret_from_fork+0xa/0x38 There was recently a discussion [1] whether the "holes" should be treated differently again, effectively assigning also non-existing queues a queue index: that should also fix the issue, but requires other workarounds to not break existing setups. Let's fix it without affecting existing setups for now by properly ignoring the non-existing queues, so the indicator bits will match the queue indexes. [1] https://lore.kernel.org/all/cover.1720611677.git.mst@redhat.com/ Fixes: a229989d975e ("virtio: don't allocate vqs when names[i] = NULL") Reported-by: Chandra Merla Cc: stable@vger.kernel.org Signed-off-by: David Hildenbrand Tested-by: Thomas Huth Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Acked-by: Michael S. Tsirkin Acked-by: Christian Borntraeger Link: https://lore.kernel.org/r/20250402203621.940090-1-david@redhat.com Signed-off-by: Heiko Carstens --- drivers/s390/virtio/virtio_ccw.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 21fa7ac849e5c3..4904b831c0a75f 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -302,11 +302,17 @@ static struct airq_info *new_airq_info(int index) static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs, u64 *first, void **airq_info) { - int i, j; + int i, j, queue_idx, highest_queue_idx = -1; struct airq_info *info; unsigned long *indicator_addr = NULL; unsigned long bit, flags; + /* Array entries without an actual queue pointer must be ignored. */ + for (i = 0; i < nvqs; i++) { + if (vqs[i]) + highest_queue_idx++; + } + for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) { mutex_lock(&airq_areas_lock); if (!airq_areas[i]) @@ -316,7 +322,7 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs, if (!info) return NULL; write_lock_irqsave(&info->lock, flags); - bit = airq_iv_alloc(info->aiv, nvqs); + bit = airq_iv_alloc(info->aiv, highest_queue_idx + 1); if (bit == -1UL) { /* Not enough vacancies. */ write_unlock_irqrestore(&info->lock, flags); @@ -325,8 +331,10 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs, *first = bit; *airq_info = info; indicator_addr = info->aiv->vector; - for (j = 0; j < nvqs; j++) { - airq_iv_set_ptr(info->aiv, bit + j, + for (j = 0, queue_idx = 0; j < nvqs; j++) { + if (!vqs[j]) + continue; + airq_iv_set_ptr(info->aiv, bit + queue_idx++, (unsigned long)vqs[j]); } write_unlock_irqrestore(&info->lock, flags); From 8231a0e632405a03018034848d3c4620d7ba1dca Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 26 Aug 2024 22:13:44 +0200 Subject: [PATCH 0309/2924] s390: Add z17 elf platform Add detection for machine types 0x9175 and 0x9176 and set ELF platform name to z17. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/processor.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 54e281436a28b9..80b1f7a29f1164 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -294,6 +294,10 @@ static int __init setup_elf_platform(void) case 0x3932: strcpy(elf_platform, "z16"); break; + case 0x9175: + case 0x9176: + strcpy(elf_platform, "z17"); + break; } return 0; } From c51ea9888e88fc20df656dab01263f17529c7374 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 26 Aug 2024 22:13:47 +0200 Subject: [PATCH 0310/2924] s390: Allow to compile with z17 optimizations Add config and compile options which allow to compile with z17 optimizations if the compiler supports it. Add the miscellaneous-instruction-extension 4 facility to the list of facilities for z17. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 19 +++++++++++++++++++ arch/s390/Makefile | 2 ++ arch/s390/include/asm/march.h | 4 ++++ arch/s390/tools/gen_facilities.c | 3 +++ 4 files changed, 28 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index db8161ebb43cbb..99fb986fca6e8d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -332,6 +332,10 @@ config HAVE_MARCH_Z16_FEATURES def_bool n select HAVE_MARCH_Z15_FEATURES +config HAVE_MARCH_Z17_FEATURES + def_bool n + select HAVE_MARCH_Z16_FEATURES + choice prompt "Processor type" default MARCH_Z196 @@ -397,6 +401,14 @@ config MARCH_Z16 Select this to enable optimizations for IBM z16 (3931 and 3932 series). +config MARCH_Z17 + bool "IBM z17" + select HAVE_MARCH_Z17_FEATURES + depends on $(cc-option,-march=z17) + help + Select this to enable optimizations for IBM z17 (9175 and + 9176 series). + endchoice config MARCH_Z10_TUNE @@ -420,6 +432,9 @@ config MARCH_Z15_TUNE config MARCH_Z16_TUNE def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT +config MARCH_Z17_TUNE + def_bool TUNE_Z17 || MARCH_Z17 && TUNE_DEFAULT + choice prompt "Tune code generation" default TUNE_DEFAULT @@ -464,6 +479,10 @@ config TUNE_Z16 bool "IBM z16" depends on $(cc-option,-mtune=z16) +config TUNE_Z17 + bool "IBM z17" + depends on $(cc-option,-mtune=z17) + endchoice config 64BIT diff --git a/arch/s390/Makefile b/arch/s390/Makefile index b06dc53bfed54c..7679bc16b692bd 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -48,6 +48,7 @@ mflags-$(CONFIG_MARCH_Z13) := -march=z13 mflags-$(CONFIG_MARCH_Z14) := -march=z14 mflags-$(CONFIG_MARCH_Z15) := -march=z15 mflags-$(CONFIG_MARCH_Z16) := -march=z16 +mflags-$(CONFIG_MARCH_Z17) := -march=z17 export CC_FLAGS_MARCH := $(mflags-y) @@ -61,6 +62,7 @@ cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14 cflags-$(CONFIG_MARCH_Z15_TUNE) += -mtune=z15 cflags-$(CONFIG_MARCH_Z16_TUNE) += -mtune=z16 +cflags-$(CONFIG_MARCH_Z17_TUNE) += -mtune=z17 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include diff --git a/arch/s390/include/asm/march.h b/arch/s390/include/asm/march.h index fd9eef3be44ca4..11a71bd1495496 100644 --- a/arch/s390/include/asm/march.h +++ b/arch/s390/include/asm/march.h @@ -33,6 +33,10 @@ #define MARCH_HAS_Z16_FEATURES 1 #endif +#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES +#define MARCH_HAS_Z17_FEATURES 1 +#endif + #endif /* __DECOMPRESSOR */ #endif /* __ASM_S390_MARCH_H */ diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 855f818deb98ef..d5c68ade71ab79 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -53,6 +53,9 @@ static struct facility_def facility_defs[] = { #endif #ifdef CONFIG_HAVE_MARCH_Z15_FEATURES 61, /* miscellaneous-instruction-extension 3 */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES + 84, /* miscellaneous-instruction-extension 4 */ #endif -1 /* END */ } From df194f57de7136b94bf25d2dce1da765b6c13654 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 23 May 2024 13:22:18 +0200 Subject: [PATCH 0311/2924] s390/cpumf: Update CPU Measurement facility extended counter set support Update CPU Measurement counter facility support for the extended counter set for machine types 9175 and 9176. Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf.c | 2 +- arch/s390/kernel/perf_cpum_cf_events.c | 167 ++++++++++++++++++++++++- 2 files changed, 165 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 33205dd410e470..fe588bfb23e5eb 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -442,7 +442,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) ctrset_size = 48; else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) ctrset_size = 128; - else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) + else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8) ctrset_size = 160; break; case CPUMF_CTR_SET_MT_DIAG: diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index e4a6bfc910808a..690a293eb10d63 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); - CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); @@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d); CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb); +CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc); +CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd); +CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce); +CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c); +CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112); +CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), @@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { NULL, }; -static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = { +static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), @@ -779,6 +855,87 @@ static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z17, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION), + CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z17, SORTL), + CPUMF_EVENT_PTR(cf_z17, DFLT_CC), + CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF), + CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumcf_pmu_events_group = { @@ -859,7 +1016,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void) if (ci.csvn >= 1 && ci.csvn <= 5) csvn = cpumcf_svn_12345_pmu_event_attr; else if (ci.csvn >= 6) - csvn = cpumcf_svn_67_pmu_event_attr; + csvn = cpumcf_svn_678_pmu_event_attr; /* Determine model-specific counter set(s) */ get_cpu_id(&cpu_id); @@ -892,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x3932: model = cpumcf_z16_pmu_event_attr; break; + case 0x9175: + case 0x9176: + model = cpumcf_z17_pmu_event_attr; + break; default: model = none; break; From aa1ac98268cd1f380c713f07e39b1fa1d5c7650c Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 9 Apr 2025 10:03:53 +0200 Subject: [PATCH 0312/2924] s390/cpumf: Fix double free on error in cpumf_pmu_event_init() In PMU event initialization functions - cpumsf_pmu_event_init() - cpumf_pmu_event_init() - cfdiag_event_init() the partially created event had to be removed when an error was detected. The event::event_init() member function had to release all resources it allocated in case of error. event::destroy() had to be called on freeing an event after it was successfully created and event::event_init() returned success. With commit c70ca298036c ("perf/core: Simplify the perf_event_alloc() error path") this is not necessary anymore. The performance subsystem common code now always calls event::destroy() to clean up the allocated resources created during event initialization. Remove the event::destroy() invocation in PMU event initialization or that function is called twice for each event that runs into an error condition in event creation. This is the kernel log entry which shows up without the fix: ------------[ cut here ]------------ refcount_t: underflow; use-after-free. WARNING: CPU: 0 PID: 43388 at lib/refcount.c:87 refcount_dec_not_one+0x74/0x90 CPU: 0 UID: 0 PID: 43388 Comm: perf Not tainted 6.15.0-20250407.rc1.git0.300.fc41.s390x+git #1 NONE Hardware name: IBM 3931 A01 704 (LPAR) Krnl PSW : 0704c00180000000 00000209cb2c1b88 (refcount_dec_not_one+0x78/0x90) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Krnl GPRS: 0000020900000027 0000020900000023 0000000000000026 0000018900000000 00000004a2200a00 0000000000000000 0000000000000057 ffffffffffffffea 00000002b386c600 00000002b3f5b3e0 00000209cc51f140 00000209cc7fc550 0000000001449d38 ffffffffffffffff 00000209cb2c1b84 00000189d67dfb80 Krnl Code: 00000209cb2c1b78: c02000506727 larl %r2,00000209cbcce9c6 00000209cb2c1b7e: c0e5ffbd4431 brasl %r14,00000209caa6a3e0 #00000209cb2c1b84: af000000 mc 0,0 >00000209cb2c1b88: a7480001 lhi %r4,1 00000209cb2c1b8c: ebeff0a00004 lmg %r14,%r15,160(%r15) 00000209cb2c1b92: ec243fbf0055 risbg %r2,%r4,63,191,0 00000209cb2c1b98: 07fe bcr 15,%r14 00000209cb2c1b9a: 47000700 bc 0,1792 Call Trace: [<00000209cb2c1b88>] refcount_dec_not_one+0x78/0x90 [<00000209cb2c1dc4>] refcount_dec_and_mutex_lock+0x24/0x90 [<00000209caa3c29e>] hw_perf_event_destroy+0x2e/0x80 [<00000209cacaf8b4>] __free_event+0x74/0x270 [<00000209cacb47c4>] perf_event_alloc.part.0+0x4a4/0x730 [<00000209cacbf3e8>] __do_sys_perf_event_open+0x248/0xc20 [<00000209cacc14a4>] __s390x_sys_perf_event_open+0x44/0x50 [<00000209cb8114de>] __do_syscall+0x12e/0x260 [<00000209cb81ce34>] system_call+0x74/0x98 Last Breaking-Event-Address: [<00000209caa6a4d2>] __warn_printk+0xf2/0x100 ---[ end trace 0000000000000000 ]--- Fixes: c70ca298036c ("perf/core: Simplify the perf_event_alloc() error path") Signed-off-by: Thomas Richter Reviewed-by: Sumanth Korikkar Signed-off-by: Heiko Carstens --- arch/s390/kernel/perf_cpum_cf.c | 9 +-------- arch/s390/kernel/perf_cpum_sf.c | 3 --- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index fe588bfb23e5eb..e657fad7e376f5 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -858,18 +858,13 @@ static int cpumf_pmu_event_type(struct perf_event *event) static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; - int err; + int err = -ENOENT; if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); - else - return -ENOENT; - - if (unlikely(err) && event->destroy) - event->destroy(event); return err; } @@ -1819,8 +1814,6 @@ static int cfdiag_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; err = cfdiag_event_init2(event); - if (unlikely(err)) - event->destroy(event); out: return err; } diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 5f60248cb46873..ad22799d8a7d91 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -885,9 +885,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event) event->attr.exclude_idle = 0; err = __hw_perf_event_init(event); - if (unlikely(err)) - if (event->destroy) - event->destroy(event); return err; } From 996457176bb7c64b3d30996592c754205ec4d3ea Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Apr 2025 20:22:14 +0300 Subject: [PATCH 0313/2924] x86/early_printk: Use 'mmio32' for consistency, fix comments First of all, using 'mmio' prevents proper implementation of 8-bit accessors. Second, it's simply inconsistent with uart8250 set of options. Rename it to 'mmio32'. While at it, remove rather misleading comment in the documentation. From now on mmio32 is self-explanatory and pciserial supports not only 32-bit MMIO accessors. Also, while at it, fix the comment for the "pciserial" case. The comment seems to be a copy'n'paste error when mentioning "serial" instead of "pciserial" (with double quotes). Fix this. With that, move it upper, so we don't calculate 'buf' twice. Fixes: 3181424aeac2 ("x86/early_printk: Add support for MMIO-based UARTs") Signed-off-by: Andy Shevchenko Signed-off-by: Ingo Molnar Reviewed-by: Denis Mukhin Link: https://lore.kernel.org/r/20250407172214.792745-1-andriy.shevchenko@linux.intel.com --- Documentation/admin-guide/kernel-parameters.txt | 5 +---- arch/x86/kernel/early_printk.c | 10 +++++----- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 76e538c77e3161..d9fd26b95b3409 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1407,18 +1407,15 @@ earlyprintk=serial[,0x...[,baudrate]] earlyprintk=ttySn[,baudrate] earlyprintk=dbgp[debugController#] + earlyprintk=mmio32,membase[,{nocfg|baudrate}] earlyprintk=pciserial[,force],bus:device.function[,{nocfg|baudrate}] earlyprintk=xdbc[xhciController#] earlyprintk=bios - earlyprintk=mmio,membase[,{nocfg|baudrate}] earlyprintk is useful when the kernel crashes before the normal console is initialized. It is not enabled by default because it has some cosmetic problems. - Only 32-bit memory addresses are supported for "mmio" - and "pciserial" devices. - Use "nocfg" to skip UART configuration, assume BIOS/firmware has configured UART correctly. diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 611f27e3890c28..3aad78bfcb267f 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -389,10 +389,10 @@ static int __init setup_early_printk(char *buf) keep = (strstr(buf, "keep") != NULL); while (*buf != '\0') { - if (!strncmp(buf, "mmio", 4)) { - early_mmio_serial_init(buf + 4); + if (!strncmp(buf, "mmio32", 6)) { + buf += 6; + early_mmio_serial_init(buf); early_console_register(&early_serial_console, keep); - buf += 4; } if (!strncmp(buf, "serial", 6)) { buf += 6; @@ -407,9 +407,9 @@ static int __init setup_early_printk(char *buf) } #ifdef CONFIG_PCI if (!strncmp(buf, "pciserial", 9)) { - early_pci_serial_init(buf + 9); + buf += 9; /* Keep from match the above "pciserial" */ + early_pci_serial_init(buf); early_console_register(&early_serial_console, keep); - buf += 9; /* Keep from match the above "serial" */ } #endif if (!strncmp(buf, "vga", 3) && From 13235d6d50bba99931c4392c0f813cfae0de3eac Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 14:47:30 -0700 Subject: [PATCH 0314/2924] x86/bugs: Rename entry_ibpb() to write_ibpb() There's nothing entry-specific about entry_ibpb(). In preparation for calling it from elsewhere, rename it to write_ibpb(). Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/1e54ace131e79b760de3fe828264e26d0896e3ac.1744148254.git.jpoimboe@kernel.org --- arch/x86/entry/entry.S | 7 ++++--- arch/x86/include/asm/nospec-branch.h | 6 +++--- arch/x86/kernel/cpu/bugs.c | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index d3caa31240ede5..cabe65ac8379f3 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -17,7 +17,8 @@ .pushsection .noinstr.text, "ax" -SYM_FUNC_START(entry_ibpb) +/* Clobbers AX, CX, DX */ +SYM_FUNC_START(write_ibpb) ANNOTATE_NOENDBR movl $MSR_IA32_PRED_CMD, %ecx movl $PRED_CMD_IBPB, %eax @@ -27,9 +28,9 @@ SYM_FUNC_START(entry_ibpb) /* Make sure IBPB clears return stack preductions too. */ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_BUG_IBPB_NO_RET RET -SYM_FUNC_END(entry_ibpb) +SYM_FUNC_END(write_ibpb) /* For KVM */ -EXPORT_SYMBOL_GPL(entry_ibpb); +EXPORT_SYMBOL_GPL(write_ibpb); .popsection diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8a5cc8e70439e1..591d1dbca60a4b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -269,7 +269,7 @@ * typically has NO_MELTDOWN). * * While retbleed_untrain_ret() doesn't clobber anything but requires stack, - * entry_ibpb() will clobber AX, CX, DX. + * write_ibpb() will clobber AX, CX, DX. * * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point * where we have a stack but before any RET instruction. @@ -279,7 +279,7 @@ VALIDATE_UNRET_END CALL_UNTRAIN_RET ALTERNATIVE_2 "", \ - "call entry_ibpb", \ibpb_feature, \ + "call write_ibpb", \ibpb_feature, \ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH #endif .endm @@ -368,7 +368,7 @@ extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); -extern void entry_ibpb(void); +extern void write_ibpb(void); #ifdef CONFIG_X86_64 extern void clear_bhb_loop(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4386aa6c69e12c..608bbe6cf730e1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1142,7 +1142,7 @@ static void __init retbleed_select_mitigation(void) setup_clear_cpu_cap(X86_FEATURE_RETHUNK); /* - * There is no need for RSB filling: entry_ibpb() ensures + * There is no need for RSB filling: write_ibpb() ensures * all predictions, including the RSB, are invalidated, * regardless of IBPB implementation. */ @@ -2676,7 +2676,7 @@ static void __init srso_select_mitigation(void) setup_clear_cpu_cap(X86_FEATURE_RETHUNK); /* - * There is no need for RSB filling: entry_ibpb() ensures + * There is no need for RSB filling: write_ibpb() ensures * all predictions, including the RSB, are invalidated, * regardless of IBPB implementation. */ @@ -2701,7 +2701,7 @@ static void __init srso_select_mitigation(void) srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; /* - * There is no need for RSB filling: entry_ibpb() ensures + * There is no need for RSB filling: write_ibpb() ensures * all predictions, including the RSB, are invalidated, * regardless of IBPB implementation. */ From fc9fd3f98423367c79e0bd85a9515df26dc1b3cc Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 14:47:31 -0700 Subject: [PATCH 0315/2924] x86/bugs: Use SBPB in write_ibpb() if applicable write_ibpb() does IBPB, which (among other things) flushes branch type predictions on AMD. If the CPU has SRSO_NO, or if the SRSO mitigation has been disabled, branch type flushing isn't needed, in which case the lighter-weight SBPB can be used. The 'x86_pred_cmd' variable already keeps track of whether IBPB or SBPB should be used. Use that instead of hardcoding IBPB. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/17c5dcd14b29199b75199d67ff7758de9d9a4928.1744148254.git.jpoimboe@kernel.org --- arch/x86/entry/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index cabe65ac8379f3..175958b02f2bfd 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -21,7 +21,7 @@ SYM_FUNC_START(write_ibpb) ANNOTATE_NOENDBR movl $MSR_IA32_PRED_CMD, %ecx - movl $PRED_CMD_IBPB, %eax + movl _ASM_RIP(x86_pred_cmd), %eax xorl %edx, %edx wrmsr From b1b19cfcf4656c75088dc06b7499f493e0dec3e5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 14:47:32 -0700 Subject: [PATCH 0316/2924] x86/bugs: Fix RSB clearing in indirect_branch_prediction_barrier() IBPB is expected to clear the RSB. However, if X86_BUG_IBPB_NO_RET is set, that doesn't happen. Make indirect_branch_prediction_barrier() take that into account by calling write_ibpb() which clears RSB on X86_BUG_IBPB_NO_RET: /* Make sure IBPB clears return stack preductions too. */ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_BUG_IBPB_NO_RET Note that, as of the previous patch, write_ibpb() also reads 'x86_pred_cmd' in order to use SBPB when applicable: movl _ASM_RIP(x86_pred_cmd), %eax Therefore that existing behavior in indirect_branch_prediction_barrier() is not lost. Fixes: 50e4b3b94090 ("x86/entry: Have entry_ibpb() invalidate return predictions") Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Nikolay Borisov Link: https://lore.kernel.org/r/bba68888c511743d4cd65564d1fc41438907523f.1744148254.git.jpoimboe@kernel.org --- arch/x86/include/asm/nospec-branch.h | 6 +++--- arch/x86/kernel/cpu/bugs.c | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 591d1dbca60a4b..5c43f145454ddd 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -514,11 +514,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) : "memory"); } -extern u64 x86_pred_cmd; - static inline void indirect_branch_prediction_barrier(void) { - alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_IBPB); + asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB) + : ASM_CALL_CONSTRAINT + :: "rax", "rcx", "rdx", "memory"); } /* The Intel SPEC CTRL MSR base value cache */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 608bbe6cf730e1..99265098d04507 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -59,7 +59,6 @@ DEFINE_PER_CPU(u64, x86_spec_ctrl_current); EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; -EXPORT_SYMBOL_GPL(x86_pred_cmd); static u64 __ro_after_init x86_arch_cap_msr; From 18bae0dfec15b24ec14ca17dc18603372f5f254f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 14:47:33 -0700 Subject: [PATCH 0317/2924] x86/bugs: Don't fill RSB on VMEXIT with eIBRS+retpoline eIBRS protects against guest->host RSB underflow/poisoning attacks. Adding retpoline to the mix doesn't change that. Retpoline has a balanced CALL/RET anyway. So the current full RSB filling on VMEXIT with eIBRS+retpoline is overkill. Disable it or do the VMEXIT_LITE mitigation if needed. Suggested-by: Pawan Gupta Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Pawan Gupta Reviewed-by: Amit Shah Reviewed-by: Nikolay Borisov Cc: Paolo Bonzini Cc: Vitaly Kuznetsov Cc: Sean Christopherson Cc: David Woodhouse Link: https://lore.kernel.org/r/84a1226e5c9e2698eae1b5ade861f1b8bf3677dc.1744148254.git.jpoimboe@kernel.org --- arch/x86/kernel/cpu/bugs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 99265098d04507..a10b37bb747ed0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1617,20 +1617,20 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ case SPECTRE_V2_NONE: return; - case SPECTRE_V2_EIBRS_LFENCE: case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_LFENCE: + case SPECTRE_V2_EIBRS_RETPOLINE: if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { - setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); } return; - case SPECTRE_V2_EIBRS_RETPOLINE: case SPECTRE_V2_RETPOLINE: case SPECTRE_V2_LFENCE: case SPECTRE_V2_IBRS: - setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); return; } From 27ce8299bc1ec6df8306073785ff82b30b3cc5ee Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 14:47:34 -0700 Subject: [PATCH 0318/2924] x86/bugs: Don't fill RSB on context switch with eIBRS User->user Spectre v2 attacks (including RSB) across context switches are already mitigated by IBPB in cond_mitigation(), if enabled globally or if either the prev or the next task has opted in to protection. RSB filling without IBPB serves no purpose for protecting user space, as indirect branches are still vulnerable. User->kernel RSB attacks are mitigated by eIBRS. In which case the RSB filling on context switch isn't needed, so remove it. Suggested-by: Pawan Gupta Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Reviewed-by: Pawan Gupta Reviewed-by: Amit Shah Reviewed-by: Nikolay Borisov Link: https://lore.kernel.org/r/98cdefe42180358efebf78e3b80752850c7a3e1b.1744148254.git.jpoimboe@kernel.org --- arch/x86/kernel/cpu/bugs.c | 24 ++++++++++++------------ arch/x86/mm/tlb.c | 6 +++--- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a10b37bb747ed0..e2a672f925e3be 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1591,7 +1591,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) rrsba_disabled = true; } -static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) +static void __init spectre_v2_select_rsb_mitigation(enum spectre_v2_mitigation mode) { /* * Similar to context switches, there are two types of RSB attacks @@ -1615,7 +1615,7 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ */ switch (mode) { case SPECTRE_V2_NONE: - return; + break; case SPECTRE_V2_EIBRS: case SPECTRE_V2_EIBRS_LFENCE: @@ -1624,18 +1624,21 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); } - return; + break; case SPECTRE_V2_RETPOLINE: case SPECTRE_V2_LFENCE: case SPECTRE_V2_IBRS: - pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); + pr_info("Spectre v2 / SpectreRSB: Filling RSB on context switch and VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); - return; - } + break; - pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); - dump_stack(); + default: + pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation\n"); + dump_stack(); + break; + } } /* @@ -1867,10 +1870,7 @@ static void __init spectre_v2_select_mitigation(void) * * FIXME: Is this pointless for retbleed-affected AMD? */ - setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - - spectre_v2_determine_rsb_fill_type_at_vmexit(mode); + spectre_v2_select_rsb_mitigation(mode); /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index e459d97ef39772..eb83348f930512 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -667,9 +667,9 @@ static void cond_mitigation(struct task_struct *next) prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec); /* - * Avoid user/user BTB poisoning by flushing the branch predictor - * when switching between processes. This stops one process from - * doing Spectre-v2 attacks on another. + * Avoid user->user BTB/RSB poisoning by flushing them when switching + * between processes. This stops one process from doing Spectre-v2 + * attacks on another. * * Both, the conditional and the always IBPB mode use the mm * pointer to avoid the IBPB when switching between tasks of the From 83f6665a49c3d44ad0c08f837d352dd290f5d10b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 8 Apr 2025 14:47:35 -0700 Subject: [PATCH 0319/2924] x86/bugs: Add RSB mitigation document Create a document to summarize hard-earned knowledge about RSB-related mitigations, with references, and replace the overly verbose yet incomplete comments with a reference to the document. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/ab73f4659ba697a974759f07befd41ae605e33dd.1744148254.git.jpoimboe@kernel.org --- Documentation/admin-guide/hw-vuln/index.rst | 1 + Documentation/admin-guide/hw-vuln/rsb.rst | 268 ++++++++++++++++++++ arch/x86/kernel/cpu/bugs.c | 64 +---- 3 files changed, 282 insertions(+), 51 deletions(-) create mode 100644 Documentation/admin-guide/hw-vuln/rsb.rst diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index ff0b440ef2dc90..451874b8135d8e 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -22,3 +22,4 @@ are configurable at compile, boot or run time. srso gather_data_sampling reg-file-data-sampling + rsb diff --git a/Documentation/admin-guide/hw-vuln/rsb.rst b/Documentation/admin-guide/hw-vuln/rsb.rst new file mode 100644 index 00000000000000..21dbf9cf25f8bd --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/rsb.rst @@ -0,0 +1,268 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +RSB-related mitigations +======================= + +.. warning:: + Please keep this document up-to-date, otherwise you will be + volunteered to update it and convert it to a very long comment in + bugs.c! + +Since 2018 there have been many Spectre CVEs related to the Return Stack +Buffer (RSB) (sometimes referred to as the Return Address Stack (RAS) or +Return Address Predictor (RAP) on AMD). + +Information about these CVEs and how to mitigate them is scattered +amongst a myriad of microarchitecture-specific documents. + +This document attempts to consolidate all the relevant information in +once place and clarify the reasoning behind the current RSB-related +mitigations. It's meant to be as concise as possible, focused only on +the current kernel mitigations: what are the RSB-related attack vectors +and how are they currently being mitigated? + +It's *not* meant to describe how the RSB mechanism operates or how the +exploits work. More details about those can be found in the references +below. + +Rather, this is basically a glorified comment, but too long to actually +be one. So when the next CVE comes along, a kernel developer can +quickly refer to this as a refresher to see what we're actually doing +and why. + +At a high level, there are two classes of RSB attacks: RSB poisoning +(Intel and AMD) and RSB underflow (Intel only). They must each be +considered individually for each attack vector (and microarchitecture +where applicable). + +---- + +RSB poisoning (Intel and AMD) +============================= + +SpectreRSB +~~~~~~~~~~ + +RSB poisoning is a technique used by SpectreRSB [#spectre-rsb]_ where +an attacker poisons an RSB entry to cause a victim's return instruction +to speculate to an attacker-controlled address. This can happen when +there are unbalanced CALLs/RETs after a context switch or VMEXIT. + +* All attack vectors can potentially be mitigated by flushing out any + poisoned RSB entries using an RSB filling sequence + [#intel-rsb-filling]_ [#amd-rsb-filling]_ when transitioning between + untrusted and trusted domains. But this has a performance impact and + should be avoided whenever possible. + + .. DANGER:: + **FIXME**: Currently we're flushing 32 entries. However, some CPU + models have more than 32 entries. The loop count needs to be + increased for those. More detailed information is needed about RSB + sizes. + +* On context switch, the user->user mitigation requires ensuring the + RSB gets filled or cleared whenever IBPB gets written [#cond-ibpb]_ + during a context switch: + + * AMD: + On Zen 4+, IBPB (or SBPB [#amd-sbpb]_ if used) clears the RSB. + This is indicated by IBPB_RET in CPUID [#amd-ibpb-rsb]_. + + On Zen < 4, the RSB filling sequence [#amd-rsb-filling]_ must be + always be done in addition to IBPB [#amd-ibpb-no-rsb]_. This is + indicated by X86_BUG_IBPB_NO_RET. + + * Intel: + IBPB always clears the RSB: + + "Software that executed before the IBPB command cannot control + the predicted targets of indirect branches executed after the + command on the same logical processor. The term indirect branch + in this context includes near return instructions, so these + predicted targets may come from the RSB." [#intel-ibpb-rsb]_ + +* On context switch, user->kernel attacks are prevented by SMEP. User + space can only insert user space addresses into the RSB. Even + non-canonical addresses can't be inserted due to the page gap at the + end of the user canonical address space reserved by TASK_SIZE_MAX. + A SMEP #PF at instruction fetch prevents the kernel from speculatively + executing user space. + + * AMD: + "Finally, branches that are predicted as 'ret' instructions get + their predicted targets from the Return Address Predictor (RAP). + AMD recommends software use a RAP stuffing sequence (mitigation + V2-3 in [2]) and/or Supervisor Mode Execution Protection (SMEP) + to ensure that the addresses in the RAP are safe for + speculation. Collectively, we refer to these mitigations as "RAP + Protection"." [#amd-smep-rsb]_ + + * Intel: + "On processors with enhanced IBRS, an RSB overwrite sequence may + not suffice to prevent the predicted target of a near return + from using an RSB entry created in a less privileged predictor + mode. Software can prevent this by enabling SMEP (for + transitions from user mode to supervisor mode) and by having + IA32_SPEC_CTRL.IBRS set during VM exits." [#intel-smep-rsb]_ + +* On VMEXIT, guest->host attacks are mitigated by eIBRS (and PBRSB + mitigation if needed): + + * AMD: + "When Automatic IBRS is enabled, the internal return address + stack used for return address predictions is cleared on VMEXIT." + [#amd-eibrs-vmexit]_ + + * Intel: + "On processors with enhanced IBRS, an RSB overwrite sequence may + not suffice to prevent the predicted target of a near return + from using an RSB entry created in a less privileged predictor + mode. Software can prevent this by enabling SMEP (for + transitions from user mode to supervisor mode) and by having + IA32_SPEC_CTRL.IBRS set during VM exits. Processors with + enhanced IBRS still support the usage model where IBRS is set + only in the OS/VMM for OSes that enable SMEP. To do this, such + processors will ensure that guest behavior cannot control the + RSB after a VM exit once IBRS is set, even if IBRS was not set + at the time of the VM exit." [#intel-eibrs-vmexit]_ + + Note that some Intel CPUs are susceptible to Post-barrier Return + Stack Buffer Predictions (PBRSB) [#intel-pbrsb]_, where the last + CALL from the guest can be used to predict the first unbalanced RET. + In this case the PBRSB mitigation is needed in addition to eIBRS. + +AMD RETBleed / SRSO / Branch Type Confusion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +On AMD, poisoned RSB entries can also be created by the AMD RETBleed +variant [#retbleed-paper]_ [#amd-btc]_ or by Speculative Return Stack +Overflow [#amd-srso]_ (Inception [#inception-paper]_). The kernel +protects itself by replacing every RET in the kernel with a branch to a +single safe RET. + +---- + +RSB underflow (Intel only) +========================== + +RSB Alternate (RSBA) ("Intel Retbleed") +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some Intel Skylake-generation CPUs are susceptible to the Intel variant +of RETBleed [#retbleed-paper]_ (Return Stack Buffer Underflow +[#intel-rsbu]_). If a RET is executed when the RSB buffer is empty due +to mismatched CALLs/RETs or returning from a deep call stack, the branch +predictor can fall back to using the Branch Target Buffer (BTB). If a +user forces a BTB collision then the RET can speculatively branch to a +user-controlled address. + +* Note that RSB filling doesn't fully mitigate this issue. If there + are enough unbalanced RETs, the RSB may still underflow and fall back + to using a poisoned BTB entry. + +* On context switch, user->user underflow attacks are mitigated by the + conditional IBPB [#cond-ibpb]_ on context switch which effectively + clears the BTB: + + * "The indirect branch predictor barrier (IBPB) is an indirect branch + control mechanism that establishes a barrier, preventing software + that executed before the barrier from controlling the predicted + targets of indirect branches executed after the barrier on the same + logical processor." [#intel-ibpb-btb]_ + +* On context switch and VMEXIT, user->kernel and guest->host RSB + underflows are mitigated by IBRS or eIBRS: + + * "Enabling IBRS (including enhanced IBRS) will mitigate the "RSBU" + attack demonstrated by the researchers. As previously documented, + Intel recommends the use of enhanced IBRS, where supported. This + includes any processor that enumerates RRSBA but not RRSBA_DIS_S." + [#intel-rsbu]_ + + However, note that eIBRS and IBRS do not mitigate intra-mode attacks. + Like RRSBA below, this is mitigated by clearing the BHB on kernel + entry. + + As an alternative to classic IBRS, call depth tracking (combined with + retpolines) can be used to track kernel returns and fill the RSB when + it gets close to being empty. + +Restricted RSB Alternate (RRSBA) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some newer Intel CPUs have Restricted RSB Alternate (RRSBA) behavior, +which, similar to RSBA described above, also falls back to using the BTB +on RSB underflow. The only difference is that the predicted targets are +restricted to the current domain when eIBRS is enabled: + +* "Restricted RSB Alternate (RRSBA) behavior allows alternate branch + predictors to be used by near RET instructions when the RSB is + empty. When eIBRS is enabled, the predicted targets of these + alternate predictors are restricted to those belonging to the + indirect branch predictor entries of the current prediction domain. + [#intel-eibrs-rrsba]_ + +When a CPU with RRSBA is vulnerable to Branch History Injection +[#bhi-paper]_ [#intel-bhi]_, an RSB underflow could be used for an +intra-mode BTI attack. This is mitigated by clearing the BHB on +kernel entry. + +However if the kernel uses retpolines instead of eIBRS, it needs to +disable RRSBA: + +* "Where software is using retpoline as a mitigation for BHI or + intra-mode BTI, and the processor both enumerates RRSBA and + enumerates RRSBA_DIS controls, it should disable this behavior." + [#intel-retpoline-rrsba]_ + +---- + +References +========== + +.. [#spectre-rsb] `Spectre Returns! Speculation Attacks using the Return Stack Buffer `_ + +.. [#intel-rsb-filling] "Empty RSB Mitigation on Skylake-generation" in `Retpoline: A Branch Target Injection Mitigation `_ + +.. [#amd-rsb-filling] "Mitigation V2-3" in `Software Techniques for Managing Speculation `_ + +.. [#cond-ibpb] Whether IBPB is written depends on whether the prev and/or next task is protected from Spectre attacks. It typically requires opting in per task or system-wide. For more details see the documentation for the ``spectre_v2_user`` cmdline option in Documentation/admin-guide/kernel-parameters.txt. + +.. [#amd-sbpb] IBPB without flushing of branch type predictions. Only exists for AMD. + +.. [#amd-ibpb-rsb] "Function 8000_0008h -- Processor Capacity Parameters and Extended Feature Identification" in `AMD64 Architecture Programmer's Manual Volume 3: General-Purpose and System Instructions `_. SBPB behaves the same way according to `this email `_. + +.. [#amd-ibpb-no-rsb] `Spectre Attacks: Exploiting Speculative Execution `_ + +.. [#intel-ibpb-rsb] "Introduction" in `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 `_ + +.. [#amd-smep-rsb] "Existing Mitigations" in `Technical Guidance for Mitigating Branch Type Confusion `_ + +.. [#intel-smep-rsb] "Enhanced IBRS" in `Indirect Branch Restricted Speculation `_ + +.. [#amd-eibrs-vmexit] "Extended Feature Enable Register (EFER)" in `AMD64 Architecture Programmer's Manual Volume 2: System Programming `_ + +.. [#intel-eibrs-vmexit] "Enhanced IBRS" in `Indirect Branch Restricted Speculation `_ + +.. [#intel-pbrsb] `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 `_ + +.. [#retbleed-paper] `RETBleed: Arbitrary Speculative Code Execution with Return Instruction `_ + +.. [#amd-btc] `Technical Guidance for Mitigating Branch Type Confusion `_ + +.. [#amd-srso] `Technical Update Regarding Speculative Return Stack Overflow `_ + +.. [#inception-paper] `Inception: Exposing New Attack Surfaces with Training in Transient Execution `_ + +.. [#intel-rsbu] `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 `_ + +.. [#intel-ibpb-btb] `Indirect Branch Predictor Barrier' `_ + +.. [#intel-eibrs-rrsba] "Guidance for RSBU" in `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 `_ + +.. [#bhi-paper] `Branch History Injection: On the Effectiveness of Hardware Mitigations Against Cross-Privilege Spectre-v2 Attacks `_ + +.. [#intel-bhi] `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 `_ + +.. [#intel-retpoline-rrsba] "Retpoline" in `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 `_ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e2a672f925e3be..362602b705cc43 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1594,25 +1594,25 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) static void __init spectre_v2_select_rsb_mitigation(enum spectre_v2_mitigation mode) { /* - * Similar to context switches, there are two types of RSB attacks - * after VM exit: + * WARNING! There are many subtleties to consider when changing *any* + * code related to RSB-related mitigations. Before doing so, carefully + * read the following document, and update if necessary: * - * 1) RSB underflow + * Documentation/admin-guide/hw-vuln/rsb.rst * - * 2) Poisoned RSB entry + * In an overly simplified nutshell: * - * When retpoline is enabled, both are mitigated by filling/clearing - * the RSB. + * - User->user RSB attacks are conditionally mitigated during + * context switches by cond_mitigation -> write_ibpb(). * - * When IBRS is enabled, while #1 would be mitigated by the IBRS branch - * prediction isolation protections, RSB still needs to be cleared - * because of #2. Note that SMEP provides no protection here, unlike - * user-space-poisoned RSB entries. + * - User->kernel and guest->host attacks are mitigated by eIBRS or + * RSB filling. * - * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB - * bug is present then a LITE version of RSB protection is required, - * just a single call needs to retire before a RET is executed. + * Though, depending on config, note that other alternative + * mitigations may end up getting used instead, e.g., IBPB on + * entry/vmexit, call depth tracking, or return thunks. */ + switch (mode) { case SPECTRE_V2_NONE: break; @@ -1832,44 +1832,6 @@ static void __init spectre_v2_select_mitigation(void) spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); - /* - * If Spectre v2 protection has been enabled, fill the RSB during a - * context switch. In general there are two types of RSB attacks - * across context switches, for which the CALLs/RETs may be unbalanced. - * - * 1) RSB underflow - * - * Some Intel parts have "bottomless RSB". When the RSB is empty, - * speculated return targets may come from the branch predictor, - * which could have a user-poisoned BTB or BHB entry. - * - * AMD has it even worse: *all* returns are speculated from the BTB, - * regardless of the state of the RSB. - * - * When IBRS or eIBRS is enabled, the "user -> kernel" attack - * scenario is mitigated by the IBRS branch prediction isolation - * properties, so the RSB buffer filling wouldn't be necessary to - * protect against this type of attack. - * - * The "user -> user" attack scenario is mitigated by RSB filling. - * - * 2) Poisoned RSB entry - * - * If the 'next' in-kernel return stack is shorter than 'prev', - * 'next' could be tricked into speculating with a user-poisoned RSB - * entry. - * - * The "user -> kernel" attack scenario is mitigated by SMEP and - * eIBRS. - * - * The "user -> user" scenario, also known as SpectreBHB, requires - * RSB clearing. - * - * So to mitigate all cases, unconditionally fill RSB on context - * switches. - * - * FIXME: Is this pointless for retbleed-affected AMD? - */ spectre_v2_select_rsb_mitigation(mode); /* From 2b5f0c5bc819af2b0759a8fcddc1b39102735c0f Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 8 Apr 2025 17:29:03 +0200 Subject: [PATCH 0320/2924] nvmet-fcloop: swap list_add_tail arguments The newly element to be added to the list is the first argument of list_add_tail. This fix is missing dcfad4ab4d67 ("nvmet-fcloop: swap the list_add_tail arguments"). Fixes: 437c0b824dbd ("nvme-fcloop: add target to host LS request support") Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index e1abb27927ff74..da195d61a9664c 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -478,7 +478,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, if (targetport) { tport = targetport->private; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); } From f22c458f9495f9164358961dd73b2f62356c9750 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 8 Apr 2025 17:29:04 +0200 Subject: [PATCH 0321/2924] nvmet-fcloop: replace kref with refcount The kref wrapper is not really adding any value ontop of refcount. Thus replace the kref API with the refcount API. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index da195d61a9664c..67e24c7d59306f 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -239,7 +239,7 @@ struct fcloop_nport { struct fcloop_tport *tport; struct fcloop_lport *lport; struct list_head nport_list; - struct kref ref; + refcount_t ref; u64 node_name; u64 port_name; u32 port_role; @@ -274,7 +274,7 @@ struct fcloop_fcpreq { u32 inistate; bool active; bool aborted; - struct kref ref; + refcount_t ref; struct work_struct fcp_rcv_work; struct work_struct abort_rcv_work; struct work_struct tio_done_work; @@ -534,24 +534,18 @@ fcloop_tgt_discovery_evt(struct nvmet_fc_target_port *tgtport) } static void -fcloop_tfcp_req_free(struct kref *ref) +fcloop_tfcp_req_put(struct fcloop_fcpreq *tfcp_req) { - struct fcloop_fcpreq *tfcp_req = - container_of(ref, struct fcloop_fcpreq, ref); + if (!refcount_dec_and_test(&tfcp_req->ref)) + return; kfree(tfcp_req); } -static void -fcloop_tfcp_req_put(struct fcloop_fcpreq *tfcp_req) -{ - kref_put(&tfcp_req->ref, fcloop_tfcp_req_free); -} - static int fcloop_tfcp_req_get(struct fcloop_fcpreq *tfcp_req) { - return kref_get_unless_zero(&tfcp_req->ref); + return refcount_inc_not_zero(&tfcp_req->ref); } static void @@ -748,7 +742,7 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, INIT_WORK(&tfcp_req->fcp_rcv_work, fcloop_fcp_recv_work); INIT_WORK(&tfcp_req->abort_rcv_work, fcloop_fcp_abort_recv_work); INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work); - kref_init(&tfcp_req->ref); + refcount_set(&tfcp_req->ref, 1); queue_work(nvmet_wq, &tfcp_req->fcp_rcv_work); @@ -1001,24 +995,18 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, } static void -fcloop_nport_free(struct kref *ref) +fcloop_nport_put(struct fcloop_nport *nport) { - struct fcloop_nport *nport = - container_of(ref, struct fcloop_nport, ref); + if (!refcount_dec_and_test(&nport->ref)) + return; kfree(nport); } -static void -fcloop_nport_put(struct fcloop_nport *nport) -{ - kref_put(&nport->ref, fcloop_nport_free); -} - static int fcloop_nport_get(struct fcloop_nport *nport) { - return kref_get_unless_zero(&nport->ref); + return refcount_inc_not_zero(&nport->ref); } static void @@ -1249,7 +1237,7 @@ fcloop_alloc_nport(const char *buf, size_t count, bool remoteport) newnport->port_role = opts->roles; if (opts->mask & NVMF_OPT_FCADDR) newnport->port_id = opts->fcaddr; - kref_init(&newnport->ref); + refcount_set(&newnport->ref, 1); spin_lock_irqsave(&fcloop_lock, flags); From 72511b1dc4147dc4af74ddb98c22366124b26d4e Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 8 Apr 2025 17:29:05 +0200 Subject: [PATCH 0322/2924] nvmet-fcloop: add ref counting to lport The fcloop_lport objects live time is controlled by the user interface add_local_port and del_local_port. nport, rport and tport objects are pointing to the lport objects but here is no clear tracking. Let's introduce an explicit ref counter for the lport objects and prepare the stage for restructuring how lports are used. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 44 +++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 67e24c7d59306f..641201e62c1baf 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -208,6 +208,7 @@ struct fcloop_lport { struct nvme_fc_local_port *localport; struct list_head lport_list; struct completion unreg_done; + refcount_t ref; }; struct fcloop_lport_priv { @@ -994,6 +995,27 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, } } +static void +fcloop_lport_put(struct fcloop_lport *lport) +{ + unsigned long flags; + + if (!refcount_dec_and_test(&lport->ref)) + return; + + spin_lock_irqsave(&fcloop_lock, flags); + list_del(&lport->lport_list); + spin_unlock_irqrestore(&fcloop_lock, flags); + + kfree(lport); +} + +static int +fcloop_lport_get(struct fcloop_lport *lport) +{ + return refcount_inc_not_zero(&lport->ref); +} + static void fcloop_nport_put(struct fcloop_nport *nport) { @@ -1017,6 +1039,8 @@ fcloop_localport_delete(struct nvme_fc_local_port *localport) /* release any threads waiting for the unreg to complete */ complete(&lport->unreg_done); + + fcloop_lport_put(lport); } static void @@ -1128,6 +1152,7 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr, lport->localport = localport; INIT_LIST_HEAD(&lport->lport_list); + refcount_set(&lport->ref, 1); spin_lock_irqsave(&fcloop_lock, flags); list_add_tail(&lport->lport_list, &fcloop_lports); @@ -1144,13 +1169,6 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr, return ret ? ret : count; } - -static void -__unlink_local_port(struct fcloop_lport *lport) -{ - list_del(&lport->lport_list); -} - static int __wait_localport_unreg(struct fcloop_lport *lport) { @@ -1163,8 +1181,6 @@ __wait_localport_unreg(struct fcloop_lport *lport) if (!ret) wait_for_completion(&lport->unreg_done); - kfree(lport); - return ret; } @@ -1187,8 +1203,9 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr, list_for_each_entry(tlport, &fcloop_lports, lport_list) { if (tlport->localport->node_name == nodename && tlport->localport->port_name == portname) { + if (!fcloop_lport_get(tlport)) + break; lport = tlport; - __unlink_local_port(lport); break; } } @@ -1198,6 +1215,7 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr, return -ENOENT; ret = __wait_localport_unreg(lport); + fcloop_lport_put(lport); return ret ? ret : count; } @@ -1625,17 +1643,17 @@ static void __exit fcloop_exit(void) for (;;) { lport = list_first_entry_or_null(&fcloop_lports, typeof(*lport), lport_list); - if (!lport) + if (!lport || !fcloop_lport_get(lport)) break; - __unlink_local_port(lport); - spin_unlock_irqrestore(&fcloop_lock, flags); ret = __wait_localport_unreg(lport); if (ret) pr_warn("%s: Failed deleting local port\n", __func__); + fcloop_lport_put(lport); + spin_lock_irqsave(&fcloop_lock, flags); } From aeaa0913a6994bf56572a7035df3bce9ea183556 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 8 Apr 2025 17:29:06 +0200 Subject: [PATCH 0323/2924] nvmet-fc: inline nvmet_fc_delete_assoc No need for this tiny helper with only one user, just inline it. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 7318b736d41417..6487c46ebba8d1 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1075,13 +1075,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) return newhost; } -static void -nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) -{ - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); -} - static void nvmet_fc_delete_assoc_work(struct work_struct *work) { @@ -1089,7 +1082,8 @@ nvmet_fc_delete_assoc_work(struct work_struct *work) container_of(work, struct nvmet_fc_tgt_assoc, del_work); struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_assoc(assoc); + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); nvmet_fc_tgtport_put(tgtport); } From 88517565b5929436741012938702385ef885a9c2 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 8 Apr 2025 17:29:07 +0200 Subject: [PATCH 0324/2924] nvmet-fc: inline nvmet_fc_free_hostport No need for this tiny helper with only one user, let's inline it. And since the hostport ref counter needs to stay in sync, it's not optional anymore to give back the reference. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 6487c46ebba8d1..6d64dadcb356b7 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -995,16 +995,6 @@ nvmet_fc_hostport_get(struct nvmet_fc_hostport *hostport) return kref_get_unless_zero(&hostport->ref); } -static void -nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport) -{ - /* if LLDD not implemented, leave as NULL */ - if (!hostport || !hostport->hosthandle) - return; - - nvmet_fc_hostport_put(hostport); -} - static struct nvmet_fc_hostport * nvmet_fc_match_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) { @@ -1184,7 +1174,7 @@ nvmet_fc_target_assoc_free(struct kref *ref) /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); - nvmet_fc_free_hostport(assoc->hostport); + nvmet_fc_hostport_put(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1449,11 +1439,6 @@ nvmet_fc_free_tgtport(struct kref *ref) struct nvmet_fc_tgtport *tgtport = container_of(ref, struct nvmet_fc_tgtport, ref); struct device *dev = tgtport->dev; - unsigned long flags; - - spin_lock_irqsave(&nvmet_fc_tgtlock, flags); - list_del(&tgtport->tgt_list); - spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); nvmet_fc_free_ls_iodlist(tgtport); @@ -1614,6 +1599,11 @@ int nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + list_del(&tgtport->tgt_list); + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); nvmet_fc_portentry_unbind_tgt(tgtport); From 1a909565733edb1d46c8e9692a1ee278912b5a77 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 8 Apr 2025 17:29:08 +0200 Subject: [PATCH 0325/2924] nvmet-fc: update tgtport ref per assoc We need to take for each unique association a reference. nvmet_fc_alloc_hostport for each newly created association. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 6d64dadcb356b7..42613280c06e82 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1127,6 +1127,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) goto out_ida; assoc->tgtport = tgtport; + nvmet_fc_tgtport_get(tgtport); assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); @@ -1228,6 +1229,8 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); + + nvmet_fc_tgtport_put(tgtport); } static struct nvmet_fc_tgt_assoc * From b0b26ad0e1943de25ce82a7e5af3574f31b1cf99 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 8 Apr 2025 17:29:09 +0200 Subject: [PATCH 0326/2924] nvmet-fc: take tgtport reference only once The reference counting code can be simplified. Instead taking a tgtport refrerence at the beginning of nvmet_fc_alloc_hostport and put it back if not a new hostport object is allocated, only take it when a new hostport object is allocated. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 42613280c06e82..61e9eea3bee430 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1018,33 +1018,24 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) struct nvmet_fc_hostport *newhost, *match = NULL; unsigned long flags; + /* + * Caller holds a reference on tgtport. + */ + /* if LLDD not implemented, leave as NULL */ if (!hosthandle) return NULL; - /* - * take reference for what will be the newly allocated hostport if - * we end up using a new allocation - */ - if (!nvmet_fc_tgtport_get(tgtport)) - return ERR_PTR(-EINVAL); - spin_lock_irqsave(&tgtport->lock, flags); match = nvmet_fc_match_hostport(tgtport, hosthandle); spin_unlock_irqrestore(&tgtport->lock, flags); - if (match) { - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); + if (match) return match; - } newhost = kzalloc(sizeof(*newhost), GFP_KERNEL); - if (!newhost) { - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); + if (!newhost) return ERR_PTR(-ENOMEM); - } spin_lock_irqsave(&tgtport->lock, flags); match = nvmet_fc_match_hostport(tgtport, hosthandle); @@ -1053,6 +1044,7 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) kfree(newhost); newhost = match; } else { + nvmet_fc_tgtport_get(tgtport); newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; INIT_LIST_HEAD(&newhost->host_list); From 70289ae5cac4d3a39575405aaf63330486cea030 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Tue, 8 Apr 2025 17:29:10 +0200 Subject: [PATCH 0327/2924] nvmet-fc: put ref when assoc->del_work is already scheduled Do not leak the tgtport reference when the work is already scheduled. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 61e9eea3bee430..7b50130f10f657 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1073,7 +1073,8 @@ static void nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) { nvmet_fc_tgtport_get(assoc->tgtport); - queue_work(nvmet_wq, &assoc->del_work); + if (!queue_work(nvmet_wq, &assoc->del_work)) + nvmet_fc_tgtport_put(assoc->tgtport); } static bool From 45f5dcdd049719fb999393b30679605f16ebce14 Mon Sep 17 00:00:00 2001 From: Sharath Srinivasan Date: Wed, 26 Mar 2025 14:05:32 -0700 Subject: [PATCH 0328/2924] RDMA/cma: Fix workqueue crash in cma_netevent_work_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct rdma_cm_id has member "struct work_struct net_work" that is reused for enqueuing cma_netevent_work_handler()s onto cma_wq. Below crash[1] can occur if more than one call to cma_netevent_callback() occurs in quick succession, which further enqueues cma_netevent_work_handler()s for the same rdma_cm_id, overwriting any previously queued work-item(s) that was just scheduled to run i.e. there is no guarantee the queued work item may run between two successive calls to cma_netevent_callback() and the 2nd INIT_WORK would overwrite the 1st work item (for the same rdma_cm_id), despite grabbing id_table_lock during enqueue. Also drgn analysis [2] indicates the work item was likely overwritten. Fix this by moving the INIT_WORK() to __rdma_create_id(), so that it doesn't race with any existing queue_work() or its worker thread. [1] Trimmed crash stack: ============================================= BUG: kernel NULL pointer dereference, address: 0000000000000008 kworker/u256:6 ... 6.12.0-0... Workqueue: cma_netevent_work_handler [rdma_cm] (rdma_cm) RIP: 0010:process_one_work+0xba/0x31a Call Trace: worker_thread+0x266/0x3a0 kthread+0xcf/0x100 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1a/0x30 ============================================= [2] drgn crash analysis: >>> trace = prog.crashed_thread().stack_trace() >>> trace (0) crash_setup_regs (./arch/x86/include/asm/kexec.h:111:15) (1) __crash_kexec (kernel/crash_core.c:122:4) (2) panic (kernel/panic.c:399:3) (3) oops_end (arch/x86/kernel/dumpstack.c:382:3) ... (8) process_one_work (kernel/workqueue.c:3168:2) (9) process_scheduled_works (kernel/workqueue.c:3310:3) (10) worker_thread (kernel/workqueue.c:3391:4) (11) kthread (kernel/kthread.c:389:9) Line workqueue.c:3168 for this kernel version is in process_one_work(): 3168 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); >>> trace[8]["work"] *(struct work_struct *)0xffff92577d0a21d8 = { .data = (atomic_long_t){ .counter = (s64)536870912, <=== Note }, .entry = (struct list_head){ .next = (struct list_head *)0xffff924d075924c0, .prev = (struct list_head *)0xffff924d075924c0, }, .func = (work_func_t)cma_netevent_work_handler+0x0 = 0xffffffffc2cec280, } Suspicion is that pwq is NULL: >>> trace[8]["pwq"] (struct pool_workqueue *) In process_one_work(), pwq is assigned from: struct pool_workqueue *pwq = get_work_pwq(work); and get_work_pwq() is: static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) return work_struct_pwq(data); else return NULL; } WORK_STRUCT_PWQ is 0x4: >>> print(repr(prog['WORK_STRUCT_PWQ'])) Object(prog, 'enum work_flags', value=4) But work->data is 536870912 which is 0x20000000. So, get_work_pwq() returns NULL and we crash in process_one_work(): 3168 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); ============================================= Fixes: 925d046e7e52 ("RDMA/core: Add a netevent notifier to cma") Cc: stable@vger.kernel.org Co-developed-by: Håkon Bugge Signed-off-by: Håkon Bugge Signed-off-by: Sharath Srinivasan Reviewed-by: Patrisious Haddad Link: https://patch.msgid.link/bf0082f9-5b25-4593-92c6-d130aa8ba439@oracle.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fedcdb56fb6b7c..ab31eefa916b3e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -72,6 +72,8 @@ static const char * const cma_events[] = { static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, enum ib_gid_type gid_type); +static void cma_netevent_work_handler(struct work_struct *_work); + const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { size_t index = event; @@ -1047,6 +1049,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); id_priv->id.route.addr.dev_addr.net = get_net(net); id_priv->seq_num &= 0x00ffffff; + INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler); rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID); if (parent) @@ -5241,7 +5244,6 @@ static int cma_netevent_callback(struct notifier_block *self, if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr, neigh->ha, ETH_ALEN)) continue; - INIT_WORK(¤t_id->id.net_work, cma_netevent_work_handler); cma_id_get(current_id); queue_work(cma_wq, ¤t_id->id.net_work); } From 8c0cea59d40cf6dd13c2950437631dd614fbade6 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 7 Apr 2025 13:24:07 -0700 Subject: [PATCH 0329/2924] net_sched: sch_sfq: use a temporary work area for validating configuration Many configuration parameters have influence on others (e.g. divisor -> flows -> limit, depth -> limit) and so it is difficult to correctly do all of the validation before applying the configuration. And if a validation error is detected late it is difficult to roll back a partially applied configuration. To avoid these issues use a temporary work area to update and validate the configuration and only then apply the configuration to the internal state. Signed-off-by: Octavian Purdila Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 56 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 65d5b59da58303..7714ae94e05217 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -631,6 +631,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, struct red_parms *p = NULL; struct sk_buff *to_free = NULL; struct sk_buff *tail = NULL; + unsigned int maxflows; + unsigned int quantum; + unsigned int divisor; + int perturb_period; + u8 headdrop; + u8 maxdepth; + int limit; + u8 flags; + if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; @@ -656,36 +665,59 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, NL_SET_ERR_MSG_MOD(extack, "invalid limit"); return -EINVAL; } + sch_tree_lock(sch); + + limit = q->limit; + divisor = q->divisor; + headdrop = q->headdrop; + maxdepth = q->maxdepth; + maxflows = q->maxflows; + perturb_period = q->perturb_period; + quantum = q->quantum; + flags = q->flags; + + /* update and validate configuration */ if (ctl->quantum) - q->quantum = ctl->quantum; - WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); + quantum = ctl->quantum; + perturb_period = ctl->perturb_period * HZ; if (ctl->flows) - q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); if (ctl->divisor) { - q->divisor = ctl->divisor; - q->maxflows = min_t(u32, q->maxflows, q->divisor); + divisor = ctl->divisor; + maxflows = min_t(u32, maxflows, divisor); } if (ctl_v1) { if (ctl_v1->depth) - q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); + maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); if (p) { - swap(q->red_parms, p); - red_set_parms(q->red_parms, + red_set_parms(p, ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog, ctl_v1->Plog, ctl_v1->Scell_log, NULL, ctl_v1->max_P); } - q->flags = ctl_v1->flags; - q->headdrop = ctl_v1->headdrop; + flags = ctl_v1->flags; + headdrop = ctl_v1->headdrop; } if (ctl->limit) { - q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows); - q->maxflows = min_t(u32, q->maxflows, q->limit); + limit = min_t(u32, ctl->limit, maxdepth * maxflows); + maxflows = min_t(u32, maxflows, limit); } + /* commit configuration */ + q->limit = limit; + q->divisor = divisor; + q->headdrop = headdrop; + q->maxdepth = maxdepth; + q->maxflows = maxflows; + WRITE_ONCE(q->perturb_period, perturb_period); + q->quantum = quantum; + q->flags = flags; + if (p) + swap(q->red_parms, p); + qlen = sch->q.qlen; while (sch->q.qlen > q->limit) { dropped += sfq_drop(sch, &to_free); From b3bf8f63e6179076b57c9de660c9f80b5abefe70 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 7 Apr 2025 13:24:08 -0700 Subject: [PATCH 0330/2924] net_sched: sch_sfq: move the limit validation It is not sufficient to directly validate the limit on the data that the user passes as it can be updated based on how the other parameters are changed. Move the check at the end of the configuration update process to also catch scenarios where the limit is indirectly updated, for example with the following configurations: tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 depth 1 tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 divisor 1 This fixes the following syzkaller reported crash: ------------[ cut here ]------------ UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:203:6 index 65535 is out of range for type 'struct sfq_head[128]' CPU: 1 UID: 0 PID: 3037 Comm: syz.2.16 Not tainted 6.14.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x201/0x300 lib/dump_stack.c:120 ubsan_epilogue lib/ubsan.c:231 [inline] __ubsan_handle_out_of_bounds+0xf5/0x120 lib/ubsan.c:429 sfq_link net/sched/sch_sfq.c:203 [inline] sfq_dec+0x53c/0x610 net/sched/sch_sfq.c:231 sfq_dequeue+0x34e/0x8c0 net/sched/sch_sfq.c:493 sfq_reset+0x17/0x60 net/sched/sch_sfq.c:518 qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035 tbf_reset+0x41/0x110 net/sched/sch_tbf.c:339 qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035 dev_reset_queue+0x100/0x1b0 net/sched/sch_generic.c:1311 netdev_for_each_tx_queue include/linux/netdevice.h:2590 [inline] dev_deactivate_many+0x7e5/0xe70 net/sched/sch_generic.c:1375 Reported-by: syzbot Fixes: 10685681bafc ("net_sched: sch_sfq: don't allow 1 packet limit") Signed-off-by: Octavian Purdila Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 7714ae94e05217..58b42dcf8f2013 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -661,10 +661,6 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, if (!p) return -ENOMEM; } - if (ctl->limit == 1) { - NL_SET_ERR_MSG_MOD(extack, "invalid limit"); - return -EINVAL; - } sch_tree_lock(sch); @@ -705,6 +701,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, limit = min_t(u32, ctl->limit, maxdepth * maxflows); maxflows = min_t(u32, maxflows, limit); } + if (limit == 1) { + sch_tree_unlock(sch); + kfree(p); + NL_SET_ERR_MSG_MOD(extack, "invalid limit"); + return -EINVAL; + } /* commit configuration */ q->limit = limit; From 26e705184e7a67bdcded69b4b86b583fc81971ce Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 7 Apr 2025 13:24:09 -0700 Subject: [PATCH 0331/2924] selftests/tc-testing: sfq: check that a derived limit of 1 is rejected Because the limit is updated indirectly when other parameters are updated, there are cases where even though the user requests a limit of 2 it can actually be set to 1. Add the following test cases to check that the kernel rejects them: - limit 2 depth 1 flows 1 - limit 2 depth 1 divisor 1 Signed-off-by: Octavian Purdila Acked-by: Cong Wang Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/qdiscs/sfq.json | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 50e8d72781cbd3..28c6ce6da7dbb8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -228,5 +228,41 @@ "matchCount": "0", "teardown": [ ] + }, + { + "id": "7f8f", + "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 flows 1)", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 flows 1", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] + }, + { + "id": "5168", + "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 divisor 1)", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 divisor 1", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] } ] From af76f7d57ee9a3be7b3840595ce3e2bdedd594a7 Mon Sep 17 00:00:00 2001 From: "Naveen N Rao (AMD)" Date: Wed, 9 Apr 2025 13:13:41 +0200 Subject: [PATCH 0332/2924] Documentation/x86: Update the naming of CPU features for /proc/cpuinfo Commit: 78ce84b9e0a5 ("x86/cpufeatures: Flip the /proc/cpuinfo appearance logic") changed how CPU feature names should be specified. Update document to reflect the same. Signed-off-by: Naveen N Rao (AMD) Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250409111341.GDZ_ZWZS4LckBcirLE@fat_crate.local --- Documentation/arch/x86/cpuinfo.rst | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/Documentation/arch/x86/cpuinfo.rst b/Documentation/arch/x86/cpuinfo.rst index 6ef426a52cdc97..7114f34ba3e629 100644 --- a/Documentation/arch/x86/cpuinfo.rst +++ b/Documentation/arch/x86/cpuinfo.rst @@ -130,14 +130,18 @@ x86_cap/bug_flags[] arrays in kernel/cpu/capflags.c. The names in the resulting x86_cap/bug_flags[] are used to populate /proc/cpuinfo. The naming of flags in the x86_cap/bug_flags[] are as follows: -a: The name of the flag is from the string in X86_FEATURE_ by default. ----------------------------------------------------------------------------- -By default, the flag in /proc/cpuinfo is extracted from the respective -X86_FEATURE_ in cpufeatures.h. For example, the flag "avx2" is from -X86_FEATURE_AVX2. - -b: The naming can be overridden. --------------------------------- +a: Flags do not appear by default in /proc/cpuinfo +-------------------------------------------------- + +Feature flags are omitted by default from /proc/cpuinfo as it does not make +sense for the feature to be exposed to userspace in most cases. For example, +X86_FEATURE_ALWAYS is defined in cpufeatures.h but that flag is an internal +kernel feature used in the alternative runtime patching functionality. So the +flag does not appear in /proc/cpuinfo. + +b: Specify a flag name if absolutely needed +------------------------------------------- + If the comment on the line for the #define X86_FEATURE_* starts with a double-quote character (""), the string inside the double-quote characters will be the name of the flags. For example, the flag "sse4_1" comes from @@ -148,14 +152,6 @@ needed. For instance, /proc/cpuinfo is a userspace interface and must remain constant. If, for some reason, the naming of X86_FEATURE_ changes, one shall override the new naming with the name already used in /proc/cpuinfo. -c: The naming override can be "", which means it will not appear in /proc/cpuinfo. ----------------------------------------------------------------------------------- -The feature shall be omitted from /proc/cpuinfo if it does not make sense for -the feature to be exposed to userspace. For example, X86_FEATURE_ALWAYS is -defined in cpufeatures.h but that flag is an internal kernel feature used -in the alternative runtime patching functionality. So, its name is overridden -with "". Its flag will not appear in /proc/cpuinfo. - Flags are missing when one or more of these happen ================================================== From 254a6d14c9c952e8eae0fafd4fed3778721b948e Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 9 Apr 2025 13:14:35 +0200 Subject: [PATCH 0333/2924] Documentation/x86: Zap the subsection letters The subsections already have numbering - no need for the letters too. Zap the latter. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250409111435.GEZ_ZWmz3_lkP8S9Lb@fat_crate.local --- Documentation/arch/x86/cpuinfo.rst | 51 +++++++++++++++++------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/Documentation/arch/x86/cpuinfo.rst b/Documentation/arch/x86/cpuinfo.rst index 7114f34ba3e629..f80e2a558d2a60 100644 --- a/Documentation/arch/x86/cpuinfo.rst +++ b/Documentation/arch/x86/cpuinfo.rst @@ -79,8 +79,9 @@ feature flags. How are feature flags created? ============================== -a: Feature flags can be derived from the contents of CPUID leaves. ------------------------------------------------------------------- +Feature flags can be derived from the contents of CPUID leaves +-------------------------------------------------------------- + These feature definitions are organized mirroring the layout of CPUID leaves and grouped in words with offsets as mapped in enum cpuid_leafs in cpufeatures.h (see arch/x86/include/asm/cpufeatures.h for details). @@ -89,8 +90,9 @@ cpufeatures.h, and if it is detected at run time, the flags will be displayed accordingly in /proc/cpuinfo. For example, the flag "avx2" comes from X86_FEATURE_AVX2 in cpufeatures.h. -b: Flags can be from scattered CPUID-based features. ----------------------------------------------------- +Flags can be from scattered CPUID-based features +------------------------------------------------ + Hardware features enumerated in sparsely populated CPUID leaves get software-defined values. Still, CPUID needs to be queried to determine if a given feature is present. This is done in init_scattered_cpuid_features(). @@ -104,8 +106,9 @@ has only one feature and would waste 31 bits of space in the x86_capability[] array. Since there is a struct cpuinfo_x86 for each possible CPU, the wasted memory is not trivial. -c: Flags can be created synthetically under certain conditions for hardware features. -------------------------------------------------------------------------------------- +Flags can be created synthetically under certain conditions for hardware features +--------------------------------------------------------------------------------- + Examples of conditions include whether certain features are present in MSR_IA32_CORE_CAPS or specific CPU models are identified. If the needed conditions are met, the features are enabled by the set_cpu_cap or @@ -114,8 +117,8 @@ the feature X86_FEATURE_SPLIT_LOCK_DETECT will be enabled and "split_lock_detect" will be displayed. The flag "ring3mwait" will be displayed only when running on INTEL_XEON_PHI_[KNL|KNM] processors. -d: Flags can represent purely software features. ------------------------------------------------- +Flags can represent purely software features +-------------------------------------------- These flags do not represent hardware features. Instead, they represent a software feature implemented in the kernel. For example, Kernel Page Table Isolation is purely software feature and its feature flag X86_FEATURE_PTI is @@ -130,8 +133,8 @@ x86_cap/bug_flags[] arrays in kernel/cpu/capflags.c. The names in the resulting x86_cap/bug_flags[] are used to populate /proc/cpuinfo. The naming of flags in the x86_cap/bug_flags[] are as follows: -a: Flags do not appear by default in /proc/cpuinfo --------------------------------------------------- +Flags do not appear by default in /proc/cpuinfo +----------------------------------------------- Feature flags are omitted by default from /proc/cpuinfo as it does not make sense for the feature to be exposed to userspace in most cases. For example, @@ -139,8 +142,8 @@ X86_FEATURE_ALWAYS is defined in cpufeatures.h but that flag is an internal kernel feature used in the alternative runtime patching functionality. So the flag does not appear in /proc/cpuinfo. -b: Specify a flag name if absolutely needed -------------------------------------------- +Specify a flag name if absolutely needed +---------------------------------------- If the comment on the line for the #define X86_FEATURE_* starts with a double-quote character (""), the string inside the double-quote characters @@ -155,25 +158,28 @@ shall override the new naming with the name already used in /proc/cpuinfo. Flags are missing when one or more of these happen ================================================== -a: The hardware does not enumerate support for it. --------------------------------------------------- +The hardware does not enumerate support for it +---------------------------------------------- + For example, when a new kernel is running on old hardware or the feature is not enabled by boot firmware. Even if the hardware is new, there might be a problem enabling the feature at run time, the flag will not be displayed. -b: The kernel does not know about the flag. -------------------------------------------- +The kernel does not know about the flag +--------------------------------------- + For example, when an old kernel is running on new hardware. -c: The kernel disabled support for it at compile-time. ------------------------------------------------------- +The kernel disabled support for it at compile-time +-------------------------------------------------- + For example, if 5-level-paging is not enabled when building (i.e., CONFIG_X86_5LEVEL is not selected) the flag "la57" will not show up [#f1]_. Even though the feature will still be detected via CPUID, the kernel disables it by clearing via setup_clear_cpu_cap(X86_FEATURE_LA57). -d: The feature is disabled at boot-time. ----------------------------------------- +The feature is disabled at boot-time +------------------------------------ A feature can be disabled either using a command-line parameter or because it failed to be enabled. The command-line parameter clearcpuid= can be used to disable features using the feature number as defined in @@ -186,8 +192,9 @@ disable specific features. The list of parameters includes, but is not limited to, nofsgsbase, nosgx, noxsave, etc. 5-level paging can also be disabled using "no5lvl". -e: The feature was known to be non-functional. ----------------------------------------------- +The feature was known to be non-functional +------------------------------------------ + The feature was known to be non-functional because a dependency was missing at runtime. For example, AVX flags will not show up if XSAVE feature is disabled since they depend on XSAVE feature. Another example would be broken From e9c7fa025dc6125eb47993515d45da0cd02a263c Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 9 Apr 2025 12:45:20 +0100 Subject: [PATCH 0334/2924] ALSA: hda/cirrus_scodec_test: Don't select dependencies Depend on SND_HDA_CIRRUS_SCODEC and GPIOLIB instead of selecting them. KUNIT_ALL_TESTS should only build tests that have satisfied dependencies and test components that are already being built. It must not cause other stuff to be added to the build. Fixes: 2144833e7b41 ("ALSA: hda: cirrus_scodec: Add KUnit test") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250409114520.914079-1-rf@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index fb955a205d50ce..9c427270ff4f49 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -96,9 +96,7 @@ config SND_HDA_CIRRUS_SCODEC config SND_HDA_CIRRUS_SCODEC_KUNIT_TEST tristate "KUnit test for Cirrus side-codec library" if !KUNIT_ALL_TESTS - select SND_HDA_CIRRUS_SCODEC - select GPIOLIB - depends on KUNIT + depends on SND_HDA_CIRRUS_SCODEC && GPIOLIB && KUNIT default KUNIT_ALL_TESTS help This builds KUnit tests for the cirrus side-codec library. From b5458fcabd96ce29adbf7225c1741ecdfff70a91 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 9 Apr 2025 15:09:08 +0800 Subject: [PATCH 0335/2924] ALSA: hda/realtek - Fixed ASUS platform headset Mic issue ASUS platform Headset Mic was disable by default. Assigned verb table for Mic pin will enable it. Fixes: 7ab61d0a9a35 ("ALSA: hda/realtek: Add support for ASUS B3405 and B3605 Laptops using CS35L41 HDA") Fixes: c86dd79a7c33 ("ALSA: hda/realtek: Add support for ASUS B5405 and B5605 Laptops using CS35L41 HDA") Signed-off-by: Kailang Yang Link: https://lore.kernel.org/0fe3421a6850461fb0b7012cb28ef71d@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 79004bc8107bc2..877137cb09aca4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7969,6 +7969,7 @@ enum { ALC233_FIXUP_MEDION_MTL_SPK, ALC294_FIXUP_BASS_SPEAKER_15, ALC283_FIXUP_DELL_HP_RESUME, + ALC294_FIXUP_ASUS_CS35L41_SPI_2, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -10333,6 +10334,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc283_fixup_dell_hp_resume, }, + [ALC294_FIXUP_ASUS_CS35L41_SPI_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_spi_two, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC, + }, }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -10835,7 +10842,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x12b4, "ASUS B3405CCA / P3405CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x12b4, "ASUS B3405CCA / P3405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -10925,14 +10932,14 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3011, "ASUS B5605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), - SND_PCI_QUIRK(0x1043, 0x3061, "ASUS B3405CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x3071, "ASUS B5405CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x30c1, "ASUS B3605CCA / P3605CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x30d1, "ASUS B5405CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x30e1, "ASUS B5605CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3061, "ASUS B3405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3071, "ASUS B5405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30c1, "ASUS B3605CCA / P3605CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30d1, "ASUS B5405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30e1, "ASUS B5605CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), - SND_PCI_QUIRK(0x1043, 0x31e1, "ASUS B5605CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x31f1, "ASUS B3605CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x31e1, "ASUS B5605CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x31f1, "ASUS B3605CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), From cfb32c656eb7bf1d7a776b8793bb6baa6f58b5a4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 8 Apr 2025 12:20:34 +0800 Subject: [PATCH 0336/2924] crypto: scomp - Fix null-pointer deref when freeing streams As the scomp streams are freed when an algorithm is unregistered, it is possible that the algorithm has never been used at all (e.g., an algorithm that does not have a self-test). So test whether the streams exist before freeing them. Reported-by: Sourabh Jain Fixes: 3d72ad46a23a ("crypto: acomp - Move stream management into scomp layer") Signed-off-by: Herbert Xu Tested-by: Sourabh Jain Signed-off-by: Herbert Xu --- crypto/scompress.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/scompress.c b/crypto/scompress.c index d435d4b24469d4..f67ce38d203d84 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -111,6 +111,9 @@ static void scomp_free_streams(struct scomp_alg *alg) struct crypto_acomp_stream __percpu *stream = alg->stream; int i; + if (!stream) + return; + for_each_possible_cpu(i) { struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i); From b7b39df7e710b0068356e4c696af07aa10e2cd3d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 8 Apr 2025 13:17:20 +0800 Subject: [PATCH 0337/2924] crypto: caam/qi - Fix drv_ctx refcount bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure refcount is raised before request is enqueued since it could be dequeued before the call returns. Reported-by: Sean Anderson Cc: Fixes: 11144416a755 ("crypto: caam/qi - optimize frame queue cleanup") Signed-off-by: Herbert Xu Reviewed-by: Horia Geantă Tested-by: Sean Anderson Signed-off-by: Herbert Xu --- drivers/crypto/caam/qi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index 7701d00bcb3ac5..b6e7c0b29d4e6c 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -122,12 +122,12 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req) qm_fd_addr_set64(&fd, addr); do { + refcount_inc(&req->drv_ctx->refcnt); ret = qman_enqueue(req->drv_ctx->req_fq, &fd); - if (likely(!ret)) { - refcount_inc(&req->drv_ctx->refcnt); + if (likely(!ret)) return 0; - } + refcount_dec(&req->drv_ctx->refcnt); if (ret != -EBUSY) break; num_retries++; From 6ee6bd5d4fce502a5b5a2ea805e9ff16e6aa890f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 9 Apr 2025 09:14:41 +0800 Subject: [PATCH 0338/2924] ublk: fix handling recovery & reissue in ublk_abort_queue() Commit 8284066946e6 ("ublk: grab request reference when the request is handled by userspace") doesn't grab request reference in case of recovery reissue. Then the request can be requeued & re-dispatch & failed when canceling uring command. If it is one zc request, the request can be freed before io_uring returns the zc buffer back, then cause kernel panic: [ 126.773061] BUG: kernel NULL pointer dereference, address: 00000000000000c8 [ 126.773657] #PF: supervisor read access in kernel mode [ 126.774052] #PF: error_code(0x0000) - not-present page [ 126.774455] PGD 0 P4D 0 [ 126.774698] Oops: Oops: 0000 [#1] SMP NOPTI [ 126.775034] CPU: 13 UID: 0 PID: 1612 Comm: kworker/u64:55 Not tainted 6.14.0_blk+ #182 PREEMPT(full) [ 126.775676] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-1.fc39 04/01/2014 [ 126.776275] Workqueue: iou_exit io_ring_exit_work [ 126.776651] RIP: 0010:ublk_io_release+0x14/0x130 [ublk_drv] Fixes it by always grabbing request reference for aborting the request. Reported-by: Caleb Sander Mateos Closes: https://lore.kernel.org/linux-block/CADUfDZodKfOGUeWrnAxcZiLT+puaZX8jDHoj_sfHZCOZwhzz6A@mail.gmail.com/ Fixes: 8284066946e6 ("ublk: grab request reference when the request is handled by userspace") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250409011444.2142010-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 2fd05c1bd30b03..41bed67508f260 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1140,6 +1140,25 @@ static void ublk_complete_rq(struct kref *ref) __ublk_complete_rq(req); } +static void ublk_do_fail_rq(struct request *req) +{ + struct ublk_queue *ubq = req->mq_hctx->driver_data; + + if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) + blk_mq_requeue_request(req, false); + else + __ublk_complete_rq(req); +} + +static void ublk_fail_rq_fn(struct kref *ref) +{ + struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data, + ref); + struct request *req = blk_mq_rq_from_pdu(data); + + ublk_do_fail_rq(req); +} + /* * Since ublk_rq_task_work_cb always fails requests immediately during * exiting, __ublk_fail_req() is only called from abort context during @@ -1153,10 +1172,13 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, { WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); - if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) - blk_mq_requeue_request(req, false); - else - ublk_put_req_ref(ubq, req); + if (ublk_need_req_ref(ubq)) { + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); + + kref_put(&data->ref, ublk_fail_rq_fn); + } else { + ublk_do_fail_rq(req); + } } static void ubq_complete_io_cmd(struct ublk_io *io, int res, From 18461f2a02be04f8bbbe3b37fecfc702e3fa5bc2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 9 Apr 2025 09:14:42 +0800 Subject: [PATCH 0339/2924] ublk: don't fail request for recovery & reissue in case of ubq->canceling ubq->canceling is set with request queue quiesced when io_uring context is exiting. USER_RECOVERY or !RECOVERY_FAIL_IO requires request to be re-queued and re-dispatch after device is recovered. However commit d796cea7b9f3 ("ublk: implement ->queue_rqs()") still may fail any request in case of ubq->canceling, this way breaks USER_RECOVERY or !RECOVERY_FAIL_IO. Fix it by calling __ublk_abort_rq() in case of ubq->canceling. Reviewed-by: Uday Shankar Reported-by: Uday Shankar Closes: https://lore.kernel.org/linux-block/Z%2FQkkTRHfRxtN%2FmB@dev-ushankar.dev.purestorage.com/ Fixes: d796cea7b9f3 ("ublk: implement ->queue_rqs()") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250409011444.2142010-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 41bed67508f260..d6ca2f1097ad91 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1371,7 +1371,8 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq) return BLK_EH_RESET_TIMER; } -static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq) +static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, + bool check_cancel) { blk_status_t res; @@ -1390,7 +1391,7 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq) if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) return BLK_STS_IOERR; - if (unlikely(ubq->canceling)) + if (check_cancel && unlikely(ubq->canceling)) return BLK_STS_IOERR; /* fill iod to slot in io cmd buffer */ @@ -1409,7 +1410,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq = bd->rq; blk_status_t res; - res = ublk_prep_req(ubq, rq); + res = ublk_prep_req(ubq, rq, false); if (res != BLK_STS_OK) return res; @@ -1441,7 +1442,7 @@ static void ublk_queue_rqs(struct rq_list *rqlist) ublk_queue_cmd_list(ubq, &submit_list); ubq = this_q; - if (ublk_prep_req(ubq, req) == BLK_STS_OK) + if (ublk_prep_req(ubq, req, true) == BLK_STS_OK) rq_list_add_tail(&submit_list, req); else rq_list_add_tail(&requeue_list, req); From 7bd47be16108e55e6bc85bdd3cae5c9a2bc98a89 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 7 Apr 2025 10:21:26 +0300 Subject: [PATCH 0340/2924] dm table: Fix W=1 build warning when mempool_needs_integrity is unused The mempool_needs_integrity is unused. This, in particular, prevents kernel builds with Clang, `make W=1` and CONFIG_WERROR=y: drivers/md/dm-table.c:1052:7: error: variable 'mempool_needs_integrity' set but not used [-Werror,-Wunused-but-set-variable] 1052 | bool mempool_needs_integrity = t->integrity_supported; | ^ Fix this by removing the leftover. Fixes: 105ca2a2c2ff ("block: split struct bio_integrity_payload") Signed-off-by: Andy Shevchenko Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 35100a435c88ba..53759dbbe9d605 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1049,7 +1049,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * unsigned int min_pool_size = 0, pool_size; struct dm_md_mempools *pools; unsigned int bioset_flags = 0; - bool mempool_needs_integrity = t->integrity_supported; if (unlikely(type == DM_TYPE_NONE)) { DMERR("no table type is set, can't allocate mempools"); @@ -1074,8 +1073,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * per_io_data_size = max(per_io_data_size, ti->per_io_data_size); min_pool_size = max(min_pool_size, ti->num_flush_bios); - - mempool_needs_integrity |= ti->mempool_needs_integrity; } pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); front_pad = roundup(per_io_data_size, From 843c6cec1af85f05971b7baf3704801895e77d76 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 8 Apr 2025 19:29:26 -0600 Subject: [PATCH 0341/2924] ublk: pass ublksrv_ctrl_cmd * instead of io_uring_cmd * The ublk_ctrl_*() handlers all take struct io_uring_cmd *cmd but only use it to get struct ublksrv_ctrl_cmd *header from the io_uring SQE. Since the caller ublk_ctrl_uring_cmd() has already computed header, pass it instead of cmd. Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250409012928.3527198-1-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 46 +++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index d6ca2f1097ad91..cdb1543fa4a981 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2436,9 +2436,9 @@ static struct ublk_device *ublk_get_device_from_id(int idx) return ub; } -static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) +static int ublk_ctrl_start_dev(struct ublk_device *ub, + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); const struct ublk_param_basic *p = &ub->params.basic; int ublksrv_pid = (int)header->data[0]; struct queue_limits lim = { @@ -2557,9 +2557,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) } static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; cpumask_var_t cpumask; unsigned long queue; @@ -2608,9 +2607,8 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info) info->nr_hw_queues, info->queue_depth); } -static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) +static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublksrv_ctrl_dev_info info; struct ublk_device *ub; @@ -2835,9 +2833,8 @@ static int ublk_ctrl_stop_dev(struct ublk_device *ub) } static int ublk_ctrl_get_dev_info(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr) @@ -2866,9 +2863,8 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub) } static int ublk_ctrl_get_params(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_params_header ph; int ret; @@ -2897,9 +2893,8 @@ static int ublk_ctrl_get_params(struct ublk_device *ub, } static int ublk_ctrl_set_params(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_params_header ph; int ret = -EFAULT; @@ -2963,9 +2958,8 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) } static int ublk_ctrl_start_recovery(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ret = -EINVAL; int i; @@ -3011,9 +3005,8 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, } static int ublk_ctrl_end_recovery(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ublksrv_pid = (int)header->data[0]; int ret = -EINVAL; int i; @@ -3060,9 +3053,8 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, return ret; } -static int ublk_ctrl_get_features(struct io_uring_cmd *cmd) +static int ublk_ctrl_get_features(const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; u64 features = UBLK_F_ALL; @@ -3201,7 +3193,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, goto out; if (cmd_op == UBLK_U_CMD_GET_FEATURES) { - ret = ublk_ctrl_get_features(cmd); + ret = ublk_ctrl_get_features(header); goto out; } @@ -3218,17 +3210,17 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, switch (_IOC_NR(cmd_op)) { case UBLK_CMD_START_DEV: - ret = ublk_ctrl_start_dev(ub, cmd); + ret = ublk_ctrl_start_dev(ub, header); break; case UBLK_CMD_STOP_DEV: ret = ublk_ctrl_stop_dev(ub); break; case UBLK_CMD_GET_DEV_INFO: case UBLK_CMD_GET_DEV_INFO2: - ret = ublk_ctrl_get_dev_info(ub, cmd); + ret = ublk_ctrl_get_dev_info(ub, header); break; case UBLK_CMD_ADD_DEV: - ret = ublk_ctrl_add_dev(cmd); + ret = ublk_ctrl_add_dev(header); break; case UBLK_CMD_DEL_DEV: ret = ublk_ctrl_del_dev(&ub, true); @@ -3237,19 +3229,19 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, ret = ublk_ctrl_del_dev(&ub, false); break; case UBLK_CMD_GET_QUEUE_AFFINITY: - ret = ublk_ctrl_get_queue_affinity(ub, cmd); + ret = ublk_ctrl_get_queue_affinity(ub, header); break; case UBLK_CMD_GET_PARAMS: - ret = ublk_ctrl_get_params(ub, cmd); + ret = ublk_ctrl_get_params(ub, header); break; case UBLK_CMD_SET_PARAMS: - ret = ublk_ctrl_set_params(ub, cmd); + ret = ublk_ctrl_set_params(ub, header); break; case UBLK_CMD_START_USER_RECOVERY: - ret = ublk_ctrl_start_recovery(ub, cmd); + ret = ublk_ctrl_start_recovery(ub, header); break; case UBLK_CMD_END_USER_RECOVERY: - ret = ublk_ctrl_end_recovery(ub, cmd); + ret = ublk_ctrl_end_recovery(ub, header); break; default: ret = -EOPNOTSUPP; From 5fc7d2b5cab47f2ac712f689140b1fed978fb91c Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 9 Apr 2025 13:07:17 +0100 Subject: [PATCH 0342/2924] ASoC: cs42l43: Reset clamp override on jack removal Some of the manually selected jack configurations will disable the headphone clamp override. Restore this on jack removal, such that the state is consistent for a new insert. Fixes: fc918cbe874e ("ASoC: cs42l43: Add support for the cs42l43") Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250409120717.1294528-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index ac19a572fe70cb..20e6ab6f0d4ad7 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -702,6 +702,9 @@ static void cs42l43_clear_jack(struct cs42l43_codec *priv) CS42L43_PGA_WIDESWING_MODE_EN_MASK, 0); regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CTRL, CS42L43_JACK_STEREO_CONFIG_MASK, 0); + regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CLAMP_CTRL, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_MASK, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_MASK); regmap_update_bits(cs42l43->regmap, CS42L43_HS2, CS42L43_HSDET_MODE_MASK | CS42L43_HSDET_MANUAL_MODE_MASK, 0x2 << CS42L43_HSDET_MODE_SHIFT); From 7f991dd3641ef52e0a25b43f27cc61440c4bdcb4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Mar 2025 10:26:25 -0400 Subject: [PATCH 0343/2924] drm/amdgpu/pm: add workload profile pause helper To be used for display idle optimizations when we want to pause non-default profiles. Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher (cherry picked from commit 6dafb5d4c7cdfc8f994e789d050e29e0d5ca6efd) --- .../gpu/drm/amd/include/kgd_pp_interface.h | 1 + drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 19 +++++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 ++ 3 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2a9606118d8994..21dc956b5f35d4 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -429,6 +429,7 @@ struct amd_pm_funcs { int (*set_pp_table)(void *handle, const char *buf, size_t size); void (*debugfs_print_current_performance_level)(void *handle, struct seq_file *m); int (*switch_power_profile)(void *handle, enum PP_SMC_POWER_PROFILE type, bool en); + int (*pause_power_profile)(void *handle, bool pause); /* export to amdgpu */ struct amd_vce_state *(*get_vce_clock_state)(void *handle, u32 idx); int (*dispatch_tasks)(void *handle, enum amd_pp_task task_id, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 81e9b443ca0adc..3533d43ed1e73d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -349,6 +349,25 @@ int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, return ret; } +int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev, + bool pause) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int ret = 0; + + if (amdgpu_sriov_vf(adev)) + return 0; + + if (pp_funcs && pp_funcs->pause_power_profile) { + mutex_lock(&adev->pm.mutex); + ret = pp_funcs->pause_power_profile( + adev->powerplay.pp_handle, pause); + mutex_unlock(&adev->pm.mutex); + } + + return ret; +} + int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, uint32_t pstate) { diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index f93d287dbf1376..4c0f7ad1481661 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -410,6 +410,8 @@ int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, enum PP_SMC_POWER_PROFILE type, bool en); +int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev, + bool pause); int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); From c81a3ceedb1c2ee623bf594594fa3a228ac04a8c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Mar 2025 10:54:56 -0400 Subject: [PATCH 0344/2924] drm/amdgpu/pm/swsmu: implement pause workload profile Add the callback for implementation for swsmu. Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher (cherry picked from commit 92e511d1cecc6a8fa7bdfc8657f16ece9ab4d456) --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 36 ++++++++++++++++++- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 + 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 033c3229b555f0..46cce1d2aaf357 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2398,7 +2398,11 @@ static int smu_switch_power_profile(void *handle, smu_power_profile_mode_get(smu, type); else smu_power_profile_mode_put(smu, type); - ret = smu_bump_power_profile_mode(smu, NULL, 0); + /* don't switch the active workload when paused */ + if (smu->pause_workload) + ret = 0; + else + ret = smu_bump_power_profile_mode(smu, NULL, 0); if (ret) { if (enable) smu_power_profile_mode_put(smu, type); @@ -2411,6 +2415,35 @@ static int smu_switch_power_profile(void *handle, return 0; } +static int smu_pause_power_profile(void *handle, + bool pause) +{ + struct smu_context *smu = handle; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + u32 workload_mask = 1 << PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + int ret; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; + + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && + smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { + smu->pause_workload = pause; + + /* force to bootup default profile */ + if (smu->pause_workload && smu->ppt_funcs->set_power_profile_mode) + ret = smu->ppt_funcs->set_power_profile_mode(smu, + workload_mask, + NULL, + 0); + else + ret = smu_bump_power_profile_mode(smu, NULL, 0); + return ret; + } + + return 0; +} + static enum amd_dpm_forced_level smu_get_performance_level(void *handle) { struct smu_context *smu = handle; @@ -3733,6 +3766,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = { .get_pp_table = smu_sys_get_pp_table, .set_pp_table = smu_sys_set_pp_table, .switch_power_profile = smu_switch_power_profile, + .pause_power_profile = smu_pause_power_profile, /* export to amdgpu */ .dispatch_tasks = smu_handle_dpm_task, .load_firmware = smu_load_microcode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 3ba169639f5460..dd6d0e7aa2425d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -558,6 +558,7 @@ struct smu_context { /* asic agnostic workload mask */ uint32_t workload_mask; + bool pause_workload; /* default/user workload preference */ uint32_t power_profile_mode; uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT]; From 50f29ead1f1ba48983b6c5e3813b15e497714f55 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 28 Mar 2025 10:34:57 +0800 Subject: [PATCH 0345/2924] drm/amd/display: pause the workload setting in dm Pause the workload setting in dm when doing idle optimization Reviewed-by: Alex Deucher Signed-off-by: Kenneth Feng Signed-off-by: Alex Deucher (cherry picked from commit b23f81c442ac33af0c808b4bb26333b881669bb7) --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 87058271b00cc4..e8bdd7f0c46079 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -246,6 +246,8 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) struct vblank_control_work *vblank_work = container_of(work, struct vblank_control_work, work); struct amdgpu_display_manager *dm = vblank_work->dm; + struct amdgpu_device *adev = drm_to_adev(dm->ddev); + int r; mutex_lock(&dm->dc_lock); @@ -273,8 +275,15 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->acrtc->dm_irq_params.allow_sr_entry); } - if (dm->active_vblank_irq_count == 0) + if (dm->active_vblank_irq_count == 0) { + r = amdgpu_dpm_pause_power_profile(adev, true); + if (r) + dev_warn(adev->dev, "failed to set default power profile mode\n"); dc_allow_idle_optimizations(dm->dc, true); + r = amdgpu_dpm_pause_power_profile(adev, false); + if (r) + dev_warn(adev->dev, "failed to restore the power profile mode\n"); + } mutex_unlock(&dm->dc_lock); From 35a5440832b2d6a46841b6bb68855e2622833401 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 6 Apr 2025 17:27:24 -0400 Subject: [PATCH 0346/2924] drm/amdgpu: cancel gfx idle work in device suspend for s0ix This is normally handled in the gfx IP suspend callbacks, but for S0ix, those are skipped because we don't want to touch gfx. So handle it in device suspend. Fixes: b9467983b774 ("drm/amdgpu: add dynamic workload profile switching for gfx10") Fixes: 963537ca2325 ("drm/amdgpu: add dynamic workload profile switching for gfx11") Fixes: 5f95a1549555 ("drm/amdgpu: add dynamic workload profile switching for gfx12") Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 906ad451675155380c1dc1881a244ebde8e8df0a) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6ebf6179064b72..5a625bdb1a8804 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3643,6 +3643,13 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) adev, adev->ip_blocks[i].version->type)) continue; + /* Since we skip suspend for S0i3, we need to cancel the delayed + * idle work here as the suspend callback never gets called. + */ + if (adev->in_s0ix && + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) + cancel_delayed_work_sync(&adev->gfx.idle_work); /* skip suspend of gfx/mes and psp for S0ix * gfx is in gfxoff state, so on resume it will exit gfxoff just * like at runtime. PSP is also part of the always on hardware From 7ba88b5cccc1a99c1afb96e31e7eedac9907704c Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Thu, 20 Mar 2025 12:35:02 +0300 Subject: [PATCH 0347/2924] drm/amd/pm/smu11: Prevent division by zero The user can set any speed value. If speed is greater than UINT_MAX/8, division by zero is possible. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 1e866f1fe528 ("drm/amd/pm: Prevent divide by zero") Signed-off-by: Denis Arefev Signed-off-by: Alex Deucher (cherry picked from commit da7dc714a8f8e1c9fc33c57cd63583779a3bef71) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 78391d8f35a9cb..25fabf336a6401 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1204,7 +1204,7 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; - if (speed == 0) + if (!speed || speed > UINT_MAX/8) return -EINVAL; /* * To prevent from possible overheat, some ASICs may have requirement From 34779e14461cf715238dec5fd43a1e11977ec115 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 27 Mar 2025 17:46:59 -0400 Subject: [PATCH 0348/2924] drm/amdgpu/mes12: optimize MES pipe FW version fetching Don't fetch it again if we already have it. It seems the registers don't reliably have the value at resume in some cases. Fixes: 785f0f9fe742 ("drm/amdgpu: Add mes v12_0 ip block support (v4)") Reviewed-by: Shaoyun.liu Signed-off-by: Alex Deucher (cherry picked from commit 9e7b08d239c2f21e8f417854f81e5ff40edbebff) Cc: stable@vger.kernel.org # 6.12.x --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 183dd3346da576..e6ab617b9a4041 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1392,17 +1392,20 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, mes_v12_0_queue_init_register(ring); } - /* get MES scheduler/KIQ versions */ - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, pipe, 0, 0); + if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) || + ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) { + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, 3, pipe, 0, 0); - if (pipe == AMDGPU_MES_SCHED_PIPE) - adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) - adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } return 0; } From 1595f15391b81815e4ef91c339991913d556c1b6 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 8 Apr 2025 20:23:50 +0800 Subject: [PATCH 0349/2924] erofs: set error to bio if file-backed IO fails If a file-backed IO fails before submitting the bio to the lower filesystem, an error is returned, but the bio->bi_status is not marked as an error. However, the error information should be passed to the end_io handler. Otherwise, the IO request will be treated as successful. Fixes: 283213718f5d ("erofs: support compressed inodes for fileio") Signed-off-by: Sheng Yong Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20250408122351.2104507-1-shengyong1@xiaomi.com Signed-off-by: Gao Xiang --- fs/erofs/fileio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index bec4b56b382651..4fa0a0121288bd 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -32,6 +32,8 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) ret = 0; } if (rq->bio.bi_end_io) { + if (ret < 0 && !rq->bio.bi_status) + rq->bio.bi_status = errno_to_blk_status(ret); rq->bio.bi_end_io(&rq->bio); } else { bio_for_each_folio_all(fi, &rq->bio) { From d385f15d5ba0b4f62575eea09912268bf8136a56 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 8 Apr 2025 19:44:47 +0800 Subject: [PATCH 0350/2924] erofs: add __packed annotation to union(__le16..) I'm unsure why they aren't 2 bytes in size only in arm-linux-gnueabi. Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202504051202.DS7QIknJ-lkp@intel.com Fixes: 61ba89b57905 ("erofs: add 48-bit block addressing on-disk support") Fixes: efb2aef569b3 ("erofs: add encoded extent on-disk definition") Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250408114448.4040220-1-hsiangkao@linux.alibaba.com --- fs/erofs/erofs_fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 9581e9bf8192dc..767fb4acdc93a8 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -56,7 +56,7 @@ struct erofs_super_block { union { __le16 rootnid_2b; /* nid of root directory */ __le16 blocks_hi; /* (48BIT on) blocks count MSB */ - } rb; + } __packed rb; __le64 inos; /* total valid ino # (== f_files - f_favail) */ __le64 epoch; /* base seconds used for compact inodes */ __le32 fixed_nsec; /* fixed nanoseconds for compact inodes */ @@ -148,7 +148,7 @@ union erofs_inode_i_nb { __le16 nlink; /* if EROFS_I_NLINK_1_BIT is unset */ __le16 blocks_hi; /* total blocks count MSB */ __le16 startblk_hi; /* starting block number MSB */ -}; +} __packed; /* 32-byte reduced form of an ondisk inode */ struct erofs_inode_compact { @@ -369,9 +369,9 @@ struct z_erofs_map_header { * bit 7 : pack the whole file into packed inode */ __u8 h_clusterbits; - }; + } __packed; __le16 h_extents_hi; /* extent count MSB */ - }; + } __packed; }; enum { From be45319c9fb1d5c272da9fd34854a7d39e7f58d1 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 8 Apr 2025 19:44:48 +0800 Subject: [PATCH 0351/2924] erofs: fix encoded extents handling - The MSB 32 bits of `z_fragmentoff` are available only in extent records of size >= 8B. - Use round_down() to calculate `lstart` as well as increase `pos` correspondingly for extent records of size == 8B. Fixes: 1d191b4ca51d ("erofs: implement encoded extent metadata") Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250408114448.4040220-2-hsiangkao@linux.alibaba.com --- fs/erofs/zmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 8de50df05dfe1b..14ea47f954f552 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -559,7 +559,8 @@ static int z_erofs_map_blocks_ext(struct inode *inode, pos += sizeof(__le64); lstart = 0; } else { - lstart = map->m_la >> vi->z_lclusterbits; + lstart = round_down(map->m_la, 1 << vi->z_lclusterbits); + pos += (lstart >> vi->z_lclusterbits) * recsz; pa = EROFS_NULL_ADDR; } @@ -614,7 +615,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT; vi->z_fragmentoff = map->m_plen; - if (recsz >= offsetof(struct z_erofs_extent, pstart_lo)) + if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 32; } else if (map->m_plen) { map->m_flags |= EROFS_MAP_MAPPED | From f0df00ebc57f803603f2a2e0df197e51f06fbe90 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 9 Apr 2025 06:58:37 -0700 Subject: [PATCH 0352/2924] x86/cpu: Avoid running off the end of an AMD erratum table The NULL array terminator at the end of erratum_1386_microcode was removed during the switch from x86_cpu_desc to x86_cpu_id. This causes readers to run off the end of the array. Replace the NULL. Fixes: f3f325152673 ("x86/cpu: Move AMD erratum 1386 table over to 'x86_cpu_id'") Reported-by: Jiri Slaby Signed-off-by: Dave Hansen --- arch/x86/kernel/cpu/amd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 79569f72b8ee50..a839ff506f4548 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -805,6 +805,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static const struct x86_cpu_id erratum_1386_microcode[] = { X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, 0x17, 0x01), 0x2, 0x2, 0x0800126e), X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, 0x17, 0x31), 0x0, 0x0, 0x08301052), + {} }; static void fix_erratum_1386(struct cpuinfo_x86 *c) From 9502dd5c7029902f4a425bf959917a5a9e7c0e50 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 9 Apr 2025 11:14:21 -0300 Subject: [PATCH 0353/2924] smb: client: fix UAF in decryption with multichannel After commit f7025d861694 ("smb: client: allocate crypto only for primary server") and commit b0abcd65ec54 ("smb: client: fix UAF in async decryption"), the channels started reusing AEAD TFM from primary channel to perform synchronous decryption, but that can't done as there could be multiple cifsd threads (one per channel) simultaneously accessing it to perform decryption. This fixes the following KASAN splat when running fstest generic/249 with 'vers=3.1.1,multichannel,max_channels=4,seal' against Windows Server 2022: BUG: KASAN: slab-use-after-free in gf128mul_4k_lle+0xba/0x110 Read of size 8 at addr ffff8881046c18a0 by task cifsd/986 CPU: 3 UID: 0 PID: 986 Comm: cifsd Not tainted 6.15.0-rc1 #1 PREEMPT(voluntary) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-3.fc41 04/01/2014 Call Trace: dump_stack_lvl+0x5d/0x80 print_report+0x156/0x528 ? gf128mul_4k_lle+0xba/0x110 ? __virt_addr_valid+0x145/0x300 ? __phys_addr+0x46/0x90 ? gf128mul_4k_lle+0xba/0x110 kasan_report+0xdf/0x1a0 ? gf128mul_4k_lle+0xba/0x110 gf128mul_4k_lle+0xba/0x110 ghash_update+0x189/0x210 shash_ahash_update+0x295/0x370 ? __pfx_shash_ahash_update+0x10/0x10 ? __pfx_shash_ahash_update+0x10/0x10 ? __pfx_extract_iter_to_sg+0x10/0x10 ? ___kmalloc_large_node+0x10e/0x180 ? __asan_memset+0x23/0x50 crypto_ahash_update+0x3c/0xc0 gcm_hash_assoc_remain_continue+0x93/0xc0 crypt_message+0xe09/0xec0 [cifs] ? __pfx_crypt_message+0x10/0x10 [cifs] ? _raw_spin_unlock+0x23/0x40 ? __pfx_cifs_readv_from_socket+0x10/0x10 [cifs] decrypt_raw_data+0x229/0x380 [cifs] ? __pfx_decrypt_raw_data+0x10/0x10 [cifs] ? __pfx_cifs_read_iter_from_socket+0x10/0x10 [cifs] smb3_receive_transform+0x837/0xc80 [cifs] ? __pfx_smb3_receive_transform+0x10/0x10 [cifs] ? __pfx___might_resched+0x10/0x10 ? __pfx_smb3_is_transform_hdr+0x10/0x10 [cifs] cifs_demultiplex_thread+0x692/0x1570 [cifs] ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] ? rcu_is_watching+0x20/0x50 ? rcu_lockdep_current_cpu_online+0x62/0xb0 ? find_held_lock+0x32/0x90 ? kvm_sched_clock_read+0x11/0x20 ? local_clock_noinstr+0xd/0xd0 ? trace_irq_enable.constprop.0+0xa8/0xe0 ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] kthread+0x1fe/0x380 ? kthread+0x10f/0x380 ? __pfx_kthread+0x10/0x10 ? local_clock_noinstr+0xd/0xd0 ? ret_from_fork+0x1b/0x60 ? local_clock+0x15/0x30 ? lock_release+0x29b/0x390 ? rcu_is_watching+0x20/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Tested-by: David Howells Reported-by: Steve French Closes: https://lore.kernel.org/r/CAH2r5mu6Yc0-RJXM3kFyBYUB09XmXBrNodOiCVR4EDrmxq5Szg@mail.gmail.com Fixes: f7025d861694 ("smb: client: allocate crypto only for primary server") Fixes: b0abcd65ec54 ("smb: client: fix UAF in async decryption") Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/cifsencrypt.c | 16 +++++----------- fs/smb/client/smb2ops.c | 6 +++--- fs/smb/client/smb2pdu.c | 11 ++--------- 3 files changed, 10 insertions(+), 23 deletions(-) diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index e69968e88fe724..35892df7335c75 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -704,18 +704,12 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server) cifs_free_hash(&server->secmech.md5); cifs_free_hash(&server->secmech.sha512); - if (!SERVER_IS_CHAN(server)) { - if (server->secmech.enc) { - crypto_free_aead(server->secmech.enc); - server->secmech.enc = NULL; - } - - if (server->secmech.dec) { - crypto_free_aead(server->secmech.dec); - server->secmech.dec = NULL; - } - } else { + if (server->secmech.enc) { + crypto_free_aead(server->secmech.enc); server->secmech.enc = NULL; + } + if (server->secmech.dec) { + crypto_free_aead(server->secmech.dec); server->secmech.dec = NULL; } } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 41d8cd20b25f88..3f7fe74688a922 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4555,9 +4555,9 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, return rc; } } else { - if (unlikely(!server->secmech.dec)) - return -EIO; - + rc = smb3_crypto_aead_allocate(server); + if (unlikely(rc)) + return rc; tfm = server->secmech.dec; } diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 81e05db8e4d5a1..c4d52bebd37d30 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1252,15 +1252,8 @@ SMB2_negotiate(const unsigned int xid, cifs_server_dbg(VFS, "Missing expected negotiate contexts\n"); } - if (server->cipher_type && !rc) { - if (!SERVER_IS_CHAN(server)) { - rc = smb3_crypto_aead_allocate(server); - } else { - /* For channels, just reuse the primary server crypto secmech. */ - server->secmech.enc = server->primary_server->secmech.enc; - server->secmech.dec = server->primary_server->secmech.dec; - } - } + if (server->cipher_type && !rc) + rc = smb3_crypto_aead_allocate(server); neg_exit: free_rsp_buf(resp_buftype, rsp); return rc; From d7b98ae5221007d3f202746903d4c21c7caf7ea9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 9 Apr 2025 17:15:42 +0200 Subject: [PATCH 0354/2924] dma/contiguous: avoid warning about unused size_bytes When building with W=1, this variable is unused for configs with CONFIG_CMA_SIZE_SEL_PERCENTAGE=y: kernel/dma/contiguous.c:67:26: error: 'size_bytes' defined but not used [-Werror=unused-const-variable=] Change this to a macro to avoid the warning. Fixes: c64be2bb1c6e ("drivers: add Contiguous Memory Allocator") Signed-off-by: Arnd Bergmann Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250409151557.3890443-1-arnd@kernel.org --- kernel/dma/contiguous.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 055da410ac71d6..8df0dfaaca18ee 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -64,8 +64,7 @@ struct cma *dma_contiguous_default_area; * Users, who want to set the size of global CMA area for their system * should use cma= kernel parameter. */ -static const phys_addr_t size_bytes __initconst = - (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M; +#define size_bytes ((phys_addr_t)CMA_SIZE_MBYTES * SZ_1M) static phys_addr_t size_cmdline __initdata = -1; static phys_addr_t base_cmdline __initdata; static phys_addr_t limit_cmdline __initdata; From e1a453a57bc76be678bd746f84e3d73f378a9511 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 7 Apr 2025 15:41:39 -0400 Subject: [PATCH 0355/2924] tracing: Do not add length to print format in synthetic events The following causes a vsnprintf fault: # echo 's:wake_lat char[] wakee; u64 delta;' >> /sys/kernel/tracing/dynamic_events # echo 'hist:keys=pid:ts=common_timestamp.usecs if !(common_flags & 0x18)' > /sys/kernel/tracing/events/sched/sched_waking/trigger # echo 'hist:keys=next_pid:delta=common_timestamp.usecs-$ts:onmatch(sched.sched_waking).trace(wake_lat,next_comm,$delta)' > /sys/kernel/tracing/events/sched/sched_switch/trigger Because the synthetic event's "wakee" field is created as a dynamic string (even though the string copied is not). The print format to print the dynamic string changed from "%*s" to "%s" because another location (__set_synth_event_print_fmt()) exported this to user space, and user space did not need that. But it is still used in print_synth_event(), and the output looks like: -0 [001] d..5. 193.428167: wake_lat: wakee=(efault)sshd-sessiondelta=155 sshd-session-879 [001] d..5. 193.811080: wake_lat: wakee=(efault)kworker/u34:5delta=58 -0 [002] d..5. 193.811198: wake_lat: wakee=(efault)bashdelta=91 bash-880 [002] d..5. 193.811371: wake_lat: wakee=(efault)kworker/u35:2delta=21 -0 [001] d..5. 193.811516: wake_lat: wakee=(efault)sshd-sessiondelta=129 sshd-session-879 [001] d..5. 193.967576: wake_lat: wakee=(efault)kworker/u34:5delta=50 The length isn't needed as the string is always nul terminated. Just print the string and not add the length (which was hard coded to the max string length anyway). Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Cc: Tom Zanussi Cc: Douglas Raillard Acked-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/20250407154139.69955768@gandalf.local.home Fixes: 4d38328eb442d ("tracing: Fix synth event printk format for str fields"); Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 969f48742d72c6..33cfbd4ed76dda 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -370,7 +370,6 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, union trace_synth_field *data = &entry->fields[n_u64]; trace_seq_printf(s, print_fmt, se->fields[i]->name, - STR_VAR_LEN_MAX, (char *)entry + data->as_dynamic.offset, i == se->n_fields - 1 ? "" : " "); n_u64++; From 9a0e6f15029e1a8a21e40f06fd05aa52b7f063de Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 19 Mar 2025 14:42:21 +0200 Subject: [PATCH 0356/2924] RDMA/core: Silence oversized kvmalloc() warning syzkaller triggered an oversized kvmalloc() warning. Silence it by adding __GFP_NOWARN. syzkaller log: WARNING: CPU: 7 PID: 518 at mm/util.c:665 __kvmalloc_node_noprof+0x175/0x180 CPU: 7 UID: 0 PID: 518 Comm: c_repro Not tainted 6.11.0-rc6+ #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:__kvmalloc_node_noprof+0x175/0x180 RSP: 0018:ffffc90001e67c10 EFLAGS: 00010246 RAX: 0000000000000100 RBX: 0000000000000400 RCX: ffffffff8149d46b RDX: 0000000000000000 RSI: ffff8881030fae80 RDI: 0000000000000002 RBP: 000000712c800000 R08: 0000000000000100 R09: 0000000000000000 R10: ffffc90001e67c10 R11: 0030ae0601000000 R12: 0000000000000000 R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000000 FS: 00007fde79159740(0000) GS:ffff88813bdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000180 CR3: 0000000105eb4005 CR4: 00000000003706b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ib_umem_odp_get+0x1f6/0x390 mlx5_ib_reg_user_mr+0x1e8/0x450 ib_uverbs_reg_mr+0x28b/0x440 ib_uverbs_write+0x7d3/0xa30 vfs_write+0x1ac/0x6c0 ksys_write+0x134/0x170 ? __sanitizer_cov_trace_pc+0x1c/0x50 do_syscall_64+0x50/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: 37824952dc8f ("RDMA/odp: Use kvcalloc for the dma_list and page_list") Signed-off-by: Shay Drory Link: https://patch.msgid.link/c6cb92379de668be94894f49c2cfa40e73f94d56.1742388096.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/umem_odp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index e9fa22d31c2332..c48ef608302055 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -76,12 +76,14 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, npfns = (end - start) >> PAGE_SHIFT; umem_odp->pfn_list = kvcalloc( - npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL); + npfns, sizeof(*umem_odp->pfn_list), + GFP_KERNEL | __GFP_NOWARN); if (!umem_odp->pfn_list) return -ENOMEM; umem_odp->dma_list = kvcalloc( - ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL); + ndmas, sizeof(*umem_odp->dma_list), + GFP_KERNEL | __GFP_NOWARN); if (!umem_odp->dma_list) { ret = -ENOMEM; goto out_pfn_list; From 16cb6b0509b65ac89187e9402e0b7a9ddf1765ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sun, 6 Oct 2024 19:20:13 +0200 Subject: [PATCH 0357/2924] cifs: Fix encoding of SMB1 Session Setup Kerberos Request in non-UNICODE mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like in UNICODE mode, SMB1 Session Setup Kerberos Request contains oslm and domain strings. Extract common code into ascii_oslm_strings() and ascii_domain_string() functions (similar to unicode variants) and use these functions in non-UNICODE code path in sess_auth_kerberos(). Decision if non-UNICODE or UNICODE mode is used is based on the SMBFLG2_UNICODE flag in Flags2 packed field, and not based on the capabilities of server. Fix this check too. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/sess.c | 60 +++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index f2ca5963cd9d24..b3fa9ee2691272 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -680,6 +680,22 @@ unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp) *pbcc_area = bcc_ptr; } +static void +ascii_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp) +{ + char *bcc_ptr = *pbcc_area; + + strcpy(bcc_ptr, "Linux version "); + bcc_ptr += strlen("Linux version "); + strcpy(bcc_ptr, init_utsname()->release); + bcc_ptr += strlen(init_utsname()->release) + 1; + + strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); + bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; + + *pbcc_area = bcc_ptr; +} + static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, const struct nls_table *nls_cp) { @@ -704,6 +720,25 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, *pbcc_area = bcc_ptr; } +static void ascii_domain_string(char **pbcc_area, struct cifs_ses *ses, + const struct nls_table *nls_cp) +{ + char *bcc_ptr = *pbcc_area; + int len; + + /* copy domain */ + if (ses->domainName != NULL) { + len = strscpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + if (WARN_ON_ONCE(len < 0)) + len = CIFS_MAX_DOMAINNAME_LEN - 1; + bcc_ptr += len; + } /* else we send a null domain name so server will default to its own domain */ + *bcc_ptr = 0; + bcc_ptr++; + + *pbcc_area = bcc_ptr; +} + static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, const struct nls_table *nls_cp) { @@ -749,25 +784,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, *bcc_ptr = 0; bcc_ptr++; /* account for null termination */ - /* copy domain */ - if (ses->domainName != NULL) { - len = strscpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN); - if (WARN_ON_ONCE(len < 0)) - len = CIFS_MAX_DOMAINNAME_LEN - 1; - bcc_ptr += len; - } /* else we send a null domain name so server will default to its own domain */ - *bcc_ptr = 0; - bcc_ptr++; - /* BB check for overflow here */ - strcpy(bcc_ptr, "Linux version "); - bcc_ptr += strlen("Linux version "); - strcpy(bcc_ptr, init_utsname()->release); - bcc_ptr += strlen(init_utsname()->release) + 1; - - strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); - bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; + ascii_domain_string(&bcc_ptr, ses, nls_cp); + ascii_oslm_strings(&bcc_ptr, nls_cp); *pbcc_area = bcc_ptr; } @@ -1570,7 +1590,7 @@ sess_auth_kerberos(struct sess_data *sess_data) sess_data->iov[1].iov_len = msg->secblob_len; pSMB->req.SecurityBlobLength = cpu_to_le16(sess_data->iov[1].iov_len); - if (ses->capabilities & CAP_UNICODE) { + if (pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) { /* unicode strings must be word aligned */ if (!IS_ALIGNED(sess_data->iov[0].iov_len + sess_data->iov[1].iov_len, 2)) { *bcc_ptr = 0; @@ -1579,8 +1599,8 @@ sess_auth_kerberos(struct sess_data *sess_data) unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp); unicode_domain_string(&bcc_ptr, ses, sess_data->nls_cp); } else { - /* BB: is this right? */ - ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + ascii_oslm_strings(&bcc_ptr, sess_data->nls_cp); + ascii_domain_string(&bcc_ptr, ses, sess_data->nls_cp); } sess_data->iov[2].iov_len = (long) bcc_ptr - From 2424e146bee00ddb4d4f79d3224f54634ca8d2bc Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Tue, 8 Apr 2025 12:38:54 +0200 Subject: [PATCH 0358/2924] hrtimer: Add missing ACCESS_PRIVATE() for hrtimer::function The "function" field of struct hrtimer has been changed to private, but two instances have not been converted to use ACCESS_PRIVATE(). Convert them to use ACCESS_PRIVATE(). Fixes: 04257da0c99c ("hrtimers: Make callback function pointer private") Reported-by: kernel test robot Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250408103854.1851093-1-namcao@linutronix.de Closes: https://lore.kernel.org/oe-kbuild-all/202504071931.vOVl13tt-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202504072155.5UAZjYGU-lkp@intel.com/ --- include/linux/hrtimer.h | 2 +- kernel/time/hrtimer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1adcba3ddd7660..1ef867bb8c44b0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -345,7 +345,7 @@ static inline void hrtimer_update_function(struct hrtimer *timer, if (WARN_ON_ONCE(!function)) return; #endif - timer->function = function; + ACCESS_PRIVATE(timer, function) = function; } /* Forward a hrtimer so it expires after now: */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 517ee2590a29e5..30899a8cc52c0a 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -366,7 +366,7 @@ static const struct debug_obj_descr hrtimer_debug_descr; static void *hrtimer_debug_hint(void *addr) { - return ((struct hrtimer *) addr)->function; + return ACCESS_PRIVATE((struct hrtimer *)addr, function); } /* From 1fac13956e9877483ece9d090a62239cdfe9deb7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Apr 2025 21:16:01 +0200 Subject: [PATCH 0359/2924] x86/ibt: Fix hibernate Todd reported, and Len confirmed, that commit 582077c94052 ("x86/cfi: Clean up linkage") broke S4 hiberate on a fair number of machines. Turns out these machines trip #CP when trying to restore the image. As it happens, the commit in question removes two ENDBR instructions in the hibernate code, and clearly got it wrong. Notably restore_image() does an indirect jump to relocated_restore_code(), which is a relocated copy of core_restore_code(). In turn, core_restore_code(), will at the end do an indirect jump to restore_jump_address (r8), which is pointing at a relocated restore_registers(). So both sites do indeed need to be ENDBR. Fixes: 582077c94052 ("x86/cfi: Clean up linkage") Reported-by: Todd Brandt Signed-off-by: Peter Zijlstra (Intel) Tested-by: Todd Brandt Tested-by: Len Brown Link: https://bugzilla.kernel.org/show_bug.cgi?id=219998 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219998 --- arch/x86/power/hibernate_asm_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 8c534c36adfa28..66f066b8fedad2 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -26,7 +26,7 @@ /* code below belongs to the image kernel */ .align PAGE_SIZE SYM_FUNC_START(restore_registers) - ANNOTATE_NOENDBR + ENDBR /* go back to the original page tables */ movq %r9, %cr3 @@ -120,7 +120,7 @@ SYM_FUNC_END(restore_image) /* code below has been relocated to a safe page */ SYM_FUNC_START(core_restore_code) - ANNOTATE_NOENDBR + ENDBR /* switch to temporary page tables */ movq %rax, %cr3 /* flush TLB */ From 87d2de042c602e12230283cd40fa604b881e12f7 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Sun, 23 Mar 2025 17:31:08 +0800 Subject: [PATCH 0360/2924] cxl/core: Fix caching dport GPF DVSEC issue Per Table 8-2 in CXL r3.2 section 8.1.1 and CXL r3.2 section 8.1.6, only CXL Downstream switch ports and CXL root ports have GPF DVSEC for CXL Port(DVSEC ID 04h). CXL subsystem has a gpf_dvsec in struct cxl_port which is used to cache the offset of a GPF DVSEC in PCIe configuration space. It will be updated during the first EP attaching to the cxl_port, so the gpf_dvsec can only cache the GPF DVSEC offset of the dport which the first EP is under. Will not have chance to update it during other EPs attaching. That means CXL subsystem will use the same GPF DVSEC offset for all dports under the port, it will be a problem if the GPF DVSEC offset cached in cxl_port is not the right offset for a dport. Moving gpf_dvsec from struct cxl_port to struct cxl_dport, make every cxl dport has their own GPF DVSEC offset caching, and each cxl dport uses its own GPF DVSEC offset for GPF DVSEC accessing. Fixes: a52b6a2c1c99 ("cxl/pci: Support Global Persistent Flush (GPF)") Signed-off-by: Li Ming Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Tested-by: Davidlohr Bueso Link: https://patch.msgid.link/20250323093110.233040-2-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/core.h | 2 +- drivers/cxl/core/pci.c | 16 ++++++++-------- drivers/cxl/core/port.c | 2 +- drivers/cxl/cxl.h | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 15699299dc11d4..17b692eb325713 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -119,7 +119,7 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, int cxl_ras_init(void); void cxl_ras_exit(void); -int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port); +int cxl_gpf_port_setup(struct cxl_dport *dport); int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, int nid, resource_size_t *size); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 96fecb799cbcf7..aab0a505d527e2 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1128,26 +1128,26 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) return rc; } -int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) +int cxl_gpf_port_setup(struct cxl_dport *dport) { struct pci_dev *pdev; - if (!port) + if (!dport) return -EINVAL; - if (!port->gpf_dvsec) { + if (!dport->gpf_dvsec) { int dvsec; - dvsec = cxl_gpf_get_dvsec(dport_dev, true); + dvsec = cxl_gpf_get_dvsec(dport->dport_dev, true); if (!dvsec) return -EINVAL; - port->gpf_dvsec = dvsec; + dport->gpf_dvsec = dvsec; } - pdev = to_pci_dev(dport_dev); - update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); - update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); + pdev = to_pci_dev(dport->dport_dev); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2); return 0; } diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 0fd6646c1a2e8e..726bd4a7de2703 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1678,7 +1678,7 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) if (rc && rc != -EBUSY) return rc; - cxl_gpf_port_setup(dport_dev, port); + cxl_gpf_port_setup(dport); /* Any more ports to add between this one and the root? */ if (!dev_is_cxl_root_child(&port->dev)) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index be8a7dc77719bd..2d81ccd83916d6 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -592,7 +592,6 @@ struct cxl_dax_region { * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs * @pci_latency: Upstream latency in picoseconds - * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_port { struct device dev; @@ -616,7 +615,6 @@ struct cxl_port { } cdat; bool cdat_available; long pci_latency; - int gpf_dvsec; }; /** @@ -664,6 +662,7 @@ struct cxl_rcrb_info { * @regs: Dport parsed register blocks * @coord: access coordinates (bandwidth and latency performance attributes) * @link_latency: calculated PCIe downstream latency + * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_dport { struct device *dport_dev; @@ -675,6 +674,7 @@ struct cxl_dport { struct cxl_regs regs; struct access_coordinate coord[ACCESS_COORDINATE_MAX]; long link_latency; + int gpf_dvsec; }; /** From 6af941db6a60a27209bdb2da1a3a780574d617fe Mon Sep 17 00:00:00 2001 From: Li Ming Date: Sun, 23 Mar 2025 17:31:09 +0800 Subject: [PATCH 0361/2924] cxl/pci: Update Port GPF timeout only when the first EP attaching update_gpf_port_dvsec() is used to update GPF Phase timeout, if a CXL switch is under a CXL root port, update_gpf_port_dvsec() will be invoked on the CXL root port when each cxl memory device under the CXL switch is attaching. It is enough to be invoked once, others are redundant. When the first EP attaching, it always triggers its ancestor dports to locate their own Port GPF DVSEC. The change is that invoking update_gpf_port_dvsec() on these ancestor dports after ancestor dport locating a Port GPF DVSEC. It guarantees that update_gpf_port_dvsec() is invoked on a dport only happens during the first EP attaching. Signed-off-by: Li Ming Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Tested-by: Davidlohr Bueso Link: https://patch.msgid.link/20250323093110.233040-3-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index aab0a505d527e2..edbdaf1681e8c8 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1130,12 +1130,11 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) int cxl_gpf_port_setup(struct cxl_dport *dport) { - struct pci_dev *pdev; - if (!dport) return -EINVAL; if (!dport->gpf_dvsec) { + struct pci_dev *pdev; int dvsec; dvsec = cxl_gpf_get_dvsec(dport->dport_dev, true); @@ -1143,11 +1142,10 @@ int cxl_gpf_port_setup(struct cxl_dport *dport) return -EINVAL; dport->gpf_dvsec = dvsec; + pdev = to_pci_dev(dport->dport_dev); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2); } - pdev = to_pci_dev(dport->dport_dev); - update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1); - update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2); - return 0; } From 36aace15d9bdcfe6f03e078915067e89719478f5 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Sun, 23 Mar 2025 17:31:10 +0800 Subject: [PATCH 0362/2924] cxl/pci: Drop the parameter is_port of cxl_gpf_get_dvsec() The first parameter of cxl_gpf_get_dvsec() is a struct device, can be used to distinguish if the device is a cxl dport or a cxl pci device by checking the PCIe type of it, so the parameter is_port is unnecessary to cxl_gpf_get_dvsec(), using parameter struct device is enough. Signed-off-by: Li Ming Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Tested-by: Davidlohr Bueso Link: https://patch.msgid.link/20250323093110.233040-4-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/pci.c | 12 +++++++++--- drivers/cxl/cxl.h | 2 +- drivers/cxl/pmem.c | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index edbdaf1681e8c8..3b80e9a76ba862 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1072,14 +1072,20 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) #define GPF_TIMEOUT_BASE_MAX 2 #define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ -u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port) +u16 cxl_gpf_get_dvsec(struct device *dev) { + struct pci_dev *pdev; + bool is_port = true; u16 dvsec; if (!dev_is_pci(dev)) return 0; - dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL, + pdev = to_pci_dev(dev); + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) + is_port = false; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", @@ -1137,7 +1143,7 @@ int cxl_gpf_port_setup(struct cxl_dport *dport) struct pci_dev *pdev; int dvsec; - dvsec = cxl_gpf_get_dvsec(dport->dport_dev, true); + dvsec = cxl_gpf_get_dvsec(dport->dport_dev); if (!dvsec) return -EINVAL; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 2d81ccd83916d6..a9ab46eb061001 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -910,6 +910,6 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #define __mock static #endif -u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port); +u16 cxl_gpf_get_dvsec(struct device *dev); #endif /* __CXL_H__ */ diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index d061fe3d2b8662..e197883690efc1 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -108,7 +108,7 @@ static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd) return; } - if (!cxl_gpf_get_dvsec(cxlds->dev, false)) + if (!cxl_gpf_get_dvsec(cxlds->dev)) return; if (cxl_get_dirty_count(mds, &count)) { From 92e250c624ea37fde64bfd624fd2556f0d846f18 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Apr 2025 15:34:29 +0200 Subject: [PATCH 0363/2924] timekeeping: Add a lockdep override in tick_freeze() tick_freeze() acquires a raw spinlock (tick_freeze_lock). Later in the callchain (timekeeping_suspend() -> mc146818_avoid_UIP()) the RTC driver acquires a spinlock which becomes a sleeping lock on PREEMPT_RT. Lockdep complains about this lock nesting. Add a lockdep override for this special case and a comment explaining why it is okay. Reported-by: Borislav Petkov Reported-by: Chris Bainbridge Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250404133429.pnAzf-eF@linutronix.de Closes: https://lore.kernel.org/all/20250330113202.GAZ-krsjAnurOlTcp-@fat_crate.local/ Closes: https://lore.kernel.org/all/CAP-bSRZ0CWyZZsMtx046YV8L28LhY0fson2g4EqcwRAVN1Jk+Q@mail.gmail.com/ --- kernel/time/tick-common.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index a47bcf71defcf5..9a3859443c042c 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -509,6 +509,7 @@ void tick_resume(void) #ifdef CONFIG_SUSPEND static DEFINE_RAW_SPINLOCK(tick_freeze_lock); +static DEFINE_WAIT_OVERRIDE_MAP(tick_freeze_map, LD_WAIT_SLEEP); static unsigned int tick_freeze_depth; /** @@ -528,9 +529,22 @@ void tick_freeze(void) if (tick_freeze_depth == num_online_cpus()) { trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), true); + /* + * All other CPUs have their interrupts disabled and are + * suspended to idle. Other tasks have been frozen so there + * is no scheduling happening. This means that there is no + * concurrency in the system at this point. Therefore it is + * okay to acquire a sleeping lock on PREEMPT_RT, such as a + * spinlock, because the lock cannot be held by other CPUs + * or threads and acquiring it cannot block. + * + * Inform lockdep about the situation. + */ + lock_map_acquire_try(&tick_freeze_map); system_state = SYSTEM_SUSPEND; sched_clock_suspend(); timekeeping_suspend(); + lock_map_release(&tick_freeze_map); } else { tick_suspend_local(); } @@ -552,8 +566,16 @@ void tick_unfreeze(void) raw_spin_lock(&tick_freeze_lock); if (tick_freeze_depth == num_online_cpus()) { + /* + * Similar to tick_freeze(). On resumption the first CPU may + * acquire uncontended sleeping locks while other CPUs block on + * tick_freeze_lock. + */ + lock_map_acquire_try(&tick_freeze_map); timekeeping_resume(); sched_clock_resume(); + lock_map_release(&tick_freeze_map); + system_state = SYSTEM_RUNNING; trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), false); From 9ce7351291a6c64fa8dc36c786632ae5ded19bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 3 Oct 2024 20:51:00 +0200 Subject: [PATCH 0364/2924] cifs: Remove explicit handling of IO_REPARSE_TAG_MOUNT_POINT in inode.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IO_REPARSE_TAG_MOUNT_POINT is just a specific case of directory Name Surrogate reparse point. As reparse_info_to_fattr() already handles all directory Name Surrogate reparse point (done by the previous change), there is no need to have explicit case for IO_REPARSE_TAG_MOUNT_POINT. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/inode.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 9b56198f723031..33d699c5cf37f7 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1203,10 +1203,6 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, goto out; } break; - case IO_REPARSE_TAG_MOUNT_POINT: - cifs_create_junction_fattr(fattr, sb); - rc = 0; - goto out; default: /* Check for cached reparse point data */ if (data->symlink_target || data->reparse.buf) { From 12193b9801e7559745a7a3202d3d3c9265905461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 3 Oct 2024 21:06:34 +0200 Subject: [PATCH 0365/2924] cifs: Improve handling of name surrogate reparse points in reparse.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like previous changes for file inode.c, handle directory name surrogate reparse points generally also in reparse.c. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/reparse.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 1416b9ffaca124..d79d88e324f466 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -1236,16 +1236,6 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, bool ok; switch (tag) { - case IO_REPARSE_TAG_INTERNAL: - if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY)) - return false; - fallthrough; - case IO_REPARSE_TAG_DFS: - case IO_REPARSE_TAG_DFSR: - case IO_REPARSE_TAG_MOUNT_POINT: - /* See cifs_create_junction_fattr() */ - fattr->cf_mode = S_IFDIR | 0711; - break; case IO_REPARSE_TAG_LX_SYMLINK: case IO_REPARSE_TAG_LX_FIFO: case IO_REPARSE_TAG_AF_UNIX: @@ -1265,7 +1255,14 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, fattr->cf_mode |= S_IFLNK; break; default: - return false; + if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY)) + return false; + if (!IS_REPARSE_TAG_NAME_SURROGATE(tag) && + tag != IO_REPARSE_TAG_INTERNAL) + return false; + /* See cifs_create_junction_fattr() */ + fattr->cf_mode = S_IFDIR | 0711; + break; } fattr->cf_dtype = S_DT(fattr->cf_mode); From 56c0bea52cef0bb02c4d6c23669ef485b5f67c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 14 Oct 2024 15:00:05 +0200 Subject: [PATCH 0366/2924] cifs: Split parse_reparse_point callback to functions: get buffer and parse buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parsing reparse point buffer is generic for all SMB versions and is already implemented by global function parse_reparse_point(). Getting reparse point buffer from the SMB response is SMB version specific, so introduce for it a new callback get_reparse_point_buffer. This functionality split is needed for followup change - getting reparse point buffer without parsing it. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 6 ++---- fs/smb/client/inode.c | 11 +++++++---- fs/smb/client/reparse.c | 15 +++++---------- fs/smb/client/reparse.h | 5 +---- fs/smb/client/smb1ops.c | 17 ++++++----------- fs/smb/client/smb2ops.c | 8 ++++---- 6 files changed, 25 insertions(+), 37 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 07c4688ec4c99e..3b32116b0b4964 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -625,10 +625,8 @@ struct smb_version_operations { bool (*is_status_io_timeout)(char *buf); /* Check for STATUS_NETWORK_NAME_DELETED */ bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); - int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb, - const char *full_path, - struct kvec *rsp_iov, - struct cifs_open_info_data *data); + struct reparse_data_buffer * (*get_reparse_point_buffer)(const struct kvec *rsp_iov, + u32 *plen); int (*create_reparse_symlink)(const unsigned int xid, struct inode *inode, struct dentry *dentry, diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 33d699c5cf37f7..75be4b46bc6f18 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1207,10 +1207,13 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, /* Check for cached reparse point data */ if (data->symlink_target || data->reparse.buf) { rc = 0; - } else if (iov && server->ops->parse_reparse_point) { - rc = server->ops->parse_reparse_point(cifs_sb, - full_path, - iov, data); + } else if (iov && server->ops->get_reparse_point_buffer) { + struct reparse_data_buffer *reparse_buf; + u32 reparse_len; + + reparse_buf = server->ops->get_reparse_point_buffer(iov, &reparse_len); + rc = parse_reparse_point(reparse_buf, reparse_len, + cifs_sb, full_path, data); /* * If the reparse point was not handled but it is the * name surrogate which points to directory, then treat diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index d79d88e324f466..bb25e77c5540c2 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -1099,18 +1099,13 @@ int parse_reparse_point(struct reparse_data_buffer *buf, } } -int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, - const char *full_path, - struct kvec *rsp_iov, - struct cifs_open_info_data *data) +struct reparse_data_buffer *smb2_get_reparse_point_buffer(const struct kvec *rsp_iov, + u32 *plen) { - struct reparse_data_buffer *buf; struct smb2_ioctl_rsp *io = rsp_iov->iov_base; - u32 plen = le32_to_cpu(io->OutputCount); - - buf = (struct reparse_data_buffer *)((u8 *)io + - le32_to_cpu(io->OutputOffset)); - return parse_reparse_point(buf, plen, cifs_sb, full_path, data); + *plen = le32_to_cpu(io->OutputCount); + return (struct reparse_data_buffer *)((u8 *)io + + le32_to_cpu(io->OutputOffset)); } static bool wsl_to_fattr(struct cifs_open_info_data *data, diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index c0be5ab45a78a0..08de853b36a8a9 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -135,9 +135,6 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode, int smb2_mknod_reparse(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev); -int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, - const char *full_path, - struct kvec *rsp_iov, - struct cifs_open_info_data *data); +struct reparse_data_buffer *smb2_get_reparse_point_buffer(const struct kvec *rsp_iov, u32 *len); #endif /* _CIFS_REPARSE_H */ diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 26df807fbe7a9a..b1d665f6ae759f 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -970,18 +970,13 @@ static int cifs_query_symlink(const unsigned int xid, return rc; } -static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, - const char *full_path, - struct kvec *rsp_iov, - struct cifs_open_info_data *data) +static struct reparse_data_buffer *cifs_get_reparse_point_buffer(const struct kvec *rsp_iov, + u32 *plen) { - struct reparse_data_buffer *buf; TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base; - u32 plen = le16_to_cpu(io->ByteCount); - - buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + - le32_to_cpu(io->DataOffset)); - return parse_reparse_point(buf, plen, cifs_sb, full_path, data); + *plen = le16_to_cpu(io->ByteCount); + return (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + + le32_to_cpu(io->DataOffset)); } static bool @@ -1157,7 +1152,7 @@ struct smb_version_operations smb1_operations = { .rename = CIFSSMBRename, .create_hardlink = CIFSCreateHardLink, .query_symlink = cifs_query_symlink, - .parse_reparse_point = cifs_parse_reparse_point, + .get_reparse_point_buffer = cifs_get_reparse_point_buffer, .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 3f7fe74688a922..2fe8eeb9853563 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5303,7 +5303,7 @@ struct smb_version_operations smb20_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .parse_reparse_point = smb2_parse_reparse_point, + .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .create_reparse_symlink = smb2_create_reparse_symlink, @@ -5406,7 +5406,7 @@ struct smb_version_operations smb21_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .parse_reparse_point = smb2_parse_reparse_point, + .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .create_reparse_symlink = smb2_create_reparse_symlink, @@ -5513,7 +5513,7 @@ struct smb_version_operations smb30_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .parse_reparse_point = smb2_parse_reparse_point, + .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .create_reparse_symlink = smb2_create_reparse_symlink, @@ -5629,7 +5629,7 @@ struct smb_version_operations smb311_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .parse_reparse_point = smb2_parse_reparse_point, + .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .create_reparse_symlink = smb2_create_reparse_symlink, From ef86ab131d9127dfbfa8f06e12441d05fdfb090b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 26 Dec 2024 17:12:09 +0100 Subject: [PATCH 0367/2924] cifs: Fix querying of WSL CHR and BLK reparse points over SMB1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reparse point in SMB1 query_path_info() callback was detected then query also for EA $LXDEV. In this EA are stored device major and minor numbers used by WSL CHR and BLK reparse points. Without major and minor numbers, stat() syscall does not work for char and block devices. Similar code is already in SMB2+ query_path_info() callback function. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/smb1ops.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index b1d665f6ae759f..0adeec652dc191 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -568,6 +568,42 @@ static int cifs_query_path_info(const unsigned int xid, data->reparse_point = le32_to_cpu(fi.Attributes) & ATTR_REPARSE; } +#ifdef CONFIG_CIFS_XATTR + /* + * For WSL CHR and BLK reparse points it is required to fetch + * EA $LXDEV which contains major and minor device numbers. + */ + if (!rc && data->reparse_point) { + struct smb2_file_full_ea_info *ea; + + ea = (struct smb2_file_full_ea_info *)data->wsl.eas; + rc = CIFSSMBQAllEAs(xid, tcon, full_path, SMB2_WSL_XATTR_DEV, + &ea->ea_data[SMB2_WSL_XATTR_NAME_LEN + 1], + SMB2_WSL_XATTR_DEV_SIZE, cifs_sb); + if (rc == SMB2_WSL_XATTR_DEV_SIZE) { + ea->next_entry_offset = cpu_to_le32(0); + ea->flags = 0; + ea->ea_name_length = SMB2_WSL_XATTR_NAME_LEN; + ea->ea_value_length = cpu_to_le16(SMB2_WSL_XATTR_DEV_SIZE); + memcpy(&ea->ea_data[0], SMB2_WSL_XATTR_DEV, SMB2_WSL_XATTR_NAME_LEN + 1); + data->wsl.eas_len = sizeof(*ea) + SMB2_WSL_XATTR_NAME_LEN + 1 + + SMB2_WSL_XATTR_DEV_SIZE; + rc = 0; + } else if (rc >= 0) { + /* It is an error if EA $LXDEV has wrong size. */ + rc = -EINVAL; + } else { + /* + * In all other cases ignore error if fetching + * of EA $LXDEV failed. It is needed only for + * WSL CHR and BLK reparse points and wsl_to_fattr() + * handle the case when EA is missing. + */ + rc = 0; + } + } +#endif + return rc; } From f40a673d6b4a128fe95dd9b8c3ed02da50a6a862 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Apr 2025 12:38:59 +0300 Subject: [PATCH 0368/2924] net: phy: move phy_link_change() prior to mdio_bus_phy_may_suspend() In an upcoming change, mdio_bus_phy_may_suspend() will need to distinguish a phylib-based PHY client from a phylink PHY client. For that, it will need to compare the phydev->phy_link_change() function pointer with the eponymous phy_link_change() provided by phylib. To avoid forward function declarations, the default PHY link state change method should be moved upwards. There is no functional change associated with this patch, it is only to reduce the noise from a real bug fix. Signed-off-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250407093900.2155112-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 675fbd22537879..1367296a33896f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -244,6 +244,19 @@ static bool phy_drv_wol_enabled(struct phy_device *phydev) return wol.wolopts != 0; } +static void phy_link_change(struct phy_device *phydev, bool up) +{ + struct net_device *netdev = phydev->attached_dev; + + if (up) + netif_carrier_on(netdev); + else + netif_carrier_off(netdev); + phydev->adjust_link(netdev); + if (phydev->mii_ts && phydev->mii_ts->link_state) + phydev->mii_ts->link_state(phydev->mii_ts, phydev); +} + static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->mdio.dev.driver; @@ -1055,19 +1068,6 @@ struct phy_device *phy_find_first(struct mii_bus *bus) } EXPORT_SYMBOL(phy_find_first); -static void phy_link_change(struct phy_device *phydev, bool up) -{ - struct net_device *netdev = phydev->attached_dev; - - if (up) - netif_carrier_on(netdev); - else - netif_carrier_off(netdev); - phydev->adjust_link(netdev); - if (phydev->mii_ts && phydev->mii_ts->link_state) - phydev->mii_ts->link_state(phydev->mii_ts, phydev); -} - /** * phy_prepare_link - prepares the PHY layer to monitor link status * @phydev: target phy_device struct From fc75ea20ffb452652f0d4033f38fe88d7cfdae35 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Apr 2025 12:40:42 +0300 Subject: [PATCH 0369/2924] net: phy: allow MDIO bus PM ops to start/stop state machine for phylink-controlled PHY DSA has 2 kinds of drivers: 1. Those who call dsa_switch_suspend() and dsa_switch_resume() from their device PM ops: qca8k-8xxx, bcm_sf2, microchip ksz 2. Those who don't: all others. The above methods should be optional. For type 1, dsa_switch_suspend() calls dsa_user_suspend() -> phylink_stop(), and dsa_switch_resume() calls dsa_user_resume() -> phylink_start(). These seem good candidates for setting mac_managed_pm = true because that is essentially its definition [1], but that does not seem to be the biggest problem for now, and is not what this change focuses on. Talking strictly about the 2nd category of DSA drivers here (which do not have MAC managed PM, meaning that for their attached PHYs, mdio_bus_phy_suspend() and mdio_bus_phy_resume() should run in full), I have noticed that the following warning from mdio_bus_phy_resume() is triggered: WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY && phydev->state != PHY_UP); because the PHY state machine is running. It's running as a result of a previous dsa_user_open() -> ... -> phylink_start() -> phy_start() having been initiated by the user. The previous mdio_bus_phy_suspend() was supposed to have called phy_stop_machine(), but it didn't. So this is why the PHY is in state PHY_NOLINK by the time mdio_bus_phy_resume() runs. mdio_bus_phy_suspend() did not call phy_stop_machine() because for phylink, the phydev->adjust_link function pointer is NULL. This seems a technicality introduced by commit fddd91016d16 ("phylib: fix PAL state machine restart on resume"). That commit was written before phylink existed, and was intended to avoid crashing with consumer drivers which don't use the PHY state machine - phylink always does, when using a PHY. But phylink itself has historically not been developed with suspend/resume in mind, and apparently not tested too much in that scenario, allowing this bug to exist unnoticed for so long. Plus, prior to the WARN_ON(), it would have likely been invisible. This issue is not in fact restricted to type 2 DSA drivers (according to the above ad-hoc classification), but can be extrapolated to any MAC driver with phylink and MDIO-bus-managed PHY PM ops. DSA is just where the issue was reported. Assuming mac_managed_pm is set correctly, a quick search indicates the following other drivers might be affected: $ grep -Zlr PHYLINK_NETDEV drivers/ | xargs -0 grep -L mac_managed_pm drivers/net/ethernet/atheros/ag71xx.c drivers/net/ethernet/microchip/sparx5/sparx5_main.c drivers/net/ethernet/microchip/lan966x/lan966x_main.c drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c drivers/net/ethernet/freescale/dpaa/dpaa_eth.c drivers/net/ethernet/freescale/ucc_geth.c drivers/net/ethernet/freescale/enetc/enetc_pf_common.c drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c drivers/net/ethernet/marvell/mvneta.c drivers/net/ethernet/marvell/prestera/prestera_main.c drivers/net/ethernet/mediatek/mtk_eth_soc.c drivers/net/ethernet/altera/altera_tse_main.c drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c drivers/net/ethernet/meta/fbnic/fbnic_phylink.c drivers/net/ethernet/tehuti/tn40_phy.c drivers/net/ethernet/mscc/ocelot_net.c Make the existing conditions dependent on the PHY device having a phydev->phy_link_change() implementation equal to the default phy_link_change() provided by phylib. Otherwise, we implicitly know that the phydev has the phylink-provided phylink_phy_change() callback, and when phylink is used, the PHY state machine always needs to be stopped/ started on the suspend/resume path. The code is structured as such that if phydev->phy_link_change() is absent, it is a matter of time until the kernel will crash - no need to further complicate the test. Thus, for the situation where the PM is not managed by the MAC, we will make the MDIO bus PM ops treat identically the phylink-controlled PHYs with the phylib-controlled PHYs where an adjust_link() callback is supplied. In both cases, the MDIO bus PM ops should stop and restart the PHY state machine. [1] https://lore.kernel.org/netdev/Z-1tiW9zjcoFkhwc@shell.armlinux.org.uk/ Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Reported-by: Wei Fang Tested-by: Wei Fang Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250407094042.2155633-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1367296a33896f..cc1bfd22fb8120 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -257,6 +257,33 @@ static void phy_link_change(struct phy_device *phydev, bool up) phydev->mii_ts->link_state(phydev->mii_ts, phydev); } +/** + * phy_uses_state_machine - test whether consumer driver uses PAL state machine + * @phydev: the target PHY device structure + * + * Ultimately, this aims to indirectly determine whether the PHY is attached + * to a consumer which uses the state machine by calling phy_start() and + * phy_stop(). + * + * When the PHY driver consumer uses phylib, it must have previously called + * phy_connect_direct() or one of its derivatives, so that phy_prepare_link() + * has set up a hook for monitoring state changes. + * + * When the PHY driver is used by the MAC driver consumer through phylink (the + * only other provider of a phy_link_change() method), using the PHY state + * machine is not optional. + * + * Return: true if consumer calls phy_start() and phy_stop(), false otherwise. + */ +static bool phy_uses_state_machine(struct phy_device *phydev) +{ + if (phydev->phy_link_change == phy_link_change) + return phydev->attached_dev && phydev->adjust_link; + + /* phydev->phy_link_change is implicitly phylink_phy_change() */ + return true; +} + static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->mdio.dev.driver; @@ -323,7 +350,7 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev) * may call phy routines that try to grab the same lock, and that may * lead to a deadlock. */ - if (phydev->attached_dev && phydev->adjust_link) + if (phy_uses_state_machine(phydev)) phy_stop_machine(phydev); if (!mdio_bus_phy_may_suspend(phydev)) @@ -377,7 +404,7 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) } } - if (phydev->attached_dev && phydev->adjust_link) + if (phy_uses_state_machine(phydev)) phy_start_machine(phydev); return 0; From 6933cd4714861eea6848f18396a119d741f25fc3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 8 Apr 2025 11:43:16 +0300 Subject: [PATCH 0370/2924] ipv6: Align behavior across nexthops during path selection A nexthop is only chosen when the calculated multipath hash falls in the nexthop's hash region (i.e., the hash is smaller than the nexthop's hash threshold) and when the nexthop is assigned a non-negative score by rt6_score_route(). Commit 4d0ab3a6885e ("ipv6: Start path selection from the first nexthop") introduced an unintentional difference between the first nexthop and the rest when the score is negative. When the first nexthop matches, but has a negative score, the code will currently evaluate subsequent nexthops until one is found with a non-negative score. On the other hand, when a different nexthop matches, but has a negative score, the code will fallback to the nexthop with which the selection started ('match'). Align the behavior across all nexthops and fallback to 'match' when the first nexthop matches, but has a negative score. Fixes: 3d709f69a3e7 ("ipv6: Use hash-threshold instead of modulo-N") Fixes: 4d0ab3a6885e ("ipv6: Start path selection from the first nexthop") Reported-by: Willem de Bruijn Closes: https://lore.kernel.org/netdev/67efef607bc41_1ddca82948c@willemb.c.googlers.com.notmuch/ Signed-off-by: Ido Schimmel Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Link: https://patch.msgid.link/20250408084316.243559-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ab12b816ab9485..210b84cecc2427 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -470,10 +470,10 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, goto out; hash = fl6->mp_hash; - if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && - rt6_score_route(first->fib6_nh, first->fib6_flags, oif, - strict) >= 0) { - match = first; + if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound)) { + if (rt6_score_route(first->fib6_nh, first->fib6_flags, oif, + strict) >= 0) + match = first; goto out; } From 0bb2f7a1ad1f11d861f58e5ee5051c8974ff9569 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 7 Apr 2025 09:33:11 -0700 Subject: [PATCH 0371/2924] net: Fix null-ptr-deref by sock_lock_init_class_and_name() and rmmod. When I ran the repro [0] and waited a few seconds, I observed two LOCKDEP splats: a warning immediately followed by a null-ptr-deref. [1] Reproduction Steps: 1) Mount CIFS 2) Add an iptables rule to drop incoming FIN packets for CIFS 3) Unmount CIFS 4) Unload the CIFS module 5) Remove the iptables rule At step 3), the CIFS module calls sock_release() for the underlying TCP socket, and it returns quickly. However, the socket remains in FIN_WAIT_1 because incoming FIN packets are dropped. At this point, the module's refcnt is 0 while the socket is still alive, so the following rmmod command succeeds. # ss -tan State Recv-Q Send-Q Local Address:Port Peer Address:Port FIN-WAIT-1 0 477 10.0.2.15:51062 10.0.0.137:445 # lsmod | grep cifs cifs 1159168 0 This highlights a discrepancy between the lifetime of the CIFS module and the underlying TCP socket. Even after CIFS calls sock_release() and it returns, the TCP socket does not die immediately in order to close the connection gracefully. While this is generally fine, it causes an issue with LOCKDEP because CIFS assigns a different lock class to the TCP socket's sk->sk_lock using sock_lock_init_class_and_name(). Once an incoming packet is processed for the socket or a timer fires, sk->sk_lock is acquired. Then, LOCKDEP checks the lock context in check_wait_context(), where hlock_class() is called to retrieve the lock class. However, since the module has already been unloaded, hlock_class() logs a warning and returns NULL, triggering the null-ptr-deref. If LOCKDEP is enabled, we must ensure that a module calling sock_lock_init_class_and_name() (CIFS, NFS, etc) cannot be unloaded while such a socket is still alive to prevent this issue. Let's hold the module reference in sock_lock_init_class_and_name() and release it when the socket is freed in sk_prot_free(). Note that sock_lock_init() clears sk->sk_owner for svc_create_socket() that calls sock_lock_init_class_and_name() for a listening socket, which clones a socket by sk_clone_lock() without GFP_ZERO. [0]: CIFS_SERVER="10.0.0.137" CIFS_PATH="//${CIFS_SERVER}/Users/Administrator/Desktop/CIFS_TEST" DEV="enp0s3" CRED="/root/WindowsCredential.txt" MNT=$(mktemp -d /tmp/XXXXXX) mount -t cifs ${CIFS_PATH} ${MNT} -o vers=3.0,credentials=${CRED},cache=none,echo_interval=1 iptables -A INPUT -s ${CIFS_SERVER} -j DROP for i in $(seq 10); do umount ${MNT} rmmod cifs sleep 1 done rm -r ${MNT} iptables -D INPUT -s ${CIFS_SERVER} -j DROP [1]: DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 10 PID: 0 at kernel/locking/lockdep.c:234 hlock_class (kernel/locking/lockdep.c:234 kernel/locking/lockdep.c:223) Modules linked in: cifs_arc4 nls_ucs2_utils cifs_md4 [last unloaded: cifs] CPU: 10 UID: 0 PID: 0 Comm: swapper/10 Not tainted 6.14.0 #36 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:hlock_class (kernel/locking/lockdep.c:234 kernel/locking/lockdep.c:223) ... Call Trace: __lock_acquire (kernel/locking/lockdep.c:4853 kernel/locking/lockdep.c:5178) lock_acquire (kernel/locking/lockdep.c:469 kernel/locking/lockdep.c:5853 kernel/locking/lockdep.c:5816) _raw_spin_lock_nested (kernel/locking/spinlock.c:379) tcp_v4_rcv (./include/linux/skbuff.h:1678 ./include/net/tcp.h:2547 net/ipv4/tcp_ipv4.c:2350) ... BUG: kernel NULL pointer dereference, address: 00000000000000c4 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 10 UID: 0 PID: 0 Comm: swapper/10 Tainted: G W 6.14.0 #36 Tainted: [W]=WARN Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__lock_acquire (kernel/locking/lockdep.c:4852 kernel/locking/lockdep.c:5178) Code: 15 41 09 c7 41 8b 44 24 20 25 ff 1f 00 00 41 09 c7 8b 84 24 a0 00 00 00 45 89 7c 24 20 41 89 44 24 24 e8 e1 bc ff ff 4c 89 e7 <44> 0f b6 b8 c4 00 00 00 e8 d1 bc ff ff 0f b6 80 c5 00 00 00 88 44 RSP: 0018:ffa0000000468a10 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ff1100010091cc38 RCX: 0000000000000027 RDX: ff1100081f09ca48 RSI: 0000000000000001 RDI: ff1100010091cc88 RBP: ff1100010091c200 R08: ff1100083fe6e228 R09: 00000000ffffbfff R10: ff1100081eca0000 R11: ff1100083fe10dc0 R12: ff1100010091cc88 R13: 0000000000000001 R14: 0000000000000000 R15: 00000000000424b1 FS: 0000000000000000(0000) GS:ff1100081f080000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000c4 CR3: 0000000002c4a003 CR4: 0000000000771ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: lock_acquire (kernel/locking/lockdep.c:469 kernel/locking/lockdep.c:5853 kernel/locking/lockdep.c:5816) _raw_spin_lock_nested (kernel/locking/spinlock.c:379) tcp_v4_rcv (./include/linux/skbuff.h:1678 ./include/net/tcp.h:2547 net/ipv4/tcp_ipv4.c:2350) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205 (discriminator 1)) ip_local_deliver_finish (./include/linux/rcupdate.h:878 net/ipv4/ip_input.c:234) ip_sublist_rcv_finish (net/ipv4/ip_input.c:576) ip_list_rcv_finish (net/ipv4/ip_input.c:628) ip_list_rcv (net/ipv4/ip_input.c:670) __netif_receive_skb_list_core (net/core/dev.c:5939 net/core/dev.c:5986) netif_receive_skb_list_internal (net/core/dev.c:6040 net/core/dev.c:6129) napi_complete_done (./include/linux/list.h:37 ./include/net/gro.h:519 ./include/net/gro.h:514 net/core/dev.c:6496) e1000_clean (drivers/net/ethernet/intel/e1000/e1000_main.c:3815) __napi_poll.constprop.0 (net/core/dev.c:7191) net_rx_action (net/core/dev.c:7262 net/core/dev.c:7382) handle_softirqs (kernel/softirq.c:561) __irq_exit_rcu (kernel/softirq.c:596 kernel/softirq.c:435 kernel/softirq.c:662) irq_exit_rcu (kernel/softirq.c:680) common_interrupt (arch/x86/kernel/irq.c:280 (discriminator 14)) asm_common_interrupt (./arch/x86/include/asm/idtentry.h:693) RIP: 0010:default_idle (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:92 arch/x86/kernel/process.c:744) Code: 4c 01 c7 4c 29 c2 e9 72 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa eb 07 0f 00 2d c3 2b 15 00 fb f4 c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 90 90 90 90 90 RSP: 0018:ffa00000000ffee8 EFLAGS: 00000202 RAX: 000000000000640b RBX: ff1100010091c200 RCX: 0000000000061aa4 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff812f30c5 RBP: 000000000000000a R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000002 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? do_idle (kernel/sched/idle.c:186 kernel/sched/idle.c:325) default_idle_call (./include/linux/cpuidle.h:143 kernel/sched/idle.c:118) do_idle (kernel/sched/idle.c:186 kernel/sched/idle.c:325) cpu_startup_entry (kernel/sched/idle.c:422 (discriminator 1)) start_secondary (arch/x86/kernel/smpboot.c:315) common_startup_64 (arch/x86/kernel/head_64.S:421) Modules linked in: cifs_arc4 nls_ucs2_utils cifs_md4 [last unloaded: cifs] CR2: 00000000000000c4 Fixes: ed07536ed673 ("[PATCH] lockdep: annotate nfs/nfsd in-kernel sockets") Signed-off-by: Kuniyuki Iwashima Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250407163313.22682-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 40 ++++++++++++++++++++++++++++++++++++++-- net/core/sock.c | 5 +++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 8daf1b3b12c607..694f954258d437 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -339,6 +339,8 @@ struct sk_filter; * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference * @sk_user_frags: xarray of pages the user is holding a reference on. + * @sk_owner: reference to the real owner of the socket that calls + * sock_lock_init_class_and_name(). */ struct sock { /* @@ -547,6 +549,10 @@ struct sock { struct rcu_head sk_rcu; netns_tracker ns_tracker; struct xarray sk_user_frags; + +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) + struct module *sk_owner; +#endif }; struct sock_bh_locked { @@ -1583,6 +1589,35 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) sk_mem_reclaim(sk); } +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) +static inline void sk_owner_set(struct sock *sk, struct module *owner) +{ + __module_get(owner); + sk->sk_owner = owner; +} + +static inline void sk_owner_clear(struct sock *sk) +{ + sk->sk_owner = NULL; +} + +static inline void sk_owner_put(struct sock *sk) +{ + module_put(sk->sk_owner); +} +#else +static inline void sk_owner_set(struct sock *sk, struct module *owner) +{ +} + +static inline void sk_owner_clear(struct sock *sk) +{ +} + +static inline void sk_owner_put(struct sock *sk) +{ +} +#endif /* * Macro so as to not evaluate some arguments when * lockdep is not enabled. @@ -1592,13 +1627,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) */ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ + sk_owner_set(sk, THIS_MODULE); \ sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ - sizeof((sk)->sk_lock)); \ + sizeof((sk)->sk_lock)); \ lockdep_set_class_and_name(&(sk)->sk_lock.slock, \ - (skey), (sname)); \ + (skey), (sname)); \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) diff --git a/net/core/sock.c b/net/core/sock.c index 323892066def8b..739a79859828f7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2130,6 +2130,8 @@ int sk_getsockopt(struct sock *sk, int level, int optname, */ static inline void sock_lock_init(struct sock *sk) { + sk_owner_clear(sk); + if (sk->sk_kern_sock) sock_lock_init_class_and_name( sk, @@ -2226,6 +2228,9 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) cgroup_sk_free(&sk->sk_cgrp_data); mem_cgroup_sk_free(sk); security_sk_free(sk); + + sk_owner_put(sk); + if (slab != NULL) kmem_cache_free(slab, sk); else From d4bac0288a2b444e468e6df9cb4ed69479ddf14a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 8 Apr 2025 09:27:48 -0400 Subject: [PATCH 0372/2924] bpf: support SKF_NET_OFF and SKF_LL_OFF on skb frags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Classic BPF socket filters with SKB_NET_OFF and SKB_LL_OFF fail to read when these offsets extend into frags. This has been observed with iwlwifi and reproduced with tun with IFF_NAPI_FRAGS. The below straightforward socket filter on UDP port, applied to a RAW socket, will silently miss matching packets. const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt); const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest); struct sock_filter filter_code[] = { BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2), BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport), This is unexpected behavior. Socket filter programs should be consistent regardless of environment. Silent misses are particularly concerning as hard to detect. Use skb_copy_bits for offsets outside linear, same as done for non-SKF_(LL|NET) offsets. Offset is always positive after subtracting the reference threshold SKB_(LL|NET)_OFF, so is always >= skb_(mac|network)_offset. The sum of the two is an offset against skb->data, and may be negative, but it cannot point before skb->head, as skb_(mac|network)_offset would too. This appears to go back to when frag support was introduced to sk_run_filter in linux-2.4.4, before the introduction of git. The amount of code change and 8/16/32 bit duplication are unfortunate. But any attempt I made to be smarter saved very few LoC while complicating the code. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/netdev/20250122200402.3461154-1-maze@google.com/ Link: https://elixir.bootlin.com/linux/2.4.4/source/net/core/filter.c#L244 Reported-by: Matt Moeller Co-developed-by: Maciej Żenczykowski Signed-off-by: Maciej Żenczykowski Signed-off-by: Willem de Bruijn Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20250408132833.195491-2-willemdebruijn.kernel@gmail.com Signed-off-by: Alexei Starovoitov --- net/core/filter.c | 80 ++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index bc6828761a47c0..79cab4d78dc3c6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -218,24 +218,36 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) return 0; } +static int bpf_skb_load_helper_convert_offset(const struct sk_buff *skb, int offset) +{ + if (likely(offset >= 0)) + return offset; + + if (offset >= SKF_NET_OFF) + return offset - SKF_NET_OFF + skb_network_offset(skb); + + if (offset >= SKF_LL_OFF && skb_mac_header_was_set(skb)) + return offset - SKF_LL_OFF + skb_mac_offset(skb); + + return INT_MIN; +} + BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - u8 tmp, *ptr; + u8 tmp; const int len = sizeof(tmp); - if (offset >= 0) { - if (headlen - offset >= len) - return *(u8 *)(data + offset); - if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) - return tmp; - } else { - ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); - if (likely(ptr)) - return *(u8 *)ptr; - } + offset = bpf_skb_load_helper_convert_offset(skb, offset); + if (offset == INT_MIN) + return -EFAULT; - return -EFAULT; + if (headlen - offset >= len) + return *(u8 *)(data + offset); + if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) + return tmp; + else + return -EFAULT; } BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, @@ -248,21 +260,19 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - __be16 tmp, *ptr; + __be16 tmp; const int len = sizeof(tmp); - if (offset >= 0) { - if (headlen - offset >= len) - return get_unaligned_be16(data + offset); - if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) - return be16_to_cpu(tmp); - } else { - ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); - if (likely(ptr)) - return get_unaligned_be16(ptr); - } + offset = bpf_skb_load_helper_convert_offset(skb, offset); + if (offset == INT_MIN) + return -EFAULT; - return -EFAULT; + if (headlen - offset >= len) + return get_unaligned_be16(data + offset); + if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) + return be16_to_cpu(tmp); + else + return -EFAULT; } BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, @@ -275,21 +285,19 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - __be32 tmp, *ptr; + __be32 tmp; const int len = sizeof(tmp); - if (likely(offset >= 0)) { - if (headlen - offset >= len) - return get_unaligned_be32(data + offset); - if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) - return be32_to_cpu(tmp); - } else { - ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); - if (likely(ptr)) - return get_unaligned_be32(ptr); - } + offset = bpf_skb_load_helper_convert_offset(skb, offset); + if (offset == INT_MIN) + return -EFAULT; - return -EFAULT; + if (headlen - offset >= len) + return get_unaligned_be32(data + offset); + if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) + return be32_to_cpu(tmp); + else + return -EFAULT; } BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb, From fcd7132cb1f93e4d4594ecb19b8dcecdf0497d9e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 8 Apr 2025 09:27:49 -0400 Subject: [PATCH 0373/2924] selftests/net: test sk_filter support for SKF_NET_OFF on frags Verify that a classic BPF linux socket filter correctly matches packet contents. Including when accessing contents in an skb_frag. 1. Open a SOCK_RAW socket with a classic BPF filter on UDP dport 8000. 2. Open a tap device with IFF_NAPI_FRAGS to inject skbs with frags. 3. Send a packet for which the UDP header is in frag[0]. 4. Receive this packet to demonstrate that the socket accepted it. Acked-by: Stanislav Fomichev Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20250408132833.195491-3-willemdebruijn.kernel@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 2 + tools/testing/selftests/net/skf_net_off.c | 244 +++++++++++++++++++++ tools/testing/selftests/net/skf_net_off.sh | 30 +++ 4 files changed, 277 insertions(+) create mode 100644 tools/testing/selftests/net/skf_net_off.c create mode 100755 tools/testing/selftests/net/skf_net_off.sh diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 679542f565a484..532bb732bc6dd3 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -39,6 +39,7 @@ scm_rights sk_bind_sendto_listen sk_connect_zero_addr sk_so_peek_off +skf_net_off socket so_incoming_cpu so_netns_cookie diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 6d718b478ed83f..124078b56fa447 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -106,6 +106,8 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh TEST_GEN_PROGS += proc_net_pktgen TEST_PROGS += lwt_dst_cache_ref_loop.sh +TEST_PROGS += skf_net_off.sh +TEST_GEN_FILES += skf_net_off # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c new file mode 100644 index 00000000000000..1fdf61d6cd7fe9 --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Open a tun device. + * + * [modifications: use IFF_NAPI_FRAGS, add sk filter] + * + * Expects the device to have been configured previously, e.g.: + * sudo ip tuntap add name tap1 mode tap + * sudo ip link set tap1 up + * sudo ip link set dev tap1 addr 02:00:00:00:00:01 + * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad + * + * And to avoid premature pskb_may_pull: + * + * sudo ethtool -K tap1 gro off + * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux' + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool cfg_do_filter; +static bool cfg_do_frags; +static int cfg_dst_port = 8000; +static char *cfg_ifname; + +static int tun_open(const char *tun_name) +{ + struct ifreq ifr = {0}; + int fd, ret; + + fd = open("/dev/net/tun", O_RDWR); + if (fd == -1) + error(1, errno, "open /dev/net/tun"); + + ifr.ifr_flags = IFF_TAP; + if (cfg_do_frags) + ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS; + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1); + + ret = ioctl(fd, TUNSETIFF, &ifr); + if (ret) + error(1, ret, "ioctl TUNSETIFF"); + + return fd; +} + +static void sk_set_filter(int fd) +{ + const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt); + const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest); + + /* Filter UDP packets with destination port cfg_dst_port */ + struct sock_filter filter_code[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + + struct sock_fprog filter = { + sizeof(filter_code) / sizeof(filter_code[0]), + filter_code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter))) + error(1, errno, "setsockopt attach filter"); +} + +static int raw_open(void) +{ + int fd; + + fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); + if (fd == -1) + error(1, errno, "socket raw (udp)"); + + if (cfg_do_filter) + sk_set_filter(fd); + + return fd; +} + +static void tun_write(int fd) +{ + const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 }; + const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }; + struct tun_pi pi = {0}; + struct ipv6hdr ip6h = {0}; + struct udphdr uh = {0}; + struct ethhdr eth = {0}; + uint32_t payload; + struct iovec iov[5]; + int ret; + + pi.proto = htons(ETH_P_IPV6); + + memcpy(eth.h_source, eth_src, sizeof(eth_src)); + memcpy(eth.h_dest, eth_dst, sizeof(eth_dst)); + eth.h_proto = htons(ETH_P_IPV6); + + ip6h.version = 6; + ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t)); + ip6h.nexthdr = IPPROTO_UDP; + ip6h.hop_limit = 8; + if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1) + error(1, errno, "inet_pton src"); + if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1) + error(1, errno, "inet_pton src"); + + uh.source = htons(8000); + uh.dest = htons(cfg_dst_port); + uh.len = ip6h.payload_len; + uh.check = 0; + + payload = htonl(0xABABABAB); /* Covered in IPv6 length */ + + iov[0].iov_base = π + iov[0].iov_len = sizeof(pi); + iov[1].iov_base = ð + iov[1].iov_len = sizeof(eth); + iov[2].iov_base = &ip6h; + iov[2].iov_len = sizeof(ip6h); + iov[3].iov_base = &uh; + iov[3].iov_len = sizeof(uh); + iov[4].iov_base = &payload; + iov[4].iov_len = sizeof(payload); + + ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0])); + if (ret <= 0) + error(1, errno, "writev"); +} + +static void raw_read(int fd) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + struct msghdr msg = {0}; + struct iovec iov[2]; + struct udphdr uh; + uint32_t payload[2]; + int ret; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcvtimeo udp"); + + iov[0].iov_base = &uh; + iov[0].iov_len = sizeof(uh); + + iov[1].iov_base = payload; + iov[1].iov_len = sizeof(payload); + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + ret = recvmsg(fd, &msg, 0); + if (ret <= 0) + error(1, errno, "read raw"); + if (ret != sizeof(uh) + sizeof(payload[0])) + error(1, errno, "read raw: len=%d\n", ret); + + fprintf(stderr, "raw recv: 0x%x\n", payload[0]); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "fFi:")) != -1) { + switch (c) { + case 'f': + cfg_do_filter = true; + printf("bpf filter enabled\n"); + break; + case 'F': + cfg_do_frags = true; + printf("napi frags mode enabled\n"); + break; + case 'i': + cfg_ifname = optarg; + break; + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!cfg_ifname) + error(1, 0, "must specify tap interface name (-i)"); +} + +int main(int argc, char **argv) +{ + int fdt, fdr; + + parse_opts(argc, argv); + + fdr = raw_open(); + fdt = tun_open(cfg_ifname); + + tun_write(fdt); + raw_read(fdr); + + if (close(fdt)) + error(1, errno, "close tun"); + if (close(fdr)) + error(1, errno, "close udp"); + + fprintf(stderr, "OK\n"); + return 0; +} + diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh new file mode 100755 index 00000000000000..5da5066fb46538 --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly NS="ns-$(mktemp -u XXXXXX)" + +cleanup() { + ip netns del $NS +} + +ip netns add $NS +trap cleanup EXIT + +ip -netns $NS link set lo up +ip -netns $NS tuntap add name tap1 mode tap +ip -netns $NS link set tap1 up +ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01 +ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad +ip netns exec $NS ethtool -K tap1 gro off + +# disable early demux, else udp_v6_early_demux pulls udp header into linear +ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0 + +echo "no filter" +ip netns exec $NS ./skf_net_off -i tap1 + +echo "filter, linear skb (-f)" +ip netns exec $NS ./skf_net_off -i tap1 -f + +echo "filter, fragmented skb (-f) (-F)" +ip netns exec $NS ./skf_net_off -i tap1 -f -F From 9992649f6786921873a9b89dafa5e04d8c5fef2b Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Wed, 9 Apr 2025 20:48:13 +0800 Subject: [PATCH 0374/2924] cpufreq: apple-soc: Fix null-ptr-deref in apple_soc_cpufreq_get_rate() cpufreq_cpu_get_raw() can return NULL when the target CPU is not present in the policy->cpus mask. apple_soc_cpufreq_get_rate() does not check for this case, which results in a NULL pointer dereference. Fixes: 6286bbb40576 ("cpufreq: apple-soc: Add new driver to control Apple SoC CPU P-states") Signed-off-by: Henry Martin Signed-off-by: Viresh Kumar --- drivers/cpufreq/apple-soc-cpufreq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index 4994c86feb5738..b1d29b7af23262 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -134,11 +134,17 @@ static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = { static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct apple_cpu_priv *priv = policy->driver_data; + struct cpufreq_policy *policy; + struct apple_cpu_priv *priv; struct cpufreq_frequency_table *p; unsigned int pstate; + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + if (priv->info->cur_pstate_mask) { u32 reg = readl_relaxed(priv->reg_base + APPLE_DVFS_STATUS); From 484d3f15cc6cbaa52541d6259778e715b2c83c54 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Tue, 8 Apr 2025 23:03:53 +0800 Subject: [PATCH 0375/2924] cpufreq: scmi: Fix null-ptr-deref in scmi_cpufreq_get_rate() cpufreq_cpu_get_raw() can return NULL when the target CPU is not present in the policy->cpus mask. scmi_cpufreq_get_rate() does not check for this case, which results in a NULL pointer dereference. Add NULL check after cpufreq_cpu_get_raw() to prevent this issue. Fixes: 99d6bdf33877 ("cpufreq: add support for CPU DVFS based on SCMI message protocol") Signed-off-by: Henry Martin Acked-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index c310aeebc8f36b..944e899eb1be13 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -37,11 +37,17 @@ static struct cpufreq_driver scmi_cpufreq_driver; static unsigned int scmi_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct scmi_data *priv = policy->driver_data; + struct cpufreq_policy *policy; + struct scmi_data *priv; unsigned long rate; int ret; + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + ret = perf_ops->freq_get(ph, priv->domain_id, &rate, false); if (ret) return 0; From 73b24dc731731edf762f9454552cb3a5b7224949 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Tue, 8 Apr 2025 23:03:54 +0800 Subject: [PATCH 0376/2924] cpufreq: scpi: Fix null-ptr-deref in scpi_cpufreq_get_rate() cpufreq_cpu_get_raw() can return NULL when the target CPU is not present in the policy->cpus mask. scpi_cpufreq_get_rate() does not check for this case, which results in a NULL pointer dereference. Fixes: 343a8d17fa8d ("cpufreq: scpi: remove arm_big_little dependency") Signed-off-by: Henry Martin Acked-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/scpi-cpufreq.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 17cda84f00dfb9..dcbb0ae7dd476c 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -29,9 +29,16 @@ static struct scpi_ops *scpi_ops; static unsigned int scpi_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct scpi_data *priv = policy->driver_data; - unsigned long rate = clk_get_rate(priv->clk); + struct cpufreq_policy *policy; + struct scpi_data *priv; + unsigned long rate; + + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + rate = clk_get_rate(priv->clk); return rate / 1000; } From 56c283b9e001098362c76547cfaae022d48549c8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 9 Apr 2025 22:45:05 -0500 Subject: [PATCH 0377/2924] smb3: Add defines for two new FileSystemAttributes Two new file system attributes were recently added. See MS-FSCC 2.5.1: FILE_SUPPORTS_POSIX_UNLINK_RENAME and FILE_RETURNS_CLEANUP_RESULT_INFO Update the missing defines for ksmbd and cifs.ko Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/client/cifspdu.h | 2 ++ fs/smb/server/smb_common.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 48d0d6f439cf45..18d67ab113f0ec 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -2256,6 +2256,8 @@ typedef struct { #define FILE_SUPPORTS_ENCRYPTION 0x00020000 #define FILE_SUPPORTS_OBJECT_IDS 0x00010000 #define FILE_VOLUME_IS_COMPRESSED 0x00008000 +#define FILE_SUPPORTS_POSIX_UNLINK_RENAME 0x00000400 +#define FILE_RETURNS_CLEANUP_RESULT_INFO 0x00000200 #define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100 #define FILE_SUPPORTS_REPARSE_POINTS 0x00000080 #define FILE_SUPPORTS_SPARSE_FILES 0x00000040 diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index a3d8a905b07e07..d742ba754348bb 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -72,6 +72,8 @@ #define FILE_SUPPORTS_ENCRYPTION 0x00020000 #define FILE_SUPPORTS_OBJECT_IDS 0x00010000 #define FILE_VOLUME_IS_COMPRESSED 0x00008000 +#define FILE_SUPPORTS_POSIX_UNLINK_RENAME 0x00000400 +#define FILE_RETURNS_CLEANUP_RESULT_INFO 0x00000200 #define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100 #define FILE_SUPPORTS_REPARSE_POINTS 0x00000080 #define FILE_SUPPORTS_SPARSE_FILES 0x00000040 From f5ffef9881a76764477978c39f1ad0136a4adcab Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Thu, 10 Apr 2025 00:20:47 -0400 Subject: [PATCH 0378/2924] erofs: remove duplicate code Remove duplicate code in function z_erofs_register_pcluster() Signed-off-by: Bo Liu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20250410042048.3044-2-liubo03@inspur.com Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 0671184d9cf1ae..5c061aaeeb45b3 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -725,7 +725,6 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe) lockref_init(&pcl->lockref); /* one ref for this request */ pcl->algorithmformat = map->m_algorithmformat; pcl->pclustersize = map->m_plen; - pcl->pageofs_in = pageofs_in; pcl->length = 0; pcl->partial = true; pcl->next = fe->head; From fd15594ba7d559d9da741504c322b9f57c4981e5 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 9 Apr 2025 13:22:39 +0100 Subject: [PATCH 0379/2924] soundwire: bus: Fix race on the creation of the IRQ domain The SoundWire IRQ domain needs to be created before any slaves are added to the bus, such that the domain is always available when needed. Move the call to sdw_irq_create() before the calls to sdw_acpi_find_slaves() and sdw_of_find_slaves(). Fixes: 12a95123bfe1 ("soundwire: bus: Allow SoundWire peripherals to register IRQ handlers") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20250409122239.1396489-1-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- drivers/soundwire/bus.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 6f8a20014e76d4..39aecd34c6414b 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -122,6 +122,10 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, set_bit(SDW_GROUP13_DEV_NUM, bus->assigned); set_bit(SDW_MASTER_DEV_NUM, bus->assigned); + ret = sdw_irq_create(bus, fwnode); + if (ret) + return ret; + /* * SDW is an enumerable bus, but devices can be powered off. So, * they won't be able to report as present. @@ -138,6 +142,7 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, if (ret < 0) { dev_err(bus->dev, "Finding slaves failed:%d\n", ret); + sdw_irq_delete(bus); return ret; } @@ -156,10 +161,6 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, bus->params.curr_bank = SDW_BANK0; bus->params.next_bank = SDW_BANK1; - ret = sdw_irq_create(bus, fwnode); - if (ret) - return ret; - return 0; } EXPORT_SYMBOL(sdw_bus_master_add); From 210db264cf87da8908c395b44170f04469009035 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Wed, 12 Mar 2025 14:42:10 +0800 Subject: [PATCH 0380/2924] drm/rockchip: vop2: Fix interface enable/mux setting of DP1 on rk3588 This is a copy-paste error, which affects DP1 usage. Fixes: 328e6885996c ("drm/rockchip: vop2: Add platform specific callback") Signed-off-by: Andy Yan Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250312064218.524143-1-andyshrk@163.com --- drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 14958d6b3d2e70..0a2840cbe8e22d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -1754,9 +1754,9 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP0_PIN_POL, polflags); break; case ROCKCHIP_VOP2_EP_DP1: - die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX; - die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 | - FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, vp->id); + die &= ~RK3588_SYS_DSP_INFACE_EN_DP1_MUX; + die |= RK3588_SYS_DSP_INFACE_EN_DP1 | + FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_DP1_MUX, vp->id); dip &= ~RK3588_DSP_IF_POL__DP1_PIN_POL; dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP1_PIN_POL, polflags); break; From 1d34597a1e23004c7dd0ab5f58ba1ef95fd9ded5 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Mon, 17 Mar 2025 18:27:53 +0800 Subject: [PATCH 0381/2924] drm/rockchip: dw_hdmi_qp: Fix io init for dw_hdmi_qp_rockchip_resume Use cfg->ctrl_ops->io_init callback make it work for all platform. And it's also gets rid of code duplication Fixes: 3f60dbd40d3f ("drm/rockchip: dw_hdmi_qp: Add platform ctrl callback") Signed-off-by: Andy Yan Reviewed-by: Sebastian Reichel Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250317102757.565679-1-andyshrk@163.com --- .../gpu/drm/rockchip/dw_hdmi_qp-rockchip.c | 23 +++---------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c index 3d1dddb346035c..7d531b6f4c098c 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c @@ -94,6 +94,7 @@ struct rockchip_hdmi_qp { struct gpio_desc *enable_gpio; struct delayed_work hpd_work; int port_id; + const struct rockchip_hdmi_qp_ctrl_ops *ctrl_ops; }; struct rockchip_hdmi_qp_ctrl_ops { @@ -461,6 +462,7 @@ static int dw_hdmi_qp_rockchip_bind(struct device *dev, struct device *master, return -ENODEV; } + hdmi->ctrl_ops = cfg->ctrl_ops; hdmi->dev = &pdev->dev; hdmi->port_id = -ENODEV; @@ -600,27 +602,8 @@ static void dw_hdmi_qp_rockchip_remove(struct platform_device *pdev) static int __maybe_unused dw_hdmi_qp_rockchip_resume(struct device *dev) { struct rockchip_hdmi_qp *hdmi = dev_get_drvdata(dev); - u32 val; - val = HIWORD_UPDATE(RK3588_SCLIN_MASK, RK3588_SCLIN_MASK) | - HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) | - HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) | - HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK); - regmap_write(hdmi->vo_regmap, - hdmi->port_id ? RK3588_GRF_VO1_CON6 : RK3588_GRF_VO1_CON3, - val); - - val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK, - RK3588_SET_HPD_PATH_MASK); - regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val); - - if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_GRANT_SEL, - RK3588_HDMI1_GRANT_SEL); - else - val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL, - RK3588_HDMI0_GRANT_SEL); - regmap_write(hdmi->vo_regmap, RK3588_GRF_VO1_CON9, val); + hdmi->ctrl_ops->io_init(hdmi); dw_hdmi_qp_resume(dev, hdmi->hdmi); From c8c59bcac9300c22abf1afd7e236978a413c3644 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 9 Apr 2025 15:50:46 -0500 Subject: [PATCH 0382/2924] arm64: dts: rockchip: Use "regulator-fixed" for btreg on px30-engicam for vcc3v3-btreg The vcc3v3-btreg regulator only has 1 state and no state gpios defined, so "regulator-gpio" is not the correct binding to use. "regulator-fixed" is the correct binding to use. It supports an enable GPIO which is needed in this case. Signed-off-by: "Rob Herring (Arm)" Link: https://lore.kernel.org/r/20250409205047.1522943-1-robh@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi | 3 +-- arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi | 2 +- .../boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi b/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi index 1edfd643b25ae8..a334ef0629d1bb 100644 --- a/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi @@ -31,7 +31,7 @@ }; vcc3v3_btreg: vcc3v3-btreg { - compatible = "regulator-gpio"; + compatible = "regulator-fixed"; enable-active-high; pinctrl-names = "default"; pinctrl-0 = <&bt_enable_h>; @@ -39,7 +39,6 @@ regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; regulator-always-on; - states = <3300000 0x0>; }; vcc3v3_rf_aux_mod: regulator-vcc3v3-rf-aux-mod { diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi b/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi index 80db778c968483..b60e68faa83aa9 100644 --- a/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi @@ -26,5 +26,5 @@ }; &vcc3v3_btreg { - enable-gpios = <&gpio1 RK_PC3 GPIO_ACTIVE_HIGH>; + gpios = <&gpio1 RK_PC3 GPIO_ACTIVE_HIGH>; }; diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts index 165d09ccb94244..5886b802c5202b 100644 --- a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts +++ b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts @@ -39,5 +39,5 @@ }; &vcc3v3_btreg { - enable-gpios = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>; + gpios = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>; }; From 6833cbdc733c9e1088fe9936b2dad95cc7d4c580 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 9 Apr 2025 15:50:39 -0500 Subject: [PATCH 0383/2924] arm64: dts: rockchip: Fix mmc-pwrseq clock name on rock-pi-4 The defined name for "mmc-pwrseq-simple" clock is "ext_clock". Signed-off-by: "Rob Herring (Arm)" Link: https://lore.kernel.org/r/20250409205040.1522754-1-robh@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 541dca12bf1a1f..046dbe32901786 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -43,7 +43,7 @@ sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; clocks = <&rk808 1>; - clock-names = "lpo"; + clock-names = "ext_clock"; pinctrl-names = "default"; pinctrl-0 = <&wifi_enable_h>; reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; From acea9943271b62905033f2f8ca571cdd52d6ea7b Mon Sep 17 00:00:00 2001 From: Peng Jiang Date: Mon, 24 Mar 2025 19:12:30 +0800 Subject: [PATCH 0384/2924] vdso: Address variable shadowing in macros Compiling the kernel with gcc12.3 W=2 results in shadowing warnings: warning: declaration of '__pptr' shadows a previous local [-Wshadow] const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); note: in definition of macro '__put_unaligned_t' __pptr->x = (val); note: in expansion of macro '__get_unaligned_t' __put_unaligned_t(type, __get_unaligned_t(type, src), dst); __get_unaligned_t() and __put_unaligned_t() use a local variable named '__pptr', which can lead to variable shadowing when these macros are used in the same scope. This results in a -Wshadow warning during compilation. To address this issue, rename the local variables within the macros to ensure uniqueness. Signed-off-by: Peng Jiang Signed-off-by: Shao Mingyin Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250324191230477zpGtgIRSH4mEHdtxGtgx9@zte.com.cn --- include/vdso/unaligned.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/vdso/unaligned.h b/include/vdso/unaligned.h index eee3d2a4dbe4d3..ff0c06b6513eff 100644 --- a/include/vdso/unaligned.h +++ b/include/vdso/unaligned.h @@ -2,14 +2,14 @@ #ifndef __VDSO_UNALIGNED_H #define __VDSO_UNALIGNED_H -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed * __get_pptr = (typeof(__get_pptr))(ptr); \ + __get_pptr->x; \ }) -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed * __put_pptr = (typeof(__put_pptr))(ptr); \ + __put_pptr->x = (val); \ } while (0) #endif /* __VDSO_UNALIGNED_H */ From aabc6596ffb377c4c9c8f335124b92ea282c9821 Mon Sep 17 00:00:00 2001 From: Arnaud Lecomte Date: Tue, 8 Apr 2025 17:55:08 +0200 Subject: [PATCH 0385/2924] net: ppp: Add bound checking for skb data on ppp_sync_txmung MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure we have enough data in linear buffer from skb before accessing initial bytes. This prevents potential out-of-bounds accesses when processing short packets. When ppp_sync_txmung receives an incoming package with an empty payload: (remote) gef➤ p *(struct pppoe_hdr *) (skb->head + skb->network_header) $18 = { type = 0x1, ver = 0x1, code = 0x0, sid = 0x2, length = 0x0, tag = 0xffff8880371cdb96 } from the skb struct (trimmed) tail = 0x16, end = 0x140, head = 0xffff88803346f400 "4", data = 0xffff88803346f416 ":\377", truesize = 0x380, len = 0x0, data_len = 0x0, mac_len = 0xe, hdr_len = 0x0, it is not safe to access data[2]. Reported-by: syzbot+29fc8991b0ecb186cf40@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=29fc8991b0ecb186cf40 Tested-by: syzbot+29fc8991b0ecb186cf40@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Arnaud Lecomte Link: https://patch.msgid.link/20250408-bound-checking-ppp_txmung-v2-1-94bb6e1b92d0@arnaud-lcm.com [pabeni@redhat.com: fixed subj typo] Signed-off-by: Paolo Abeni --- drivers/net/ppp/ppp_synctty.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index 644e99fc3623f5..9c4932198931f3 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -506,6 +506,11 @@ ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb) unsigned char *data; int islcp; + /* Ensure we can safely access protocol field and LCP code */ + if (!pskb_may_pull(skb, 3)) { + kfree_skb(skb); + return NULL; + } data = skb->data; proto = get_unaligned_be16(data); From e042ed950d4e176379ba4c0722146cd96fb38aa2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 Apr 2025 19:40:18 +0200 Subject: [PATCH 0386/2924] nft_set_pipapo: fix incorrect avx2 match of 5th field octet Given a set element like: icmpv6 . dead:beef:00ff::1 The value of 'ff' is irrelevant, any address will be matched as long as the other octets are the same. This is because of too-early register clobbering: ymm7 is reloaded with new packet data (pkt[9]) but it still holds data of an earlier load that wasn't processed yet. The existing tests in nft_concat_range.sh selftests do exercise this code path, but do not trigger incorrect matching due to the network prefix limitation. Fixes: 7400b063969b ("nft_set_pipapo: Introduce AVX2-based lookup implementation") Reported-by: sontu mazumdar Closes: https://lore.kernel.org/netfilter/CANgxkqwnMH7fXra+VUfODT-8+qFLgskq3set1cAzqqJaV4iEZg@mail.gmail.com/T/#t Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo_avx2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index b8d3c3213efee5..c15db28c5ebc43 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -994,8 +994,9 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 8, pkt[8], bsize); NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_AND(3, 4, 7); NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 9, pkt[9], bsize); - NFT_PIPAPO_AVX2_AND(0, 4, 5); + NFT_PIPAPO_AVX2_AND(0, 3, 5); NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 10, pkt[10], bsize); NFT_PIPAPO_AVX2_AND(2, 6, 7); NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 11, pkt[11], bsize); From 27eb86e22f1067a39f05e8878fd83f00e3311dc3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 Apr 2025 19:40:19 +0200 Subject: [PATCH 0387/2924] selftests: netfilter: add test case for recent mismatch bug Without 'nft_set_pipapo: fix incorrect avx2 match of 5th field octet" this fails: TEST: reported issues Add two elements, flush, re-add 1s [ OK ] net,mac with reload 0s [ OK ] net,port,proto 3s [ OK ] avx2 false match 0s [FAIL] False match for fe80:dead:01fe:0a02:0b03:6007:8009:a001 Other tests do not detect the kernel bug as they only alter parts in the /64 netmask. Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../net/netfilter/nft_concat_range.sh | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 47088b00539042..1f5979c1510c3c 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -387,6 +387,25 @@ race_repeat 0 perf_duration 0 " + +TYPE_avx2_mismatch=" +display avx2 false match +type_spec inet_proto . ipv6_addr +chain_spec meta l4proto . ip6 daddr +dst proto addr6 +src +start 1 +count 1 +src_delta 1 +tools ping +proto icmp6 + +race_repeat 0 + +perf_duration 0 +" + + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1629,6 +1648,24 @@ test_bug_net_port_proto_match() { nft flush ruleset } +test_bug_avx2_mismatch() +{ + setup veth send_"${proto}" set || return ${ksft_skip} + + local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001" + local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001" + + nft "add element inet filter test { icmpv6 . $a1 }" + + dst_addr6="$a2" + send_icmp6 + + if [ "$(count_packets)" -gt "0" ]; then + err "False match for $a2" + return 1 + fi +} + test_reported_issues() { eval test_bug_"${subtest}" } From 7f33f247138554b84729688169dfbe87724b70ef Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 10 Apr 2025 11:37:12 +0100 Subject: [PATCH 0388/2924] MAINTAINERS: use kernel.org alias My Linaro email will stop working soon. Use @kernel.org email instead. Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20250410103713.24875-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 96b82704950184..f9a9d3dced6745 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17368,7 +17368,7 @@ T: git git://git.infradead.org/nvme.git F: drivers/nvme/target/ NVMEM FRAMEWORK -M: Srinivas Kandagatla +M: Srinivas Kandagatla S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/srini/nvmem.git F: Documentation/ABI/stable/sysfs-bus-nvmem @@ -19573,7 +19573,7 @@ S: Supported F: drivers/crypto/intel/qat/ QCOM AUDIO (ASoC) DRIVERS -M: Srinivas Kandagatla +M: Srinivas Kandagatla L: linux-sound@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Supported @@ -19873,7 +19873,7 @@ F: Documentation/devicetree/bindings/net/qcom,ethqos.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c QUALCOMM FASTRPC DRIVER -M: Srinivas Kandagatla +M: Srinivas Kandagatla M: Amol Maheshwari L: linux-arm-msm@vger.kernel.org L: dri-devel@lists.freedesktop.org @@ -21921,7 +21921,7 @@ S: Maintained F: drivers/media/rc/serial_ir.c SERIAL LOW-POWER INTER-CHIP MEDIA BUS (SLIMbus) -M: Srinivas Kandagatla +M: Srinivas Kandagatla L: linux-sound@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/slimbus/ From 807c1c83152138e2fc22101a57b9346159ad4f4c Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 10 Apr 2025 11:37:13 +0100 Subject: [PATCH 0389/2924] mailmap: Add entry for Srinivas Kandagatla Add entries for the various addresses that I have been using over the years and remap all of them to kernel.org alias. Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20250410103713.24875-3-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 4f7cd8e231778c..0326d5e750127e 100644 --- a/.mailmap +++ b/.mailmap @@ -685,6 +685,8 @@ Simon Wunderlich Simon Wunderlich Simon Wunderlich Sricharan Ramabadhran +Srinivas Kandagatla +Srinivas Kandagatla Srinivas Ramana Sriram R Sriram Yagnaraman From 9aa33d5b4a53a1945dd2aee45c09282248d3c98b Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Thu, 10 Apr 2025 11:16:43 +0200 Subject: [PATCH 0390/2924] ASoC: fsl: fsl_qmc_audio: Reset audio data pointers on TRIGGER_START event On SNDRV_PCM_TRIGGER_START event, audio data pointers are not reset. This leads to wrong data buffer usage when multiple TRIGGER_START are received and ends to incorrect buffer usage between the user-space and the driver. Indeed, the driver can read data that are not already set by the user-space or the user-space and the driver are writing and reading the same area. Fix that resetting data pointers on each SNDRV_PCM_TRIGGER_START events. Fixes: 075c7125b11c ("ASoC: fsl: Add support for QMC audio") Cc: stable@vger.kernel.org Signed-off-by: Herve Codina Link: https://patch.msgid.link/20250410091643.535627-1-herve.codina@bootlin.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_qmc_audio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/fsl/fsl_qmc_audio.c b/sound/soc/fsl/fsl_qmc_audio.c index b2979290c97324..5614a8b909edf8 100644 --- a/sound/soc/fsl/fsl_qmc_audio.c +++ b/sound/soc/fsl/fsl_qmc_audio.c @@ -250,6 +250,9 @@ static int qmc_audio_pcm_trigger(struct snd_soc_component *component, switch (cmd) { case SNDRV_PCM_TRIGGER_START: bitmap_zero(prtd->chans_pending, 64); + prtd->buffer_ended = 0; + prtd->ch_dma_addr_current = prtd->ch_dma_addr_start; + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { for (i = 0; i < prtd->channels; i++) prtd->qmc_dai->chans[i].prtd_tx = prtd; From 6bbb2b1286f437b45ccf4828a537429153cd1096 Mon Sep 17 00:00:00 2001 From: Weidong Wang Date: Thu, 10 Apr 2025 10:49:53 +0800 Subject: [PATCH 0391/2924] ASoC: codecs: Add of_match_table for aw888081 driver Add of_match_table for aw88081 driver to make matching between dts and driver more flexible Signed-off-by: Weidong Wang Link: https://patch.msgid.link/20250410024953.26565-1-wangweidong.a@awinic.com Signed-off-by: Mark Brown --- sound/soc/codecs/aw88081.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/aw88081.c b/sound/soc/codecs/aw88081.c index ad16ab6812cd3f..3dd8428f08cce9 100644 --- a/sound/soc/codecs/aw88081.c +++ b/sound/soc/codecs/aw88081.c @@ -1295,9 +1295,19 @@ static int aw88081_i2c_probe(struct i2c_client *i2c) aw88081_dai, ARRAY_SIZE(aw88081_dai)); } +#if defined(CONFIG_OF) +static const struct of_device_id aw88081_of_match[] = { + { .compatible = "awinic,aw88081" }, + { .compatible = "awinic,aw88083" }, + { } +}; +MODULE_DEVICE_TABLE(of, aw88081_of_match); +#endif + static struct i2c_driver aw88081_i2c_driver = { .driver = { .name = AW88081_I2C_NAME, + .of_match_table = of_match_ptr(aw88081_of_match), }, .probe = aw88081_i2c_probe, .id_table = aw88081_i2c_id, From 5d07ab2a7fa1305e429d9221716582f290b58078 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 10 Apr 2025 14:56:09 +0800 Subject: [PATCH 0392/2924] spi: fsl-qspi: Fix double cleanup in probe error path Commit 40369bfe717e ("spi: fsl-qspi: use devm function instead of driver remove") introduced managed cleanup via fsl_qspi_cleanup(), but incorrectly retain manual cleanup in two scenarios: - On devm_add_action_or_reset() failure, the function automatically call fsl_qspi_cleanup(). However, the current code still jumps to err_destroy_mutex, repeating cleanup. - After the fsl_qspi_cleanup() action is added successfully, there is no need to manually perform the cleanup in the subsequent error path. However, the current code still jumps to err_destroy_mutex on spi controller failure, repeating cleanup. Skip redundant manual cleanup calls to fix these issues. Cc: stable@vger.kernel.org Fixes: 40369bfe717e ("spi: fsl-qspi: use devm function instead of driver remove") Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20250410-spi-v1-1-56e867cc19cf@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-qspi.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c index 5c59fddb32c1b9..2f54dc09d11b1c 100644 --- a/drivers/spi/spi-fsl-qspi.c +++ b/drivers/spi/spi-fsl-qspi.c @@ -949,17 +949,14 @@ static int fsl_qspi_probe(struct platform_device *pdev) ret = devm_add_action_or_reset(dev, fsl_qspi_cleanup, q); if (ret) - goto err_destroy_mutex; + goto err_put_ctrl; ret = devm_spi_register_controller(dev, ctlr); if (ret) - goto err_destroy_mutex; + goto err_put_ctrl; return 0; -err_destroy_mutex: - mutex_destroy(&q->lock); - err_disable_clk: fsl_qspi_clk_disable_unprep(q); From 82bedbfedd2fc7cd1287732879e515ceb94f8963 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 10 Apr 2025 14:56:10 +0800 Subject: [PATCH 0393/2924] spi: fsl-spi: Remove redundant probe error message An error message is already emitted by the driver core function call_driver_probe() when the driver probe fails. Therefore, this redundant probe error message is removed. Signed-off-by: Kevin Hao Link: https://patch.msgid.link/20250410-spi-v1-2-56e867cc19cf@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-qspi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c index 2f54dc09d11b1c..b5ecffcaf7955e 100644 --- a/drivers/spi/spi-fsl-qspi.c +++ b/drivers/spi/spi-fsl-qspi.c @@ -963,7 +963,6 @@ static int fsl_qspi_probe(struct platform_device *pdev) err_put_ctrl: spi_controller_put(ctlr); - dev_err(dev, "Freescale QuadSPI probe failed\n"); return ret; } From eaa517b77e63442260640d875f824d1111ca6569 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 9 Apr 2025 14:24:40 +0300 Subject: [PATCH 0394/2924] ethtool: cmis_cdb: Fix incorrect read / write length extension The 'read_write_len_ext' field in 'struct ethtool_cmis_cdb_cmd_args' stores the maximum number of bytes that can be read from or written to the Local Payload (LPL) page in a single multi-byte access. Cited commit started overwriting this field with the maximum number of bytes that can be read from or written to the Extended Payload (LPL) pages in a single multi-byte access. Transceiver modules that support auto paging can advertise a number larger than 255 which is problematic as 'read_write_len_ext' is a 'u8', resulting in the number getting truncated and firmware flashing failing [1]. Fix by ignoring the maximum EPL access size as the kernel does not currently support auto paging (even if the transceiver module does) and will not try to read / write more than 128 bytes at once. [1] Transceiver module firmware flashing started for device enp177s0np0 Transceiver module firmware flashing in progress for device enp177s0np0 Progress: 0% Transceiver module firmware flashing encountered an error for device enp177s0np0 Status message: Write FW block EPL command failed, LPL length is longer than CDB read write length extension allows. Fixes: 9a3b0d078bd8 ("net: ethtool: Add support for writing firmware blocks using EPL payload") Reported-by: Damodharam Ammepalli Closes: https://lore.kernel.org/netdev/20250402183123.321036-3-michael.chan@broadcom.com/ Tested-by: Damodharam Ammepalli Signed-off-by: Ido Schimmel Reviewed-by: Damodharam Ammepalli Reviewed-by: Petr Machata Link: https://patch.msgid.link/20250409112440.365672-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- net/ethtool/cmis.h | 1 - net/ethtool/cmis_cdb.c | 18 +++--------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h index 1e790413db0e8d..4a9a946cabf05d 100644 --- a/net/ethtool/cmis.h +++ b/net/ethtool/cmis.h @@ -101,7 +101,6 @@ struct ethtool_cmis_cdb_rpl { }; u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs); -u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs); void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl, diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c index d159dc121bde58..0e2691ccb0df38 100644 --- a/net/ethtool/cmis_cdb.c +++ b/net/ethtool/cmis_cdb.c @@ -16,15 +16,6 @@ u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs) return 8 * (1 + min_t(u8, num_of_byte_octs, 15)); } -/* For accessing the EPL field on page 9Fh, the allowable length extension is - * min(i, 255) byte octets where i specifies the allowable additional number of - * byte octets in a READ or a WRITE. - */ -u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs) -{ - return 8 * (1 + min_t(u8, num_of_byte_octs, 255)); -} - void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl, u8 lpl_len, u8 *epl, u16 epl_len, @@ -33,19 +24,16 @@ void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, { args->req.id = cpu_to_be16(cmd); args->req.lpl_len = lpl_len; - if (lpl) { + if (lpl) memcpy(args->req.payload, lpl, args->req.lpl_len); - args->read_write_len_ext = - ethtool_cmis_get_max_lpl_size(read_write_len_ext); - } if (epl) { args->req.epl_len = cpu_to_be16(epl_len); args->req.epl = epl; - args->read_write_len_ext = - ethtool_cmis_get_max_epl_size(read_write_len_ext); } args->max_duration = max_duration; + args->read_write_len_ext = + ethtool_cmis_get_max_lpl_size(read_write_len_ext); args->msleep_pre_rpl = msleep_pre_rpl; args->rpl_exp_len = rpl_exp_len; args->flags = flags; From 0c562281199f225a849dbb5b9a40b079ee31dc0e Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Thu, 3 Apr 2025 21:59:24 -0500 Subject: [PATCH 0395/2924] arm64: dts: morello: Fix-up cache nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no need include the CPU number in the L2 cache node names as the names are local to the CPU nodes. The documented node name is also just "l2-cache". The L3 cache is not part of cpu@0/l2-cache as it is shared among all cores. Move it to /cpus node which is the typical place for shared caches. Signed-off-by: Rob Herring (Arm) Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20250403-dt-cpu-schema-v1-3-076be7171a85@kernel.org> Signed-off-by: Sudeep Holla --- arch/arm64/boot/dts/arm/morello.dtsi | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/boot/dts/arm/morello.dtsi b/arch/arm64/boot/dts/arm/morello.dtsi index 0bab0b3ea9693e..5bc1c725dc860b 100644 --- a/arch/arm64/boot/dts/arm/morello.dtsi +++ b/arch/arm64/boot/dts/arm/morello.dtsi @@ -44,7 +44,7 @@ next-level-cache = <&l2_0>; clocks = <&scmi_dvfs 0>; - l2_0: l2-cache-0 { + l2_0: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -53,13 +53,6 @@ cache-sets = <2048>; cache-unified; next-level-cache = <&l3_0>; - - l3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; - cache-size = <0x100000>; - cache-unified; - }; }; }; @@ -78,7 +71,7 @@ next-level-cache = <&l2_1>; clocks = <&scmi_dvfs 0>; - l2_1: l2-cache-1 { + l2_1: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -105,7 +98,7 @@ next-level-cache = <&l2_2>; clocks = <&scmi_dvfs 1>; - l2_2: l2-cache-2 { + l2_2: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -132,7 +125,7 @@ next-level-cache = <&l2_3>; clocks = <&scmi_dvfs 1>; - l2_3: l2-cache-3 { + l2_3: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -143,6 +136,13 @@ next-level-cache = <&l3_0>; }; }; + + l3_0: l3-cache { + compatible = "cache"; + cache-level = <3>; + cache-size = <0x100000>; + cache-unified; + }; }; firmware { From 285b2c74cf9982e873ef82a2cb1328d9e9406f65 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 10 Apr 2025 14:21:29 +0100 Subject: [PATCH 0396/2924] firmware: cs_dsp: test_bin_error: Fix uninitialized data used as fw version Call cs_dsp_mock_xm_header_get_fw_version() to get the firmware version from the dummy XM header data in cs_dsp_bin_err_test_common_init(). Make the same change to cs_dsp_bin_test_common_init() and remove the cs_dsp_mock_xm_header_get_fw_version_from_regmap() function. The code in cs_dsp_test_bin.c was correctly calling cs_dsp_mock_xm_header_get_fw_version_from_regmap() to fetch the fw version from a dummy header it wrote to XM registers. However in cs_dsp_test_bin_error.c the test doesn't stuff a dummy header into XM, it populates it the normal way using a wmfw file. It should have called cs_dsp_mock_xm_header_get_fw_version() to get the data from its blob buffer, but was calling cs_dsp_mock_xm_header_get_fw_version_from_regmap(). As nothing had been written to the registers this returned the value of uninitialized data. The only other use of cs_dsp_mock_xm_header_get_fw_version_from_regmap() was cs_dsp_test_bin.c, but it doesn't need to use it. It already has a blob buffer containing the dummy XM header so it can use cs_dsp_mock_xm_header_get_fw_version() to read from that. Fixes: cd8c058499b6 ("firmware: cs_dsp: Add KUnit testing of bin error cases") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250410132129.1312541-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- .../cirrus/test/cs_dsp_mock_mem_maps.c | 30 ------------------- .../firmware/cirrus/test/cs_dsp_test_bin.c | 2 +- .../cirrus/test/cs_dsp_test_bin_error.c | 2 +- .../linux/firmware/cirrus/cs_dsp_test_utils.h | 1 - 4 files changed, 2 insertions(+), 33 deletions(-) diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c b/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c index 161272e47bdabc..73412bcef50c50 100644 --- a/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c +++ b/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c @@ -461,36 +461,6 @@ unsigned int cs_dsp_mock_xm_header_get_alg_base_in_words(struct cs_dsp_test *pri } EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_xm_header_get_alg_base_in_words, "FW_CS_DSP_KUNIT_TEST_UTILS"); -/** - * cs_dsp_mock_xm_header_get_fw_version_from_regmap() - Firmware version. - * - * @priv: Pointer to struct cs_dsp_test. - * - * Return: Firmware version word value. - */ -unsigned int cs_dsp_mock_xm_header_get_fw_version_from_regmap(struct cs_dsp_test *priv) -{ - unsigned int xm = cs_dsp_mock_base_addr_for_mem(priv, WMFW_ADSP2_XM); - union { - struct wmfw_id_hdr adsp2; - struct wmfw_v3_id_hdr halo; - } hdr; - - switch (priv->dsp->type) { - case WMFW_ADSP2: - regmap_raw_read(priv->dsp->regmap, xm, &hdr.adsp2, sizeof(hdr.adsp2)); - return be32_to_cpu(hdr.adsp2.ver); - case WMFW_HALO: - regmap_raw_read(priv->dsp->regmap, xm, &hdr.halo, sizeof(hdr.halo)); - return be32_to_cpu(hdr.halo.ver); - default: - KUNIT_FAIL(priv->test, NULL); - return 0; - } -} -EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_xm_header_get_fw_version_from_regmap, - "FW_CS_DSP_KUNIT_TEST_UTILS"); - /** * cs_dsp_mock_xm_header_get_fw_version() - Firmware version. * diff --git a/drivers/firmware/cirrus/test/cs_dsp_test_bin.c b/drivers/firmware/cirrus/test/cs_dsp_test_bin.c index 1e161bbc5b4a46..163b7faecff466 100644 --- a/drivers/firmware/cirrus/test/cs_dsp_test_bin.c +++ b/drivers/firmware/cirrus/test/cs_dsp_test_bin.c @@ -2198,7 +2198,7 @@ static int cs_dsp_bin_test_common_init(struct kunit *test, struct cs_dsp *dsp) priv->local->bin_builder = cs_dsp_mock_bin_init(priv, 1, - cs_dsp_mock_xm_header_get_fw_version_from_regmap(priv)); + cs_dsp_mock_xm_header_get_fw_version(xm_hdr)); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv->local->bin_builder); /* We must provide a dummy wmfw to load */ diff --git a/drivers/firmware/cirrus/test/cs_dsp_test_bin_error.c b/drivers/firmware/cirrus/test/cs_dsp_test_bin_error.c index 8748874f055247..a7ec956d27249e 100644 --- a/drivers/firmware/cirrus/test/cs_dsp_test_bin_error.c +++ b/drivers/firmware/cirrus/test/cs_dsp_test_bin_error.c @@ -451,7 +451,7 @@ static int cs_dsp_bin_err_test_common_init(struct kunit *test, struct cs_dsp *ds local->bin_builder = cs_dsp_mock_bin_init(priv, 1, - cs_dsp_mock_xm_header_get_fw_version_from_regmap(priv)); + cs_dsp_mock_xm_header_get_fw_version(local->xm_header)); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, local->bin_builder); /* Init cs_dsp */ diff --git a/include/linux/firmware/cirrus/cs_dsp_test_utils.h b/include/linux/firmware/cirrus/cs_dsp_test_utils.h index 4f87a908ab4f6b..ecd821ed8064f1 100644 --- a/include/linux/firmware/cirrus/cs_dsp_test_utils.h +++ b/include/linux/firmware/cirrus/cs_dsp_test_utils.h @@ -104,7 +104,6 @@ unsigned int cs_dsp_mock_num_dsp_words_to_num_packed_regs(unsigned int num_dsp_w unsigned int cs_dsp_mock_xm_header_get_alg_base_in_words(struct cs_dsp_test *priv, unsigned int alg_id, int mem_type); -unsigned int cs_dsp_mock_xm_header_get_fw_version_from_regmap(struct cs_dsp_test *priv); unsigned int cs_dsp_mock_xm_header_get_fw_version(struct cs_dsp_mock_xm_header *header); void cs_dsp_mock_xm_header_drop_from_regmap_cache(struct cs_dsp_test *priv); int cs_dsp_mock_xm_header_write_to_regmap(struct cs_dsp_mock_xm_header *header); From 642335f3ea2b3fd6dba03e57e01fa9587843a497 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 29 Nov 2024 21:20:53 +0100 Subject: [PATCH 0397/2924] ext4: don't treat fhandle lookup of ea_inode as FS corruption A file handle that userspace provides to open_by_handle_at() can legitimately contain an outdated inode number that has since been reused for another purpose - that's why the file handle also contains a generation number. But if the inode number has been reused for an ea_inode, check_igot_inode() will notice, __ext4_iget() will go through ext4_error_inode(), and if the inode was newly created, it will also be marked as bad by iget_failed(). This all happens before the point where the inode generation is checked. ext4_error_inode() is supposed to only be used on filesystem corruption; it should not be used when userspace just got unlucky with a stale file handle. So when this happens, let __ext4_iget() just return an error. Fixes: b3e6bcb94590 ("ext4: add EA_INODE checking to ext4_iget()") Signed-off-by: Jann Horn Reviewed-by: Jan Kara Link: https://patch.msgid.link/20241129-ext4-ignore-ea-fhandle-v1-1-e532c0d1cee0@google.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 68 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index aede80fa178143..5c57a34cd82c92 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4732,22 +4732,43 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val) inode_set_iversion_queried(inode, val); } -static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags) - +static int check_igot_inode(struct inode *inode, ext4_iget_flags flags, + const char *function, unsigned int line) { + const char *err_str; + if (flags & EXT4_IGET_EA_INODE) { - if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) - return "missing EA_INODE flag"; + if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { + err_str = "missing EA_INODE flag"; + goto error; + } if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) || - EXT4_I(inode)->i_file_acl) - return "ea_inode with extended attributes"; + EXT4_I(inode)->i_file_acl) { + err_str = "ea_inode with extended attributes"; + goto error; + } } else { - if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) - return "unexpected EA_INODE flag"; + if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { + /* + * open_by_handle_at() could provide an old inode number + * that has since been reused for an ea_inode; this does + * not indicate filesystem corruption + */ + if (flags & EXT4_IGET_HANDLE) + return -ESTALE; + err_str = "unexpected EA_INODE flag"; + goto error; + } + } + if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) { + err_str = "unexpected bad inode w/o EXT4_IGET_BAD"; + goto error; } - if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) - return "unexpected bad inode w/o EXT4_IGET_BAD"; - return NULL; + return 0; + +error: + ext4_error_inode(inode, function, line, 0, err_str); + return -EFSCORRUPTED; } struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, @@ -4759,7 +4780,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, struct ext4_inode_info *ei; struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct inode *inode; - const char *err_str; journal_t *journal = EXT4_SB(sb)->s_journal; long ret; loff_t size; @@ -4788,10 +4808,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) { - if ((err_str = check_igot_inode(inode, flags)) != NULL) { - ext4_error_inode(inode, function, line, 0, err_str); + ret = check_igot_inode(inode, flags, function, line); + if (ret) { iput(inode); - return ERR_PTR(-EFSCORRUPTED); + return ERR_PTR(ret); } return inode; } @@ -5073,13 +5093,21 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ret = -EFSCORRUPTED; goto bad_inode; } - if ((err_str = check_igot_inode(inode, flags)) != NULL) { - ext4_error_inode(inode, function, line, 0, err_str); - ret = -EFSCORRUPTED; - goto bad_inode; + ret = check_igot_inode(inode, flags, function, line); + /* + * -ESTALE here means there is nothing inherently wrong with the inode, + * it's just not an inode we can return for an fhandle lookup. + */ + if (ret == -ESTALE) { + brelse(iloc.bh); + unlock_new_inode(inode); + iput(inode); + return ERR_PTR(-ESTALE); } - + if (ret) + goto bad_inode; brelse(iloc.bh); + unlock_new_inode(inode); return inode; From ddc592972ff4f1350f456edc3047fc5fb01777aa Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:16 -0700 Subject: [PATCH 0398/2924] tools headers: Update the KVM headers with the kernel sources To pick up the changes in: af5366bea2cb9dfb KVM: x86: Drop the now unused KVM_X86_DISABLE_VALID_EXITS 915d2f0718a42ee0 KVM: Move KVM_REG_SIZE() definition to common uAPI header 5c17848134ab1ffb KVM: x86/xen: Restrict hypercall MSR to unofficial synthetic range 9364789567f9b492 KVM: x86: Add a VM type define for TDX fa662c9080732b1f KVM: SVM: Add Idle HLT intercept support 3adaee78306148da KVM: arm64: Allow userspace to change the implementation ID registers faf7714a47a25c62 KVM: arm64: nv: Allow userland to set VGIC maintenance IRQ c0000e58c74eed07 KVM: arm64: Introduce KVM_REG_ARM_VENDOR_HYP_BMAP_2 f83c41fb3dddbf47 KVM: arm64: Allow userspace to limit NV support to nVHE Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: kvm@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-2-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/arm64/include/uapi/asm/kvm.h | 5 ++--- tools/arch/x86/include/uapi/asm/kvm.h | 4 ++++ tools/arch/x86/include/uapi/asm/svm.h | 2 ++ tools/include/uapi/linux/kvm.h | 9 +++++---- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 6d44f8c8a18fd9..af9d9acaf9975a 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -43,9 +43,6 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - struct kvm_regs { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -108,6 +105,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */ struct kvm_vcpu_init { __u32 target; @@ -418,6 +416,7 @@ enum { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 88585c1de416fa..460306b35a4bfe 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -559,6 +559,9 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) +#define KVM_XEN_MSR_MIN_INDEX 0x40000000u +#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -925,5 +928,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 #define KVM_X86_SNP_VM 4 +#define KVM_X86_TDX_VM 5 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 1814b413fd5783..ec1321248dac2a 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 @@ -224,6 +225,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 502ea63b5d2e73..b6ae8ad8934b52 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -617,10 +617,6 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -933,6 +929,7 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1070,6 +1067,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL From 9dbe66640f43a3530a0e7897557f4ea41c3abe85 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:17 -0700 Subject: [PATCH 0399/2924] tools headers: Update the socket headers with the kernel sources To pick up the changes in: 64e844505bc08cde include: uapi: protocol number and packet structs for AGGFRAG in ESP 18912c520674ec4d tcp: devmem: don't write truncated dmabuf CMSGs to userspace Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-3-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/in.h | 2 ++ tools/perf/trace/beauty/include/linux/socket.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 5d32d53508d99f..ced0fc3c3aa534 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index d18cc47e89bd01..c3322eb3d6865d 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -392,6 +392,8 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len, + void *data); struct timespec64; struct __kernel_timespec; From ae62977331fcbf5c9a4260c88d9f94450db2d99a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:18 -0700 Subject: [PATCH 0400/2924] tools headers: Update the uapi/linux/perf_event.h copy with the kernel sources To pick up the changes in: c53e14f1ea4a8f8d perf: Extend per event callchain limit to branch stack Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Link: https://lore.kernel.org/r/20250410001125.391820-4-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 0524d541d4e3d5..5fc753c23734df 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -385,6 +385,8 @@ enum perf_event_read_format { * * @sample_max_stack: Max number of frame pointers in a callchain, * should be < /proc/sys/kernel/perf_event_max_stack + * Max number of entries of branch stack + * should be < hardware limit */ struct perf_event_attr { From af74e5fe7453c1cb2b86c601426cfc4ad9ea9753 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:19 -0700 Subject: [PATCH 0401/2924] tools headers: Update the VFS headers with the kernel sources To pick up the changes in: 7ed6cbe0f8caa6ee fs: add STATX_DIO_READ_ALIGN 8fc7e23a9bd851e6 fs: reformat the statx definition a5874fde3c0884a3 exec: Add a new AT_EXECVE_CHECK flag to execveat(2) 1ebd4a3c095cd538 blk-crypto: add ioctls to create and prepare hardware-wrapped keys af6505e5745b9f3a fs: add RWF_DONTCACHE iocb and FOP_DONTCACHE file_operations flag 10783d0ba0d7731e fs, iov_iter: define meta io descriptor 8f6116b5b77b0536 statmount: add a new supported_mask field 37c4a9590e1efcae statmount: allow to retrieve idmappings Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h diff -u tools/perf/trace/beauty/include/uapi/linux/stat.h include/uapi/linux/stat.h diff -u tools/perf/trace/beauty/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h diff -u tools/perf/trace/beauty/include/uapi/linux/fs.h include/uapi/linux/fs.h diff -u tools/perf/trace/beauty/include/uapi/linux/mount.h include/uapi/linux/mount.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-5-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/stat.h | 99 ++++++++++++++----- .../trace/beauty/include/uapi/linux/fcntl.h | 4 + .../perf/trace/beauty/include/uapi/linux/fs.h | 21 +++- .../trace/beauty/include/uapi/linux/mount.h | 10 +- .../trace/beauty/include/uapi/linux/stat.h | 99 ++++++++++++++----- 5 files changed, 179 insertions(+), 54 deletions(-) diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 887a2528644168..f78ee3670dd5d7 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -98,43 +98,93 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index 6e6907e63bfc2b..a15ac2fa4b202f 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -155,4 +155,8 @@ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ #define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */ +/* Flags for execveat2(2). */ +#define AT_EXECVE_CHECK 0x10000 /* Only perform a check if execution + would be allowed. */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 7539717707337a..e762e1af650c4b 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1< Date: Wed, 9 Apr 2025 17:11:20 -0700 Subject: [PATCH 0402/2924] tools headers: Update the syscall table with the kernel sources To pick up the changes in: c4a16820d9019940 fs: add open_tree_attr() 2df1ad0d25803399 x86/arch_prctl: Simplify sys_arch_prctl() e632bca07c8eef1d arm64: generate 64-bit syscall.tbl This is basically to support the new open_tree_attr syscall. But it also needs to update asm-generic unistd.h header to get the new syscall number. And arm64 unistd.h header was converted to use the generic 64-bit header. Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/scripts/syscall.tbl scripts/syscall.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl diff -u tools/perf/arch/arm/entry/syscalls/syscall.tbl arch/arm/tools/syscall.tbl diff -u tools/perf/arch/sh/entry/syscalls/syscall.tbl arch/sh/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/sparc/entry/syscalls/syscall.tbl arch/sparc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/xtensa/entry/syscalls/syscall.tbl arch/xtensa/kernel/syscalls/syscall.tbl diff -u tools/arch/arm64/include/uapi/asm/unistd.h arch/arm64/include/uapi/asm/unistd.h diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-6-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/arm64/include/uapi/asm/unistd.h | 24 +------------------ tools/include/uapi/asm-generic/unistd.h | 4 +++- .../perf/arch/arm/entry/syscalls/syscall.tbl | 1 + .../arch/mips/entry/syscalls/syscall_n64.tbl | 1 + .../arch/powerpc/entry/syscalls/syscall.tbl | 1 + .../perf/arch/s390/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/sh/entry/syscalls/syscall.tbl | 1 + .../arch/sparc/entry/syscalls/syscall.tbl | 1 + .../arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- .../arch/x86/entry/syscalls/syscall_64.tbl | 1 + .../arch/xtensa/entry/syscalls/syscall.tbl | 1 + tools/scripts/syscall.tbl | 1 + 12 files changed, 15 insertions(+), 25 deletions(-) diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index 9306726337fe00..df36f23876e863 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -1,24 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#define __ARCH_WANT_RENAMEAT -#define __ARCH_WANT_NEW_STAT -#define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_TIME32_SYSCALLS -#define __ARCH_WANT_MEMFD_SECRET - -#include +#include diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 88dc393c2bca38..2892a45023af6d 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 468 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index 49eeb2ad8dbd8e..27c1d5ebcd91c8 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -481,3 +481,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index c844cd5cda620b..1e8c44c7b61492 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -381,3 +381,4 @@ 464 n64 getxattrat sys_getxattrat 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat +467 n64 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index d8b4ab78bef076..9a084bdb892694 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -557,3 +557,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index e9115b4d8b635b..a4569b96ef06c5 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index c8cad33bf250ea..52a7652fcff639 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -470,3 +470,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 727f99d333b304..83e45eb6c095a3 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -512,3 +512,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index 4d0fb2fba7e208..ac007ea00979dc 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -396,7 +396,7 @@ 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free 383 i386 statx sys_statx -384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl +384 i386 arch_prctl sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents 386 i386 rseq sys_rseq 393 i386 semget sys_semget @@ -472,3 +472,4 @@ 464 i386 getxattrat sys_getxattrat 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat +467 i386 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 5eb708bff1c791..cfb5ca41e30de1 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -390,6 +390,7 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index 37effc1b134eea..f657a77314f866 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index ebbdb3c42e9f74..580b4e246aecd5 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl @@ -407,3 +407,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr From df4bd8c76d49cf5948d63987f4a795c544155906 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:21 -0700 Subject: [PATCH 0403/2924] tools headers: Update the uapi/linux/prctl.h copy with the kernel sources To pick up the changes in: ec2d0c04624b3c8a posix-timers: Provide a mechanism to allocate a given timer ID Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: Thomas Gleixner Link: https://lore.kernel.org/r/20250410001125.391820-7-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/linux/prctl.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 5c6080680cb27b..15c18ef4eb11a0 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -353,4 +353,15 @@ struct prctl_mm_map { */ #define PR_LOCK_SHADOW_STACK_STATUS 76 +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + #endif /* _LINUX_PRCTL_H */ From 4056cf407253ac81fc960088acfe9579496a871f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:22 -0700 Subject: [PATCH 0404/2924] tools headers: Update the uapi/asm-generic/mman-common.h copy with the kernel sources To pick up the changes in: 6d61527d931ba07b mm/pkey: Add PKEY_UNRESTRICTED macro Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/mman-common.h include/uapi/asm-generic/mman-common.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-8-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/asm-generic/mman-common.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86a2..ef1c27fa3c570f 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ From 74709981873d2baa5573735b1f24108206d0197e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:23 -0700 Subject: [PATCH 0405/2924] tools headers: Update the linux/unaligned.h copy with the kernel sources To pick up the changes in: 3846699217798061 ALSA: rawmidi: Make tied_device=0 as default / unknown 7bb49d2e8b52adac ALSA: rawmidi: Bump protocol version to 2.0.5 b8fefed73a952a33 ALSA: rawmidi: Show substream activity in info ioctl bdf46443f350dd5d ALSA: rawmidi: Expose the tied device number in info ioctl Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/sound/asound.h include/uapi/sound/asound.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-sound@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-9-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/sound/asound.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 4cd513215bcd8f..5a049eeaeccea5 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -716,7 +716,7 @@ enum { * Raw MIDI section - /dev/snd/midi?? */ -#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4) +#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 5) enum { SNDRV_RAWMIDI_STREAM_OUTPUT = 0, @@ -728,6 +728,9 @@ enum { #define SNDRV_RAWMIDI_INFO_INPUT 0x00000002 #define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004 #define SNDRV_RAWMIDI_INFO_UMP 0x00000008 +#define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE 0x00000010 + +#define SNDRV_RAWMIDI_DEVICE_UNKNOWN 0 struct snd_rawmidi_info { unsigned int device; /* RO/WR (control): device number */ @@ -740,7 +743,8 @@ struct snd_rawmidi_info { unsigned char subname[32]; /* name of active or selected subdevice */ unsigned int subdevices_count; unsigned int subdevices_avail; - unsigned char reserved[64]; /* reserved for future use */ + int tied_device; /* R: tied rawmidi device (UMP/legacy) */ + unsigned char reserved[60]; /* reserved for future use */ }; #define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0) From 847f1403d3ee51278dfbece84ec7f199de43daa5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:24 -0700 Subject: [PATCH 0406/2924] tools headers: Update the x86 headers with the kernel sources To pick up the changes in: 841326332bcb13ae x86/cpufeatures: Generate the header based on build config 440a65b7d25fb06f x86/mm: Enable AMD translation cache extensions 767ae437a32d6447 x86/mm: Add INVLPGB feature and Kconfig entry b4cc466b97359011 cpufreq/amd-pstate: Replace all AMD_CPPC_* macros with masks 98c7a713db91c5a9 x86/bugs: Add X86_BUG_SPECTRE_V2_USER 8f64eee70cdd3bb8 x86/bugs: Remove X86_FEATURE_USE_IBPB 8442df2b49ed9bcd x86/bugs: KVM: Add support for SRSO_MSR_FIX 70792aed14551e31 x86/cpufeatures: Add CPUID feature bit for Idle HLT intercept 968e9bc4cef87054 x86: move ZMM exclusion list into CPU feature flag c631a2de7ae48d50 perf/x86/intel: Ensure LBRs are disabled when a CPU is starting 38cc6495cdec18a4 x86/sev: Prevent GUEST_TSC_FREQ MSR interception for Secure TSC enabled guests 288bba2f4c8be1e1 x86/cpufeatures: Remove "AMD" from the comments to the AMD-specific leaf 877818802c3e970f x86/bugs: Add SRSO_USER_KERNEL_NO support 8ae3291f773befee x86/sev: Add full support for a segmented RMP table 0cbc0258415814c8 x86/sev: Add support for the RMPREAD instruction 7a470e826d7521be x86/cpufeatures: Free up unused feature bits Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: x86@kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-10-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/include/asm/cpufeatures.h | 28 +++++++++++++++------ tools/arch/x86/include/asm/msr-index.h | 31 +++++++++++++++--------- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 9e3fa7942e7d3f..6c2c152d8a67b9 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -75,8 +75,8 @@ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ #define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ #define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +/* Free ( 3*32+ 6) */ +/* Free ( 3*32+ 7) */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ #define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ #define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ @@ -329,6 +329,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ @@ -377,6 +378,7 @@ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ #define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ @@ -434,15 +436,18 @@ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ +#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ +#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ +#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ @@ -455,6 +460,11 @@ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ +#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */ +#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /* + * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs. + * (SRSO_MSR_FIX in the official doc). + */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -470,6 +480,7 @@ #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) @@ -521,4 +532,5 @@ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index dc1c1057f26e45..e6134ef2263d50 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -397,7 +397,8 @@ #define MSR_IA32_PASID_VALID BIT_ULL(31) /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) @@ -610,6 +611,7 @@ #define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_AMD_PERF_STATUS 0xc0010063 #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 @@ -646,6 +648,7 @@ #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ #define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 @@ -684,11 +687,12 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_RESV_BIT 18 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) - -#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f - #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_AMD64_RMP_CFG 0xc0010136 +#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0 +#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT) +#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8) #define MSR_SVSM_CAA 0xc001f000 @@ -699,15 +703,17 @@ #define MSR_AMD_CPPC_REQ 0xc00102b3 #define MSR_AMD_CPPC_STATUS 0xc00102b4 -#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) -#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) -#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) -#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) +/* Masks for use with MSR_AMD_CPPC_CAP1 */ +#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24) -#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) -#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) -#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) -#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) +/* Masks for use with MSR_AMD_CPPC_REQ */ +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) /* AMD Performance Counter Global Status and Control MSRs */ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 @@ -719,6 +725,7 @@ /* Zen4 */ #define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 /* Fam 19h MSRs */ From 7f56978e5876521eaa90fda0e63630fa64f69bce Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:25 -0700 Subject: [PATCH 0407/2924] tools headers: Update the arch/x86/lib/memset_64.S copy with the kernel sources To pick up the changes in: 2981557cb0408e14 x86,kcfi: Fix EXPORT_SYMBOL vs kCFI That required adding a copy of include/linux/cfi_types.h and its checking in tools/perf/check-headers.h. Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: x86@kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-11-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/lib/memset_64.S | 3 ++- tools/include/linux/cfi_types.h | 45 +++++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tools/include/linux/cfi_types.h diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 0199d56cb479d8..d66b710d628f88 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -28,7 +29,7 @@ * only for the return value that is the same as the source input, * which the compiler could/should do much better anyway. */ -SYM_FUNC_START(__memset) +SYM_TYPED_FUNC_START(__memset) ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS movq %rdi,%r9 diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h new file mode 100644 index 00000000000000..6b87136757655c --- /dev/null +++ b/tools/include/linux/cfi_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include + +#ifdef CONFIG_CFI_CLANG +/* + * Use the __kcfi_typeid_ type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI_CLANG */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI_CLANG */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index a4499e5a6f9cb0..857f6646cc23e9 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -20,6 +20,7 @@ FILES=( "include/uapi/linux/stat.h" "include/linux/bits.h" "include/vdso/bits.h" + "include/linux/cfi_types.h" "include/linux/const.h" "include/vdso/const.h" "include/vdso/unaligned.h" From eb73b5a9157221f405b4fe32751da84ee46b7a25 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 1 Apr 2025 13:02:08 -0400 Subject: [PATCH 0408/2924] Bluetooth: hci_event: Fix sending MGMT_EV_DEVICE_FOUND for invalid address This fixes sending MGMT_EV_DEVICE_FOUND for invalid address (00:00:00:00:00:00) which is a regression introduced by a2ec905d1e16 ("Bluetooth: fix kernel oops in store_pending_adv_report") since in the attempt to skip storing data for extended advertisement it actually made the code to skip the entire if statement supposed to send MGMT_EV_DEVICE_FOUND without attempting to use the last_addr_adv which is garanteed to be invalid for extended advertisement since we never store anything on it. Link: https://github.com/bluez/bluez/issues/1157 Link: https://github.com/bluez/bluez/issues/1149#issuecomment-2767215658 Fixes: a2ec905d1e16 ("Bluetooth: fix kernel oops in store_pending_adv_report") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1d8616f2e740ae..5f808f0b0e9a2a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6160,11 +6160,12 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * event or send an immediate device found event if the data * should not be stored for later. */ - if (!ext_adv && !has_pending_adv_report(hdev)) { + if (!has_pending_adv_report(hdev)) { /* If the report will trigger a SCAN_REQ store it for * later merging. */ - if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { + if (!ext_adv && (type == LE_ADV_IND || + type == LE_ADV_SCAN_IND)) { store_pending_adv_report(hdev, bdaddr, bdaddr_type, rssi, flags, data, len); return; From 324dddea321078a6eeb535c2bff5257be74c9799 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Apr 2025 14:01:41 +0300 Subject: [PATCH 0409/2924] Bluetooth: btrtl: Prevent potential NULL dereference The btrtl_initialize() function checks that rtl_load_file() either had an error or it loaded a zero length file. However, if it loaded a zero length file then the error code is not set correctly. It results in an error pointer vs NULL bug, followed by a NULL pointer dereference. This was detected by Smatch: drivers/bluetooth/btrtl.c:592 btrtl_initialize() warn: passing zero to 'ERR_PTR' Fixes: 26503ad25de8 ("Bluetooth: btrtl: split the device initialization into smaller parts") Signed-off-by: Dan Carpenter Reviewed-by: Hans de Goede Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btrtl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index d3eba0d4a57d3b..7838c89e529e0c 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -1215,6 +1215,8 @@ struct btrtl_device_info *btrtl_initialize(struct hci_dev *hdev, rtl_dev_err(hdev, "mandatory config file %s not found", btrtl_dev->ic_info->cfg_name); ret = btrtl_dev->cfg_len; + if (!ret) + ret = -EINVAL; goto err_free; } } From e92900c9803fb35ad6cf599cb268b8ddd9f91940 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 1 Apr 2025 18:04:03 +0300 Subject: [PATCH 0410/2924] Bluetooth: qca: fix NV variant for one of WCN3950 SoCs The QCA_WCN3950_SOC_ID_S should be using qca/cmnv13s.bin, rather than qca/cmnv13u.bin file. Correct the variant suffix to be used for this SoC ID. Fixes: d5712c511cb3 ("Bluetooth: qca: add WCN3950 support") Reported-by: Wojciech Slenska Closes: https://github.com/qualcomm-linux/meta-qcom/pull/817#discussion_r2022866431 Signed-off-by: Dmitry Baryshkov Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 3d6778b95e0058..edefb9dc76aa1a 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -889,7 +889,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, if (le32_to_cpu(ver.soc_id) == QCA_WCN3950_SOC_ID_T) variant = "t"; else if (le32_to_cpu(ver.soc_id) == QCA_WCN3950_SOC_ID_S) - variant = "u"; + variant = "s"; snprintf(config.fwname, sizeof(config.fwname), "qca/cmnv%02x%s.bin", rom_ver, variant); From c174cd0945ad3f1b7e48781e0d22b94f9e89b28b Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 3 Apr 2025 19:57:59 +0300 Subject: [PATCH 0411/2924] Bluetooth: increment TX timestamping tskey always for stream sockets Documentation/networking/timestamping.rst implies TX timestamping OPT_ID tskey increments for each sendmsg. In practice: TCP socket increments it for all sendmsg, timestamping on or off, but UDP only when timestamping is on. The user-visible counter resets when OPT_ID is turned on, so difference can be seen only if timestamping is enabled for some packets only (eg. via SO_TIMESTAMPING CMSG). Fix BT sockets to work in the same way: stream sockets increment tskey for all sendmsg (seqpacket already increment for timestamped only). Fixes: 134f4b39df7b ("Bluetooth: add support for skb TX SND/COMPLETION timestamping") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 95972fd4c78434..7e1b53857648df 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -3072,6 +3072,7 @@ void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, const struct sockcm_cookie *sockc) { struct sock *sk = skb ? skb->sk : NULL; + int key; /* This shall be called on a single skb of those generated by user * sendmsg(), and only when the sendmsg() does not return error to @@ -3087,13 +3088,16 @@ void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, sock_tx_timestamp(sk, sockc, &skb_shinfo(skb)->tx_flags); + if (sk->sk_type == SOCK_STREAM) + key = atomic_add_return(key_offset, &sk->sk_tskey); + if (sockc->tsflags & SOF_TIMESTAMPING_OPT_ID && sockc->tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { if (sockc->tsflags & SOCKCM_FLAG_TS_OPT_ID) { skb_shinfo(skb)->tskey = sockc->ts_opt_id; } else { - int key = atomic_add_return(key_offset, &sk->sk_tskey); - + if (sk->sk_type != SOCK_STREAM) + key = atomic_inc_return(&sk->sk_tskey); skb_shinfo(skb)->tskey = key - 1; } } From 61a9c6e39c8dfe2fd56dee44f817cf2a1b3f3c71 Mon Sep 17 00:00:00 2001 From: Neeraj Sanjay Kale Date: Thu, 3 Apr 2025 20:32:23 +0530 Subject: [PATCH 0412/2924] Bluetooth: btnxpuart: Revert baudrate change in nxp_shutdown This reverts the change baudrate logic in nxp_shutdown. Earlier, when the driver was removed, it restored the controller baudrate to fw_init_baudrate, so that on re-loading the driver, things work fine. However, if the driver was removed while hci0 interface is down, the change baudrate vendor command could not be sent by the driver. When the driver was re-loaded, host and controller baudrate would be mismatched and hci initialization would fail. The only way to recover would be to reboot the system. This issue was fixed by moving the restore baudrate logic from nxp_serdev_remove() to nxp_shutdown(). This fix however caused another issue with the command "hciconfig hci0 reset", which makes hci0 DOWN and UP immediately. Running "bluetoothctl power off" and "bluetoothctl power on" in a tight loop works fine. To maintain support for "hciconfig reset" command, the above mentioned fix is reverted. Fixes: 6fca6781d19d ("Bluetooth: btnxpuart: Move vendor specific initialization to .post_init") Signed-off-by: Neeraj Sanjay Kale Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btnxpuart.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 5091dea762a073..2d58b8be2c5fb4 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1445,9 +1445,6 @@ static int nxp_shutdown(struct hci_dev *hdev) /* HCI_NXP_IND_RESET command may not returns any response */ if (!IS_ERR(skb)) kfree_skb(skb); - } else if (nxpdev->current_baudrate != nxpdev->fw_init_baudrate) { - nxpdev->new_baudrate = nxpdev->fw_init_baudrate; - nxp_set_baudrate_cmd(hdev, NULL); } return 0; @@ -1799,13 +1796,15 @@ static void nxp_serdev_remove(struct serdev_device *serdev) clear_bit(BTNXPUART_FW_DOWNLOADING, &nxpdev->tx_state); wake_up_interruptible(&nxpdev->check_boot_sign_wait_q); wake_up_interruptible(&nxpdev->fw_dnld_done_wait_q); - } - - if (test_bit(HCI_RUNNING, &hdev->flags)) { - /* Ensure shutdown callback is executed before unregistering, so - * that baudrate is reset to initial value. + } else { + /* Restore FW baudrate to fw_init_baudrate if changed. + * This will ensure FW baudrate is in sync with + * driver baudrate in case this driver is re-inserted. */ - nxp_shutdown(hdev); + if (nxpdev->current_baudrate != nxpdev->fw_init_baudrate) { + nxpdev->new_baudrate = nxpdev->fw_init_baudrate; + nxp_set_baudrate_cmd(hdev, NULL); + } } ps_cleanup(nxpdev); From 103308e50db92d1e705cd9818aaf7fb327c14fad Mon Sep 17 00:00:00 2001 From: Neeraj Sanjay Kale Date: Thu, 3 Apr 2025 20:32:22 +0530 Subject: [PATCH 0413/2924] Bluetooth: btnxpuart: Add an error message if FW dump trigger fails This prints an error message if the FW Dump trigger command fails. This scenario is mainly observed in legacy chipsets 8987 and 8997 and also IW416, where this feature is unavailable due to memory constraints. Fixes: 998e447f443f ("Bluetooth: btnxpuart: Add support for HCI coredump feature") Signed-off-by: Neeraj Sanjay Kale Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btnxpuart.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 2d58b8be2c5fb4..604ab2bba231c5 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1286,7 +1286,9 @@ static void nxp_coredump(struct hci_dev *hdev) u8 pcmd = 2; skb = nxp_drv_send_cmd(hdev, HCI_NXP_TRIGGER_DUMP, 1, &pcmd); - if (!IS_ERR(skb)) + if (IS_ERR(skb)) + bt_dev_err(hdev, "Failed to trigger FW Dump. (%ld)", PTR_ERR(skb)); + else kfree_skb(skb); } From 522e9ed157e3c21b4dd623c79967f72c21e45b78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Wed, 9 Apr 2025 10:53:06 +0200 Subject: [PATCH 0414/2924] Bluetooth: l2cap: Check encryption key size on incoming connection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required for passing GAP/SEC/SEM/BI-04-C PTS test case: Security Mode 4 Level 4, Responder - Invalid Encryption Key Size - 128 bit This tests the security key with size from 1 to 15 bytes while the Security Mode 4 Level 4 requests 16 bytes key size. Currently PTS fails with the following logs: - expected:Connection Response: Code: [3 (0x03)] Code Identifier: (lt)WildCard: Exists(gt) Length: [8 (0x0008)] Destination CID: (lt)WildCard: Exists(gt) Source CID: [64 (0x0040)] Result: [3 (0x0003)] Connection refused - Security block Status: (lt)WildCard: Exists(gt), but received:Connection Response: Code: [3 (0x03)] Code Identifier: [1 (0x01)] Length: [8 (0x0008)] Destination CID: [64 (0x0040)] Source CID: [64 (0x0040)] Result: [0 (0x0000)] Connection Successful Status: [0 (0x0000)] No further information available And HCI logs: < HCI Command: Read Encrypti.. (0x05|0x0008) plen 2 Handle: 14 Address: 00:1B:DC:F2:24:10 (Vencer Co., Ltd.) > HCI Event: Command Complete (0x0e) plen 7 Read Encryption Key Size (0x05|0x0008) ncmd 1 Status: Success (0x00) Handle: 14 Address: 00:1B:DC:F2:24:10 (Vencer Co., Ltd.) Key size: 7 > ACL Data RX: Handle 14 flags 0x02 dlen 12 L2CAP: Connection Request (0x02) ident 1 len 4 PSM: 4097 (0x1001) Source CID: 64 < ACL Data TX: Handle 14 flags 0x00 dlen 16 L2CAP: Connection Response (0x03) ident 1 len 8 Destination CID: 64 Source CID: 64 Result: Connection successful (0x0000) Status: No further information available (0x0000) Fixes: 288c06973daa ("Bluetooth: Enforce key size of 16 bytes on FIPS level") Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c7b66b2ea9f210..f1c4b8bd7a8b35 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3991,7 +3991,8 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, /* Check if the ACL is secure enough (if not SDP) */ if (psm != cpu_to_le16(L2CAP_PSM_SDP) && - !hci_conn_check_link_mode(conn->hcon)) { + (!hci_conn_check_link_mode(conn->hcon) || + !l2cap_check_enc_key_size(conn->hcon))) { conn->disc_reason = HCI_ERROR_AUTH_FAILURE; result = L2CAP_CR_SEC_BLOCK; goto response; From adf53771a3123df99ca26e38818760fbcf5c05d0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 9 Apr 2025 14:24:46 -0700 Subject: [PATCH 0415/2924] riscv: Avoid fortify warning in syscall_get_arguments() When building with CONFIG_FORTIFY_SOURCE=y and W=1, there is a warning because of the memcpy() in syscall_get_arguments(): In file included from include/linux/string.h:392, from include/linux/bitmap.h:13, from include/linux/cpumask.h:12, from arch/riscv/include/asm/processor.h:55, from include/linux/sched.h:13, from kernel/ptrace.c:13: In function 'fortify_memcpy_chk', inlined from 'syscall_get_arguments.isra' at arch/riscv/include/asm/syscall.h:66:2: include/linux/fortify-string.h:580:25: error: call to '__read_overflow2_field' declared with attribute warning: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Werror=attribute-warning] 580 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The fortified memcpy() routine enforces that the source is not overread and the destination is not overwritten if the size of either field and the size of the copy are known at compile time. The memcpy() in syscall_get_arguments() intentionally overreads from a1 to a5 in 'struct pt_regs' but this is bigger than the size of a1. Normally, this could be solved by wrapping a1 through a5 with struct_group() but there was already a struct_group() applied to these members in commit bba547810c66 ("riscv: tracing: Fix __write_overflow_field in ftrace_partial_regs()"). Just avoid memcpy() altogether and write the copying of args from regs manually, which clears up the warning at the expense of three extra lines of code. Signed-off-by: Nathan Chancellor Reviewed-by: Dmitry V. Levin Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250409-riscv-avoid-fortify-warning-syscall_get_arguments-v1-1-7853436d4755@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/syscall.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 121fff429dce66..eceabf59ae482a 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -62,8 +62,11 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { args[0] = regs->orig_a0; - args++; - memcpy(args, ®s->a1, 5 * sizeof(args[0])); + args[1] = regs->a1; + args[2] = regs->a2; + args[3] = regs->a3; + args[4] = regs->a4; + args[5] = regs->a5; } static inline int syscall_get_arch(struct task_struct *task) From ffc59e32c67e599cc473d6427a4aa584399d5b3c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 10 Apr 2025 15:32:20 +0300 Subject: [PATCH 0416/2924] RDMA/bnxt_re: Remove unusable nq variable Remove nq variable from bnxt_re_create_srq() and bnxt_re_destroy_srq() as it generates the following compilation warnings: >> drivers/infiniband/hw/bnxt_re/ib_verbs.c:1777:24: warning: variable 'nq' set but not used [-Wunused-but-set-variable] 1777 | struct bnxt_qplib_nq *nq = NULL; | ^ drivers/infiniband/hw/bnxt_re/ib_verbs.c:1828:24: warning: variable 'nq' set but not used [-Wunused-but-set-variable] 1828 | struct bnxt_qplib_nq *nq = NULL; | ^ 2 warnings generated. Fixes: 6b395d31146a ("RDMA/bnxt_re: Fix budget handling of notification queue") Link: https://patch.msgid.link/r/8a4343e217d7d1c0a5a786b785c4ac57cb72a2a0.1744288299.git.leonro@nvidia.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504091055.CzgXnk4C-lkp@intel.com/ Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e14b05cd089ab0..063801384b2b04 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1774,10 +1774,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) ib_srq); struct bnxt_re_dev *rdev = srq->rdev; struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq; - struct bnxt_qplib_nq *nq = NULL; - if (qplib_srq->cq) - nq = qplib_srq->cq->nq; if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) { free_page((unsigned long)srq->uctx_srq_page); hash_del(&srq->hash_entry); @@ -1825,7 +1822,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; - struct bnxt_qplib_nq *nq = NULL; struct bnxt_re_ucontext *uctx; struct bnxt_re_dev *rdev; struct bnxt_re_srq *srq; @@ -1871,7 +1867,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; srq->qplib_srq.sg_info.pgsize = PAGE_SIZE; srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT; - nq = &rdev->nqr->nq[0]; if (udata) { rc = bnxt_re_init_user_srq(rdev, pd, srq, udata); From dcdae6e92d4e062da29235fe88980604595e3f0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Wed, 9 Apr 2025 17:50:06 -0300 Subject: [PATCH 0417/2924] drm/v3d: Fix Indirect Dispatch configuration for V3D 7.1.6 and later MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit is a resubmission of commit 1fe1c66274fb ("drm/v3d: Fix Indirect Dispatch configuration for V3D 7.1.6 and later"), which was accidentally reverted by commit 91dae758bdb8 ("Merge tag 'drm-misc-next-2024-08-01' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-next"), likely due to an unfortunate conflict resolution. From the original commit message: ``` `args->cfg[4]` is configured in Indirect Dispatch using the number of batches. Currently, for all V3D tech versions, `args->cfg[4]` equals the number of batches subtracted by 1. But, for V3D 7.1.6 and later, we must not subtract 1 from the number of batches. Implement the fix by checking the V3D tech version and revision. Fixes several `dEQP-VK.synchronization*` CTS tests related to Indirect Dispatch. ``` Fixes: 91dae758bdb8 ("Merge tag 'drm-misc-next-2024-08-01' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-next") Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://lore.kernel.org/r/20250409205051.9639-1-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_sched.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 34c42d6e12cde6..4a7701a33cf8d7 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -428,7 +428,8 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); struct drm_v3d_submit_csd *args = &indirect_csd->job->args; - u32 *wg_counts; + struct v3d_dev *v3d = job->base.v3d; + u32 num_batches, *wg_counts; v3d_get_bo_vaddr(bo); v3d_get_bo_vaddr(indirect); @@ -441,8 +442,17 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; - args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * - (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; + + num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) * + (wg_counts[0] * wg_counts[1] * wg_counts[2]); + + /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ + if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6)) + args->cfg[4] = num_batches - 1; + else + args->cfg[4] = num_batches; + + WARN_ON(args->cfg[4] == ~0); for (int i = 0; i < 3; i++) { /* 0xffffffff indicates that the uniform rewrite is not needed */ From 1ddb9ad2ac6e527f220d5821ad54d37d3f9d122a Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 10 Apr 2025 10:00:23 -0700 Subject: [PATCH 0418/2924] selftests/bpf: Make res_spin_lock AA test condition stronger Let's make sure that we see a EDEADLK and ETIMEDOUT whenever checking for the AA tests (in case of simple AA and AA after exhausting 31 entries). Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250410170023.2670683-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/res_spin_lock.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c index b33385dfbd3503..22c4fb8b9266da 100644 --- a/tools/testing/selftests/bpf/progs/res_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c @@ -38,13 +38,14 @@ int res_spin_lock_test(struct __sk_buff *ctx) r = bpf_res_spin_lock(&elem1->lock); if (r) return r; - if (!bpf_res_spin_lock(&elem2->lock)) { + r = bpf_res_spin_lock(&elem2->lock); + if (!r) { bpf_res_spin_unlock(&elem2->lock); bpf_res_spin_unlock(&elem1->lock); return -1; } bpf_res_spin_unlock(&elem1->lock); - return 0; + return r != -EDEADLK; } SEC("tc") @@ -124,12 +125,15 @@ int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx) /* Trigger AA, after exhausting entries in the held lock table. This * time, only the timeout can save us, as AA detection won't succeed. */ - if (!bpf_res_spin_lock(locks[34])) { + ret = bpf_res_spin_lock(locks[34]); + if (!ret) { bpf_res_spin_unlock(locks[34]); ret = 1; goto end; } + ret = ret != -ETIMEDOUT ? 2 : 0; + end: for (i = i - 1; i >= 0; i--) bpf_res_spin_unlock(locks[i]); From 92b90f780d056a28f3c751c2dfbcd9540c7ae28a Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 10 Apr 2025 07:55:12 -0700 Subject: [PATCH 0419/2924] bpf: Use architecture provided res_smp_cond_load_acquire In v2 of rqspinlock [0], we fixed potential problems with WFE usage in arm64 to fallback to a version copied from Ankur's series [1]. This logic was moved into arch-specific headers in v3 [2]. However, we missed using the arch-provided res_smp_cond_load_acquire in commit ebababcd0372 ("rqspinlock: Hardcode cond_acquire loops for arm64") due to a rebasing mistake between v2 and v3 of the rqspinlock series. Fix the typo to fallback to the arm64 definition as we did in v2. [0]: https://lore.kernel.org/bpf/20250206105435.2159977-18-memxor@gmail.com [1]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com [2]: https://lore.kernel.org/bpf/20250303152305.3195648-9-memxor@gmail.com Fixes: ebababcd0372 ("rqspinlock: Hardcode cond_acquire loops for arm64") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250410145512.1876745-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- arch/arm64/include/asm/rqspinlock.h | 2 +- kernel/bpf/rqspinlock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/rqspinlock.h b/arch/arm64/include/asm/rqspinlock.h index 5b80785324b6c3..9ea0a74e589273 100644 --- a/arch/arm64/include/asm/rqspinlock.h +++ b/arch/arm64/include/asm/rqspinlock.h @@ -86,7 +86,7 @@ #endif -#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1) +#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1) #include diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c index b896c4a75a5c9b..338305c8852cf6 100644 --- a/kernel/bpf/rqspinlock.c +++ b/kernel/bpf/rqspinlock.c @@ -253,7 +253,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask, }) #else #define RES_CHECK_TIMEOUT(ts, ret, mask) \ - ({ (ret) = check_timeout(&(ts)); }) + ({ (ret) = check_timeout((lock), (mask), &(ts)); }) #endif /* From 2f41503d647629cfafea42cf6f827e4139536703 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 10 Apr 2025 08:31:42 -0700 Subject: [PATCH 0420/2924] bpf: Convert queue_stack map to rqspinlock Replace all usage of raw_spinlock_t in queue_stack_maps.c with rqspinlock. This is a map type with a set of open syzbot reports reproducing possible deadlocks. Prior attempt to fix the issues was at [0], but was dropped in favor of this approach. Make sure we return the -EBUSY error in case of possible deadlocks or timeouts, just to make sure user space or BPF programs relying on the error code to detect problems do not break. With these changes, the map should be safe to access in any context, including NMIs. [0]: https://lore.kernel.org/all/20240429165658.1305969-1-sidchintamaneni@gmail.com Reported-by: syzbot+8bdfc2c53fb2b63e1871@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/0000000000004c3fc90615f37756@google.com Reported-by: syzbot+252bc5c744d0bba917e1@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000c80abd0616517df9@google.com Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250410153142.2064340-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/queue_stack_maps.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index d869f51ea93a0e..9a5f94371e5065 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -9,13 +9,14 @@ #include #include #include "percpu_freelist.h" +#include #define QUEUE_STACK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) struct bpf_queue_stack { struct bpf_map map; - raw_spinlock_t lock; + rqspinlock_t lock; u32 head, tail; u32 size; /* max_entries + 1 */ @@ -78,7 +79,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) qs->size = size; - raw_spin_lock_init(&qs->lock); + raw_res_spin_lock_init(&qs->lock); return &qs->map; } @@ -98,12 +99,8 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete) int err = 0; void *ptr; - if (in_nmi()) { - if (!raw_spin_trylock_irqsave(&qs->lock, flags)) - return -EBUSY; - } else { - raw_spin_lock_irqsave(&qs->lock, flags); - } + if (raw_res_spin_lock_irqsave(&qs->lock, flags)) + return -EBUSY; if (queue_stack_map_is_empty(qs)) { memset(value, 0, qs->map.value_size); @@ -120,7 +117,7 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete) } out: - raw_spin_unlock_irqrestore(&qs->lock, flags); + raw_res_spin_unlock_irqrestore(&qs->lock, flags); return err; } @@ -133,12 +130,8 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete) void *ptr; u32 index; - if (in_nmi()) { - if (!raw_spin_trylock_irqsave(&qs->lock, flags)) - return -EBUSY; - } else { - raw_spin_lock_irqsave(&qs->lock, flags); - } + if (raw_res_spin_lock_irqsave(&qs->lock, flags)) + return -EBUSY; if (queue_stack_map_is_empty(qs)) { memset(value, 0, qs->map.value_size); @@ -157,7 +150,7 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete) qs->head = index; out: - raw_spin_unlock_irqrestore(&qs->lock, flags); + raw_res_spin_unlock_irqrestore(&qs->lock, flags); return err; } @@ -203,12 +196,8 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value, if (flags & BPF_NOEXIST || flags > BPF_EXIST) return -EINVAL; - if (in_nmi()) { - if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags)) - return -EBUSY; - } else { - raw_spin_lock_irqsave(&qs->lock, irq_flags); - } + if (raw_res_spin_lock_irqsave(&qs->lock, irq_flags)) + return -EBUSY; if (queue_stack_map_is_full(qs)) { if (!replace) { @@ -227,7 +216,7 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value, qs->head = 0; out: - raw_spin_unlock_irqrestore(&qs->lock, irq_flags); + raw_res_spin_unlock_irqrestore(&qs->lock, irq_flags); return err; } From bcaa391e177c06a58c5f3cd18484a317d40239aa Mon Sep 17 00:00:00 2001 From: Jun Nie Date: Mon, 3 Mar 2025 23:14:30 +0800 Subject: [PATCH 0421/2924] drm/msm/dpu: check every pipe per capability The capability stored in sblk and pipe_hw_caps is checked only for SSPP of the first pipe in the pair with current implementation. That of the 2nd pipe, r_pipe, is not checked and may violate hardware capability. Move requirement check to dpu_plane_atomic_check_pipe() for the check of every pipe. Fixes: ("dbbf57dfd04e6 drm/msm/dpu: split dpu_plane_atomic_check()") Signed-off-by: Jun Nie Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/640513/ Link: https://lore.kernel.org/r/20250303-sm8650-v6-14-hmd-deckard-mdss-quad-upstream-oldbootwrapper-36-prep-v8-1-eb5df105c807@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 71 ++++++++++++----------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index af3e541f60c303..aeb90c287245d6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -729,12 +729,40 @@ static int dpu_plane_check_inline_rotation(struct dpu_plane *pdpu, static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, struct dpu_sw_pipe *pipe, struct dpu_sw_pipe_cfg *pipe_cfg, - const struct msm_format *fmt, - const struct drm_display_mode *mode) + const struct drm_display_mode *mode, + struct drm_plane_state *new_plane_state) { uint32_t min_src_size; struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base); int ret; + const struct msm_format *fmt; + uint32_t supported_rotations; + const struct dpu_sspp_cfg *pipe_hw_caps; + const struct dpu_sspp_sub_blks *sblk; + + pipe_hw_caps = pipe->sspp->cap; + sblk = pipe->sspp->cap->sblk; + + /* + * We already have verified scaling against platform limitations. + * Now check if the SSPP supports scaling at all. + */ + if (!sblk->scaler_blk.len && + ((drm_rect_width(&new_plane_state->src) >> 16 != + drm_rect_width(&new_plane_state->dst)) || + (drm_rect_height(&new_plane_state->src) >> 16 != + drm_rect_height(&new_plane_state->dst)))) + return -ERANGE; + + fmt = msm_framebuffer_format(new_plane_state->fb); + + supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0; + + if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION)) + supported_rotations |= DRM_MODE_ROTATE_90; + + pipe_cfg->rotation = drm_rotation_simplify(new_plane_state->rotation, + supported_rotations); min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1; @@ -923,47 +951,20 @@ static int dpu_plane_atomic_check_sspp(struct drm_plane *plane, struct dpu_plane_state *pstate = to_dpu_plane_state(new_plane_state); struct dpu_sw_pipe *pipe = &pstate->pipe; struct dpu_sw_pipe *r_pipe = &pstate->r_pipe; - const struct msm_format *fmt; struct dpu_sw_pipe_cfg *pipe_cfg = &pstate->pipe_cfg; struct dpu_sw_pipe_cfg *r_pipe_cfg = &pstate->r_pipe_cfg; - uint32_t supported_rotations; - const struct dpu_sspp_cfg *pipe_hw_caps; - const struct dpu_sspp_sub_blks *sblk; int ret = 0; - pipe_hw_caps = pipe->sspp->cap; - sblk = pipe->sspp->cap->sblk; - - /* - * We already have verified scaling against platform limitations. - * Now check if the SSPP supports scaling at all. - */ - if (!sblk->scaler_blk.len && - ((drm_rect_width(&new_plane_state->src) >> 16 != - drm_rect_width(&new_plane_state->dst)) || - (drm_rect_height(&new_plane_state->src) >> 16 != - drm_rect_height(&new_plane_state->dst)))) - return -ERANGE; - - fmt = msm_framebuffer_format(new_plane_state->fb); - - supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0; - - if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION)) - supported_rotations |= DRM_MODE_ROTATE_90; - - pipe_cfg->rotation = drm_rotation_simplify(new_plane_state->rotation, - supported_rotations); - r_pipe_cfg->rotation = pipe_cfg->rotation; - - ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, - &crtc_state->adjusted_mode); + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, + &crtc_state->adjusted_mode, + new_plane_state); if (ret) return ret; if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) { - ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt, - &crtc_state->adjusted_mode); + ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, + &crtc_state->adjusted_mode, + new_plane_state); if (ret) return ret; } From 5cb1b130e1cd04239cc9c26a98279f4660dce583 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Thu, 13 Mar 2025 20:10:04 -0500 Subject: [PATCH 0422/2924] drm/msm/dpu: Fix error pointers in dpu_plane_virtual_atomic_check The function dpu_plane_virtual_atomic_check was dereferencing pointers returned by drm_atomic_get_plane_state without checking for errors. This could lead to undefined behavior if the function returns an error pointer. This commit adds checks using IS_ERR to ensure that plane_state is valid before dereferencing them. Similar to commit da29abe71e16 ("drm/amd/display: Fix error pointers in amdgpu_dm_crtc_mem_type_changed"). Fixes: 774bcfb73176 ("drm/msm/dpu: add support for virtual planes") Signed-off-by: Chenyuan Yang Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/643132/ Link: https://lore.kernel.org/r/20250314011004.663804-1-chenyuan0y@gmail.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index aeb90c287245d6..e03d6091f73640 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1060,6 +1060,9 @@ static int dpu_plane_virtual_atomic_check(struct drm_plane *plane, struct drm_crtc_state *crtc_state; int ret; + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); + if (plane_state->crtc) crtc_state = drm_atomic_get_new_crtc_state(state, plane_state->crtc); From 2a34496fef841e8d89a4ccd1a48c7fd664b5c84f Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Tue, 8 Apr 2025 18:22:23 +0100 Subject: [PATCH 0423/2924] drm/msm/dpu: reorder pointer operations after sanity checks to avoid NULL deref _dpu_encoder_trigger_start dereferences "struct dpu_encoder_phys *phys" before the sanity checks which can lead to a NULL pointer dereference if phys is NULL. Fix this by reordering the dereference after the sanity checks. Fixes: 8144d17a81d9 ("drm/msm/dpu: Skip trigger flush and start for CWB") Reviewed-by: Dmitry Baryshkov Signed-off-by: Qasim Ijaz Reviewed-by: Jessica Zhang Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/647536/ Link: https://lore.kernel.org/r/20250408172223.10827-1-qasdev00@gmail.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 284e69bb47c179..dd13dc4e95a4be 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1666,7 +1666,7 @@ static void _dpu_encoder_trigger_flush(struct drm_encoder *drm_enc, */ static void _dpu_encoder_trigger_start(struct dpu_encoder_phys *phys) { - struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(phys->parent); + struct dpu_encoder_virt *dpu_enc; if (!phys) { DPU_ERROR("invalid argument(s)\n"); @@ -1678,6 +1678,8 @@ static void _dpu_encoder_trigger_start(struct dpu_encoder_phys *phys) return; } + dpu_enc = to_dpu_encoder_virt(phys->parent); + if (phys->parent->encoder_type == DRM_MODE_ENCODER_VIRTUAL && dpu_enc->cwb_mask) { DPU_DEBUG("encoder %d CWB enabled, skipping\n", DRMID(phys->parent)); From ddfa00afae800b3dea02fa36f3f4012a8379ae58 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 8 Apr 2025 16:02:44 +0300 Subject: [PATCH 0424/2924] drm/msm/dpu: drop rogue intr_tear_rd_ptr values The commit 5a9d50150c2c ("drm/msm/dpu: shift IRQ indices by 1") shifted IRQ indices by 1, making 'NO_IRQ' to be 0 rather than -1 (and allowing to skip the definition if the IRQ is not present). Several platform files were sketched before that commit, but got applied afterwards. As such, they inherited historical (and currently incorrect) setting of .intr_tear_rd_ptr = -1 for 'NO_IRQ' value. Drop that setting for all the affected platforms. Fixes: 62af6e1cb596 ("drm/msm/dpu: Add support for MSM8917") Fixes: c079680bb0fa ("drm/msm/dpu: Add support for MSM8937") Fixes: 7a6109ce1c2c ("drm/msm/dpu: Add support for MSM8953") Fixes: daf9a92daeb8 ("drm/msm/dpu: Add support for MSM8996") Fixes: 7204df5e7e68 ("drm/msm/dpu: add support for SDM660 and SDM630 platforms") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/647486/ Link: https://lore.kernel.org/r/20250408-dpu-drop-intr-rd-ptr-v1-1-eeac337d88f8@oss.qualcomm.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h | 2 -- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h | 1 - drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h | 3 --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h | 4 ---- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h | 3 --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h | 2 -- 6 files changed, 15 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h index 1f32807bb5e5d4..ad60089f18ea6c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h @@ -132,7 +132,6 @@ static const struct dpu_intf_cfg msm8937_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, { .name = "intf_2", .id = INTF_2, .base = 0x6b000, .len = 0x268, @@ -141,7 +140,6 @@ static const struct dpu_intf_cfg msm8937_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h index 42131959ff2202..a1cf89a0a42d5f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h @@ -118,7 +118,6 @@ static const struct dpu_intf_cfg msm8917_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h index 2b4723a5c67606..eea9b80e2287a8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h @@ -131,7 +131,6 @@ static const struct dpu_intf_cfg msm8953_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), - .intr_tear_rd_ptr = -1, }, { .name = "intf_1", .id = INTF_1, .base = 0x6a800, .len = 0x268, @@ -140,7 +139,6 @@ static const struct dpu_intf_cfg msm8953_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, { .name = "intf_2", .id = INTF_2, .base = 0x6b000, .len = 0x268, @@ -149,7 +147,6 @@ static const struct dpu_intf_cfg msm8953_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h index 5cf19de71f0608..ae18a354e5d2a3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h @@ -241,7 +241,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = { .prog_fetch_lines_worst_case = 25, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), - .intr_tear_rd_ptr = -1, }, { .name = "intf_1", .id = INTF_1, .base = 0x6a800, .len = 0x268, @@ -250,7 +249,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = { .prog_fetch_lines_worst_case = 25, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, { .name = "intf_2", .id = INTF_2, .base = 0x6b000, .len = 0x268, @@ -259,7 +257,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = { .prog_fetch_lines_worst_case = 25, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), - .intr_tear_rd_ptr = -1, }, { .name = "intf_3", .id = INTF_3, .base = 0x6b800, .len = 0x268, @@ -267,7 +264,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = { .prog_fetch_lines_worst_case = 25, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h index 4f2f68b07f203a..bb89da0a481dec 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h @@ -202,7 +202,6 @@ static const struct dpu_intf_cfg sdm660_intf[] = { .prog_fetch_lines_worst_case = 21, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), - .intr_tear_rd_ptr = -1, }, { .name = "intf_1", .id = INTF_1, .base = 0x6a800, .len = 0x280, @@ -211,7 +210,6 @@ static const struct dpu_intf_cfg sdm660_intf[] = { .prog_fetch_lines_worst_case = 21, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, { .name = "intf_2", .id = INTF_2, .base = 0x6b000, .len = 0x280, @@ -220,7 +218,6 @@ static const struct dpu_intf_cfg sdm660_intf[] = { .prog_fetch_lines_worst_case = 21, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h index c70bef025ac419..7caf876ca3e30c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h @@ -147,7 +147,6 @@ static const struct dpu_intf_cfg sdm630_intf[] = { .prog_fetch_lines_worst_case = 21, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), - .intr_tear_rd_ptr = -1, }, { .name = "intf_1", .id = INTF_1, .base = 0x6a800, .len = 0x280, @@ -156,7 +155,6 @@ static const struct dpu_intf_cfg sdm630_intf[] = { .prog_fetch_lines_worst_case = 21, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, }; From 87cb582d2f55d379ce95b5bcc4ec596e29b0a65e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 9 Apr 2025 15:49:36 -0700 Subject: [PATCH 0425/2924] objtool: Fix false-positive "ignoring unreachables" warning There's no need to try to automatically disable unreachable warnings if they've already been manually disabled due to CONFIG_KCOV quirks. This avoids a spurious warning with a KCOV kernel: fs/smb/client/cifs_unicode.o: warning: objtool: cifsConvertToUTF16.part.0+0xce5: ignoring unreachables due to jump table quirk Fixes: eeff7ac61526 ("objtool: Warn when disabling unreachable warnings") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/5eb28eeb6a724b7d945a961cfdcf8d41e6edf3dc.1744238814.git.jpoimboe@kernel.org Closes: https://lore.kernel.org/r/202504090910.QkvTAR36-lkp@intel.com/ --- tools/objtool/arch/x86/special.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 403e587676f1b1..06ca4a2659a45f 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -126,7 +126,7 @@ struct reloc *arch_find_switch_table(struct objtool_file *file, * indicates a rare GCC quirk/bug which can leave dead * code behind. */ - if (reloc_type(text_reloc) == R_X86_64_PC32) { + if (!file->ignore_unreachables && reloc_type(text_reloc) == R_X86_64_PC32) { WARN_INSN(insn, "ignoring unreachables due to jump table quirk"); file->ignore_unreachables = true; } From 6afd0a3c7ecb5049d75801a3efda0ada70483bd0 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 9 Apr 2025 09:31:53 -0700 Subject: [PATCH 0426/2924] io_uring/zcrx: enable tcp-data-split in selftest For bnxt when the agg ring is used then tcp-data-split is automatically reported to be enabled, but __net_mp_open_rxq() requires tcp-data-split to be explicitly enabled by the user. Enable tcp-data-split explicitly in io_uring zc rx selftest. Signed-off-by: David Wei Link: https://patch.msgid.link/20250409163153.2747918-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/iou-zcrx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 9f271ab6ec04e6..6a0378e06cabd2 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -35,6 +35,7 @@ def test_zcrx(cfg) -> None: rx_ring = _get_rx_ring_entries(cfg) try: + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) @@ -48,6 +49,7 @@ def test_zcrx(cfg) -> None: ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) ethtool(f"-X {cfg.ifname} default", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) def test_zcrx_oneshot(cfg) -> None: @@ -59,6 +61,7 @@ def test_zcrx_oneshot(cfg) -> None: rx_ring = _get_rx_ring_entries(cfg) try: + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) @@ -72,6 +75,7 @@ def test_zcrx_oneshot(cfg) -> None: ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) ethtool(f"-X {cfg.ifname} default", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) def main() -> None: From 1293dacbbd43ab9848ac4655f6f2ba1dcc5a96ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Apr 2025 10:35:31 -0300 Subject: [PATCH 0427/2924] perf libunwind arm64: Fix missing close parens in an if statement While testing building with libunwind (using LIBUNWIND=1) in various arches I noticed a problem on arm64, on an rpi5 system, a missing close parens in a change related to dso__data_get_fd() usage, fix it. Fixes: 5ac22c35aa8519f1 ("perf dso: Use lock annotations to fix asan deadlock") Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/Z_Z3o8KvB2i5c6ab@x1 Signed-off-by: Namhyung Kim --- tools/perf/util/unwind-libunwind-local.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 9fb2c1343c7fe2..0b037e7389a009 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -371,7 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, * has to be pointed by symsrc_filename */ if (ofs == 0) { - if (dso__data_get_fd(dso, machine, &fd) { + if (dso__data_get_fd(dso, machine, &fd)) { ofs = elf_section_offset(fd, ".debug_frame"); dso__data_put_fd(dso); } From cfe82469a00f0c0983bf4652de3a2972637dfc56 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 8 Apr 2025 13:46:17 -0400 Subject: [PATCH 0428/2924] ipv6: add exception routes to GC list in rt6_insert_exception Commit 5eb902b8e719 ("net/ipv6: Remove expired routes with a separated list of routes.") introduced a separated list for managing route expiration via the GC timer. However, it missed adding exception routes (created by ip6_rt_update_pmtu() and rt6_do_redirect()) to this GC list. As a result, these exceptions were never considered for expiration and removal, leading to stale entries persisting in the routing table. This patch fixes the issue by calling fib6_add_gc_list() in rt6_insert_exception(), ensuring that exception routes are properly tracked and garbage collected when expired. Fixes: 5eb902b8e719 ("net/ipv6: Remove expired routes with a separated list of routes.") Reported-by: Jianlin Shi Signed-off-by: Xin Long Reviewed-by: David Ahern Link: https://patch.msgid.link/837e7506ffb63f47faa2b05d9b85481aad28e1a4.1744134377.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 210b84cecc2427..96f1621e2381c8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1771,6 +1771,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, if (!err) { spin_lock_bh(&f6i->fib6_table->tb6_lock); fib6_update_sernum(net, f6i); + fib6_add_gc_list(f6i); spin_unlock_bh(&f6i->fib6_table->tb6_lock); fib6_force_start_gc(net); } From 3940f5349b476197fb079c5aa19c9a988de64efb Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 1 Apr 2025 11:23:03 +0200 Subject: [PATCH 0429/2924] x86/i8253: Call clockevent_i8253_disable() with interrupts disabled There's a lockdep false positive warning related to i8253_lock: WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ... systemd-sleep/3324 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: ffffffffb2c23398 (i8253_lock){+.+.}-{2:2}, at: pcspkr_event+0x3f/0xe0 [pcspkr] ... ... which became HARDIRQ-irq-unsafe at: ... lock_acquire+0xd0/0x2f0 _raw_spin_lock+0x30/0x40 clockevent_i8253_disable+0x1c/0x60 pit_timer_init+0x25/0x50 hpet_time_init+0x46/0x50 x86_late_time_init+0x1b/0x40 start_kernel+0x962/0xa00 x86_64_start_reservations+0x24/0x30 x86_64_start_kernel+0xed/0xf0 common_startup_64+0x13e/0x141 ... Lockdep complains due pit_timer_init() using the lock in an IRQ-unsafe fashion, but it's a false positive, because there is no deadlock possible at that point due to init ordering: at the point where pit_timer_init() is called there is no other possible usage of i8253_lock because the system is still in the very early boot stage with no interrupts. But in any case, pit_timer_init() should disable interrupts before calling clockevent_i8253_disable() out of general principle, and to keep lockdep working even in this scenario. Use scoped_guard() for that, as suggested by Thomas Gleixner. [ mingo: Cleaned up the changelog. ] Suggested-by: Thomas Gleixner Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Ingo Molnar Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/Z-uwd4Bnn7FcCShX@gmail.com --- arch/x86/kernel/i8253.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 80e262bb627fe1..cb9852ad609893 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -46,7 +46,8 @@ bool __init pit_timer_init(void) * VMMs otherwise steal CPU time just to pointlessly waggle * the (masked) IRQ. */ - clockevent_i8253_disable(); + scoped_guard(irq) + clockevent_i8253_disable(); return false; } clockevent_i8253_init(true); From df4bf3fa1b1e8d03380206fa027f956a62de517b Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 9 Apr 2025 00:33:41 +0300 Subject: [PATCH 0430/2924] iommu: Fix crash in report_iommu_fault() The following crash is observed while handling an IOMMU fault with a recent kernel: kernel tried to execute NX-protected page - exploit attempt? (uid: 0) BUG: unable to handle page fault for address: ffff8c708299f700 PGD 19ee01067 P4D 19ee01067 PUD 101c10063 PMD 80000001028001e3 Oops: Oops: 0011 [#1] SMP NOPTI CPU: 4 UID: 0 PID: 139 Comm: irq/25-AMD-Vi Not tainted 6.15.0-rc1+ #20 PREEMPT(lazy) Hardware name: LENOVO 21D0/LNVNB161216, BIOS J6CN50WW 09/27/2024 RIP: 0010:0xffff8c708299f700 Call Trace: ? report_iommu_fault+0x78/0xd3 ? amd_iommu_report_page_fault+0x91/0x150 ? amd_iommu_int_thread+0x77/0x180 ? __pfx_irq_thread_fn+0x10/0x10 ? irq_thread_fn+0x23/0x60 ? irq_thread+0xf9/0x1e0 ? __pfx_irq_thread_dtor+0x10/0x10 ? __pfx_irq_thread+0x10/0x10 ? kthread+0xfc/0x240 ? __pfx_kthread+0x10/0x10 ? ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ? ret_from_fork_asm+0x1a/0x30 report_iommu_fault() checks for an installed handler comparing the corresponding field to NULL. It can (and could before) be called for a domain with a different cookie type - IOMMU_COOKIE_DMA_IOVA, specifically. Cookie is represented as a union so we may end up with a garbage value treated there if this happens for a domain with another cookie type. Formerly there were two exclusive cookie types in the union. IOMMU_DOMAIN_SVA has a dedicated iommu_report_device_fault(). Call the fault handler only if the passed domain has a required cookie type. Found by Linux Verification Center (linuxtesting.org). Fixes: 6aa63a4ec947 ("iommu: Sort out domain user data") Signed-off-by: Fedor Pchelkin Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250408213342.285955-1-pchelkin@ispras.ru Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c8033ca6637771..5729e8ecdda383 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2717,7 +2717,8 @@ int report_iommu_fault(struct iommu_domain *domain, struct device *dev, * if upper layers showed interest and installed a fault handler, * invoke it. */ - if (domain->handler) + if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER && + domain->handler) ret = domain->handler(domain, dev, iova, flags, domain->handler_token); From b47158fb42959c417ff2662075c0d46fb783d5d1 Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Tue, 8 Apr 2025 11:09:05 +0800 Subject: [PATCH 0431/2924] phy: tegra: xusb: Use a bitmask for UTMI pad power state tracking The current implementation uses bias_pad_enable as a reference count to manage the shared bias pad for all UTMI PHYs. However, during system suspension with connected USB devices, multiple power-down requests for the UTMI pad result in a mismatch in the reference count, which in turn produces warnings such as: [ 237.762967] WARNING: CPU: 10 PID: 1618 at tegra186_utmi_pad_power_down+0x160/0x170 [ 237.763103] Call trace: [ 237.763104] tegra186_utmi_pad_power_down+0x160/0x170 [ 237.763107] tegra186_utmi_phy_power_off+0x10/0x30 [ 237.763110] phy_power_off+0x48/0x100 [ 237.763113] tegra_xusb_enter_elpg+0x204/0x500 [ 237.763119] tegra_xusb_suspend+0x48/0x140 [ 237.763122] platform_pm_suspend+0x2c/0xb0 [ 237.763125] dpm_run_callback.isra.0+0x20/0xa0 [ 237.763127] __device_suspend+0x118/0x330 [ 237.763129] dpm_suspend+0x10c/0x1f0 [ 237.763130] dpm_suspend_start+0x88/0xb0 [ 237.763132] suspend_devices_and_enter+0x120/0x500 [ 237.763135] pm_suspend+0x1ec/0x270 The root cause was traced back to the dynamic power-down changes introduced in commit a30951d31b25 ("xhci: tegra: USB2 pad power controls"), where the UTMI pad was being powered down without verifying its current state. This unbalanced behavior led to discrepancies in the reference count. To rectify this issue, this patch replaces the single reference counter with a bitmask, renamed to utmi_pad_enabled. Each bit in the mask corresponds to one of the four USB2 PHYs, allowing us to track each pad's enablement status individually. With this change: - The bias pad is powered on only when the mask is clear. - Each UTMI pad is powered on or down based on its corresponding bit in the mask, preventing redundant operations. - The overall power state of the shared bias pad is maintained correctly during suspend/resume cycles. The mutex used to prevent race conditions during UTMI pad enable/disable operations has been moved from the tegra186_utmi_bias_pad_power_on/off functions to the parent functions tegra186_utmi_pad_power_on/down. This change ensures that there are no race conditions when updating the bitmask. Cc: stable@vger.kernel.org Fixes: a30951d31b25 ("xhci: tegra: USB2 pad power controls") Signed-off-by: Wayne Chang Reviewed-by: Jon Hunter Tested-by: Jon Hunter Link: https://lore.kernel.org/r/20250408030905.990474-1-waynec@nvidia.com Signed-off-by: Vinod Koul --- drivers/phy/tegra/xusb-tegra186.c | 44 +++++++++++++++++++------------ 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index fae6242aa730e0..cc7b8a6a999f74 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -237,6 +237,8 @@ #define DATA0_VAL_PD BIT(1) #define USE_XUSB_AO BIT(4) +#define TEGRA_UTMI_PAD_MAX 4 + #define TEGRA186_LANE(_name, _offset, _shift, _mask, _type) \ { \ .name = _name, \ @@ -269,7 +271,7 @@ struct tegra186_xusb_padctl { /* UTMI bias and tracking */ struct clk *usb2_trk_clk; - unsigned int bias_pad_enable; + DECLARE_BITMAP(utmi_pad_enabled, TEGRA_UTMI_PAD_MAX); /* padctl context */ struct tegra186_xusb_padctl_context context; @@ -603,12 +605,8 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl) u32 value; int err; - mutex_lock(&padctl->lock); - - if (priv->bias_pad_enable++ > 0) { - mutex_unlock(&padctl->lock); + if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX)) return; - } err = clk_prepare_enable(priv->usb2_trk_clk); if (err < 0) @@ -667,17 +665,8 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); u32 value; - mutex_lock(&padctl->lock); - - if (WARN_ON(priv->bias_pad_enable == 0)) { - mutex_unlock(&padctl->lock); - return; - } - - if (--priv->bias_pad_enable > 0) { - mutex_unlock(&padctl->lock); + if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX)) return; - } value = padctl_readl(padctl, XUSB_PADCTL_USB2_BIAS_PAD_CTL1); value |= USB2_PD_TRK; @@ -690,13 +679,13 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) clk_disable_unprepare(priv->usb2_trk_clk); } - mutex_unlock(&padctl->lock); } static void tegra186_utmi_pad_power_on(struct phy *phy) { struct tegra_xusb_lane *lane = phy_get_drvdata(phy); struct tegra_xusb_padctl *padctl = lane->pad->padctl; + struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); struct tegra_xusb_usb2_port *port; struct device *dev = padctl->dev; unsigned int index = lane->index; @@ -705,9 +694,16 @@ static void tegra186_utmi_pad_power_on(struct phy *phy) if (!phy) return; + mutex_lock(&padctl->lock); + if (test_bit(index, priv->utmi_pad_enabled)) { + mutex_unlock(&padctl->lock); + return; + } + port = tegra_xusb_find_usb2_port(padctl, index); if (!port) { dev_err(dev, "no port found for USB2 lane %u\n", index); + mutex_unlock(&padctl->lock); return; } @@ -724,18 +720,28 @@ static void tegra186_utmi_pad_power_on(struct phy *phy) value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); value &= ~USB2_OTG_PD_DR; padctl_writel(padctl, value, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); + + set_bit(index, priv->utmi_pad_enabled); + mutex_unlock(&padctl->lock); } static void tegra186_utmi_pad_power_down(struct phy *phy) { struct tegra_xusb_lane *lane = phy_get_drvdata(phy); struct tegra_xusb_padctl *padctl = lane->pad->padctl; + struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); unsigned int index = lane->index; u32 value; if (!phy) return; + mutex_lock(&padctl->lock); + if (!test_bit(index, priv->utmi_pad_enabled)) { + mutex_unlock(&padctl->lock); + return; + } + dev_dbg(padctl->dev, "power down UTMI pad %u\n", index); value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL0(index)); @@ -748,7 +754,11 @@ static void tegra186_utmi_pad_power_down(struct phy *phy) udelay(2); + clear_bit(index, priv->utmi_pad_enabled); + tegra186_utmi_bias_pad_power_off(padctl); + + mutex_unlock(&padctl->lock); } static int tegra186_xusb_padctl_vbus_override(struct tegra_xusb_padctl *padctl, From 548183ea388c12b6d76d6982f3d72df3887af0da Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Apr 2025 15:32:46 +0800 Subject: [PATCH 0432/2924] iommu/vt-d: Wire up irq_ack() to irq_move_irq() for posted MSIs Set the posted MSI irq_chip's irq_ack() hook to irq_move_irq() instead of a dummy/empty callback so that posted MSIs process pending changes to the IRQ's SMP affinity. Failure to honor a pending set-affinity results in userspace being unable to change the effective affinity of the IRQ, as IRQD_SETAFFINITY_PENDING is never cleared and so irq_set_affinity_locked() always defers moving the IRQ. The issue is most easily reproducible by setting /proc/irq/xx/smp_affinity multiple times in quick succession, as only the first update is likely to be handled in process context. Fixes: ed1e48ea4370 ("iommu/vt-d: Enable posted mode for device MSIs") Cc: Robert Lippert Cc: Thomas Gleixner Reported-by: Wentao Yang Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250321194249.1217961-1-seanjc@google.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/irq_remapping.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index ea3ca520391962..3bc2a03ccecaae 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1287,43 +1287,44 @@ static struct irq_chip intel_ir_chip = { }; /* - * With posted MSIs, all vectors are multiplexed into a single notification - * vector. Devices MSIs are then dispatched in a demux loop where - * EOIs can be coalesced as well. + * With posted MSIs, the MSI vectors are multiplexed into a single notification + * vector, and only the notification vector is sent to the APIC IRR. Device + * MSIs are then dispatched in a demux loop that harvests the MSIs from the + * CPU's Posted Interrupt Request bitmap. I.e. Posted MSIs never get sent to + * the APIC IRR, and thus do not need an EOI. The notification handler instead + * performs a single EOI after processing the PIR. * - * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack() - * function. Instead EOI is performed by the posted interrupt notification - * handler. + * Note! Pending SMP/CPU affinity changes, which are per MSI, must still be + * honored, only the APIC EOI is omitted. * * For the example below, 3 MSIs are coalesced into one CPU notification. Only - * one apic_eoi() is needed. + * one apic_eoi() is needed, but each MSI needs to process pending changes to + * its CPU affinity. * * __sysvec_posted_msi_notification() * irq_enter(); * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * apic_eoi() * irq_exit() + * */ - -static void dummy_ack(struct irq_data *d) { } - static struct irq_chip intel_ir_chip_post_msi = { .name = "INTEL-IR-POST", - .irq_ack = dummy_ack, + .irq_ack = irq_move_irq, .irq_set_affinity = intel_ir_set_affinity, .irq_compose_msi_msg = intel_ir_compose_msi_msg, .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, From 7d8c490ba3967719bc023c1f81592659a79bd964 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 10 Apr 2025 15:32:47 +0800 Subject: [PATCH 0433/2924] iommu/vt-d: Remove an unnecessary call set_dma_ops() Do not touch per-device DMA ops when the driver has been converted to use the dma-iommu API. Fixes: c588072bba6b ("iommu/vt-d: Convert intel iommu driver to the iommu ops") Signed-off-by: Petr Tesarik Link: https://lore.kernel.org/r/20250403165605.278541-1-ptesarik@suse.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6e67cc66a204c6..b29da2d96d0b18 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3835,7 +3835,6 @@ static void intel_iommu_release_device(struct device *dev) intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); kfree(info); - set_dma_ops(dev, NULL); } static void intel_iommu_get_resv_regions(struct device *device, From d784552e76a23c4ffad0e383670cd1d86064a6be Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 7 Apr 2025 17:40:08 +0530 Subject: [PATCH 0434/2924] phy: qcom-qmp-ufs: check for mode type for phy setting Generally all target supports Rate B but for very few like SM8550, two sets of UFS PHY settings are provided, one set is to support HS-G5 Rate A and another set is to support HS-G4 and lower gears with Rate B. Commit b02cc9a17679("phy: qcom-qmp-ufs: Add PHY Configuration support for sm8750") apply Rate B setting for SM8550 gear 5 without checking for mode value (Rate A or Rate B) from Controller driver which caused issue as SM8550 support rate A for Gear 5. Fix this by adding mode check before applying Rat B phy setting. Fixes: b02cc9a17679 ("phy: qcom-qmp-ufs: Add PHY Configuration support for sm8750") Reported-by: Neil Armstrong Closes: https://lore.kernel.org/all/430ed11c-0490-45be-897b-27cad9682371@quicinc.com/ Tested-by: Neil Armstrong # on SM8550-QRD Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250407121008.22230-1-quic_nitirawa@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 45b3b792696e9e..b33e2e2b5014d3 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1754,7 +1754,8 @@ static void qmp_ufs_init_registers(struct qmp_ufs *qmp, const struct qmp_phy_cfg qmp_ufs_init_all(qmp, &cfg->tbls_hs_overlay[i]); } - qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); + if (qmp->mode == PHY_MODE_UFS_HS_B) + qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); } static int qmp_ufs_com_init(struct qmp_ufs *qmp) From 280e5a30100578106a4305ce0118e0aa9b866f12 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 10 Apr 2025 12:23:48 +0100 Subject: [PATCH 0435/2924] iommu: Clear iommu-dma ops on cleanup If iommu_device_register() encounters an error, it can end up tearing down already-configured groups and default domains, however this currently still leaves devices hooked up to iommu-dma (and even historically the behaviour in this area was at best inconsistent across architectures/drivers...) Although in the case that an IOMMU is present whose driver has failed to probe, users cannot necessarily expect DMA to work anyway, it's still arguable that we should do our best to put things back as if the IOMMU driver was never there at all, and certainly the potential for crashing in iommu-dma itself is undesirable. Make sure we clean up the dev->dma_iommu flag along with everything else. Reported-by: Chen-Yu Tsai Signed-off-by: Robin Murphy Closes: https://lore.kernel.org/all/CAGXv+5HJpTYmQ2h-GD7GjyeYT7bL9EBCvu0mz5LgpzJZtzfW0w@mail.gmail.com/ Tested-by: Chen-Yu Tsai Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/e788aa927f6d827dd4ea1ed608fada79f2bab030.1744284228.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 5729e8ecdda383..4f91a740c15fe7 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -538,6 +538,9 @@ static void iommu_deinit_device(struct device *dev) dev->iommu_group = NULL; module_put(ops->owner); dev_iommu_free(dev); +#ifdef CONFIG_IOMMU_DMA + dev->dma_iommu = false; +#endif } static struct iommu_domain *pasid_array_entry_to_domain(void *entry) From d9d3cede416719c2d41cd4da1955b12a85856e2f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 20 Mar 2025 14:41:27 +0000 Subject: [PATCH 0436/2924] iommu/ipmmu-vmsa: Register in a sensible order IPMMU registers almost-initialised instances, but misses assigning the drvdata to make them fully functional, so initial calls back into ipmmu_probe_device() are likely to fail unnecessarily. Reorder this to work as it should, also pruning the long-out-of-date comment and adding the missing sysfs cleanup on error for good measure. Reported-by: Geert Uytterhoeven Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Signed-off-by: Robin Murphy Tested-by: Geert Uytterhoeven Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/53be6667544de65a15415b699e38a9a965692e45.1742481687.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 074daf1aac4e4c..e424b279a8cd9b 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1081,31 +1081,24 @@ static int ipmmu_probe(struct platform_device *pdev) } } + platform_set_drvdata(pdev, mmu); /* * Register the IPMMU to the IOMMU subsystem in the following cases: * - R-Car Gen2 IPMMU (all devices registered) * - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device) */ - if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) { - ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, - dev_name(&pdev->dev)); - if (ret) - return ret; - - ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev); - if (ret) - return ret; - } + if (mmu->features->has_cache_leaf_nodes && ipmmu_is_root(mmu)) + return 0; - /* - * We can't create the ARM mapping here as it requires the bus to have - * an IOMMU, which only happens when bus_set_iommu() is called in - * ipmmu_init() after the probe function returns. - */ + ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); + if (ret) + return ret; - platform_set_drvdata(pdev, mmu); + ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev); + if (ret) + iommu_device_sysfs_remove(&mmu->iommu); - return 0; + return ret; } static void ipmmu_remove(struct platform_device *pdev) From 715ad3e0ec2b13c27335749f27a5c9f0c0e84064 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 27 Mar 2025 20:06:02 +0100 Subject: [PATCH 0437/2924] xen: fix multicall debug feature Initializing a percpu variable with the address of a struct tagged as .initdata is breaking the build with CONFIG_SECTION_MISMATCH_WARN_ONLY not set to "y". Fix that by using an access function instead returning the .initdata struct address if the percpu space of the struct hasn't been allocated yet. Fixes: 368990a7fe30 ("xen: fix multicall debug data referencing") Reported-by: Borislav Petkov Reviewed-by: Boris Ostrovsky Acked-by: "Borislav Petkov (AMD)" Tested-by: "Borislav Petkov (AMD)" Signed-off-by: Juergen Gross Message-ID: <20250327190602.26015-1-jgross@suse.com> --- arch/x86/xen/multicalls.c | 26 ++++++++++++++------------ arch/x86/xen/smp_pv.c | 1 - arch/x86/xen/xen-ops.h | 3 --- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 10c660fae8b300..7237d56a9d3f01 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -54,14 +54,20 @@ struct mc_debug_data { static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); static struct mc_debug_data mc_debug_data_early __initdata; -static DEFINE_PER_CPU(struct mc_debug_data *, mc_debug_data) = - &mc_debug_data_early; static struct mc_debug_data __percpu *mc_debug_data_ptr; DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); static struct static_key mc_debug __ro_after_init; static bool mc_debug_enabled __initdata; +static struct mc_debug_data * __ref get_mc_debug(void) +{ + if (!mc_debug_data_ptr) + return &mc_debug_data_early; + + return this_cpu_ptr(mc_debug_data_ptr); +} + static int __init xen_parse_mc_debug(char *arg) { mc_debug_enabled = true; @@ -71,20 +77,16 @@ static int __init xen_parse_mc_debug(char *arg) } early_param("xen_mc_debug", xen_parse_mc_debug); -void mc_percpu_init(unsigned int cpu) -{ - per_cpu(mc_debug_data, cpu) = per_cpu_ptr(mc_debug_data_ptr, cpu); -} - static int __init mc_debug_enable(void) { unsigned long flags; + struct mc_debug_data __percpu *mcdb; if (!mc_debug_enabled) return 0; - mc_debug_data_ptr = alloc_percpu(struct mc_debug_data); - if (!mc_debug_data_ptr) { + mcdb = alloc_percpu(struct mc_debug_data); + if (!mcdb) { pr_err("xen_mc_debug inactive\n"); static_key_slow_dec(&mc_debug); return -ENOMEM; @@ -93,7 +95,7 @@ static int __init mc_debug_enable(void) /* Be careful when switching to percpu debug data. */ local_irq_save(flags); xen_mc_flush(); - mc_percpu_init(0); + mc_debug_data_ptr = mcdb; local_irq_restore(flags); pr_info("xen_mc_debug active\n"); @@ -155,7 +157,7 @@ void xen_mc_flush(void) trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); if (static_key_false(&mc_debug)) { - mcdb = __this_cpu_read(mc_debug_data); + mcdb = get_mc_debug(); memcpy(mcdb->entries, b->entries, b->mcidx * sizeof(struct multicall_entry)); } @@ -235,7 +237,7 @@ struct multicall_space __xen_mc_entry(size_t args) ret.mc = &b->entries[b->mcidx]; if (static_key_false(&mc_debug)) { - struct mc_debug_data *mcdb = __this_cpu_read(mc_debug_data); + struct mc_debug_data *mcdb = get_mc_debug(); mcdb->caller[b->mcidx] = __builtin_return_address(0); mcdb->argsz[b->mcidx] = args; diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 688ff59318ae22..9bb8ff8bff30a6 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -305,7 +305,6 @@ static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle) return rc; xen_pmu_init(cpu); - mc_percpu_init(cpu); /* * Why is this a BUG? If the hypercall fails then everything can be diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 63c13a2ccf556a..25e318ef27d6b0 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -261,9 +261,6 @@ void xen_mc_callback(void (*fn)(void *), void *data); */ struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); -/* Do percpu data initialization for multicalls. */ -void mc_percpu_init(unsigned int cpu); - extern bool is_xen_pmu; irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); From 164a9f712fa53e4c92b2a435bb071a5be0c31dbc Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Thu, 10 Apr 2025 15:31:05 -0400 Subject: [PATCH 0438/2924] x86/xen: Fix __xen_hypercall_setfunc() Hypercall detection is failing with xen_hypercall_intel() chosen even on an AMD processor. Looking at the disassembly, the call to xen_get_vendor() was removed. The check for boot_cpu_has(X86_FEATURE_CPUID) was used as a proxy for the x86_vendor having been set. When CONFIG_X86_REQUIRED_FEATURE_CPUID=y (the default value), DCE eliminates the call to xen_get_vendor(). An uninitialized value 0 means X86_VENDOR_INTEL, so the Intel function is always returned. Remove the if and always call xen_get_vendor() to avoid this issue. Fixes: 3d37d9396eb3 ("x86/cpufeatures: Add {REQUIRED,DISABLED} feature configs") Suggested-by: Juergen Gross Signed-off-by: Jason Andryuk Signed-off-by: Ingo Molnar Reviewed-by: Juergen Gross Cc: Boris Ostrovsky Cc: "H. Peter Anvin" Cc: "Xin Li (Intel)" Link: https://lore.kernel.org/r/20250410193106.16353-1-jason.andryuk@amd.com --- arch/x86/xen/enlighten.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1b7710bd0d0511..53282dc7d5ac5b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -103,10 +103,6 @@ noinstr void *__xen_hypercall_setfunc(void) void (*func)(void); /* - * Xen is supported only on CPUs with CPUID, so testing for - * X86_FEATURE_CPUID is a test for early_cpu_init() having been - * run. - * * Note that __xen_hypercall_setfunc() is noinstr only due to a nasty * dependency chain: it is being called via the xen_hypercall static * call when running as a PVH or HVM guest. Hypercalls need to be @@ -118,8 +114,7 @@ noinstr void *__xen_hypercall_setfunc(void) */ instrumentation_begin(); - if (!boot_cpu_has(X86_FEATURE_CPUID)) - xen_get_vendor(); + xen_get_vendor(); if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) From 683e9fa1c885a0cffbc10b459a7eee9df92af1c1 Mon Sep 17 00:00:00 2001 From: Maciej Falkowski Date: Tue, 1 Apr 2025 17:57:55 +0200 Subject: [PATCH 0439/2924] accel/ivpu: Flush pending jobs of device's workqueues Use flush_work() instead of cancel_work_sync() for driver IRQ workqueues to guarantee that remaining pending work will be handled. This resolves two issues that were encountered where a driver was left in an incorrect state as the bottom-half was canceled: 1. Cancelling context-abort of a job that is still executing and is causing translation faults which is going to cause additional TDRs 2. Cancelling bottom-half of a DCT (duty-cycle throttling) request which will cause a device to not be adjusted to an external frequency request. Fixes: bc3e5f48b7ee ("accel/ivpu: Use workqueue for IRQ handling") Signed-off-by: Maciej Falkowski Reviewed-by: Lizhi Hou Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250401155755.4049156-1-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 4fa73189502e1a..5e3888ff102212 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -421,9 +421,9 @@ void ivpu_prepare_for_reset(struct ivpu_device *vdev) { ivpu_hw_irq_disable(vdev); disable_irq(vdev->irq); - cancel_work_sync(&vdev->irq_ipc_work); - cancel_work_sync(&vdev->irq_dct_work); - cancel_work_sync(&vdev->context_abort_work); + flush_work(&vdev->irq_ipc_work); + flush_work(&vdev->irq_dct_work); + flush_work(&vdev->context_abort_work); ivpu_ipc_disable(vdev); ivpu_mmu_disable(vdev); } From 082a29e20af43455bc130b14c7426ace6a143819 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Tue, 1 Apr 2025 17:58:17 +0200 Subject: [PATCH 0440/2924] accel/ivpu: Update FW Boot API to version 3.28.3 Update firmware Boot API to 3.28.3 version and adjust driver to API changes for preemption buffers. Use new preemption buffer size fields from FW header added to firmware boot API for preemption buffers allocations, if those new fields are zeroed, use old values instead. Signed-off-by: Karol Wachowski Signed-off-by: Maciej Falkowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250401155817.4049220-1-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_fw.c | 14 ++++++-- drivers/accel/ivpu/vpu_boot_api.h | 13 ++++++-- drivers/accel/ivpu/vpu_jsm_api.h | 53 +++++++++++++++++++++---------- 3 files changed, 58 insertions(+), 22 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 7a1bb92d8c8161..3799231b39e714 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -233,10 +233,20 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->dvfs_mode = 0; fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); - fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; - fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); + if (fw_hdr->preemption_buffer_1_max_size) + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size; + else + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; + + if (fw_hdr->preemption_buffer_2_max_size) + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size; + else + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; + ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n", + fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size); + if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, fw_hdr->ro_section_size, fw_hdr->image_load_address, diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 908e68ea1c39c2..218468bbbcadaf 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -26,7 +26,7 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 26 +#define VPU_BOOT_API_VER_MINOR 28 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -76,8 +76,15 @@ struct vpu_firmware_header { * submission queue size and device capabilities. */ u32 preemption_buffer_2_size; + /* + * Maximum preemption buffer size that the FW can use: no need for the host + * driver to allocate more space than that specified by these fields. + * A value of 0 means no declared limit. + */ + u32 preemption_buffer_1_max_size; + u32 preemption_buffer_2_max_size; /* Space reserved for future preemption-related fields. */ - u32 preemption_reserved[6]; + u32 preemption_reserved[4]; /* FW image read only section start address, 4KB aligned */ u64 ro_section_start_address; /* FW image read only section size, 4KB aligned */ @@ -134,7 +141,7 @@ enum vpu_trace_destination { /* * Processor bit shifts (for loggable HW components). */ -#define VPU_TRACE_PROC_BIT_ARM 0 +#define VPU_TRACE_PROC_BIT_RESERVED 0 #define VPU_TRACE_PROC_BIT_LRT 1 #define VPU_TRACE_PROC_BIT_LNN 2 #define VPU_TRACE_PROC_BIT_SHV_0 3 diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 7215c144158cbd..4b6b2b3d2583a8 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -22,7 +22,7 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 25 +#define VPU_JSM_API_VER_MINOR 29 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -53,8 +53,7 @@ * Engine indexes. */ #define VPU_ENGINE_COMPUTE 0 -#define VPU_ENGINE_COPY 1 -#define VPU_ENGINE_NB 2 +#define VPU_ENGINE_NB 1 /* * VPU status values. @@ -126,11 +125,13 @@ enum { * When set, indicates that job queue uses native fences (as inline commands * in job queue). Such queues may also use legacy fences (as commands in batch buffers). * When cleared, indicates the job queue only uses legacy fences. - * NOTE: For queues using native fences, VPU expects that all jobs in the queue - * are immediately followed by an inline command object. This object is expected - * to be a fence signal command in most cases, but can also be a NOP in case the host - * does not need per-job fence signalling. Other inline commands objects can be - * inserted between "job and inline command" pairs. + * NOTES: + * 1. For queues using native fences, VPU expects that all jobs in the queue + * are immediately followed by an inline command object. This object is expected + * to be a fence signal command in most cases, but can also be a NOP in case the host + * does not need per-job fence signalling. Other inline commands objects can be + * inserted between "job and inline command" pairs. + * 2. Native fence queues are only supported on VPU 40xx onwards. */ VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), @@ -275,6 +276,8 @@ struct vpu_inline_cmd { u64 value; /* User VA of the log buffer in which to add log entry on completion. */ u64 log_buffer_va; + /* NPU private data. */ + u64 npu_private_data; } fence; /* Other commands do not have a payload. */ /* Payload definition for future inline commands can be inserted here. */ @@ -791,12 +794,22 @@ struct vpu_jsm_metric_streamer_update { /** Metric group mask that identifies metric streamer instance. */ u64 metric_group_mask; /** - * Address and size of the buffer where the VPU will write metric data. If - * the buffer address is 0 or same as the currently used buffer the VPU will - * continue writing metric data to the current buffer. In this case the - * buffer size is ignored and the size of the current buffer is unchanged. - * If the address is non-zero and differs from the current buffer address the - * VPU will immediately switch data collection to the new buffer. + * Address and size of the buffer where the VPU will write metric data. + * This member dictates how the update operation should perform: + * 1. client needs information about the number of collected samples and the + * amount of data written to the current buffer + * 2. client wants to switch to a new buffer + * + * Case 1. is identified by the buffer address being 0 or the same as the + * currently used buffer address. In this case the buffer size is ignored and + * the size of the current buffer is unchanged. The VPU will return an update + * in the vpu_jsm_metric_streamer_done structure. The internal writing position + * into the buffer is not changed. + * + * Case 2. is identified by the address being non-zero and differs from the + * current buffer address. The VPU will immediately switch data collection to + * the new buffer. Then the VPU will return an update in the + * vpu_jsm_metric_streamer_done structure. */ u64 buffer_addr; u64 buffer_size; @@ -934,6 +947,7 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* * Default quantum in 100ns units for scheduling across processes * within a priority band + * Minimum value supported by NPU is 1ms (10000 in 100ns units). */ u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; /* @@ -946,8 +960,10 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { * in situations when it's starved by the focus band. */ u32 normal_band_percentage; - /* Reserved */ - u32 reserved_0; + /* + * TDR timeout value in milliseconds. Default value of 0 meaning no timeout. + */ + u32 tdr_timeout; }; /* @@ -1024,7 +1040,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { s32 in_process_priority; /* Zero padding / Reserved */ u32 reserved_1; - /* Context quantum relative to other contexts of same priority in the same process */ + /* + * Context quantum relative to other contexts of same priority in the same process + * Minimum value supported by NPU is 1ms (10000 in 100ns units). + */ u64 context_quantum; /* Grace period when preempting context of the same priority within the same process */ u64 grace_period_same_priority; From 6c683c6887e4addcd6bd1ddce08cafccb0a21e32 Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Thu, 3 Apr 2025 15:26:01 +0300 Subject: [PATCH 0441/2924] asus-laptop: Fix an uninitialized variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value returned by acpi_evaluate_integer() is not checked, but the result is not always successful, so it is necessary to add a check of the returned value. If the result remains negative during three iterations of the loop, then the uninitialized variable 'val' will be used in the clamp_val() macro, so it must be initialized with the current value of the 'curr' variable. In this case, the algorithm should be less noisy. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: b23910c2194e ("asus-laptop: Pegatron Lucid accelerometer") Cc: stable@vger.kernel.org Signed-off-by: Denis Arefev Link: https://lore.kernel.org/r/20250403122603.18172-1-arefev@swemel.ru Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-laptop.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index d460dd194f1965..a0a411b4f2d6d8 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -426,11 +426,14 @@ static int asus_pega_lucid_set(struct asus_laptop *asus, int unit, bool enable) static int pega_acc_axis(struct asus_laptop *asus, int curr, char *method) { + unsigned long long val = (unsigned long long)curr; + acpi_status status; int i, delta; - unsigned long long val; - for (i = 0; i < PEGA_ACC_RETRIES; i++) { - acpi_evaluate_integer(asus->handle, method, NULL, &val); + for (i = 0; i < PEGA_ACC_RETRIES; i++) { + status = acpi_evaluate_integer(asus->handle, method, NULL, &val); + if (ACPI_FAILURE(status)) + continue; /* The output is noisy. From reading the ASL * dissassembly, timeout errors are returned with 1's * in the high word, and the lack of locking around From 3343b086c7035222c24956780ea4423655cad6d2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Apr 2025 11:20:15 +0200 Subject: [PATCH 0442/2924] platform/x86: x86-android-tablets: Add "9v" to Vexia EDU ATLA 10 tablet symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Vexia EDU ATLA 10 tablet comes in 2 different versions with significantly different mainboards. The only outward difference is that the charging barrel on one is marked 5V and the other is marked 9V. Both need to be handled by the x86-android-tablets code. Add 9v to the symbols for the existing support for the 9V Vexia EDU ATLA 10 tablet symbols to prepare for adding support for the 5V version. All this patch does is s/vexia_edu_atla10_info/vexia_edu_atla10_9v_info/. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20250407092017.273124-1-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../platform/x86/x86-android-tablets/dmi.c | 2 +- .../platform/x86/x86-android-tablets/other.c | 64 +++++++++---------- .../x86-android-tablets/x86-android-tablets.h | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/platform/x86/x86-android-tablets/dmi.c b/drivers/platform/x86/x86-android-tablets/dmi.c index 3e5fa3b6e2fdfe..e43d482b17a35d 100644 --- a/drivers/platform/x86/x86-android-tablets/dmi.c +++ b/drivers/platform/x86/x86-android-tablets/dmi.c @@ -187,7 +187,7 @@ const struct dmi_system_id x86_android_tablet_ids[] __initconst = { /* Above strings are too generic, also match on BIOS date */ DMI_MATCH(DMI_BIOS_DATE, "08/25/2014"), }, - .driver_data = (void *)&vexia_edu_atla10_info, + .driver_data = (void *)&vexia_edu_atla10_9v_info, }, { /* Whitelabel (sold as various brands) TM800A550L */ diff --git a/drivers/platform/x86/x86-android-tablets/other.c b/drivers/platform/x86/x86-android-tablets/other.c index 1d93d9edb23f48..74dcac8d19d724 100644 --- a/drivers/platform/x86/x86-android-tablets/other.c +++ b/drivers/platform/x86/x86-android-tablets/other.c @@ -599,62 +599,62 @@ const struct x86_dev_info whitelabel_tm800a550l_info __initconst = { }; /* - * Vexia EDU ATLA 10 tablet, Android 4.2 / 4.4 + Guadalinex Ubuntu tablet + * Vexia EDU ATLA 10 tablet 9V, Android 4.2 + Guadalinex Ubuntu tablet * distributed to schools in the Spanish Andalucía region. */ static const char * const crystal_cove_pwrsrc_psy[] = { "crystal_cove_pwrsrc" }; -static const struct property_entry vexia_edu_atla10_ulpmc_props[] = { +static const struct property_entry vexia_edu_atla10_9v_ulpmc_props[] = { PROPERTY_ENTRY_STRING_ARRAY("supplied-from", crystal_cove_pwrsrc_psy), { } }; -static const struct software_node vexia_edu_atla10_ulpmc_node = { - .properties = vexia_edu_atla10_ulpmc_props, +static const struct software_node vexia_edu_atla10_9v_ulpmc_node = { + .properties = vexia_edu_atla10_9v_ulpmc_props, }; -static const char * const vexia_edu_atla10_accel_mount_matrix[] = { +static const char * const vexia_edu_atla10_9v_accel_mount_matrix[] = { "0", "-1", "0", "1", "0", "0", "0", "0", "1" }; -static const struct property_entry vexia_edu_atla10_accel_props[] = { - PROPERTY_ENTRY_STRING_ARRAY("mount-matrix", vexia_edu_atla10_accel_mount_matrix), +static const struct property_entry vexia_edu_atla10_9v_accel_props[] = { + PROPERTY_ENTRY_STRING_ARRAY("mount-matrix", vexia_edu_atla10_9v_accel_mount_matrix), { } }; -static const struct software_node vexia_edu_atla10_accel_node = { - .properties = vexia_edu_atla10_accel_props, +static const struct software_node vexia_edu_atla10_9v_accel_node = { + .properties = vexia_edu_atla10_9v_accel_props, }; -static const struct property_entry vexia_edu_atla10_touchscreen_props[] = { +static const struct property_entry vexia_edu_atla10_9v_touchscreen_props[] = { PROPERTY_ENTRY_U32("hid-descr-addr", 0x0000), PROPERTY_ENTRY_U32("post-reset-deassert-delay-ms", 120), { } }; -static const struct software_node vexia_edu_atla10_touchscreen_node = { - .properties = vexia_edu_atla10_touchscreen_props, +static const struct software_node vexia_edu_atla10_9v_touchscreen_node = { + .properties = vexia_edu_atla10_9v_touchscreen_props, }; -static const struct property_entry vexia_edu_atla10_pmic_props[] = { +static const struct property_entry vexia_edu_atla10_9v_pmic_props[] = { PROPERTY_ENTRY_BOOL("linux,register-pwrsrc-power_supply"), { } }; -static const struct software_node vexia_edu_atla10_pmic_node = { - .properties = vexia_edu_atla10_pmic_props, +static const struct software_node vexia_edu_atla10_9v_pmic_node = { + .properties = vexia_edu_atla10_9v_pmic_props, }; -static const struct x86_i2c_client_info vexia_edu_atla10_i2c_clients[] __initconst = { +static const struct x86_i2c_client_info vexia_edu_atla10_9v_i2c_clients[] __initconst = { { /* I2C attached embedded controller, used to access fuel-gauge */ .board_info = { .type = "vexia_atla10_ec", .addr = 0x76, .dev_name = "ulpmc", - .swnode = &vexia_edu_atla10_ulpmc_node, + .swnode = &vexia_edu_atla10_9v_ulpmc_node, }, .adapter_path = "0000:00:18.1", }, { @@ -679,7 +679,7 @@ static const struct x86_i2c_client_info vexia_edu_atla10_i2c_clients[] __initcon .type = "kxtj21009", .addr = 0x0f, .dev_name = "kxtj21009", - .swnode = &vexia_edu_atla10_accel_node, + .swnode = &vexia_edu_atla10_9v_accel_node, }, .adapter_path = "0000:00:18.5", }, { @@ -688,7 +688,7 @@ static const struct x86_i2c_client_info vexia_edu_atla10_i2c_clients[] __initcon .type = "hid-over-i2c", .addr = 0x38, .dev_name = "FTSC1000", - .swnode = &vexia_edu_atla10_touchscreen_node, + .swnode = &vexia_edu_atla10_9v_touchscreen_node, }, .adapter_path = "0000:00:18.6", .irq_data = { @@ -703,7 +703,7 @@ static const struct x86_i2c_client_info vexia_edu_atla10_i2c_clients[] __initcon .type = "intel_soc_pmic_crc", .addr = 0x6e, .dev_name = "intel_soc_pmic_crc", - .swnode = &vexia_edu_atla10_pmic_node, + .swnode = &vexia_edu_atla10_9v_pmic_node, }, .adapter_path = "0000:00:18.7", .irq_data = { @@ -715,7 +715,7 @@ static const struct x86_i2c_client_info vexia_edu_atla10_i2c_clients[] __initcon } }; -static const struct x86_serdev_info vexia_edu_atla10_serdevs[] __initconst = { +static const struct x86_serdev_info vexia_edu_atla10_9v_serdevs[] __initconst = { { .ctrl.pci.devfn = PCI_DEVFN(0x1e, 3), .ctrl_devname = "serial0", @@ -723,7 +723,7 @@ static const struct x86_serdev_info vexia_edu_atla10_serdevs[] __initconst = { }, }; -static struct gpiod_lookup_table vexia_edu_atla10_ft5416_gpios = { +static struct gpiod_lookup_table vexia_edu_atla10_9v_ft5416_gpios = { .dev_id = "i2c-FTSC1000", .table = { GPIO_LOOKUP("INT33FC:00", 60, "reset", GPIO_ACTIVE_LOW), @@ -731,12 +731,12 @@ static struct gpiod_lookup_table vexia_edu_atla10_ft5416_gpios = { }, }; -static struct gpiod_lookup_table * const vexia_edu_atla10_gpios[] = { - &vexia_edu_atla10_ft5416_gpios, +static struct gpiod_lookup_table * const vexia_edu_atla10_9v_gpios[] = { + &vexia_edu_atla10_9v_ft5416_gpios, NULL }; -static int __init vexia_edu_atla10_init(struct device *dev) +static int __init vexia_edu_atla10_9v_init(struct device *dev) { struct pci_dev *pdev; int ret; @@ -760,13 +760,13 @@ static int __init vexia_edu_atla10_init(struct device *dev) return 0; } -const struct x86_dev_info vexia_edu_atla10_info __initconst = { - .i2c_client_info = vexia_edu_atla10_i2c_clients, - .i2c_client_count = ARRAY_SIZE(vexia_edu_atla10_i2c_clients), - .serdev_info = vexia_edu_atla10_serdevs, - .serdev_count = ARRAY_SIZE(vexia_edu_atla10_serdevs), - .gpiod_lookup_tables = vexia_edu_atla10_gpios, - .init = vexia_edu_atla10_init, +const struct x86_dev_info vexia_edu_atla10_9v_info __initconst = { + .i2c_client_info = vexia_edu_atla10_9v_i2c_clients, + .i2c_client_count = ARRAY_SIZE(vexia_edu_atla10_9v_i2c_clients), + .serdev_info = vexia_edu_atla10_9v_serdevs, + .serdev_count = ARRAY_SIZE(vexia_edu_atla10_9v_serdevs), + .gpiod_lookup_tables = vexia_edu_atla10_9v_gpios, + .init = vexia_edu_atla10_9v_init, .use_pci = true, }; diff --git a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h index 63a38a0069bae6..2204bbaf2ed5ae 100644 --- a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h +++ b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h @@ -127,7 +127,7 @@ extern const struct x86_dev_info nextbook_ares8_info; extern const struct x86_dev_info nextbook_ares8a_info; extern const struct x86_dev_info peaq_c1010_info; extern const struct x86_dev_info whitelabel_tm800a550l_info; -extern const struct x86_dev_info vexia_edu_atla10_info; +extern const struct x86_dev_info vexia_edu_atla10_9v_info; extern const struct x86_dev_info xiaomi_mipad2_info; extern const struct dmi_system_id x86_android_tablet_ids[]; From 59df54c67be3e587e4217bddd793350fbe8f5feb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Apr 2025 11:20:16 +0200 Subject: [PATCH 0443/2924] platform/x86: x86-android-tablets: Add Vexia Edu Atla 10 tablet 5V data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Vexia EDU ATLA 10 tablet comes in 2 different versions with significantly different mainboards. The only outward difference is that the charging barrel on one is marked 5V and the other is marked 9V. Both are x86 ACPI tablets which ships with Android x86 as factory OS. with a DSDT which contains a bunch of I2C devices which are not actually there, causing various resource conflicts. Enumeration of these is skipped through the acpi_quirk_skip_i2c_client_enumeration(). Extend the existing support for the 9V version by adding support for manually instantiating the I2C devices which are actually present on the 5V version by adding the necessary device info to the x86-android-tablets module. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20250407092017.273124-2-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../platform/x86/x86-android-tablets/dmi.c | 12 ++++ .../platform/x86/x86-android-tablets/other.c | 60 +++++++++++++++++++ .../x86-android-tablets/x86-android-tablets.h | 1 + 3 files changed, 73 insertions(+) diff --git a/drivers/platform/x86/x86-android-tablets/dmi.c b/drivers/platform/x86/x86-android-tablets/dmi.c index e43d482b17a35d..278c6d151dc492 100644 --- a/drivers/platform/x86/x86-android-tablets/dmi.c +++ b/drivers/platform/x86/x86-android-tablets/dmi.c @@ -179,6 +179,18 @@ const struct dmi_system_id x86_android_tablet_ids[] __initconst = { }, .driver_data = (void *)&peaq_c1010_info, }, + { + /* Vexia Edu Atla 10 tablet 5V version */ + .matches = { + /* Having all 3 of these not set is somewhat unique */ + DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), + DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), + DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"), + }, + .driver_data = (void *)&vexia_edu_atla10_5v_info, + }, { /* Vexia Edu Atla 10 tablet 9V version */ .matches = { diff --git a/drivers/platform/x86/x86-android-tablets/other.c b/drivers/platform/x86/x86-android-tablets/other.c index 74dcac8d19d724..f7bd9f863c85ed 100644 --- a/drivers/platform/x86/x86-android-tablets/other.c +++ b/drivers/platform/x86/x86-android-tablets/other.c @@ -598,6 +598,66 @@ const struct x86_dev_info whitelabel_tm800a550l_info __initconst = { .gpiod_lookup_tables = whitelabel_tm800a550l_gpios, }; +/* + * Vexia EDU ATLA 10 tablet 5V, Android 4.4 + Guadalinex Ubuntu tablet + * distributed to schools in the Spanish Andalucía region. + */ +static const struct property_entry vexia_edu_atla10_5v_touchscreen_props[] = { + PROPERTY_ENTRY_U32("hid-descr-addr", 0x0000), + PROPERTY_ENTRY_U32("post-reset-deassert-delay-ms", 120), + { } +}; + +static const struct software_node vexia_edu_atla10_5v_touchscreen_node = { + .properties = vexia_edu_atla10_5v_touchscreen_props, +}; + +static const struct x86_i2c_client_info vexia_edu_atla10_5v_i2c_clients[] __initconst = { + { + /* kxcjk1013 accelerometer */ + .board_info = { + .type = "kxcjk1013", + .addr = 0x0f, + .dev_name = "kxcjk1013", + }, + .adapter_path = "\\_SB_.I2C3", + }, { + /* touchscreen controller */ + .board_info = { + .type = "hid-over-i2c", + .addr = 0x38, + .dev_name = "FTSC1000", + .swnode = &vexia_edu_atla10_5v_touchscreen_node, + }, + .adapter_path = "\\_SB_.I2C4", + .irq_data = { + .type = X86_ACPI_IRQ_TYPE_APIC, + .index = 0x44, + .trigger = ACPI_LEVEL_SENSITIVE, + .polarity = ACPI_ACTIVE_HIGH, + }, + } +}; + +static struct gpiod_lookup_table vexia_edu_atla10_5v_ft5416_gpios = { + .dev_id = "i2c-FTSC1000", + .table = { + GPIO_LOOKUP("INT33FC:01", 26, "reset", GPIO_ACTIVE_LOW), + { } + }, +}; + +static struct gpiod_lookup_table * const vexia_edu_atla10_5v_gpios[] = { + &vexia_edu_atla10_5v_ft5416_gpios, + NULL +}; + +const struct x86_dev_info vexia_edu_atla10_5v_info __initconst = { + .i2c_client_info = vexia_edu_atla10_5v_i2c_clients, + .i2c_client_count = ARRAY_SIZE(vexia_edu_atla10_5v_i2c_clients), + .gpiod_lookup_tables = vexia_edu_atla10_5v_gpios, +}; + /* * Vexia EDU ATLA 10 tablet 9V, Android 4.2 + Guadalinex Ubuntu tablet * distributed to schools in the Spanish Andalucía region. diff --git a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h index 2204bbaf2ed5ae..dcf8d49e3b5f48 100644 --- a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h +++ b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h @@ -127,6 +127,7 @@ extern const struct x86_dev_info nextbook_ares8_info; extern const struct x86_dev_info nextbook_ares8a_info; extern const struct x86_dev_info peaq_c1010_info; extern const struct x86_dev_info whitelabel_tm800a550l_info; +extern const struct x86_dev_info vexia_edu_atla10_5v_info; extern const struct x86_dev_info vexia_edu_atla10_9v_info; extern const struct x86_dev_info xiaomi_mipad2_info; extern const struct dmi_system_id x86_android_tablet_ids[]; From b129005ddfc0e6daf04a6d3b928a9e474f9b3918 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 7 Apr 2025 13:25:58 +0000 Subject: [PATCH 0444/2924] mlxbf-bootctl: use sysfs_emit_at() in secure_boot_fuse_state_show() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A warning is seen when running the latest kernel on a BlueField SOC: [251.512704] ------------[ cut here ]------------ [251.512711] invalid sysfs_emit: buf:0000000003aa32ae [251.512720] WARNING: CPU: 1 PID: 705264 at fs/sysfs/file.c:767 sysfs_emit+0xac/0xc8 The warning is triggered because the mlxbf-bootctl driver invokes "sysfs_emit()" with a buffer pointer that is not aligned to the start of the page. The driver should instead use "sysfs_emit_at()" to support non-zero offsets into the destination buffer. Fixes: 9886f575de5a ("platform/mellanox: mlxbf-bootctl: use sysfs_emit() instead of sprintf()") Signed-off-by: David Thompson Link: https://lore.kernel.org/r/20250407132558.2418719-1-davthompson@nvidia.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/mellanox/mlxbf-bootctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-bootctl.c b/drivers/platform/mellanox/mlxbf-bootctl.c index b95dcb8d483ce4..c18a5b96de5ce7 100644 --- a/drivers/platform/mellanox/mlxbf-bootctl.c +++ b/drivers/platform/mellanox/mlxbf-bootctl.c @@ -333,9 +333,9 @@ static ssize_t secure_boot_fuse_state_show(struct device *dev, else status = valid ? "Invalid" : "Free"; } - buf_len += sysfs_emit(buf + buf_len, "%d:%s ", key, status); + buf_len += sysfs_emit_at(buf, buf_len, "%d:%s ", key, status); } - buf_len += sysfs_emit(buf + buf_len, "\n"); + buf_len += sysfs_emit_at(buf, buf_len, "\n"); return buf_len; } From fcf27a6a926fd9eeba39e9c3fde43c9298fe284e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 7 Apr 2025 13:18:21 -0500 Subject: [PATCH 0445/2924] platform/x86: amd: pmf: Fix STT limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On some platforms it has been observed that STT limits are not being applied properly causing poor performance as power limits are set too low. STT limits that are sent to the platform are supposed to be in Q8.8 format. Convert them before sending. Reported-by: Yijun Shen Fixes: 7c45534afa443 ("platform/x86/amd/pmf: Add support for PMF Policy Binary") Cc: stable@vger.kernel.org Tested-by: Yijun Shen Signed-off-by: Mario Limonciello Acked-by: Shyam Sundar S K Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20250407181915.1482450-1-superm1@kernel.org Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/auto-mode.c | 4 ++-- drivers/platform/x86/amd/pmf/cnqf.c | 8 ++++---- drivers/platform/x86/amd/pmf/core.c | 14 ++++++++++++++ drivers/platform/x86/amd/pmf/pmf.h | 1 + drivers/platform/x86/amd/pmf/sps.c | 12 ++++++++---- drivers/platform/x86/amd/pmf/tee-if.c | 6 ++++-- 6 files changed, 33 insertions(+), 12 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/auto-mode.c b/drivers/platform/x86/amd/pmf/auto-mode.c index 02ff68be10d012..a184922bba8d65 100644 --- a/drivers/platform/x86/amd/pmf/auto-mode.c +++ b/drivers/platform/x86/amd/pmf/auto-mode.c @@ -120,9 +120,9 @@ static void amd_pmf_set_automode(struct amd_pmf_dev *dev, int idx, amd_pmf_send_cmd(dev, SET_SPPT_APU_ONLY, false, pwr_ctrl->sppt_apu_only, NULL); amd_pmf_send_cmd(dev, SET_STT_MIN_LIMIT, false, pwr_ctrl->stt_min, NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, - pwr_ctrl->stt_skin_temp[STT_TEMP_APU], NULL); + fixp_q88_fromint(pwr_ctrl->stt_skin_temp[STT_TEMP_APU]), NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, - pwr_ctrl->stt_skin_temp[STT_TEMP_HS2], NULL); + fixp_q88_fromint(pwr_ctrl->stt_skin_temp[STT_TEMP_HS2]), NULL); if (is_apmf_func_supported(dev, APMF_FUNC_SET_FAN_IDX)) apmf_update_fan_idx(dev, config_store.mode_set[idx].fan_control.manual, diff --git a/drivers/platform/x86/amd/pmf/cnqf.c b/drivers/platform/x86/amd/pmf/cnqf.c index bc8899e15c914b..207a0b33d8d368 100644 --- a/drivers/platform/x86/amd/pmf/cnqf.c +++ b/drivers/platform/x86/amd/pmf/cnqf.c @@ -81,10 +81,10 @@ static int amd_pmf_set_cnqf(struct amd_pmf_dev *dev, int src, int idx, amd_pmf_send_cmd(dev, SET_SPPT, false, pc->sppt, NULL); amd_pmf_send_cmd(dev, SET_SPPT_APU_ONLY, false, pc->sppt_apu_only, NULL); amd_pmf_send_cmd(dev, SET_STT_MIN_LIMIT, false, pc->stt_min, NULL); - amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, pc->stt_skin_temp[STT_TEMP_APU], - NULL); - amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, pc->stt_skin_temp[STT_TEMP_HS2], - NULL); + amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, + fixp_q88_fromint(pc->stt_skin_temp[STT_TEMP_APU]), NULL); + amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, + fixp_q88_fromint(pc->stt_skin_temp[STT_TEMP_HS2]), NULL); if (is_apmf_func_supported(dev, APMF_FUNC_SET_FAN_IDX)) apmf_update_fan_idx(dev, diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index a2cb2d5544f5b3..96821101ec773c 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -176,6 +176,20 @@ static void __maybe_unused amd_pmf_dump_registers(struct amd_pmf_dev *dev) dev_dbg(dev->dev, "AMD_PMF_REGISTER_MESSAGE:%x\n", value); } +/** + * fixp_q88_fromint: Convert integer to Q8.8 + * @val: input value + * + * Converts an integer into binary fixed point format where 8 bits + * are used for integer and 8 bits are used for the decimal. + * + * Return: unsigned integer converted to Q8.8 format + */ +u32 fixp_q88_fromint(u32 val) +{ + return val << 8; +} + int amd_pmf_send_cmd(struct amd_pmf_dev *dev, u8 message, bool get, u32 arg, u32 *data) { int rc; diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h index e6bdee68ccf347..45b60238d5277f 100644 --- a/drivers/platform/x86/amd/pmf/pmf.h +++ b/drivers/platform/x86/amd/pmf/pmf.h @@ -777,6 +777,7 @@ int apmf_install_handler(struct amd_pmf_dev *pmf_dev); int apmf_os_power_slider_update(struct amd_pmf_dev *dev, u8 flag); int amd_pmf_set_dram_addr(struct amd_pmf_dev *dev, bool alloc_buffer); int amd_pmf_notify_sbios_heartbeat_event_v2(struct amd_pmf_dev *dev, u8 flag); +u32 fixp_q88_fromint(u32 val); /* SPS Layer */ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf); diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index d3083383f11fbf..49e14ca94a9e77 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -198,9 +198,11 @@ static void amd_pmf_update_slider_v2(struct amd_pmf_dev *dev, int idx) amd_pmf_send_cmd(dev, SET_STT_MIN_LIMIT, false, apts_config_store.val[idx].stt_min_limit, NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, - apts_config_store.val[idx].stt_skin_temp_limit_apu, NULL); + fixp_q88_fromint(apts_config_store.val[idx].stt_skin_temp_limit_apu), + NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, - apts_config_store.val[idx].stt_skin_temp_limit_hs2, NULL); + fixp_q88_fromint(apts_config_store.val[idx].stt_skin_temp_limit_hs2), + NULL); } void amd_pmf_update_slider(struct amd_pmf_dev *dev, bool op, int idx, @@ -217,9 +219,11 @@ void amd_pmf_update_slider(struct amd_pmf_dev *dev, bool op, int idx, amd_pmf_send_cmd(dev, SET_STT_MIN_LIMIT, false, config_store.prop[src][idx].stt_min, NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, - config_store.prop[src][idx].stt_skin_temp[STT_TEMP_APU], NULL); + fixp_q88_fromint(config_store.prop[src][idx].stt_skin_temp[STT_TEMP_APU]), + NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, - config_store.prop[src][idx].stt_skin_temp[STT_TEMP_HS2], NULL); + fixp_q88_fromint(config_store.prop[src][idx].stt_skin_temp[STT_TEMP_HS2]), + NULL); } else if (op == SLIDER_OP_GET) { amd_pmf_send_cmd(dev, GET_SPL, true, ARG_NONE, &table->prop[src][idx].spl); amd_pmf_send_cmd(dev, GET_FPPT, true, ARG_NONE, &table->prop[src][idx].fppt); diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index a1e43873a07b08..14b99d8b63d2fc 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -123,7 +123,8 @@ static void amd_pmf_apply_policies(struct amd_pmf_dev *dev, struct ta_pmf_enact_ case PMF_POLICY_STT_SKINTEMP_APU: if (dev->prev_data->stt_skintemp_apu != val) { - amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, val, NULL); + amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, + fixp_q88_fromint(val), NULL); dev_dbg(dev->dev, "update STT_SKINTEMP_APU: %u\n", val); dev->prev_data->stt_skintemp_apu = val; } @@ -131,7 +132,8 @@ static void amd_pmf_apply_policies(struct amd_pmf_dev *dev, struct ta_pmf_enact_ case PMF_POLICY_STT_SKINTEMP_HS2: if (dev->prev_data->stt_skintemp_hs2 != val) { - amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, val, NULL); + amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, + fixp_q88_fromint(val), NULL); dev_dbg(dev->dev, "update STT_SKINTEMP_HS2: %u\n", val); dev->prev_data->stt_skintemp_hs2 = val; } From 6c2b75404d33caa46a582f2791a70f92232adb71 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Tue, 1 Apr 2025 17:59:11 +0200 Subject: [PATCH 0446/2924] accel/ivpu: Fix the NPU's DPU frequency calculation Fix the frequency returned to the user space by the DRM_IVPU_PARAM_CORE_CLOCK_RATE GET_PARAM IOCTL. The kernel driver returned CPU frequency for MTL and bare PLL frequency for LNL - this was inconsistent and incorrect for both platforms. With this fix the driver returns maximum frequency of the NPU data processing unit (DPU) for all HW generations. This is what user space always expected. Also do not set CPU frequency in boot params - the firmware does not use frequency passed from the driver, it was only used by the early pre-production firmware. With that we can remove CPU frequency calculation code. Show NPU frequency in FREQ_CHANGE interrupt when frequency tracking is enabled. Fixes: 8a27ad81f7d3 ("accel/ivpu: Split IP and buttress code") Cc: stable@vger.kernel.org # v6.11+ Signed-off-by: Andrzej Kacprowski Signed-off-by: Maciej Falkowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250401155912.4049340-2-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 4 +- drivers/accel/ivpu/ivpu_fw.c | 3 +- drivers/accel/ivpu/ivpu_hw.h | 11 +-- drivers/accel/ivpu/ivpu_hw_btrs.c | 126 +++++++++++++----------------- drivers/accel/ivpu/ivpu_hw_btrs.h | 6 +- include/uapi/drm/ivpu_accel.h | 4 +- 6 files changed, 66 insertions(+), 88 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 5e3888ff102212..eff1d3ca075f5a 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include @@ -164,7 +164,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f args->value = vdev->platform; break; case DRM_IVPU_PARAM_CORE_CLOCK_RATE: - args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio); + args->value = ivpu_hw_dpu_max_freq_get(vdev); break; case DRM_IVPU_PARAM_NUM_CONTEXTS: args->value = ivpu_get_context_count(vdev); diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 3799231b39e714..5e1d709c6a464d 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include @@ -576,7 +576,6 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->magic = VPU_BOOT_PARAMS_MAGIC; boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; - boot_params->frequency = ivpu_hw_pll_freq_get(vdev); /* * This param is a debug firmware feature. It switches default clock diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index 16435f2756d024..6e67e736ef8e2a 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_HW_H__ @@ -82,9 +82,9 @@ static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) return range->end - range->start; } -static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) +static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev) { - return ivpu_hw_btrs_ratio_to_freq(vdev, ratio); + return ivpu_hw_btrs_dpu_max_freq_get(vdev); } static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) @@ -92,11 +92,6 @@ static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) ivpu_hw_ip_irq_clear(vdev); } -static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev) -{ - return ivpu_hw_btrs_pll_freq_get(vdev); -} - static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev) { return vdev->hw->pll.profiling_freq; diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c index 56c56012b980fd..91eb8a93c15b17 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.c +++ b/drivers/accel/ivpu/ivpu_hw_btrs.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ +#include + #include "ivpu_drv.h" #include "ivpu_hw.h" #include "ivpu_hw_btrs.h" @@ -28,17 +30,13 @@ #define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) -#define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3) -#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3) -#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3) -#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3) -#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) #define PLL_CDYN_DEFAULT 0x80 #define PLL_EPP_DEFAULT 0x80 #define PLL_CONFIG_DEFAULT 0x0 -#define PLL_SIMULATION_FREQ 10000000 -#define PLL_REF_CLK_FREQ 50000000 +#define PLL_REF_CLK_FREQ 50000000ull +#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) + #define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) #define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) #define TIMEOUT_US (150 * USEC_PER_MSEC) @@ -62,6 +60,8 @@ #define DCT_ENABLE 0x1 #define DCT_DISABLE 0x0 +static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio); + int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) { REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK); @@ -156,7 +156,7 @@ static int info_init_mtl(struct ivpu_device *vdev) hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; hw->sku = BTRS_MTL_TILE_SKU_BOTH; - hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; + hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3); return 0; } @@ -334,8 +334,8 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable) prepare_wp_request(vdev, &wp, enable); - ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", - PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn); + ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", + pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn); ret = wp_request_send(vdev, &wp); if (ret) { @@ -573,6 +573,39 @@ int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev) return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); } +static u32 pll_config_get_mtl(struct ivpu_device *vdev) +{ + return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); +} + +static u32 pll_config_get_lnl(struct ivpu_device *vdev) +{ + return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); +} + +static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio) +{ + return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3; +} + +static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio) +{ + return PLL_RATIO_TO_FREQ(ratio) / 2; +} + +static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_dpu_freq_mtl(ratio); + else + return pll_ratio_to_dpu_freq_lnl(ratio); +} + +u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev) +{ + return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio); +} + /* Handler for IRQs from Buttress core (irqB) */ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) { @@ -582,9 +615,12 @@ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) if (!status) return false; - if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", - REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL)); + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) { + u32 pll = pll_config_get_mtl(vdev); + + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", + pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ); + } if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); @@ -633,8 +669,12 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq) queue_work(system_wq, &vdev->irq_dct_work); } - if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ)); + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) { + u32 pll = pll_config_get_lnl(vdev); + + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", + pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ); + } if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", @@ -717,60 +757,6 @@ void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 acti REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); } -static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config) -{ - u32 pll_clock = PLL_REF_CLK_FREQ * ratio; - u32 cpu_clock; - - if ((config & 0xff) == MTL_PLL_RATIO_4_3) - cpu_clock = pll_clock * 2 / 4; - else - cpu_clock = pll_clock * 2 / 5; - - return cpu_clock; -} - -u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) -{ - struct ivpu_hw_info *hw = vdev->hw; - - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) - return pll_ratio_to_freq_mtl(ratio, hw->config); - else - return PLL_RATIO_TO_FREQ(ratio); -} - -static u32 pll_freq_get_mtl(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); - pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK; - - if (!ivpu_is_silicon(vdev)) - return PLL_SIMULATION_FREQ; - - return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config); -} - -static u32 pll_freq_get_lnl(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); - pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK; - - return PLL_RATIO_TO_FREQ(pll_curr_ratio); -} - -u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev) -{ - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) - return pll_freq_get_mtl(vdev); - else - return pll_freq_get_lnl(vdev); -} - u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev) { if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h index 1fd71b4d4ab01a..a2f0877237e829 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.h +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_HW_BTRS_H__ @@ -13,7 +13,6 @@ #define PLL_PROFILING_FREQ_DEFAULT 38400000 #define PLL_PROFILING_FREQ_HIGH 400000000 -#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) #define DCT_DEFAULT_ACTIVE_PERCENT 15u #define DCT_PERIOD_US 35300u @@ -32,12 +31,11 @@ int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev); void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev); bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent); -u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev); -u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio); u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev); diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 746c43bd3eb698..2f24103f45339b 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __UAPI_IVPU_DRM_H__ @@ -147,7 +147,7 @@ struct drm_ivpu_param { * platform type when executing on a simulator or emulator (read-only) * * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: - * Current PLL frequency (read-only) + * Maximum frequency of the NPU data processing unit clock (read-only) * * %DRM_IVPU_PARAM_NUM_CONTEXTS: * Maximum number of simultaneously existing contexts (read-only) From 1524c28b995279db7f01abda7bf0fd26e47aefba Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Tue, 1 Apr 2025 17:59:12 +0200 Subject: [PATCH 0447/2924] accel/ivpu: Show NPU frequency in sysfs Add sysfs files that show maximum and current frequency of the NPU's data processing unit. New sysfs entries: - npu_max_frequency_mhz - npu_current_frequency_mhz Signed-off-by: Andrzej Kacprowski Signed-off-by: Maciej Falkowski Reviewed-by: Jacek Lawrynowicz Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250401155912.4049340-3-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_hw.h | 5 ++++ drivers/accel/ivpu/ivpu_hw_btrs.c | 8 +++++ drivers/accel/ivpu/ivpu_hw_btrs.h | 1 + drivers/accel/ivpu/ivpu_sysfs.c | 49 ++++++++++++++++++++++++++++++- 4 files changed, 62 insertions(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index 6e67e736ef8e2a..d79668fe1609f6 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -87,6 +87,11 @@ static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev) return ivpu_hw_btrs_dpu_max_freq_get(vdev); } +static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev) +{ + return ivpu_hw_btrs_dpu_freq_get(vdev); +} + static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) { ivpu_hw_ip_irq_clear(vdev); diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c index 91eb8a93c15b17..b236c7234daabb 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.c +++ b/drivers/accel/ivpu/ivpu_hw_btrs.c @@ -606,6 +606,14 @@ u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev) return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio); } +u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev)); + else + return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev)); +} + /* Handler for IRQs from Buttress core (irqB) */ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) { diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h index a2f0877237e829..300f749971d4d6 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.h +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -32,6 +32,7 @@ void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev); bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c index 97102feaf8ddca..268ab7744a8bbb 100644 --- a/drivers/accel/ivpu/ivpu_sysfs.c +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation */ #include #include +#include +#include #include "ivpu_drv.h" #include "ivpu_gem.h" @@ -90,10 +92,55 @@ sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf) static DEVICE_ATTR_RO(sched_mode); +/** + * DOC: npu_max_frequency + * + * The npu_max_frequency shows maximum frequency in MHz of the NPU's data + * processing unit + */ +static ssize_t +npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + u32 freq = ivpu_hw_dpu_max_freq_get(vdev); + + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); +} + +static DEVICE_ATTR_RO(npu_max_frequency_mhz); + +/** + * DOC: npu_current_frequency_mhz + * + * The npu_current_frequency_mhz shows current frequency in MHz of the NPU's + * data processing unit + */ +static ssize_t +npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + u32 freq = 0; + + /* Read frequency only if device is active, otherwise frequency is 0 */ + if (pm_runtime_get_if_active(vdev->drm.dev) > 0) { + freq = ivpu_hw_dpu_freq_get(vdev); + + pm_runtime_put_autosuspend(vdev->drm.dev); + } + + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); +} + +static DEVICE_ATTR_RO(npu_current_frequency_mhz); + static struct attribute *ivpu_dev_attrs[] = { &dev_attr_npu_busy_time_us.attr, &dev_attr_npu_memory_utilization.attr, &dev_attr_sched_mode.attr, + &dev_attr_npu_max_frequency_mhz.attr, + &dev_attr_npu_current_frequency_mhz.attr, NULL, }; From 31660b406d872b5ccb3c2ec6f932969809c35b18 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Tue, 1 Apr 2025 17:59:39 +0200 Subject: [PATCH 0448/2924] accel/ivpu: Add cmdq_id to job related logs Add tracking of command queue ID in JOB debug message to improve debugging capabilities. Signed-off-by: Karol Wachowski Signed-off-by: Maciej Falkowski Reviewed-by: Lizhi Hou Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250401155939.4049467-1-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 004059e4f1e89d..863e3cd6ace51e 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -470,8 +470,8 @@ static void ivpu_job_destroy(struct ivpu_job *job) struct ivpu_device *vdev = job->vdev; u32 i; - ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d", - job->job_id, job->file_priv->ctx.id, job->engine_idx); + ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d", + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx); for (i = 0; i < job->bo_count; i++) if (job->bos[i]) @@ -564,8 +564,8 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 dma_fence_signal(job->done_fence); trace_job("done", job); - ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", - job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n", + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status); ivpu_job_destroy(job); ivpu_stop_job_timeout_detection(vdev); @@ -664,8 +664,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) } trace_job("submit", job); - ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", - job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority, + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n", + job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); mutex_unlock(&file_priv->lock); @@ -777,7 +777,8 @@ static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, goto err_free_handles; } - ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, buffer_count); + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n", + file_priv->ctx.id, cmdq_id, buffer_count); job = ivpu_job_create(file_priv, engine, buffer_count); if (!job) { From 99deffc409b69000ac4877486e69ec6516becd53 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 1 Apr 2025 22:27:31 +0200 Subject: [PATCH 0449/2924] iommu/exynos: Fix suspend/resume with IDENTITY domain Commit bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") changed the sequence of probing the SYSMMU controller devices and calls to arm_iommu_attach_device(), what results in resuming SYSMMU controller earlier, when it is still set to IDENTITY mapping. Such change revealed the bug in IDENTITY handling in the exynos-iommu driver. When SYSMMU controller is set to IDENTITY mapping, data->domain is NULL, so adjust checks in suspend & resume callbacks to handle this case correctly. Fixes: b3d14960e629 ("iommu/exynos: Implement an IDENTITY domain") Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250401202731.2810474-1-m.szyprowski@samsung.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 69e23e017d9e5f..317266aca6e28e 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -832,7 +832,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (&data->domain->domain != &exynos_identity_domain) { + if (data->domain) { dev_dbg(data->sysmmu, "saving state\n"); __sysmmu_disable(data); } @@ -850,7 +850,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (&data->domain->domain != &exynos_identity_domain) { + if (data->domain) { dev_dbg(data->sysmmu, "restoring state\n"); __sysmmu_enable(data); } From 38e8844005e6068f336a3ad45451a562a0040ca1 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Thu, 3 Apr 2025 12:22:12 +0200 Subject: [PATCH 0450/2924] iommu/mediatek: Fix NULL pointer deference in mtk_iommu_device_group Currently, mtk_iommu calls during probe iommu_device_register before the hw_list from driver data is initialized. Since iommu probing issue fix, it leads to NULL pointer dereference in mtk_iommu_device_group when hw_list is accessed with list_first_entry (not null safe). So, change the call order to ensure iommu_device_register is called after the driver data are initialized. Fixes: 9e3a2a643653 ("iommu/mediatek: Adapt sharing and non-sharing pgtable case") Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Reviewed-by: Yong Wu Tested-by: Chen-Yu Tsai # MT8183 Juniper, MT8186 Tentacruel Reviewed-by: AngeloGioacchino Del Regno Tested-by: AngeloGioacchino Del Regno Signed-off-by: Louis-Alexis Eyraud Link: https://lore.kernel.org/r/20250403-fix-mtk-iommu-error-v2-1-fe8b18f8b0a8@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 034b0e670384a2..df98d0c65f5469 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1372,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); mutex_init(&data->mutex); - ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, - "mtk-iommu.%pa", &ioaddr); - if (ret) - goto out_link_remove; - - ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev); - if (ret) - goto out_sysfs_remove; - if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) { list_add_tail(&data->list, data->plat_data->hw_list); data->hw_list = data->plat_data->hw_list; @@ -1390,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev) data->hw_list = &data->hw_list_head; } + ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, + "mtk-iommu.%pa", &ioaddr); + if (ret) + goto out_list_del; + + ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev); + if (ret) + goto out_sysfs_remove; + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); if (ret) - goto out_list_del; + goto out_device_unregister; } return ret; -out_list_del: - list_del(&data->list); +out_device_unregister: iommu_device_unregister(&data->iommu); out_sysfs_remove: iommu_device_sysfs_remove(&data->iommu); -out_link_remove: +out_list_del: + list_del(&data->list); if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) device_link_remove(data->smicomm_dev, dev); out_runtime_disable: From ae4814a3aab54ce548950161937176b7f9ec6f77 Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Mon, 7 Apr 2025 09:53:28 +0800 Subject: [PATCH 0451/2924] iommu: remove unneeded semicolon cocci warnings: drivers/iommu/dma-iommu.c:1788:2-3: Unneeded semicolon so remove unneeded semicolon to fix cocci warnings. Signed-off-by: Pei Xiao Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/tencent_73EEE47E6ECCF538229C9B9E6A0272DA2B05@qq.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index cb7e29dcac15ac..a775e4dbe06f0d 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1754,7 +1754,7 @@ static size_t cookie_msi_granule(const struct iommu_domain *domain) return PAGE_SIZE; default: BUG(); - }; + } } static struct list_head *cookie_msi_pages(const struct iommu_domain *domain) @@ -1766,7 +1766,7 @@ static struct list_head *cookie_msi_pages(const struct iommu_domain *domain) return &domain->msi_cookie->msi_page_list; default: BUG(); - }; + } } static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, From 767e22001dfce64cc03b7def1562338591ab6031 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 7 Apr 2025 13:19:08 -0700 Subject: [PATCH 0452/2924] iommu/tegra241-cmdqv: Fix warnings due to dmam_free_coherent() Two WARNINGs are observed when SMMU driver rolls back upon failure: arm-smmu-v3.9.auto: Failed to register iommu arm-smmu-v3.9.auto: probe with driver arm-smmu-v3 failed with error -22 ------------[ cut here ]------------ WARNING: CPU: 5 PID: 1 at kernel/dma/mapping.c:74 dmam_free_coherent+0xc0/0xd8 Call trace: dmam_free_coherent+0xc0/0xd8 (P) tegra241_vintf_free_lvcmdq+0x74/0x188 tegra241_cmdqv_remove_vintf+0x60/0x148 tegra241_cmdqv_remove+0x48/0xc8 arm_smmu_impl_remove+0x28/0x60 devm_action_release+0x1c/0x40 ------------[ cut here ]------------ 128 pages are still in use! WARNING: CPU: 16 PID: 1 at mm/page_alloc.c:6902 free_contig_range+0x18c/0x1c8 Call trace: free_contig_range+0x18c/0x1c8 (P) cma_release+0x154/0x2f0 dma_free_contiguous+0x38/0xa0 dma_direct_free+0x10c/0x248 dma_free_attrs+0x100/0x290 dmam_free_coherent+0x78/0xd8 tegra241_vintf_free_lvcmdq+0x74/0x160 tegra241_cmdqv_remove+0x98/0x198 arm_smmu_impl_remove+0x28/0x60 devm_action_release+0x1c/0x40 This is because the LVCMDQ queue memory are managed by devres, while that dmam_free_coherent() is called in the context of devm_action_release(). Jason pointed out that "arm_smmu_impl_probe() has mis-ordered the devres callbacks if ops->device_remove() is going to be manually freeing things that probe allocated": https://lore.kernel.org/linux-iommu/20250407174408.GB1722458@nvidia.com/ In fact, tegra241_cmdqv_init_structures() only allocates memory resources which means any failure that it generates would be similar to -ENOMEM, so there is no point in having that "falling back to standard SMMU" routine, as the standard SMMU would likely fail to allocate memory too. Remove the unwind part in tegra241_cmdqv_init_structures(), and return a proper error code to ask SMMU driver to call tegra241_cmdqv_remove() via impl_ops->device_remove(). Then, drop tegra241_vintf_free_lvcmdq() since devres will take care of that. Fixes: 483e0bd8883a ("iommu/tegra241-cmdqv: Do not allocate vcmdq until dma_set_mask_and_coherent") Cc: stable@vger.kernel.org Suggested-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250407201908.172225-1-nicolinc@nvidia.com Signed-off-by: Joerg Roedel --- .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 32 +++---------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index d525ab43a4aebf..dd7d030d2e8909 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -487,17 +487,6 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) /* VCMDQ Resource Helpers */ -static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq) -{ - struct arm_smmu_queue *q = &vcmdq->cmdq.q; - size_t nents = 1 << q->llq.max_n_shift; - size_t qsz = nents << CMDQ_ENT_SZ_SHIFT; - - if (!q->base) - return; - dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma); -} - static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) { struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; @@ -560,7 +549,8 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; char header[64]; - tegra241_vcmdq_free_smmu_cmdq(vcmdq); + /* Note that the lvcmdq queue memory space is managed by devres */ + tegra241_vintf_deinit_lvcmdq(vintf, lidx); dev_dbg(vintf->cmdqv->dev, @@ -768,13 +758,13 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); if (!vintf) - goto out_fallback; + return -ENOMEM; /* Init VINTF0 for in-kernel use */ ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); if (ret) { dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); - goto free_vintf; + return ret; } /* Preallocate logical VCMDQs to VINTF0 */ @@ -783,24 +773,12 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); if (IS_ERR(vcmdq)) - goto free_lvcmdq; + return PTR_ERR(vcmdq); } /* Now, we are ready to run all the impl ops */ smmu->impl_ops = &tegra241_cmdqv_impl_ops; return 0; - -free_lvcmdq: - for (lidx--; lidx >= 0; lidx--) - tegra241_vintf_free_lvcmdq(vintf, lidx); - tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); -free_vintf: - kfree(vintf); -out_fallback: - dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); - smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; - tegra241_cmdqv_remove(smmu); - return 0; } #ifdef CONFIG_IOMMU_DEBUGFS From 4767af82a08ffaa5e55fe71febfa8cdef201b620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 10 Apr 2025 19:17:21 +0200 Subject: [PATCH 0453/2924] landlock: Log the TGID of the domain creator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As for other Audit's "pid" fields, Landlock should use the task's TGID instead of its TID. Fix this issue by keeping a reference to the TGID of the domain creator. Existing tests already check for the PID but only with the thread group leader, so always the TGID. A following patch adds dedicated tests for non-leader thread. Remove the current_real_cred() check which does not make sense because we only reference a struct pid, whereas a previous version did reference a struct cred instead. Cc: Christian Brauner Cc: Paul Moore Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20250410171725.1265860-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- security/landlock/domain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/landlock/domain.c b/security/landlock/domain.c index bae2e99090131f..a647b68e8d060a 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "access.h" @@ -99,8 +100,7 @@ static struct landlock_details *get_current_details(void) return ERR_PTR(-ENOMEM); memcpy(details->exe_path, path_str, path_size); - WARN_ON_ONCE(current_cred() != current_real_cred()); - details->pid = get_pid(task_pid(current)); + details->pid = get_pid(task_tgid(current)); details->uid = from_kuid(&init_user_ns, current_uid()); get_task_comm(details->comm, current); return details; From e4a0f9e0cacd93094b619616426a273e0bc9107e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 10 Apr 2025 19:17:22 +0200 Subject: [PATCH 0454/2924] selftests/landlock: Factor out audit fixture in audit_test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The audit fixture needlessly stores and manages domain_stack. Move it to the audit.layers tests. This will be useful to reuse the audit fixture with the next patch. Cc: Günther Noack Link: https://lore.kernel.org/r/20250410171725.1265860-2-mic@digikod.net Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/audit_test.c | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index a0643070c403de..815c0f03e1fbb9 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -40,7 +40,6 @@ FIXTURE(audit) { struct audit_filter audit_filter; int audit_fd; - __u64(*domain_stack)[16]; }; FIXTURE_SETUP(audit) @@ -60,18 +59,10 @@ FIXTURE_SETUP(audit) TH_LOG("Failed to initialize audit: %s", error_msg); } clear_cap(_metadata, CAP_AUDIT_CONTROL); - - self->domain_stack = mmap(NULL, sizeof(*self->domain_stack), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); - ASSERT_NE(MAP_FAILED, self->domain_stack); - memset(self->domain_stack, 0, sizeof(*self->domain_stack)); } FIXTURE_TEARDOWN(audit) { - EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack))); - set_cap(_metadata, CAP_AUDIT_CONTROL); EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter)); clear_cap(_metadata, CAP_AUDIT_CONTROL); @@ -83,9 +74,15 @@ TEST_F(audit, layers) .scoped = LANDLOCK_SCOPE_SIGNAL, }; int status, ruleset_fd, i; + __u64(*domain_stack)[16]; __u64 prev_dom = 3; pid_t child; + domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, domain_stack); + memset(domain_stack, 0, sizeof(*domain_stack)); + ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); @@ -94,7 +91,7 @@ TEST_F(audit, layers) child = fork(); ASSERT_LE(0, child); if (child == 0) { - for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) { + for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) { __u64 denial_dom = 1; __u64 allocated_dom = 2; @@ -115,7 +112,7 @@ TEST_F(audit, layers) /* Checks that the new domain is younger than the previous one. */ EXPECT_GT(allocated_dom, prev_dom); prev_dom = allocated_dom; - (*self->domain_stack)[i] = allocated_dom; + (*domain_stack)[i] = allocated_dom; } /* Checks that we reached the maximum number of layers. */ @@ -142,20 +139,20 @@ TEST_F(audit, layers) /* Purges log from deallocated domains. */ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); - for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) { + for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) { __u64 deallocated_dom = 2; EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, &deallocated_dom)); - EXPECT_EQ((*self->domain_stack)[i], deallocated_dom) + EXPECT_EQ((*domain_stack)[i], deallocated_dom) { TH_LOG("Failed to match domain %llx (#%d)", - (*self->domain_stack)[i], i); + (*domain_stack)[i], i); } } + EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack))); EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default, sizeof(audit_tv_default))); - EXPECT_EQ(0, close(ruleset_fd)); } From 6b4566400a2919e6c1137404c53d7cf1ada559aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 10 Apr 2025 19:17:23 +0200 Subject: [PATCH 0455/2924] selftests/landlock: Add PID tests for audit records MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add audit.thread tests to check that the PID tied to a domain is not a thread ID but the thread group ID. These new tests would not pass without the previous TGID fix. Extend matches_log_domain_allocated() to check against the PID that created the domain. Test coverage for security/landlock is 93.6% of 1524 lines according to gcc/gcov-14. Cc: Christian Brauner Cc: Günther Noack Cc: Paul Moore Link: https://lore.kernel.org/r/20250410171725.1265860-3-mic@digikod.net Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/audit.h | 21 ++- tools/testing/selftests/landlock/audit_test.c | 127 +++++++++++++++++- tools/testing/selftests/landlock/fs_test.c | 3 +- 3 files changed, 141 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index b9054086a0c929..18a6014920b5f8 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -300,15 +300,22 @@ static int audit_match_record(int audit_fd, const __u16 type, return err; } -static int __maybe_unused matches_log_domain_allocated(int audit_fd, +static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid, __u64 *domain_id) { - return audit_match_record( - audit_fd, AUDIT_LANDLOCK_DOMAIN, - REGEX_LANDLOCK_PREFIX - " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+" - " exe=\"[^\"]\\+\" comm=\".*_test\"$", - domain_id); + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " status=allocated mode=enforcing pid=%d uid=[0-9]\\+" + " exe=\"[^\"]\\+\" comm=\".*_test\"$"; + char log_match[sizeof(log_template) + 10]; + int log_match_len; + + log_match_len = + snprintf(log_match, sizeof(log_match), log_template, pid); + if (log_match_len > sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, + domain_id); } static int __maybe_unused matches_log_domain_deallocated( diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index 815c0f03e1fbb9..cfc571afd0eb81 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -104,7 +105,8 @@ TEST_F(audit, layers) matches_log_signal(_metadata, self->audit_fd, getppid(), &denial_dom)); EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(denial_dom, 1); EXPECT_NE(denial_dom, 0); EXPECT_EQ(denial_dom, allocated_dom); @@ -156,6 +158,126 @@ TEST_F(audit, layers) EXPECT_EQ(0, close(ruleset_fd)); } +struct thread_data { + pid_t parent_pid; + int ruleset_fd, pipe_child, pipe_parent; +}; + +static void *thread_audit_test(void *arg) +{ + const struct thread_data *data = (struct thread_data *)arg; + uintptr_t err = 0; + char buffer; + + /* TGID and TID are different for a second thread. */ + if (getpid() == gettid()) { + err = 1; + goto out; + } + + if (landlock_restrict_self(data->ruleset_fd, 0)) { + err = 2; + goto out; + } + + if (close(data->ruleset_fd)) { + err = 3; + goto out; + } + + /* Creates a denial to get the domain ID. */ + if (kill(data->parent_pid, 0) != -1) { + err = 4; + goto out; + } + + if (EPERM != errno) { + err = 5; + goto out; + } + + /* Signals the parent to read denial logs. */ + if (write(data->pipe_child, ".", 1) != 1) { + err = 6; + goto out; + } + + /* Waits for the parent to update audit filters. */ + if (read(data->pipe_parent, &buffer, 1) != 1) { + err = 7; + goto out; + } + +out: + close(data->pipe_child); + close(data->pipe_parent); + return (void *)err; +} + +/* Checks that the PID tied to a domain is not a TID but the TGID. */ +TEST_F(audit, thread) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + __u64 denial_dom = 1; + __u64 allocated_dom = 2; + __u64 deallocated_dom = 3; + pthread_t thread; + int pipe_child[2], pipe_parent[2]; + char buffer; + struct thread_data child_data; + + child_data.parent_pid = getppid(); + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + child_data.pipe_child = pipe_child[1]; + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + child_data.pipe_parent = pipe_parent[0]; + child_data.ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, child_data.ruleset_fd); + + /* TGID and TID are the same for the initial thread . */ + EXPECT_EQ(getpid(), gettid()); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test, + &child_data)); + + /* Waits for the child to generate a denial. */ + ASSERT_EQ(1, read(pipe_child[0], &buffer, 1)); + EXPECT_EQ(0, close(pipe_child[0])); + + /* Matches the signal log to get the domain ID. */ + EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, + child_data.parent_pid, &denial_dom)); + EXPECT_NE(denial_dom, 1); + EXPECT_NE(denial_dom, 0); + + EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(), + &allocated_dom)); + EXPECT_EQ(denial_dom, allocated_dom); + + /* Updates filter rules to match the drop record. */ + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE)); + EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter, + AUDIT_DEL_RULE)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); + + /* Signals the thread to exit, which will generate a domain deallocation. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + EXPECT_EQ(0, close(pipe_parent[1])); + ASSERT_EQ(0, pthread_join(thread, NULL)); + + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); + EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, + &deallocated_dom)); + EXPECT_EQ(denial_dom, deallocated_dom); + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_default, sizeof(audit_tv_default))); +} + FIXTURE(audit_flags) { struct audit_filter audit_filter; @@ -270,7 +392,8 @@ TEST_F(audit_flags, signal) /* Checks domain information records. */ EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(*self->domain_id, 1); EXPECT_NE(*self->domain_id, 0); EXPECT_EQ(*self->domain_id, allocated_dom); diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index f819011a87986e..73729382d40f82 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -5964,7 +5964,8 @@ TEST_F(audit_layout1, refer_handled) EXPECT_EQ(EXDEV, errno); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d1)); - EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL)); + EXPECT_EQ(0, + matches_log_domain_allocated(self->audit_fd, getpid(), NULL)); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d3)); From e153fdea9db04dd0e2e536e2eb125b16bbbc2af7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 20 Mar 2025 16:15:42 +0100 Subject: [PATCH 0456/2924] phy: can-transceiver: Re-instate "mux-states" property presence check On the Renesas Gray Hawk Single development board: can-transceiver-phy can-phy0: /can-phy0: failed to get mux-state (0) "mux-states" is an optional property for CAN transceivers. However, mux_get() always prints an error message in case of an error, including when the property is not present, confusing the user. Fix this by re-instating the property presence check (this time using the proper API) in a wrapper around devm_mux_state_get(). When the multiplexer subsystem gains support for optional muxes, the wrapper can just be removed. In addition, propagate all real errors upstream, instead of ignoring them. Fixes: d02dfd4ceb2e9f34 ("phy: can-transceiver: Drop unnecessary "mux-states" property presence check") Signed-off-by: Geert Uytterhoeven Tested-by: Biju Das Reviewed-by: Biju Das Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/3d7e0d723908284e8cf06ad1f7950c03173178f3.1742483710.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- drivers/phy/phy-can-transceiver.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c index 2bec70615449f9..f59caff4b3d4c2 100644 --- a/drivers/phy/phy-can-transceiver.c +++ b/drivers/phy/phy-can-transceiver.c @@ -93,6 +93,16 @@ static const struct of_device_id can_transceiver_phy_ids[] = { }; MODULE_DEVICE_TABLE(of, can_transceiver_phy_ids); +/* Temporary wrapper until the multiplexer subsystem supports optional muxes */ +static inline struct mux_state * +devm_mux_state_get_optional(struct device *dev, const char *mux_name) +{ + if (!of_property_present(dev->of_node, "mux-states")) + return NULL; + + return devm_mux_state_get(dev, mux_name); +} + static int can_transceiver_phy_probe(struct platform_device *pdev) { struct phy_provider *phy_provider; @@ -114,13 +124,11 @@ static int can_transceiver_phy_probe(struct platform_device *pdev) match = of_match_node(can_transceiver_phy_ids, pdev->dev.of_node); drvdata = match->data; - mux_state = devm_mux_state_get(dev, NULL); - if (IS_ERR(mux_state)) { - if (PTR_ERR(mux_state) == -EPROBE_DEFER) - return PTR_ERR(mux_state); - } else { - can_transceiver_phy->mux_state = mux_state; - } + mux_state = devm_mux_state_get_optional(dev, NULL); + if (IS_ERR(mux_state)) + return PTR_ERR(mux_state); + + can_transceiver_phy->mux_state = mux_state; phy = devm_phy_create(dev, dev->of_node, &can_transceiver_phy_ops); From 9cf118aafd6682793c40dde31b5f24d271da3996 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Mar 2025 17:36:14 +0300 Subject: [PATCH 0457/2924] phy: rockchip-samsung-dcphy: Add missing assignment The "ret = " was accidentally dropped so the error handling doesn't work. Fixes: b2a1a2ae7818 ("phy: rockchip: Add Samsung MIPI D-/C-PHY driver") Signed-off-by: Dan Carpenter Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/e64265a4-9543-4728-a49f-ea910fccef7c@stanley.mountain Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c index 08c78c1bafc9a2..28a052e1736651 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c @@ -1653,7 +1653,7 @@ static __maybe_unused int samsung_mipi_dcphy_runtime_resume(struct device *dev) return ret; } - clk_prepare_enable(samsung->ref_clk); + ret = clk_prepare_enable(samsung->ref_clk); if (ret) { dev_err(samsung->dev, "Failed to enable reference clock, %d\n", ret); clk_disable_unprepare(samsung->pclk); From d27326a9999286fa45ad063f760e63329254f130 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 8 Apr 2025 14:01:26 +0300 Subject: [PATCH 0458/2924] dma-buf/sw_sync: Decrement refcount on error in sw_sync_ioctl_get_deadline() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call dma_fence_put(fence) before returning an error if dma_fence_to_sync_pt() fails. Use an unwind ladder at the end of the function to do the cleanup. Fixes: 70e67aaec2f4 ("dma-buf/sw_sync: Add fence deadline support") Signed-off-by: Dan Carpenter Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/a010a1ac-107b-4fc0-a052-9fd3706ad690@stanley.mountain Signed-off-by: Christian König --- drivers/dma-buf/sw_sync.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index f5905d67dedbbb..22a808995f106f 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -438,15 +438,17 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a return -EINVAL; pt = dma_fence_to_sync_pt(fence); - if (!pt) - return -EINVAL; + if (!pt) { + ret = -EINVAL; + goto put_fence; + } spin_lock_irqsave(fence->lock, flags); - if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { - data.deadline_ns = ktime_to_ns(pt->deadline); - } else { + if (!test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { ret = -ENOENT; + goto unlock; } + data.deadline_ns = ktime_to_ns(pt->deadline); spin_unlock_irqrestore(fence->lock, flags); dma_fence_put(fence); @@ -458,6 +460,13 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a return -EFAULT; return 0; + +unlock: + spin_unlock_irqrestore(fence->lock, flags); +put_fence: + dma_fence_put(fence); + + return ret; } static long sw_sync_ioctl(struct file *file, unsigned int cmd, From af1352f82729d742506715176c15065a6b583167 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 10 Apr 2025 18:18:23 +0300 Subject: [PATCH 0459/2924] Revert "xhci: Avoid queuing redundant Stop Endpoint command for stalled endpoint" This reverts commit 0c74d232578b1a7071e0312312811cb75b26b202. Paul Menzel reported that the two EP_STALLED patches in 6.15-rc1 cause regression. Turns out that the new flag may never get cleared after reset-resume, preventing xhci from restarting the endpoint. Revert this to take a proper look at it. Link: https://lore.kernel.org/linux-usb/84b400f8-2943-44e0-8803-f3aac3b670af@molgen.mpg.de cc: Paul Menzel cc: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250410151828.2868740-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0452b8d65832b9..6370874bf2656a 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1770,8 +1770,8 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) goto done; } - /* In these cases no commands are pending but the endpoint is stopped */ - if (ep->ep_state & (EP_CLEARING_TT | EP_STALLED)) { + /* In this case no commands are pending but the endpoint is stopped */ + if (ep->ep_state & EP_CLEARING_TT) { /* and cancelled TDs can be given back right away */ xhci_dbg(xhci, "Invalidating TDs instantly on slot %d ep %d in state 0x%x\n", urb->dev->slot_id, ep_index, ep->ep_state); @@ -3208,12 +3208,10 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, return; ep = &vdev->eps[ep_index]; - - spin_lock_irqsave(&xhci->lock, flags); - ep->ep_state &= ~EP_STALLED; /* Bail out if toggle is already being cleared by a endpoint reset */ + spin_lock_irqsave(&xhci->lock, flags); if (ep->ep_state & EP_HARD_CLEAR_TOGGLE) { ep->ep_state &= ~EP_HARD_CLEAR_TOGGLE; spin_unlock_irqrestore(&xhci->lock, flags); From b513cc1905bb360f48be281a1ded272131a8227a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 10 Apr 2025 18:18:24 +0300 Subject: [PATCH 0460/2924] Revert "xhci: Prevent early endpoint restart when handling STALL errors." This reverts commit 860f5d0d3594005d4588240028f42e8d2bfc725b. Paul Menzel reported that the two EP_STALLED patches in 6.15-rc1 cause regression. Turns out that the new flag may never get cleared after reset-resume, preventing xhci from restarting the endpoint. Revert this to take a proper look at it. Link: https://lore.kernel.org/linux-usb/84b400f8-2943-44e0-8803-f3aac3b670af@molgen.mpg.de cc: Paul Menzel cc: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250410151828.2868740-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 7 ++----- drivers/usb/host/xhci.c | 6 ------ drivers/usb/host/xhci.h | 3 +-- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 5d64c297721cd9..94a2ae2c52e2a2 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -561,8 +561,8 @@ void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, * pointer command pending because the device can choose to start any * stream once the endpoint is on the HW schedule. */ - if (ep_state & (EP_STOP_CMD_PENDING | SET_DEQ_PENDING | EP_HALTED | - EP_CLEARING_TT | EP_STALLED)) + if ((ep_state & EP_STOP_CMD_PENDING) || (ep_state & SET_DEQ_PENDING) || + (ep_state & EP_HALTED) || (ep_state & EP_CLEARING_TT)) return; trace_xhci_ring_ep_doorbell(slot_id, DB_VALUE(ep_index, stream_id)); @@ -2573,9 +2573,6 @@ static void process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, xhci_handle_halted_endpoint(xhci, ep, td, EP_SOFT_RESET); return; - case COMP_STALL_ERROR: - ep->ep_state |= EP_STALLED; - break; default: /* do nothing */ break; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6370874bf2656a..ca390beda85b56 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1605,11 +1605,6 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag goto free_priv; } - /* Class driver might not be aware ep halted due to async URB giveback */ - if (*ep_state & EP_STALLED) - dev_dbg(&urb->dev->dev, "URB %p queued before clearing halt\n", - urb); - switch (usb_endpoint_type(&urb->ep->desc)) { case USB_ENDPOINT_XFER_CONTROL: @@ -3208,7 +3203,6 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, return; ep = &vdev->eps[ep_index]; - ep->ep_state &= ~EP_STALLED; /* Bail out if toggle is already being cleared by a endpoint reset */ spin_lock_irqsave(&xhci->lock, flags); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 37860f1e3abac4..28b6264f8b875a 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -664,7 +664,7 @@ struct xhci_virt_ep { unsigned int err_count; unsigned int ep_state; #define SET_DEQ_PENDING (1 << 0) -#define EP_HALTED (1 << 1) /* Halted host ep handling */ +#define EP_HALTED (1 << 1) /* For stall handling */ #define EP_STOP_CMD_PENDING (1 << 2) /* For URB cancellation */ /* Transitioning the endpoint to using streams, don't enqueue URBs */ #define EP_GETTING_STREAMS (1 << 3) @@ -675,7 +675,6 @@ struct xhci_virt_ep { #define EP_SOFT_CLEAR_TOGGLE (1 << 7) /* usb_hub_clear_tt_buffer is in progress */ #define EP_CLEARING_TT (1 << 8) -#define EP_STALLED (1 << 9) /* For stall handling */ /* ---- Related to URB cancellation ---- */ struct list_head cancelled_td_list; struct xhci_hcd *xhci; From 9e3a28793d2fde7a709e814d2504652eaba6ae98 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 10 Apr 2025 18:18:25 +0300 Subject: [PATCH 0461/2924] usb: xhci: Fix Short Packet handling rework ignoring errors A Short Packet event before the last TRB of a TD is followed by another event on the final TRB on spec-compliant HCs, which is most of them. A 'last_td_was_short' flag was added to know if a TD has just completed as Short Packet and another event is to come. The flag was cleared after seeing the event (unless no TDs are pending, but that's a separate bug) or seeing a new TD complete as something other than Short Packet. A rework replaced the flag with an 'old_trb_comp_code' variable. When an event doesn't match the pending TD and the previous event was Short Packet, the new event is silently ignored. To preserve old behavior, 'old_trb_comp_code' should be cleared at this point, but instead it is being set to current comp code, which is often Short Packet again. This can cause more events to be silently ignored, even though they are no longer connected with the old TD that completed short and indicate a serious problem with the driver or the xHC. Common device classes like UAC in async mode, UVC, serial or the UAS status pipe complete as Short Packet routinely and could be affected. Clear 'old_trb_comp_code' to zero, which is an invalid completion code and the same value the variable starts with. This restores original behavior on Short Packet and also works for illegal Etron events, which the code has been extended to cover too. Fixes: b331a3d8097f ("xhci: Handle spurious events on Etron host isoc enpoints") Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250410151828.2868740-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 94a2ae2c52e2a2..4e975caca2359e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2913,7 +2913,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, if (xhci_spurious_success_tx_event(xhci, ep_ring)) { xhci_dbg(xhci, "Spurious event dma %pad, comp_code %u after %u\n", &ep_trb_dma, trb_comp_code, ep_ring->old_trb_comp_code); - ep_ring->old_trb_comp_code = trb_comp_code; + ep_ring->old_trb_comp_code = 0; return 0; } From 1ea050da5562af9b930d17cbbe9632d30f5df43a Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 10 Apr 2025 18:18:26 +0300 Subject: [PATCH 0462/2924] usb: xhci: Fix invalid pointer dereference in Etron workaround This check is performed before prepare_transfer() and prepare_ring(), so enqueue can already point at the final link TRB of a segment. And indeed it will, some 0.4% of times this code is called. Then enqueue + 1 is an invalid pointer. It will crash the kernel right away or load some junk which may look like a link TRB and cause the real link TRB to be replaced with a NOOP. This wouldn't end well. Use a functionally equivalent test which doesn't dereference the pointer and always gives correct result. Something has crashed my machine twice in recent days while playing with an Etron HC, and a control transfer stress test ran for confirmation has just crashed it again. The same test passes with this patch applied. Fixes: 5e1c67abc930 ("xhci: Fix control transfer error on Etron xHCI host") Cc: stable@vger.kernel.org Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Reviewed-by: Kuangyi Chiang Link: https://lore.kernel.org/r/20250410151828.2868740-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 4e975caca2359e..b906bc2eea5f6f 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3777,7 +3777,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, * enqueue a No Op TRB, this can prevent the Setup and Data Stage * TRB to be breaked by the Link TRB. */ - if (trb_is_link(ep_ring->enqueue + 1)) { + if (last_trb_on_seg(ep_ring->enq_seg, ep_ring->enqueue + 1)) { field = TRB_TYPE(TRB_TR_NOOP) | ep_ring->cycle_state; queue_trb(xhci, ep_ring, false, 0, 0, TRB_INTR_TARGET(0), field); From bea5892d0ed274e03655223d1977cf59f9aff2f2 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 10 Apr 2025 18:18:27 +0300 Subject: [PATCH 0463/2924] xhci: Limit time spent with xHC interrupts disabled during bus resume Current xhci bus resume implementation prevents xHC host from generating interrupts during high-speed USB 2 and super-speed USB 3 bus resume. Only reason to disable interrupts during bus resume would be to prevent the interrupt handler from interfering with the resume process of USB 2 ports. Host initiated resume of USB 2 ports is done in two stages. The xhci driver first transitions the port from 'U3' to 'Resume' state, then wait in Resume for 20ms, and finally moves port to U0 state. xhci driver can't prevent interrupts by keeping the xhci spinlock due to this 20ms sleep. Limit interrupt disabling to the USB 2 port resume case only. resuming USB 2 ports in bus resume is only done in special cases where USB 2 ports had to be forced to suspend during bus suspend. The current way of preventing interrupts by clearing the 'Interrupt Enable' (INTE) bit in USBCMD register won't prevent the Interrupter registers 'Interrupt Pending' (IP), 'Event Handler Busy' (EHB) and USBSTS register Event Interrupt (EINT) bits from being set. New interrupts can't be issued before those bits are properly clered. Disable interrupts by clearing the interrupter register 'Interrupt Enable' (IE) bit instead. This way IP, EHB and INTE won't be set before IE is enabled again and a new interrupt is triggered. Reported-by: Devyn Liu Closes: https://lore.kernel.org/linux-usb/b1a9e2d51b4d4ff7a304f77c5be8164e@huawei.com/ Cc: stable@vger.kernel.org Tested-by: Devyn Liu Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250410151828.2868740-6-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 30 ++++++++++++++++-------------- drivers/usb/host/xhci.c | 4 ++-- drivers/usb/host/xhci.h | 2 ++ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index c0f226584a408d..486347776cb29a 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1878,9 +1878,10 @@ int xhci_bus_resume(struct usb_hcd *hcd) int max_ports, port_index; int sret; u32 next_state; - u32 temp, portsc; + u32 portsc; struct xhci_hub *rhub; struct xhci_port **ports; + bool disabled_irq = false; rhub = xhci_get_rhub(hcd); ports = rhub->ports; @@ -1896,17 +1897,20 @@ int xhci_bus_resume(struct usb_hcd *hcd) return -ESHUTDOWN; } - /* delay the irqs */ - temp = readl(&xhci->op_regs->command); - temp &= ~CMD_EIE; - writel(temp, &xhci->op_regs->command); - /* bus specific resume for ports we suspended at bus_suspend */ - if (hcd->speed >= HCD_USB3) + if (hcd->speed >= HCD_USB3) { next_state = XDEV_U0; - else + } else { next_state = XDEV_RESUME; - + if (bus_state->bus_suspended) { + /* + * prevent port event interrupts from interfering + * with usb2 port resume process + */ + xhci_disable_interrupter(xhci->interrupters[0]); + disabled_irq = true; + } + } port_index = max_ports; while (port_index--) { portsc = readl(ports[port_index]->addr); @@ -1974,11 +1978,9 @@ int xhci_bus_resume(struct usb_hcd *hcd) (void) readl(&xhci->op_regs->command); bus_state->next_statechange = jiffies + msecs_to_jiffies(5); - /* re-enable irqs */ - temp = readl(&xhci->op_regs->command); - temp |= CMD_EIE; - writel(temp, &xhci->op_regs->command); - temp = readl(&xhci->op_regs->command); + /* re-enable interrupter */ + if (disabled_irq) + xhci_enable_interrupter(xhci->interrupters[0]); spin_unlock_irqrestore(&xhci->lock, flags); return 0; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index ca390beda85b56..90eb491267b584 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -322,7 +322,7 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci) xhci_info(xhci, "Fault detected\n"); } -static int xhci_enable_interrupter(struct xhci_interrupter *ir) +int xhci_enable_interrupter(struct xhci_interrupter *ir) { u32 iman; @@ -335,7 +335,7 @@ static int xhci_enable_interrupter(struct xhci_interrupter *ir) return 0; } -static int xhci_disable_interrupter(struct xhci_interrupter *ir) +int xhci_disable_interrupter(struct xhci_interrupter *ir) { u32 iman; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 28b6264f8b875a..242ab9fbc8ae6b 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1890,6 +1890,8 @@ int xhci_alloc_tt_info(struct xhci_hcd *xhci, struct usb_tt *tt, gfp_t mem_flags); int xhci_set_interrupter_moderation(struct xhci_interrupter *ir, u32 imod_interval); +int xhci_enable_interrupter(struct xhci_interrupter *ir); +int xhci_disable_interrupter(struct xhci_interrupter *ir); /* xHCI ring, segment, TRB, and TD functions */ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb); From 6907e8093b3070d877ee607e5ceede60cfd08bde Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 11 Apr 2025 12:22:39 +0100 Subject: [PATCH 0464/2924] nvmem: rockchip-otp: Move read-offset into variant-data The RK3588 has an offset into the OTP area where the readable area begins and automatically adds this to the start address. Other variants are very much similar to rk3588, just with a different offset, so move that value into variant-data. To match the size in bytes, store this value also in bytes and not in number of blocks. Signed-off-by: Heiko Stuebner Tested-by: Nicolas Frattaroli Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/rockchip-otp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvmem/rockchip-otp.c b/drivers/nvmem/rockchip-otp.c index ebc3f0b24166bc..3edfbfc2d7220f 100644 --- a/drivers/nvmem/rockchip-otp.c +++ b/drivers/nvmem/rockchip-otp.c @@ -59,7 +59,6 @@ #define RK3588_OTPC_AUTO_EN 0x08 #define RK3588_OTPC_INT_ST 0x84 #define RK3588_OTPC_DOUT0 0x20 -#define RK3588_NO_SECURE_OFFSET 0x300 #define RK3588_NBYTES 4 #define RK3588_BURST_NUM 1 #define RK3588_BURST_SHIFT 8 @@ -69,6 +68,7 @@ struct rockchip_data { int size; + int read_offset; const char * const *clks; int num_clks; nvmem_reg_read_t reg_read; @@ -196,7 +196,7 @@ static int rk3588_otp_read(void *context, unsigned int offset, addr_start = round_down(offset, RK3588_NBYTES) / RK3588_NBYTES; addr_end = round_up(offset + bytes, RK3588_NBYTES) / RK3588_NBYTES; addr_len = addr_end - addr_start; - addr_start += RK3588_NO_SECURE_OFFSET; + addr_start += otp->data->read_offset / RK3588_NBYTES; buf = kzalloc(array_size(addr_len, RK3588_NBYTES), GFP_KERNEL); if (!buf) @@ -280,6 +280,7 @@ static const char * const rk3588_otp_clocks[] = { static const struct rockchip_data rk3588_data = { .size = 0x400, + .read_offset = 0xc00, .clks = rk3588_otp_clocks, .num_clks = ARRAY_SIZE(rk3588_otp_clocks), .reg_read = rk3588_otp_read, From 1b23c14c07326a095b93145ca9ea31cf53d4bde1 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 11 Apr 2025 12:22:40 +0100 Subject: [PATCH 0465/2924] dt-bindings: nvmem: rockchip,otp: add missing limits for clock-names The clocks property correctly declares minItems and maxItems for its variants, but clock-names does not. Both properties are always used together, so should declare the same limits. Suggested-by: Krzysztof Kozlowski Signed-off-by: Heiko Stuebner Acked-by: Conor Dooley Tested-by: Nicolas Frattaroli Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml index a44d44b328091d..3201ff8f933405 100644 --- a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml +++ b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml @@ -62,6 +62,8 @@ allOf: properties: clocks: maxItems: 3 + clock-names: + maxItems: 3 resets: maxItems: 1 reset-names: @@ -78,6 +80,8 @@ allOf: properties: clocks: minItems: 4 + clock-names: + minItems: 4 resets: minItems: 3 reset-names: From 9165960606dff725174155472583efec60f25bab Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 11 Apr 2025 12:22:41 +0100 Subject: [PATCH 0466/2924] dt-bindings: nvmem: rockchip,otp: Add compatible for RK3576 Document the OTP memory found on Rockchip RK3576 SoC. The RK3576 uses the same set of clocks as the px30/rk3308 but has one reset more, so adapt the binding to handle this variant as well. Signed-off-by: Heiko Stuebner Acked-by: Conor Dooley Tested-by: Nicolas Frattaroli Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- .../bindings/nvmem/rockchip,otp.yaml | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml index 3201ff8f933405..dc89020b095067 100644 --- a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml +++ b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml @@ -14,6 +14,7 @@ properties: enum: - rockchip,px30-otp - rockchip,rk3308-otp + - rockchip,rk3576-otp - rockchip,rk3588-otp reg: @@ -70,6 +71,26 @@ allOf: items: - const: phy + - if: + properties: + compatible: + contains: + enum: + - rockchip,rk3576-otp + then: + properties: + clocks: + maxItems: 3 + clock-names: + maxItems: 3 + resets: + minItems: 2 + maxItems: 2 + reset-names: + items: + - const: otp + - const: apb + - if: properties: compatible: From 50d75a13a9ce880a5ef07a4ccc63ba561cc2e69a Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 11 Apr 2025 12:22:42 +0100 Subject: [PATCH 0467/2924] nvmem: rockchip-otp: add rk3576 variant data The variant works very similar to the rk3588, just with a different read-offset and size. Signed-off-by: Heiko Stuebner Tested-by: Nicolas Frattaroli Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/rockchip-otp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/nvmem/rockchip-otp.c b/drivers/nvmem/rockchip-otp.c index 3edfbfc2d7220f..d88f12c5324264 100644 --- a/drivers/nvmem/rockchip-otp.c +++ b/drivers/nvmem/rockchip-otp.c @@ -274,6 +274,14 @@ static const struct rockchip_data px30_data = { .reg_read = px30_otp_read, }; +static const struct rockchip_data rk3576_data = { + .size = 0x100, + .read_offset = 0x700, + .clks = px30_otp_clocks, + .num_clks = ARRAY_SIZE(px30_otp_clocks), + .reg_read = rk3588_otp_read, +}; + static const char * const rk3588_otp_clocks[] = { "otp", "apb_pclk", "phy", "arb", }; @@ -295,6 +303,10 @@ static const struct of_device_id rockchip_otp_match[] = { .compatible = "rockchip,rk3308-otp", .data = &px30_data, }, + { + .compatible = "rockchip,rk3576-otp", + .data = &rk3576_data, + }, { .compatible = "rockchip,rk3588-otp", .data = &rk3588_data, From f487438d370590193c5635ccbae1b1c51c5b273c Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Fri, 11 Apr 2025 12:22:43 +0100 Subject: [PATCH 0468/2924] dt-bindings: nvmem: qfprom: Add X1E80100 compatible Document compatible string for the QFPROM on X1E80100 platform. Signed-off-by: Akhil P Oommen Reviewed-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-6-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index 39c209249c9c0b..4c332b44d35eaa 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -51,6 +51,7 @@ properties: - qcom,sm8450-qfprom - qcom,sm8550-qfprom - qcom,sm8650-qfprom + - qcom,x1e80100-qfprom - const: qcom,qfprom reg: From 269e074da1882c296085a27f0742c47ac860072e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20Cz=C3=A9m=C3=A1n?= Date: Fri, 11 Apr 2025 12:22:44 +0100 Subject: [PATCH 0469/2924] dt-bindings: nvmem: Add compatible for MS8937 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the QFPROM block found on MSM8937. Signed-off-by: Barnabás Czémán Reviewed-by: Krzysztof Kozlowski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-7-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index 4c332b44d35eaa..e1a8856ccba457 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -28,6 +28,7 @@ properties: - qcom,msm8226-qfprom - qcom,msm8916-qfprom - qcom,msm8917-qfprom + - qcom,msm8937-qfprom - qcom,msm8974-qfprom - qcom,msm8976-qfprom - qcom,msm8996-qfprom From eed6d954542fb55c814dd54b7fcc1b515bd76464 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 11 Apr 2025 12:22:45 +0100 Subject: [PATCH 0470/2924] dt-bindings: nvmem: fixed-cell: increase bits start value to 31 If NVMEM uses a data stride bigger than a byte, the starting bit of the cell might be bigger than a byte (e.g. if the data comes in the second byte of the 4-byte word). Allow the staring bit to be 8 or greater to reflect such usecases. Signed-off-by: Dmitry Baryshkov Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-8-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml b/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml index 8b3826243dddfc..38e3ad50ff4fb6 100644 --- a/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml +++ b/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml @@ -27,7 +27,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array items: - minimum: 0 - maximum: 7 + maximum: 31 description: Offset in bit within the address range specified by reg. - minimum: 1 From 7a06ef75107799675ea6e4d73b9df37e18e352a8 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 11 Apr 2025 12:22:46 +0100 Subject: [PATCH 0471/2924] nvmem: core: fix bit offsets of more than one byte If the NVMEM specifies a stride to access data, reading particular cell might require bit offset that is bigger than one byte. Rework NVMEM core code to support bit offsets of more than 8 bits. Signed-off-by: Dmitry Baryshkov Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-9-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index fff85bbf0ecd0f..7872903c08a112 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -837,7 +837,9 @@ static int nvmem_add_cells_from_dt(struct nvmem_device *nvmem, struct device_nod if (addr && len == (2 * sizeof(u32))) { info.bit_offset = be32_to_cpup(addr++); info.nbits = be32_to_cpup(addr); - if (info.bit_offset >= BITS_PER_BYTE || info.nbits < 1) { + if (info.bit_offset >= BITS_PER_BYTE * info.bytes || + info.nbits < 1 || + info.bit_offset + info.nbits > BITS_PER_BYTE * info.bytes) { dev_err(dev, "nvmem: invalid bits on %pOF\n", child); of_node_put(child); return -EINVAL; @@ -1630,21 +1632,29 @@ EXPORT_SYMBOL_GPL(nvmem_cell_put); static void nvmem_shift_read_buffer_in_place(struct nvmem_cell_entry *cell, void *buf) { u8 *p, *b; - int i, extra, bit_offset = cell->bit_offset; + int i, extra, bytes_offset; + int bit_offset = cell->bit_offset; p = b = buf; - if (bit_offset) { + + bytes_offset = bit_offset / BITS_PER_BYTE; + b += bytes_offset; + bit_offset %= BITS_PER_BYTE; + + if (bit_offset % BITS_PER_BYTE) { /* First shift */ - *b++ >>= bit_offset; + *p = *b++ >> bit_offset; /* setup rest of the bytes if any */ for (i = 1; i < cell->bytes; i++) { /* Get bits from next byte and shift them towards msb */ - *p |= *b << (BITS_PER_BYTE - bit_offset); + *p++ |= *b << (BITS_PER_BYTE - bit_offset); - p = b; - *b++ >>= bit_offset; + *p = *b++ >> bit_offset; } + } else if (p != b) { + memmove(p, b, cell->bytes - bytes_offset); + p += cell->bytes - 1; } else { /* point to the msb */ p += cell->bytes - 1; From 13bcd440f2ff38cd7e42a179c223d4b833158b33 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 11 Apr 2025 12:22:47 +0100 Subject: [PATCH 0472/2924] nvmem: core: verify cell's raw_len Check that the NVMEM cell's raw_len is a aligned to word_size. Otherwise Otherwise drivers might face incomplete read while accessing the last part of the NVMEM cell. Signed-off-by: Dmitry Baryshkov Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-10-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 7872903c08a112..7b8c85f9e035cc 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -605,6 +605,18 @@ static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, return -EINVAL; } + if (!IS_ALIGNED(cell->raw_len, nvmem->word_size)) { + dev_err(&nvmem->dev, + "cell %s raw len %zd unaligned to nvmem word size %d\n", + cell->name ?: "", cell->raw_len, + nvmem->word_size); + + if (info->raw_len) + return -EINVAL; + + cell->raw_len = ALIGN(cell->raw_len, nvmem->word_size); + } + return 0; } From 6786484223d5705bf7f919c1e5055d478ebeec32 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 11 Apr 2025 12:22:48 +0100 Subject: [PATCH 0473/2924] nvmem: core: update raw_len if the bit reading is required If NVMEM cell uses bit offset or specifies bit truncation, update raw_len manually (following the cell->bytes update), ensuring that the NVMEM access is still word-aligned. Signed-off-by: Dmitry Baryshkov Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-11-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 7b8c85f9e035cc..e206efc29a0044 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -594,9 +594,11 @@ static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, cell->nbits = info->nbits; cell->np = info->np; - if (cell->nbits) + if (cell->nbits) { cell->bytes = DIV_ROUND_UP(cell->nbits + cell->bit_offset, BITS_PER_BYTE); + cell->raw_len = ALIGN(cell->bytes, nvmem->word_size); + } if (!IS_ALIGNED(cell->offset, nvmem->stride)) { dev_err(&nvmem->dev, From 3566a737db87a9bf360c2fd36433c5149f805f2e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 11 Apr 2025 12:22:49 +0100 Subject: [PATCH 0474/2924] nvmem: qfprom: switch to 4-byte aligned reads All platforms since Snapdragon 8 Gen1 (SM8450) require using 4-byte reads to access QFPROM data. While older platforms were more than happy with 1-byte reads, change the qfprom driver to use 4-byte reads for all the platforms. Specify stride and word size of 4 bytes. To retain compatibility with the existing DT and to simplify porting data from vendor kernels, use fixup_dt_cell_info in order to bump alignment requirements. Signed-off-by: Dmitry Baryshkov Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-12-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/qfprom.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/nvmem/qfprom.c b/drivers/nvmem/qfprom.c index 116a39e804c70b..a872c640b8c5a5 100644 --- a/drivers/nvmem/qfprom.c +++ b/drivers/nvmem/qfprom.c @@ -321,19 +321,32 @@ static int qfprom_reg_read(void *context, unsigned int reg, void *_val, size_t bytes) { struct qfprom_priv *priv = context; - u8 *val = _val; - int i = 0, words = bytes; + u32 *val = _val; void __iomem *base = priv->qfpcorrected; + int words = DIV_ROUND_UP(bytes, sizeof(u32)); + int i; if (read_raw_data && priv->qfpraw) base = priv->qfpraw; - while (words--) - *val++ = readb(base + reg + i++); + for (i = 0; i < words; i++) + *val++ = readl(base + reg + i * sizeof(u32)); return 0; } +/* Align reads to word boundary */ +static void qfprom_fixup_dt_cell_info(struct nvmem_device *nvmem, + struct nvmem_cell_info *cell) +{ + unsigned int byte_offset = cell->offset % sizeof(u32); + + cell->bit_offset += byte_offset * BITS_PER_BYTE; + cell->offset -= byte_offset; + if (byte_offset && !cell->nbits) + cell->nbits = cell->bytes * BITS_PER_BYTE; +} + static void qfprom_runtime_disable(void *data) { pm_runtime_disable(data); @@ -358,10 +371,11 @@ static int qfprom_probe(struct platform_device *pdev) struct nvmem_config econfig = { .name = "qfprom", .add_legacy_fixed_of_cells = true, - .stride = 1, - .word_size = 1, + .stride = 4, + .word_size = 4, .id = NVMEM_DEVID_AUTO, .reg_read = qfprom_reg_read, + .fixup_dt_cell_info = qfprom_fixup_dt_cell_info, }; struct device *dev = &pdev->dev; struct resource *res; From b78de5c2c60f5f65ce401ca791692409ef9ceaec Mon Sep 17 00:00:00 2001 From: Sricharan Ramabadhran Date: Fri, 11 Apr 2025 12:22:50 +0100 Subject: [PATCH 0475/2924] dt-bindings: nvmem: Add compatible for IPQ5018 Document the QFPROM block found on IPQ5018 Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sricharan Ramabadhran Signed-off-by: George Moussalem Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-13-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index e1a8856ccba457..2b39d27da57b1f 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -19,6 +19,7 @@ properties: - enum: - qcom,apq8064-qfprom - qcom,apq8084-qfprom + - qcom,ipq5018-qfprom - qcom,ipq5332-qfprom - qcom,ipq5424-qfprom - qcom,ipq6018-qfprom From e9a573e2d7c6409519c2aa367d524dba31694f95 Mon Sep 17 00:00:00 2001 From: Rudraksha Gupta Date: Fri, 11 Apr 2025 12:22:51 +0100 Subject: [PATCH 0476/2924] dt-bindings: nvmem: Add compatible for MSM8960 Document the QFPROM on MSM8960. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Rudraksha Gupta Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250411112251.68002-14-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index 2b39d27da57b1f..3f6dc6a3a9f1ad 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -30,6 +30,7 @@ properties: - qcom,msm8916-qfprom - qcom,msm8917-qfprom - qcom,msm8937-qfprom + - qcom,msm8960-qfprom - qcom,msm8974-qfprom - qcom,msm8976-qfprom - qcom,msm8996-qfprom From ec27386de23a511008c53aa2f3434ad180a3ca9a Mon Sep 17 00:00:00 2001 From: Andrei Kuchynski Date: Fri, 21 Mar 2025 14:37:26 +0000 Subject: [PATCH 0477/2924] usb: typec: class: Fix NULL pointer access Concurrent calls to typec_partner_unlink_device can lead to a NULL pointer dereference. This patch adds a mutex to protect USB device pointers and prevent this issue. The same mutex protects both the device pointers and the partner device registration. Cc: stable@vger.kernel.org Fixes: 59de2a56d127 ("usb: typec: Link enumerated USB devices with Type-C partner") Signed-off-by: Andrei Kuchynski Reviewed-by: Benson Leung Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250321143728.4092417-2-akuchynski@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 15 +++++++++++++-- drivers/usb/typec/class.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 9c76c3d0c6cff9..eadb150223f8f8 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1052,6 +1052,7 @@ struct typec_partner *typec_register_partner(struct typec_port *port, partner->usb_mode = USB_MODE_USB3; } + mutex_lock(&port->partner_link_lock); ret = device_register(&partner->dev); if (ret) { dev_err(&port->dev, "failed to register partner (%d)\n", ret); @@ -1063,6 +1064,7 @@ struct typec_partner *typec_register_partner(struct typec_port *port, typec_partner_link_device(partner, port->usb2_dev); if (port->usb3_dev) typec_partner_link_device(partner, port->usb3_dev); + mutex_unlock(&port->partner_link_lock); return partner; } @@ -1083,12 +1085,14 @@ void typec_unregister_partner(struct typec_partner *partner) port = to_typec_port(partner->dev.parent); + mutex_lock(&port->partner_link_lock); if (port->usb2_dev) typec_partner_unlink_device(partner, port->usb2_dev); if (port->usb3_dev) typec_partner_unlink_device(partner, port->usb3_dev); device_unregister(&partner->dev); + mutex_unlock(&port->partner_link_lock); } EXPORT_SYMBOL_GPL(typec_unregister_partner); @@ -2041,10 +2045,11 @@ static struct typec_partner *typec_get_partner(struct typec_port *port) static void typec_partner_attach(struct typec_connector *con, struct device *dev) { struct typec_port *port = container_of(con, struct typec_port, con); - struct typec_partner *partner = typec_get_partner(port); + struct typec_partner *partner; struct usb_device *udev = to_usb_device(dev); enum usb_mode usb_mode; + mutex_lock(&port->partner_link_lock); if (udev->speed < USB_SPEED_SUPER) { usb_mode = USB_MODE_USB2; port->usb2_dev = dev; @@ -2053,18 +2058,22 @@ static void typec_partner_attach(struct typec_connector *con, struct device *dev port->usb3_dev = dev; } + partner = typec_get_partner(port); if (partner) { typec_partner_set_usb_mode(partner, usb_mode); typec_partner_link_device(partner, dev); put_device(&partner->dev); } + mutex_unlock(&port->partner_link_lock); } static void typec_partner_deattach(struct typec_connector *con, struct device *dev) { struct typec_port *port = container_of(con, struct typec_port, con); - struct typec_partner *partner = typec_get_partner(port); + struct typec_partner *partner; + mutex_lock(&port->partner_link_lock); + partner = typec_get_partner(port); if (partner) { typec_partner_unlink_device(partner, dev); put_device(&partner->dev); @@ -2074,6 +2083,7 @@ static void typec_partner_deattach(struct typec_connector *con, struct device *d port->usb2_dev = NULL; else if (port->usb3_dev == dev) port->usb3_dev = NULL; + mutex_unlock(&port->partner_link_lock); } /** @@ -2614,6 +2624,7 @@ struct typec_port *typec_register_port(struct device *parent, ida_init(&port->mode_ids); mutex_init(&port->port_type_lock); + mutex_init(&port->partner_link_lock); port->id = id; port->ops = cap->ops; diff --git a/drivers/usb/typec/class.h b/drivers/usb/typec/class.h index b3076a24ad2eee..db2fe96c48ff0f 100644 --- a/drivers/usb/typec/class.h +++ b/drivers/usb/typec/class.h @@ -59,6 +59,7 @@ struct typec_port { enum typec_port_type port_type; enum usb_mode usb_mode; struct mutex port_type_lock; + struct mutex partner_link_lock; enum typec_orientation orientation; struct typec_switch *sw; From 66e1a887273c6b89f09bc11a40d0a71d5a081a8e Mon Sep 17 00:00:00 2001 From: Andrei Kuchynski Date: Fri, 21 Mar 2025 14:37:27 +0000 Subject: [PATCH 0478/2924] usb: typec: class: Invalidate USB device pointers on partner unregistration To avoid using invalid USB device pointers after a Type-C partner disconnects, this patch clears the pointers upon partner unregistration. This ensures a clean state for future connections. Cc: stable@vger.kernel.org Fixes: 59de2a56d127 ("usb: typec: Link enumerated USB devices with Type-C partner") Signed-off-by: Andrei Kuchynski Reviewed-by: Heikki Krogerus Reviewed-by: Benson Leung Link: https://lore.kernel.org/r/20250321143728.4092417-3-akuchynski@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index eadb150223f8f8..3df3e373691620 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1086,10 +1086,14 @@ void typec_unregister_partner(struct typec_partner *partner) port = to_typec_port(partner->dev.parent); mutex_lock(&port->partner_link_lock); - if (port->usb2_dev) + if (port->usb2_dev) { typec_partner_unlink_device(partner, port->usb2_dev); - if (port->usb3_dev) + port->usb2_dev = NULL; + } + if (port->usb3_dev) { typec_partner_unlink_device(partner, port->usb3_dev); + port->usb3_dev = NULL; + } device_unregister(&partner->dev); mutex_unlock(&port->partner_link_lock); From 3b607b75a345b1d808031bf1bb1038e4dac8d521 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 10 Apr 2025 17:47:23 +0200 Subject: [PATCH 0479/2924] null_blk: Use strscpy() instead of strscpy_pad() in null_add_dev() blk_mq_alloc_disk() already zero-initializes the destination buffer, making strscpy() sufficient for safely copying the disk's name. The additional NUL-padding performed by strscpy_pad() is unnecessary. If the destination buffer has a fixed length, strscpy() automatically determines its size using sizeof() when the argument is omitted. This makes the explicit size argument unnecessary. The source string is also NUL-terminated and meets the __must_be_cstr() requirement of strscpy(). No functional changes intended. Signed-off-by: Thorsten Blum Reviewed-by: Zhu Yanjun Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250410154727.883207-1-thorsten.blum@linux.dev Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 3bb9cee0a9b55a..aa163ae9b2aa5c 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2031,7 +2031,7 @@ static int null_add_dev(struct nullb_device *dev) nullb->disk->minors = 1; nullb->disk->fops = &null_ops; nullb->disk->private_data = nullb; - strscpy_pad(nullb->disk->disk_name, nullb->disk_name, DISK_NAME_LEN); + strscpy(nullb->disk->disk_name, nullb->disk_name); if (nullb->dev->zoned) { rv = null_register_zoned_dev(nullb); From a64e4d48a0b77e4ada19ac26ca3a08cd492f6362 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 8 Apr 2025 21:46:29 +0100 Subject: [PATCH 0480/2924] afs: Fix afs_dynroot_readdir() to not use the RCU read lock afs_dynroot_readdir() uses the RCU read lock to walk the cell list whilst emitting cell automount entries - but dir_emit() may write to a userspace buffer, thereby causing a fault to occur and waits to happen. Fix afs_dynroot_readdir() to get a shared lock on net->cells_lock instead. This can be triggered by enabling lockdep, preconfiguring a number of cells, doing "mount -t afs none /afs -o dyn" (or using the kafs-client package with afs.mount systemd unit enabled) and then doing "ls /afs". Fixes: 1d0b929fc070 ("afs: Change dynroot to create contents on demand") Reported-by: syzbot+3b6c5c6a1d0119b687a1@syzkaller.appspotmail.com Reported-by: syzbot+8245611446194a52150d@syzkaller.appspotmail.com Reported-by: syzbot+1aa62e6852a6ad1c7944@syzkaller.appspotmail.com Reported-by: syzbot+54e6c2176ba76c56217e@syzkaller.appspotmail.com Signed-off-by: David Howells Link: https://lore.kernel.org/1638014.1744145189@warthog.procyon.org.uk cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/afs/dynroot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 691e0ae607a167..8c6130789fde3a 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -348,9 +348,9 @@ static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx) } if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) { - rcu_read_lock(); + down_read(&net->cells_lock); ret = afs_dynroot_readdir_cells(net, ctx); - rcu_read_unlock(); + up_read(&net->cells_lock); } return ret; } From b2b4483b5d05026218127fc8f38c69adf69c235b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Apr 2025 13:00:53 -0700 Subject: [PATCH 0481/2924] dcache: convert dentry flag macros to enum Commit 9748cb2dc393 ("VFS: repack DENTRY_ flags.") changed the value of DCACHE_MOUNTED, which broke drgn's path_lookup() helper. drgn is forced to hard-code it because it's a macro, and macros aren't preserved in debugging information by default. Enums, on the other hand, are included in debugging information. Convert the DCACHE_* flag macros to an enum so that debugging tools like drgn and bpftrace can make use of them. Link: https://github.com/osandov/drgn/blob/2027d0fea84d74b835e77392f7040c2a333180c6/drgn/helpers/linux/fs.py#L43-L46 Signed-off-by: Omar Sandoval Link: https://lore.kernel.org/177665a082f048cf536b9cd6af467b3be6b6e6ed.1744141838.git.osandov@fb.com Signed-off-by: Christian Brauner --- include/linux/dcache.h | 106 +++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 56 deletions(-) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8d1395f945bfef..e9f07e37dd6faa 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -173,65 +173,59 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_OP_HASH BIT(0) -#define DCACHE_OP_COMPARE BIT(1) -#define DCACHE_OP_REVALIDATE BIT(2) -#define DCACHE_OP_DELETE BIT(3) -#define DCACHE_OP_PRUNE BIT(4) - -#define DCACHE_DISCONNECTED BIT(5) - /* This dentry is possibly not currently connected to the dcache tree, in - * which case its parent will either be itself, or will have this flag as - * well. nfsd will not use a dentry with this bit set, but will first - * endeavour to clear the bit either by discovering that it is connected, - * or by performing lookup operations. Any filesystem which supports - * nfsd_operations MUST have a lookup function which, if it finds a - * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that - * dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. */ - -#define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */ - -#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ - -#define DCACHE_CANT_MOUNT BIT(8) -#define DCACHE_GENOCIDE BIT(9) -#define DCACHE_SHRINK_LIST BIT(10) - -#define DCACHE_OP_WEAK_REVALIDATE BIT(11) - -#define DCACHE_NFSFS_RENAMED BIT(12) - /* this dentry has been "silly renamed" and has to be deleted on the last - * dput() */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(13) - /* Parent inode is watched by some fsnotify listener */ - -#define DCACHE_DENTRY_KILLED BIT(14) - -#define DCACHE_MOUNTED BIT(15) /* is a mountpoint */ -#define DCACHE_NEED_AUTOMOUNT BIT(16) /* handle automount on this dir */ -#define DCACHE_MANAGE_TRANSIT BIT(17) /* manage transit from this dirent */ +enum dentry_flags { + DCACHE_OP_HASH = BIT(0), + DCACHE_OP_COMPARE = BIT(1), + DCACHE_OP_REVALIDATE = BIT(2), + DCACHE_OP_DELETE = BIT(3), + DCACHE_OP_PRUNE = BIT(4), + /* + * This dentry is possibly not currently connected to the dcache tree, + * in which case its parent will either be itself, or will have this + * flag as well. nfsd will not use a dentry with this bit set, but will + * first endeavour to clear the bit either by discovering that it is + * connected, or by performing lookup operations. Any filesystem which + * supports nfsd_operations MUST have a lookup function which, if it + * finds a directory inode with a DCACHE_DISCONNECTED dentry, will + * d_move that dentry into place and return that dentry rather than the + * passed one, typically using d_splice_alias. + */ + DCACHE_DISCONNECTED = BIT(5), + DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. */ + DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ + DCACHE_CANT_MOUNT = BIT(8), + DCACHE_GENOCIDE = BIT(9), + DCACHE_SHRINK_LIST = BIT(10), + DCACHE_OP_WEAK_REVALIDATE = BIT(11), + /* + * this dentry has been "silly renamed" and has to be deleted on the + * last dput() + */ + DCACHE_NFSFS_RENAMED = BIT(12), + DCACHE_FSNOTIFY_PARENT_WATCHED = BIT(13), /* Parent inode is watched by some fsnotify listener */ + DCACHE_DENTRY_KILLED = BIT(14), + DCACHE_MOUNTED = BIT(15), /* is a mountpoint */ + DCACHE_NEED_AUTOMOUNT = BIT(16), /* handle automount on this dir */ + DCACHE_MANAGE_TRANSIT = BIT(17), /* manage transit from this dirent */ + DCACHE_LRU_LIST = BIT(18), + DCACHE_ENTRY_TYPE = (7 << 19), /* bits 19..21 are for storing type: */ + DCACHE_MISS_TYPE = (0 << 19), /* Negative dentry */ + DCACHE_WHITEOUT_TYPE = (1 << 19), /* Whiteout dentry (stop pathwalk) */ + DCACHE_DIRECTORY_TYPE = (2 << 19), /* Normal directory */ + DCACHE_AUTODIR_TYPE = (3 << 19), /* Lookupless directory (presumed automount) */ + DCACHE_REGULAR_TYPE = (4 << 19), /* Regular file type */ + DCACHE_SPECIAL_TYPE = (5 << 19), /* Other file type */ + DCACHE_SYMLINK_TYPE = (6 << 19), /* Symlink */ + DCACHE_NOKEY_NAME = BIT(22), /* Encrypted name encoded without key */ + DCACHE_OP_REAL = BIT(23), + DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ + DCACHE_DENTRY_CURSOR = BIT(25), + DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ +}; + #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -#define DCACHE_LRU_LIST BIT(18) - -#define DCACHE_ENTRY_TYPE (7 << 19) /* bits 19..21 are for storing type: */ -#define DCACHE_MISS_TYPE (0 << 19) /* Negative dentry */ -#define DCACHE_WHITEOUT_TYPE (1 << 19) /* Whiteout dentry (stop pathwalk) */ -#define DCACHE_DIRECTORY_TYPE (2 << 19) /* Normal directory */ -#define DCACHE_AUTODIR_TYPE (3 << 19) /* Lookupless directory (presumed automount) */ -#define DCACHE_REGULAR_TYPE (4 << 19) /* Regular file type */ -#define DCACHE_SPECIAL_TYPE (5 << 19) /* Other file type */ -#define DCACHE_SYMLINK_TYPE (6 << 19) /* Symlink */ - -#define DCACHE_NOKEY_NAME BIT(22) /* Encrypted name encoded without key */ -#define DCACHE_OP_REAL BIT(23) - -#define DCACHE_PAR_LOOKUP BIT(24) /* being looked up (with parent locked shared) */ -#define DCACHE_DENTRY_CURSOR BIT(25) -#define DCACHE_NORCU BIT(26) /* No RCU delay for freeing */ - extern seqlock_t rename_lock; /* From d43dbf7322a356733b3e3a997cad51dce174d83c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 9 Apr 2025 10:00:23 +0200 Subject: [PATCH 0482/2924] mount: ensure we don't pointlessly walk the mount tree This logic got broken recently. Add it back. Fixes: 474f7825d533 ("fs: add copy_mount_setattr() helper") Link: https://lore.kernel.org/20250409-sektflaschen-gecko-27c021fbd222@brauner Tested-by: Mikhail Gavrilov Signed-off-by: Christian Brauner --- fs/namespace.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 14935a0500a206..1eb76d6312de51 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5189,8 +5189,8 @@ static void finish_mount_kattr(struct mount_kattr *kattr) mnt_idmap_put(kattr->mnt_idmap); } -static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize, - struct mount_kattr *kattr) +static int wants_mount_setattr(struct mount_attr __user *uattr, size_t usize, + struct mount_kattr *kattr) { int ret; struct mount_attr attr; @@ -5213,9 +5213,13 @@ static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize, if (attr.attr_set == 0 && attr.attr_clr == 0 && attr.propagation == 0) - return 0; + return 0; /* Tell caller to not bother. */ + + ret = build_mount_kattr(&attr, usize, kattr); + if (ret < 0) + return ret; - return build_mount_kattr(&attr, usize, kattr); + return 1; } SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, @@ -5247,8 +5251,8 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, if (flags & AT_RECURSIVE) kattr.kflags |= MOUNT_KATTR_RECURSE; - err = copy_mount_setattr(uattr, usize, &kattr); - if (err) + err = wants_mount_setattr(uattr, usize, &kattr); + if (err <= 0) return err; err = user_path_at(dfd, path, kattr.lookup_flags, &target); @@ -5282,15 +5286,17 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, if (flags & AT_RECURSIVE) kattr.kflags |= MOUNT_KATTR_RECURSE; - ret = copy_mount_setattr(uattr, usize, &kattr); - if (ret) + ret = wants_mount_setattr(uattr, usize, &kattr); + if (ret < 0) return ret; - ret = do_mount_setattr(&file->f_path, &kattr); - if (ret) - return ret; + if (ret) { + ret = do_mount_setattr(&file->f_path, &kattr); + if (ret) + return ret; - finish_mount_kattr(&kattr); + finish_mount_kattr(&kattr); + } } fd = get_unused_fd_flags(flags & O_CLOEXEC); From 40cb48eba3b4b79e110c1a35d33a48cac54507a2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 9 Apr 2025 10:00:15 -0700 Subject: [PATCH 0483/2924] netfs: Only create /proc/fs/netfs with CONFIG_PROC_FS When testing a special config: CONFIG_NETFS_SUPPORTS=y CONFIG_PROC_FS=n The system crashes with something like: [ 3.766197] ------------[ cut here ]------------ [ 3.766484] kernel BUG at mm/mempool.c:560! [ 3.766789] Oops: invalid opcode: 0000 [#1] SMP NOPTI [ 3.767123] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W [ 3.767777] Tainted: [W]=WARN [ 3.767968] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), [ 3.768523] RIP: 0010:mempool_alloc_slab.cold+0x17/0x19 [ 3.768847] Code: 50 fe ff 58 5b 5d 41 5c 41 5d 41 5e 41 5f e9 93 95 13 00 [ 3.769977] RSP: 0018:ffffc90000013998 EFLAGS: 00010286 [ 3.770315] RAX: 000000000000002f RBX: ffff888100ba8640 RCX: 0000000000000000 [ 3.770749] RDX: 0000000000000000 RSI: 0000000000000003 RDI: 00000000ffffffff [ 3.771217] RBP: 0000000000092880 R08: 0000000000000000 R09: ffffc90000013828 [ 3.771664] R10: 0000000000000001 R11: 00000000ffffffea R12: 0000000000092cc0 [ 3.772117] R13: 0000000000000400 R14: ffff8881004b1620 R15: ffffea0004ef7e40 [ 3.772554] FS: 0000000000000000(0000) GS:ffff8881b5f3c000(0000) knlGS:0000000000000000 [ 3.773061] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3.773443] CR2: ffffffff830901b4 CR3: 0000000004296001 CR4: 0000000000770ef0 [ 3.773884] PKRU: 55555554 [ 3.774058] Call Trace: [ 3.774232] [ 3.774371] mempool_alloc_noprof+0x6a/0x190 [ 3.774649] ? _printk+0x57/0x80 [ 3.774862] netfs_alloc_request+0x85/0x2ce [ 3.775147] netfs_readahead+0x28/0x170 [ 3.775395] read_pages+0x6c/0x350 [ 3.775623] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.775928] page_cache_ra_unbounded+0x1bd/0x2a0 [ 3.776247] filemap_get_pages+0x139/0x970 [ 3.776510] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.776820] filemap_read+0xf9/0x580 [ 3.777054] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.777368] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.777674] ? find_held_lock+0x32/0x90 [ 3.777929] ? netfs_start_io_read+0x19/0x70 [ 3.778221] ? netfs_start_io_read+0x19/0x70 [ 3.778489] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.778800] ? lock_acquired+0x1e6/0x450 [ 3.779054] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.779379] netfs_buffered_read_iter+0x57/0x80 [ 3.779670] __kernel_read+0x158/0x2c0 [ 3.779927] bprm_execve+0x300/0x7a0 [ 3.780185] kernel_execve+0x10c/0x140 [ 3.780423] ? __pfx_kernel_init+0x10/0x10 [ 3.780690] kernel_init+0xd5/0x150 [ 3.780910] ret_from_fork+0x2d/0x50 [ 3.781156] ? __pfx_kernel_init+0x10/0x10 [ 3.781414] ret_from_fork_asm+0x1a/0x30 [ 3.781677] [ 3.781823] Modules linked in: [ 3.782065] ---[ end trace 0000000000000000 ]--- This is caused by the following error path in netfs_init(): if (!proc_mkdir("fs/netfs", NULL)) goto error_proc; Fix this by adding ifdef in netfs_main(), so that /proc/fs/netfs is only created with CONFIG_PROC_FS. Signed-off-by: Song Liu Link: https://lore.kernel.org/20250409170015.2651829-1-song@kernel.org Acked-by: David Howells Signed-off-by: Christian Brauner --- fs/netfs/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 4e3e6204083140..70ecc8f5f21034 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -127,11 +127,13 @@ static int __init netfs_init(void) if (mempool_init_slab_pool(&netfs_subrequest_pool, 100, netfs_subrequest_slab) < 0) goto error_subreqpool; +#ifdef CONFIG_PROC_FS if (!proc_mkdir("fs/netfs", NULL)) goto error_proc; if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL, &netfs_requests_seq_ops)) goto error_procfile; +#endif #ifdef CONFIG_FSCACHE_STATS if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL, netfs_stats_show)) @@ -144,9 +146,11 @@ static int __init netfs_init(void) return 0; error_fscache: +#ifdef CONFIG_PROC_FS error_procfile: remove_proc_subtree("fs/netfs", NULL); error_proc: +#endif mempool_exit(&netfs_subrequest_pool); error_subreqpool: kmem_cache_destroy(netfs_subrequest_slab); From b463d7fd118b984884da7493ae999a62c9892aa3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 9 Apr 2025 15:05:34 -0700 Subject: [PATCH 0484/2924] fs: Fix filename init after recent refactoring getname_flags() should save __user pointer "filename" in filename->uptr. However, this logic is broken by a recent refactoring. Fix it by passing __user pointer filename to helper initname(). Fixes: 611851010c74 ("fs: dedup handling of struct filename init and refcounts bumps") Cc: Mateusz Guzik Cc: Christian Brauner Signed-off-by: Song Liu Link: https://lore.kernel.org/20250409220534.3635801-1-song@kernel.org Signed-off-by: Christian Brauner --- fs/namei.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 360a86ca1f0270..8510ff53f12e58 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -125,9 +125,9 @@ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) -static inline void initname(struct filename *name) +static inline void initname(struct filename *name, const char __user *uptr) { - name->uptr = NULL; + name->uptr = uptr; name->aname = NULL; atomic_set(&name->refcnt, 1); } @@ -210,7 +210,7 @@ getname_flags(const char __user *filename, int flags) return ERR_PTR(-ENAMETOOLONG); } } - initname(result); + initname(result, filename); audit_getname(result); return result; } @@ -268,7 +268,7 @@ struct filename *getname_kernel(const char * filename) return ERR_PTR(-ENAMETOOLONG); } memcpy((char *)result->name, filename, len); - initname(result); + initname(result, NULL); audit_getname(result); return result; } From 8e3c15ee0d292c413c66fe10201d1b035a0bea72 Mon Sep 17 00:00:00 2001 From: Gou Hao Date: Thu, 10 Apr 2025 15:12:36 +0800 Subject: [PATCH 0485/2924] iomap: skip unnecessary ifs_block_is_uptodate check In iomap_adjust_read_range, i is either the first !uptodate block, or it is past last for the second loop looking for trailing uptodate blocks. Assuming there's no overflow (there's no combination of huge folios and tiny blksize) then yeah, there is no point in retesting that the same block pointed to by i is uptodate since we hold the folio lock so nobody else could have set it uptodate. Signed-off-by: Gou Hao Link: https://lore.kernel.org/20250410071236.16017-1-gouhao@uniontech.com Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Suggested-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 31553372b33ace..5b08bd417b2872 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -259,7 +259,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio, } /* truncate len if we find any trailing uptodate block(s) */ - for ( ; i <= last; i++) { + while (++i <= last) { if (ifs_block_is_uptodate(ifs, i)) { plen -= (last - i + 1) * block_size; last = i - 1; From 5f05c14e7c198415abe936514a6905f8b545b63b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 11 Apr 2025 10:40:54 +0200 Subject: [PATCH 0486/2924] wifi: iwlwifi: pcie: set state to no-FW before reset handshake The reset handshake attempts to kill the firmware, and it'll go into a pretty much dead state once we do that. However, if it times out, then we'll attempt to dump the firmware to be able to see why it didn't respond. During this dump, we cannot treat it as if it was still running, since we just tried to kill it, otherwise dumping will attempt to send a DBGC stop command. As this command will time out, we'll go into a reset loop. For now, fix this by setting the trans->state to say firmware isn't running before doing the reset handshake. In the longer term, we should clean up the way this state is handled. It's not entirely clear but it seems likely that this issue was introduced by my rework of the error handling, prior to that it would've been synchronous at that point and (I think) not have attempted to reset since it was already doing down. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219967 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219968 Fixes: 7391b2a4f7db ("wifi: iwlwifi: rework firmware error handling") Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20250411104054.63aa4f56894d.Ife70cfe997db03f0d07fdef2b164695739a05a63@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index 3ece34e30d5806..472f26f83ba833 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -147,8 +147,14 @@ static void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans) return; if (trans->state >= IWL_TRANS_FW_STARTED && - trans_pcie->fw_reset_handshake) + trans_pcie->fw_reset_handshake) { + /* + * Reset handshake can dump firmware on timeout, but that + * should assume that the firmware is already dead. + */ + trans->state = IWL_TRANS_NO_FW; iwl_trans_pcie_fw_reset_handshake(trans); + } trans_pcie->is_down = true; From 47a742fd977a7a8c39fea890712e9bfdf76f98f1 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Thu, 10 Apr 2025 17:05:42 +0200 Subject: [PATCH 0487/2924] fs: use namespace_{lock,unlock} in dissolve_on_fput() In commit b73ec10a4587 ("fs: add fastpath for dissolve_on_fput()"), the namespace_{lock,unlock} has been replaced with scoped_guard using the namespace_sem. This however now also skips processing of 'unmounted' list in namespace_unlock(), and mount is not (immediately) cleaned up. For example, this causes LTP move_mount02 fail: ... move_mount02.c:80: TPASS: invalid-from-fd: move_mount() failed as expected: EBADF (9) move_mount02.c:80: TPASS: invalid-from-path: move_mount() failed as expected: ENOENT (2) move_mount02.c:80: TPASS: invalid-to-fd: move_mount() failed as expected: EBADF (9) move_mount02.c:80: TPASS: invalid-to-path: move_mount() failed as expected: ENOENT (2) move_mount02.c:80: TPASS: invalid-flags: move_mount() failed as expected: EINVAL (22) tst_test.c:1833: TINFO: === Testing on ext3 === tst_test.c:1170: TINFO: Formatting /dev/loop0 with ext3 opts='' extra opts='' mke2fs 1.47.2 (1-Jan-2025) /dev/loop0 is apparently in use by the system; will not make a filesystem here! tst_test.c:1170: TBROK: mkfs.ext3 failed with exit code 1 The test makes number of move_mount() calls but these are all designed to fail with specific errno. Even after test, 'losetup -d' can't detach loop device. Define a new guard for dissolve_on_fput, that will use namespace_{lock,unlock}. Fixes: b73ec10a4587 ("fs: add fastpath for dissolve_on_fput()") Signed-off-by: Jan Stancek Link: https://lore.kernel.org/cad2f042b886bf0ced3d8e3aff120ec5e0125d61.1744297468.git.jstancek@redhat.com Signed-off-by: Christian Brauner --- fs/namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1eb76d6312de51..d9ca80dcc54450 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1830,6 +1830,8 @@ static inline void namespace_lock(void) down_write(&namespace_sem); } +DEFINE_GUARD(namespace_lock, struct rw_semaphore *, namespace_lock(), namespace_unlock()) + enum umount_tree_flags { UMOUNT_SYNC = 1, UMOUNT_PROPAGATE = 2, @@ -2383,7 +2385,7 @@ void dissolve_on_fput(struct vfsmount *mnt) return; } - scoped_guard(rwsem_write, &namespace_sem) { + scoped_guard(namespace_lock, &namespace_sem) { ns = m->mnt_ns; if (!must_dissolve(ns)) return; From 575fe08c221567cdbf63e078baecaeaed08a1d17 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 11 Apr 2025 12:21:05 +0200 Subject: [PATCH 0488/2924] wifi: iwlwifi: mld: Restart firmware on iwl_mld_no_wowlan_resume() error Commit 44605365f935 ("iwlwifi: mld: fix building with CONFIG_PM_SLEEP disabled") sought to fix build breakage, but inadvertently introduced a new issue: iwl_mld_mac80211_start() no longer calls iwl_mld_start_fw() after having called iwl_mld_stop_fw() in the error path of iwl_mld_no_wowlan_resume(). Fix it. Fixes: 44605365f935 ("iwlwifi: mld: fix building with CONFIG_PM_SLEEP disabled") Reported-by: Miri Korenblit Closes: https://lore.kernel.org/r/MW5PR11MB58106D6BC6403845C330C7AAA3A22@MW5PR11MB5810.namprd11.prod.outlook.com/ Signed-off-by: Lukas Wunner Acked-by: Miri Korenblit Acked-by: Arnd Bergmann Link: https://patch.msgid.link/d3ba1006a1b72ceb58c593fa62b9bd7c73e2e4ed.1744366815.git.lukas@wunner.de Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/mac80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 0b5bc5abb82da5..99e13cfd1e5feb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -475,8 +475,8 @@ static int iwl_mld_mac80211_start(struct ieee80211_hw *hw) { struct iwl_mld *mld = IWL_MAC80211_GET_MLD(hw); - int ret; bool in_d3 = false; + int ret = 0; lockdep_assert_wiphy(mld->wiphy); @@ -501,7 +501,7 @@ int iwl_mld_mac80211_start(struct ieee80211_hw *hw) iwl_mld_restart_cleanup(mld); } - if (!in_d3) { + if (!in_d3 || ret) { ret = iwl_mld_start_fw(mld); if (ret) goto error; From 4e28f79e3dffa52d327b46d1a78dac16efb5810b Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sun, 16 Mar 2025 13:26:54 +0300 Subject: [PATCH 0489/2924] usb: chipidea: ci_hdrc_imx: fix usbmisc handling usbmisc is an optional device property so it is totally valid for the corresponding data->usbmisc_data to have a NULL value. Check that before dereferencing the pointer. Found by Linux Verification Center (linuxtesting.org) with Svace static analysis tool. Fixes: 74adad500346 ("usb: chipidea: ci_hdrc_imx: decrement device's refcount in .remove() and in the error path of .probe()") Cc: stable Signed-off-by: Fedor Pchelkin Acked-by: Peter Chen Link: https://lore.kernel.org/r/20250316102658.490340-2-pchelkin@ispras.ru Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 1a7fc638213eba..619779eef33344 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -534,7 +534,8 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) cpu_latency_qos_remove_request(&data->pm_qos_req); data->ci_pdev = NULL; err_put: - put_device(data->usbmisc_data->dev); + if (data->usbmisc_data) + put_device(data->usbmisc_data->dev); return ret; } @@ -559,7 +560,8 @@ static void ci_hdrc_imx_remove(struct platform_device *pdev) if (data->hsic_pad_regulator) regulator_disable(data->hsic_pad_regulator); } - put_device(data->usbmisc_data->dev); + if (data->usbmisc_data) + put_device(data->usbmisc_data->dev); } static void ci_hdrc_imx_shutdown(struct platform_device *pdev) From 8cab0e9a3f3e8d700179e0d6141643d54a267fd5 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sun, 16 Mar 2025 13:26:55 +0300 Subject: [PATCH 0490/2924] usb: chipidea: ci_hdrc_imx: fix call balance of regulator routines Upon encountering errors during the HSIC pinctrl handling section the regulator should be disabled. Use devm_add_action_or_reset() to let the regulator-disabling routine be handled by device resource management stack. Found by Linux Verification Center (linuxtesting.org). Fixes: 4d6141288c33 ("usb: chipidea: imx: pinctrl for HSIC is optional") Cc: stable Signed-off-by: Fedor Pchelkin Acked-by: Peter Chen Link: https://lore.kernel.org/r/20250316102658.490340-3-pchelkin@ispras.ru Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 619779eef33344..d942b3c72640eb 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -336,6 +336,13 @@ static int ci_hdrc_imx_notify_event(struct ci_hdrc *ci, unsigned int event) return ret; } +static void ci_hdrc_imx_disable_regulator(void *arg) +{ + struct ci_hdrc_imx_data *data = arg; + + regulator_disable(data->hsic_pad_regulator); +} + static int ci_hdrc_imx_probe(struct platform_device *pdev) { struct ci_hdrc_imx_data *data; @@ -394,6 +401,13 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) "Failed to enable HSIC pad regulator\n"); goto err_put; } + ret = devm_add_action_or_reset(dev, + ci_hdrc_imx_disable_regulator, data); + if (ret) { + dev_err(dev, + "Failed to add regulator devm action\n"); + goto err_put; + } } } @@ -432,11 +446,11 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) ret = imx_get_clks(dev); if (ret) - goto disable_hsic_regulator; + goto qos_remove_request; ret = imx_prepare_enable_clks(dev); if (ret) - goto disable_hsic_regulator; + goto qos_remove_request; ret = clk_prepare_enable(data->clk_wakeup); if (ret) @@ -526,10 +540,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) clk_disable_unprepare(data->clk_wakeup); err_wakeup_clk: imx_disable_unprepare_clks(dev); -disable_hsic_regulator: - if (data->hsic_pad_regulator) - /* don't overwrite original ret (cf. EPROBE_DEFER) */ - regulator_disable(data->hsic_pad_regulator); +qos_remove_request: if (pdata.flags & CI_HDRC_PMQOS) cpu_latency_qos_remove_request(&data->pm_qos_req); data->ci_pdev = NULL; @@ -557,8 +568,6 @@ static void ci_hdrc_imx_remove(struct platform_device *pdev) clk_disable_unprepare(data->clk_wakeup); if (data->plat_data->flags & CI_HDRC_PMQOS) cpu_latency_qos_remove_request(&data->pm_qos_req); - if (data->hsic_pad_regulator) - regulator_disable(data->hsic_pad_regulator); } if (data->usbmisc_data) put_device(data->usbmisc_data->dev); From 8c531e0a8c2d82509ad97c6d3a1e6217c7ed136d Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sun, 16 Mar 2025 13:26:56 +0300 Subject: [PATCH 0491/2924] usb: chipidea: ci_hdrc_imx: implement usb_phy_init() error handling usb_phy_init() may return an error code if e.g. its implementation fails to prepare/enable some clocks. And properly rollback on probe error path by calling the counterpart usb_phy_shutdown(). Found by Linux Verification Center (linuxtesting.org). Fixes: be9cae2479f4 ("usb: chipidea: imx: Fix ULPI on imx53") Cc: stable Signed-off-by: Fedor Pchelkin Acked-by: Peter Chen Link: https://lore.kernel.org/r/20250316102658.490340-4-pchelkin@ispras.ru Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index d942b3c72640eb..4f8bfd242b5953 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -484,7 +484,11 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) of_usb_get_phy_mode(np) == USBPHY_INTERFACE_MODE_ULPI) { pdata.flags |= CI_HDRC_OVERRIDE_PHY_CONTROL; data->override_phy_control = true; - usb_phy_init(pdata.usb_phy); + ret = usb_phy_init(pdata.usb_phy); + if (ret) { + dev_err(dev, "Failed to init phy\n"); + goto err_clk; + } } if (pdata.flags & CI_HDRC_SUPPORTS_RUNTIME_PM) @@ -493,7 +497,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) ret = imx_usbmisc_init(data->usbmisc_data); if (ret) { dev_err(dev, "usbmisc init failed, ret=%d\n", ret); - goto err_clk; + goto phy_shutdown; } data->ci_pdev = ci_hdrc_add_device(dev, @@ -502,7 +506,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (IS_ERR(data->ci_pdev)) { ret = PTR_ERR(data->ci_pdev); dev_err_probe(dev, ret, "ci_hdrc_add_device failed\n"); - goto err_clk; + goto phy_shutdown; } if (data->usbmisc_data) { @@ -536,6 +540,9 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) disable_device: ci_hdrc_remove_device(data->ci_pdev); +phy_shutdown: + if (data->override_phy_control) + usb_phy_shutdown(data->phy); err_clk: clk_disable_unprepare(data->clk_wakeup); err_wakeup_clk: From a1059896f2bfdcebcdc7153c3be2307ea319501f Mon Sep 17 00:00:00 2001 From: Ralph Siemsen Date: Tue, 18 Mar 2025 11:09:32 -0400 Subject: [PATCH 0492/2924] usb: cdns3: Fix deadlock when using NCM gadget The cdns3 driver has the same NCM deadlock as fixed in cdnsp by commit 58f2fcb3a845 ("usb: cdnsp: Fix deadlock issue during using NCM gadget"). Under PREEMPT_RT the deadlock can be readily triggered by heavy network traffic, for example using "iperf --bidir" over NCM ethernet link. The deadlock occurs because the threaded interrupt handler gets preempted by a softirq, but both are protected by the same spinlock. Prevent deadlock by disabling softirq during threaded irq handler. Cc: stable Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Ralph Siemsen Acked-by: Peter Chen Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250318-rfs-cdns3-deadlock-v2-1-bfd9cfcee732@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 694aa14577390b..d9d8dc05b23599 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -1963,6 +1963,7 @@ static irqreturn_t cdns3_device_thread_irq_handler(int irq, void *data) unsigned int bit; unsigned long reg; + local_bh_disable(); spin_lock_irqsave(&priv_dev->lock, flags); reg = readl(&priv_dev->regs->usb_ists); @@ -2004,6 +2005,7 @@ static irqreturn_t cdns3_device_thread_irq_handler(int irq, void *data) irqend: writel(~0, &priv_dev->regs->ep_ien); spin_unlock_irqrestore(&priv_dev->lock, flags); + local_bh_enable(); return ret; } From 38d6e60b6f3a99f8f13bee22eab616136c2c0675 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Tue, 18 Mar 2025 07:44:52 +0100 Subject: [PATCH 0493/2924] usb: dwc3: xilinx: Prevent spike in reset signal The "reset" GPIO controls the RESET signal to an external, usually ULPI PHY, chip. The original code path acquires the signal in LOW state, and then immediately asserts it HIGH again, if the reset signal defaulted to asserted, there'd be a short "spike" before the reset. Here is what happens depending on the pre-existing state of the reset signal: Reset (previously asserted): ~~~|_|~~~~|_______ Reset (previously deasserted): _____|~~~~|_______ ^ ^ ^ A B C At point A, the low going transition is because the reset line is requested using GPIOD_OUT_LOW. If the line is successfully requested, the first thing we do is set it high _without_ any delay. This is point B. So, a glitch occurs between A and B. Requesting the line using GPIOD_OUT_HIGH eliminates the A and B transitions. Instead we get: Reset (previously asserted) : ~~~~~~~~~~|______ Reset (previously deasserted): ____|~~~~~|______ ^ ^ A C Where A and C are the points described above in the code. Point B has been eliminated. The issue was found during code inspection. Also remove the cryptic "toggle ulpi .." comment. Fixes: ca05b38252d7 ("usb: dwc3: xilinx: Add gpio-reset support") Cc: stable Signed-off-by: Mike Looijmans Reviewed-by: Radhey Shyam Pandey Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250318064518.9320-1-mike.looijmans@topic.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-xilinx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index a33a42ba0249ab..4ca7f6240d07df 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -207,15 +207,13 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) skip_usb3_phy: /* ulpi reset via gpio-modepin or gpio-framework driver */ - reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); + reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(reset_gpio)) { return dev_err_probe(dev, PTR_ERR(reset_gpio), "Failed to request reset GPIO\n"); } if (reset_gpio) { - /* Toggle ulpi to reset the phy. */ - gpiod_set_value_cansleep(reset_gpio, 1); usleep_range(5000, 10000); gpiod_set_value_cansleep(reset_gpio, 0); usleep_range(5000, 10000); From bcb60d438547355b8f9ad48645909139b64d3482 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 28 Mar 2025 12:00:59 +0800 Subject: [PATCH 0494/2924] USB: OHCI: Add quirk for LS7A OHCI controller (rev 0x02) The OHCI controller (rev 0x02) under LS7A PCI host has a hardware flaw. MMIO register with offset 0x60/0x64 is treated as legacy PS2-compatible keyboard/mouse interface, which confuse the OHCI controller. Since OHCI only use a 4KB BAR resource indeed, the LS7A OHCI controller's 32KB BAR is wrapped around (the second 4KB BAR space is the same as the first 4KB internally). So we can add an 4KB offset (0x1000) to the OHCI registers (from the PCI BAR resource) as a quirk. Cc: stable Suggested-by: Bjorn Helgaas Reviewed-by: Alan Stern Tested-by: Mingcong Bai Signed-off-by: Huacai Chen Link: https://lore.kernel.org/r/20250328040059.3672979-1-chenhuacai@loongson.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-pci.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 900ea0d368e034..9f0a6b27e47cb6 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -165,6 +165,25 @@ static int ohci_quirk_amd700(struct usb_hcd *hcd) return 0; } +static int ohci_quirk_loongson(struct usb_hcd *hcd) +{ + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + + /* + * Loongson's LS7A OHCI controller (rev 0x02) has a + * flaw. MMIO register with offset 0x60/64 is treated + * as legacy PS2-compatible keyboard/mouse interface. + * Since OHCI only use 4KB BAR resource, LS7A OHCI's + * 32KB BAR is wrapped around (the 2nd 4KB BAR space + * is the same as the 1st 4KB internally). So add 4KB + * offset (0x1000) to the OHCI registers as a quirk. + */ + if (pdev->revision == 0x2) + hcd->regs += SZ_4K; /* SZ_4K = 0x1000 */ + + return 0; +} + static int ohci_quirk_qemu(struct usb_hcd *hcd) { struct ohci_hcd *ohci = hcd_to_ohci(hcd); @@ -224,6 +243,10 @@ static const struct pci_device_id ohci_pci_quirks[] = { PCI_DEVICE(PCI_VENDOR_ID_ATI, 0x4399), .driver_data = (unsigned long)ohci_quirk_amd700, }, + { + PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, 0x7a24), + .driver_data = (unsigned long)ohci_quirk_loongson, + }, { .vendor = PCI_VENDOR_ID_APPLE, .device = 0x003f, From 2932b6b547ec36ad2ed60fbf2117c0e46bb7d40a Mon Sep 17 00:00:00 2001 From: Miao Li Date: Tue, 1 Apr 2025 10:30:27 +0800 Subject: [PATCH 0495/2924] usb: quirks: add DELAY_INIT quirk for Silicon Motion Flash Drive Silicon Motion Flash Drive connects to Huawei hisi platforms and performs a system reboot test for two thousand circles, it will randomly work incorrectly on boot, set DELAY_INIT quirk can workaround this issue. Signed-off-by: Miao Li Cc: stable Link: https://lore.kernel.org/r/20250401023027.44894-1-limiao870622@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 8efbacc5bc3411..61583a1b7ac65a 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -383,6 +383,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0904, 0x6103), .driver_info = USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Silicon Motion Flash Drive */ + { USB_DEVICE(0x090c, 0x1000), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Sound Devices USBPre2 */ { USB_DEVICE(0x0926, 0x0202), .driver_info = USB_QUIRK_ENDPOINT_IGNORE }, From 0937cb5f345c79d702b4d0d744e2a2529b551cb2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 11 Apr 2025 16:13:34 +0200 Subject: [PATCH 0496/2924] Revert "wifi: mac80211: Update skb's control block key in ieee80211_tx_dequeue()" This reverts commit a104042e2bf6528199adb6ca901efe7b60c2c27f. Since the original bug seems to have been around for years, but a new issue was report with the fix, revert the fix for now. We have a couple of weeks to figure it out for this release, if needed. Reported-by: Bert Karwatzki Closes: https://lore.kernel.org/linux-wireless/20250410215527.3001-1-spasswolf@web.de Fixes: a104042e2bf6 ("wifi: mac80211: Update skb's control block key in ieee80211_tx_dequeue()") Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 34f229a6eab01d..20179db88c4a6c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3894,7 +3894,6 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, * The key can be removed while the packet was queued, so need to call * this here to get the current key. */ - info->control.hw_key = NULL; r = ieee80211_tx_h_select_key(&tx); if (r != TX_CONTINUE) { ieee80211_free_txskb(&local->hw, skb); From 9ab75eee1a056f896b87d139044dd103adc532b9 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 3 Apr 2025 19:59:45 +0200 Subject: [PATCH 0497/2924] USB: storage: quirk for ADATA Portable HDD CH94 Version 1.60 specifically needs this quirk. Version 2.00 is known good. Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20250403180004.343133-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 1f8c9b16a0fb85..d460d71b425783 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -83,6 +83,13 @@ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_LUNS), +/* Reported-by: Oliver Neukum */ +UNUSUAL_DEV(0x125f, 0xa94a, 0x0160, 0x0160, + "ADATA", + "Portable HDD CH94", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + /* Reported-by: Benjamin Tissoires */ UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, "Initio Corporation", From 63ccd26cd1f6600421795f6ca3e625076be06c9f Mon Sep 17 00:00:00 2001 From: Frode Isaksen Date: Thu, 3 Apr 2025 09:28:03 +0200 Subject: [PATCH 0498/2924] usb: dwc3: gadget: check that event count does not exceed event buffer length The event count is read from register DWC3_GEVNTCOUNT. There is a check for the count being zero, but not for exceeding the event buffer length. Check that event count does not exceed event buffer length, avoiding an out-of-bounds access when memcpy'ing the event. Crash log: Unable to handle kernel paging request at virtual address ffffffc0129be000 pc : __memcpy+0x114/0x180 lr : dwc3_check_event_buf+0xec/0x348 x3 : 0000000000000030 x2 : 000000000000dfc4 x1 : ffffffc0129be000 x0 : ffffff87aad60080 Call trace: __memcpy+0x114/0x180 dwc3_interrupt+0x24/0x34 Signed-off-by: Frode Isaksen Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver") Cc: stable Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250403072907.448524-1-fisaksen@baylibre.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 47e73c4ed62d3e..8c30d86cc4e3a0 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4617,6 +4617,12 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) if (!count) return IRQ_NONE; + if (count > evt->length) { + dev_err_ratelimited(dwc->dev, "invalid count(%u) > evt->length(%u)\n", + count, evt->length); + return IRQ_NONE; + } + evt->count = count; evt->flags |= DWC3_EVENT_PENDING; From e00b39a4f3552c730f1e24c8d62c4a8c6aad4e5d Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 8 Apr 2025 15:57:46 +0200 Subject: [PATCH 0499/2924] USB: VLI disk crashes if LPM is used This device needs the NO_LPM quirk. Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20250408135800.792515-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 61583a1b7ac65a..87b48c17616051 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -542,6 +542,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2040, 0x7200), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* VLI disk */ + { USB_DEVICE(0x2109, 0x0711), .driver_info = USB_QUIRK_NO_LPM }, + /* Raydium Touchscreen */ { USB_DEVICE(0x2386, 0x3114), .driver_info = USB_QUIRK_NO_LPM }, From 9697f5efcf5fdea65b8390b5eb81bebe746ceedc Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 1 Apr 2025 10:45:38 +0200 Subject: [PATCH 0500/2924] USB: wdm: handle IO errors in wdm_wwan_port_start In case submitting the URB fails we must undo what we've done so far. Fixes: cac6fb015f71 ("usb: class: cdc-wdm: WWAN framework integration") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20250401084749.175246-2-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 86ee39db013f39..eb86d1c9b4a448 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -829,6 +829,7 @@ static struct usb_class_driver wdm_class = { static int wdm_wwan_port_start(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); + int rv; /* The interface is both exposed via the WWAN framework and as a * legacy usbmisc chardev. If chardev is already open, just fail @@ -848,7 +849,15 @@ static int wdm_wwan_port_start(struct wwan_port *port) wwan_port_txon(port); /* Start getting events */ - return usb_submit_urb(desc->validity, GFP_KERNEL); + rv = usb_submit_urb(desc->validity, GFP_KERNEL); + if (rv < 0) { + wwan_port_txoff(port); + desc->manage_power(desc->intf, 0); + /* this must be last lest we race with chardev open */ + clear_bit(WDM_WWAN_IN_USE, &desc->flags); + } + + return rv; } static void wdm_wwan_port_stop(struct wwan_port *port) From c1846ed4eb527bdfe6b3b7dd2c78e2af4bf98f4f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 1 Apr 2025 10:45:39 +0200 Subject: [PATCH 0501/2924] USB: wdm: close race between wdm_open and wdm_wwan_port_stop Clearing WDM_WWAN_IN_USE must be the last action or we can open a chardev whose URBs are still poisoned Fixes: cac6fb015f71 ("usb: class: cdc-wdm: WWAN framework integration") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20250401084749.175246-3-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index eb86d1c9b4a448..9c686751ddc10e 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -726,7 +726,7 @@ static int wdm_open(struct inode *inode, struct file *file) rv = -EBUSY; goto out; } - + smp_rmb(); /* ordered against wdm_wwan_port_stop() */ rv = usb_autopm_get_interface(desc->intf); if (rv < 0) { dev_err(&desc->intf->dev, "Error autopm - %d\n", rv); @@ -868,8 +868,10 @@ static void wdm_wwan_port_stop(struct wwan_port *port) poison_urbs(desc); desc->manage_power(desc->intf, 0); clear_bit(WDM_READ, &desc->flags); - clear_bit(WDM_WWAN_IN_USE, &desc->flags); unpoison_urbs(desc); + smp_wmb(); /* ordered against wdm_open() */ + /* this must be last lest we open a poisoned device */ + clear_bit(WDM_WWAN_IN_USE, &desc->flags); } static void wdm_wwan_port_tx_complete(struct urb *urb) From 1fdc4dca350c0b8ada0b8ebf212504e1ad55e511 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 1 Apr 2025 10:45:40 +0200 Subject: [PATCH 0502/2924] USB: wdm: wdm_wwan_port_tx_complete mutex in atomic context wdm_wwan_port_tx_complete is called from a completion handler with irqs disabled and possible in IRQ context usb_autopm_put_interface can take a mutex. Hence usb_autopm_put_interface_async must be used. Fixes: cac6fb015f71 ("usb: class: cdc-wdm: WWAN framework integration") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20250401084749.175246-4-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 9c686751ddc10e..fc34a54126904d 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -879,7 +879,7 @@ static void wdm_wwan_port_tx_complete(struct urb *urb) struct sk_buff *skb = urb->context; struct wdm_device *desc = skb_shinfo(skb)->destructor_arg; - usb_autopm_put_interface(desc->intf); + usb_autopm_put_interface_async(desc->intf); wwan_port_txon(desc->wwanp); kfree_skb(skb); } From 73e9cc1ffd3650b12c4eb059dfdafd56e725ceda Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 1 Apr 2025 10:45:41 +0200 Subject: [PATCH 0503/2924] USB: wdm: add annotation This is not understandable without a comment on endianness Fixes: afba937e540c9 ("USB: CDC WDM driver") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20250401084749.175246-5-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index fc34a54126904d..16e7fa4d488d37 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -909,7 +909,7 @@ static int wdm_wwan_port_tx(struct wwan_port *port, struct sk_buff *skb) req->bRequestType = (USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE); req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND; req->wValue = 0; - req->wIndex = desc->inum; + req->wIndex = desc->inum; /* already converted */ req->wLength = cpu_to_le16(skb->len); skb_shinfo(skb)->destructor_arg = desc; From f57edca8c1e6148e6221c3abca4efce2c6eed2cb Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 8 Apr 2025 06:09:15 +0000 Subject: [PATCH 0504/2924] dt-bindings: timer: renesas,tpu: remove obsolete binding Commit 1c4b5ecb7ea1 ("remove the h8300 architecture") removed Renesas TPU timer driver. Let's remove its binding. Signed-off-by: Kuninori Morimoto Cc: Geert Uytterhoeven Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/877c3vnq0k.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Rob Herring (Arm) --- .../bindings/pwm/renesas,tpu-pwm.yaml | 9 --- .../bindings/timer/renesas,tpu.yaml | 56 ------------------- 2 files changed, 65 deletions(-) delete mode 100644 Documentation/devicetree/bindings/timer/renesas,tpu.yaml diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml index a4dfa09344dd72..f85ee5d20ccbb3 100644 --- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml +++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml @@ -9,15 +9,6 @@ title: Renesas R-Car Timer Pulse Unit PWM Controller maintainers: - Laurent Pinchart -select: - properties: - compatible: - contains: - const: renesas,tpu - required: - - compatible - - '#pwm-cells' - properties: compatible: items: diff --git a/Documentation/devicetree/bindings/timer/renesas,tpu.yaml b/Documentation/devicetree/bindings/timer/renesas,tpu.yaml deleted file mode 100644 index 7a473b30277515..00000000000000 --- a/Documentation/devicetree/bindings/timer/renesas,tpu.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/timer/renesas,tpu.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Renesas H8/300 Timer Pulse Unit - -maintainers: - - Yoshinori Sato - -description: - The TPU is a 16bit timer/counter with configurable clock inputs and - programmable compare match. - This implementation supports only cascade mode. - -select: - properties: - compatible: - contains: - const: renesas,tpu - '#pwm-cells': false - required: - - compatible - -properties: - compatible: - const: renesas,tpu - - reg: - items: - - description: First channel - - description: Second channel - - clocks: - maxItems: 1 - - clock-names: - const: fck - -required: - - compatible - - reg - - clocks - - clock-names - -additionalProperties: false - -examples: - - | - tpu: tpu@ffffe0 { - compatible = "renesas,tpu"; - reg = <0xffffe0 16>, <0xfffff0 12>; - clocks = <&pclk>; - clock-names = "fck"; - }; From 7094832b5ac861b0bd7ed8866c93cb15ef619996 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 8 Apr 2025 19:22:47 +0200 Subject: [PATCH 0505/2924] serial: msm: Configure correct working mode before starting earlycon The MSM UART DM controller supports different working modes, e.g. DMA or the "single-character mode", where all reads/writes operate on a single character rather than 4 chars (32-bit) at once. When using earlycon, __msm_console_write() always writes 4 characters at a time, but we don't know which mode the bootloader was using and we don't set the mode either. This causes garbled output if the bootloader was using the single-character mode, because only every 4th character appears in the serial console, e.g. "[ 00oni pi 000xf0[ 00i s 5rm9(l)l s 1 1 SPMTA 7:C 5[ 00A ade k d[ 00ano:ameoi .Q1B[ 00ac _idaM00080oo'" If the bootloader was using the DMA ("DM") mode, output would likely fail entirely. Later, when the full serial driver probes, the port is re-initialized and output works as expected. Fix this also for earlycon by clearing the DMEN register and reset+re-enable the transmitter to apply the change. This ensures the transmitter is in the expected state before writing any output. Cc: stable Fixes: 0efe72963409 ("tty: serial: msm: Add earlycon support") Signed-off-by: Stephan Gerhold Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250408-msm-serial-earlycon-v1-1-429080127530@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/msm_serial.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 1b137e06844425..3449945493ceb4 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -1746,6 +1746,12 @@ msm_serial_early_console_setup_dm(struct earlycon_device *device, if (!device->port.membase) return -ENODEV; + /* Disable DM / single-character modes */ + msm_write(&device->port, 0, UARTDM_DMEN); + msm_write(&device->port, MSM_UART_CR_CMD_RESET_RX, MSM_UART_CR); + msm_write(&device->port, MSM_UART_CR_CMD_RESET_TX, MSM_UART_CR); + msm_write(&device->port, MSM_UART_CR_TX_ENABLE, MSM_UART_CR); + device->con->write = msm_serial_early_write_dm; return 0; } From ee6a44da3c87cf64d67dd02be8c0127a5bf56175 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Fri, 11 Apr 2025 09:01:45 +0200 Subject: [PATCH 0506/2924] tty: Require CAP_SYS_ADMIN for all usages of TIOCL_SELMOUSEREPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This requirement was overeagerly loosened in commit 2f83e38a095f ("tty: Permit some TIOCL_SETSEL modes without CAP_SYS_ADMIN"), but as it turns out, (1) the logic I implemented there was inconsistent (apologies!), (2) TIOCL_SELMOUSEREPORT might actually be a small security risk after all, and (3) TIOCL_SELMOUSEREPORT is only meant to be used by the mouse daemon (GPM or Consolation), which runs as CAP_SYS_ADMIN already. In more detail: 1. The previous patch has inconsistent logic: In commit 2f83e38a095f ("tty: Permit some TIOCL_SETSEL modes without CAP_SYS_ADMIN"), we checked for sel_mode == TIOCL_SELMOUSEREPORT, but overlooked that the lower four bits of this "mode" parameter were actually used as an additional way to pass an argument. So the patch did actually still require CAP_SYS_ADMIN, if any of the mouse button bits are set, but did not require it if none of the mouse buttons bits are set. This logic is inconsistent and was not intentional. We should have the same policies for using TIOCL_SELMOUSEREPORT independent of the value of the "hidden" mouse button argument. I sent a separate documentation patch to the man page list with more details on TIOCL_SELMOUSEREPORT: https://lore.kernel.org/all/20250223091342.35523-2-gnoack3000@gmail.com/ 2. TIOCL_SELMOUSEREPORT is indeed a potential security risk which can let an attacker simulate "keyboard" input to command line applications on the same terminal, like TIOCSTI and some other TIOCLINUX "selection mode" IOCTLs. By enabling mouse reporting on a terminal and then injecting mouse reports through TIOCL_SELMOUSEREPORT, an attacker can simulate mouse movements on the same terminal, similar to the TIOCSTI keystroke injection attacks that were previously possible with TIOCSTI and other TIOCL_SETSEL selection modes. Many programs (including libreadline/bash) are then prone to misinterpret these mouse reports as normal keyboard input because they do not expect input in the X11 mouse protocol form. The attacker does not have complete control over the escape sequence, but they can at least control the values of two consecutive bytes in the binary mouse reporting escape sequence. I went into more detail on that in the discussion at https://lore.kernel.org/all/20250221.0a947528d8f3@gnoack.org/ It is not equally trivial to simulate arbitrary keystrokes as it was with TIOCSTI (commit 83efeeeb3d04 ("tty: Allow TIOCSTI to be disabled")), but the general mechanism is there, and together with the small number of existing legit use cases (see below), it would be better to revert back to requiring CAP_SYS_ADMIN for TIOCL_SELMOUSEREPORT, as it was already the case before commit 2f83e38a095f ("tty: Permit some TIOCL_SETSEL modes without CAP_SYS_ADMIN"). 3. TIOCL_SELMOUSEREPORT is only used by the mouse daemons (GPM or Consolation), and they are the only legit use case: To quote console_codes(4): The mouse tracking facility is intended to return xterm(1)-compatible mouse status reports. Because the console driver has no way to know the device or type of the mouse, these reports are returned in the console input stream only when the virtual terminal driver receives a mouse update ioctl. These ioctls must be generated by a mouse-aware user-mode application such as the gpm(8) daemon. Jared Finder has also confirmed in https://lore.kernel.org/all/491f3df9de6593df8e70dbe77614b026@finder.org/ that Emacs does not call TIOCL_SELMOUSEREPORT directly, and it would be difficult to find good reasons for doing that, given that it would interfere with the reports that GPM is sending. More information on the interaction between GPM, terminals and the kernel with additional pointers is also available in this patch: https://lore.kernel.org/all/a773e48920aa104a65073671effbdee665c105fc.1603963593.git.tammo.block@gmail.com/ For background on who else uses TIOCL_SELMOUSEREPORT: Debian Code search finds one page of results, the only two known callers are the two mouse daemons GPM and Consolation. (GPM does not show up in the search results because it uses literal numbers to refer to TIOCLINUX-related enums. I looked through GPM by hand instead. TIOCL_SELMOUSEREPORT is also not used from libgpm.) https://codesearch.debian.net/search?q=TIOCL_SELMOUSEREPORT Cc: Jared Finder Cc: Jann Horn Cc: Hanno Böck Cc: Jiri Slaby Cc: Kees Cook Cc: stable Fixes: 2f83e38a095f ("tty: Permit some TIOCL_SETSEL modes without CAP_SYS_ADMIN") Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20250411070144.3959-2-gnoack3000@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/selection.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 0bd6544e30a6b3..791e2f1f7c0b65 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -193,13 +193,12 @@ int set_selection_user(const struct tiocl_selection __user *sel, return -EFAULT; /* - * TIOCL_SELCLEAR, TIOCL_SELPOINTER and TIOCL_SELMOUSEREPORT are OK to - * use without CAP_SYS_ADMIN as they do not modify the selection. + * TIOCL_SELCLEAR and TIOCL_SELPOINTER are OK to use without + * CAP_SYS_ADMIN as they do not modify the selection. */ switch (v.sel_mode) { case TIOCL_SELCLEAR: case TIOCL_SELPOINTER: - case TIOCL_SELMOUSEREPORT: break; default: if (!capable(CAP_SYS_ADMIN)) From 8e404ad95d2c10c261e2ef6992c7c12dde03df0e Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:29 -0700 Subject: [PATCH 0507/2924] igc: fix PTM cycle trigger logic Writing to clear the PTM status 'valid' bit while the PTM cycle is triggered results in unreliable PTM operation. To fix this, clear the PTM 'trigger' and status after each PTM transaction. The issue can be reproduced with the following: $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to quickly reproduce the issue. PHC2SYS exits with: "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction fails This patch also fixes a hang in igc_probe() when loading the igc driver in the kdump kernel on systems supporting PTM. The igc driver running in the base kernel enables PTM trigger in igc_probe(). Therefore the driver is always in PTM trigger mode, except in brief periods when manually triggering a PTM cycle. When a crash occurs, the NIC is reset while PTM trigger is enabled. Due to a hardware problem, the NIC is subsequently in a bad busmaster state and doesn't handle register reads/writes. When running igc_probe() in the kdump kernel, the first register access to a NIC register hangs driver probing and ultimately breaks kdump. With this patch, igc has PTM trigger disabled most of the time, and the trigger is only enabled for very brief (10 - 100 us) periods when manually triggering a PTM cycle. Chances that a crash occurs during a PTM trigger are not 0, but extremely reduced. Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") Reviewed-by: Michal Swiatkowski Tested-by: Mor Bar-Gabay Tested-by: Avigail Dahan Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Tested-by: Corinna Vinschen Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_defines.h | 1 + drivers/net/ethernet/intel/igc/igc_ptp.c | 70 ++++++++++++-------- 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 8e449904aa7dbd..2ff292f5f63be2 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -593,6 +593,7 @@ #define IGC_PTM_STAT_T4M1_OVFL BIT(3) /* T4 minus T1 overflow */ #define IGC_PTM_STAT_ADJUST_1ST BIT(4) /* 1588 timer adjusted during 1st PTM cycle */ #define IGC_PTM_STAT_ADJUST_CYC BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */ +#define IGC_PTM_STAT_ALL GENMASK(5, 0) /* Used to clear all status */ /* PCIe PTM Cycle Control */ #define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec) ((msec) & 0x3ff) /* PTM Cycle Time (msec) */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 946edbad43022c..c640e346342be8 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -974,13 +974,40 @@ static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat) } } +static void igc_ptm_trigger(struct igc_hw *hw) +{ + u32 ctrl; + + /* To "manually" start the PTM cycle we need to set the + * trigger (TRIG) bit + */ + ctrl = rd32(IGC_PTM_CTRL); + ctrl |= IGC_PTM_CTRL_TRIG; + wr32(IGC_PTM_CTRL, ctrl); + /* Perform flush after write to CTRL register otherwise + * transaction may not start + */ + wrfl(); +} + +static void igc_ptm_reset(struct igc_hw *hw) +{ + u32 ctrl; + + ctrl = rd32(IGC_PTM_CTRL); + ctrl &= ~IGC_PTM_CTRL_TRIG; + wr32(IGC_PTM_CTRL, ctrl); + /* Write to clear all status */ + wr32(IGC_PTM_STAT, IGC_PTM_STAT_ALL); +} + static int igc_phc_get_syncdevicetime(ktime_t *device, struct system_counterval_t *system, void *ctx) { - u32 stat, t2_curr_h, t2_curr_l, ctrl; struct igc_adapter *adapter = ctx; struct igc_hw *hw = &adapter->hw; + u32 stat, t2_curr_h, t2_curr_l; int err, count = 100; ktime_t t1, t2_curr; @@ -994,25 +1021,13 @@ static int igc_phc_get_syncdevicetime(ktime_t *device, * are transitory. Repeating the process returns valid * data eventually. */ - - /* To "manually" start the PTM cycle we need to clear and - * then set again the TRIG bit. - */ - ctrl = rd32(IGC_PTM_CTRL); - ctrl &= ~IGC_PTM_CTRL_TRIG; - wr32(IGC_PTM_CTRL, ctrl); - ctrl |= IGC_PTM_CTRL_TRIG; - wr32(IGC_PTM_CTRL, ctrl); - - /* The cycle only starts "for real" when software notifies - * that it has read the registers, this is done by setting - * VALID bit. - */ - wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID); + igc_ptm_trigger(hw); err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat, stat, IGC_PTM_STAT_SLEEP, IGC_PTM_STAT_TIMEOUT); + igc_ptm_reset(hw); + if (err < 0) { netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); return err; @@ -1021,15 +1036,7 @@ static int igc_phc_get_syncdevicetime(ktime_t *device, if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID) break; - if (stat & ~IGC_PTM_STAT_VALID) { - /* An error occurred, log it. */ - igc_ptm_log_error(adapter, stat); - /* The STAT register is write-1-to-clear (W1C), - * so write the previous error status to clear it. - */ - wr32(IGC_PTM_STAT, stat); - continue; - } + igc_ptm_log_error(adapter, stat); } while (--count); if (!count) { @@ -1255,7 +1262,7 @@ void igc_ptp_stop(struct igc_adapter *adapter) void igc_ptp_reset(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - u32 cycle_ctrl, ctrl; + u32 cycle_ctrl, ctrl, stat; unsigned long flags; u32 timadj; @@ -1290,14 +1297,19 @@ void igc_ptp_reset(struct igc_adapter *adapter) ctrl = IGC_PTM_CTRL_EN | IGC_PTM_CTRL_START_NOW | IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) | - IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT) | - IGC_PTM_CTRL_TRIG; + IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT); wr32(IGC_PTM_CTRL, ctrl); /* Force the first cycle to run. */ - wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID); + igc_ptm_trigger(hw); + + if (readx_poll_timeout_atomic(rd32, IGC_PTM_STAT, stat, + stat, IGC_PTM_STAT_SLEEP, + IGC_PTM_STAT_TIMEOUT)) + netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); + igc_ptm_reset(hw); break; default: /* No work to do. */ From 714cd033da6fea4cf54a11b3cfd070afde3f31df Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:30 -0700 Subject: [PATCH 0508/2924] igc: increase wait time before retrying PTM The i225/i226 hardware retries if it receives an inappropriate response from the upstream device. If the device retries too quickly, the root port does not respond. The wait between attempts was reduced from 10us to 1us in commit 6b8aa753a9f9 ("igc: Decrease PTM short interval from 10 us to 1 us"), which said: With the 10us interval, we were seeing PTM transactions take around 12us. Hardware team suggested this interval could be lowered to 1us which was confirmed with PCIe sniffer. With the 1us interval, PTM dialogs took around 2us. While a 1us short cycle time was thought to be theoretically sufficient, it turns out in practice it is not quite long enough. It is unclear if the problem is in the root port or an issue in i225/i226. Increase the wait from 1us to 4us. Increasing to 2us appeared to work in practice on the setups we have available. A value of 4us was chosen due to the limited hardware available for testing, with a goal of ensuring we wait long enough without overly penalizing the response time when unnecessary. The issue can be reproduced with the following: $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to quickly reproduce the issue. PHC2SYS exits with: "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction fails Fixes: 6b8aa753a9f9 ("igc: Decrease PTM short interval from 10 us to 1 us") Reviewed-by: Michal Swiatkowski Tested-by: Mor Bar-Gabay Tested-by: Avigail Dahan Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_defines.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 2ff292f5f63be2..d19325b0e6e0ba 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -574,7 +574,10 @@ #define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2) #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8) -#define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle interval */ +/* A short cycle time of 1us theoretically should work, but appears to be too + * short in practice. + */ +#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */ #define IGC_PTM_CYC_TIME_DEFAULT 5 /* Default PTM cycle time */ #define IGC_PTM_TIMEOUT_DEFAULT 255 /* Default timeout for PTM errors */ From cd7f7328d691937102732f39f97ead35b15bf803 Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:31 -0700 Subject: [PATCH 0509/2924] igc: move ktime snapshot into PTM retry loop Move ktime_get_snapshot() into the loop. If a retry does occur, a more recent snapshot will result in a more accurate cross-timestamp. Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") Reviewed-by: Michal Swiatkowski Tested-by: Mor Bar-Gabay Tested-by: Avigail Dahan Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index c640e346342be8..516abe7405deee 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -1011,16 +1011,16 @@ static int igc_phc_get_syncdevicetime(ktime_t *device, int err, count = 100; ktime_t t1, t2_curr; - /* Get a snapshot of system clocks to use as historic value. */ - ktime_get_snapshot(&adapter->snapshot); - + /* Doing this in a loop because in the event of a + * badly timed (ha!) system clock adjustment, we may + * get PTM errors from the PCI root, but these errors + * are transitory. Repeating the process returns valid + * data eventually. + */ do { - /* Doing this in a loop because in the event of a - * badly timed (ha!) system clock adjustment, we may - * get PTM errors from the PCI root, but these errors - * are transitory. Repeating the process returns valid - * data eventually. - */ + /* Get a snapshot of system clocks to use as historic value. */ + ktime_get_snapshot(&adapter->snapshot); + igc_ptm_trigger(hw); err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat, From 26a3910afd111f7c1a96dace6dc02f3225063896 Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:32 -0700 Subject: [PATCH 0510/2924] igc: handle the IGC_PTP_ENABLED flag correctly All functions in igc_ptp.c called from igc_main.c should check the IGC_PTP_ENABLED flag. Adding check for this flag to stop and reset functions. Fixes: 5f2958052c58 ("igc: Add basic skeleton for PTP") Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Tested-by: Mor Bar-Gabay Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 516abe7405deee..343205bffc3550 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -1244,8 +1244,12 @@ void igc_ptp_suspend(struct igc_adapter *adapter) **/ void igc_ptp_stop(struct igc_adapter *adapter) { + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + igc_ptp_suspend(adapter); + adapter->ptp_flags &= ~IGC_PTP_ENABLED; if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); netdev_info(adapter->netdev, "PHC removed\n"); @@ -1266,6 +1270,9 @@ void igc_ptp_reset(struct igc_adapter *adapter) unsigned long flags; u32 timadj; + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + /* reset the tstamp_config */ igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); From 1f025759ba394dd53e434d2668cb0597886d9b69 Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:33 -0700 Subject: [PATCH 0511/2924] igc: cleanup PTP module if probe fails Make sure that the PTP module is cleaned up if the igc_probe() fails by calling igc_ptp_stop() on exit. Fixes: d89f88419f99 ("igc: Add skeletal frame for Intel(R) 2.5G Ethernet Controller support") Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Tested-by: Mor Bar-Gabay Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f1330379e6bbc5..b1669d7cf43591 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7231,6 +7231,7 @@ static int igc_probe(struct pci_dev *pdev, err_register: igc_release_hw_control(adapter); + igc_ptp_stop(adapter); err_eeprom: if (!igc_check_reset_block(hw)) igc_reset_phy(hw); From 1a931c4f5e6862e61a4b130cb76b422e1415f644 Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:34 -0700 Subject: [PATCH 0512/2924] igc: add lock preventing multiple simultaneous PTM transactions Add a mutex around the PTM transaction to prevent multiple transactors Multiple processes try to initiate a PTM transaction, one or all may fail. This can be reproduced by running two instances of the following: $ sudo phc2sys -O 0 -i tsn0 -m PHC2SYS exits with: "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction fails Note: Normally two instance of PHC2SYS will not run, but one process should not break another. Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Tested-by: Mor Bar-Gabay Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_ptp.c | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index c35cc5cb118569..2f265c0959c7a0 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -319,6 +319,7 @@ struct igc_adapter { struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ ktime_t ptp_reset_start; /* Reset time in clock mono */ struct system_time_snapshot snapshot; + struct mutex ptm_lock; /* Only allow one PTM transaction at a time */ char fw_version[32]; diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 343205bffc3550..612ed26a29c5d4 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -974,6 +974,7 @@ static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat) } } +/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_trigger() */ static void igc_ptm_trigger(struct igc_hw *hw) { u32 ctrl; @@ -990,6 +991,7 @@ static void igc_ptm_trigger(struct igc_hw *hw) wrfl(); } +/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_reset() */ static void igc_ptm_reset(struct igc_hw *hw) { u32 ctrl; @@ -1068,9 +1070,16 @@ static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp, { struct igc_adapter *adapter = container_of(ptp, struct igc_adapter, ptp_caps); + int ret; - return get_device_system_crosststamp(igc_phc_get_syncdevicetime, - adapter, &adapter->snapshot, cts); + /* This blocks until any in progress PTM transactions complete */ + mutex_lock(&adapter->ptm_lock); + + ret = get_device_system_crosststamp(igc_phc_get_syncdevicetime, + adapter, &adapter->snapshot, cts); + mutex_unlock(&adapter->ptm_lock); + + return ret; } static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp, @@ -1169,6 +1178,7 @@ void igc_ptp_init(struct igc_adapter *adapter) spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->free_timer_lock); spin_lock_init(&adapter->tmreg_lock); + mutex_init(&adapter->ptm_lock); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; @@ -1181,6 +1191,7 @@ void igc_ptp_init(struct igc_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; netdev_err(netdev, "ptp_clock_register failed\n"); + mutex_destroy(&adapter->ptm_lock); } else if (adapter->ptp_clock) { netdev_info(netdev, "PHC added\n"); adapter->ptp_flags |= IGC_PTP_ENABLED; @@ -1210,10 +1221,12 @@ static void igc_ptm_stop(struct igc_adapter *adapter) struct igc_hw *hw = &adapter->hw; u32 ctrl; + mutex_lock(&adapter->ptm_lock); ctrl = rd32(IGC_PTM_CTRL); ctrl &= ~IGC_PTM_CTRL_EN; wr32(IGC_PTM_CTRL, ctrl); + mutex_unlock(&adapter->ptm_lock); } /** @@ -1255,6 +1268,7 @@ void igc_ptp_stop(struct igc_adapter *adapter) netdev_info(adapter->netdev, "PHC removed\n"); adapter->ptp_flags &= ~IGC_PTP_ENABLED; } + mutex_destroy(&adapter->ptm_lock); } /** @@ -1294,6 +1308,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) if (!igc_is_crosststamp_supported(adapter)) break; + mutex_lock(&adapter->ptm_lock); wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT); wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT); @@ -1317,6 +1332,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); igc_ptm_reset(hw); + mutex_unlock(&adapter->ptm_lock); break; default: /* No work to do. */ From 2b70702917337a8d6d07f03eed961e0119091647 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 18:02:52 -0700 Subject: [PATCH 0513/2924] perf tools: Remove evsel__handle_error_quirks() The evsel__handle_error_quirks() is to fixup invalid event attributes on some architecture based on the error code. Currently it's only used for AMD to disable precise_ip not to use IBS which has more restrictions. But the commit c33aea446bf555ab changed call evsel__precise_ip_fallback for any errors so there's no difference with the above function. To make matter worse, it caused a problem with branch stack on Zen3. The IBS doesn't support branch stack so it should use a regular core PMU event. The default event is set precise_max and it starts with 3. And evsel__precise_ip_fallback() tries with it and reduces the level one by one. At last it tries with 0 but it also failed on Zen3 since the branch stack is not supported for the cycles event. At this point, evsel__precise_ip_fallback() restores the original precise_ip value (3) in the hope that it can succeed with other modifier (like exclude_kernel). Then evsel__handle_error_quirks() see it has precise_ip != 0 and make it retry with 0. This created an infinite loop. Before: $ perf record -b -vv |& grep removing removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD ... After: $ perf record -b true Error: Failure to open event 'cycles:P' on PMU 'cpu' which will be removed. Invalid event (cycles:P) in per-thread mode, enable system wide with '-a'. Error: Failure to open any events for recording. Fixes: c33aea446bf555ab ("perf tools: Fix precise_ip fallback logic") Tested-by: Chun-Tse Shao Cc: Ravi Bangoria Link: https://lore.kernel.org/r/20250410010252.402221-1-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/evsel.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1974395492d7da..3c030da2e477c7 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2566,25 +2566,6 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu return false; } -static bool evsel__handle_error_quirks(struct evsel *evsel, int error) -{ - /* - * AMD core PMU tries to forward events with precise_ip to IBS PMU - * implicitly. But IBS PMU has more restrictions so it can fail with - * supported event attributes. Let's forward it back to the core PMU - * by clearing precise_ip only if it's from precise_max (:P). - */ - if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() && - evsel->core.attr.precise_ip && evsel->precise_max) { - evsel->core.attr.precise_ip = 0; - pr_debug2_peo("removing precise_ip on AMD\n"); - display_attr(&evsel->core.attr); - return true; - } - - return false; -} - static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads, int start_cpu_map_idx, int end_cpu_map_idx) @@ -2730,9 +2711,6 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__handle_error_quirks(evsel, err)) - goto retry_open; - out_close: if (err) threads->err_thread = thread; From a650d38915c194b87616a0747a339b20958d17db Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 11 Apr 2025 03:17:59 -0700 Subject: [PATCH 0514/2924] bpf: Convert ringbuf map to rqspinlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the raw spinlock used by BPF ringbuf to rqspinlock. Currently, we have an open syzbot report of a potential deadlock. In addition, the ringbuf can fail to reserve spuriously under contention from NMI context. It is potentially attractive to enable unconstrained usage (incl. NMIs) while ensuring no deadlocks manifest at runtime, perform the conversion to rqspinlock to achieve this. This change was benchmarked for BPF ringbuf's multi-producer contention case on an Intel Sapphire Rapids server, with hyperthreading disabled and performance governor turned on. 5 warm up runs were done for each case before obtaining the results. Before (raw_spinlock_t): Ringbuf, multi-producer contention ================================== rb-libbpf nr_prod 1 11.440 ± 0.019M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 2 2.706 ± 0.010M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 3 3.130 ± 0.004M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 4 2.472 ± 0.003M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 8 2.352 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 12 2.813 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 16 1.988 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 20 2.245 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 24 2.148 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 28 2.190 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 32 2.490 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 36 2.180 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 40 2.201 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 44 2.226 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 48 2.164 ± 0.001M/s (drops 0.000 ± 0.000M/s) rb-libbpf nr_prod 52 1.874 ± 0.001M/s (drops 0.000 ± 0.000M/s) After (rqspinlock_t): Ringbuf, multi-producer contention ================================== rb-libbpf nr_prod 1 11.078 ± 0.019M/s (drops 0.000 ± 0.000M/s) (-3.16%) rb-libbpf nr_prod 2 2.801 ± 0.014M/s (drops 0.000 ± 0.000M/s) (3.51%) rb-libbpf nr_prod 3 3.454 ± 0.005M/s (drops 0.000 ± 0.000M/s) (10.35%) rb-libbpf nr_prod 4 2.567 ± 0.002M/s (drops 0.000 ± 0.000M/s) (3.84%) rb-libbpf nr_prod 8 2.468 ± 0.001M/s (drops 0.000 ± 0.000M/s) (4.93%) rb-libbpf nr_prod 12 2.510 ± 0.001M/s (drops 0.000 ± 0.000M/s) (-10.77%) rb-libbpf nr_prod 16 2.075 ± 0.001M/s (drops 0.000 ± 0.000M/s) (4.38%) rb-libbpf nr_prod 20 2.640 ± 0.001M/s (drops 0.000 ± 0.000M/s) (17.59%) rb-libbpf nr_prod 24 2.092 ± 0.001M/s (drops 0.000 ± 0.000M/s) (-2.61%) rb-libbpf nr_prod 28 2.426 ± 0.005M/s (drops 0.000 ± 0.000M/s) (10.78%) rb-libbpf nr_prod 32 2.331 ± 0.004M/s (drops 0.000 ± 0.000M/s) (-6.39%) rb-libbpf nr_prod 36 2.306 ± 0.003M/s (drops 0.000 ± 0.000M/s) (5.78%) rb-libbpf nr_prod 40 2.178 ± 0.002M/s (drops 0.000 ± 0.000M/s) (-1.04%) rb-libbpf nr_prod 44 2.293 ± 0.001M/s (drops 0.000 ± 0.000M/s) (3.01%) rb-libbpf nr_prod 48 2.022 ± 0.001M/s (drops 0.000 ± 0.000M/s) (-6.56%) rb-libbpf nr_prod 52 1.809 ± 0.001M/s (drops 0.000 ± 0.000M/s) (-3.47%) There's a fair amount of noise in the benchmark, with numbers on reruns going up and down by 10%, so all changes are in the range of this disturbance, and we see no major regressions. Reported-by: syzbot+850aaf14624dc0c6d366@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/0000000000004aa700061379547e@google.com Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20250411101759.4061366-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/ringbuf.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 1499d8caa9a351..719d73299397b5 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -11,6 +11,7 @@ #include #include #include +#include #define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE) @@ -29,7 +30,7 @@ struct bpf_ringbuf { u64 mask; struct page **pages; int nr_pages; - raw_spinlock_t spinlock ____cacheline_aligned_in_smp; + rqspinlock_t spinlock ____cacheline_aligned_in_smp; /* For user-space producer ring buffers, an atomic_t busy bit is used * to synchronize access to the ring buffers in the kernel, rather than * the spinlock that is used for kernel-producer ring buffers. This is @@ -173,7 +174,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) if (!rb) return NULL; - raw_spin_lock_init(&rb->spinlock); + raw_res_spin_lock_init(&rb->spinlock); atomic_set(&rb->busy, 0); init_waitqueue_head(&rb->waitq); init_irq_work(&rb->work, bpf_ringbuf_notify); @@ -416,12 +417,8 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) cons_pos = smp_load_acquire(&rb->consumer_pos); - if (in_nmi()) { - if (!raw_spin_trylock_irqsave(&rb->spinlock, flags)) - return NULL; - } else { - raw_spin_lock_irqsave(&rb->spinlock, flags); - } + if (raw_res_spin_lock_irqsave(&rb->spinlock, flags)) + return NULL; pend_pos = rb->pending_pos; prod_pos = rb->producer_pos; @@ -446,7 +443,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) */ if (new_prod_pos - cons_pos > rb->mask || new_prod_pos - pend_pos > rb->mask) { - raw_spin_unlock_irqrestore(&rb->spinlock, flags); + raw_res_spin_unlock_irqrestore(&rb->spinlock, flags); return NULL; } @@ -458,7 +455,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) /* pairs with consumer's smp_load_acquire() */ smp_store_release(&rb->producer_pos, new_prod_pos); - raw_spin_unlock_irqrestore(&rb->spinlock, flags); + raw_res_spin_unlock_irqrestore(&rb->spinlock, flags); return (void *)hdr + BPF_RINGBUF_HDR_SZ; } From d87e4026d1b20e4f237b29e0c956ad415f533de2 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Fri, 11 Apr 2025 08:14:39 +0000 Subject: [PATCH 0515/2924] cpufreq/amd-pstate: Enable ITMT support after initializing core rankings When working on dynamic ITMT priority support, it was observed that "asym_prefer_cpu" on AMD systems supporting Preferred Core ranking was always set to the first CPU in the sched group when the system boots up despite another CPU in the group having a higher ITMT ranking. "asym_prefer_cpu" is cached when the sched domain hierarchy is constructed. On AMD systems that support Preferred Core rankings, sched domains are rebuilt when ITMT support is enabled for the first time from amd_pstate*_cpu_init(). Since amd_pstate*_cpu_init() is called to initialize the cpudata for each CPU, the ITMT support is enabled after the first CPU initializes its asym priority but this is too early since other CPUs have not yet initialized their asym priorities and the sched domain is rebuilt only once when the support is toggled on for the first time. Initialize the asym priorities first in amd_pstate*_cpu_init() and then enable ITMT support later in amd_pstate_register_driver() to ensure all CPUs have correctly initialized their asym priorities before sched domain hierarchy is rebuilt. Clear the ITMT support when the amd-pstate driver unregisters since core rankings cannot be trusted unless the update_limits() callback is operational. Remove the delayed work mechanism now that ITMT support is only toggled from the driver init path which is outside the cpuhp critical section. Fixes: f3a052391822 ("cpufreq: amd-pstate: Enable amd-pstate preferred core support") Signed-off-by: K Prateek Nayak Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20250411081439.27652-1-kprateek.nayak@amd.com Signed-off-by: Mario Limonciello --- drivers/cpufreq/amd-pstate.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index c54c031939c830..b961f3a3b58059 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -794,16 +794,6 @@ static void amd_perf_ctl_reset(unsigned int cpu) wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); } -/* - * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks - * due to locking, so queue the work for later. - */ -static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) -{ - sched_set_itmt_support(); -} -static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); - #define CPPC_MAX_PERF U8_MAX static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) @@ -814,14 +804,8 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) cpudata->hw_prefcore = true; - /* - * The priorities can be set regardless of whether or not - * sched_set_itmt_support(true) has been called and it is valid to - * update them at any time after it has been called. - */ + /* Priorities must be initialized before ITMT support can be toggled on. */ sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu); - - schedule_work(&sched_prefcore_work); } static void amd_pstate_update_limits(unsigned int cpu) @@ -1196,6 +1180,9 @@ static ssize_t show_energy_performance_preference( static void amd_pstate_driver_cleanup(void) { + if (amd_pstate_prefcore) + sched_clear_itmt_support(); + cppc_state = AMD_PSTATE_DISABLE; current_pstate_driver = NULL; } @@ -1238,6 +1225,10 @@ static int amd_pstate_register_driver(int mode) return ret; } + /* Enable ITMT support once all CPUs have initialized their asym priorities. */ + if (amd_pstate_prefcore) + sched_set_itmt_support(); + return 0; } From d5f49921707cc73376ad6cf8410218b438fcd233 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Apr 2025 18:11:12 +0200 Subject: [PATCH 0516/2924] dt-bindings: soc: fsl: fsl,ls1028a-reset: Fix maintainer entry make dt_binding_check: Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml: maintainers:0: 'Frank Li' does not match '@' from schema $id: http://devicetree.org/meta-schemas/base.yaml# Fix this by adding Frank's email address. Fixes: 9ca5a7d9d2e05de6 ("dt-bindings: soc: fsl: Add fsl,ls1028a-reset for reset syscon node") Signed-off-by: Geert Uytterhoeven Reviewed-by: Frank Li Link: https://lore.kernel.org/r/185e1e06692dc5b08abcde2d3dd137c78e979d08.1744301283.git.geert+renesas@glider.be Signed-off-by: Rob Herring (Arm) --- .../devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml index 31295be910130c..234089b5954ddb 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale Layerscape Reset Registers Module maintainers: - - Frank Li + - Frank Li description: Reset Module includes chip reset, service processor control and Reset Control From c8ba3f8aff672a5ea36e895f0f8f657271d855d7 Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Mon, 17 Mar 2025 01:13:52 +0000 Subject: [PATCH 0517/2924] PCI: Run quirk_huawei_pcie_sva() before arm_smmu_probe_device() quirk_huawei_pcie_sva() sets properties needed by arm_smmu_probe_device(), but bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") changed the iommu_probe_device() flow so arm_smmu_probe_device() is now invoked before the quirk, leading to failures like this: reg-dummy reg-dummy: late IOMMU probe at driver bind, something fishy here! WARNING: CPU: 0 PID: 1 at drivers/iommu/iommu.c:449 __iommu_probe_device+0x140/0x570 RIP: 0010:__iommu_probe_device+0x140/0x570 The SR-IOV enumeration ordering changes like this: pci_iov_add_virtfn pci_device_add pci_fixup_device(pci_fixup_header) <-- device_add bus_notify iommu_bus_notifier + iommu_probe_device + arm_smmu_probe_device pci_bus_add_device pci_fixup_device(pci_fixup_final) <-- device_attach driver_probe_device really_probe pci_dma_configure acpi_dma_configure_id - iommu_probe_device - arm_smmu_probe_device The non-SR-IOV case is similar in that pci_device_add() is called from pci_scan_single_device() in the generic enumeration path and pci_bus_add_device() is called later, after all host bridges have been enumerated. Declare quirk_huawei_pcie_sva() as a header fixup to ensure that it happens before arm_smmu_probe_device(). Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Reported-by: Chaitanya Kumar Borah Closes: https://lore.kernel.org/all/SJ1PR11MB61295DE21A1184AEE0786E25B9D22@SJ1PR11MB6129.namprd11.prod.outlook.com/ Signed-off-by: Zhangfei Gao [bhelgaas: commit log, add failure info and reporter] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250317011352.5806-1-zhangfei.gao@linaro.org --- drivers/pci/quirks.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8d610c17e0f2f0..94daca15a096f7 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1990,12 +1990,12 @@ static void quirk_huawei_pcie_sva(struct pci_dev *pdev) device_create_managed_software_node(&pdev->dev, properties, NULL)) pci_warn(pdev, "could not add stall property"); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa250, quirk_huawei_pcie_sva); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa251, quirk_huawei_pcie_sva); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa255, quirk_huawei_pcie_sva); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa256, quirk_huawei_pcie_sva); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa258, quirk_huawei_pcie_sva); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa259, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa250, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa251, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa255, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa256, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa258, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa259, quirk_huawei_pcie_sva); /* * It's possible for the MSI to get corrupted if SHPC and ACPI are used From 04a80a34c22f4db245f553d8696d1318d1c00ece Mon Sep 17 00:00:00 2001 From: Andy Chiu Date: Wed, 9 Apr 2025 00:02:57 +0800 Subject: [PATCH 0518/2924] ftrace: Properly merge notrace hashes The global notrace hash should be jointly decided by the intersection of each subops's notrace hash, but not the filter hash. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250408160258.48563-1-andybnac@gmail.com Fixes: 5fccc7552ccb ("ftrace: Add subops logic to allow one ops to manage many") Signed-off-by: Andy Chiu [ fixed removing of freeing of filter_hash ] Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1a48aedb52552a..8939eeebb02e28 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3526,16 +3526,16 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int ftrace_hash_empty(subops->func_hash->notrace_hash)) { notrace_hash = EMPTY_HASH; } else { - size_bits = max(ops->func_hash->filter_hash->size_bits, - subops->func_hash->filter_hash->size_bits); + size_bits = max(ops->func_hash->notrace_hash->size_bits, + subops->func_hash->notrace_hash->size_bits); notrace_hash = alloc_ftrace_hash(size_bits); if (!notrace_hash) { free_ftrace_hash(filter_hash); return -ENOMEM; } - ret = intersect_hash(¬race_hash, ops->func_hash->filter_hash, - subops->func_hash->filter_hash); + ret = intersect_hash(¬race_hash, ops->func_hash->notrace_hash, + subops->func_hash->notrace_hash); if (ret < 0) { free_ftrace_hash(filter_hash); free_ftrace_hash(notrace_hash); From 0ae6b8ce200da00a78f33c055fdc4fe3225d22ec Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Apr 2025 11:15:50 -0400 Subject: [PATCH 0519/2924] ftrace: Fix accounting of subop hashes The function graph infrastructure uses ftrace to hook to functions. It has a single ftrace_ops to manage all the users of function graph. Each individual user (tracing, bpf, fprobes, etc) has its own ftrace_ops to track the functions it will have its callback called from. These ftrace_ops are "subops" to the main ftrace_ops of the function graph infrastructure. Each ftrace_ops has a filter_hash and a notrace_hash that is defined as: Only trace functions that are in the filter_hash but not in the notrace_hash. If the filter_hash is empty, it means to trace all functions. If the notrace_hash is empty, it means do not disable any function. The function graph main ftrace_ops needs to be a superset containing all the functions to be traced by all the subops it has. The algorithm to perform this merge was incorrect. When the first subops was added to the main ops, it simply made the main ops a copy of the subops (same filter_hash and notrace_hash). When a second ops was added, it joined the new subops filter_hash with the main ops filter_hash as a union of the two sets. The intersect between the new subops notrace_hash and the main ops notrace_hash was created as the new notrace_hash of the main ops. The issue here is that it would then start tracing functions than no subops were tracing. For example if you had two subops that had: subops 1: filter_hash = '*sched*' # trace all functions with "sched" in it notrace_hash = '*time*' # except do not trace functions with "time" subops 2: filter_hash = '*lock*' # trace all functions with "lock" in it notrace_hash = '*clock*' # except do not trace functions with "clock" The intersect of '*time*' functions with '*clock*' functions could be the empty set. That means the main ops will be tracing all functions with '*time*' and all "*clock*" in it! Instead, modify the algorithm to be a bit simpler and correct. First, when adding a new subops, even if it's the first one, do not add the notrace_hash if the filter_hash is not empty. Instead, just add the functions that are in the filter_hash of the subops but not in the notrace_hash of the subops into the main ops filter_hash. There's no reason to add anything to the main ops notrace_hash. The notrace_hash of the main ops should only be non empty iff all subops filter_hashes are empty (meaning to trace all functions) and all subops notrace_hashes include the same functions. That is, the main ops notrace_hash is empty if any subops filter_hash is non empty. The main ops notrace_hash only has content in it if all subops filter_hashes are empty, and the content are only functions that intersect all the subops notrace_hashes. If any subops notrace_hash is empty, then so is the main ops notrace_hash. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Shuah Khan Cc: Andy Chiu Link: https://lore.kernel.org/20250409152720.216356767@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 314 ++++++++++++++++++++++++------------------ 1 file changed, 177 insertions(+), 137 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8939eeebb02e28..a8a02868b43588 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3255,6 +3255,31 @@ static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash, return 0; } +/* + * Remove functions from @hash that are in @notrace_hash + */ +static void remove_hash(struct ftrace_hash *hash, struct ftrace_hash *notrace_hash) +{ + struct ftrace_func_entry *entry; + struct hlist_node *tmp; + int size; + int i; + + /* If the notrace hash is empty, there's nothing to do */ + if (ftrace_hash_empty(notrace_hash)) + return; + + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry_safe(entry, tmp, &hash->buckets[i], hlist) { + if (!__ftrace_lookup_ip(notrace_hash, entry->ip)) + continue; + remove_hash_entry(hash, entry); + kfree(entry); + } + } +} + /* * Add to @hash only those that are in both @new_hash1 and @new_hash2 * @@ -3295,67 +3320,6 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has return 0; } -/* Return a new hash that has a union of all @ops->filter_hash entries */ -static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) -{ - struct ftrace_hash *new_hash = NULL; - struct ftrace_ops *subops; - int size_bits; - int ret; - - if (ops->func_hash->filter_hash) - size_bits = ops->func_hash->filter_hash->size_bits; - else - size_bits = FTRACE_HASH_DEFAULT_BITS; - - list_for_each_entry(subops, &ops->subop_list, list) { - ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits); - if (ret < 0) { - free_ftrace_hash(new_hash); - return NULL; - } - /* Nothing more to do if new_hash is empty */ - if (ftrace_hash_empty(new_hash)) - break; - } - /* Can't return NULL as that means this failed */ - return new_hash ? : EMPTY_HASH; -} - -/* Make @ops trace evenything except what all its subops do not trace */ -static struct ftrace_hash *intersect_hashes(struct ftrace_ops *ops) -{ - struct ftrace_hash *new_hash = NULL; - struct ftrace_ops *subops; - int size_bits; - int ret; - - list_for_each_entry(subops, &ops->subop_list, list) { - struct ftrace_hash *next_hash; - - if (!new_hash) { - size_bits = subops->func_hash->notrace_hash->size_bits; - new_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->notrace_hash); - if (!new_hash) - return NULL; - continue; - } - size_bits = new_hash->size_bits; - next_hash = new_hash; - new_hash = alloc_ftrace_hash(size_bits); - ret = intersect_hash(&new_hash, next_hash, subops->func_hash->notrace_hash); - free_ftrace_hash(next_hash); - if (ret < 0) { - free_ftrace_hash(new_hash); - return NULL; - } - /* Nothing more to do if new_hash is empty */ - if (ftrace_hash_empty(new_hash)) - break; - } - return new_hash; -} - static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B) { struct ftrace_func_entry *entry; @@ -3427,6 +3391,93 @@ static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_ return 0; } +static int add_first_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops_hash *func_hash) +{ + /* If the filter hash is not empty, simply remove the nohash from it */ + if (!ftrace_hash_empty(func_hash->filter_hash)) { + *filter_hash = copy_hash(func_hash->filter_hash); + if (!*filter_hash) + return -ENOMEM; + remove_hash(*filter_hash, func_hash->notrace_hash); + *notrace_hash = EMPTY_HASH; + + } else { + *notrace_hash = copy_hash(func_hash->notrace_hash); + if (!*notrace_hash) + return -ENOMEM; + *filter_hash = EMPTY_HASH; + } + return 0; +} + +static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops_hash *ops_hash, struct ftrace_ops_hash *subops_hash) +{ + int size_bits; + int ret; + + /* If the subops trace all functions so must the main ops */ + if (ftrace_hash_empty(ops_hash->filter_hash) || + ftrace_hash_empty(subops_hash->filter_hash)) { + *filter_hash = EMPTY_HASH; + } else { + /* + * The main ops filter hash is not empty, so its + * notrace_hash had better be, as the notrace hash + * is only used for empty main filter hashes. + */ + WARN_ON_ONCE(!ftrace_hash_empty(ops_hash->notrace_hash)); + + size_bits = max(ops_hash->filter_hash->size_bits, + subops_hash->filter_hash->size_bits); + + /* Copy the subops hash */ + *filter_hash = alloc_and_copy_ftrace_hash(size_bits, subops_hash->filter_hash); + if (!filter_hash) + return -ENOMEM; + /* Remove any notrace functions from the copy */ + remove_hash(*filter_hash, subops_hash->notrace_hash); + + ret = append_hash(filter_hash, ops_hash->filter_hash, + size_bits); + if (ret < 0) { + free_ftrace_hash(*filter_hash); + return ret; + } + } + + /* + * Only process notrace hashes if the main filter hash is empty + * (tracing all functions), otherwise the filter hash will just + * remove the notrace hash functions, and the notrace hash is + * not needed. + */ + if (ftrace_hash_empty(*filter_hash)) { + /* + * Intersect the notrace functions. That is, if two + * subops are not tracing a set of functions, the + * main ops will only not trace the functions that are + * in both subops, but has to trace the functions that + * are only notrace in one of the subops, for the other + * subops to be able to trace them. + */ + size_bits = max(ops_hash->notrace_hash->size_bits, + subops_hash->notrace_hash->size_bits); + *notrace_hash = alloc_ftrace_hash(size_bits); + if (!*notrace_hash) + return -ENOMEM; + + ret = intersect_hash(notrace_hash, ops_hash->notrace_hash, + subops_hash->notrace_hash); + if (ret < 0) { + free_ftrace_hash(*notrace_hash); + return ret; + } + } + return 0; +} + /** * ftrace_startup_subops - enable tracing for subops of an ops * @ops: Manager ops (used to pick all the functions of its subops) @@ -3443,7 +3494,6 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int struct ftrace_hash *notrace_hash; struct ftrace_hash *save_filter_hash; struct ftrace_hash *save_notrace_hash; - int size_bits; int ret; if (unlikely(ftrace_disabled)) @@ -3467,14 +3517,14 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int /* For the first subops to ops just enable it normally */ if (list_empty(&ops->subop_list)) { - /* Just use the subops hashes */ - filter_hash = copy_hash(subops->func_hash->filter_hash); - notrace_hash = copy_hash(subops->func_hash->notrace_hash); - if (!filter_hash || !notrace_hash) { - free_ftrace_hash(filter_hash); - free_ftrace_hash(notrace_hash); - return -ENOMEM; - } + + /* The ops was empty, should have empty hashes */ + WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->filter_hash)); + WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->notrace_hash)); + + ret = add_first_hash(&filter_hash, ¬race_hash, subops->func_hash); + if (ret < 0) + return ret; save_filter_hash = ops->func_hash->filter_hash; save_notrace_hash = ops->func_hash->notrace_hash; @@ -3500,48 +3550,16 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int /* * Here there's already something attached. Here are the rules: - * o If either filter_hash is empty then the final stays empty - * o Otherwise, the final is a superset of both hashes - * o If either notrace_hash is empty then the final stays empty - * o Otherwise, the final is an intersection between the hashes + * If the new subops and main ops filter hashes are not empty: + * o Make a copy of the subops filter hash + * o Remove all functions in the nohash from it. + * o Add in the main hash filter functions + * o Remove any of these functions from the main notrace hash */ - if (ftrace_hash_empty(ops->func_hash->filter_hash) || - ftrace_hash_empty(subops->func_hash->filter_hash)) { - filter_hash = EMPTY_HASH; - } else { - size_bits = max(ops->func_hash->filter_hash->size_bits, - subops->func_hash->filter_hash->size_bits); - filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash); - if (!filter_hash) - return -ENOMEM; - ret = append_hash(&filter_hash, subops->func_hash->filter_hash, - size_bits); - if (ret < 0) { - free_ftrace_hash(filter_hash); - return ret; - } - } - - if (ftrace_hash_empty(ops->func_hash->notrace_hash) || - ftrace_hash_empty(subops->func_hash->notrace_hash)) { - notrace_hash = EMPTY_HASH; - } else { - size_bits = max(ops->func_hash->notrace_hash->size_bits, - subops->func_hash->notrace_hash->size_bits); - notrace_hash = alloc_ftrace_hash(size_bits); - if (!notrace_hash) { - free_ftrace_hash(filter_hash); - return -ENOMEM; - } - ret = intersect_hash(¬race_hash, ops->func_hash->notrace_hash, - subops->func_hash->notrace_hash); - if (ret < 0) { - free_ftrace_hash(filter_hash); - free_ftrace_hash(notrace_hash); - return ret; - } - } + ret = add_next_hash(&filter_hash, ¬race_hash, ops->func_hash, subops->func_hash); + if (ret < 0) + return ret; list_add(&subops->list, &ops->subop_list); @@ -3557,6 +3575,42 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int return ret; } +static int rebuild_hashes(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops *ops) +{ + struct ftrace_ops_hash temp_hash; + struct ftrace_ops *subops; + bool first = true; + int ret; + + temp_hash.filter_hash = EMPTY_HASH; + temp_hash.notrace_hash = EMPTY_HASH; + + list_for_each_entry(subops, &ops->subop_list, list) { + *filter_hash = EMPTY_HASH; + *notrace_hash = EMPTY_HASH; + + if (first) { + ret = add_first_hash(filter_hash, notrace_hash, subops->func_hash); + if (ret < 0) + return ret; + first = false; + } else { + ret = add_next_hash(filter_hash, notrace_hash, + &temp_hash, subops->func_hash); + if (ret < 0) { + free_ftrace_hash(temp_hash.filter_hash); + free_ftrace_hash(temp_hash.notrace_hash); + return ret; + } + } + + temp_hash.filter_hash = *filter_hash; + temp_hash.notrace_hash = *notrace_hash; + } + return 0; +} + /** * ftrace_shutdown_subops - Remove a subops from a manager ops * @ops: A manager ops to remove @subops from @@ -3605,14 +3659,9 @@ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, in } /* Rebuild the hashes without subops */ - filter_hash = append_hashes(ops); - notrace_hash = intersect_hashes(ops); - if (!filter_hash || !notrace_hash) { - free_ftrace_hash(filter_hash); - free_ftrace_hash(notrace_hash); - list_add(&subops->list, &ops->subop_list); - return -ENOMEM; - } + ret = rebuild_hashes(&filter_hash, ¬race_hash, ops); + if (ret < 0) + return ret; ret = ftrace_update_ops(ops, filter_hash, notrace_hash); if (ret < 0) { @@ -3628,11 +3677,11 @@ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, in static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, struct ftrace_hash **orig_subhash, - struct ftrace_hash *hash, - int enable) + struct ftrace_hash *hash) { struct ftrace_ops *ops = subops->managed; - struct ftrace_hash **orig_hash; + struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash; struct ftrace_hash *save_hash; struct ftrace_hash *new_hash; int ret; @@ -3649,24 +3698,15 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, return -ENOMEM; } - /* Create a new_hash to hold the ops new functions */ - if (enable) { - orig_hash = &ops->func_hash->filter_hash; - new_hash = append_hashes(ops); - } else { - orig_hash = &ops->func_hash->notrace_hash; - new_hash = intersect_hashes(ops); - } - - /* Move the hash over to the new hash */ - ret = __ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable); - - free_ftrace_hash(new_hash); + ret = rebuild_hashes(&filter_hash, ¬race_hash, ops); + if (!ret) + ret = ftrace_update_ops(ops, filter_hash, notrace_hash); if (ret) { /* Put back the original hash */ - free_ftrace_hash_rcu(*orig_subhash); + new_hash = *orig_subhash; *orig_subhash = save_hash; + free_ftrace_hash_rcu(new_hash); } else { free_ftrace_hash_rcu(save_hash); } @@ -4890,7 +4930,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, int enable) { if (ops->flags & FTRACE_OPS_FL_SUBOP) - return ftrace_hash_move_and_update_subops(ops, orig_hash, hash, enable); + return ftrace_hash_move_and_update_subops(ops, orig_hash, hash); /* * If this ops is not enabled, it could be sharing its filters @@ -4909,7 +4949,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, list_for_each_entry(subops, &op->subop_list, list) { if ((subops->flags & FTRACE_OPS_FL_ENABLED) && subops->func_hash == ops->func_hash) { - return ftrace_hash_move_and_update_subops(subops, orig_hash, hash, enable); + return ftrace_hash_move_and_update_subops(subops, orig_hash, hash); } } } while_for_each_ftrace_op(op); From a1fc89d409d8fd927622c238b7c7d719e9ecab3d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Apr 2025 11:15:51 -0400 Subject: [PATCH 0520/2924] tracing/selftest: Add test to better test subops filtering of function graph A bug was discovered that showed the accounting of the subops of the ftrace_ops filtering was incorrect. Add a new test to better test the filtering. This test creates two instances, where it will add various filters to both the set_ftrace_filter and the set_ftrace_notrace files and enable function_graph. Then it looks into the enabled_functions file to make sure that the filters are behaving correctly. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Shuah Khan Cc: Andy Chiu Link: https://lore.kernel.org/20250409152720.380778379@goodmis.org Signed-off-by: Steven Rostedt (Google) --- .../test.d/ftrace/fgraph-multi-filter.tc | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc new file mode 100644 index 00000000000000..b6d6a312ead5ae --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc @@ -0,0 +1,177 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function graph filters +# requires: set_ftrace_filter function_graph:tracer + +# Make sure that function graph filtering works + +INSTANCE1="instances/test1_$$" +INSTANCE2="instances/test2_$$" + +WD=`pwd` + +do_reset() { + cd $WD + if [ -d $INSTANCE1 ]; then + echo nop > $INSTANCE1/current_tracer + rmdir $INSTANCE1 + fi + if [ -d $INSTANCE2 ]; then + echo nop > $INSTANCE2/current_tracer + rmdir $INSTANCE2 + fi +} + +mkdir $INSTANCE1 +if ! grep -q function_graph $INSTANCE1/available_tracers; then + echo "function_graph not allowed with instances" + rmdir $INSTANCE1 + exit_unsupported +fi + +mkdir $INSTANCE2 + +fail() { # msg + do_reset + echo $1 + exit_fail +} + +disable_tracing +clear_trace + +function_count() { + search=$1 + vsearch=$2 + + if [ -z "$search" ]; then + cat enabled_functions | wc -l + elif [ -z "$vsearch" ]; then + grep $search enabled_functions | wc -l + else + grep $search enabled_functions | grep $vsearch| wc -l + fi +} + +set_fgraph() { + instance=$1 + filter="$2" + notrace="$3" + + echo "$filter" > $instance/set_ftrace_filter + echo "$notrace" > $instance/set_ftrace_notrace + echo function_graph > $instance/current_tracer +} + +check_functions() { + orig_cnt=$1 + test=$2 + + cnt=`function_count $test` + if [ $cnt -gt $orig_cnt ]; then + fail + fi +} + +check_cnt() { + orig_cnt=$1 + search=$2 + vsearch=$3 + + cnt=`function_count $search $vsearch` + if [ $cnt -gt $orig_cnt ]; then + fail + fi +} + +reset_graph() { + instance=$1 + echo nop > $instance/current_tracer +} + +# get any functions that were enabled before the test +total_cnt=`function_count` +sched_cnt=`function_count sched` +lock_cnt=`function_count lock` +time_cnt=`function_count time` +clock_cnt=`function_count clock` +locks_clock_cnt=`function_count locks clock` +clock_locks_cnt=`function_count clock locks` + +# Trace functions with "sched" but not "time" +set_fgraph $INSTANCE1 '*sched*' '*time*' + +# Make sure "time" isn't listed +check_functions $time_cnt 'time' +instance1_cnt=`function_count` + +# Trace functions with "lock" but not "clock" +set_fgraph $INSTANCE2 '*lock*' '*clock*' +instance1_2_cnt=`function_count` + +# Turn off the first instance +reset_graph $INSTANCE1 + +# The second instance doesn't trace "clock" functions +check_functions $clock_cnt 'clock' +instance2_cnt=`function_count` + +# Start from a clean slate +reset_graph $INSTANCE2 +check_functions $total_cnt + +# Trace functions with "lock" but not "clock" +set_fgraph $INSTANCE2 '*lock*' '*clock*' + +# This should match the last time instance 2 was by itself +cnt=`function_count` +if [ $instance2_cnt -ne $cnt ]; then + fail +fi + +# And it should not be tracing "clock" functions +check_functions $clock_cnt 'clock' + +# Trace functions with "sched" but not "time" +set_fgraph $INSTANCE1 '*sched*' '*time*' + +# This should match the last time both instances were enabled +cnt=`function_count` +if [ $instance1_2_cnt -ne $cnt ]; then + fail +fi + +# Turn off the second instance +reset_graph $INSTANCE2 + +# This should match the last time instance 1 was by itself +cnt=`function_count` +if [ $instance1_cnt -ne $cnt ]; then + fail +fi + +# And it should not be tracing "time" functions +check_functions $time_cnt 'time' + +# Start from a clean slate +reset_graph $INSTANCE1 +check_functions $total_cnt + +# Enable all functions but those that have "locks" +set_fgraph $INSTANCE1 '' '*locks*' + +# Enable all functions but those that have "clock" +set_fgraph $INSTANCE2 '' '*clock*' + +# If a function has "locks" it should not have "clock" +check_cnt $locks_clock_cnt locks clock + +# If a function has "clock" it should not have "locks" +check_cnt $clock_locks_cnt clock locks + +reset_graph $INSTANCE1 +reset_graph $INSTANCE2 + +do_reset + +exit 0 From 752e2217d789be2c6a6ac66554b981cd71cd9f31 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 7 Apr 2025 10:03:17 -0700 Subject: [PATCH 0521/2924] smc: Fix lockdep false-positive for IPPROTO_SMC. SMC consists of two sockets: smc_sock and kernel TCP socket. Currently, there are two ways of creating the sockets, and syzbot reported a lockdep splat [0] for the newer way introduced by commit d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC"). socket(AF_SMC , SOCK_STREAM, SMCPROTO_SMC or SMCPROTO_SMC6) socket(AF_INET or AF_INET6, SOCK_STREAM, IPPROTO_SMC) When a socket is allocated, sock_lock_init() sets a lockdep lock class to sk->sk_lock.slock based on its protocol family. In the IPPROTO_SMC case, AF_INET or AF_INET6 lock class is assigned to smc_sock. The repro sets IPV6_JOIN_ANYCAST for IPv6 UDP and SMC socket and exercises smc_switch_to_fallback() for IPPROTO_SMC. 1. smc_switch_to_fallback() is called under lock_sock() and holds smc->clcsock_release_lock. sk_lock-AF_INET6 -> &smc->clcsock_release_lock (sk_lock-AF_SMC) 2. Setting IPV6_JOIN_ANYCAST to SMC holds smc->clcsock_release_lock and calls setsockopt() for the kernel TCP socket, which holds RTNL and the kernel socket's lock_sock(). &smc->clcsock_release_lock -> rtnl_mutex (-> k-sk_lock-AF_INET6) 3. Setting IPV6_JOIN_ANYCAST to UDP holds RTNL and lock_sock(). rtnl_mutex -> sk_lock-AF_INET6 Then, lockdep detects a false-positive circular locking, .-> sk_lock-AF_INET6 -> &smc->clcsock_release_lock -> rtnl_mutex -. `-----------------------------------------------------------------' but IPPROTO_SMC should have the same locking rule as AF_SMC. sk_lock-AF_SMC -> &smc->clcsock_release_lock -> rtnl_mutex -> k-sk_lock-AF_INET6 Let's set the same lock class for smc_sock. Given AF_SMC uses the same lock class for SMCPROTO_SMC and SMCPROTO_SMC6, we do not need to separate the class for AF_INET and AF_INET6. [0]: WARNING: possible circular locking dependency detected 6.14.0-rc3-syzkaller-00267-gff202c5028a1 #0 Not tainted syz.4.1528/11571 is trying to acquire lock: ffffffff8fef8de8 (rtnl_mutex){+.+.}-{4:4}, at: ipv6_sock_ac_close+0xd9/0x110 net/ipv6/anycast.c:220 but task is already holding lock: ffff888027f596a8 (&smc->clcsock_release_lock){+.+.}-{4:4}, at: smc_clcsock_release+0x75/0xe0 net/smc/smc_close.c:30 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&smc->clcsock_release_lock){+.+.}-{4:4}: __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 smc_switch_to_fallback+0x2d/0xa00 net/smc/af_smc.c:903 smc_sendmsg+0x13d/0x520 net/smc/af_smc.c:2781 sock_sendmsg_nosec net/socket.c:718 [inline] __sock_sendmsg net/socket.c:733 [inline] ____sys_sendmsg+0xaaf/0xc90 net/socket.c:2573 ___sys_sendmsg+0x135/0x1e0 net/socket.c:2627 __sys_sendmsg+0x16e/0x220 net/socket.c:2659 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #1 (sk_lock-AF_INET6){+.+.}-{0:0}: lock_sock_nested+0x3a/0xf0 net/core/sock.c:3645 lock_sock include/net/sock.h:1624 [inline] sockopt_lock_sock net/core/sock.c:1133 [inline] sockopt_lock_sock+0x54/0x70 net/core/sock.c:1124 do_ipv6_setsockopt+0x2160/0x4520 net/ipv6/ipv6_sockglue.c:567 ipv6_setsockopt+0xcb/0x170 net/ipv6/ipv6_sockglue.c:993 udpv6_setsockopt+0x7d/0xd0 net/ipv6/udp.c:1850 do_sock_setsockopt+0x222/0x480 net/socket.c:2303 __sys_setsockopt+0x1a0/0x230 net/socket.c:2328 __do_sys_setsockopt net/socket.c:2334 [inline] __se_sys_setsockopt net/socket.c:2331 [inline] __x64_sys_setsockopt+0xbd/0x160 net/socket.c:2331 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (rtnl_mutex){+.+.}-{4:4}: check_prev_add kernel/locking/lockdep.c:3163 [inline] check_prevs_add kernel/locking/lockdep.c:3282 [inline] validate_chain kernel/locking/lockdep.c:3906 [inline] __lock_acquire+0x249e/0x3c40 kernel/locking/lockdep.c:5228 lock_acquire.part.0+0x11b/0x380 kernel/locking/lockdep.c:5851 __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 ipv6_sock_ac_close+0xd9/0x110 net/ipv6/anycast.c:220 inet6_release+0x47/0x70 net/ipv6/af_inet6.c:485 __sock_release net/socket.c:647 [inline] sock_release+0x8e/0x1d0 net/socket.c:675 smc_clcsock_release+0xb7/0xe0 net/smc/smc_close.c:34 __smc_release+0x5c2/0x880 net/smc/af_smc.c:301 smc_release+0x1fc/0x5f0 net/smc/af_smc.c:344 __sock_release+0xb0/0x270 net/socket.c:647 sock_close+0x1c/0x30 net/socket.c:1398 __fput+0x3ff/0xb70 fs/file_table.c:464 task_work_run+0x14e/0x250 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop kernel/entry/common.c:114 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0x27b/0x2a0 kernel/entry/common.c:218 do_syscall_64+0xda/0x250 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f other info that might help us debug this: Chain exists of: rtnl_mutex --> sk_lock-AF_INET6 --> &smc->clcsock_release_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&smc->clcsock_release_lock); lock(sk_lock-AF_INET6); lock(&smc->clcsock_release_lock); lock(rtnl_mutex); *** DEADLOCK *** 2 locks held by syz.4.1528/11571: #0: ffff888077e88208 (&sb->s_type->i_mutex_key#10){+.+.}-{4:4}, at: inode_lock include/linux/fs.h:877 [inline] #0: ffff888077e88208 (&sb->s_type->i_mutex_key#10){+.+.}-{4:4}, at: __sock_release+0x86/0x270 net/socket.c:646 #1: ffff888027f596a8 (&smc->clcsock_release_lock){+.+.}-{4:4}, at: smc_clcsock_release+0x75/0xe0 net/smc/smc_close.c:30 stack backtrace: CPU: 0 UID: 0 PID: 11571 Comm: syz.4.1528 Not tainted 6.14.0-rc3-syzkaller-00267-gff202c5028a1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_circular_bug+0x490/0x760 kernel/locking/lockdep.c:2076 check_noncircular+0x31a/0x400 kernel/locking/lockdep.c:2208 check_prev_add kernel/locking/lockdep.c:3163 [inline] check_prevs_add kernel/locking/lockdep.c:3282 [inline] validate_chain kernel/locking/lockdep.c:3906 [inline] __lock_acquire+0x249e/0x3c40 kernel/locking/lockdep.c:5228 lock_acquire.part.0+0x11b/0x380 kernel/locking/lockdep.c:5851 __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 ipv6_sock_ac_close+0xd9/0x110 net/ipv6/anycast.c:220 inet6_release+0x47/0x70 net/ipv6/af_inet6.c:485 __sock_release net/socket.c:647 [inline] sock_release+0x8e/0x1d0 net/socket.c:675 smc_clcsock_release+0xb7/0xe0 net/smc/smc_close.c:34 __smc_release+0x5c2/0x880 net/smc/af_smc.c:301 smc_release+0x1fc/0x5f0 net/smc/af_smc.c:344 __sock_release+0xb0/0x270 net/socket.c:647 sock_close+0x1c/0x30 net/socket.c:1398 __fput+0x3ff/0xb70 fs/file_table.c:464 task_work_run+0x14e/0x250 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop kernel/entry/common.c:114 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0x27b/0x2a0 kernel/entry/common.c:218 do_syscall_64+0xda/0x250 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f8b4b38d169 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffe4efd22d8 EFLAGS: 00000246 ORIG_RAX: 00000000000001b4 RAX: 0000000000000000 RBX: 00000000000b14a3 RCX: 00007f8b4b38d169 RDX: 0000000000000000 RSI: 000000000000001e RDI: 0000000000000003 RBP: 00007f8b4b5a7ba0 R08: 0000000000000001 R09: 000000114efd25cf R10: 00007f8b4b200000 R11: 0000000000000246 R12: 00007f8b4b5a5fac R13: 00007f8b4b5a5fa0 R14: ffffffffffffffff R15: 00007ffe4efd23f0 Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Reported-by: syzbot+be6f4b383534d88989f7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=be6f4b383534d88989f7 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Wenjia Zhang Link: https://patch.msgid.link/20250407170332.26959-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 3e6cb35baf25af..3760131f148450 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -362,6 +362,9 @@ static void smc_destruct(struct sock *sk) return; } +static struct lock_class_key smc_key; +static struct lock_class_key smc_slock_key; + void smc_sk_init(struct net *net, struct sock *sk, int protocol) { struct smc_sock *smc = smc_sk(sk); @@ -375,6 +378,8 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol) INIT_WORK(&smc->connect_work, smc_connect_work); INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); INIT_LIST_HEAD(&smc->accept_q); + sock_lock_init_class_and_name(sk, "slock-AF_SMC", &smc_slock_key, + "sk_lock-AF_SMC", &smc_key); spin_lock_init(&smc->accept_q_lock); spin_lock_init(&smc->conn.send_lock); sk->sk_prot->hash(sk); From 18c889a9a419dc1662548777b7122d980bccfdad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 10 Apr 2025 12:43:21 +0200 Subject: [PATCH 0522/2924] selftests/tc-testing: Add test for echo of big TC filters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a selftest that checks whether the kernel can successfully echo a big tc filter, to test the fix introduced in commit: 369609fc6272 ("tc: Ensure we have enough buffer space when sending filter netlink notifications") Signed-off-by: Toke Høiland-Jørgensen Tested-by: Victor Nogueira Link: https://patch.msgid.link/20250410104322.214620-1-toke@redhat.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/actions.json | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json index 1ba96c46775486..d9fc62ab476c9e 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json @@ -412,5 +412,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY ingress" ] + }, + { + "id": "33f4", + "name": "Check echo of big filter command", + "category": [ + "infra", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY parent root handle 10: fq_codel" + ], + "cmdUnderTest": "bash -c '$TC -echo filter add dev $DUMMY parent 10: u32 match u32 0 0 $(for i in $(seq 32); do echo action pedit munge ip dport set 22; done) | grep \"added filter\"'", + "verifyCmd": "", + "expExitCode": "0", + "matchCount": "0", + "matchPattern": "", + "teardown": [ + "$TC qdisc del dev $DUMMY parent root fq_codel" + ] } ] From 7bdd8f75d16557ee4111c7b2678463cabf0f04c3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 10 Apr 2025 11:27:40 -0700 Subject: [PATCH 0523/2924] fwctl/cxl: Fix uuid_t usage in uapi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The uuid_t type is kernel internal, and Paul reports the following build error when it is used in a uapi header: usr/include/cxl/features.h:59:9: error: unknown type name ‘uuid_t’ Create a uuid type (__uapi_uuid_t) compatible with the longstanding definition uuid/uuid.h for userspace builds, and use uuid_t directly for kernel builds. Fixes: 9b8e73cdb141 ("cxl: Move cxl feature command structs to user header") Link: https://patch.msgid.link/r/174430961702.617339.13963021112051029933.stgit@dwillia2-xfh.jf.intel.com Suggested-by: Jason Gunthorpe Reported-by: Paul E. McKenney Closes: http://lore.kernel.org/f6489337-67c7-48c8-b48a-58603ec15328@paulmck-laptop Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504050434.Eb4vugh5-lkp@intel.com/ Signed-off-by: Dan Williams Reviewed-by: Dave Jiang Tested-by: Paul E. McKenney Signed-off-by: Jason Gunthorpe --- include/uapi/cxl/features.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/include/uapi/cxl/features.h b/include/uapi/cxl/features.h index d6db8984889fa6..490606d7694b97 100644 --- a/include/uapi/cxl/features.h +++ b/include/uapi/cxl/features.h @@ -8,10 +8,19 @@ #define _UAPI_CXL_FEATURES_H_ #include -#ifndef __KERNEL__ -#include -#else + +typedef unsigned char __uapi_uuid_t[16]; + +#ifdef __KERNEL__ #include +/* + * Note, __uapi_uuid_t is 1-byte aligned on modern compilers and 4-byte + * aligned on others. Ensure that __uapi_uuid_t in a struct is placed at + * a 4-byte aligned offset, or the structure is packed, to ensure + * consistent padding. + */ +static_assert(sizeof(__uapi_uuid_t) == sizeof(uuid_t)); +#define __uapi_uuid_t uuid_t #endif /* @@ -60,7 +69,7 @@ struct cxl_mbox_get_sup_feats_in { * Get Supported Features Supported Feature Entry */ struct cxl_feat_entry { - uuid_t uuid; + __uapi_uuid_t uuid; __le16 id; __le16 get_feat_size; __le16 set_feat_size; @@ -110,7 +119,7 @@ struct cxl_mbox_get_sup_feats_out { * CXL spec r3.2 section 8.2.9.6.2 Table 8-99 */ struct cxl_mbox_get_feat_in { - uuid_t uuid; + __uapi_uuid_t uuid; __le16 offset; __le16 count; __u8 selection; @@ -143,7 +152,7 @@ enum cxl_get_feat_selection { */ struct cxl_mbox_set_feat_in { __struct_group(cxl_mbox_set_feat_hdr, hdr, /* no attrs */, - uuid_t uuid; + __uapi_uuid_t uuid; __le32 flags; __le16 offset; __u8 version; From fd292c1f100ccd006bebb7b4fe7308e68b3753e0 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Wed, 2 Apr 2025 09:56:30 -0700 Subject: [PATCH 0524/2924] pds_fwctl: Fix type and endian complaints Fix a number of type and endian complaints from the sparse checker. Link: https://patch.msgid.link/r/20250402165630.24288-1-shannon.nelson@amd.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504020246.Dfbhxoo9-lkp@intel.com/ Signed-off-by: Shannon Nelson Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Signed-off-by: Jason Gunthorpe --- drivers/fwctl/pds/main.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/fwctl/pds/main.c b/drivers/fwctl/pds/main.c index 284c4165fdd435..9b9d1f6b55566c 100644 --- a/drivers/fwctl/pds/main.c +++ b/drivers/fwctl/pds/main.c @@ -105,12 +105,14 @@ static int pdsfc_identify(struct pdsfc_dev *pdsfc) static void pdsfc_free_endpoints(struct pdsfc_dev *pdsfc) { struct device *dev = &pdsfc->fwctl.dev; + u32 num_endpoints; int i; if (!pdsfc->endpoints) return; - for (i = 0; pdsfc->endpoint_info && i < pdsfc->endpoints->num_entries; i++) + num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries); + for (i = 0; pdsfc->endpoint_info && i < num_endpoints; i++) mutex_destroy(&pdsfc->endpoint_info[i].lock); vfree(pdsfc->endpoint_info); pdsfc->endpoint_info = NULL; @@ -199,7 +201,7 @@ static int pdsfc_init_endpoints(struct pdsfc_dev *pdsfc) ep_entry = (struct pds_fwctl_query_data_endpoint *)pdsfc->endpoints->entries; for (i = 0; i < num_endpoints; i++) { mutex_init(&pdsfc->endpoint_info[i].lock); - pdsfc->endpoint_info[i].endpoint = ep_entry[i].id; + pdsfc->endpoint_info[i].endpoint = le32_to_cpu(ep_entry[i].id); } return 0; @@ -214,6 +216,7 @@ static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc struct pds_fwctl_query_data *data; union pds_core_adminq_cmd cmd; dma_addr_t data_pa; + u32 num_entries; int err; int i; @@ -246,8 +249,9 @@ static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc *pa = data_pa; entries = (struct pds_fwctl_query_data_operation *)data->entries; - dev_dbg(dev, "num_entries %d\n", data->num_entries); - for (i = 0; i < data->num_entries; i++) { + num_entries = le32_to_cpu(data->num_entries); + dev_dbg(dev, "num_entries %d\n", num_entries); + for (i = 0; i < num_entries; i++) { /* Translate FW command attribute to fwctl scope */ switch (entries[i].scope) { @@ -267,7 +271,7 @@ static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc break; } dev_dbg(dev, "endpoint %d operation: id %x scope %d\n", - ep, entries[i].id, entries[i].scope); + ep, le32_to_cpu(entries[i].id), entries[i].scope); } return data; @@ -280,24 +284,26 @@ static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc, struct pds_fwctl_query_data_operation *op_entry; struct pdsfc_rpc_endpoint_info *ep_info = NULL; struct device *dev = &pdsfc->fwctl.dev; + u32 num_entries; int i; /* validate rpc in_len & out_len based * on ident.max_req_sz & max_resp_sz */ - if (rpc->in.len > pdsfc->ident.max_req_sz) { + if (rpc->in.len > le32_to_cpu(pdsfc->ident.max_req_sz)) { dev_dbg(dev, "Invalid request size %u, max %u\n", - rpc->in.len, pdsfc->ident.max_req_sz); + rpc->in.len, le32_to_cpu(pdsfc->ident.max_req_sz)); return -EINVAL; } - if (rpc->out.len > pdsfc->ident.max_resp_sz) { + if (rpc->out.len > le32_to_cpu(pdsfc->ident.max_resp_sz)) { dev_dbg(dev, "Invalid response size %u, max %u\n", - rpc->out.len, pdsfc->ident.max_resp_sz); + rpc->out.len, le32_to_cpu(pdsfc->ident.max_resp_sz)); return -EINVAL; } - for (i = 0; i < pdsfc->endpoints->num_entries; i++) { + num_entries = le32_to_cpu(pdsfc->endpoints->num_entries); + for (i = 0; i < num_entries; i++) { if (pdsfc->endpoint_info[i].endpoint == rpc->in.ep) { ep_info = &pdsfc->endpoint_info[i]; break; @@ -326,8 +332,9 @@ static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc, /* reject unsupported and/or out of scope commands */ op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries; - for (i = 0; i < ep_info->operations->num_entries; i++) { - if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) { + num_entries = le32_to_cpu(ep_info->operations->num_entries); + for (i = 0; i < num_entries; i++) { + if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, le32_to_cpu(op_entry[i].id))) { if (scope < op_entry[i].scope) return -EPERM; return 0; @@ -402,7 +409,7 @@ static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, cmd = (union pds_core_adminq_cmd) { .fwctl_rpc = { .opcode = PDS_FWCTL_CMD_RPC, - .flags = PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP, + .flags = cpu_to_le16(PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP), .ep = cpu_to_le32(rpc->in.ep), .op = cpu_to_le32(rpc->in.op), .req_pa = cpu_to_le64(in_payload_dma_addr), From c92ae5d4f53ebf9c32ace69c1f89a47e8714d18b Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 8 Apr 2025 15:33:00 -0700 Subject: [PATCH 0525/2924] fwctl: Fix repeated device word in log message Remove the repeated word "device" from a dev_warn() message. Link: https://patch.msgid.link/r/20250408223300.24561-1-shannon.nelson@amd.com Signed-off-by: Shannon Nelson Reviewed-by: Dave Jiang Signed-off-by: Jason Gunthorpe --- drivers/fwctl/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c index cb1ac9c4023938..bc6378506296cd 100644 --- a/drivers/fwctl/main.c +++ b/drivers/fwctl/main.c @@ -105,7 +105,7 @@ static int fwctl_cmd_rpc(struct fwctl_ucmd *ucmd) if (!test_and_set_bit(0, &fwctl_tainted)) { dev_warn( &fwctl->dev, - "%s(%d): has requested full access to the physical device device", + "%s(%d): has requested full access to the physical device", current->comm, task_pid_nr(current)); add_taint(TAINT_FWCTL, LOCKDEP_STILL_OK); } From d6b3ef9e7a9c2398a71978874957c88c73d270a6 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Wed, 2 Apr 2025 15:01:37 +0200 Subject: [PATCH 0526/2924] mailmap: map Loic Poulain's old email addresses Map old email addresses that are no longer in use. Link: https://lkml.kernel.org/r/20250402130137.12328-1-loic.poulain@oss.qualcomm.com Signed-off-by: Loic Poulain Cc: Simon Horman Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 4f7cd8e231778c..6a874f9950bf33 100644 --- a/.mailmap +++ b/.mailmap @@ -438,6 +438,8 @@ Linus Lüssing Li Yang Li Yang Lior David +Loic Poulain +Loic Poulain Lorenzo Pieralisi Lorenzo Stoakes Luca Ceresoli From be8254f694469e60252e35c467ac9a878d7797bf Mon Sep 17 00:00:00 2001 From: Daniel Gomez Date: Fri, 21 Mar 2025 20:24:33 +0000 Subject: [PATCH 0527/2924] radix-tree: add missing cleanup.h Add shared cleanup.h header for radix-tree testing tools. Fixes build error found with kdevops [1]: cc -I../shared -I. -I../../include -I../../../lib -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined -c -o radix-tree.o radix-tree.c In file included from ../shared/linux/idr.h:1, from radix-tree.c:18: ../shared/linux/../../../../include/linux/idr.h:18:10: fatal error: linux/cleanup.h: No such file or directory 18 | #include | ^~~~~~~~~~~~~~~~~ compilation terminated. make: *** [: radix-tree.o] Error 1 [1] https://github.com/linux-kdevops/kdevops https://github.com/linux-kdevops/linux-mm-kpd/ actions/runs/13971648496/job/39114756401 [akpm@linux-foundation.org: remove unneeded header guards, per Sidhartha] Link: https://lkml.kernel.org/r/20250321-fix-radix-tree-build-v1-1-838a1e6540e2@samsung.com Fixes: 6c8b0b835f00 ("perf/core: Simplify perf_pmu_register()") Signed-off-by: Daniel Gomez Cc: Daniel Gomez Cc: Ingo Molnar Cc: Liam Howlett Cc: Luis Chamberalin Cc: Matthew Wilcox (Oracle) Cc: Ravi Bangoria Cc: Peter Zijlstra Cc: Sidhartha Kumar Signed-off-by: Andrew Morton --- tools/testing/shared/linux/cleanup.h | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tools/testing/shared/linux/cleanup.h diff --git a/tools/testing/shared/linux/cleanup.h b/tools/testing/shared/linux/cleanup.h new file mode 100644 index 00000000000000..ea3081426ee95b --- /dev/null +++ b/tools/testing/shared/linux/cleanup.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../../../../include/linux/cleanup.h" From a30951d09c33c899f0e4aca80eb87fad5f10ecfa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 20:33:11 -0400 Subject: [PATCH 0528/2924] test suite: use %zu to print size_t On 32-bit, we can't use %lu to print a size_t variable and gcc warns us about it. Shame it doesn't warn about it on 64-bit. Link: https://lkml.kernel.org/r/20250403003311.359917-1-Liam.Howlett@oracle.com Fixes: cc86e0c2f306 ("radix tree test suite: add support for slab bulk APIs") Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- tools/testing/shared/linux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 66dbb362385f3c..0f97fb0d19e19c 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -150,7 +150,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list) { if (kmalloc_verbose) - pr_debug("Bulk free %p[0-%lu]\n", list, size - 1); + pr_debug("Bulk free %p[0-%zu]\n", list, size - 1); pthread_mutex_lock(&cachep->lock); for (int i = 0; i < size; i++) @@ -168,7 +168,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, size_t i; if (kmalloc_verbose) - pr_debug("Bulk alloc %lu\n", size); + pr_debug("Bulk alloc %zu\n", size); pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs >= size) { From 51339d99c0131bc0d16d378e9b05bc498d2967e2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 2 Apr 2025 19:55:14 -0700 Subject: [PATCH 0529/2924] locking/local_lock, mm: replace localtry_ helpers with local_trylock_t type Partially revert commit 0aaddfb06882 ("locking/local_lock: Introduce localtry_lock_t"). Remove localtry_*() helpers, since localtry_lock() name might be misinterpreted as "try lock". Introduce local_trylock[_irqsave]() helpers that only work with newly introduced local_trylock_t type. Note that attempt to use local_trylock[_irqsave]() with local_lock_t will cause compilation failure. Usage and behavior in !PREEMPT_RT: local_lock_t lock; // sizeof(lock) == 0 local_lock(&lock); // preempt disable local_lock_irqsave(&lock, ...); // irq save if (local_trylock_irqsave(&lock, ...)) // compilation error local_trylock_t lock; // sizeof(lock) == 4 local_lock(&lock); // preempt disable, acquired = 1 local_lock_irqsave(&lock, ...); // irq save, acquired = 1 if (local_trylock(&lock)) // if (!acquired) preempt disable, acquired = 1 if (local_trylock_irqsave(&lock, ...)) // if (!acquired) irq save, acquired = 1 The existing local_lock_*() macros can be used either with local_lock_t or local_trylock_t. With local_trylock_t they set acquired = 1 while local_unlock_*() clears it. In !PREEMPT_RT local_lock_irqsave(local_lock_t *) disables interrupts to protect critical section, but it doesn't prevent NMI, so the fully reentrant code cannot use local_lock_irqsave(local_lock_t *) for exclusive access. The local_lock_irqsave(local_trylock_t *) helper disables interrupts and sets acquired=1, so local_trylock_irqsave(local_trylock_t *) from NMI attempting to acquire the same lock will return false. In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock(). Map local_trylock_irqsave() to preemptible spin_trylock(). When in hard IRQ or NMI return false right away, since spin_trylock() is not safe due to explicit locking in the underneath rt_spin_trylock() implementation. Removing this explicit locking and attempting only "trylock" is undesired due to PI implications. The local_trylock() without _irqsave can be used to avoid the cost of disabling/enabling interrupts by only disabling preemption, so local_trylock() in an interrupt attempting to acquire the same lock will return false. Note there is no need to use local_inc for acquired variable, since it's a percpu variable with strict nesting scopes. Note that guard(local_lock)(&lock) works only for "local_lock_t lock". The patch also makes sure that local_lock_release(l) is called before WRITE_ONCE(l->acquired, 0). Though IRQs are disabled at this point the local_trylock() from NMI will succeed and local_lock_acquire(l) will warn. Link: https://lkml.kernel.org/r/20250403025514.41186-1-alexei.starovoitov@gmail.com Fixes: 0aaddfb06882 ("locking/local_lock: Introduce localtry_lock_t") Signed-off-by: Alexei Starovoitov Acked-by: Vlastimil Babka Acked-by: Sebastian Andrzej Siewior Reviewed-by: Shakeel Butt Cc: Daniel Borkman Cc: Linus Torvalds Cc: Martin KaFai Lau Cc: Michal Hocko Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/local_lock.h | 58 ++------ include/linux/local_lock_internal.h | 207 ++++++++++++---------------- mm/memcontrol.c | 39 +++--- 3 files changed, 114 insertions(+), 190 deletions(-) diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 1a0bc35839e360..16a2ee4f8310b1 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -52,44 +52,23 @@ __local_unlock_irqrestore(lock, flags) /** - * localtry_lock_init - Runtime initialize a lock instance - */ -#define localtry_lock_init(lock) __localtry_lock_init(lock) - -/** - * localtry_lock - Acquire a per CPU local lock - * @lock: The lock variable - */ -#define localtry_lock(lock) __localtry_lock(lock) - -/** - * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts - * @lock: The lock variable - */ -#define localtry_lock_irq(lock) __localtry_lock_irq(lock) - -/** - * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable - * interrupts - * @lock: The lock variable - * @flags: Storage for interrupt flags + * local_lock_init - Runtime initialize a lock instance */ -#define localtry_lock_irqsave(lock, flags) \ - __localtry_lock_irqsave(lock, flags) +#define local_trylock_init(lock) __local_trylock_init(lock) /** - * localtry_trylock - Try to acquire a per CPU local lock. + * local_trylock - Try to acquire a per CPU local lock * @lock: The lock variable * * The function can be used in any context such as NMI or HARDIRQ. Due to * locking constrains it will _always_ fail to acquire the lock in NMI or * HARDIRQ context on PREEMPT_RT. */ -#define localtry_trylock(lock) __localtry_trylock(lock) +#define local_trylock(lock) __local_trylock(lock) /** - * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable - * interrupts if acquired + * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable + * interrupts if acquired * @lock: The lock variable * @flags: Storage for interrupt flags * @@ -97,29 +76,8 @@ * locking constrains it will _always_ fail to acquire the lock in NMI or * HARDIRQ context on PREEMPT_RT. */ -#define localtry_trylock_irqsave(lock, flags) \ - __localtry_trylock_irqsave(lock, flags) - -/** - * local_unlock - Release a per CPU local lock - * @lock: The lock variable - */ -#define localtry_unlock(lock) __localtry_unlock(lock) - -/** - * local_unlock_irq - Release a per CPU local lock and enable interrupts - * @lock: The lock variable - */ -#define localtry_unlock_irq(lock) __localtry_unlock_irq(lock) - -/** - * localtry_unlock_irqrestore - Release a per CPU local lock and restore - * interrupt flags - * @lock: The lock variable - * @flags: Interrupt flags to restore - */ -#define localtry_unlock_irqrestore(lock, flags) \ - __localtry_unlock_irqrestore(lock, flags) +#define local_trylock_irqsave(lock, flags) \ + __local_trylock_irqsave(lock, flags) DEFINE_GUARD(local_lock, local_lock_t __percpu*, local_lock(_T), diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 67bd13d142fac3..bf2bf40d7b18d0 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -15,10 +15,11 @@ typedef struct { #endif } local_lock_t; +/* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ typedef struct { local_lock_t llock; - unsigned int acquired; -} localtry_lock_t; + u8 acquired; +} local_trylock_t; #ifdef CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ @@ -29,6 +30,9 @@ typedef struct { }, \ .owner = NULL, +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ + .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, + static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); @@ -56,6 +60,7 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } static inline void local_trylock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } @@ -63,7 +68,7 @@ static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } -#define INIT_LOCALTRY_LOCK(lockname) { .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }} +#define INIT_LOCAL_TRYLOCK(lockname) { LOCAL_TRYLOCK_DEBUG_INIT(lockname) } #define __local_lock_init(lock) \ do { \ @@ -76,6 +81,8 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_trylock_init(lock) __local_lock_init(lock.llock) + #define __spinlock_nested_bh_init(lock) \ do { \ static struct lock_class_key __key; \ @@ -87,149 +94,117 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_lock_acquire(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + _Generic((lock), \ + local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 0); \ + WRITE_ONCE(tl->acquired, 1); \ + }), \ + default:(void)0); \ + local_lock_acquire(l); \ + } while (0) + #define __local_lock(lock) \ do { \ preempt_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irq(lock) \ do { \ local_irq_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irqsave(lock, flags) \ do { \ local_irq_save(flags); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -#define __local_unlock(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - preempt_enable(); \ + __local_lock_acquire(lock); \ } while (0) -#define __local_unlock_irq(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_enable(); \ - } while (0) - -#define __local_unlock_irqrestore(lock, flags) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_restore(flags); \ - } while (0) - -#define __local_lock_nested_bh(lock) \ - do { \ - lockdep_assert_in_softirq(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -#define __local_unlock_nested_bh(lock) \ - local_lock_release(this_cpu_ptr(lock)) - -/* localtry_lock_t variants */ - -#define __localtry_lock_init(lock) \ -do { \ - __local_lock_init(&(lock)->llock); \ - WRITE_ONCE((lock)->acquired, 0); \ -} while (0) - -#define __localtry_lock(lock) \ - do { \ - localtry_lock_t *lt; \ - preempt_disable(); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_lock_irq(lock) \ - do { \ - localtry_lock_t *lt; \ - local_irq_disable(); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_lock_irqsave(lock, flags) \ - do { \ - localtry_lock_t *lt; \ - local_irq_save(flags); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_trylock(lock) \ +#define __local_trylock(lock) \ ({ \ - localtry_lock_t *lt; \ - bool _ret; \ + local_trylock_t *tl; \ \ preempt_disable(); \ - lt = this_cpu_ptr(lock); \ - if (!READ_ONCE(lt->acquired)) { \ - WRITE_ONCE(lt->acquired, 1); \ - local_trylock_acquire(<->llock); \ - _ret = true; \ - } else { \ - _ret = false; \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ preempt_enable(); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ } \ - _ret; \ + !!tl; \ }) -#define __localtry_trylock_irqsave(lock, flags) \ +#define __local_trylock_irqsave(lock, flags) \ ({ \ - localtry_lock_t *lt; \ - bool _ret; \ + local_trylock_t *tl; \ \ local_irq_save(flags); \ - lt = this_cpu_ptr(lock); \ - if (!READ_ONCE(lt->acquired)) { \ - WRITE_ONCE(lt->acquired, 1); \ - local_trylock_acquire(<->llock); \ - _ret = true; \ - } else { \ - _ret = false; \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ local_irq_restore(flags); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ } \ - _ret; \ + !!tl; \ }) -#define __localtry_unlock(lock) \ +#define __local_lock_release(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + local_lock_release(l); \ + _Generic((lock), \ + local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 1); \ + WRITE_ONCE(tl->acquired, 0); \ + }), \ + default:(void)0); \ + } while (0) + +#define __local_unlock(lock) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ preempt_enable(); \ } while (0) -#define __localtry_unlock_irq(lock) \ +#define __local_unlock_irq(lock) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ local_irq_enable(); \ } while (0) -#define __localtry_unlock_irqrestore(lock, flags) \ +#define __local_unlock_irqrestore(lock, flags) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ local_irq_restore(flags); \ } while (0) +#define __local_lock_nested_bh(lock) \ + do { \ + lockdep_assert_in_softirq(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +#define __local_unlock_nested_bh(lock) \ + local_lock_release(this_cpu_ptr(lock)) + #else /* !CONFIG_PREEMPT_RT */ /* @@ -237,16 +212,18 @@ do { \ * critical section while staying preemptible. */ typedef spinlock_t local_lock_t; -typedef spinlock_t localtry_lock_t; +typedef spinlock_t local_trylock_t; #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) -#define INIT_LOCALTRY_LOCK(lockname) INIT_LOCAL_LOCK(lockname) +#define INIT_LOCAL_TRYLOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) #define __local_lock_init(l) \ do { \ local_spin_lock_init((l)); \ } while (0) +#define __local_trylock_init(l) __local_lock_init(l) + #define __local_lock(__lock) \ do { \ migrate_disable(); \ @@ -283,17 +260,7 @@ do { \ spin_unlock(this_cpu_ptr((lock))); \ } while (0) -/* localtry_lock_t variants */ - -#define __localtry_lock_init(lock) __local_lock_init(lock) -#define __localtry_lock(lock) __local_lock(lock) -#define __localtry_lock_irq(lock) __local_lock(lock) -#define __localtry_lock_irqsave(lock, flags) __local_lock_irqsave(lock, flags) -#define __localtry_unlock(lock) __local_unlock(lock) -#define __localtry_unlock_irq(lock) __local_unlock(lock) -#define __localtry_unlock_irqrestore(lock, flags) __local_unlock_irqrestore(lock, flags) - -#define __localtry_trylock(lock) \ +#define __local_trylock(lock) \ ({ \ int __locked; \ \ @@ -308,11 +275,11 @@ do { \ __locked; \ }) -#define __localtry_trylock_irqsave(lock, flags) \ +#define __local_trylock_irqsave(lock, flags) \ ({ \ typecheck(unsigned long, flags); \ flags = 0; \ - __localtry_trylock(lock); \ + __local_trylock(lock); \ }) #endif /* CONFIG_PREEMPT_RT */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 421740f1bcdc66..c96c1f2b9cf575 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1759,7 +1759,7 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) } struct memcg_stock_pcp { - localtry_lock_t stock_lock; + local_trylock_t stock_lock; struct mem_cgroup *cached; /* this never be root cgroup */ unsigned int nr_pages; @@ -1774,7 +1774,7 @@ struct memcg_stock_pcp { #define FLUSHING_CACHED_CHARGE 0 }; static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = { - .stock_lock = INIT_LOCALTRY_LOCK(stock_lock), + .stock_lock = INIT_LOCAL_TRYLOCK(stock_lock), }; static DEFINE_MUTEX(percpu_charge_mutex); @@ -1805,11 +1805,10 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages, if (nr_pages > MEMCG_CHARGE_BATCH) return ret; - if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) { - if (!gfpflags_allow_spinning(gfp_mask)) - return ret; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); - } + if (gfpflags_allow_spinning(gfp_mask)) + local_lock_irqsave(&memcg_stock.stock_lock, flags); + else if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags)) + return ret; stock = this_cpu_ptr(&memcg_stock); stock_pages = READ_ONCE(stock->nr_pages); @@ -1818,7 +1817,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages, ret = true; } - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); return ret; } @@ -1857,14 +1856,14 @@ static void drain_local_stock(struct work_struct *dummy) * drain_stock races is that we always operate on local CPU stock * here with IRQ disabled */ - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); old = drain_obj_stock(stock); drain_stock(stock); clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); } @@ -1894,7 +1893,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { unsigned long flags; - if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) { + if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags)) { /* * In case of unlikely failure to lock percpu stock_lock * uncharge memcg directly. @@ -1907,7 +1906,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) return; } __refill_stock(memcg, nr_pages); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); } /* @@ -1964,9 +1963,9 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu) stock = &per_cpu(memcg_stock, cpu); /* drain_obj_stock requires stock_lock */ - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); old = drain_obj_stock(stock); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); drain_stock(stock); obj_cgroup_put(old); @@ -2787,7 +2786,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, unsigned long flags; int *bytes; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); /* @@ -2836,7 +2835,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, if (nr) __mod_objcg_mlstate(objcg, pgdat, idx, nr); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); } @@ -2846,7 +2845,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) unsigned long flags; bool ret = false; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { @@ -2854,7 +2853,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) ret = true; } - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); return ret; } @@ -2946,7 +2945,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, unsigned long flags; unsigned int nr_pages = 0; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ @@ -2960,7 +2959,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, stock->nr_bytes &= (PAGE_SIZE - 1); } - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); if (nr_pages) From 1b17cdbb708bf435973d0a57bee4230d242085cf Mon Sep 17 00:00:00 2001 From: Takuma Watanabe Date: Tue, 18 Mar 2025 20:55:19 +0900 Subject: [PATCH 0530/2924] mseal: fix typo and style in documentation Correct a typo in the mseal documentation. Link: https://lkml.kernel.org/r/20250318115521.11654-1-takumaw1990@gmail.com Signed-off-by: Takuma Watanabe Cc: Jeff Xu Signed-off-by: Andrew Morton --- Documentation/userspace-api/mseal.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/userspace-api/mseal.rst b/Documentation/userspace-api/mseal.rst index 1dabfc29be0d1f..7195a7f911075c 100644 --- a/Documentation/userspace-api/mseal.rst +++ b/Documentation/userspace-api/mseal.rst @@ -27,7 +27,7 @@ SYSCALL ======= mseal syscall signature ----------------------- - ``int mseal(void \* addr, size_t len, unsigned long flags)`` + ``int mseal(void *addr, size_t len, unsigned long flags)`` **addr**/**len**: virtual memory address range. The address range set by **addr**/**len** must meet: From 770c8d55c42868239c748a3ebc57c9e37755f842 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 1 Apr 2025 22:47:12 +0800 Subject: [PATCH 0531/2924] lib/iov_iter: fix to increase non slab folio refcount When testing EROFS file-backed mount over v9fs on qemu, I encountered a folio UAF issue. The page sanity check reports the following call trace. The root cause is that pages in bvec are coalesced across a folio bounary. The refcount of all non-slab folios should be increased to ensure p9_releas_pages can put them correctly. BUG: Bad page state in process md5sum pfn:18300 page: refcount:0 mapcount:0 mapping:00000000d5ad8e4e index:0x60 pfn:0x18300 head: order:0 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 aops:z_erofs_aops ino:30b0f dentry name(?):"GoogleExtServicesCn.apk" flags: 0x100000000000041(locked|head|node=0|zone=1) raw: 0100000000000041 dead000000000100 dead000000000122 ffff888014b13bd0 raw: 0000000000000060 0000000000000020 00000000ffffffff 0000000000000000 head: 0100000000000041 dead000000000100 dead000000000122 ffff888014b13bd0 head: 0000000000000060 0000000000000020 00000000ffffffff 0000000000000000 head: 0100000000000000 0000000000000000 ffffffffffffffff 0000000000000000 head: 0000000000000010 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set Call Trace: dump_stack_lvl+0x53/0x70 bad_page+0xd4/0x220 __free_pages_ok+0x76d/0xf30 __folio_put+0x230/0x320 p9_release_pages+0x179/0x1f0 p9_virtio_zc_request+0xa2a/0x1230 p9_client_zc_rpc.constprop.0+0x247/0x700 p9_client_read_once+0x34d/0x810 p9_client_read+0xf3/0x150 v9fs_issue_read+0x111/0x360 netfs_unbuffered_read_iter_locked+0x927/0x1390 netfs_unbuffered_read_iter+0xa2/0xe0 vfs_iocb_iter_read+0x2c7/0x460 erofs_fileio_rq_submit+0x46b/0x5b0 z_erofs_runqueue+0x1203/0x21e0 z_erofs_readahead+0x579/0x8b0 read_pages+0x19f/0xa70 page_cache_ra_order+0x4ad/0xb80 filemap_readahead.isra.0+0xe7/0x150 filemap_get_pages+0x7aa/0x1890 filemap_read+0x320/0xc80 vfs_read+0x6c6/0xa30 ksys_read+0xf9/0x1c0 do_syscall_64+0x9e/0x1a0 entry_SYSCALL_64_after_hwframe+0x71/0x79 Link: https://lkml.kernel.org/r/20250401144712.1377719-1-shengyong1@xiaomi.com Fixes: b9c0e49abfca ("mm: decline to manipulate the refcount on a slab page") Signed-off-by: Sheng Yong Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- lib/iov_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8c7fdb7d8c8fa3..bc9391e55d57ea 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1191,7 +1191,7 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, return -ENOMEM; p = *pages; for (int k = 0; k < n; k++) { - struct folio *folio = page_folio(page); + struct folio *folio = page_folio(page + k); p[k] = page + k; if (!folio_test_slab(folio)) folio_get(folio); From a84edd52f0a0fa193f0f685769939cf84510755b Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 31 Mar 2025 19:10:24 -0700 Subject: [PATCH 0532/2924] mm/compaction: fix bug in hugetlb handling pathway The compaction code doesn't take references on pages until we're certain we should attempt to handle it. In the hugetlb case, isolate_or_dissolve_huge_page() may return -EBUSY without taking a reference to the folio associated with our pfn. If our folio's refcount drops to 0, compound_nr() becomes unpredictable, making low_pfn and nr_scanned unreliable. The user-visible effect is minimal - this should rarely happen (if ever). Fix this by storing the folio statistics earlier on the stack (just like the THP and Buddy cases). Also revert commit 66fe1cf7f581 ("mm: compaction: use helper compound_nr in isolate_migratepages_block") to make backporting easier. Link: https://lkml.kernel.org/r/20250401021025.637333-1-vishal.moola@gmail.com Fixes: 369fa227c219 ("mm: make alloc_contig_range handle free hugetlb pages") Signed-off-by: Vishal Moola (Oracle) Acked-by: Oscar Salvador Reviewed-by: Zi Yan Cc: Miaohe Lin Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/compaction.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 139f00c0308a3d..ca71fd3c31815c 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -981,13 +981,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, } if (PageHuge(page)) { + const unsigned int order = compound_order(page); /* * skip hugetlbfs if we are not compacting for pages * bigger than its order. THPs and other compound pages * are handled below. */ if (!cc->alloc_contig) { - const unsigned int order = compound_order(page); if (order <= MAX_PAGE_ORDER) { low_pfn += (1UL << order) - 1; @@ -1011,8 +1011,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* Do not report -EBUSY down the chain */ if (ret == -EBUSY) ret = 0; - low_pfn += compound_nr(page) - 1; - nr_scanned += compound_nr(page) - 1; + low_pfn += (1UL << order) - 1; + nr_scanned += (1UL << order) - 1; goto isolate_fail; } From c5bb27e2da3a6e1006f3e0aeb36d57c1dd1144aa Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 30 Mar 2025 17:28:09 -0700 Subject: [PATCH 0533/2924] mm/page_alloc: avoid second trylock of zone->lock spin_trylock followed by spin_lock will cause extra write cache access. If the lock is contended it may cause unnecessary cache line bouncing and will execute redundant irq restore/save pair. Therefore, check alloc/fpi_flags first and use spin_trylock or spin_lock. Link: https://lkml.kernel.org/r/20250331002809.94758-1-alexei.starovoitov@gmail.com Fixes: 97769a53f117 ("mm, bpf: Introduce try_alloc_pages() for opportunistic page allocation") Signed-off-by: Alexei Starovoitov Suggested-by: Linus Torvalds Reviewed-by: Sebastian Andrzej Siewior Acked-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Harry Yoo Reviewed-by: Shakeel Butt Cc: Andrii Nakryiko Cc: Daniel Borkman Cc: Martin KaFai Lau Cc: Michal Hocko Cc: Peter Zijlstra Cc: Steven Rostedt Signed-off-by: Andrew Morton --- mm/page_alloc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fd6b865cb1abfb..9a219fe8e130bc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1400,11 +1400,12 @@ static void free_one_page(struct zone *zone, struct page *page, struct llist_head *llhead; unsigned long flags; - if (!spin_trylock_irqsave(&zone->lock, flags)) { - if (unlikely(fpi_flags & FPI_TRYLOCK)) { + if (unlikely(fpi_flags & FPI_TRYLOCK)) { + if (!spin_trylock_irqsave(&zone->lock, flags)) { add_page_to_zone_llist(zone, page, order); return; } + } else { spin_lock_irqsave(&zone->lock, flags); } @@ -2314,9 +2315,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long flags; int i; - if (!spin_trylock_irqsave(&zone->lock, flags)) { - if (unlikely(alloc_flags & ALLOC_TRYLOCK)) + if (unlikely(alloc_flags & ALLOC_TRYLOCK)) { + if (!spin_trylock_irqsave(&zone->lock, flags)) return 0; + } else { spin_lock_irqsave(&zone->lock, flags); } for (i = 0; i < count; ++i) { @@ -2937,9 +2939,10 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, do { page = NULL; - if (!spin_trylock_irqsave(&zone->lock, flags)) { - if (unlikely(alloc_flags & ALLOC_TRYLOCK)) + if (unlikely(alloc_flags & ALLOC_TRYLOCK)) { + if (!spin_trylock_irqsave(&zone->lock, flags)) return NULL; + } else { spin_lock_irqsave(&zone->lock, flags); } if (alloc_flags & ALLOC_HIGHATOMIC) From 382360d289c1e1d89df06f33c4d5b874b38ef267 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Thu, 3 Apr 2025 14:41:38 +0800 Subject: [PATCH 0534/2924] mm/hugetlb: fix nid mismatch in alloc_surplus_hugetlb_folio() It's wrong to use nid directly since the nid may be changed in allocation. Use folio_nid() to obtain the nid of folio instead. Fix: 2273dea6b1e1 ("mm/hugetlb: update nr_huge_pages and surplus_huge_pages together") Link: https://lkml.kernel.org/r/20250403064138.2867929-1-liushixin2@huawei.com Signed-off-by: Liu Shixin Acked-by: Oscar Salvador Cc: David Hildenbrand Cc: Kefeng Wang Cc: Muchun Song Cc: Nanyong Sun Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 39f92aad7bd1e1..6670f9b9e07ae9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2271,7 +2271,7 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h, * as surplus_pages, otherwise it might confuse * persistent_huge_pages() momentarily. */ - __prep_account_new_huge_page(h, nid); + __prep_account_new_huge_page(h, folio_nid(folio)); /* * We could have raced with the pool size change. From 35e214b11df8a73dc4b7256d3ca5d647be83c585 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 28 Mar 2025 18:44:02 +0800 Subject: [PATCH 0535/2924] MAINTAINERS: add Andrew and Baoquan as kexec maintainers Add Andrew as kexec/kdump maintainer because he has been helping review and merge ready kexec/kdump patches. And I would like to nominate myself as kexec maintainer because I always try to review generic kexec codes. Link: https://lkml.kernel.org/r/20250328104402.16826-1-bhe@redhat.com Signed-off-by: Baoquan He Acked-by: Dave Young Acked-by: Simon Horman Cc: Eric Biederman Signed-off-by: Andrew Morton --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 96b82704950184..de97cd54ff241b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12810,6 +12810,7 @@ F: lib/Kconfig.kcsan F: scripts/Makefile.kcsan KDUMP +M: Andrew Morton M: Baoquan He R: Vivek Goyal R: Dave Young @@ -13111,6 +13112,8 @@ F: fs/kernfs/ F: include/linux/kernfs.h KEXEC +M: Andrew Morton +M: Baoquan He L: kexec@lists.infradead.org W: http://kernel.org/pub/linux/utils/kernel/kexec/ F: include/linux/kexec.h From 9c02223e2d9df5cb37c51aedb78f3960294e09b5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 4 Apr 2025 17:42:32 +0100 Subject: [PATCH 0536/2924] selftests/mm: generate a temporary mountpoint for cgroup filesystem Currently if the filesystem for the cgroups version it wants to use is not mounted charge_reserved_hugetlb.sh and hugetlb_reparenting_test.sh tests will attempt to mount it on the hard coded path /dev/cgroup/memory, deleting that directory when the test finishes. This will fail if there is not a preexisting directory at that path, and since the directory is deleted subsequent runs of the test will fail. Instead of relying on this hard coded directory name use mktemp to generate a temporary directory to use as a mountpoint, fixing both the assumption and the disruption caused by deleting a preexisting directory. This means that if the relevant cgroup filesystem is not already mounted then we rely on having coreutils (which provides mktemp) installed. I suspect that many current users are relying on having things automounted by default, and given that the script relies on bash it's probably not an unreasonable requirement. Link: https://lkml.kernel.org/r/20250404-kselftest-mm-cgroup2-detection-v1-1-3dba6d32ba8c@kernel.org Fixes: 209376ed2a84 ("selftests/vm: make charge_reserved_hugetlb.sh work with existing cgroup setting") Signed-off-by: Mark Brown Cc: Aishwarya TCV Cc: Mark Brown Cc: Mina Almasry Cc: Shuah Khan Cc: Waiman Long Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/charge_reserved_hugetlb.sh | 4 ++-- tools/testing/selftests/mm/hugetlb_reparenting_test.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 67df7b47087f03..e1fe16bcbbe880 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -29,7 +29,7 @@ fi if [[ $cgroup2 ]]; then cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup2 none $cgroup_path do_umount=1 fi @@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then else cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup memory,hugetlb $cgroup_path do_umount=1 fi diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index 11f9bbe7dc222b..0b0d4ba1af2771 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -23,7 +23,7 @@ fi if [[ $cgroup2 ]]; then CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then - CGROUP_ROOT=/dev/cgroup/memory + CGROUP_ROOT=$(mktemp -d) mount -t cgroup2 none $CGROUP_ROOT do_umount=1 fi From 41e6ddcaa0f18dda4c3fadf22533775a30d6f72f Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 21 Mar 2025 10:09:37 +0000 Subject: [PATCH 0537/2924] mm/vma: add give_up_on_oom option on modify/merge, use in uffd release Currently, if a VMA merge fails due to an OOM condition arising on commit merge or a failure to duplicate anon_vma's, we report this so the caller can handle it. However there are cases where the caller is only ostensibly trying a merge, and doesn't mind if it fails due to this condition. Since we do not want to introduce an implicit assumption that we only actually modify VMAs after OOM conditions might arise, add a 'give up on oom' option and make an explicit contract that, should this flag be set, we absolutely will not modify any VMAs should OOM arise and just bail out. Since it'd be very unusual for a user to try to vma_modify() with this flag set but be specifying a range within a VMA which ends up being split (which can fail due to rlimit issues, not only OOM), we add a debug warning for this condition. The motivating reason for this is uffd release - syzkaller (and Pedro Falcato's VERY astute analysis) found a way in which an injected fault on allocation, triggering an OOM condition on commit merge, would result in uffd code becoming confused and treating an error value as if it were a VMA pointer. To avoid this, we make use of this new VMG flag to ensure that this never occurs, utilising the fact that, should we be clearing entire VMAs, we do not wish an OOM event to be reported to us. Many thanks to Pedro Falcato for his excellent analysis and Jann Horn for his insightful and intelligent analysis of the situation, both of whom were instrumental in this fix. Link: https://lkml.kernel.org/r/20250321100937.46634-1-lorenzo.stoakes@oracle.com Reported-by: syzbot+20ed41006cf9d842c2b5@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67dc67f0.050a0220.25ae54.001e.GAE@google.com/ Fixes: 47b16d0462a4 ("mm: abort vma_modify() on merge out of memory failure") Signed-off-by: Lorenzo Stoakes Suggested-by: Pedro Falcato Suggested-by: Jann Horn Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 13 ++++++++++-- mm/vma.c | 51 ++++++++++++++++++++++++++++++++++++++++++++---- mm/vma.h | 9 ++++++++- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index fbf2cf62ab9f53..7d5d709cc838ed 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1902,6 +1902,14 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, unsigned long end) { struct vm_area_struct *ret; + bool give_up_on_oom = false; + + /* + * If we are modifying only and not splitting, just give up on the merge + * if OOM prevents us from merging successfully. + */ + if (start == vma->vm_start && end == vma->vm_end) + give_up_on_oom = true; /* Reset ptes for the whole vma range if wr-protected */ if (userfaultfd_wp(vma)) @@ -1909,7 +1917,7 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, ret = vma_modify_flags_uffd(vmi, prev, vma, start, end, vma->vm_flags & ~__VM_UFFD_FLAGS, - NULL_VM_UFFD_CTX); + NULL_VM_UFFD_CTX, give_up_on_oom); /* * In the vma_merge() successful mprotect-like case 8: @@ -1960,7 +1968,8 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx, new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags; vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end, new_flags, - (struct vm_userfaultfd_ctx){ctx}); + (struct vm_userfaultfd_ctx){ctx}, + /* give_up_on_oom = */false); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/mm/vma.c b/mm/vma.c index 5cdc5612bfc19e..839d12f02c885d 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -666,6 +666,9 @@ static void vmg_adjust_set_range(struct vma_merge_struct *vmg) /* * Actually perform the VMA merge operation. * + * IMPORTANT: We guarantee that, should vmg->give_up_on_oom is set, to not + * modify any VMAs or cause inconsistent state should an OOM condition arise. + * * Returns 0 on success, or an error value on failure. */ static int commit_merge(struct vma_merge_struct *vmg) @@ -685,6 +688,12 @@ static int commit_merge(struct vma_merge_struct *vmg) init_multi_vma_prep(&vp, vma, vmg); + /* + * If vmg->give_up_on_oom is set, we're safe, because we don't actually + * manipulate any VMAs until we succeed at preallocation. + * + * Past this point, we will not return an error. + */ if (vma_iter_prealloc(vmg->vmi, vma)) return -ENOMEM; @@ -915,7 +924,13 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( if (anon_dup) unlink_anon_vmas(anon_dup); - vmg->state = VMA_MERGE_ERROR_NOMEM; + /* + * We've cleaned up any cloned anon_vma's, no VMAs have been + * modified, no harm no foul if the user requests that we not + * report this and just give up, leaving the VMAs unmerged. + */ + if (!vmg->give_up_on_oom) + vmg->state = VMA_MERGE_ERROR_NOMEM; return NULL; } @@ -926,7 +941,15 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( abort: vma_iter_set(vmg->vmi, start); vma_iter_load(vmg->vmi); - vmg->state = VMA_MERGE_ERROR_NOMEM; + + /* + * This means we have failed to clone anon_vma's correctly, but no + * actual changes to VMAs have occurred, so no harm no foul - if the + * user doesn't want this reported and instead just wants to give up on + * the merge, allow it. + */ + if (!vmg->give_up_on_oom) + vmg->state = VMA_MERGE_ERROR_NOMEM; return NULL; } @@ -1068,6 +1091,10 @@ int vma_expand(struct vma_merge_struct *vmg) /* This should already have been checked by this point. */ VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg); vma_start_write(next); + /* + * In this case we don't report OOM, so vmg->give_up_on_mm is + * safe. + */ ret = dup_anon_vma(middle, next, &anon_dup); if (ret) return ret; @@ -1090,9 +1117,15 @@ int vma_expand(struct vma_merge_struct *vmg) return 0; nomem: - vmg->state = VMA_MERGE_ERROR_NOMEM; if (anon_dup) unlink_anon_vmas(anon_dup); + /* + * If the user requests that we just give upon OOM, we are safe to do so + * here, as commit merge provides this contract to us. Nothing has been + * changed - no harm no foul, just don't report it. + */ + if (!vmg->give_up_on_oom) + vmg->state = VMA_MERGE_ERROR_NOMEM; return -ENOMEM; } @@ -1534,6 +1567,13 @@ static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg) if (vmg_nomem(vmg)) return ERR_PTR(-ENOMEM); + /* + * Split can fail for reasons other than OOM, so if the user requests + * this it's probably a mistake. + */ + VM_WARN_ON(vmg->give_up_on_oom && + (vma->vm_start != start || vma->vm_end != end)); + /* Split any preceding portion of the VMA. */ if (vma->vm_start < start) { int err = split_vma(vmg->vmi, vma, start, 1); @@ -1602,12 +1642,15 @@ struct vm_area_struct struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags, - struct vm_userfaultfd_ctx new_ctx) + struct vm_userfaultfd_ctx new_ctx, + bool give_up_on_oom) { VMG_VMA_STATE(vmg, vmi, prev, vma, start, end); vmg.flags = new_flags; vmg.uffd_ctx = new_ctx; + if (give_up_on_oom) + vmg.give_up_on_oom = true; return vma_modify(&vmg); } diff --git a/mm/vma.h b/mm/vma.h index 7356ca5a22d332..149926e8a6d1ac 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -114,6 +114,12 @@ struct vma_merge_struct { */ bool just_expand :1; + /* + * If a merge is possible, but an OOM error occurs, give up and don't + * execute the merge, returning NULL. + */ + bool give_up_on_oom :1; + /* Internal flags set during merge process: */ /* @@ -255,7 +261,8 @@ __must_check struct vm_area_struct struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags, - struct vm_userfaultfd_ctx new_ctx); + struct vm_userfaultfd_ctx new_ctx, + bool give_up_on_oom); __must_check struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg); From a5561c88cf3cddc405ae7a0ef6b606224ddabf0c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 18:32:27 +0100 Subject: [PATCH 0538/2924] ASN.1: add module description This is needed to avoid a build warning: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/asn1_decoder.o Link: https://lkml.kernel.org/r/20250324173242.1501003-2-arnd@kernel.org Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Arnd Bergmann Tested-by: Geert Uytterhoeven Cc: Jeff Johnson Cc: Masahiro Yamada Cc: Stehen Rothwell Signed-off-by: Andrew Morton --- lib/asn1_decoder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 13da529e2e7247..5738ae286b41ed 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -518,4 +518,5 @@ int asn1_ber_decoder(const struct asn1_decoder *decoder, } EXPORT_SYMBOL_GPL(asn1_ber_decoder); +MODULE_DESCRIPTION("Decoder for ASN.1 BER/DER/CER encoded bytestream"); MODULE_LICENSE("GPL"); From 10764175baf4531d0f4b1147fd113660ea476226 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 18:32:28 +0100 Subject: [PATCH 0539/2924] samples/livepatch: add module descriptions Every module should have a description, so add one for each of these modules. [akpm@linux-foundation.org: match the livepatch-callbacks-mod.c description, per Petr] Link: https://lkml.kernel.org/r/20250324173242.1501003-3-arnd@kernel.org Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Arnd Bergmann Reviewed-by: Petr Mladek Cc: Christophe Leroy Cc: Easwar Hariharan Cc: Jeff Johnson Cc: Jiri Kosina Cc: Joe Lawrence Cc: Josh Poimboeuf Cc: Masahiro Yamada Cc: Miroslav Benes Cc: Stehen Rothwell Signed-off-by: Andrew Morton --- samples/livepatch/livepatch-callbacks-busymod.c | 1 + samples/livepatch/livepatch-callbacks-demo.c | 1 + samples/livepatch/livepatch-callbacks-mod.c | 1 + samples/livepatch/livepatch-sample.c | 1 + samples/livepatch/livepatch-shadow-fix1.c | 1 + samples/livepatch/livepatch-shadow-fix2.c | 1 + 6 files changed, 6 insertions(+) diff --git a/samples/livepatch/livepatch-callbacks-busymod.c b/samples/livepatch/livepatch-callbacks-busymod.c index 69105596e72e68..fadc2a85cb35dd 100644 --- a/samples/livepatch/livepatch-callbacks-busymod.c +++ b/samples/livepatch/livepatch-callbacks-busymod.c @@ -56,4 +56,5 @@ static void livepatch_callbacks_mod_exit(void) module_init(livepatch_callbacks_mod_init); module_exit(livepatch_callbacks_mod_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); MODULE_LICENSE("GPL"); diff --git a/samples/livepatch/livepatch-callbacks-demo.c b/samples/livepatch/livepatch-callbacks-demo.c index 11c3f4357812db..9e69d9caed258e 100644 --- a/samples/livepatch/livepatch-callbacks-demo.c +++ b/samples/livepatch/livepatch-callbacks-demo.c @@ -192,5 +192,6 @@ static void livepatch_callbacks_demo_exit(void) module_init(livepatch_callbacks_demo_init); module_exit(livepatch_callbacks_demo_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-callbacks-mod.c b/samples/livepatch/livepatch-callbacks-mod.c index 2a074f422a51d9..d1851b471ad915 100644 --- a/samples/livepatch/livepatch-callbacks-mod.c +++ b/samples/livepatch/livepatch-callbacks-mod.c @@ -38,4 +38,5 @@ static void livepatch_callbacks_mod_exit(void) module_init(livepatch_callbacks_mod_init); module_exit(livepatch_callbacks_mod_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); MODULE_LICENSE("GPL"); diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index cd76d7ebe59859..5263a2f31c480e 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -66,5 +66,6 @@ static void livepatch_exit(void) module_init(livepatch_init); module_exit(livepatch_exit); +MODULE_DESCRIPTION("Kernel Live Patching Sample Module"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c index f3f153895d6ce7..cbf68ca4009732 100644 --- a/samples/livepatch/livepatch-shadow-fix1.c +++ b/samples/livepatch/livepatch-shadow-fix1.c @@ -168,5 +168,6 @@ static void livepatch_shadow_fix1_exit(void) module_init(livepatch_shadow_fix1_init); module_exit(livepatch_shadow_fix1_exit); +MODULE_DESCRIPTION("Live patching demo for shadow variables"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c index 361046a4f10cf4..b99122cb221fc4 100644 --- a/samples/livepatch/livepatch-shadow-fix2.c +++ b/samples/livepatch/livepatch-shadow-fix2.c @@ -128,5 +128,6 @@ static void livepatch_shadow_fix2_exit(void) module_init(livepatch_shadow_fix2_init); module_exit(livepatch_shadow_fix2_exit); +MODULE_DESCRIPTION("Live patching demo for shadow variables"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); From 6810431bc47301376174331f6d85a70c8520c941 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 18:32:29 +0100 Subject: [PATCH 0540/2924] fpga: tests: add module descriptions Modules without a description now cause a warning: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/fpga/tests/fpga-bridge-test.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/fpga/tests/fpga-mgr-test.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/fpga/tests/fpga-region-test.o Link: https://lkml.kernel.org/r/20250324173242.1501003-4-arnd@kernel.org Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Arnd Bergmann Cc: Hao Wu Cc: Jeff Johnson Cc: Marco Pagani Cc: Masahiro Yamada Cc: Moritz Fischer Cc: Russ Weight Cc: Stehen Rothwell Cc: Tom Rix Cc: Xu Yilun Signed-off-by: Andrew Morton --- drivers/fpga/tests/fpga-bridge-test.c | 1 + drivers/fpga/tests/fpga-mgr-test.c | 1 + drivers/fpga/tests/fpga-region-test.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/fpga/tests/fpga-bridge-test.c b/drivers/fpga/tests/fpga-bridge-test.c index b9ab29809e9619..124ba40e32b18c 100644 --- a/drivers/fpga/tests/fpga-bridge-test.c +++ b/drivers/fpga/tests/fpga-bridge-test.c @@ -170,4 +170,5 @@ static struct kunit_suite fpga_bridge_suite = { kunit_test_suite(fpga_bridge_suite); +MODULE_DESCRIPTION("KUnit test for the FPGA Bridge"); MODULE_LICENSE("GPL"); diff --git a/drivers/fpga/tests/fpga-mgr-test.c b/drivers/fpga/tests/fpga-mgr-test.c index 9cb37aefbac4b2..8748babb050458 100644 --- a/drivers/fpga/tests/fpga-mgr-test.c +++ b/drivers/fpga/tests/fpga-mgr-test.c @@ -330,4 +330,5 @@ static struct kunit_suite fpga_mgr_suite = { kunit_test_suite(fpga_mgr_suite); +MODULE_DESCRIPTION("KUnit test for the FPGA Manager"); MODULE_LICENSE("GPL"); diff --git a/drivers/fpga/tests/fpga-region-test.c b/drivers/fpga/tests/fpga-region-test.c index 6a108cafded863..020ceac48509fa 100644 --- a/drivers/fpga/tests/fpga-region-test.c +++ b/drivers/fpga/tests/fpga-region-test.c @@ -214,4 +214,5 @@ static struct kunit_suite fpga_region_suite = { kunit_test_suite(fpga_region_suite); +MODULE_DESCRIPTION("KUnit test for the FPGA Region"); MODULE_LICENSE("GPL"); From 75dd4975f569c179284fa644e5be24cbfa77b0ee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 18:32:31 +0100 Subject: [PATCH 0541/2924] zlib: add module description Modules without a description now cause a warning: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/zlib_inflate/zlib_inflate.o Link: https://lkml.kernel.org/r/20250324173242.1501003-6-arnd@kernel.org Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Arnd Bergmann Cc: Jeff Johnson Cc: Masahiro Yamada Cc: Stehen Rothwell Signed-off-by: Andrew Morton --- lib/zlib_inflate/inflate_syms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/zlib_inflate/inflate_syms.c b/lib/zlib_inflate/inflate_syms.c index 9720114c067210..b8996d90e8bcd8 100644 --- a/lib/zlib_inflate/inflate_syms.c +++ b/lib/zlib_inflate/inflate_syms.c @@ -18,4 +18,5 @@ EXPORT_SYMBOL(zlib_inflateEnd); EXPORT_SYMBOL(zlib_inflateReset); EXPORT_SYMBOL(zlib_inflateIncomp); EXPORT_SYMBOL(zlib_inflate_blob); +MODULE_DESCRIPTION("Data decompression using the deflation algorithm"); MODULE_LICENSE("GPL"); From 91640531b92ec63e260b9d5c681d387676ec462c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 18:32:32 +0100 Subject: [PATCH 0542/2924] ucs2_string: add module description Modules without a description now cause a warning: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/ucs2_string.o Link: https://lkml.kernel.org/r/20250324173242.1501003-7-arnd@kernel.org Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Arnd Bergmann Cc: Jeff Johnson Cc: Masahiro Yamada Cc: Stehen Rothwell Signed-off-by: Andrew Morton --- lib/ucs2_string.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 9308bcfb2ad50c..dfb4f2358cabfe 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -165,4 +165,5 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) } EXPORT_SYMBOL(ucs2_as_utf8); +MODULE_DESCRIPTION("UCS2 string handling"); MODULE_LICENSE("GPL v2"); From e2ffee91c40fef59ec84dd7379aa634dcefa0a42 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 18:32:34 +0100 Subject: [PATCH 0543/2924] mm/kasan: add module decription Modules without a description now cause a warning: WARNING: modpost: missing MODULE_DESCRIPTION() in mm/kasan/kasan_test.o [akpm@linux-foundation.org: update description text, per Andrey] Link: https://lkml.kernel.org/r/20250324173242.1501003-9-arnd@kernel.org Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Arnd Bergmann Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitriy Vyukov Cc: Jann Horn Cc: Jeff Johnson Cc: Macro Elver Cc: Masahiro Yamada Cc: Nihar Chaithanya Cc: Peter Zijlstra Cc: Sabyrzhan Tasbolatov Cc: Stehen Rothwell Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- mm/kasan/kasan_test_c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index 3ea317837c2d37..f24e3bef72a44a 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -2127,4 +2127,5 @@ static struct kunit_suite kasan_kunit_test_suite = { kunit_test_suite(kasan_kunit_test_suite); +MODULE_DESCRIPTION("KUnit tests for checking KASAN bug-detection capabilities"); MODULE_LICENSE("GPL"); From 61c4e6ca8c9364e08c2c132d1bfae2f85af42c7a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Mar 2025 18:32:35 +0100 Subject: [PATCH 0544/2924] kunit: slub: add module description Modules without a description now cause a warning: WARNING: modpost: missing MODULE_DESCRIPTION() in lib/tests/slub_kunit.o Link: https://lkml.kernel.org/r/20250324173242.1501003-10-arnd@kernel.org Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Arnd Bergmann Cc: Guenetr Roeck Cc: Jeff Johnson Cc: Masahiro Yamada Cc: Pei Xiao Cc: Rae Moar Cc: Stehen Rothwell Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/tests/slub_kunit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/tests/slub_kunit.c b/lib/tests/slub_kunit.c index d47c472b05201b..848b682a2d70ed 100644 --- a/lib/tests/slub_kunit.c +++ b/lib/tests/slub_kunit.c @@ -325,4 +325,5 @@ static struct kunit_suite test_suite = { }; kunit_test_suite(test_suite); +MODULE_DESCRIPTION("Kunit tests for slub allocator"); MODULE_LICENSE("GPL"); From 90abee6d7895d5eef18c91d870d8168be4e76e9d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 7 Apr 2025 14:01:53 -0400 Subject: [PATCH 0545/2924] mm: page_alloc: speed up fallbacks in rmqueue_bulk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test robot identified c2f6ea38fc1b ("mm: page_alloc: don't steal single pages from biggest buddy") as the root cause of a 56.4% regression in vm-scalability::lru-file-mmap-read. Carlos reports an earlier patch, c0cd6f557b90 ("mm: page_alloc: fix freelist movement during block conversion"), as the root cause for a regression in worst-case zone->lock+irqoff hold times. Both of these patches modify the page allocator's fallback path to be less greedy in an effort to stave off fragmentation. The flip side of this is that fallbacks are also less productive each time around, which means the fallback search can run much more frequently. Carlos' traces point to rmqueue_bulk() specifically, which tries to refill the percpu cache by allocating a large batch of pages in a loop. It highlights how once the native freelists are exhausted, the fallback code first scans orders top-down for whole blocks to claim, then falls back to a bottom-up search for the smallest buddy to steal. For the next batch page, it goes through the same thing again. This can be made more efficient. Since rmqueue_bulk() holds the zone->lock over the entire batch, the freelists are not subject to outside changes; when the search for a block to claim has already failed, there is no point in trying again for the next page. Modify __rmqueue() to remember the last successful fallback mode, and restart directly from there on the next rmqueue_bulk() iteration. Oliver confirms that this improves beyond the regression that the test robot reported against c2f6ea38fc1b: commit: f3b92176f4 ("tools/selftests: add guard region test for /proc/$pid/pagemap") c2f6ea38fc ("mm: page_alloc: don't steal single pages from biggest buddy") acc4d5ff0b ("Merge tag 'net-6.15-rc0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") 2c847f27c3 ("mm: page_alloc: speed up fallbacks in rmqueue_bulk()") <--- your patch f3b92176f4f7100f c2f6ea38fc1b640aa7a2e155cc1 acc4d5ff0b61eb1715c498b6536 2c847f27c37da65a93d23c237c5 ---------------- --------------------------- --------------------------- --------------------------- %stddev %change %stddev %change %stddev %change %stddev \ | \ | \ | \ 25525364 ± 3% -56.4% 11135467 -57.8% 10779336 +31.6% 33581409 vm-scalability.throughput Carlos confirms that worst-case times are almost fully recovered compared to before the earlier culprit patch: 2dd482ba627d (before freelist hygiene): 1ms c0cd6f557b90 (after freelist hygiene): 90ms next-20250319 (steal smallest buddy): 280ms this patch : 8ms [jackmanb@google.com: comment updates] Link: https://lkml.kernel.org/r/D92AC0P9594X.3BML64MUKTF8Z@google.com [hannes@cmpxchg.org: reset rmqueue_mode in rmqueue_buddy() error loop, per Yunsheng Lin] Link: https://lkml.kernel.org/r/20250409140023.GA2313@cmpxchg.org Link: https://lkml.kernel.org/r/20250407180154.63348-1-hannes@cmpxchg.org Fixes: c0cd6f557b90 ("mm: page_alloc: fix freelist movement during block conversion") Fixes: c2f6ea38fc1b ("mm: page_alloc: don't steal single pages from biggest buddy") Signed-off-by: Johannes Weiner Signed-off-by: Brendan Jackman Reported-by: kernel test robot Reported-by: Carlos Song Tested-by: Carlos Song Tested-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202503271547.fc08b188-lkp@intel.com Reviewed-by: Brendan Jackman Tested-by: Shivank Garg Acked-by: Zi Yan Reviewed-by: Vlastimil Babka Cc: [6.10+] Signed-off-by: Andrew Morton --- mm/page_alloc.c | 113 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 34 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9a219fe8e130bc..1715e34b91af4d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2183,23 +2183,15 @@ try_to_claim_block(struct zone *zone, struct page *page, } /* - * Try finding a free buddy page on the fallback list. - * - * This will attempt to claim a whole pageblock for the requested type - * to ensure grouping of such requests in the future. - * - * If a whole block cannot be claimed, steal an individual page, regressing to - * __rmqueue_smallest() logic to at least break up as little contiguity as - * possible. + * Try to allocate from some fallback migratetype by claiming the entire block, + * i.e. converting it to the allocation's start migratetype. * * The use of signed ints for order and current_order is a deliberate * deviation from the rest of this file, to make the for loop * condition simpler. - * - * Return the stolen page, or NULL if none can be found. */ static __always_inline struct page * -__rmqueue_fallback(struct zone *zone, int order, int start_migratetype, +__rmqueue_claim(struct zone *zone, int order, int start_migratetype, unsigned int alloc_flags) { struct free_area *area; @@ -2237,14 +2229,29 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, page = try_to_claim_block(zone, page, current_order, order, start_migratetype, fallback_mt, alloc_flags); - if (page) - goto got_one; + if (page) { + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + return page; + } } - if (alloc_flags & ALLOC_NOFRAGMENT) - return NULL; + return NULL; +} + +/* + * Try to steal a single page from some fallback migratetype. Leave the rest of + * the block as its current migratetype, potentially causing fragmentation. + */ +static __always_inline struct page * +__rmqueue_steal(struct zone *zone, int order, int start_migratetype) +{ + struct free_area *area; + int current_order; + struct page *page; + int fallback_mt; + bool claim_block; - /* No luck claiming pageblock. Find the smallest fallback page */ for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, @@ -2254,25 +2261,28 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, page = get_page_from_free_area(area, fallback_mt); page_del_and_expand(zone, page, order, current_order, fallback_mt); - goto got_one; + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + return page; } return NULL; - -got_one: - trace_mm_page_alloc_extfrag(page, order, current_order, - start_migratetype, fallback_mt); - - return page; } +enum rmqueue_mode { + RMQUEUE_NORMAL, + RMQUEUE_CMA, + RMQUEUE_CLAIM, + RMQUEUE_STEAL, +}; + /* * Do the hard work of removing an element from the buddy allocator. * Call me with the zone->lock already held. */ static __always_inline struct page * __rmqueue(struct zone *zone, unsigned int order, int migratetype, - unsigned int alloc_flags) + unsigned int alloc_flags, enum rmqueue_mode *mode) { struct page *page; @@ -2291,16 +2301,48 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, } } - page = __rmqueue_smallest(zone, order, migratetype); - if (unlikely(!page)) { - if (alloc_flags & ALLOC_CMA) + /* + * First try the freelists of the requested migratetype, then try + * fallbacks modes with increasing levels of fragmentation risk. + * + * The fallback logic is expensive and rmqueue_bulk() calls in + * a loop with the zone->lock held, meaning the freelists are + * not subject to any outside changes. Remember in *mode where + * we found pay dirt, to save us the search on the next call. + */ + switch (*mode) { + case RMQUEUE_NORMAL: + page = __rmqueue_smallest(zone, order, migratetype); + if (page) + return page; + fallthrough; + case RMQUEUE_CMA: + if (alloc_flags & ALLOC_CMA) { page = __rmqueue_cma_fallback(zone, order); - - if (!page) - page = __rmqueue_fallback(zone, order, migratetype, - alloc_flags); + if (page) { + *mode = RMQUEUE_CMA; + return page; + } + } + fallthrough; + case RMQUEUE_CLAIM: + page = __rmqueue_claim(zone, order, migratetype, alloc_flags); + if (page) { + /* Replenished preferred freelist, back to normal mode. */ + *mode = RMQUEUE_NORMAL; + return page; + } + fallthrough; + case RMQUEUE_STEAL: + if (!(alloc_flags & ALLOC_NOFRAGMENT)) { + page = __rmqueue_steal(zone, order, migratetype); + if (page) { + *mode = RMQUEUE_STEAL; + return page; + } + } } - return page; + return NULL; } /* @@ -2312,6 +2354,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, unsigned int alloc_flags) { + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; unsigned long flags; int i; @@ -2323,7 +2366,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, } for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype, - alloc_flags); + alloc_flags, &rmqm); if (unlikely(page == NULL)) break; @@ -2948,7 +2991,9 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, if (alloc_flags & ALLOC_HIGHATOMIC) page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); if (!page) { - page = __rmqueue(zone, order, migratetype, alloc_flags); + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; + + page = __rmqueue(zone, order, migratetype, alloc_flags, &rmqm); /* * If the allocation fails, allow OOM handling and From 60580e0bd587b1df7aa9f749ed735db8377acaab Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Mon, 7 Apr 2025 16:54:35 +0000 Subject: [PATCH 0546/2924] mm/cma: report base address of single range correctly The cma_declare_contiguous_nid code was refactored by commit c009da4258f9 ("mm, cma: support multiple contiguous ranges, if requested"), so that it could use an internal function to attempt a single range area first, and then try a multi-range one. However, that meant that the actual base address used for the !fixed case (base == 0) wasn't available one level up to be printed in the informational message, and it would always end up printing a base address of 0 in the boot message. Make the internal function take a phys_addr_t pointer to the base address, so that the value is available to the caller. [fvdl@google.com: v2] Link: https://lkml.kernel.org/r/20250408164000.3215690-1-fvdl@google.com Link: https://lkml.kernel.org/r/20250407165435.2567898-1-fvdl@google.com Fixes: c009da4258f9 ("mm, cma: support multiple contiguous ranges, if requested") Signed-off-by: Frank van der Linden Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/linux-mm/CAMuHMdVWviQ7O9yBFE3f=ev0eVb1CnsQvR6SKtEROBbM6z7g3w@mail.gmail.com/ Tested-by: Geert Uytterhoeven Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/cma.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index b06d5fe73399f2..15632939f20a1e 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -35,7 +35,7 @@ struct cma cma_areas[MAX_CMA_AREAS]; unsigned int cma_area_count; -static int __init __cma_declare_contiguous_nid(phys_addr_t base, +static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, const char *name, struct cma **res_cma, @@ -370,7 +370,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size, phys_addr_t align, unsigned int order_per_bit, const char *name, struct cma **res_cma, int nid) { - phys_addr_t start, end; + phys_addr_t start = 0, end; phys_addr_t size, sizesum, sizeleft; struct cma_init_memrange *mrp, *mlp, *failed; struct cma_memrange *cmrp; @@ -384,7 +384,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size, /* * First, try it the normal way, producing just one range. */ - ret = __cma_declare_contiguous_nid(0, total_size, 0, align, + ret = __cma_declare_contiguous_nid(&start, total_size, 0, align, order_per_bit, false, name, res_cma, nid); if (ret != -ENOMEM) goto out; @@ -580,7 +580,7 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, { int ret; - ret = __cma_declare_contiguous_nid(base, size, limit, alignment, + ret = __cma_declare_contiguous_nid(&base, size, limit, alignment, order_per_bit, fixed, name, res_cma, nid); if (ret != 0) pr_err("Failed to reserve %ld MiB\n", @@ -592,14 +592,14 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, return ret; } -static int __init __cma_declare_contiguous_nid(phys_addr_t base, +static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, const char *name, struct cma **res_cma, int nid) { phys_addr_t memblock_end = memblock_end_of_DRAM(); - phys_addr_t highmem_start; + phys_addr_t highmem_start, base = *basep; int ret; /* @@ -722,12 +722,15 @@ static int __init __cma_declare_contiguous_nid(phys_addr_t base, } ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma); - if (ret) + if (ret) { memblock_phys_free(base, size); + return ret; + } (*res_cma)->nid = nid; + *basep = base; - return ret; + return 0; } static void cma_debug_show_areas(struct cma *cma) From aabf58bfaacedb3f54801ba09c6d50daf83b74f4 Mon Sep 17 00:00:00 2001 From: Jinjiang Tu Date: Mon, 7 Apr 2025 20:47:06 +0800 Subject: [PATCH 0547/2924] mm/hugetlb: fix set_max_huge_pages() when there are surplus pages In set_max_huge_pages(), min_count is computed taking into account surplus huge pages, which might lead in some cases to not be able to free huge pages and end up accounting them as surplus instead. One way to solve it is to subtract surplus_huge_pages directly, but we cannot do it blindly because there might be surplus pages that are also free pages, which might happen when we fail to restore the vmemmap for optimized hvo pages. So we could be subtracting the same page twice. In order to work this around, let us first compute the number of free persistent pages, and use that along with surplus pages to compute min_count. Steps to reproduce: 1) create 5 hugetlb folios in Node0 2) run a program to use all the hugetlb folios 3) echo 0 > nr_hugepages for Node0 to free the hugetlb folios. Thus the 5 hugetlb folios in Node0 are accounted as surplus. 4) create 5 hugetlb folios in Node1 5) echo 0 > nr_hugepages for Node1 to free the hugetlb folios The result: Node0 Node1 Total 5 5 Free 0 5 Surp 5 5 The result with this patch: Node0 Node1 Total 5 0 Free 0 0 Surp 5 0 Link: https://lkml.kernel.org/r/20250409055957.3774471-1-tujinjiang@huawei.com Link: https://lkml.kernel.org/r/20250407124706.2688092-1-tujinjiang@huawei.com Fixes: 9a30523066cd ("hugetlb: add per node hstate attributes") Signed-off-by: Jinjiang Tu Acked-by: Oscar Salvador Cc: David Hildenbrand Cc: Kefeng Wang Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6670f9b9e07ae9..ea065347866e2c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3825,6 +3825,7 @@ static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed, static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, nodemask_t *nodes_allowed) { + unsigned long persistent_free_count; unsigned long min_count; unsigned long allocated; struct folio *folio; @@ -3959,8 +3960,24 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, * though, we'll note that we're not allowed to exceed surplus * and won't grow the pool anywhere else. Not until one of the * sysctls are changed, or the surplus pages go out of use. + * + * min_count is the expected number of persistent pages, we + * shouldn't calculate min_count by using + * resv_huge_pages + persistent_huge_pages() - free_huge_pages, + * because there may exist free surplus huge pages, and this will + * lead to subtracting twice. Free surplus huge pages come from HVO + * failing to restore vmemmap, see comments in the callers of + * hugetlb_vmemmap_restore_folio(). Thus, we should calculate + * persistent free count first. */ - min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages; + persistent_free_count = h->free_huge_pages; + if (h->free_huge_pages > persistent_huge_pages(h)) { + if (h->free_huge_pages > h->surplus_huge_pages) + persistent_free_count -= h->surplus_huge_pages; + else + persistent_free_count = 0; + } + min_count = h->resv_huge_pages + persistent_huge_pages(h) - persistent_free_count; min_count = max(count, min_count); try_to_free_low(h, min_count, nodes_allowed); From 8c583e538aa681ecb293d5606054de70f44b5558 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 7 Apr 2025 19:31:35 +0800 Subject: [PATCH 0548/2924] selftests: mincore: fix tmpfs mincore test failure When running mincore test cases, I encountered the following failures: " mincore_selftest.c:359:check_tmpfs_mmap:Expected ra_pages (511) == 0 (0) mincore_selftest.c:360:check_tmpfs_mmap:Read-ahead pages found in memory check_tmpfs_mmap: Test terminated by assertion FAIL global.check_tmpfs_mmap not ok 5 global.check_tmpfs_mmap FAILED: 4 / 5 tests passed " The reason for the test case failure is that my system automatically enabled tmpfs large folio allocation by adding the 'transparent_hugepage_tmpfs=always' cmdline. However, the test case still expects the tmpfs mounted on /dev/shm to allocate small folios, which leads to assertion failures when verifying readahead pages. As discussed with David, there's no reason to continue checking the readahead logic for tmpfs. Drop it to fix this issue. Link: https://lkml.kernel.org/r/9a00856cc6a8b4e46f4ab8b1af11ce5fc1a31851.1744025467.git.baolin.wang@linux.alibaba.com Fixes: d635ccdb435c ("mm: shmem: add a kernel command line to change the default huge policy for tmpfs") Signed-off-by: Baolin Wang Acked-by: Zi Yan Acked-by: David Hildenbrand Cc: Barry Song <21cnbao@gmail.com> Cc: Hugh Dickins Cc: Matthew Wilcox (Oracle) Cc: Ryan Roberts Signed-off-by: Andrew Morton --- .../testing/selftests/mincore/mincore_selftest.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index e949a43a614508..0fd4b00bd345b5 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -286,8 +286,7 @@ TEST(check_file_mmap) /* * Test mincore() behavior on a page backed by a tmpfs file. This test - * performs the same steps as the previous one. However, we don't expect - * any readahead in this case. + * performs the same steps as the previous one. */ TEST(check_tmpfs_mmap) { @@ -298,7 +297,6 @@ TEST(check_tmpfs_mmap) int page_size; int fd; int i; - int ra_pages = 0; page_size = sysconf(_SC_PAGESIZE); vec_size = FILE_SIZE / page_size; @@ -341,8 +339,7 @@ TEST(check_tmpfs_mmap) } /* - * Touch a page in the middle of the mapping. We expect only - * that page to be fetched into memory. + * Touch a page in the middle of the mapping. */ addr[FILE_SIZE / 2] = 1; retval = mincore(addr, FILE_SIZE, vec); @@ -351,15 +348,6 @@ TEST(check_tmpfs_mmap) TH_LOG("Page not found in memory after use"); } - i = FILE_SIZE / 2 / page_size + 1; - while (i < vec_size && vec[i]) { - ra_pages++; - i++; - } - ASSERT_EQ(ra_pages, 0) { - TH_LOG("Read-ahead pages found in memory"); - } - munmap(addr, FILE_SIZE); close(fd); free(vec); From 9e2bd67773579fdd2e58de9628c2d319f3f518e7 Mon Sep 17 00:00:00 2001 From: wangxuewen Date: Mon, 7 Apr 2025 18:30:17 +0800 Subject: [PATCH 0549/2924] mm/hugetlb: add a line break at the end of the format string Missing line break at the end of the format string. Link: https://lkml.kernel.org/r/20250407103017.2979821-1-18810879172@163.com Signed-off-by: wangxuewen Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ea065347866e2c..e3e6ac991b9c6b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4647,7 +4647,7 @@ static void __init hugetlb_sysfs_init(void) err = hugetlb_sysfs_add_hstate(h, hugepages_kobj, hstate_kobjs, &hstate_attr_group); if (err) - pr_err("HugeTLB: Unable to add hstate %s", h->name); + pr_err("HugeTLB: Unable to add hstate %s\n", h->name); } #ifdef CONFIG_NUMA From 8ab1b16023961dc640023b10436d282f905835ad Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Thu, 3 Apr 2025 16:54:17 -0700 Subject: [PATCH 0550/2924] mm: fix filemap_get_folios_contig returning batches of identical folios filemap_get_folios_contig() is supposed to return distinct folios found within [start, end]. Large folios in the Xarray become multi-index entries. xas_next() can iterate through the sub-indexes before finding a sibling entry and breaking out of the loop. This can result in a returned folio_batch containing an indeterminate number of duplicate folios, which forces the callers to skeptically handle the returned batch. This is inefficient and incurs a large maintenance overhead. We can fix this by calling xas_advance() after we have successfully adding a folio to the batch to ensure our Xarray is positioned such that it will correctly find the next folio - similar to filemap_get_read_batch(). Link: https://lkml.kernel.org/r/Z-8s1-kiIDkzgRbc@fedora Fixes: 35b471467f88 ("filemap: add filemap_get_folios_contig()") Signed-off-by: Vishal Moola (Oracle) Reported-by: Qu Wenruo Closes: https://lkml.kernel.org/r/b714e4de-2583-4035-b829-72cfb5eb6fc6@gmx.com Tested-by: Qu Wenruo Cc: Matthew Wilcox (Oracle) Cc: Vivek Kasireddy Cc: Signed-off-by: Andrew Morton --- mm/filemap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/filemap.c b/mm/filemap.c index b5e784f34d980b..7b90cbeb4a1adf 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2244,6 +2244,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, *start = folio->index + nr; goto out; } + xas_advance(&xas, folio_next_index(folio) - 1); continue; put_folio: folio_put(folio); From 8c56c5dbcf52220cc9be7a36e7f21ebd5939e0b9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 8 Apr 2025 10:59:50 +0200 Subject: [PATCH 0551/2924] mm: (un)track_pfn_copy() fix + doc improvements We got a late smatch warning and some additional review feedback. smatch warnings: mm/memory.c:1428 copy_page_range() error: uninitialized symbol 'pfn'. We actually use the pfn only when it is properly initialized; however, we may pass an uninitialized value to a function -- although it will not use it that likely still is UB in C. So let's just fix it by always initializing pfn in the caller of track_pfn_copy(), and improving the documentation of track_pfn_copy(). While at it, clarify the doc of untrack_pfn_copy(), that internal checks make sure if we actually have to untrack anything. Link: https://lkml.kernel.org/r/20250408085950.976103-1-david@redhat.com Fixes: dc84bc2aba85 ("x86/mm/pat: Fix VM_PAT handling when fork() fails in copy_page_range()") Signed-off-by: David Hildenbrand Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202503270941.IFILyNCX-lkp@intel.com/ Reviewed-by: Lorenzo Stoakes Acked-by: Ingo Molnar Cc: Andrew Morton Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: Rik van Riel Cc: "H. Peter Anvin" Cc: Linus Torvalds Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 9 ++++++--- mm/memory.c | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e2b705c149454a..b50447ef1c921c 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1511,8 +1511,9 @@ static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, /* * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page - * tables copied during copy_page_range(). On success, stores the pfn to be - * passed to untrack_pfn_copy(). + * tables copied during copy_page_range(). Will store the pfn to be + * passed to untrack_pfn_copy() only if there is something to be untracked. + * Callers should initialize the pfn to 0. */ static inline int track_pfn_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, unsigned long *pfn) @@ -1522,7 +1523,9 @@ static inline int track_pfn_copy(struct vm_area_struct *dst_vma, /* * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during - * copy_page_range(), but after track_pfn_copy() was already called. + * copy_page_range(), but after track_pfn_copy() was already called. Can + * be called even if track_pfn_copy() did not actually track anything: + * handled internally. */ static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma, unsigned long pfn) diff --git a/mm/memory.c b/mm/memory.c index 2d8c265fc7d607..1a35165622e1c1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1361,7 +1361,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; struct mmu_notifier_range range; - unsigned long next, pfn; + unsigned long next, pfn = 0; bool is_cow; int ret; From 0aa8dbe5a8dcaf0cc083f4a519a2906e1eb4609e Mon Sep 17 00:00:00 2001 From: Jean-Michel Hautbois Date: Fri, 28 Mar 2025 12:14:24 +0100 Subject: [PATCH 0552/2924] mailmap: add entry for Jean-Michel Hautbois As recent contributions where made with the @ideasonboard.com email, any reply would fail. Add the proper address to map this old one. Link: https://lkml.kernel.org/r/20250328-mailmap-v2-v2-1-bdc69d2193ca@yoseli.org Signed-off-by: Jean-Michel Hautbois Acked-by: Laurent Pinchart Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 6a874f9950bf33..6efaee6537e49a 100644 --- a/.mailmap +++ b/.mailmap @@ -322,6 +322,7 @@ Jayachandran C Jayachandran C Jayachandran C +Jean-Michel Hautbois Jean Tourrilhes Jeevan Shriram Jeff Garzik From e6e07b696da529e85d1ba880555b5df5c80a46bd Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 9 Apr 2025 22:51:11 +0000 Subject: [PATCH 0553/2924] alloc_tag: handle incomplete bulk allocations in vm_module_tags_populate alloc_pages_bulk_node() may partially succeed and allocate fewer than the requested nr_pages. There are several conditions under which this can occur, but we have encountered the case where CONFIG_PAGE_OWNER is enabled causing all bulk allocations to always fallback to single page allocations due to commit 187ad460b841 ("mm/page_alloc: avoid page allocator recursion with pagesets.lock held"). Currently vm_module_tags_populate() immediately fails when alloc_pages_bulk_node() returns fewer than the requested number of pages. When this happens memory allocation profiling gets disabled, for example [ 14.297583] [9: modprobe: 465] Failed to allocate memory for allocation tags in the module scsc_wlan. Memory allocation profiling is disabled! [ 14.299339] [9: modprobe: 465] modprobe: Failed to insmod '/vendor/lib/modules/scsc_wlan.ko' with args '': Out of memory This patch causes vm_module_tags_populate() to retry bulk allocations for the remaining memory instead of failing immediately which will avoid the disablement of memory allocation profiling. Link: https://lkml.kernel.org/r/20250409225111.3770347-1-tjmercier@google.com Fixes: 0f9b685626da ("alloc_tag: populate memory for module tags as needed") Signed-off-by: T.J. Mercier Reported-by: Janghyuck Kim Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Cc: Signed-off-by: Andrew Morton --- lib/alloc_tag.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 1d893e31361493..25ecc1334b67dd 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -422,11 +422,20 @@ static int vm_module_tags_populate(void) unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN); unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN); unsigned long more_pages; - unsigned long nr; + unsigned long nr = 0; more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT; - nr = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN, - NUMA_NO_NODE, more_pages, next_page); + while (nr < more_pages) { + unsigned long allocated; + + allocated = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN, + NUMA_NO_NODE, more_pages - nr, next_page + nr); + + if (!allocated) + break; + nr += allocated; + } + if (nr < more_pages || vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL, next_page, PAGE_SHIFT) < 0) { From 92868577d05ff75f9f38c6345ed275203827faba Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 9 Apr 2025 15:20:06 +0530 Subject: [PATCH 0554/2924] selftests/mm: fix compiler -Wmaybe-uninitialized warning Following build warning comes up for cow test as 'transferred' variable has not been initialized. Fix the warning via zero init for the variable. CC cow cow.c: In function `do_test_vmsplice_in_parent': cow.c:365:61: warning: `transferred' may be used uninitialized [-Wmaybe-uninitialized] 365 | cur = read(fds[0], new + total, transferred - total); | ~~~~~~~~~~~~^~~~~~~ cow.c:296:29: note: `transferred' was declared here 296 | ssize_t cur, total, transferred; | ^~~~~~~~~~~ CC compaction_test CC gup_longterm Link: https://lkml.kernel.org/r/20250409095006.1422620-1-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Cc: Shuah Khan Cc: Anshuman Khandual Cc: David Hildenbrand Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index f0cb14ea860842..b6cfe0a4b7dfd4 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -293,7 +293,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, .iov_base = mem, .iov_len = size, }; - ssize_t cur, total, transferred; + ssize_t cur, total, transferred = 0; struct comm_pipes comm_pipes; char *old, *new; int ret, fds[2]; From a995199384347261bb3f21b2e171fa7f988bd2f8 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 9 Apr 2025 12:40:43 +0300 Subject: [PATCH 0555/2924] mm: fix apply_to_existing_page_range() In the case of apply_to_existing_page_range(), apply_to_pte_range() is reached with 'create' set to false. When !create, the loop over the PTE page table is broken. apply_to_pte_range() will only move to the next PTE entry if 'create' is true or if the current entry is not pte_none(). This means that the user of apply_to_existing_page_range() will not have 'fn' called for any entries after the first pte_none() in the PTE page table. Fix the loop logic in apply_to_pte_range(). There are no known runtime issues from this, but the fix is trivial enough for stable@ even without a known buggy user. Link: https://lkml.kernel.org/r/20250409094043.1629234-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Fixes: be1db4753ee6 ("mm/memory.c: add apply_to_existing_page_range() helper") Cc: Daniel Axtens Cc: David Hildenbrand Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 1a35165622e1c1..44481fe7c629ec 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2938,11 +2938,11 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, if (fn) { do { if (create || !pte_none(ptep_get(pte))) { - err = fn(pte++, addr, data); + err = fn(pte, addr, data); if (err) break; } - } while (addr += PAGE_SIZE, addr != end); + } while (pte++, addr += PAGE_SIZE, addr != end); } *mask |= PGTBL_PTE_MODIFIED; From 9ae0c92fec69374c6db8dddb0df00d86b9afa5da Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 11 Apr 2025 09:26:47 +0800 Subject: [PATCH 0556/2924] crypto: scomp - Fix wild memory accesses in scomp_free_streams In order to use scomp_free_streams to free the partially allocted streams in the allocation error path, move the alg->stream assignment to the beginning. Also check for error pointers in scomp_free_streams before freeing the ctx. Finally set alg->stream to NULL to not break subsequent attempts to allocate the streams. Fixes: 3d72ad46a23a ("crypto: acomp - Move stream management into scomp layer") Reported-by: syzkaller Co-developed-by: Kuniyuki Iwashima Signed-off-by: Kuniyuki Iwashima Co-developed-by: Herbert Xu Signed-off-by: Herbert Xu --- crypto/scompress.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crypto/scompress.c b/crypto/scompress.c index f67ce38d203d84..5762fcc63b5158 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -111,13 +111,14 @@ static void scomp_free_streams(struct scomp_alg *alg) struct crypto_acomp_stream __percpu *stream = alg->stream; int i; + alg->stream = NULL; if (!stream) return; for_each_possible_cpu(i) { struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i); - if (!ps->ctx) + if (IS_ERR_OR_NULL(ps->ctx)) break; alg->free_ctx(ps->ctx); @@ -135,6 +136,8 @@ static int scomp_alloc_streams(struct scomp_alg *alg) if (!stream) return -ENOMEM; + alg->stream = stream; + for_each_possible_cpu(i) { struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i); @@ -146,8 +149,6 @@ static int scomp_alloc_streams(struct scomp_alg *alg) spin_lock_init(&ps->lock); } - - alg->stream = stream; return 0; } From b2e689baf220408aff8ee5dfb4edb0817e1632bb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 11 Apr 2025 15:14:18 +0800 Subject: [PATCH 0557/2924] crypto: ahash - Disable request chaining Disable hash request chaining in case a driver that copies an ahash_request object by hand accidentally triggers chaining. Reported-by: Manorit Chawdhry Fixes: f2ffe5a9183d ("crypto: hash - Add request chaining API") Signed-off-by: Herbert Xu Tested-by: Manorit Chawdhry Signed-off-by: Herbert Xu --- crypto/ahash.c | 76 +--------------------------------- include/crypto/hash.h | 6 ++- include/crypto/internal/hash.h | 2 +- 3 files changed, 7 insertions(+), 77 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 9f57b925b116da..2d9eec2b2b1c6e 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -315,16 +315,7 @@ EXPORT_SYMBOL_GPL(crypto_ahash_setkey); static bool ahash_request_hasvirt(struct ahash_request *req) { - struct ahash_request *r2; - - if (ahash_request_isvirt(req)) - return true; - - list_for_each_entry(r2, &req->base.list, base.list) - if (ahash_request_isvirt(r2)) - return true; - - return false; + return ahash_request_isvirt(req); } static int ahash_reqchain_virt(struct ahash_save_req_state *state, @@ -472,7 +463,6 @@ static int ahash_do_req_chain(struct ahash_request *req, bool update = op == crypto_ahash_alg(tfm)->update; struct ahash_save_req_state *state; struct ahash_save_req_state state0; - struct ahash_request *r2; u8 *page = NULL; int err; @@ -509,7 +499,6 @@ static int ahash_do_req_chain(struct ahash_request *req, state->offset = 0; state->nbytes = 0; INIT_LIST_HEAD(&state->head); - list_splice_init(&req->base.list, &state->head); if (page) sg_init_one(&state->sg, page, PAGE_SIZE); @@ -540,9 +529,6 @@ static int ahash_do_req_chain(struct ahash_request *req, out_set_chain: req->base.err = err; - list_for_each_entry(r2, &req->base.list, base.list) - r2->base.err = err; - return err; } @@ -551,19 +537,10 @@ int crypto_ahash_init(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) { - struct ahash_request *r2; int err; err = crypto_shash_init(prepare_shash_desc(req, tfm)); req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) { - struct shash_desc *desc; - - desc = prepare_shash_desc(r2, tfm); - r2->base.err = crypto_shash_init(desc); - } - return err; } @@ -620,19 +597,10 @@ int crypto_ahash_update(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) { - struct ahash_request *r2; int err; err = shash_ahash_update(req, ahash_request_ctx(req)); req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) { - struct shash_desc *desc; - - desc = ahash_request_ctx(r2); - r2->base.err = shash_ahash_update(r2, desc); - } - return err; } @@ -645,19 +613,10 @@ int crypto_ahash_final(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) { - struct ahash_request *r2; int err; err = crypto_shash_final(ahash_request_ctx(req), req->result); req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) { - struct shash_desc *desc; - - desc = ahash_request_ctx(r2); - r2->base.err = crypto_shash_final(desc, r2->result); - } - return err; } @@ -670,19 +629,10 @@ int crypto_ahash_finup(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) { - struct ahash_request *r2; int err; err = shash_ahash_finup(req, ahash_request_ctx(req)); req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) { - struct shash_desc *desc; - - desc = ahash_request_ctx(r2); - r2->base.err = shash_ahash_finup(r2, desc); - } - return err; } @@ -757,19 +707,10 @@ int crypto_ahash_digest(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) { - struct ahash_request *r2; int err; err = shash_ahash_digest(req, prepare_shash_desc(req, tfm)); req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) { - struct shash_desc *desc; - - desc = prepare_shash_desc(r2, tfm); - r2->base.err = shash_ahash_digest(r2, desc); - } - return err; } @@ -1133,20 +1074,5 @@ int ahash_register_instance(struct crypto_template *tmpl, } EXPORT_SYMBOL_GPL(ahash_register_instance); -void ahash_request_free(struct ahash_request *req) -{ - struct ahash_request *tmp; - struct ahash_request *r2; - - if (unlikely(!req)) - return; - - list_for_each_entry_safe(r2, tmp, &req->base.list, base.list) - kfree_sensitive(r2); - - kfree_sensitive(req); -} -EXPORT_SYMBOL_GPL(ahash_request_free); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 2aa83ee0ec989b..a67988316d06b8 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -10,6 +10,7 @@ #include #include +#include #include /* Set this bit for virtual address instead of SG list. */ @@ -581,7 +582,10 @@ static inline struct ahash_request *ahash_request_alloc_noprof( * ahash_request_free() - zeroize and free the request data structure * @req: request data structure cipher handle to be freed */ -void ahash_request_free(struct ahash_request *req); +static inline void ahash_request_free(struct ahash_request *req) +{ + kfree_sensitive(req); +} static inline struct ahash_request *ahash_request_cast( struct crypto_async_request *req) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 485e22cf517efc..052ac7924af3ed 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -249,7 +249,7 @@ static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) static inline bool ahash_request_chained(struct ahash_request *req) { - return crypto_request_chained(&req->base); + return false; } static inline bool ahash_request_isvirt(struct ahash_request *req) From 8b82f656826c741d032490b089a5638c33f2c91d Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Wed, 9 Apr 2025 11:14:48 +0530 Subject: [PATCH 0558/2924] pds_core: fix memory leak in pdsc_debugfs_add_qcq() The memory allocated for intr_ctrl_regset, which is passed to debugfs_create_regset32() may not be cleaned up when the driver is removed. Fix that by using device managed allocation for it. Fixes: 45d76f492938 ("pds_core: set up device and adminq") Signed-off-by: Abdun Nihaal Reviewed-by: Michal Swiatkowski Reviewed-by: Shannon Nelson Link: https://patch.msgid.link/20250409054450.48606-1-abdun.nihaal@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c index ac37a4e738ae7d..04c5e3abd8d706 100644 --- a/drivers/net/ethernet/amd/pds_core/debugfs.c +++ b/drivers/net/ethernet/amd/pds_core/debugfs.c @@ -154,8 +154,9 @@ void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq) debugfs_create_u32("index", 0400, intr_dentry, &intr->index); debugfs_create_u32("vector", 0400, intr_dentry, &intr->vector); - intr_ctrl_regset = kzalloc(sizeof(*intr_ctrl_regset), - GFP_KERNEL); + intr_ctrl_regset = devm_kzalloc(pdsc->dev, + sizeof(*intr_ctrl_regset), + GFP_KERNEL); if (!intr_ctrl_regset) return; intr_ctrl_regset->regs = intr_ctrl_regs; From 4c324085062919d4e21c69e5e78456dcec0052fe Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Wed, 9 Apr 2025 19:13:20 -0500 Subject: [PATCH 0559/2924] scsi: ufs: mcq: Add NULL check in ufshcd_mcq_abort() A race can occur between the MCQ completion path and the abort handler: once a request completes, __blk_mq_free_request() sets rq->mq_hctx to NULL, meaning the subsequent ufshcd_mcq_req_to_hwq() call in ufshcd_mcq_abort() can return a NULL pointer. If this NULL pointer is dereferenced, the kernel will crash. Add a NULL check for the returned hwq pointer. If hwq is NULL, log an error and return FAILED, preventing a potential NULL-pointer dereference. As suggested by Bart, the ufshcd_cmd_inflight() check is removed. This is similar to the fix in commit 74736103fb41 ("scsi: ufs: core: Fix ufshcd_abort_one racing issue"). This is found by our static analysis tool KNighter. Signed-off-by: Chenyuan Yang Link: https://lore.kernel.org/r/20250410001320.2219341-1-chenyuan0y@gmail.com Fixes: f1304d442077 ("scsi: ufs: mcq: Added ufshcd_mcq_abort()") Reviewed-by: Bart Van Assche Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufs-mcq.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 240ce135bbfbc3..f1294c29f48491 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -677,13 +677,6 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) unsigned long flags; int err; - if (!ufshcd_cmd_inflight(lrbp->cmd)) { - dev_err(hba->dev, - "%s: skip abort. cmd at tag %d already completed.\n", - __func__, tag); - return FAILED; - } - /* Skip task abort in case previous aborts failed and report failure */ if (lrbp->req_abort_skip) { dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n", @@ -692,6 +685,11 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) } hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) { + dev_err(hba->dev, "%s: skip abort. cmd at tag %d already completed.\n", + __func__, tag); + return FAILED; + } if (ufshcd_mcq_sqe_search(hba, hwq, tag)) { /* From f3fdd4fba16c74697d8bc730b82fb7c1eff7fab3 Mon Sep 17 00:00:00 2001 From: Damodharam Ammepalli Date: Wed, 9 Apr 2025 10:33:12 -0700 Subject: [PATCH 0560/2924] ethtool: cmis_cdb: use correct rpl size in ethtool_cmis_module_poll() rpl is passed as a pointer to ethtool_cmis_module_poll(), so the correct size of rpl is sizeof(*rpl) which should be just 1 byte. Using the pointer size instead can cause stack corruption: Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ethtool_cmis_wait_for_cond+0xf4/0x100 CPU: 72 UID: 0 PID: 4440 Comm: kworker/72:2 Kdump: loaded Tainted: G OE 6.11.0 #24 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: Dell Inc. PowerEdge R760/04GWWM, BIOS 1.6.6 09/20/2023 Workqueue: events module_flash_fw_work Call Trace: panic+0x339/0x360 ? ethtool_cmis_wait_for_cond+0xf4/0x100 ? __pfx_status_success+0x10/0x10 ? __pfx_status_fail+0x10/0x10 __stack_chk_fail+0x10/0x10 ethtool_cmis_wait_for_cond+0xf4/0x100 ethtool_cmis_cdb_execute_cmd+0x1fc/0x330 ? __pfx_status_fail+0x10/0x10 cmis_cdb_module_features_get+0x6d/0xd0 ethtool_cmis_cdb_init+0x8a/0xd0 ethtool_cmis_fw_update+0x46/0x1d0 module_flash_fw_work+0x17/0xa0 process_one_work+0x179/0x390 worker_thread+0x239/0x340 ? __pfx_worker_thread+0x10/0x10 kthread+0xcc/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2d/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Fixes: a39c84d79625 ("ethtool: cmis_cdb: Add a layer for supporting CDB commands") Reviewed-by: Andy Gospodarek Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Signed-off-by: Damodharam Ammepalli Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250409173312.733012-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- net/ethtool/cmis_cdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c index 0e2691ccb0df38..3057576bc81e3d 100644 --- a/net/ethtool/cmis_cdb.c +++ b/net/ethtool/cmis_cdb.c @@ -351,7 +351,7 @@ ethtool_cmis_module_poll(struct net_device *dev, struct netlink_ext_ack extack = {}; int err; - ethtool_cmis_page_init(&page_data, 0, offset, sizeof(rpl)); + ethtool_cmis_page_init(&page_data, 0, offset, sizeof(*rpl)); page_data.data = (u8 *)rpl; err = ops->get_module_eeprom_by_page(dev, &page_data, &extack); From 52024cd6ec71a6ca934d0cc12452bd8d49850679 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 10 Apr 2025 11:53:19 +0800 Subject: [PATCH 0561/2924] net: mctp: Set SOCK_RCU_FREE Bind lookup runs under RCU, so ensure that a socket doesn't go away in the middle of a lookup. Fixes: 833ef3b91de6 ("mctp: Populate socket implementation") Signed-off-by: Matt Johnston Link: https://patch.msgid.link/20250410-mctp-rcu-sock-v1-1-872de9fdc877@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/af_mctp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index dd895617defd37..9b12ca97f41282 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -630,6 +630,9 @@ static int mctp_sk_hash(struct sock *sk) { struct net *net = sock_net(sk); + /* Bind lookup runs under RCU, remain live during that. */ + sock_set_flag(sk, SOCK_RCU_FREE); + mutex_lock(&net->mctp.bind_lock); sk_add_node_rcu(sk, &net->mctp.binds); mutex_unlock(&net->mctp.bind_lock); From cdd445258db9919e9dde497a6d5c3477ea7faf4d Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Fri, 11 Apr 2025 16:44:18 +0530 Subject: [PATCH 0562/2924] scsi: mpi3mr: Fix pending I/O counter Commit 199510e33dea ("scsi: mpi3mr: Update consumer index of reply queues after every 100 replies") introduced a regression with the per-reply queue pending I/O counter which was erroneously decremented, leading to the counter going negative. Drop the incorrect atomic decrement for the pending I/O counter. Fixes: 199510e33dea ("scsi: mpi3mr: Update consumer index of reply queues after every 100 replies") Cc: stable@vger.kernel.org Co-developed-by: Sathya Prakash Signed-off-by: Sathya Prakash Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20250411111419.135485-2-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 3fcb1ad3b070d5..d6e402aacb2af1 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -565,7 +565,7 @@ int mpi3mr_process_op_reply_q(struct mpi3mr_ioc *mrioc, WRITE_ONCE(op_req_q->ci, le16_to_cpu(reply_desc->request_queue_ci)); mpi3mr_process_op_reply_desc(mrioc, reply_desc, &reply_dma, reply_qidx); - atomic_dec(&op_reply_q->pend_ios); + if (reply_dma) mpi3mr_repost_reply_buf(mrioc, reply_dma); num_op_reply++; From 3b5091fee49ffcee512901318ca2425bb1e31a5c Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Fri, 11 Apr 2025 16:44:19 +0530 Subject: [PATCH 0563/2924] scsi: mpi3mr: Reset the pending interrupt flag If an admin interrupt is missed, admin_pend_isr may stay set and trigger admin reply processing even when no admin I/Os are pending. Clearing/Resetting it in the admin completion path prevents this. Fixes: ca41929b2ed5 ("scsi: mpi3mr: Check admin reply queue from Watchdog") Cc: stable@vger.kernel.org Co-developed-by: Sathya Prakash Signed-off-by: Sathya Prakash Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20250411111419.135485-3-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index d6e402aacb2af1..003e1f7005c45a 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -451,6 +451,7 @@ int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc) return 0; } + atomic_set(&mrioc->admin_pend_isr, 0); reply_desc = (struct mpi3_default_reply_descriptor *)mrioc->admin_reply_base + admin_reply_ci; @@ -2925,6 +2926,7 @@ static int mpi3mr_setup_admin_qpair(struct mpi3mr_ioc *mrioc) mrioc->admin_reply_ci = 0; mrioc->admin_reply_ephase = 1; atomic_set(&mrioc->admin_reply_q_in_use, 0); + atomic_set(&mrioc->admin_pend_isr, 0); if (!mrioc->admin_req_base) { mrioc->admin_req_base = dma_alloc_coherent(&mrioc->pdev->dev, @@ -4653,6 +4655,7 @@ void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc) if (mrioc->admin_reply_base) memset(mrioc->admin_reply_base, 0, mrioc->admin_reply_q_sz); atomic_set(&mrioc->admin_reply_q_in_use, 0); + atomic_set(&mrioc->admin_pend_isr, 0); if (mrioc->init_cmds.reply) { memset(mrioc->init_cmds.reply, 0, sizeof(*mrioc->init_cmds.reply)); From f7a11cba0ed79d9d37941dddf69a8a655c8644bc Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 10 Apr 2025 09:11:17 -0700 Subject: [PATCH 0564/2924] bonding: hold ops lock around get_link syzbot reports a case of ethtool_ops->get_link being called without ops lock: ethtool_op_get_link+0x15/0x60 net/ethtool/ioctl.c:63 bond_check_dev_link+0x1fb/0x4b0 drivers/net/bonding/bond_main.c:864 bond_miimon_inspect drivers/net/bonding/bond_main.c:2734 [inline] bond_mii_monitor+0x49d/0x3170 drivers/net/bonding/bond_main.c:2956 process_one_work kernel/workqueue.c:3238 [inline] process_scheduled_works+0xac3/0x18e0 kernel/workqueue.c:3319 worker_thread+0x870/0xd50 kernel/workqueue.c:3400 kthread+0x7b7/0x940 kernel/kthread.c:464 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:153 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Commit 04efcee6ef8d ("net: hold instance lock during NETDEV_CHANGE") changed to lockless __linkwatch_sync_dev in ethtool_op_get_link. All paths except bonding are coming via locked ioctl. Add necessary locking to bonding. Reviewed-by: Hangbin Liu Reported-by: syzbot+48c14f61594bdfadb086@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=48c14f61594bdfadb086 Fixes: 04efcee6ef8d ("net: hold instance lock during NETDEV_CHANGE") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250410161117.3519250-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 950d8e4d86f8b4..8ea183da8d5398 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -850,8 +850,9 @@ static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) { const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - struct ifreq ifr; struct mii_ioctl_data *mii; + struct ifreq ifr; + int ret; if (!reporting && !netif_running(slave_dev)) return 0; @@ -860,9 +861,13 @@ static int bond_check_dev_link(struct bonding *bond, return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; /* Try to get link status using Ethtool first. */ - if (slave_dev->ethtool_ops->get_link) - return slave_dev->ethtool_ops->get_link(slave_dev) ? - BMSR_LSTATUS : 0; + if (slave_dev->ethtool_ops->get_link) { + netdev_lock_ops(slave_dev); + ret = slave_dev->ethtool_ops->get_link(slave_dev); + netdev_unlock_ops(slave_dev); + + return ret ? BMSR_LSTATUS : 0; + } /* Ethtool can't be used, fallback to MII ioctls. */ if (slave_ops->ndo_eth_ioctl) { From 7f533cc5ee4c4436cee51dc58e81dfd9c3384418 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Tue, 24 Dec 2024 13:17:57 +0300 Subject: [PATCH 0565/2924] scsi: target: iscsi: Fix timeout on deleted connection NOPIN response timer may expire on a deleted connection and crash with such logs: Did not receive response to NOPIN on CID: 0, failing connection for I_T Nexus (null),i,0x00023d000125,iqn.2017-01.com.iscsi.target,t,0x3d BUG: Kernel NULL pointer dereference on read at 0x00000000 NIP strlcpy+0x8/0xb0 LR iscsit_fill_cxn_timeout_err_stats+0x5c/0xc0 [iscsi_target_mod] Call Trace: iscsit_handle_nopin_response_timeout+0xfc/0x120 [iscsi_target_mod] call_timer_fn+0x58/0x1f0 run_timer_softirq+0x740/0x860 __do_softirq+0x16c/0x420 irq_exit+0x188/0x1c0 timer_interrupt+0x184/0x410 That is because nopin response timer may be re-started on nopin timer expiration. Stop nopin timer before stopping the nopin response timer to be sure that no one of them will be re-started. Signed-off-by: Dmitry Bogdanov Link: https://lore.kernel.org/r/20241224101757.32300-1-d.bogdanov@yadro.com Reviewed-by: Maurizio Lombardi Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 1244ef3aa86c1d..620ba6e0ab0756 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4263,8 +4263,8 @@ int iscsit_close_connection( spin_unlock(&iscsit_global->ts_bitmap_lock); iscsit_stop_timers_for_cmds(conn); - iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + iscsit_stop_nopin_response_timer(conn); if (conn->conn_transport->iscsit_wait_conn) conn->conn_transport->iscsit_wait_conn(conn); From f8cba9a700cf38b181df7c1d809cd73c6e1b2df9 Mon Sep 17 00:00:00 2001 From: Manish Pandey Date: Fri, 11 Apr 2025 17:46:29 +0530 Subject: [PATCH 0566/2924] scsi: ufs: qcom: Add quirks for Samsung UFS devices Introduce quirks for Samsung UFS devices to adjust PA TX HSG1 sync length and TX_HS_EQUALIZER settings on the Qualcomm UFS Host controller. This ensures proper functionality of Samsung UFS devices with the Qualcomm UFS Host controller. Signed-off-by: Manish Pandey Link: https://lore.kernel.org/r/20250411121630.21330-2-quic_mapa@quicinc.com Reviewed-by: Bean Huo Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-qcom.c | 43 +++++++++++++++++++++++++++++++++++++ drivers/ufs/host/ufs-qcom.h | 18 ++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 1b37449fbffc50..c0761ccc1381e3 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -33,6 +33,10 @@ ((((c) >> 16) & MCQ_QCFGPTR_MASK) * MCQ_QCFGPTR_UNIT) #define MCQ_QCFG_SIZE 0x40 +/* De-emphasis for gear-5 */ +#define DEEMPHASIS_3_5_dB 0x04 +#define NO_DEEMPHASIS 0x0 + enum { TSTBUS_UAWM, TSTBUS_UARM, @@ -795,6 +799,23 @@ static int ufs_qcom_icc_update_bw(struct ufs_qcom_host *host) return ufs_qcom_icc_set_bw(host, bw_table.mem_bw, bw_table.cfg_bw); } +static void ufs_qcom_set_tx_hs_equalizer(struct ufs_hba *hba, u32 gear, u32 tx_lanes) +{ + u32 equalizer_val; + int ret, i; + + /* Determine the equalizer value based on the gear */ + equalizer_val = (gear == 5) ? DEEMPHASIS_3_5_dB : NO_DEEMPHASIS; + + for (i = 0; i < tx_lanes; i++) { + ret = ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(TX_HS_EQUALIZER, i), + equalizer_val); + if (ret) + dev_err(hba->dev, "%s: failed equalizer lane %d\n", + __func__, i); + } +} + static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, enum ufs_notify_change_status status, const struct ufs_pa_layer_attr *dev_max_params, @@ -846,6 +867,11 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, dev_req_params->gear_tx, PA_INITIAL_ADAPT); } + + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING) + ufs_qcom_set_tx_hs_equalizer(hba, + dev_req_params->gear_tx, dev_req_params->lane_tx); + break; case POST_CHANGE: if (ufs_qcom_cfg_timers(hba, false)) { @@ -893,6 +919,16 @@ static int ufs_qcom_quirk_host_pa_saveconfigtime(struct ufs_hba *hba) (pa_vs_config_reg1 | (1 << 12))); } +static void ufs_qcom_override_pa_tx_hsg1_sync_len(struct ufs_hba *hba) +{ + int err; + + err = ufshcd_dme_peer_set(hba, UIC_ARG_MIB(PA_TX_HSG1_SYNC_LENGTH), + PA_TX_HSG1_SYNC_LENGTH_VAL); + if (err) + dev_err(hba->dev, "Failed (%d) set PA_TX_HSG1_SYNC_LENGTH\n", err); +} + static int ufs_qcom_apply_dev_quirks(struct ufs_hba *hba) { int err = 0; @@ -900,6 +936,9 @@ static int ufs_qcom_apply_dev_quirks(struct ufs_hba *hba) if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_PA_SAVECONFIGTIME) err = ufs_qcom_quirk_host_pa_saveconfigtime(hba); + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH) + ufs_qcom_override_pa_tx_hsg1_sync_len(hba); + return err; } @@ -914,6 +953,10 @@ static struct ufs_dev_quirk ufs_qcom_dev_fixups[] = { { .wmanufacturerid = UFS_VENDOR_WDC, .model = UFS_ANY_MODEL, .quirk = UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE }, + { .wmanufacturerid = UFS_VENDOR_SAMSUNG, + .model = UFS_ANY_MODEL, + .quirk = UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH | + UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING }, {} }; diff --git a/drivers/ufs/host/ufs-qcom.h b/drivers/ufs/host/ufs-qcom.h index d0e6ec9128e79d..05d4cb569c5005 100644 --- a/drivers/ufs/host/ufs-qcom.h +++ b/drivers/ufs/host/ufs-qcom.h @@ -122,8 +122,11 @@ enum { TMRLUT_HW_CGC_EN | OCSC_HW_CGC_EN) /* QUniPro Vendor specific attributes */ +#define PA_TX_HSG1_SYNC_LENGTH 0x1552 #define PA_VS_CONFIG_REG1 0x9000 #define DME_VS_CORE_CLK_CTRL 0xD002 +#define TX_HS_EQUALIZER 0x0037 + /* bit and mask definitions for DME_VS_CORE_CLK_CTRL attribute */ #define CLK_1US_CYCLES_MASK_V4 GENMASK(27, 16) #define CLK_1US_CYCLES_MASK GENMASK(7, 0) @@ -141,6 +144,21 @@ enum { #define UNIPRO_CORE_CLK_FREQ_201_5_MHZ 202 #define UNIPRO_CORE_CLK_FREQ_403_MHZ 403 +/* TX_HSG1_SYNC_LENGTH attr value */ +#define PA_TX_HSG1_SYNC_LENGTH_VAL 0x4A + +/* + * Some ufs device vendors need a different TSync length. + * Enable this quirk to give an additional TX_HS_SYNC_LENGTH. + */ +#define UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH BIT(16) + +/* + * Some ufs device vendors need a different Deemphasis setting. + * Enable this quirk to tune TX Deemphasis parameters. + */ +#define UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING BIT(17) + /* ICE allocator type to share AES engines among TX stream and RX stream */ #define ICE_ALLOCATOR_TYPE 2 From 569330a34a31a52c904239439984a59972c11d28 Mon Sep 17 00:00:00 2001 From: Manish Pandey Date: Fri, 11 Apr 2025 17:46:30 +0530 Subject: [PATCH 0567/2924] scsi: ufs: Introduce quirk to extend PA_HIBERN8TIME for UFS devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Samsung UFS devices require additional time in hibern8 mode before exiting, beyond the negotiated handshaking phase between the host and device. Introduce a quirk to increase the PA_HIBERN8TIME parameter by 100 µs, a value derived from experiments, to ensure a proper hibernation process. Signed-off-by: Manish Pandey Link: https://lore.kernel.org/r/20250411121630.21330-3-quic_mapa@quicinc.com Reviewed-by: Bean Huo Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 29 +++++++++++++++++++++++++++++ include/ufs/ufs_quirks.h | 6 ++++++ 2 files changed, 35 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 44156041d88f20..ca2b0aae9d11cc 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -278,6 +278,7 @@ static const struct ufs_dev_quirk ufs_fixups[] = { .model = UFS_ANY_MODEL, .quirk = UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM | UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE | + UFS_DEVICE_QUIRK_PA_HIBER8TIME | UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS }, { .wmanufacturerid = UFS_VENDOR_SKHYNIX, .model = UFS_ANY_MODEL, @@ -8470,6 +8471,31 @@ static int ufshcd_quirk_tune_host_pa_tactivate(struct ufs_hba *hba) return ret; } +/** + * ufshcd_quirk_override_pa_h8time - Ensures proper adjustment of PA_HIBERN8TIME. + * @hba: per-adapter instance + * + * Some UFS devices require specific adjustments to the PA_HIBERN8TIME parameter + * to ensure proper hibernation timing. This function retrieves the current + * PA_HIBERN8TIME value and increments it by 100us. + */ +static void ufshcd_quirk_override_pa_h8time(struct ufs_hba *hba) +{ + u32 pa_h8time; + int ret; + + ret = ufshcd_dme_get(hba, UIC_ARG_MIB(PA_HIBERN8TIME), &pa_h8time); + if (ret) { + dev_err(hba->dev, "Failed to get PA_HIBERN8TIME: %d\n", ret); + return; + } + + /* Increment by 1 to increase hibernation time by 100 µs */ + ret = ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HIBERN8TIME), pa_h8time + 1); + if (ret) + dev_err(hba->dev, "Failed updating PA_HIBERN8TIME: %d\n", ret); +} + static void ufshcd_tune_unipro_params(struct ufs_hba *hba) { ufshcd_vops_apply_dev_quirks(hba); @@ -8480,6 +8506,9 @@ static void ufshcd_tune_unipro_params(struct ufs_hba *hba) if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE) ufshcd_quirk_tune_host_pa_tactivate(hba); + + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_HIBER8TIME) + ufshcd_quirk_override_pa_h8time(hba); } static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba) diff --git a/include/ufs/ufs_quirks.h b/include/ufs/ufs_quirks.h index 41ff44dfa1db3f..f52de5ed1b3b6e 100644 --- a/include/ufs/ufs_quirks.h +++ b/include/ufs/ufs_quirks.h @@ -107,4 +107,10 @@ struct ufs_dev_quirk { */ #define UFS_DEVICE_QUIRK_DELAY_AFTER_LPM (1 << 11) +/* + * Some ufs devices may need more time to be in hibern8 before exiting. + * Enable this quirk to give it an additional 100us. + */ +#define UFS_DEVICE_QUIRK_PA_HIBER8TIME (1 << 12) + #endif /* UFS_QUIRKS_H_ */ From 5b04080cd6028f0737bbbd0c5b462d226cff9052 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:21 +0800 Subject: [PATCH 0568/2924] net: hibmcge: fix incorrect pause frame statistics issue The driver supports pause frames, but does not pass pause frames based on rx pause enable configuration, resulting in incorrect pause frame statistics. like this: mz eno3 '01 80 c2 00 00 01 00 18 2d 04 00 9c 88 08 00 01 ff ff' \ -p 64 -c 100 ethtool -S enp132s0f2 | grep -v ": 0" NIC statistics: rx_octets_total_filt_cnt: 6800 rx_filt_pkt_cnt: 100 The rx pause frames are filtered by the MAC hardware. This patch configures pass pause frames based on the rx puase enable status to ensure that rx pause frames are not filtered. mz eno3 '01 80 c2 00 00 01 00 18 2d 04 00 9c 88 08 00 01 ff ff' \ -p 64 -c 100 ethtool --include-statistics -a enp132s0f2 Pause parameters for enp132s0f2: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on Statistics: tx_pause_frames: 0 rx_pause_frames: 100 Fixes: 3a03763f3876 ("net: hibmcge: Add pauseparam supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c | 3 +++ drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 74a18033b44412..7d3bbd3e2adce4 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -242,6 +242,9 @@ void hbg_hw_set_pause_enable(struct hbg_priv *priv, u32 tx_en, u32 rx_en) HBG_REG_PAUSE_ENABLE_TX_B, tx_en); hbg_reg_write_field(priv, HBG_REG_PAUSE_ENABLE_ADDR, HBG_REG_PAUSE_ENABLE_RX_B, rx_en); + + hbg_reg_write_field(priv, HBG_REG_REC_FILT_CTRL_ADDR, + HBG_REG_REC_FILT_CTRL_PAUSE_FRM_PASS_B, rx_en); } void hbg_hw_get_pause_enable(struct hbg_priv *priv, u32 *tx_en, u32 *rx_en) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h index cc2cc612770d7c..fd623cfd13de70 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h @@ -68,6 +68,7 @@ #define HBG_REG_TRANSMIT_CTRL_AN_EN_B BIT(5) #define HBG_REG_REC_FILT_CTRL_ADDR (HBG_REG_SGMII_BASE + 0x0064) #define HBG_REG_REC_FILT_CTRL_UC_MATCH_EN_B BIT(0) +#define HBG_REG_REC_FILT_CTRL_PAUSE_FRM_PASS_B BIT(4) #define HBG_REG_RX_OCTETS_TOTAL_OK_ADDR (HBG_REG_SGMII_BASE + 0x0080) #define HBG_REG_RX_OCTETS_BAD_ADDR (HBG_REG_SGMII_BASE + 0x0084) #define HBG_REG_RX_UC_PKTS_ADDR (HBG_REG_SGMII_BASE + 0x0088) From 9afaaa54e3eb9b64fc07c06741897800e98ac253 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:22 +0800 Subject: [PATCH 0569/2924] net: hibmcge: fix incorrect multicast filtering issue The driver does not support multicast filtering, the mask must be set to 0xFFFFFFFF. Otherwise, incorrect filtering occurs. This patch fixes this problem. Fixes: 37b367d60d0f ("net: hibmcge: Add unicast frame filter supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c | 4 ++++ drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 7d3bbd3e2adce4..9b65eef62b3fba 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -234,6 +234,10 @@ void hbg_hw_set_mac_filter_enable(struct hbg_priv *priv, u32 enable) { hbg_reg_write_field(priv, HBG_REG_REC_FILT_CTRL_ADDR, HBG_REG_REC_FILT_CTRL_UC_MATCH_EN_B, enable); + + /* only uc filter is supported, so set all bits of mc mask reg to 1 */ + hbg_reg_write64(priv, HBG_REG_STATION_ADDR_LOW_MSK_0, U64_MAX); + hbg_reg_write64(priv, HBG_REG_STATION_ADDR_LOW_MSK_1, U64_MAX); } void hbg_hw_set_pause_enable(struct hbg_priv *priv, u32 tx_en, u32 rx_en) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h index fd623cfd13de70..a6e7f5e62b48aa 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h @@ -135,6 +135,8 @@ #define HBG_REG_STATION_ADDR_HIGH_4_ADDR (HBG_REG_SGMII_BASE + 0x0224) #define HBG_REG_STATION_ADDR_LOW_5_ADDR (HBG_REG_SGMII_BASE + 0x0228) #define HBG_REG_STATION_ADDR_HIGH_5_ADDR (HBG_REG_SGMII_BASE + 0x022C) +#define HBG_REG_STATION_ADDR_LOW_MSK_0 (HBG_REG_SGMII_BASE + 0x0230) +#define HBG_REG_STATION_ADDR_LOW_MSK_1 (HBG_REG_SGMII_BASE + 0x0238) /* PCU */ #define HBG_REG_TX_FIFO_THRSLD_ADDR (HBG_REG_SGMII_BASE + 0x0420) From 4ad3df755a96012f792c7fa2aa62317db3cba82b Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:23 +0800 Subject: [PATCH 0570/2924] net: hibmcge: fix the share of irq statistics among different network ports issue hbg_irqs is a global array which contains irq statistics. However, the irq statistics of different network ports point to the same global array. As a result, the statistics are incorrect. This patch allocates a statistics array for each network port to prevent the statistics of different network ports from affecting each other. irq statistics are removed from hbg_irq_info. Therefore, all data in hbg_irq_info remains unchanged. Therefore, the input parameter of some functions is changed to const. Fixes: 4d089035fa19 ("net: hibmcge: Add interrupt supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hibmcge/hbg_common.h | 8 ++++--- .../ethernet/hisilicon/hibmcge/hbg_debugfs.c | 4 ++-- .../ethernet/hisilicon/hibmcge/hbg_diagnose.c | 2 +- .../net/ethernet/hisilicon/hibmcge/hbg_irq.c | 24 ++++++++++++------- .../net/ethernet/hisilicon/hibmcge/hbg_main.c | 2 +- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index f8cdab62bf85ca..7725cb0c5c8a44 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -108,14 +108,16 @@ struct hbg_irq_info { bool re_enable; bool need_print; bool need_reset; - u64 count; - void (*irq_handle)(struct hbg_priv *priv, struct hbg_irq_info *info); + void (*irq_handle)(struct hbg_priv *priv, + const struct hbg_irq_info *info); }; struct hbg_vector { char name[HBG_VECTOR_NUM][32]; - struct hbg_irq_info *info_array; + + u64 *stats_array; + const struct hbg_irq_info *info_array; u32 info_array_len; }; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c index 5e0ba4d5b08d28..9c09e48359907d 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c @@ -61,7 +61,7 @@ static int hbg_dbg_irq_info(struct seq_file *s, void *unused) { struct net_device *netdev = dev_get_drvdata(s->private); struct hbg_priv *priv = netdev_priv(netdev); - struct hbg_irq_info *info; + const struct hbg_irq_info *info; u32 i; for (i = 0; i < priv->vectors.info_array_len; i++) { @@ -73,7 +73,7 @@ static int hbg_dbg_irq_info(struct seq_file *s, void *unused) info->mask)), str_true_false(info->need_reset), str_true_false(info->need_print), - info->count); + priv->vectors.stats_array[i]); } return 0; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c index d61c03f34ff057..f23fb5920c3cca 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c @@ -234,7 +234,7 @@ static u64 hbg_get_irq_stats(struct hbg_vector *vectors, u32 mask) for (i = 0; i < vectors->info_array_len; i++) if (vectors->info_array[i].mask == mask) - return vectors->info_array[i].count; + return vectors->stats_array[i]; return 0; } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c index e79e9ab3e5308e..8af0bc4cca2166 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c @@ -6,7 +6,7 @@ #include "hbg_hw.h" static void hbg_irq_handle_err(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { if (irq_info->need_print) dev_err(&priv->pdev->dev, @@ -17,30 +17,30 @@ static void hbg_irq_handle_err(struct hbg_priv *priv, } static void hbg_irq_handle_tx(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { napi_schedule(&priv->tx_ring.napi); } static void hbg_irq_handle_rx(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { napi_schedule(&priv->rx_ring.napi); } static void hbg_irq_handle_rx_buf_val(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { priv->stats.rx_fifo_less_empty_thrsld_cnt++; } #define HBG_IRQ_I(name, handle) \ - {#name, HBG_INT_MSK_##name##_B, false, false, false, 0, handle} + {#name, HBG_INT_MSK_##name##_B, false, false, false, handle} #define HBG_ERR_IRQ_I(name, need_print, ndde_reset) \ {#name, HBG_INT_MSK_##name##_B, true, need_print, \ - ndde_reset, 0, hbg_irq_handle_err} + ndde_reset, hbg_irq_handle_err} -static struct hbg_irq_info hbg_irqs[] = { +static const struct hbg_irq_info hbg_irqs[] = { HBG_IRQ_I(RX, hbg_irq_handle_rx), HBG_IRQ_I(TX, hbg_irq_handle_tx), HBG_ERR_IRQ_I(TX_PKT_CPL, true, true), @@ -64,7 +64,7 @@ static struct hbg_irq_info hbg_irqs[] = { static irqreturn_t hbg_irq_handle(int irq_num, void *p) { - struct hbg_irq_info *info; + const struct hbg_irq_info *info; struct hbg_priv *priv = p; u32 status; u32 i; @@ -79,7 +79,7 @@ static irqreturn_t hbg_irq_handle(int irq_num, void *p) hbg_hw_irq_enable(priv, info->mask, false); hbg_hw_irq_clear(priv, info->mask); - info->count++; + priv->vectors.stats_array[i]++; if (info->irq_handle) info->irq_handle(priv, info); @@ -132,6 +132,12 @@ int hbg_irq_init(struct hbg_priv *priv) irq_names_map[i]); } + vectors->stats_array = devm_kcalloc(&priv->pdev->dev, + ARRAY_SIZE(hbg_irqs), + sizeof(u64), GFP_KERNEL); + if (!vectors->stats_array) + return -ENOMEM; + vectors->info_array = hbg_irqs; vectors->info_array_len = ARRAY_SIZE(hbg_irqs); return 0; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 2ac5454338e4de..e5c961ad4b9b80 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -21,7 +21,7 @@ static void hbg_all_irq_enable(struct hbg_priv *priv, bool enabled) { - struct hbg_irq_info *info; + const struct hbg_irq_info *info; u32 i; for (i = 0; i < priv->vectors.info_array_len; i++) { From 4e4ac53335de54bcb9d26842df0998cfd8fcaf90 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:24 +0800 Subject: [PATCH 0571/2924] net: hibmcge: fix wrong mtu log issue A dbg log is generated when the driver modifies the MTU, which is expected to trace the change of the MTU. However, the log is recorded after WRITE_ONCE(). At this time, netdev->mtu has been changed to the new value. As a result, netdev->mtu is the same as new_mtu. This patch modifies the log location and records logs before WRITE_ONCE(). Fixes: ff4edac6e9bd ("net: hibmcge: Implement some .ndo functions") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-5-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index e5c961ad4b9b80..2e64dc1ab355e3 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -203,12 +203,12 @@ static int hbg_net_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) return -EBUSY; - hbg_hw_set_mtu(priv, new_mtu); - WRITE_ONCE(netdev->mtu, new_mtu); - dev_dbg(&priv->pdev->dev, "change mtu from %u to %u\n", netdev->mtu, new_mtu); + hbg_hw_set_mtu(priv, new_mtu); + WRITE_ONCE(netdev->mtu, new_mtu); + return 0; } From 1d6c3e06232e5f53458842915bbff28e8fc29244 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:25 +0800 Subject: [PATCH 0572/2924] net: hibmcge: fix the incorrect np_link fail state issue. In the debugfs file, the driver displays the np_link fail state based on the HBG_NIC_STATE_NP_LINK_FAIL. However, HBG_NIC_STATE_NP_LINK_FAIL is cleared in hbg_service_task() So, this value of np_link fail is always false. This patch directly reads the related register to display the real state. Fixes: e0306637e85d ("net: hibmcge: Add support for mac link exception handling feature") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-6-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c index 9c09e48359907d..01ad82d2f5cc7f 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c @@ -106,6 +106,7 @@ static int hbg_dbg_nic_state(struct seq_file *s, void *unused) { struct net_device *netdev = dev_get_drvdata(s->private); struct hbg_priv *priv = netdev_priv(netdev); + bool np_link_fail; seq_printf(s, "event handling state: %s\n", state_str_true_false(priv, HBG_NIC_STATE_EVENT_HANDLING)); @@ -117,8 +118,10 @@ static int hbg_dbg_nic_state(struct seq_file *s, void *unused) reset_type_str[priv->reset_type]); seq_printf(s, "need reset state: %s\n", state_str_true_false(priv, HBG_NIC_STATE_NEED_RESET)); - seq_printf(s, "np_link fail state: %s\n", - state_str_true_false(priv, HBG_NIC_STATE_NP_LINK_FAIL)); + + np_link_fail = !hbg_reg_read_field(priv, HBG_REG_AN_NEG_STATE_ADDR, + HBG_REG_AN_NEG_STATE_NP_LINK_OK_B); + seq_printf(s, "np_link fail state: %s\n", str_true_false(np_link_fail)); return 0; } From ae6c1dce3244e31011ee65f89fc2484f3cf6cf85 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:26 +0800 Subject: [PATCH 0573/2924] net: hibmcge: fix not restore rx pause mac addr after reset issue The MAC hardware supports receiving two types of pause frames from link partner. One is a pause frame with a destination address of 01:80:C2:00:00:01. The other is a pause frame whose destination address is the address of the hibmcge driver. 01:80:C2:00:00:01 is supported by default. In .ndo_set_mac_address(), the hibmcge driver calls .hbg_hw_set_rx_pause_mac_addr() to set its mac address as the destination address of the rx puase frame. Therefore, pause frames with two types of MAC addresses can be received. Currently, the rx pause addr does not restored after reset. As a result, pause frames whose destination address is the hibmcge driver address cannot be correctly received. This patch restores the configuration by calling .hbg_hw_set_rx_pause_mac_addr() after reset is complete. Fixes: 3f5a61f6d504 ("net: hibmcge: Add reset supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-7-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 4e8cb66f601c0e..a0bcfb5a713d13 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -26,12 +26,15 @@ static void hbg_restore_mac_table(struct hbg_priv *priv) static void hbg_restore_user_def_settings(struct hbg_priv *priv) { + /* The index of host mac is always 0. */ + u64 rx_pause_addr = ether_addr_to_u64(priv->filter.mac_table[0].addr); struct ethtool_pauseparam *pause_param = &priv->user_def.pause_param; hbg_restore_mac_table(priv); hbg_hw_set_mtu(priv, priv->netdev->mtu); hbg_hw_set_pause_enable(priv, pause_param->tx_pause, pause_param->rx_pause); + hbg_hw_set_rx_pause_mac_addr(priv, rx_pause_addr); } int hbg_rebuild(struct hbg_priv *priv) From e1d0b52d87ca68a92f2f8693b8eb475795a9a73f Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:27 +0800 Subject: [PATCH 0574/2924] net: hibmcge: fix multiple phy_stop() issue After detecting the np_link_fail exception, the driver attempts to fix the exception by using phy_stop() and phy_start() in the scheduled task. However, hbg_fix_np_link_fail() and .ndo_stop() may be concurrently executed. As a result, phy_stop() is executed twice, and the following Calltrace occurs: hibmcge 0000:84:00.2 enp132s0f2: Link is Down hibmcge 0000:84:00.2: failed to link between MAC and PHY, try to fix... ------------[ cut here ]------------ called from state HALTED WARNING: CPU: 71 PID: 23391 at drivers/net/phy/phy.c:1503 phy_stop... ... pc : phy_stop+0x138/0x180 lr : phy_stop+0x138/0x180 sp : ffff8000c76bbd40 x29: ffff8000c76bbd40 x28: 0000000000000000 x27: 0000000000000000 x26: ffff2020047358c0 x25: ffff202004735940 x24: ffff20200000e405 x23: ffff2020060e5178 x22: ffff2020060e4000 x21: ffff2020060e49c0 x20: ffff2020060e5170 x19: ffff20202538e000 x18: 0000000000000020 x17: 0000000000000000 x16: ffffcede02e28f40 x15: ffffffffffffffff x14: 0000000000000000 x13: 205d313933333254 x12: 5b5d393430303233 x11: ffffcede04555958 x10: ffffcede04495918 x9 : ffffcede0274fee0 x8 : 00000000000bffe8 x7 : c0000000ffff7fff x6 : 0000000000000001 x5 : 00000000002bffa8 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff20202e429480 Call trace: phy_stop+0x138/0x180 hbg_fix_np_link_fail+0x4c/0x90 [hibmcge] hbg_service_task+0xfc/0x148 [hibmcge] process_one_work+0x180/0x398 worker_thread+0x210/0x328 kthread+0xe0/0xf0 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- This patch adds the rtnl_lock to hbg_fix_np_link_fail() to ensure that other operations are not performed concurrently. In addition, np_link_fail exception can be fixed only when the PHY is link. Fixes: e0306637e85d ("net: hibmcge: Add support for mac link exception handling feature") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-8-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index f29a937ad0879f..42b0083c9193f2 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -2,6 +2,7 @@ // Copyright (c) 2024 Hisilicon Limited. #include +#include #include "hbg_common.h" #include "hbg_hw.h" #include "hbg_mdio.h" @@ -133,12 +134,17 @@ void hbg_fix_np_link_fail(struct hbg_priv *priv) { struct device *dev = &priv->pdev->dev; + rtnl_lock(); + if (priv->stats.np_link_fail_cnt >= HBG_NP_LINK_FAIL_RETRY_TIMES) { dev_err(dev, "failed to fix the MAC link status\n"); priv->stats.np_link_fail_cnt = 0; - return; + goto unlock; } + if (!priv->mac.phydev->link) + goto unlock; + priv->stats.np_link_fail_cnt++; dev_err(dev, "failed to link between MAC and PHY, try to fix...\n"); @@ -147,6 +153,9 @@ void hbg_fix_np_link_fail(struct hbg_priv *priv) */ hbg_phy_stop(priv); hbg_phy_start(priv); + +unlock: + rtnl_unlock(); } static void hbg_phy_adjust_link(struct net_device *netdev) From 0cd34d98dfd4f2b596415b8f12faf7b946613458 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 6 Apr 2025 22:01:42 +0200 Subject: [PATCH 0575/2924] iio: accel: fxls8962af: Fix wakeup source leaks on device unbind Device can be unbound, so driver must also release memory for the wakeup source. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250406-b4-device-wakeup-leak-iio-v1-1-2d7d322a4a93@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/fxls8962af-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index 48e4282964a069..bf1d3923a18179 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -1226,8 +1226,11 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq) if (ret) return ret; - if (device_property_read_bool(dev, "wakeup-source")) - device_init_wakeup(dev, true); + if (device_property_read_bool(dev, "wakeup-source")) { + ret = devm_device_init_wakeup(dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to init wakeup\n"); + } return devm_iio_device_register(dev, indio_dev); } From ad3764b45c1524872b621d5667a56f6a574501bd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 6 Apr 2025 22:01:43 +0200 Subject: [PATCH 0576/2924] iio: adc: qcom-spmi-iadc: Fix wakeup source leaks on device unbind Device can be unbound, so driver must also release memory for the wakeup source. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250406-b4-device-wakeup-leak-iio-v1-2-2d7d322a4a93@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/qcom-spmi-iadc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/qcom-spmi-iadc.c b/drivers/iio/adc/qcom-spmi-iadc.c index 7fb8b2499a1d00..b64a8a407168bb 100644 --- a/drivers/iio/adc/qcom-spmi-iadc.c +++ b/drivers/iio/adc/qcom-spmi-iadc.c @@ -543,7 +543,9 @@ static int iadc_probe(struct platform_device *pdev) else return ret; } else { - device_init_wakeup(iadc->dev, 1); + ret = devm_device_init_wakeup(iadc->dev); + if (ret) + return dev_err_probe(iadc->dev, ret, "Failed to init wakeup\n"); } ret = iadc_update_offset(iadc); From 4551383e78d59b34eea3f4ed28ad22df99e25d59 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 6 Apr 2025 22:01:44 +0200 Subject: [PATCH 0577/2924] iio: imu: st_lsm6dsx: Fix wakeup source leaks on device unbind Device can be unbound, so driver must also release memory for the wakeup source. Signed-off-by: Krzysztof Kozlowski Acked-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250406-b4-device-wakeup-leak-iio-v1-3-2d7d322a4a93@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 4fdcc2acc94ed0..96c6106b95eef6 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -2719,8 +2719,11 @@ int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id, } if (device_property_read_bool(dev, "wakeup-source") || - (pdata && pdata->wakeup_source)) - device_init_wakeup(dev, true); + (pdata && pdata->wakeup_source)) { + err = devm_device_init_wakeup(dev); + if (err) + return dev_err_probe(dev, err, "Failed to init wakeup\n"); + } return 0; } From 485acd207d7daf8cf941a5f0fd0c09bc6d049402 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Apr 2025 13:30:15 -0400 Subject: [PATCH 0578/2924] ftrace: Do not have print_graph_retval() add a newline The retval and retaddr options for function_graph tracer will add a comment at the end of a function for both leaf and non leaf functions that looks like: __wake_up_common(); /* ret=0x1 */ } /* pick_next_task_fair ret=0x0 */ The function print_graph_retval() adds a newline after the "*/". But if that's not called, the caller function needs to make sure there's a newline added. This is confusing and when the function parameters code was added, it added a newline even when calling print_graph_retval() as the fact that the print_graph_retval() function prints a newline isn't obvious. This caused an extra newline to be printed and that made it fail the selftests when the retval option was set, as the selftests were not expecting blank lines being injected into the trace. Instead of having print_graph_retval() print a newline, just have the caller always print the newline regardless if it calls print_graph_retval() or not. This not only fixes this bug, but it also simplifies the code. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250411133015.015ca393@gandalf.local.home Reported-by: Mark Brown Tested-by: Mark Brown Closes: https://lore.kernel.org/all/ccc40f2b-4b9e-4abd-8daf-d22fce2a86f0@sirena.org.uk/ Fixes: ff5c9c576e754 ("ftrace: Add support for function argument to graph tracer") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 2f077d4158e5f6..0c357a89c58e01 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -880,8 +880,6 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr if (print_retval || print_retaddr) trace_seq_puts(s, " /*"); - else - trace_seq_putc(s, '\n'); } else { print_retaddr = false; trace_seq_printf(s, "} /* %ps", func); @@ -899,7 +897,7 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr } if (!entry || print_retval || print_retaddr) - trace_seq_puts(s, " */\n"); + trace_seq_puts(s, " */"); } #else @@ -975,7 +973,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, } else trace_seq_puts(s, "();"); } - trace_seq_printf(s, "\n"); + trace_seq_putc(s, '\n'); print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET, cpu, iter->ent->pid, flags); @@ -1313,10 +1311,11 @@ print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s, * that if the funcgraph-tail option is enabled. */ if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) - trace_seq_puts(s, "}\n"); + trace_seq_puts(s, "}"); else - trace_seq_printf(s, "} /* %ps */\n", (void *)func); + trace_seq_printf(s, "} /* %ps */", (void *)func); } + trace_seq_putc(s, '\n'); /* Overrun */ if (flags & TRACE_GRAPH_PRINT_OVERRUN) From 8d7861ac507d23024c7d74b6cb59a9cca248bcb7 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 11 Apr 2025 09:37:17 +0200 Subject: [PATCH 0579/2924] rv: Fix out-of-bound memory access in rv_is_container_monitor() When rv_is_container_monitor() is called on the last monitor in rv_monitors_list, KASAN yells: BUG: KASAN: global-out-of-bounds in rv_is_container_monitor+0x101/0x110 Read of size 8 at addr ffffffff97c7c798 by task setup/221 The buggy address belongs to the variable: rv_monitors_list+0x18/0x40 This is due to list_next_entry() is called on the last entry in the list. It wraps around to the first list_head, and the first list_head is not embedded in struct rv_monitor_def. Fix it by checking if the monitor is last in the list. Cc: stable@vger.kernel.org Cc: Gabriele Monaco Fixes: cb85c660fcd4 ("rv: Add option for nested monitors and include sched") Link: https://lore.kernel.org/e85b5eeb7228bfc23b8d7d4ab5411472c54ae91b.1744355018.git.namcao@linutronix.de Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- kernel/trace/rv/rv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 968c5c3b02464f..e4077500a91dbb 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -225,7 +225,12 @@ bool rv_is_nested_monitor(struct rv_monitor_def *mdef) */ bool rv_is_container_monitor(struct rv_monitor_def *mdef) { - struct rv_monitor_def *next = list_next_entry(mdef, list); + struct rv_monitor_def *next; + + if (list_is_last(&mdef->list, &rv_monitors_list)) + return false; + + next = list_next_entry(mdef, list); return next->parent == mdef->monitor || !mdef->monitor->enable; } From 805b743fc163f1abef7ce1bea8eca8dfab5b685b Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 10 Apr 2025 13:42:22 +0200 Subject: [PATCH 0580/2924] x86/microcode/AMD: Extend the SHA check to Zen5, block loading of any unreleased standalone Zen5 microcode patches All Zen5 machines out there should get BIOS updates which update to the correct microcode patches addressing the microcode signature issue. However, silly people carve out random microcode blobs from BIOS packages and think are doing other people a service this way... Block loading of any unreleased standalone Zen5 microcode patches. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Cc: Cc: Andrew Cooper Cc: Boris Ostrovsky Cc: Linus Torvalds Cc: Maciej S. Szmigiero Cc: Nikolay Borisov Cc: Tom Lendacky Link: https://lore.kernel.org/r/20250410114222.32523-1-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index b61028cf5c8a3b..4a10d35e70aa54 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -199,6 +199,12 @@ static bool need_sha_check(u32 cur_rev) case 0xa70c0: return cur_rev <= 0xa70C009; break; case 0xaa001: return cur_rev <= 0xaa00116; break; case 0xaa002: return cur_rev <= 0xaa00218; break; + case 0xb0021: return cur_rev <= 0xb002146; break; + case 0xb1010: return cur_rev <= 0xb101046; break; + case 0xb2040: return cur_rev <= 0xb204031; break; + case 0xb4040: return cur_rev <= 0xb404031; break; + case 0xb6000: return cur_rev <= 0xb600031; break; + case 0xb7000: return cur_rev <= 0xb700031; break; default: break; } @@ -214,8 +220,7 @@ static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsi struct sha256_state s; int i; - if (x86_family(bsp_cpuid_1_eax) < 0x17 || - x86_family(bsp_cpuid_1_eax) > 0x19) + if (x86_family(bsp_cpuid_1_eax) < 0x17) return true; if (!need_sha_check(cur_rev)) From ce7e8a65aa1b7e8a6833403b314fa8f2cf133119 Mon Sep 17 00:00:00 2001 From: Tom Vierjahn Date: Mon, 24 Mar 2025 23:09:30 +0100 Subject: [PATCH 0581/2924] Documentation: ext4: Add fields to ext4_super_block documentation Documentation and implementation of the ext4 super block have slightly diverged: Padding has been removed in order to make room for new fields that are still missing in the documentation. Add the new fields s_encryption_level, s_first_error_errorcode, s_last_error_errorcode to the documentation of the ext4 super block. Fixes: f542fbe8d5e8 ("ext4 crypto: reserve codepoints used by the ext4 encryption feature") Fixes: 878520ac45f9 ("ext4: save the error code which triggered an ext4_error() in the superblock") Signed-off-by: Tom Vierjahn Reviewed-by: Ojaswin Mujoo Link: https://patch.msgid.link/20250324221004.5268-1-tom.vierjahn@acm.org Signed-off-by: Theodore Ts'o --- Documentation/filesystems/ext4/super.rst | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst index a1eb4a11a1d0f9..1b240661bfa306 100644 --- a/Documentation/filesystems/ext4/super.rst +++ b/Documentation/filesystems/ext4/super.rst @@ -328,9 +328,13 @@ The ext4 superblock is laid out as follows in - s_checksum_type - Metadata checksum algorithm type. The only valid value is 1 (crc32c). * - 0x176 - - __le16 - - s_reserved_pad - - + - \_\_u8 + - s\_encryption\_level + - Versioning level for encryption. + * - 0x177 + - \_\_u8 + - s\_reserved\_pad + - Padding to next 32bits. * - 0x178 - __le64 - s_kbytes_written @@ -466,9 +470,13 @@ The ext4 superblock is laid out as follows in - s_last_error_time_hi - Upper 8 bits of the s_last_error_time field. * - 0x27A - - __u8 - - s_pad[2] - - Zero padding. + - \_\_u8 + - s\_first\_error\_errcode + - + * - 0x27B + - \_\_u8 + - s\_last\_error\_errcode + - * - 0x27C - __le16 - s_encoding From 7e50bbb134aba1df0854f171b596b3a42d35605a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 26 Mar 2025 16:55:51 -0600 Subject: [PATCH 0582/2924] ext4: avoid -Wflex-array-member-not-at-end warning -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Use the `DEFINE_RAW_FLEX()` helper for an on-stack definition of a flexible structure where the size of the flexible-array member is known at compile-time, and refactor the rest of the code, accordingly. So, with these changes, fix the following warning: fs/ext4/mballoc.c:3041:40: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Link: https://patch.msgid.link/Z-SF97N3AxcIMlSi@kspp Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0d523e9fb3d529..f88424c281943f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3037,10 +3037,8 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) unsigned char blocksize_bits = min_t(unsigned char, sb->s_blocksize_bits, EXT4_MAX_BLOCK_LOG_SIZE); - struct sg { - struct ext4_group_info info; - ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; - } sg; + DEFINE_RAW_FLEX(struct ext4_group_info, sg, bb_counters, + EXT4_MAX_BLOCK_LOG_SIZE + 2); group--; if (group == 0) @@ -3048,7 +3046,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); - i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + + i = (blocksize_bits + 2) * sizeof(sg->bb_counters[0]) + sizeof(struct ext4_group_info); grinfo = ext4_get_group_info(sb, group); @@ -3068,14 +3066,14 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) * We care only about free space counters in the group info and * these are safe to access even after the buddy has been unloaded */ - memcpy(&sg, grinfo, i); - seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, - sg.info.bb_fragments, sg.info.bb_first_free); + memcpy(sg, grinfo, i); + seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg->bb_free, + sg->bb_fragments, sg->bb_first_free); for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? - sg.info.bb_counters[i] : 0); + sg->bb_counters[i] : 0); seq_puts(seq, " ]"); - if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info)) + if (EXT4_MB_GRP_BBITMAP_CORRUPT(sg)) seq_puts(seq, " Block bitmap corrupted!"); seq_putc(seq, '\n'); return 0; From ccad447a3d331a239477c281533bacb585b54a98 Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Fri, 28 Mar 2025 11:54:52 +0530 Subject: [PATCH 0583/2924] ext4: make block validity check resistent to sb bh corruption Block validity checks need to be skipped in case they are called for journal blocks since they are part of system's protected zone. Currently, this is done by checking inode->ino against sbi->s_es->s_journal_inum, which is a direct read from the ext4 sb buffer head. If someone modifies this underneath us then the s_journal_inum field might get corrupted. To prevent against this, change the check to directly compare the inode with journal->j_inode. **Slight change in behavior**: During journal init path, check_block_validity etc might be called for journal inode when sbi->s_journal is not set yet. In this case we now proceed with ext4_inode_block_valid() instead of returning early. Since systems zones have not been set yet, it is okay to proceed so we can perform basic checks on the blocks. Suggested-by: Baokun Li Reviewed-by: Baokun Li Reviewed-by: Jan Kara Reviewed-by: Zhang Yi Signed-off-by: Ojaswin Mujoo Link: https://patch.msgid.link/0c06bc9ebfcd6ccfed84a36e79147bf45ff5adc1.1743142920.git.ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/block_validity.c | 5 ++--- fs/ext4/inode.c | 7 ++++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 87ee3a17bd29c9..e8c5525afc67a2 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -351,10 +351,9 @@ int ext4_check_blockref(const char *function, unsigned int line, { __le32 *bref = p; unsigned int blk; + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; - if (ext4_has_feature_journal(inode->i_sb) && - (inode->i_ino == - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + if (journal && inode == journal->j_inode) return 0; while (bref < p+max) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5c57a34cd82c92..f386de8c12f646 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -384,10 +384,11 @@ static int __check_block_validity(struct inode *inode, const char *func, unsigned int line, struct ext4_map_blocks *map) { - if (ext4_has_feature_journal(inode->i_sb) && - (inode->i_ino == - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + + if (journal && inode == journal->j_inode) return 0; + if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, "lblock %lu mapped to illegal pblock %llu " From 94824ac9a8aaf2fb3c54b4bdde842db80ffa555d Mon Sep 17 00:00:00 2001 From: Artem Sadovnikov Date: Fri, 4 Apr 2025 08:28:05 +0000 Subject: [PATCH 0584/2924] ext4: fix off-by-one error in do_split Syzkaller detected a use-after-free issue in ext4_insert_dentry that was caused by out-of-bounds access due to incorrect splitting in do_split. BUG: KASAN: use-after-free in ext4_insert_dentry+0x36a/0x6d0 fs/ext4/namei.c:2109 Write of size 251 at addr ffff888074572f14 by task syz-executor335/5847 CPU: 0 UID: 0 PID: 5847 Comm: syz-executor335 Not tainted 6.12.0-rc6-syzkaller-00318-ga9cda7c0ffed #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/30/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 kasan_check_range+0x282/0x290 mm/kasan/generic.c:189 __asan_memcpy+0x40/0x70 mm/kasan/shadow.c:106 ext4_insert_dentry+0x36a/0x6d0 fs/ext4/namei.c:2109 add_dirent_to_buf+0x3d9/0x750 fs/ext4/namei.c:2154 make_indexed_dir+0xf98/0x1600 fs/ext4/namei.c:2351 ext4_add_entry+0x222a/0x25d0 fs/ext4/namei.c:2455 ext4_add_nondir+0x8d/0x290 fs/ext4/namei.c:2796 ext4_symlink+0x920/0xb50 fs/ext4/namei.c:3431 vfs_symlink+0x137/0x2e0 fs/namei.c:4615 do_symlinkat+0x222/0x3a0 fs/namei.c:4641 __do_sys_symlink fs/namei.c:4662 [inline] __se_sys_symlink fs/namei.c:4660 [inline] __x64_sys_symlink+0x7a/0x90 fs/namei.c:4660 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f The following loop is located right above 'if' statement. for (i = count-1; i >= 0; i--) { /* is more than half of this entry in 2nd half of the block? */ if (size + map[i].size/2 > blocksize/2) break; size += map[i].size; move++; } 'i' in this case could go down to -1, in which case sum of active entries wouldn't exceed half the block size, but previous behaviour would also do split in half if sum would exceed at the very last block, which in case of having too many long name files in a single block could lead to out-of-bounds access and following use-after-free. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Cc: stable@vger.kernel.org Fixes: 5872331b3d91 ("ext4: fix potential negative array index in do_split()") Signed-off-by: Artem Sadovnikov Reviewed-by: Jan Kara Link: https://patch.msgid.link/20250404082804.2567-3-a.sadovnikov@ispras.ru Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 43f96ef4aa0126..dda1791e9e1a9b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1971,7 +1971,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, * split it in half by count; each resulting block will have at least * half the space free. */ - if (i > 0) + if (i >= 0) split = count - move; else split = count/2; From d833dc597fdc79b3f5b1ca5817aa7a64897e13d3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Apr 2025 11:03:59 +0200 Subject: [PATCH 0585/2924] clang-format: Update the ForEachMacros list for v6.15-rc1 One of my 'git grep' searches tripped on this file listing an already removed primitive. Refresh it. Signed-off-by: Ingo Molnar Cc: Andrew Morton Cc: Linus Torvalds --- .clang-format | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/.clang-format b/.clang-format index fe1aa1a30d4026..86c20ee744dec1 100644 --- a/.clang-format +++ b/.clang-format @@ -92,6 +92,7 @@ ForEachMacros: - '__rq_for_each_bio' - '__shost_for_each_device' - '__sym_for_each' + - '_for_each_counter' - 'apei_estatus_for_each_section' - 'ata_for_each_dev' - 'ata_for_each_link' @@ -141,11 +142,14 @@ ForEachMacros: - 'damon_for_each_target_safe' - 'damos_for_each_filter' - 'damos_for_each_filter_safe' + - 'damos_for_each_ops_filter' + - 'damos_for_each_ops_filter_safe' - 'damos_for_each_quota_goal' - 'damos_for_each_quota_goal_safe' - 'data__for_each_file' - 'data__for_each_file_new' - 'data__for_each_file_start' + - 'def_for_each_cpu' - 'device_for_each_child_node' - 'device_for_each_child_node_scoped' - 'dma_fence_array_for_each' @@ -176,6 +180,7 @@ ForEachMacros: - 'drm_for_each_privobj' - 'drm_gem_for_each_gpuvm_bo' - 'drm_gem_for_each_gpuvm_bo_safe' + - 'drm_gpusvm_for_each_range' - 'drm_gpuva_for_each_op' - 'drm_gpuva_for_each_op_from_reverse' - 'drm_gpuva_for_each_op_reverse' @@ -216,8 +221,10 @@ ForEachMacros: - 'for_each_active_dev_scope' - 'for_each_active_drhd_unit' - 'for_each_active_iommu' + - 'for_each_active_irq' - 'for_each_active_route' - 'for_each_aggr_pgid' + - 'for_each_alloc_capable_rdt_resource' - 'for_each_and_bit' - 'for_each_andnot_bit' - 'for_each_available_child_of_node' @@ -228,6 +235,7 @@ ForEachMacros: - 'for_each_btf_ext_rec' - 'for_each_btf_ext_sec' - 'for_each_bvec' + - 'for_each_capable_rdt_resource' - 'for_each_card_auxs' - 'for_each_card_auxs_safe' - 'for_each_card_components' @@ -241,6 +249,7 @@ ForEachMacros: - 'for_each_cgroup_storage_type' - 'for_each_child_of_node' - 'for_each_child_of_node_scoped' + - 'for_each_child_of_node_with_prefix' - 'for_each_clear_bit' - 'for_each_clear_bit_from' - 'for_each_clear_bitrange' @@ -296,6 +305,7 @@ ForEachMacros: - 'for_each_group_member_head' - 'for_each_hstate' - 'for_each_hwgpio' + - 'for_each_hwgpio_in_range' - 'for_each_if' - 'for_each_inject_fn' - 'for_each_insn' @@ -304,6 +314,7 @@ ForEachMacros: - 'for_each_intid' - 'for_each_iommu' - 'for_each_ip_tunnel_rcu' + - 'for_each_irq_desc' - 'for_each_irq_nr' - 'for_each_lang' - 'for_each_link_ch_maps' @@ -324,6 +335,8 @@ ForEachMacros: - 'for_each_missing_reg' - 'for_each_mle_subelement' - 'for_each_mod_mem_type' + - 'for_each_mon_capable_rdt_resource' + - 'for_each_mp_bvec' - 'for_each_net' - 'for_each_net_continue_reverse' - 'for_each_net_rcu' @@ -351,6 +364,7 @@ ForEachMacros: - 'for_each_node_by_name' - 'for_each_node_by_type' - 'for_each_node_mask' + - 'for_each_node_numadist' - 'for_each_node_state' - 'for_each_node_with_cpus' - 'for_each_node_with_property' @@ -359,6 +373,8 @@ ForEachMacros: - 'for_each_of_allnodes' - 'for_each_of_allnodes_from' - 'for_each_of_cpu_node' + - 'for_each_of_graph_port' + - 'for_each_of_graph_port_endpoint' - 'for_each_of_pci_range' - 'for_each_old_connector_in_state' - 'for_each_old_crtc_in_state' @@ -372,9 +388,11 @@ ForEachMacros: - 'for_each_oldnew_plane_in_state_reverse' - 'for_each_oldnew_private_obj_in_state' - 'for_each_online_cpu' + - 'for_each_online_cpu_wrap' - 'for_each_online_node' - 'for_each_online_pgdat' - 'for_each_or_bit' + - 'for_each_page_ext' - 'for_each_path' - 'for_each_pci_bridge' - 'for_each_pci_dev' @@ -382,8 +400,10 @@ ForEachMacros: - 'for_each_physmem_range' - 'for_each_populated_zone' - 'for_each_possible_cpu' + - 'for_each_possible_cpu_wrap' - 'for_each_present_blessed_reg' - 'for_each_present_cpu' + - 'for_each_present_section_nr' - 'for_each_prime_number' - 'for_each_prime_number_from' - 'for_each_probe_cache_entry' @@ -396,6 +416,7 @@ ForEachMacros: - 'for_each_prop_dlc_cpus' - 'for_each_prop_dlc_platforms' - 'for_each_property_of_node' + - 'for_each_rdt_resource' - 'for_each_reg' - 'for_each_reg_filtered' - 'for_each_reloc' @@ -434,10 +455,10 @@ ForEachMacros: - 'for_each_subelement_id' - 'for_each_sublist' - 'for_each_subsystem' + - 'for_each_suite' - 'for_each_supported_activate_fn' - 'for_each_supported_inject_fn' - 'for_each_sym' - - 'for_each_test' - 'for_each_thread' - 'for_each_token' - 'for_each_unicast_dest_pgid' @@ -499,8 +520,10 @@ ForEachMacros: - 'idr_for_each_entry_continue' - 'idr_for_each_entry_continue_ul' - 'idr_for_each_entry_ul' + - 'iio_for_each_active_channel' - 'in_dev_for_each_ifa_rcu' - 'in_dev_for_each_ifa_rtnl' + - 'in_dev_for_each_ifa_rtnl_net' - 'inet_bind_bucket_for_each' - 'interval_tree_for_each_span' - 'intlist__for_each_entry' @@ -542,7 +565,6 @@ ForEachMacros: - 'list_for_each_prev' - 'list_for_each_prev_safe' - 'list_for_each_rcu' - - 'list_for_each_reverse' - 'list_for_each_safe' - 'llist_for_each' - 'llist_for_each_entry' @@ -552,6 +574,7 @@ ForEachMacros: - 'map__for_each_symbol' - 'map__for_each_symbol_by_name' - 'mas_for_each' + - 'mas_for_each_rev' - 'mci_for_each_dimm' - 'media_device_for_each_entity' - 'media_device_for_each_intf' @@ -561,10 +584,15 @@ ForEachMacros: - 'media_pipeline_for_each_entity' - 'media_pipeline_for_each_pad' - 'mlx5_lag_for_each_peer_mdev' + - 'mptcp_for_each_subflow' - 'msi_domain_for_each_desc' - 'msi_for_each_desc' - 'mt_for_each' + - 'nanddev_io_for_each_block' - 'nanddev_io_for_each_page' + - 'neigh_for_each_in_bucket' + - 'neigh_for_each_in_bucket_rcu' + - 'neigh_for_each_in_bucket_safe' - 'netdev_for_each_lower_dev' - 'netdev_for_each_lower_private' - 'netdev_for_each_lower_private_rcu' @@ -604,11 +632,11 @@ ForEachMacros: - 'perf_evlist__for_each_entry_safe' - 'perf_evlist__for_each_evsel' - 'perf_evlist__for_each_mmap' + - 'perf_evsel_for_each_per_thread_period_safe' - 'perf_hpp_list__for_each_format' - 'perf_hpp_list__for_each_format_safe' - 'perf_hpp_list__for_each_sort_list' - 'perf_hpp_list__for_each_sort_list_safe' - - 'perf_tool_event__for_each_event' - 'plist_for_each' - 'plist_for_each_continue' - 'plist_for_each_entry' @@ -627,7 +655,6 @@ ForEachMacros: - 'rdma_for_each_block' - 'rdma_for_each_port' - 'rdma_umem_for_each_dma_block' - - 'resort_rb__for_each_entry' - 'resource_list_for_each_entry' - 'resource_list_for_each_entry_safe' - 'rhl_for_each_entry_rcu' @@ -658,6 +685,7 @@ ForEachMacros: - 'shost_for_each_device' - 'sk_for_each' - 'sk_for_each_bound' + - 'sk_for_each_bound_safe' - 'sk_for_each_entry_offset_rcu' - 'sk_for_each_from' - 'sk_for_each_rcu' @@ -680,7 +708,11 @@ ForEachMacros: - 'tb_property_for_each' - 'tcf_act_for_each_action' - 'tcf_exts_for_each_action' + - 'test_suite__for_each_test_case' + - 'tool_pmu__for_each_event' + - 'ttm_bo_lru_for_each_reserved_guarded' - 'ttm_resource_manager_for_each_res' + - 'udp_lrpa_for_each_entry_rcu' - 'udp_portaddr_for_each_entry' - 'udp_portaddr_for_each_entry_rcu' - 'usb_hub_for_each_child' @@ -691,6 +723,7 @@ ForEachMacros: - 'v4l2_m2m_for_each_src_buf_safe' - 'virtio_device_for_each_vq' - 'while_for_each_ftrace_op' + - 'workloads__for_each' - 'xa_for_each' - 'xa_for_each_marked' - 'xa_for_each_range' From d62922ba3cfc01dc42e853f90b93c525751e9383 Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Sat, 12 Apr 2025 14:39:33 -0400 Subject: [PATCH 0586/2924] bcachefs: Prevent granting write refs when filesystem is read-only Fix a shutdown WARNING in bch2_dev_free caused by active write I/O references (ca->io_ref[WRITE]) on a device being freed. The problem occurs when: - The filesystem is marked read-only (BCH_FS_rw clear in c->flags). - A subsequent operation (e.g., error handling for device removal) incorrectly tries to grant write references back to a device. - During final shutdown, the read-only flag causes the system to skip stopping write I/O references (bch2_dev_io_ref_stop(ca, WRITE)). - The leftover active write reference triggers the WARN_ON in bch2_dev_free. Prevent this by checking if the filesystem is read-only before attempting to grant write references to a device in the problematic code path. Ensure consistency between the filesystem state flag and the device I/O reference state during shutdown. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index b79e80a435e09a..788e870bfef6a9 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1757,7 +1757,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) up_write(&c->state_lock); return 0; err: - if (ca->mi.state == BCH_MEMBER_STATE_rw && + if (test_bit(BCH_FS_rw, &c->flags) && + ca->mi.state == BCH_MEMBER_STATE_rw && !percpu_ref_is_zero(&ca->io_ref[READ])) __bch2_dev_read_write(c, ca); up_write(&c->state_lock); From 806776ad9c20c6589f957212ef017e75760a08cd Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Sat, 12 Apr 2025 18:20:49 +0800 Subject: [PATCH 0587/2924] bcachefs: Add missing error handling Reported-by: syzbot+d10151bf01574a09a915@syzkaller.appspotmail.com Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d2b07f602da95a..606d684e6f23fc 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1125,7 +1125,10 @@ int bch2_fs_initialize(struct bch_fs *c) * journal_res_get() will crash if called before this has * set up the journal.pin FIFO and journal.cur pointer: */ - bch2_fs_journal_start(&c->journal, 1); + ret = bch2_fs_journal_start(&c->journal, 1); + if (ret) + goto err; + set_bit(BCH_FS_accounting_replay_done, &c->flags); bch2_journal_set_replay_done(&c->journal); From 7dfd42a07acfeaac4e36620927ea227b1e8da3e9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 9 Apr 2025 21:04:17 -0400 Subject: [PATCH 0588/2924] bcachefs: Don't print data read retry success on non-errors We may end up in the data read retry path when reading cached data and racing with invalidation, or on checksum error when we were reading into a userspace buffer that might have been modified while the read was in flight. These aren't real errors, so we shouldn't print the 'retry success' message. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index fd627c8d105364..de8ccd593ec73c 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -487,6 +487,8 @@ static void bch2_rbio_retry(struct work_struct *work) .inum = rbio->read_pos.inode, }; struct bch_io_failures failed = { .nr = 0 }; + int orig_error = rbio->ret; + struct btree_trans *trans = bch2_trans_get(c); trace_io_read_retry(&rbio->bio); @@ -519,7 +521,9 @@ static void bch2_rbio_retry(struct work_struct *work) if (ret) { rbio->ret = ret; rbio->bio.bi_status = BLK_STS_IOERR; - } else { + } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace && + orig_error != -BCH_ERR_data_read_ptr_stale_race && + !failed.nr) { struct printbuf buf = PRINTBUF; lockrestart_do(trans, From 345731a389fa145c76bedebec6e4f3db4cb29486 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Apr 2025 14:53:34 -0400 Subject: [PATCH 0589/2924] bcachefs: fix bch2_dev_usage_full_read_fast() One reference to bch_dev_usage wasn't updated, which meant we weren't reading the full bch_dev_usage_full - oops. Fixes: 955ba7b5ea03 ("bcachefs: bch_dev_usage_full") Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 3 ++- fs/bcachefs/buckets.h | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index fea61e60a9eea8..4ef261e8db4f5c 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -37,7 +37,8 @@ void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage) void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage) { memset(usage, 0, sizeof(*usage)); - acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s()); + acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, + sizeof(struct bch_dev_usage_full) / sizeof(u64)); } static u64 reserve_factor(u64 r) diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 1c38b165f48b09..8d75b27a141885 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -242,11 +242,6 @@ static inline u64 dev_buckets_available(struct bch_dev *ca, /* Filesystem usage: */ -static inline unsigned dev_usage_u64s(void) -{ - return sizeof(struct bch_dev_usage) / sizeof(u64); -} - struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *); From 8ffd015db85fea3e15a77027fda6c02ced4d2444 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Apr 2025 11:54:49 -0700 Subject: [PATCH 0590/2924] Linux 6.15-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f4241855650711..c91afd55099ea6 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* From 1aa495a6572f8641da4ec4cd32210deca61bed64 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 11 Apr 2025 13:36:06 +0100 Subject: [PATCH 0591/2924] kunit: configs: Add some Cirrus Logic modules to all_tests Add CONFIG_I2C and CONFIG_SND_SOC_CS35L56_I2C to all_tests.config so that Cirrus Logic modules with KUnit tests will be built. The CS35L56 driver doesn't currently have any KUnit tests itself, but it enables two other libraries that have KUnit tests: cs_dsp and cs-amp-lib. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250411123608.1676462-2-rf@opensource.cirrus.com Reviewed-by: David Gow Signed-off-by: Mark Brown --- tools/testing/kunit/configs/all_tests.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index cdd9782f9646ae..43d3c31ab53f7b 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -20,6 +20,7 @@ CONFIG_VFAT_FS=y CONFIG_PCI=y CONFIG_USB4=y +CONFIG_I2C=y CONFIG_NET=y CONFIG_MCTP=y @@ -51,3 +52,4 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SOC=y CONFIG_SND_SOC_TOPOLOGY_BUILD=y +CONFIG_SND_SOC_CS35L56_I2C=y From 96014d91cffb335d3b396771524ff2aba3549865 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 11 Apr 2025 13:36:07 +0100 Subject: [PATCH 0592/2924] ASoC: cs-amp-lib-test: Don't select SND_SOC_CS_AMP_LIB Depend on SND_SOC_CS_AMP_LIB instead of selecting it. KUNIT_ALL_TESTS should only build tests for components that are already being built, it should not cause other stuff to be added to the build. Fixes: 177862317a98 ("ASoC: cs-amp-lib: Add KUnit test for calibration helpers") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250411123608.1676462-3-rf@opensource.cirrus.com Reviewed-by: David Gow Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 40bb7a1d44bcfa..20f99cbee29b3e 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -776,10 +776,9 @@ config SND_SOC_CS_AMP_LIB tristate config SND_SOC_CS_AMP_LIB_TEST - tristate "KUnit test for Cirrus Logic cs-amp-lib" - depends on KUNIT + tristate "KUnit test for Cirrus Logic cs-amp-lib" if !KUNIT_ALL_TESTS + depends on SND_SOC_CS_AMP_LIB && KUNIT default KUNIT_ALL_TESTS - select SND_SOC_CS_AMP_LIB help This builds KUnit tests for the Cirrus Logic common amplifier library. From a0b887f6eb9a0d1be3c57d00b0f3ba8408d3018a Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 11 Apr 2025 13:36:08 +0100 Subject: [PATCH 0593/2924] firmware: cs_dsp: tests: Depend on FW_CS_DSP rather then enabling it FW_CS_DSP gets enabled if KUNIT is enabled. The test should rather depend on if the feature is enabled. Fix this by moving FW_CS_DSP to the depends on clause. Fixes: dd0b6b1f29b9 ("firmware: cs_dsp: Add KUnit testing of bin file download") Signed-off-by: Nico Pache Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250411123608.1676462-4-rf@opensource.cirrus.com Reviewed-by: David Gow Signed-off-by: Mark Brown --- drivers/firmware/cirrus/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig index 0a883091259a2c..e3c2e38b746df9 100644 --- a/drivers/firmware/cirrus/Kconfig +++ b/drivers/firmware/cirrus/Kconfig @@ -6,14 +6,11 @@ config FW_CS_DSP config FW_CS_DSP_KUNIT_TEST_UTILS tristate - depends on KUNIT && REGMAP - select FW_CS_DSP config FW_CS_DSP_KUNIT_TEST tristate "KUnit tests for Cirrus Logic cs_dsp" if !KUNIT_ALL_TESTS - depends on KUNIT && REGMAP + depends on KUNIT && REGMAP && FW_CS_DSP default KUNIT_ALL_TESTS - select FW_CS_DSP select FW_CS_DSP_KUNIT_TEST_UTILS help This builds KUnit tests for cs_dsp. From a9a69c3b38c89d7992fb53db4abb19104b531d32 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Sun, 6 Apr 2025 16:08:54 -0500 Subject: [PATCH 0594/2924] ASoC: imx-card: Adjust over allocation of memory in imx_card_parse_of() Incorrect types are used as sizeof() arguments in devm_kcalloc(). It should be sizeof(dai_link_data) for link_data instead of sizeof(snd_soc_dai_link). This is found by our static analysis tool. Signed-off-by: Chenyuan Yang Link: https://patch.msgid.link/20250406210854.149316-1-chenyuan0y@gmail.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-card.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 3686d468506b35..45e000f61eccac 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -544,7 +544,7 @@ static int imx_card_parse_of(struct imx_card_data *data) if (!card->dai_link) return -ENOMEM; - data->link_data = devm_kcalloc(dev, num_links, sizeof(*link), GFP_KERNEL); + data->link_data = devm_kcalloc(dev, num_links, sizeof(*link_data), GFP_KERNEL); if (!data->link_data) return -ENOMEM; From 9aff2e8df240e84a36f2607f98a0a9924a24e65d Mon Sep 17 00:00:00 2001 From: Sheetal Date: Fri, 4 Apr 2025 10:59:53 +0000 Subject: [PATCH 0595/2924] ASoC: soc-pcm: Fix hw_params() and DAPM widget sequence Issue: When multiple audio streams share a common BE DAI, the BE DAI widget can be powered up before its hardware parameters are configured. This incorrect sequence leads to intermittent pcm_write errors. For example, the below Tegra use-case throws an error: aplay(2 streams) -> AMX(mux) -> ADX(demux) -> arecord(2 streams), here, 'AMX TX' and 'ADX RX' are common BE DAIs. For above usecase when failure happens below sequence is observed: aplay(1) FE open() - BE DAI callbacks added to the list - BE DAI state = SND_SOC_DPCM_STATE_OPEN aplay(2) FE open() - BE DAI callbacks are not added to the list as the state is already SND_SOC_DPCM_STATE_OPEN during aplay(1) FE open(). aplay(2) FE hw_params() - BE DAI hw_params() callback ignored aplay(2) FE prepare() - Widget is powered ON without BE DAI hw_params() call aplay(1) FE hw_params() - BE DAI hw_params() is now called Fix: Add BE DAIs in the list if its state is either SND_SOC_DPCM_STATE_OPEN or SND_SOC_DPCM_STATE_HW_PARAMS as well. It ensures the widget is powered ON after BE DAI hw_params() callback. Fixes: 0c25db3f7621 ("ASoC: soc-pcm: Don't reconnect an already active BE") Signed-off-by: Sheetal Link: https://patch.msgid.link/20250404105953.2784819-1-sheetal@nvidia.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 4308a6cbb2e615..43835197d1feea 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1584,10 +1584,13 @@ int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, /* * Filter for systems with 'component_chaining' enabled. * This helps to avoid unnecessary re-configuration of an - * already active BE on such systems. + * already active BE on such systems and ensures the BE DAI + * widget is powered ON after hw_params() BE DAI callback. */ if (fe->card->component_chaining && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_NEW) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_CLOSE)) continue; From 424eafe65647a8d6c690284536e711977153195a Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 7 Apr 2025 17:33:34 -0300 Subject: [PATCH 0596/2924] i2c: cros-ec-tunnel: defer probe if parent EC is not present When i2c-cros-ec-tunnel and the EC driver are built-in, the EC parent device will not be found, leading to NULL pointer dereference. That can also be reproduced by unbinding the controller driver and then loading i2c-cros-ec-tunnel module (or binding the device). [ 271.991245] BUG: kernel NULL pointer dereference, address: 0000000000000058 [ 271.998215] #PF: supervisor read access in kernel mode [ 272.003351] #PF: error_code(0x0000) - not-present page [ 272.008485] PGD 0 P4D 0 [ 272.011022] Oops: Oops: 0000 [#1] SMP NOPTI [ 272.015207] CPU: 0 UID: 0 PID: 3859 Comm: insmod Tainted: G S 6.15.0-rc1-00004-g44722359ed83 #30 PREEMPT(full) 3c7fb39a552e7d949de2ad921a7d6588d3a4fdc5 [ 272.030312] Tainted: [S]=CPU_OUT_OF_SPEC [ 272.034233] Hardware name: HP Berknip/Berknip, BIOS Google_Berknip.13434.356.0 05/17/2021 [ 272.042400] RIP: 0010:ec_i2c_probe+0x2b/0x1c0 [i2c_cros_ec_tunnel] [ 272.048577] Code: 1f 44 00 00 41 57 41 56 41 55 41 54 53 48 83 ec 10 65 48 8b 05 06 a0 6c e7 48 89 44 24 08 4c 8d 7f 10 48 8b 47 50 4c 8b 60 78 <49> 83 7c 24 58 00 0f 84 2f 01 00 00 48 89 fb be 30 06 00 00 4c 9 [ 272.067317] RSP: 0018:ffffa32082a03940 EFLAGS: 00010282 [ 272.072541] RAX: ffff969580b6a810 RBX: ffff969580b68c10 RCX: 0000000000000000 [ 272.079672] RDX: 0000000000000000 RSI: 0000000000000282 RDI: ffff969580b68c00 [ 272.086804] RBP: 00000000fffffdfb R08: 0000000000000000 R09: 0000000000000000 [ 272.093936] R10: 0000000000000000 R11: ffffffffc0600000 R12: 0000000000000000 [ 272.101067] R13: ffffffffa666fbb8 R14: ffffffffc05b5528 R15: ffff969580b68c10 [ 272.108198] FS: 00007b930906fc40(0000) GS:ffff969603149000(0000) knlGS:0000000000000000 [ 272.116282] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 272.122024] CR2: 0000000000000058 CR3: 000000012631c000 CR4: 00000000003506f0 [ 272.129155] Call Trace: [ 272.131606] [ 272.133709] ? acpi_dev_pm_attach+0xdd/0x110 [ 272.137985] platform_probe+0x69/0xa0 [ 272.141652] really_probe+0x152/0x310 [ 272.145318] __driver_probe_device+0x77/0x110 [ 272.149678] driver_probe_device+0x1e/0x190 [ 272.153864] __driver_attach+0x10b/0x1e0 [ 272.157790] ? driver_attach+0x20/0x20 [ 272.161542] bus_for_each_dev+0x107/0x150 [ 272.165553] bus_add_driver+0x15d/0x270 [ 272.169392] driver_register+0x65/0x110 [ 272.173232] ? cleanup_module+0xa80/0xa80 [i2c_cros_ec_tunnel 3a00532f3f4af4a9eade753f86b0f8dd4e4e5698] [ 272.182617] do_one_initcall+0x110/0x350 [ 272.186543] ? security_kernfs_init_security+0x49/0xd0 [ 272.191682] ? __kernfs_new_node+0x1b9/0x240 [ 272.195954] ? security_kernfs_init_security+0x49/0xd0 [ 272.201093] ? __kernfs_new_node+0x1b9/0x240 [ 272.205365] ? kernfs_link_sibling+0x105/0x130 [ 272.209810] ? kernfs_next_descendant_post+0x1c/0xa0 [ 272.214773] ? kernfs_activate+0x57/0x70 [ 272.218699] ? kernfs_add_one+0x118/0x160 [ 272.222710] ? __kernfs_create_file+0x71/0xa0 [ 272.227069] ? sysfs_add_bin_file_mode_ns+0xd6/0x110 [ 272.232033] ? internal_create_group+0x453/0x4a0 [ 272.236651] ? __vunmap_range_noflush+0x214/0x2d0 [ 272.241355] ? __free_frozen_pages+0x1dc/0x420 [ 272.245799] ? free_vmap_area_noflush+0x10a/0x1c0 [ 272.250505] ? load_module+0x1509/0x16f0 [ 272.254431] do_init_module+0x60/0x230 [ 272.258181] __se_sys_finit_module+0x27a/0x370 [ 272.262627] do_syscall_64+0x6a/0xf0 [ 272.266206] ? do_syscall_64+0x76/0xf0 [ 272.269956] ? irqentry_exit_to_user_mode+0x79/0x90 [ 272.274836] entry_SYSCALL_64_after_hwframe+0x55/0x5d [ 272.279887] RIP: 0033:0x7b9309168d39 [ 272.283466] Code: 5b 41 5c 5d c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d af 40 0c 00 f7 d8 64 89 01 8 [ 272.302210] RSP: 002b:00007fff50f1a288 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 272.309774] RAX: ffffffffffffffda RBX: 000058bf9b50f6d0 RCX: 00007b9309168d39 [ 272.316905] RDX: 0000000000000000 RSI: 000058bf6c103a77 RDI: 0000000000000003 [ 272.324036] RBP: 00007fff50f1a2e0 R08: 00007fff50f19218 R09: 0000000021ec4150 [ 272.331166] R10: 000058bf9b50f7f0 R11: 0000000000000246 R12: 0000000000000000 [ 272.338296] R13: 00000000fffffffe R14: 0000000000000000 R15: 000058bf6c103a77 [ 272.345428] [ 272.347617] Modules linked in: i2c_cros_ec_tunnel(+) [ 272.364585] gsmi: Log Shutdown Reason 0x03 Returning -EPROBE_DEFER will allow the device to be bound once the controller is bound, in the case of built-in drivers. Fixes: 9d230c9e4f4e ("i2c: ChromeOS EC tunnel driver") Signed-off-by: Thadeu Lima de Souza Cascardo Cc: # v3.16+ Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250407-null-ec-parent-v1-1-f7dda62d3110@igalia.com --- drivers/i2c/busses/i2c-cros-ec-tunnel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index 43bf90d90eebab..208ce4f9e782cd 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -247,6 +247,9 @@ static int ec_i2c_probe(struct platform_device *pdev) u32 remote_bus; int err; + if (!ec) + return dev_err_probe(dev, -EPROBE_DEFER, "couldn't find parent EC device\n"); + if (!ec->cmd_xfer) { dev_err(dev, "Missing sendrecv\n"); return -EINVAL; From cd35b6cb46649750b7dbd0df0e2d767415d8917b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 1 Apr 2025 15:02:21 -0700 Subject: [PATCH 0597/2924] nfs: add missing selections of CONFIG_CRC32 nfs.ko, nfsd.ko, and lockd.ko all use crc32_le(), which is available only when CONFIG_CRC32 is enabled. But the only NFS kconfig option that selected CONFIG_CRC32 was CONFIG_NFS_DEBUG, which is client-specific and did not actually guard the use of crc32_le() even on the client. The code worked around this bug by only actually calling crc32_le() when CONFIG_CRC32 is built-in, instead hard-coding '0' in other cases. This avoided randconfig build errors, and in real kernels the fallback code was unlikely to be reached since CONFIG_CRC32 is 'default y'. But, this really needs to just be done properly, especially now that I'm planning to update CONFIG_CRC32 to not be 'default y'. Therefore, make CONFIG_NFS_FS, CONFIG_NFSD, and CONFIG_LOCKD select CONFIG_CRC32. Then remove the fallback code that becomes unnecessary, as well as the selection of CONFIG_CRC32 from CONFIG_NFS_DEBUG. Fixes: 1264a2f053a3 ("NFS: refactor code for calculating the crc32 hash of a filehandle") Signed-off-by: Eric Biggers Acked-by: Anna Schumaker Signed-off-by: Chuck Lever --- fs/Kconfig | 1 + fs/nfs/Kconfig | 2 +- fs/nfs/internal.h | 7 ------- fs/nfs/nfs4session.h | 4 ---- fs/nfsd/Kconfig | 1 + fs/nfsd/nfsfh.h | 7 ------- include/linux/nfs.h | 7 ------- 7 files changed, 3 insertions(+), 26 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index 64d420e3c47580..8fd1011f7d628d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -368,6 +368,7 @@ config GRACE_PERIOD config LOCKD tristate depends on FILE_LOCKING + select CRC32 select GRACE_PERIOD config LOCKD_V4 diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index d3f76101ad4b91..07932ce9246c17 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -2,6 +2,7 @@ config NFS_FS tristate "NFS client support" depends on INET && FILE_LOCKING && MULTIUSER + select CRC32 select LOCKD select SUNRPC select NFS_COMMON @@ -196,7 +197,6 @@ config NFS_USE_KERNEL_DNS config NFS_DEBUG bool depends on NFS_FS && SUNRPC_DEBUG - select CRC32 default y config NFS_DISABLE_UDP_SUPPORT diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index fae2c7ae4acc28..59bb4d0338f39a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -899,18 +899,11 @@ u64 nfs_timespec_to_change_attr(const struct timespec64 *ts) return ((u64)ts->tv_sec << 30) + ts->tv_nsec; } -#ifdef CONFIG_CRC32 static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid) { return ~crc32_le(0xFFFFFFFF, &stateid->other[0], NFS4_STATEID_OTHER_SIZE); } -#else -static inline u32 nfs_stateid_hash(nfs4_stateid *stateid) -{ - return 0; -} -#endif static inline bool nfs_error_is_fatal(int err) { diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 351616c61df541..f9c291e2165cd8 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -148,16 +148,12 @@ static inline void nfs4_copy_sessionid(struct nfs4_sessionid *dst, memcpy(dst->data, src->data, NFS4_MAX_SESSIONID_LEN); } -#ifdef CONFIG_CRC32 /* * nfs_session_id_hash - calculate the crc32 hash for the session id * @session - pointer to session */ #define nfs_session_id_hash(sess_id) \ (~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data))) -#else -#define nfs_session_id_hash(session) (0) -#endif #else /* defined(CONFIG_NFS_V4_1) */ static inline int nfs4_init_session(struct nfs_client *clp) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 792d3fed1b45fd..731a88f6313ebf 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -4,6 +4,7 @@ config NFSD depends on INET depends on FILE_LOCKING depends on FSNOTIFY + select CRC32 select LOCKD select SUNRPC select EXPORTFS diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 876152a91f122f..5103c2f4d2253a 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -267,7 +267,6 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1, return true; } -#ifdef CONFIG_CRC32 /** * knfsd_fh_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -279,12 +278,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); } -#else -static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) -{ - return 0; -} -#endif /** * fh_clear_pre_post_attrs - Reset pre/post attributes diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 9ad727ddfedb34..0906a0b40c6aa5 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -55,7 +55,6 @@ enum nfs3_stable_how { NFS_INVALID_STABLE_HOW = -1 }; -#ifdef CONFIG_CRC32 /** * nfs_fhandle_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -67,10 +66,4 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) { return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); } -#else /* CONFIG_CRC32 */ -static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) -{ - return 0; -} -#endif /* CONFIG_CRC32 */ #endif /* _LINUX_NFS_H */ From a1d14d931bf700c1025db8c46d6731aa5cf440f9 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Thu, 10 Apr 2025 09:57:08 +0800 Subject: [PATCH 0598/2924] nfsd: decrease sc_count directly if fail to queue dl_recall A deadlock warning occurred when invoking nfs4_put_stid following a failed dl_recall queue operation: T1 T2 nfs4_laundromat nfs4_get_client_reaplist nfs4_anylock_blockers __break_lease spin_lock // ctx->flc_lock spin_lock // clp->cl_lock nfs4_lockowner_has_blockers locks_owner_has_blockers spin_lock // flctx->flc_lock nfsd_break_deleg_cb nfsd_break_one_deleg nfs4_put_stid refcount_dec_and_lock spin_lock // clp->cl_lock When a file is opened, an nfs4_delegation is allocated with sc_count initialized to 1, and the file_lease holds a reference to the delegation. The file_lease is then associated with the file through kernel_setlease. The disassociation is performed in nfsd4_delegreturn via the following call chain: nfsd4_delegreturn --> destroy_delegation --> destroy_unhashed_deleg --> nfs4_unlock_deleg_lease --> kernel_setlease --> generic_delete_lease The corresponding sc_count reference will be released after this disassociation. Since nfsd_break_one_deleg executes while holding the flc_lock, the disassociation process becomes blocked when attempting to acquire flc_lock in generic_delete_lease. This means: 1) sc_count in nfsd_break_one_deleg will not be decremented to 0; 2) The nfs4_put_stid called by nfsd_break_one_deleg will not attempt to acquire cl_lock; 3) Consequently, no deadlock condition is created. Given that sc_count in nfsd_break_one_deleg remains non-zero, we can safely perform refcount_dec on sc_count directly. This approach effectively avoids triggering deadlock warnings. Fixes: 230ca758453c ("nfsd: put dl_stid if fail to queue dl_recall") Signed-off-by: Li Lingfeng Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2041268b398a4e..59a693f22452b8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5430,7 +5430,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) queued = nfsd4_run_cb(&dp->dl_recall); WARN_ON_ONCE(!queued); if (!queued) - nfs4_put_stid(&dp->dl_stid); + refcount_dec(&dp->dl_stid.sc_count); } /* Called from break_lease() with flc_lock held. */ From 262b73ef442e68e53220b9d6fc5a0d08b557fa42 Mon Sep 17 00:00:00 2001 From: Chunjie Zhu Date: Sat, 12 Apr 2025 21:15:55 -0500 Subject: [PATCH 0599/2924] smb3 client: fix open hardlink on deferred close file error The following Python script results in unexpected behaviour when run on a CIFS filesystem against a Windows Server: # Create file fd = os.open('test', os.O_WRONLY|os.O_CREAT) os.write(fd, b'foo') os.close(fd) # Open and close the file to leave a pending deferred close fd = os.open('test', os.O_RDONLY|os.O_DIRECT) os.close(fd) # Try to open the file via a hard link os.link('test', 'new') newfd = os.open('new', os.O_RDONLY|os.O_DIRECT) The final open returns EINVAL due to the server returning STATUS_INVALID_PARAMETER. The root cause of this is that the client caches lease keys per inode, but the spec requires them to be related to the filename which causes problems when hard links are involved: From MS-SMB2 section 3.3.5.9.11: "The server MUST attempt to locate a Lease by performing a lookup in the LeaseTable.LeaseList using the LeaseKey in the SMB2_CREATE_REQUEST_LEASE_V2 as the lookup key. If a lease is found, Lease.FileDeleteOnClose is FALSE, and Lease.Filename does not match the file name for the incoming request, the request MUST be failed with STATUS_INVALID_PARAMETER" On client side, we first check the context of file open, if it hits above conditions, we first close all opening files which are belong to the same inode, then we do open the hard link file. Cc: stable@vger.kernel.org Signed-off-by: Chunjie Zhu Signed-off-by: Steve French --- fs/smb/client/cifsproto.h | 2 ++ fs/smb/client/file.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index cfcc07905bdf1b..59f6fdfe560ef3 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -163,6 +163,8 @@ extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, struct cifsFileInfo **ret_file); +extern int cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode, + struct file *file); extern unsigned int smbCalcSize(void *buf); extern int decode_negTokenInit(unsigned char *security_blob, int length, struct TCP_Server_Info *server); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 8407fb1086643c..9e8f404b9e56ae 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -1007,6 +1007,11 @@ int cifs_open(struct inode *inode, struct file *file) } else { _cifsFileInfo_put(cfile, true, false); } + } else { + /* hard link on the defeered close file */ + rc = cifs_get_hardlink_path(tcon, inode, file); + if (rc) + cifs_close_deferred_file(CIFS_I(inode)); } if (server->oplocks) @@ -2071,6 +2076,29 @@ cifs_move_llist(struct list_head *source, struct list_head *dest) list_move(li, dest); } +int +cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode, + struct file *file) +{ + struct cifsFileInfo *open_file = NULL; + struct cifsInodeInfo *cinode = CIFS_I(inode); + int rc = 0; + + spin_lock(&tcon->open_file_lock); + spin_lock(&cinode->open_file_lock); + + list_for_each_entry(open_file, &cinode->openFileList, flist) { + if (file->f_flags == open_file->f_flags) { + rc = -EINVAL; + break; + } + } + + spin_unlock(&cinode->open_file_lock); + spin_unlock(&tcon->open_file_lock); + return rc; +} + void cifs_free_llist(struct list_head *llist) { From c707193a17128fae2802d10cbad7239cc57f0c95 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 2 Apr 2025 13:26:47 -0700 Subject: [PATCH 0600/2924] Revert "smb: client: Fix netns refcount imbalance causing leaks and use-after-free" This reverts commit 4e7f1644f2ac6d01dc584f6301c3b1d5aac4eaef. The commit e9f2517a3e18 ("smb: client: fix TCP timers deadlock after rmmod") is not only a bogus fix for LOCKDEP null-ptr-deref but also introduces a real issue, TCP sockets leak, which will be explained in detail in the next revert. Also, CNA assigned CVE-2024-54680 to it but is rejecting it. [0] Thus, we are reverting the commit and its follow-up commit 4e7f1644f2ac ("smb: client: Fix netns refcount imbalance causing leaks and use-after-free"). Link: https://lore.kernel.org/all/2025040248-tummy-smilingly-4240@gregkh/ #[0] Fixes: 4e7f1644f2ac ("smb: client: Fix netns refcount imbalance causing leaks and use-after-free") Signed-off-by: Kuniyuki Iwashima Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/connect.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 4a0b2d220fe84e..370b79b2eaa669 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -300,7 +300,6 @@ cifs_abort_connection(struct TCP_Server_Info *server) server->ssocket->flags); sock_release(server->ssocket); server->ssocket = NULL; - put_net(cifs_net_ns(server)); } server->sequence_number = 0; server->session_estab = false; @@ -3369,12 +3368,8 @@ generic_ip_connect(struct TCP_Server_Info *server) /* * Grab netns reference for the socket. * - * This reference will be released in several situations: - * - In the failure path before the cifsd thread is started. - * - In the all place where server->socket is released, it is - * also set to NULL. - * - Ultimately in clean_demultiplex_info(), during the final - * teardown. + * It'll be released here, on error, or in clean_demultiplex_info() upon server + * teardown. */ get_net(net); @@ -3390,8 +3385,10 @@ generic_ip_connect(struct TCP_Server_Info *server) } rc = bind_socket(server); - if (rc < 0) + if (rc < 0) { + put_net(cifs_net_ns(server)); return rc; + } /* * Eventually check for other socket options to change from @@ -3446,6 +3443,9 @@ generic_ip_connect(struct TCP_Server_Info *server) (server->rfc1001_sessinit == -1 && sport == htons(RFC1001_PORT))) rc = ip_rfc1001_connect(server); + if (rc < 0) + put_net(cifs_net_ns(server)); + return rc; } From 95d2b9f693ff2a1180a23d7d59acc0c4e72f4c41 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 2 Apr 2025 13:26:48 -0700 Subject: [PATCH 0601/2924] Revert "smb: client: fix TCP timers deadlock after rmmod" This reverts commit e9f2517a3e18a54a3943c098d2226b245d488801. Commit e9f2517a3e18 ("smb: client: fix TCP timers deadlock after rmmod") is intended to fix a null-ptr-deref in LOCKDEP, which is mentioned as CVE-2024-54680, but is actually did not fix anything; The issue can be reproduced on top of it. [0] Also, it reverted the change by commit ef7134c7fc48 ("smb: client: Fix use-after-free of network namespace.") and introduced a real issue by reviving the kernel TCP socket. When a reconnect happens for a CIFS connection, the socket state transitions to FIN_WAIT_1. Then, inet_csk_clear_xmit_timers_sync() in tcp_close() stops all timers for the socket. If an incoming FIN packet is lost, the socket will stay at FIN_WAIT_1 forever, and such sockets could be leaked up to net.ipv4.tcp_max_orphans. Usually, FIN can be retransmitted by the peer, but if the peer aborts the connection, the issue comes into reality. I warned about this privately by pointing out the exact report [1], but the bogus fix was finally merged. So, we should not stop the timers to finally kill the connection on our side in that case, meaning we must not use a kernel socket for TCP whose sk->sk_net_refcnt is 0. The kernel socket does not have a reference to its netns to make it possible to tear down netns without cleaning up every resource in it. For example, tunnel devices use a UDP socket internally, but we can destroy netns without removing such devices and let it complete during exit. Otherwise, netns would be leaked when the last application died. However, this is problematic for TCP sockets because TCP has timers to close the connection gracefully even after the socket is close()d. The lifetime of the socket and its netns is different from the lifetime of the underlying connection. If the socket user does not maintain the netns lifetime, the timer could be fired after the socket is close()d and its netns is freed up, resulting in use-after-free. Actually, we have seen so many similar issues and converted such sockets to have a reference to netns. That's why I converted the CIFS client socket to have a reference to netns (sk->sk_net_refcnt == 1), which is somehow mentioned as out-of-scope of CIFS and technically wrong in e9f2517a3e18, but **is in-scope and right fix**. Regarding the LOCKDEP issue, we can prevent the module unload by bumping the module refcount when switching the LOCKDDEP key in sock_lock_init_class_and_name(). [2] For a while, let's revert the bogus fix. Note that now we can use sk_net_refcnt_upgrade() for the socket conversion, but I'll do so later separately to make backport easy. Link: https://lore.kernel.org/all/20250402020807.28583-1-kuniyu@amazon.com/ #[0] Link: https://lore.kernel.org/netdev/c08bd5378da647a2a4c16698125d180a@huawei.com/ #[1] Link: https://lore.kernel.org/lkml/20250402005841.19846-1-kuniyu@amazon.com/ #[2] Fixes: e9f2517a3e18 ("smb: client: fix TCP timers deadlock after rmmod") Signed-off-by: Kuniyuki Iwashima Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/connect.c | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 370b79b2eaa669..df976ce6aed92f 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1073,13 +1073,9 @@ clean_demultiplex_info(struct TCP_Server_Info *server) msleep(125); if (cifs_rdma_enabled(server)) smbd_destroy(server); - if (server->ssocket) { sock_release(server->ssocket); server->ssocket = NULL; - - /* Release netns reference for the socket. */ - put_net(cifs_net_ns(server)); } if (!list_empty(&server->pending_mid_q)) { @@ -1127,7 +1123,6 @@ clean_demultiplex_info(struct TCP_Server_Info *server) */ } - /* Release netns reference for this server. */ put_net(cifs_net_ns(server)); kfree(server->leaf_fullpath); kfree(server->hostname); @@ -1773,8 +1768,6 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, tcp_ses->ops = ctx->ops; tcp_ses->vals = ctx->vals; - - /* Grab netns reference for this server. */ cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); tcp_ses->sign = ctx->sign; @@ -1902,7 +1895,6 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, out_err_crypto_release: cifs_crypto_secmech_release(tcp_ses); - /* Release netns reference for this server. */ put_net(cifs_net_ns(tcp_ses)); out_err: @@ -1911,10 +1903,8 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, cifs_put_tcp_session(tcp_ses->primary_server, false); kfree(tcp_ses->hostname); kfree(tcp_ses->leaf_fullpath); - if (tcp_ses->ssocket) { + if (tcp_ses->ssocket) sock_release(tcp_ses->ssocket); - put_net(cifs_net_ns(tcp_ses)); - } kfree(tcp_ses); } return ERR_PTR(rc); @@ -3358,20 +3348,20 @@ generic_ip_connect(struct TCP_Server_Info *server) socket = server->ssocket; } else { struct net *net = cifs_net_ns(server); + struct sock *sk; - rc = sock_create_kern(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket); + rc = __sock_create(net, sfamily, SOCK_STREAM, + IPPROTO_TCP, &server->ssocket, 1); if (rc < 0) { cifs_server_dbg(VFS, "Error %d creating socket\n", rc); return rc; } - /* - * Grab netns reference for the socket. - * - * It'll be released here, on error, or in clean_demultiplex_info() upon server - * teardown. - */ - get_net(net); + sk = server->ssocket->sk; + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); /* BB other socket options to set KEEPALIVE, NODELAY? */ cifs_dbg(FYI, "Socket created\n"); @@ -3385,10 +3375,8 @@ generic_ip_connect(struct TCP_Server_Info *server) } rc = bind_socket(server); - if (rc < 0) { - put_net(cifs_net_ns(server)); + if (rc < 0) return rc; - } /* * Eventually check for other socket options to change from @@ -3425,7 +3413,6 @@ generic_ip_connect(struct TCP_Server_Info *server) if (rc < 0) { cifs_dbg(FYI, "Error %d connecting to server\n", rc); trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc); - put_net(cifs_net_ns(server)); sock_release(socket); server->ssocket = NULL; return rc; @@ -3443,9 +3430,6 @@ generic_ip_connect(struct TCP_Server_Info *server) (server->rfc1001_sessinit == -1 && sport == htons(RFC1001_PORT))) rc = ip_rfc1001_connect(server); - if (rc < 0) - put_net(cifs_net_ns(server)); - return rc; } From 8692c7db9a66c5efe7956afc48d21bc994289d95 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Apr 2025 16:28:41 -0400 Subject: [PATCH 0602/2924] bcachefs: btree_root_unreadable_and_scan_found_nothing now AUTOFIX This will likely mean that the btree had only one node - there was nothing or almost nothing in it, and we should reconstruct and continue. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 5d43e3504386ad..512c56ee5d9436 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -290,7 +290,7 @@ enum bch_fsck_flags { x(btree_node_bkey_bad_u64s, 260, 0) \ x(btree_node_topology_empty_interior_node, 261, 0) \ x(btree_ptr_v2_min_key_bad, 262, 0) \ - x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ + x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \ x(snapshot_node_missing, 264, 0) \ x(dup_backpointer_to_bad_csum_extent, 265, 0) \ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ From dd303e021996a0e43963d852af8a3277e6f5ed88 Mon Sep 17 00:00:00 2001 From: Ivaylo Ivanov Date: Sun, 13 Apr 2025 19:37:55 +0300 Subject: [PATCH 0603/2924] soc: samsung: usi: prevent wrong bits inversion during unconfiguring Instead of setting bit 1 (USI_OPTION_CLKSTOP_ON) during USI unconfiguring, all the other bits in the USI_OPTION register get inverted, which should not happen as that means the clock will keep getting provided to the USI IP. Remove the unnecessary tilde. Fixes: 11e77776b58a ("soc: samsung: usi: add a routine for unconfiguring the ip") Signed-off-by: Ivaylo Ivanov Link: https://lore.kernel.org/r/20250413163755.788907-1-ivo.ivanov.ivanov1@gmail.com Signed-off-by: Krzysztof Kozlowski --- drivers/soc/samsung/exynos-usi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/samsung/exynos-usi.c b/drivers/soc/samsung/exynos-usi.c index c5661ac19f7b3a..5f7bdf3bab05cc 100644 --- a/drivers/soc/samsung/exynos-usi.c +++ b/drivers/soc/samsung/exynos-usi.c @@ -233,7 +233,7 @@ static void exynos_usi_unconfigure(void *data) /* Make sure that we've stopped providing the clock to USI IP */ val = readl(usi->regs + USI_OPTION); val &= ~USI_OPTION_CLKREQ_ON; - val |= ~USI_OPTION_CLKSTOP_ON; + val |= USI_OPTION_CLKSTOP_ON; writel(val, usi->regs + USI_OPTION); /* Set USI block state to reset */ From 1013f5636fd808569c1f4c40a58a4efc70713a28 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 14 Apr 2025 00:07:34 +0200 Subject: [PATCH 0604/2924] genksyms: Handle typeof_unqual keyword and __seg_{fs,gs} qualifiers Handle typeof_unqual, __typeof_unqual and __typeof_unqual__ keywords using TYPEOF_KEYW token in the same way as typeof keyword. Also ignore x86 __seg_fs and __seg_gs named address space qualifiers using X86_SEG_KEYW token in the same way as const, volatile or restrict qualifiers. Fixes: ac053946f5c4 ("compiler.h: introduce TYPEOF_UNQUAL() macro") Closes: https://lore.kernel.org/lkml/81a25a60-de78-43fb-b56a-131151e1c035@molgen.mpg.de/ Reported-by: Paul Menzel Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Andrew Morton Cc: Linus Torvalds Cc: Masahiro Yamada Link: https://lore.kernel.org/r/20250413220749.270704-1-ubizjak@gmail.com --- scripts/genksyms/keywords.c | 7 +++++++ scripts/genksyms/parse.y | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/scripts/genksyms/keywords.c b/scripts/genksyms/keywords.c index b85e0979a00cec..ee1499d2706176 100644 --- a/scripts/genksyms/keywords.c +++ b/scripts/genksyms/keywords.c @@ -17,6 +17,8 @@ static struct resword { { "__signed__", SIGNED_KEYW }, { "__typeof", TYPEOF_KEYW }, { "__typeof__", TYPEOF_KEYW }, + { "__typeof_unqual", TYPEOF_KEYW }, + { "__typeof_unqual__", TYPEOF_KEYW }, { "__volatile", VOLATILE_KEYW }, { "__volatile__", VOLATILE_KEYW }, { "__builtin_va_list", VA_LIST_KEYW }, @@ -40,6 +42,10 @@ static struct resword { // KAO. }, // { "attribute", ATTRIBUTE_KEYW }, + // X86 named address space qualifiers + { "__seg_gs", X86_SEG_KEYW }, + { "__seg_fs", X86_SEG_KEYW }, + { "auto", AUTO_KEYW }, { "char", CHAR_KEYW }, { "const", CONST_KEYW }, @@ -57,6 +63,7 @@ static struct resword { { "struct", STRUCT_KEYW }, { "typedef", TYPEDEF_KEYW }, { "typeof", TYPEOF_KEYW }, + { "typeof_unqual", TYPEOF_KEYW }, { "union", UNION_KEYW }, { "unsigned", UNSIGNED_KEYW }, { "void", VOID_KEYW }, diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y index ee600a804fa103..efdcf07c4eb6da 100644 --- a/scripts/genksyms/parse.y +++ b/scripts/genksyms/parse.y @@ -91,6 +91,8 @@ static void record_compound(struct string_list **keyw, %token TYPEOF_KEYW %token VA_LIST_KEYW +%token X86_SEG_KEYW + %token EXPORT_SYMBOL_KEYW %token ASM_PHRASE @@ -292,7 +294,8 @@ type_qualifier_seq: ; type_qualifier: - CONST_KEYW | VOLATILE_KEYW + X86_SEG_KEYW + | CONST_KEYW | VOLATILE_KEYW | RESTRICT_KEYW { /* restrict has no effect in prototypes so ignore it */ remove_node($1); From b73e05281cd9e37b5525641ca6f4544867372533 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Mar 2025 08:52:13 +0100 Subject: [PATCH 0605/2924] xfs: remove the leftover xfs_{set,clear}_li_failed infrastructure Marking a log item as failed kept a buffer reference around for resubmission of inode and dquote items. For inode items commit 298f7bec503f3 ("xfs: pin inode backing buffer to the inode log item") started pinning the inode item buffers unconditionally and removed the need for this. Later commit acc8f8628c37 ("xfs: attach dquot buffer to dquot log item buffer") did the same for dquot items but didn't fully clean up the xfs_clear_li_failed side for them. Stop adding the extra pin for dquot items and remove the helpers. This happens to fix a call to xfs_buf_free with the AIL lock held, which would be incorrect for the unlikely case freeing the buffer ends up calling vfree. Reported-by: Dan Carpenter Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_dquot.c | 3 +-- fs/xfs/xfs_inode_item.c | 6 ------ fs/xfs/xfs_trans_ail.c | 5 ++--- fs/xfs/xfs_trans_priv.h | 28 ---------------------------- 4 files changed, 3 insertions(+), 39 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index edbc521870a10a..b4e32f0860b7e6 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1186,9 +1186,8 @@ xfs_qm_dqflush_done( if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) && (lip->li_lsn == qlip->qli_flush_lsn || test_bit(XFS_LI_FAILED, &lip->li_flags))) { - spin_lock(&ailp->ail_lock); - xfs_clear_li_failed(lip); + clear_bit(XFS_LI_FAILED, &lip->li_flags); if (lip->li_lsn == qlip->qli_flush_lsn) { /* xfs_ail_update_finish() drops the AIL lock */ tail_lsn = xfs_ail_delete_one(ailp, lip); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 40fc1bf900af90..c6cb0b6b9e4605 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -1089,13 +1089,7 @@ xfs_iflush_abort( * state. Whilst the inode is in the AIL, it should have a valid buffer * pointer for push operations to access - it is only safe to remove the * inode from the buffer once it has been removed from the AIL. - * - * We also clear the failed bit before removing the item from the AIL - * as xfs_trans_ail_delete()->xfs_clear_li_failed() will release buffer - * references the inode item owns and needs to hold until we've fully - * aborted the inode log item and detached it from the buffer. */ - clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags); xfs_trans_ail_delete(&iip->ili_item, 0); /* diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0fcb1828e598fb..85a649fec6acf6 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -909,10 +909,9 @@ xfs_trans_ail_delete( return; } - /* xfs_ail_update_finish() drops the AIL lock */ - xfs_clear_li_failed(lip); + clear_bit(XFS_LI_FAILED, &lip->li_flags); tail_lsn = xfs_ail_delete_one(ailp, lip); - xfs_ail_update_finish(ailp, tail_lsn); + xfs_ail_update_finish(ailp, tail_lsn); /* drops the AIL lock */ } int diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index bd841df93021ff..f945f0450b16fd 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -167,32 +167,4 @@ xfs_trans_ail_copy_lsn( } #endif -static inline void -xfs_clear_li_failed( - struct xfs_log_item *lip) -{ - struct xfs_buf *bp = lip->li_buf; - - ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags)); - lockdep_assert_held(&lip->li_ailp->ail_lock); - - if (test_and_clear_bit(XFS_LI_FAILED, &lip->li_flags)) { - lip->li_buf = NULL; - xfs_buf_rele(bp); - } -} - -static inline void -xfs_set_li_failed( - struct xfs_log_item *lip, - struct xfs_buf *bp) -{ - lockdep_assert_held(&lip->li_ailp->ail_lock); - - if (!test_and_set_bit(XFS_LI_FAILED, &lip->li_flags)) { - xfs_buf_hold(bp); - lip->li_buf = bp; - } -} - #endif /* __XFS_TRANS_PRIV_H__ */ From a1a56f541a8f634007de4bcb45aa3eaf803154a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Mar 2025 08:52:14 +0100 Subject: [PATCH 0606/2924] xfs: mark xfs_buf_free as might_sleep() xfs_buf_free can call vunmap, which can sleep. The vunmap path is an unlikely one, so add might_sleep to ensure calling xfs_buf_free from atomic context gets caught more easily. Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8e7f1b324b3bea..1a2b3f06fa717e 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -105,6 +105,7 @@ xfs_buf_free( { unsigned int size = BBTOB(bp->b_length); + might_sleep(); trace_xfs_buf_free(bp, _RET_IP_); ASSERT(list_empty(&bp->b_lru)); From 845abeb1f06a8a44e21314460eeb14cddfca52cc Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 25 Mar 2025 09:10:49 +0000 Subject: [PATCH 0607/2924] xfs: add tunable threshold parameter for triggering zone GC Presently we start garbage collection late - when we start running out of free zones to backfill max_open_zones. This is a reasonable default as it minimizes write amplification. The longer we wait, the more blocks are invalidated and reclaim cost less in terms of blocks to relocate. Starting this late however introduces a risk of GC being outcompeted by user writes. If GC can't keep up, user writes will be forced to wait for free zones with high tail latencies as a result. This is not a problem under normal circumstances, but if fragmentation is bad and user write pressure is high (multiple full-throttle writers) we will "bottom out" of free zones. To mitigate this, introduce a zonegc_low_space tunable that lets the user specify a percentage of how much of the unused space that GC should keep available for writing. A high value will reclaim more of the space occupied by unused blocks, creating a larger buffer against write bursts. This comes at a cost as write amplification is increased. To illustrate this using a sample workload, setting zonegc_low_space to 60% avoids high (500ms) max latencies while increasing write amplification by 15%. Signed-off-by: Hans Holmberg Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- Documentation/admin-guide/xfs.rst | 21 ++++++++++++++++++++ fs/xfs/xfs_mount.h | 1 + fs/xfs/xfs_sysfs.c | 32 +++++++++++++++++++++++++++++++ fs/xfs/xfs_zone_alloc.c | 7 +++++++ fs/xfs/xfs_zone_gc.c | 16 ++++++++++++++-- 5 files changed, 75 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst index b67772cf36d6dc..7b0811d650f919 100644 --- a/Documentation/admin-guide/xfs.rst +++ b/Documentation/admin-guide/xfs.rst @@ -542,3 +542,24 @@ The interesting knobs for XFS workqueues are as follows: nice Relative priority of scheduling the threads. These are the same nice levels that can be applied to userspace processes. ============ =========== + +Zoned Filesystems +================= + +For zoned file systems, the following attributes are exposed in: + + /sys/fs/xfs//zoned/ + + max_open_zones (Min: 1 Default: Varies Max: UINTMAX) + This read-only attribute exposes the maximum number of open zones + available for data placement. The value is determined at mount time and + is limited by the capabilities of the backing zoned device, file system + size and the max_open_zones mount option. + + zonegc_low_space (Min: 0 Default: 0 Max: 100) + Define a percentage for how much of the unused space that GC should keep + available for writing. A high value will reclaim more of the space + occupied by unused blocks, creating a larger buffer against write + bursts at the cost of increased write amplification. Regardless + of this value, garbage collection will always aim to free a minimum + amount of blocks to keep max_open_zones open for data placement purposes. diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 799b84220ebb84..e5192c12e7acf8 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -229,6 +229,7 @@ typedef struct xfs_mount { bool m_finobt_nores; /* no per-AG finobt resv. */ bool m_update_sb; /* sb needs update in mount */ unsigned int m_max_open_zones; + unsigned int m_zonegc_low_space; /* * Bitsets of per-fs metadata that have been checked and/or are sick. diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index b7e82d85f04371..7a5c5ef2db928d 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -718,8 +718,40 @@ max_open_zones_show( } XFS_SYSFS_ATTR_RO(max_open_zones); +static ssize_t +zonegc_low_space_store( + struct kobject *kobj, + const char *buf, + size_t count) +{ + int ret; + unsigned int val; + + ret = kstrtouint(buf, 0, &val); + if (ret) + return ret; + + if (val > 100) + return -EINVAL; + + zoned_to_mp(kobj)->m_zonegc_low_space = val; + + return count; +} + +static ssize_t +zonegc_low_space_show( + struct kobject *kobj, + char *buf) +{ + return sysfs_emit(buf, "%u\n", + zoned_to_mp(kobj)->m_zonegc_low_space); +} +XFS_SYSFS_ATTR_RW(zonegc_low_space); + static struct attribute *xfs_zoned_attrs[] = { ATTR_LIST(max_open_zones), + ATTR_LIST(zonegc_low_space), NULL, }; ATTRIBUTE_GROUPS(xfs_zoned); diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 52af234936a238..d509e49b2aaae5 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -1201,6 +1201,13 @@ xfs_mount_zones( xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, iz.available + iz.reclaimable); + /* + * The user may configure GC to free up a percentage of unused blocks. + * By default this is 0. GC will always trigger at the minimum level + * for keeping max_open_zones available for data placement. + */ + mp->m_zonegc_low_space = 0; + error = xfs_zone_gc_mount(mp); if (error) goto out_free_zone_info; diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index c5136ea9bb1ddd..8c541ca718722d 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -162,18 +162,30 @@ struct xfs_zone_gc_data { /* * We aim to keep enough zones free in stock to fully use the open zone limit - * for data placement purposes. + * for data placement purposes. Additionally, the m_zonegc_low_space tunable + * can be set to make sure a fraction of the unused blocks are available for + * writing. */ bool xfs_zoned_need_gc( struct xfs_mount *mp) { + s64 available, free; + if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE)) return false; - if (xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE) < + + available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE); + + if (available < mp->m_groups[XG_TYPE_RTG].blocks * (mp->m_max_open_zones - XFS_OPEN_GC_ZONES)) return true; + + free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); + if (available < mult_frac(free, mp->m_zonegc_low_space, 100)) + return true; + return false; } From a37b3b9c3cc595521c7f9d9b2b0b2ad367bf9c98 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 7 Apr 2025 17:30:30 -0700 Subject: [PATCH 0608/2924] xfs: compute buffer address correctly in xmbuf_map_backing_mem Prior to commit e614a00117bc2d, xmbuf_map_backing_mem relied on folio_file_page to return the base page for the xmbuf's loff_t in the xfile, and set b_addr to the page_address of that base page. Now that folio_file_page has been removed from xmbuf_map_backing_mem, we always set b_addr to the folio_address of the folio. This is correct for the situation where the folio size matches the buffer size, but it's totally wrong if tmpfs uses large folios. We need to use offset_in_folio here. Found via xfs/801, which demonstrated evidence of corruption of an in-memory rmap btree block right after initializing an adjacent block. Fixes: e614a00117bc2d ("xfs: cleanup mapping tmpfs folios into the buffer cache") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c index b4ffd80b7cb632..dcbfa274e06dc6 100644 --- a/fs/xfs/xfs_buf_mem.c +++ b/fs/xfs/xfs_buf_mem.c @@ -165,7 +165,7 @@ xmbuf_map_backing_mem( folio_set_dirty(folio); folio_unlock(folio); - bp->b_addr = folio_address(folio); + bp->b_addr = folio_address(folio) + offset_in_folio(folio, pos); return 0; } From 63c1f19a3be3169e51a5812d22a6d0c879414076 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 9 Apr 2025 15:59:56 +0200 Subject: [PATCH 0609/2924] espintcp: fix skb leaks A few error paths are missing a kfree_skb. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 4 +++- net/ipv6/esp6.c | 4 +++- net/xfrm/espintcp.c | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 0e4076866c0a40..876df672c0bfa5 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -199,8 +199,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) sk = esp_find_tcp_sk(x); err = PTR_ERR_OR_ZERO(sk); - if (err) + if (err) { + kfree_skb(skb); goto out; + } bh_lock_sock(sk); if (sock_owned_by_user(sk)) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9e73944e3b530a..574989b82179c1 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -216,8 +216,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) sk = esp6_find_tcp_sk(x); err = PTR_ERR_OR_ZERO(sk); - if (err) + if (err) { + kfree_skb(skb); goto out; + } bh_lock_sock(sk); if (sock_owned_by_user(sk)) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index fe82e2d073006e..fc7a603b04f130 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -171,8 +171,10 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb) struct espintcp_ctx *ctx = espintcp_getctx(sk); if (skb_queue_len(&ctx->out_queue) >= - READ_ONCE(net_hotdata.max_backlog)) + READ_ONCE(net_hotdata.max_backlog)) { + kfree_skb(skb); return -ENOBUFS; + } __skb_queue_tail(&ctx->out_queue, skb); From 028363685bd0b7a19b4a820f82dd905b1dc83999 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 9 Apr 2025 15:59:57 +0200 Subject: [PATCH 0610/2924] espintcp: remove encap socket caching to avoid reference leak The current scheme for caching the encap socket can lead to reference leaks when we try to delete the netns. The reference chain is: xfrm_state -> enacp_sk -> netns Since the encap socket is a userspace socket, it holds a reference on the netns. If we delete the espintcp state (through flush or individual delete) before removing the netns, the reference on the socket is dropped and the netns is correctly deleted. Otherwise, the netns may not be reachable anymore (if all processes within the ns have terminated), so we cannot delete the xfrm state to drop its reference on the socket. This patch results in a small (~2% in my tests) performance regression. A GC-type mechanism could be added for the socket cache, to clear references if the state hasn't been used "recently", but it's a lot more complex than just not caching the socket. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - net/ipv4/esp4.c | 49 ++++--------------------------------------- net/ipv6/esp6.c | 49 ++++--------------------------------------- net/xfrm/xfrm_state.c | 3 --- 4 files changed, 8 insertions(+), 94 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 39365fd2ea175c..06ab2a3d2ebd10 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -236,7 +236,6 @@ struct xfrm_state { /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; - struct sock __rcu *encap_sk; /* NAT keepalive */ u32 nat_keepalive_interval; /* seconds */ diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 876df672c0bfa5..f14a41ee4aa101 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -120,47 +120,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) } #ifdef CONFIG_INET_ESPINTCP -struct esp_tcp_sk { - struct sock *sk; - struct rcu_head rcu; -}; - -static void esp_free_tcp_sk(struct rcu_head *head) -{ - struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); - - sock_put(esk->sk); - kfree(esk); -} - static struct sock *esp_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; struct net *net = xs_net(x); - struct esp_tcp_sk *esk; __be16 sport, dport; - struct sock *nsk; struct sock *sk; - sk = rcu_dereference(x->encap_sk); - if (sk && sk->sk_state == TCP_ESTABLISHED) - return sk; - spin_lock_bh(&x->lock); sport = encap->encap_sport; dport = encap->encap_dport; - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (sk && sk == nsk) { - esk = kmalloc(sizeof(*esk), GFP_ATOMIC); - if (!esk) { - spin_unlock_bh(&x->lock); - return ERR_PTR(-ENOMEM); - } - RCU_INIT_POINTER(x->encap_sk, NULL); - esk->sk = sk; - call_rcu(&esk->rcu, esp_free_tcp_sk); - } spin_unlock_bh(&x->lock); sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4, @@ -173,20 +142,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x) return ERR_PTR(-EINVAL); } - spin_lock_bh(&x->lock); - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (encap->encap_sport != sport || - encap->encap_dport != dport) { - sock_put(sk); - sk = nsk ?: ERR_PTR(-EREMCHG); - } else if (sk == nsk) { - sock_put(sk); - } else { - rcu_assign_pointer(x->encap_sk, sk); - } - spin_unlock_bh(&x->lock); - return sk; } @@ -211,6 +166,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) err = espintcp_push_skb(sk, skb); bh_unlock_sock(sk); + sock_put(sk); + out: rcu_read_unlock(); return err; @@ -394,6 +351,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x, if (IS_ERR(sk)) return ERR_CAST(sk); + sock_put(sk); + *lenp = htons(len); esph = (struct ip_esp_hdr *)(lenp + 1); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 574989b82179c1..72adfc107b557d 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -137,47 +137,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) } #ifdef CONFIG_INET6_ESPINTCP -struct esp_tcp_sk { - struct sock *sk; - struct rcu_head rcu; -}; - -static void esp_free_tcp_sk(struct rcu_head *head) -{ - struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); - - sock_put(esk->sk); - kfree(esk); -} - static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; struct net *net = xs_net(x); - struct esp_tcp_sk *esk; __be16 sport, dport; - struct sock *nsk; struct sock *sk; - sk = rcu_dereference(x->encap_sk); - if (sk && sk->sk_state == TCP_ESTABLISHED) - return sk; - spin_lock_bh(&x->lock); sport = encap->encap_sport; dport = encap->encap_dport; - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (sk && sk == nsk) { - esk = kmalloc(sizeof(*esk), GFP_ATOMIC); - if (!esk) { - spin_unlock_bh(&x->lock); - return ERR_PTR(-ENOMEM); - } - RCU_INIT_POINTER(x->encap_sk, NULL); - esk->sk = sk; - call_rcu(&esk->rcu, esp_free_tcp_sk); - } spin_unlock_bh(&x->lock); sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6, @@ -190,20 +159,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) return ERR_PTR(-EINVAL); } - spin_lock_bh(&x->lock); - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (encap->encap_sport != sport || - encap->encap_dport != dport) { - sock_put(sk); - sk = nsk ?: ERR_PTR(-EREMCHG); - } else if (sk == nsk) { - sock_put(sk); - } else { - rcu_assign_pointer(x->encap_sk, sk); - } - spin_unlock_bh(&x->lock); - return sk; } @@ -228,6 +183,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) err = espintcp_push_skb(sk, skb); bh_unlock_sock(sk); + sock_put(sk); + out: rcu_read_unlock(); return err; @@ -424,6 +381,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, if (IS_ERR(sk)) return ERR_CAST(sk); + sock_put(sk); + *lenp = htons(len); esph = (struct ip_esp_hdr *)(lenp + 1); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 341d79ecb5c21c..2f57dabcc70b65 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -838,9 +838,6 @@ int __xfrm_state_delete(struct xfrm_state *x) xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); - if (x->encap_sk) - sock_put(rcu_dereference_raw(x->encap_sk)); - xfrm_dev_state_delete(x); /* All xfrm_state objects are created by xfrm_state_alloc. From d2f5819b6ed357c0c350c0616b6b9f38be59adf6 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 11 Apr 2025 08:57:37 -0700 Subject: [PATCH 0611/2924] slab: ensure slab->obj_exts is clear in a newly allocated slab page ktest recently reported crashes while running several buffered io tests with __alloc_tagging_slab_alloc_hook() at the top of the crash call stack. The signature indicates an invalid address dereference with low bits of slab->obj_exts being set. The bits were outside of the range used by page_memcg_data_flags and objext_flags and hence were not masked out by slab_obj_exts() when obtaining the pointer stored in slab->obj_exts. The typical crash log looks like this: 00510 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 00510 Mem abort info: 00510 ESR = 0x0000000096000045 00510 EC = 0x25: DABT (current EL), IL = 32 bits 00510 SET = 0, FnV = 0 00510 EA = 0, S1PTW = 0 00510 FSC = 0x05: level 1 translation fault 00510 Data abort info: 00510 ISV = 0, ISS = 0x00000045, ISS2 = 0x00000000 00510 CM = 0, WnR = 1, TnD = 0, TagAccess = 0 00510 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 00510 user pgtable: 4k pages, 39-bit VAs, pgdp=0000000104175000 00510 [0000000000000010] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 00510 Internal error: Oops: 0000000096000045 [#1] SMP 00510 Modules linked in: 00510 CPU: 10 UID: 0 PID: 7692 Comm: cat Not tainted 6.15.0-rc1-ktest-g189e17946605 #19327 NONE 00510 Hardware name: linux,dummy-virt (DT) 00510 pstate: 20001005 (nzCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00510 pc : __alloc_tagging_slab_alloc_hook+0xe0/0x190 00510 lr : __kmalloc_noprof+0x150/0x310 00510 sp : ffffff80c87df6c0 00510 x29: ffffff80c87df6c0 x28: 000000000013d1ff x27: 000000000013d200 00510 x26: ffffff80c87df9e0 x25: 0000000000000000 x24: 0000000000000001 00510 x23: ffffffc08041953c x22: 000000000000004c x21: ffffff80c0002180 00510 x20: fffffffec3120840 x19: ffffff80c4821000 x18: 0000000000000000 00510 x17: fffffffec3d02f00 x16: fffffffec3d02e00 x15: fffffffec3d00700 00510 x14: fffffffec3d00600 x13: 0000000000000200 x12: 0000000000000006 00510 x11: ffffffc080bb86c0 x10: 0000000000000000 x9 : ffffffc080201e58 00510 x8 : ffffff80c4821060 x7 : 0000000000000000 x6 : 0000000055555556 00510 x5 : 0000000000000001 x4 : 0000000000000010 x3 : 0000000000000060 00510 x2 : 0000000000000000 x1 : ffffffc080f50cf8 x0 : ffffff80d801d000 00510 Call trace: 00510 __alloc_tagging_slab_alloc_hook+0xe0/0x190 (P) 00510 __kmalloc_noprof+0x150/0x310 00510 __bch2_folio_create+0x5c/0xf8 00510 bch2_folio_create+0x2c/0x40 00510 bch2_readahead+0xc0/0x460 00510 read_pages+0x7c/0x230 00510 page_cache_ra_order+0x244/0x3a8 00510 page_cache_async_ra+0x124/0x170 00510 filemap_readahead.isra.0+0x58/0xa0 00510 filemap_get_pages+0x454/0x7b0 00510 filemap_read+0xdc/0x418 00510 bch2_read_iter+0x100/0x1b0 00510 vfs_read+0x214/0x300 00510 ksys_read+0x6c/0x108 00510 __arm64_sys_read+0x20/0x30 00510 invoke_syscall.constprop.0+0x54/0xe8 00510 do_el0_svc+0x44/0xc8 00510 el0_svc+0x18/0x58 00510 el0t_64_sync_handler+0x104/0x130 00510 el0t_64_sync+0x154/0x158 00510 Code: d5384100 f9401c01 b9401aa3 b40002e1 (f8227881) 00510 ---[ end trace 0000000000000000 ]--- 00510 Kernel panic - not syncing: Oops: Fatal exception 00510 SMP: stopping secondary CPUs 00510 Kernel Offset: disabled 00510 CPU features: 0x0000,000000e0,00000410,8240500b 00510 Memory Limit: none Investigation indicates that these bits are already set when we allocate slab page and are not zeroed out after allocation. We are not yet sure why these crashes start happening only recently but regardless of the reason, not initializing a field that gets used later is wrong. Fix it by initializing slab->obj_exts during slab page allocation. Fixes: 21c690a349ba ("mm: introduce slabobj_ext to support slab object extensions") Reported-by: Kent Overstreet Tested-by: Kent Overstreet Signed-off-by: Suren Baghdasaryan Acked-by: Kent Overstreet Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250411155737.1360746-1-surenb@google.com Signed-off-by: Vlastimil Babka --- mm/slub.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index b46f87662e71d4..dc9e729e1d269b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1973,6 +1973,11 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ __GFP_ACCOUNT | __GFP_NOFAIL) +static inline void init_slab_obj_exts(struct slab *slab) +{ + slab->obj_exts = 0; +} + int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, gfp_t gfp, bool new_slab) { @@ -2058,6 +2063,10 @@ static inline bool need_slab_obj_ext(void) #else /* CONFIG_SLAB_OBJ_EXT */ +static inline void init_slab_obj_exts(struct slab *slab) +{ +} + static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, gfp_t gfp, bool new_slab) { @@ -2637,6 +2646,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) slab->objects = oo_objects(oo); slab->inuse = 0; slab->frozen = 0; + init_slab_obj_exts(slab); account_slab(slab, oo_order(oo), s, flags); From 36ff6c3f5084f2dbb6cd89d15b78cf734e9abfa6 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Thu, 10 Apr 2025 12:53:03 +0100 Subject: [PATCH 0612/2924] spi: sun4i: add support for GPIO chip select lines Set use_gpio_descriptors to true so that GPIOs can be used for chip select in accordance with the DT binding. Signed-off-by: Mans Rullgard Acked-by: Jernej Skrabec Link: https://patch.msgid.link/20250410115303.5150-1-mans@mansr.com Signed-off-by: Mark Brown --- drivers/spi/spi-sun4i.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c index fcbe864c9b7d69..f89826d7dc4964 100644 --- a/drivers/spi/spi-sun4i.c +++ b/drivers/spi/spi-sun4i.c @@ -462,6 +462,7 @@ static int sun4i_spi_probe(struct platform_device *pdev) sspi->host = host; host->max_speed_hz = 100 * 1000 * 1000; host->min_speed_hz = 3 * 1000; + host->use_gpio_descriptors = true; host->set_cs = sun4i_spi_set_cs; host->transfer_one = sun4i_spi_transfer_one; host->num_chipselect = 4; From 289cae889a7464281b44df7f777fd5238ddfad7f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Mon, 7 Apr 2025 15:29:50 +0200 Subject: [PATCH 0613/2924] MAINTAINERS: pci: add entry for Rust PCI code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bjorn, Krzysztof and I agreed that I will maintain the Rust PCI code. Therefore, create a new entry in the MAINTAINERS file. Acked-by: Bjorn Helgaas Acked-by: Krzysztof Wilczyński Link: https://lore.kernel.org/r/20250407133059.164042-1-dakr@kernel.org [ Align Krzysztof's email address. - Danilo ] Signed-off-by: Danilo Krummrich --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 96b82704950184..778d7d168be34c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18686,6 +18686,16 @@ F: include/asm-generic/pci* F: include/linux/of_pci.h F: include/linux/pci* F: include/uapi/linux/pci* + +PCI SUBSYSTEM [RUST] +M: Danilo Krummrich +R: Bjorn Helgaas +R: Krzysztof Wilczyński +L: linux-pci@vger.kernel.org +S: Maintained +C: irc://irc.oftc.net/linux-pci +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git +F: rust/helpers/pci.c F: rust/kernel/pci.rs F: samples/rust/rust_driver_pci.rs From 53bd97801632c940767f4c8407c2cbdeb56b40e7 Mon Sep 17 00:00:00 2001 From: Christian Schrefl Date: Sun, 13 Apr 2025 21:26:56 +0200 Subject: [PATCH 0614/2924] rust: firmware: Use `ffi::c_char` type in `FwFunc` The `FwFunc` struct contains an function with a char pointer argument, for which a `*const u8` pointer was used. This is not really the "proper" type for this, so use a `*const kernel::ffi::c_char` pointer instead. This has no real functionality changes, since now `kernel::ffi::c_char` (which bindgen uses for `char`) is now a type alias to `u8` anyways, but before commit 1bae8729e50a ("rust: map `long` to `isize` and `char` to `u8`") the concrete type of `kernel::ffi::c_char` depended on the architecture (However all supported architectures at the time mapped to `i8`). This caused problems on the v6.13 tag when building for 32 bit arm (with my patches), since back then `*const i8` was used in the function argument and the function that bindgen generated used `*const core::ffi::c_char` which Rust mapped to `*const u8` on 32 bit arm. The stable v6.13.y branch does not have this issue since commit 1bae8729e50a ("rust: map `long` to `isize` and `char` to `u8`") was backported. This caused the following build error: ``` error[E0308]: mismatched types --> rust/kernel/firmware.rs:20:4 | 20 | Self(bindings::request_firmware) | ---- ^^^^^^^^^^^^^^^^^^^^^^^^^^ expected fn pointer, found fn item | | | arguments to this function are incorrect | = note: expected fn pointer `unsafe extern "C" fn(_, *const i8, _) -> _` found fn item `unsafe extern "C" fn(_, *const u8, _) -> _ {request_firmware}` note: tuple struct defined here --> rust/kernel/firmware.rs:14:8 | 14 | struct FwFunc( | ^^^^^^ error[E0308]: mismatched types --> rust/kernel/firmware.rs:24:14 | 24 | Self(bindings::firmware_request_nowarn) | ---- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ expected fn pointer, found fn item | | | arguments to this function are incorrect | = note: expected fn pointer `unsafe extern "C" fn(_, *const i8, _) -> _` found fn item `unsafe extern "C" fn(_, *const u8, _) -> _ {firmware_request_nowarn}` note: tuple struct defined here --> rust/kernel/firmware.rs:14:8 | 14 | struct FwFunc( | ^^^^^^ error[E0308]: mismatched types --> rust/kernel/firmware.rs:64:45 | 64 | let ret = unsafe { func.0(pfw as _, name.as_char_ptr(), dev.as_raw()) }; | ------ ^^^^^^^^^^^^^^^^^^ expected `*const i8`, found `*const u8` | | | arguments to this function are incorrect | = note: expected raw pointer `*const i8` found raw pointer `*const u8` error: aborting due to 3 previous errors ``` Fixes: de6582833db0 ("rust: add firmware abstractions") Cc: stable@vger.kernel.org Reviewed-by: Benno Lossin Signed-off-by: Christian Schrefl Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250413-rust_arm_fix_fw_abstaction-v3-1-8dd7c0bbcd47@gmail.com [ Add firmware prefix to commit subject. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/firmware.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index f04b058b09b2d2..2494c96e105f3a 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -4,7 +4,7 @@ //! //! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h) -use crate::{bindings, device::Device, error::Error, error::Result, str::CStr}; +use crate::{bindings, device::Device, error::Error, error::Result, ffi, str::CStr}; use core::ptr::NonNull; /// # Invariants @@ -12,7 +12,11 @@ use core::ptr::NonNull; /// One of the following: `bindings::request_firmware`, `bindings::firmware_request_nowarn`, /// `bindings::firmware_request_platform`, `bindings::request_firmware_direct`. struct FwFunc( - unsafe extern "C" fn(*mut *const bindings::firmware, *const u8, *mut bindings::device) -> i32, + unsafe extern "C" fn( + *mut *const bindings::firmware, + *const ffi::c_char, + *mut bindings::device, + ) -> i32, ); impl FwFunc { From 441016056010e50cee18633b9dc125b24feeb74d Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 9 Apr 2025 17:36:51 +0200 Subject: [PATCH 0615/2924] riscv: Fix unaligned access info messages Ensure we only print messages about command line parameters when the parameters are actually in use. Also complain about the use of the vector parameter when vector support isn't available. Fixes: aecb09e091dc ("riscv: Add parameter for skipping access speed tests") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdVEp2_ho51gkpLLJG2HimqZ1gZ0fa=JA4uNNZjFFqaPMg@mail.gmail.com/ Closes: https://lore.kernel.org/all/CAMuHMdWVMP0MYCLFq+b7H_uz-2omdFiDDUZq0t_gw0L9rrJtkQ@mail.gmail.com/ Signed-off-by: Andrew Jones Tested-by: Geert Uytterhoeven Tested-by: Alexandre Ghiti Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250409153650.84433-2-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/unaligned_access_speed.c | 35 +++++++++++++--------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 585d2dcf2dab1c..b8ba13819d05e5 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -439,29 +439,36 @@ static int __init check_unaligned_access_all_cpus(void) { int cpu; - if (unaligned_scalar_speed_param == RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN && - !check_unaligned_access_emulated_all_cpus()) { - check_unaligned_access_speed_all_cpus(); - } else { - pr_info("scalar unaligned access speed set to '%s' by command line\n", - speed_str[unaligned_scalar_speed_param]); + if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + pr_info("scalar unaligned access speed set to '%s' (%lu) by command line\n", + speed_str[unaligned_scalar_speed_param], unaligned_scalar_speed_param); for_each_online_cpu(cpu) per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; + } else if (!check_unaligned_access_emulated_all_cpus()) { + check_unaligned_access_speed_all_cpus(); + } + + if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + if (!has_vector() && + unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) { + pr_warn("vector support is not available, ignoring unaligned_vector_speed=%s\n", + speed_str[unaligned_vector_speed_param]); + } else { + pr_info("vector unaligned access speed set to '%s' (%lu) by command line\n", + speed_str[unaligned_vector_speed_param], unaligned_vector_speed_param); + } } if (!has_vector()) unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; - if (unaligned_vector_speed_param == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN && - !check_vector_unaligned_access_emulated_all_cpus() && - IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { - kthread_run(vec_check_unaligned_access_speed_all_cpus, - NULL, "vec_check_unaligned_access_speed_all_cpus"); - } else { - pr_info("vector unaligned access speed set to '%s' by command line\n", - speed_str[unaligned_vector_speed_param]); + if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { for_each_online_cpu(cpu) per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; + } else if (!check_vector_unaligned_access_emulated_all_cpus() && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"); } /* From e94eb7ea6f206e229791761a5fdf9389f8dbd183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 9 Apr 2025 20:21:27 +0200 Subject: [PATCH 0616/2924] riscv: Properly export reserved regions in /proc/iomem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /proc/iomem represents the kernel's memory map. Regions marked with "Reserved" tells the user that the range should not be tampered with. Kexec-tools, when using the older kexec_load syscall relies on the "Reserved" regions to build the memory segments, that will be the target of the new kexec'd kernel. The RISC-V port tries to expose all reserved regions to userland, but some regions were not properly exposed: Regions that resided in both the "regular" and reserved memory block, e.g. the EFI Memory Map. A missing entry could result in reserved memory being overwritten. It turns out, that arm64, and loongarch had a similar issue a while back: commit d91680e687f4 ("arm64: Fix /proc/iomem for reserved but not memory regions") commit 50d7ba36b916 ("arm64: export memblock_reserve()d regions via /proc/iomem") Similar to the other ports, resolve the issue by splitting the regions in an arch initcall, since we need a working allocator. Fixes: ffe0e5261268 ("RISC-V: Improve init_resources()") Signed-off-by: Björn Töpel Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250409182129.634415-1-bjorn@kernel.org Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/setup.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index c174544eefc8ee..f7c9a1caa83e62 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -66,6 +66,9 @@ static struct resource bss_res = { .name = "Kernel bss", }; static struct resource elfcorehdr_res = { .name = "ELF Core hdr", }; #endif +static int num_standard_resources; +static struct resource *standard_resources; + static int __init add_resource(struct resource *parent, struct resource *res) { @@ -139,7 +142,7 @@ static void __init init_resources(void) struct resource *res = NULL; struct resource *mem_res = NULL; size_t mem_res_sz = 0; - int num_resources = 0, res_idx = 0; + int num_resources = 0, res_idx = 0, non_resv_res = 0; int ret = 0; /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */ @@ -193,6 +196,7 @@ static void __init init_resources(void) /* Add /memory regions to the resource tree */ for_each_mem_region(region) { res = &mem_res[res_idx--]; + non_resv_res++; if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; @@ -210,6 +214,9 @@ static void __init init_resources(void) goto error; } + num_standard_resources = non_resv_res; + standard_resources = &mem_res[res_idx + 1]; + /* Clean-up any unused pre-allocated resources */ if (res_idx >= 0) memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res)); @@ -221,6 +228,33 @@ static void __init init_resources(void) memblock_free(mem_res, mem_res_sz); } +static int __init reserve_memblock_reserved_regions(void) +{ + u64 i, j; + + for (i = 0; i < num_standard_resources; i++) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) + continue; + + for_each_reserved_mem_range(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "Reserved"); + } + } + + return 0; +} +arch_initcall(reserve_memblock_reserved_regions); static void __init parse_dtb(void) { From 0b4cce68efb93e31a8e51795d696df6e379cb41c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 9 Apr 2025 10:14:49 -0700 Subject: [PATCH 0617/2924] riscv: module: Fix out-of-bounds relocation access The current code allows rel[j] to access one element past the end of the relocation section. Simplify to num_relocations which is equivalent to the existing size expression. Fixes: 080c4324fa5e ("riscv: optimize ELF relocation function in riscv") Signed-off-by: Samuel Holland Reviewed-by: Maxim Kochetkov Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250409171526.862481-1-samuel.holland@sifive.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 0ae34d79b87bd9..7f6147c18033b2 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -860,7 +860,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } j++; - if (j > sechdrs[relsec].sh_size / sizeof(*rel)) + if (j == num_relocations) j = 0; } while (j_idx != j); From 1ee1313f4722e6d67c6e9447ee81d24d6e3ff4ad Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 9 Apr 2025 10:14:50 -0700 Subject: [PATCH 0618/2924] riscv: module: Allocate PLT entries for R_RISCV_PLT32 apply_r_riscv_plt32_rela() may need to emit a PLT entry for the referenced symbol, so there must be space allocated in the PLT. Fixes: 8fd6c5142395 ("riscv: Add remaining module relocations") Signed-off-by: Samuel Holland Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20250409171526.862481-2-samuel.holland@sifive.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/module-sections.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index e264e59e596e80..91d0b355ceeff6 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -73,16 +73,17 @@ static bool duplicate_rela(const Elf_Rela *rela, int idx) static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts, unsigned int *gots) { - unsigned int type, i; - - for (i = 0; i < num; i++) { - type = ELF_RISCV_R_TYPE(relas[i].r_info); - if (type == R_RISCV_CALL_PLT) { + for (int i = 0; i < num; i++) { + switch (ELF_R_TYPE(relas[i].r_info)) { + case R_RISCV_CALL_PLT: + case R_RISCV_PLT32: if (!duplicate_rela(relas, i)) (*plts)++; - } else if (type == R_RISCV_GOT_HI20) { + break; + case R_RISCV_GOT_HI20: if (!duplicate_rela(relas, i)) (*gots)++; + break; } } } From fb53a9aa5f5b8bf302f3260a7f1f5a24345ce62a Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Mon, 14 Apr 2025 14:09:48 +0200 Subject: [PATCH 0619/2924] riscv: Provide all alternative macros all the time We need to provide all six forms of the alternative macros (ALTERNATIVE, ALTERNATIVE_2, _ALTERNATIVE_CFG, _ALTERNATIVE_CFG_2, __ALTERNATIVE_CFG, __ALTERNATIVE_CFG_2) for all four cases derived from the two ifdefs (RISCV_ALTERNATIVE, __ASSEMBLY__) in order to ensure all configs can compile. Define this missing ones and ensure all are defined to consume all parameters passed. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504130710.3IKz6Ibs-lkp@intel.com/ Signed-off-by: Andrew Jones Tested-by: Alexandre Ghiti Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250414120947.135173-2-ajones@ventanamicro.com Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/alternative-macros.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 721ec275ce57e3..231d777d936c2d 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -115,24 +115,19 @@ \old_c .endm -#define _ALTERNATIVE_CFG(old_c, ...) \ - ALTERNATIVE_CFG old_c - -#define _ALTERNATIVE_CFG_2(old_c, ...) \ - ALTERNATIVE_CFG old_c +#define __ALTERNATIVE_CFG(old_c, ...) ALTERNATIVE_CFG old_c +#define __ALTERNATIVE_CFG_2(old_c, ...) ALTERNATIVE_CFG old_c #else /* !__ASSEMBLY__ */ -#define __ALTERNATIVE_CFG(old_c) \ - old_c "\n" +#define __ALTERNATIVE_CFG(old_c, ...) old_c "\n" +#define __ALTERNATIVE_CFG_2(old_c, ...) old_c "\n" -#define _ALTERNATIVE_CFG(old_c, ...) \ - __ALTERNATIVE_CFG(old_c) +#endif /* __ASSEMBLY__ */ -#define _ALTERNATIVE_CFG_2(old_c, ...) \ - __ALTERNATIVE_CFG(old_c) +#define _ALTERNATIVE_CFG(old_c, ...) __ALTERNATIVE_CFG(old_c) +#define _ALTERNATIVE_CFG_2(old_c, ...) __ALTERNATIVE_CFG_2(old_c) -#endif /* __ASSEMBLY__ */ #endif /* CONFIG_RISCV_ALTERNATIVE */ /* From 16c22c56d4282584742022a37d4f79a46ca6094a Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 4 Mar 2025 10:14:42 -0600 Subject: [PATCH 0620/2924] virtio_pci: Use self group type for cap commands Section 2.12.1.2 of v1.4 of the VirtIO spec states: The device and driver capabilities commands are currently defined for self group type. 1. VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY 2. VIRTIO_ADMIN_CMD_DEVICE_CAP_GET 3. VIRTIO_ADMIN_CMD_DRIVER_CAP_SET Fixes: bfcad518605d ("virtio: Manage device and driver capabilities via the admin commands") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Message-Id: <20250304161442.90700-1-danielj@nvidia.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_modern.c | 4 ++-- include/uapi/linux/virtio_pci.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 5eaade7578606e..d50fe030d82534 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -247,7 +247,7 @@ virtio_pci_admin_cmd_dev_parts_objects_enable(struct virtio_device *virtio_dev) sg_init_one(&data_sg, get_data, sizeof(*get_data)); sg_init_one(&result_sg, result, sizeof(*result)); cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DEVICE_CAP_GET); - cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF); cmd.data_sg = &data_sg; cmd.result_sg = &result_sg; ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); @@ -305,7 +305,7 @@ static void virtio_pci_admin_cmd_cap_init(struct virtio_device *virtio_dev) sg_init_one(&result_sg, data, sizeof(*data)); cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY); - cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF); cmd.result_sg = &result_sg; ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 8549d457125714..c691ac210ce2ef 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -246,6 +246,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LIST_USE 0x1 /* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SELF 0x0 #define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 /* Transitional device admin command. */ From a940e0a685575424d33324ec7f0089045249de0a Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Mon, 3 Mar 2025 09:52:37 +0100 Subject: [PATCH 0621/2924] vhost: fix VHOST_*_OWNER documentation VHOST_OWNER_SET and VHOST_OWNER_RESET are used in the documentation instead of VHOST_SET_OWNER and VHOST_RESET_OWNER respectively. To avoid confusion, let's use the right names in the documentation. No change to the API, only the documentation is involved. Signed-off-by: Stefano Garzarella Message-Id: <20250303085237.19990-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/vhost.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index b95dd84eef2db2..d4b3e2ae1314d1 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -28,10 +28,10 @@ /* Set current process as the (exclusive) owner of this file descriptor. This * must be called before any other vhost command. Further calls to - * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ + * VHOST_SET_OWNER fail until VHOST_RESET_OWNER is called. */ #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) /* Give up ownership, and reset the device to default values. - * Allows subsequent call to VHOST_OWNER_SET to succeed. */ + * Allows subsequent call to VHOST_SET_OWNER to succeed. */ #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) /* Set up/modify memory layout */ From 2e2f925fe737576df2373931c95e1a2b66efdfef Mon Sep 17 00:00:00 2001 From: Zhongqiu Han Date: Wed, 12 Mar 2025 21:04:12 +0800 Subject: [PATCH 0622/2924] virtio_ring: Fix data race by tagging event_triggered as racy for KCSAN syzbot reports a data-race when accessing the event_triggered, here is the simplified stack when the issue occurred: ================================================================== BUG: KCSAN: data-race in virtqueue_disable_cb / virtqueue_enable_cb_delayed write to 0xffff8881025bc452 of 1 bytes by task 3288 on cpu 0: virtqueue_enable_cb_delayed+0x42/0x3c0 drivers/virtio/virtio_ring.c:2653 start_xmit+0x230/0x1310 drivers/net/virtio_net.c:3264 __netdev_start_xmit include/linux/netdevice.h:5151 [inline] netdev_start_xmit include/linux/netdevice.h:5160 [inline] xmit_one net/core/dev.c:3800 [inline] read to 0xffff8881025bc452 of 1 bytes by interrupt on cpu 1: virtqueue_disable_cb_split drivers/virtio/virtio_ring.c:880 [inline] virtqueue_disable_cb+0x92/0x180 drivers/virtio/virtio_ring.c:2566 skb_xmit_done+0x5f/0x140 drivers/net/virtio_net.c:777 vring_interrupt+0x161/0x190 drivers/virtio/virtio_ring.c:2715 __handle_irq_event_percpu+0x95/0x490 kernel/irq/handle.c:158 handle_irq_event_percpu kernel/irq/handle.c:193 [inline] value changed: 0x01 -> 0x00 ================================================================== When the data race occurs, the function virtqueue_enable_cb_delayed() sets event_triggered to false, and virtqueue_disable_cb_split/packed() reads it as false due to the race condition. Since event_triggered is an unreliable hint used for optimization, this should only cause the driver temporarily suggest that the device not send an interrupt notification when the event index is used. Fix this KCSAN reported data-race issue by explicitly tagging the access as data_racy. Reported-by: syzbot+efe683d57990864b8c8e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67c7761a.050a0220.15b4b9.0018.GAE@google.com/ Signed-off-by: Zhongqiu Han Message-Id: <20250312130412.3516307-1-quic_zhonhan@quicinc.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fdd2d2b07b5a2a..b784aab6686703 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2650,7 +2650,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) struct vring_virtqueue *vq = to_vvq(_vq); if (vq->event_triggered) - vq->event_triggered = false; + data_race(vq->event_triggered = false); return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : virtqueue_enable_cb_delayed_split(_vq); From 0866ee8e50f017731b80891294c0edd0f5fcd0a9 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Thu, 3 Apr 2025 18:38:05 +0200 Subject: [PATCH 0623/2924] rust: disable `clippy::needless_continue` Starting with Rust 1.86.0, Clippy's `needless_continue` lint complains about the last statement of a loop [1], including cases like: while ... { match ... { ... if ... => { ... return ...; } _ => continue, } } as well as nested `match`es in a loop. One solution is changing `continue` for `()` [2], but arguably using `continue` shows the intent better when it is alone in an arm like that. Moreover, I am not sure we want to force people to try to find other ways to write the code either, in cases when that applies. In addition, the help text does not really apply in the new cases the lint has introduced, e.g. here one cannot simply "drop" the expression: warning: this `continue` expression is redundant --> rust/macros/helpers.rs:85:18 | 85 | _ => continue, | ^^^^^^^^ | = help: consider dropping the `continue` expression = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_continue = note: requested on the command line with `-W clippy::needless-continue` The examples in the documentation do not show a case like this, either, so the second "help" line does not help. In addition, locally disabling the lint is not possible with `expect`, since the behavior differs across versions. Using `allow` would be possible, but, even then, an extra line just for this is a bit too much, especially if there are other ways to satisfy the lint. Finally, the lint is still in the "pedantic" category and disabled by default by Clippy. Thus disable the lint, at least for the time being. Feedback was submitted to upstream Clippy, in case this can be improved or perhaps the lint split into several [3]. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust-clippy/pull/13891 [1] Link: https://lore.kernel.org/rust-for-linux/20250401221205.52381-1-ojeda@kernel.org/ [2] Link: https://github.com/rust-lang/rust-clippy/issues/14536 [3] Link: https://lore.kernel.org/r/20250403163805.67770-1-ojeda@kernel.org Reviewed-by: Alice Ryhl Signed-off-by: Miguel Ojeda --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index 38689a0c36052b..3111475fdc92b3 100644 --- a/Makefile +++ b/Makefile @@ -477,7 +477,6 @@ export rust_common_flags := --edition=2021 \ -Wclippy::ignored_unit_patterns \ -Wclippy::mut_mut \ -Wclippy::needless_bitwise_bool \ - -Wclippy::needless_continue \ -Aclippy::needless_lifetimes \ -Wclippy::no_mangle_with_rust_abi \ -Wclippy::undocumented_unsafe_blocks \ From 2042c352e21d19eaf5f9e22fb6afce72293ef28c Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Mon, 14 Apr 2025 21:37:52 +1000 Subject: [PATCH 0624/2924] dma/mapping.c: dev_dbg support for dma_addressing_limited MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the debug and resolution of an issue involving forced use of bounce buffers, 7170130e4c72 ("x86/mm/init: Handle the special case of device private pages in add_pages(), to not increase max_pfn and trigger dma_addressing_limited() bounce buffers"). It would have been easier to debug the issue if dma_addressing_limited() had debug information about the device not being able to address all of memory and thus forcing all accesses through a bounce buffer. Please see[2] Implement dev_dbg to debug the potential use of bounce buffers when we hit the condition. When swiotlb is used, dma_addressing_limited() is used to determine the size of maximum dma buffer size in dma_direct_max_mapping_size(). The debug prints could be triggered in that check as well (when enabled). Link: https://lore.kernel.org/lkml/20250401000752.249348-1-balbirs@nvidia.com/ [1] Link: https://lore.kernel.org/lkml/20250310112206.4168-1-spasswolf@web.de/ [2] Cc: Marek Szyprowski Cc: Robin Murphy Cc: "Christian König" Cc: Ingo Molnar Cc: Kees Cook Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Alex Deucher Cc: Bert Karwatzki Cc: Christoph Hellwig Signed-off-by: Balbir Singh Reviewed-by: Christoph Hellwig Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250414113752.3298276-1-balbirs@nvidia.com --- kernel/dma/mapping.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index cda127027e48a7..67da08fa672370 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -918,7 +918,7 @@ EXPORT_SYMBOL(dma_set_coherent_mask); * the system, else %false. Lack of addressing bits is the prime reason for * bounce buffering, but might not be the only one. */ -bool dma_addressing_limited(struct device *dev) +static bool __dma_addressing_limited(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -930,6 +930,15 @@ bool dma_addressing_limited(struct device *dev) return false; return !dma_direct_all_ram_mapped(dev); } + +bool dma_addressing_limited(struct device *dev) +{ + if (!__dma_addressing_limited(dev)) + return false; + + dev_dbg(dev, "device is DMA addressing limited\n"); + return true; +} EXPORT_SYMBOL_GPL(dma_addressing_limited); size_t dma_max_mapping_size(struct device *dev) From 46e24a545cdb4556f8128c90ecc34eeae52477a0 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 9 Apr 2025 00:03:11 +0200 Subject: [PATCH 0625/2924] rust: kasan/kbuild: fix missing flags on first build If KASAN is enabled, and one runs in a clean repository e.g.: make LLVM=1 prepare make LLVM=1 prepare Then the Rust code gets rebuilt, which should not happen. The reason is some of the LLVM KASAN `rustc` flags are added in the second run: -Cllvm-args=-asan-instrumentation-with-call-threshold=10000 -Cllvm-args=-asan-stack=0 -Cllvm-args=-asan-globals=1 -Cllvm-args=-asan-kernel-mem-intrinsic-prefix=1 Further runs do not rebuild Rust because the flags do not change anymore. Rebuilding like that in the second run is bad, even if this just happens with KASAN enabled, but missing flags in the first one is even worse. The root issue is that we pass, for some architectures and for the moment, a generated `target.json` file. That file is not ready by the time `rustc` gets called for the flag test, and thus the flag test fails just because the file is not available, e.g.: $ ... --target=./scripts/target.json ... -Cllvm-args=... error: target file "./scripts/target.json" does not exist There are a few approaches we could take here to solve this. For instance, we could ensure that every time that the config is rebuilt, we regenerate the file and recompute the flags. Or we could use the LLVM version to check for these flags, instead of testing the flag (which may have other advantages, such as allowing us to detect renames on the LLVM side). However, it may be easier than that: `rustc` is aware of the `-Cllvm-args` regardless of the `--target` (e.g. I checked that the list printed is the same, plus that I can check for these flags even if I pass a completely unrelated target), and thus we can just eliminate the dependency completely. Thus filter out the target. This does mean that `rustc-option` cannot be used to test a flag that requires the right target, but we don't have other users yet, it is a minimal change and we want to get rid of custom targets in the future. We could only filter in the case `target.json` is used, to make it work in more cases, but then it would be harder to notice that it may not work in a couple architectures. Cc: Matthew Maurer Cc: Sami Tolvanen Cc: stable@vger.kernel.org Fixes: e3117404b411 ("kbuild: rust: Enable KASAN support") Tested-by: Alice Ryhl Link: https://lore.kernel.org/r/20250408220311.1033475-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/Makefile.compiler | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler index 8956587b85470a..7ed7f92a7daa80 100644 --- a/scripts/Makefile.compiler +++ b/scripts/Makefile.compiler @@ -80,7 +80,7 @@ ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) # TODO: remove RUSTC_BOOTSTRAP=1 when we raise the minimum GNU Make version to 4.4 __rustc-option = $(call try-run,\ echo '#![allow(missing_docs)]#![feature(no_core)]#![no_core]' | RUSTC_BOOTSTRAP=1\ - $(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null,$(2)) $(3)\ + $(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null --target=%,$(2)) $(3)\ --crate-type=rlib --out-dir=$(TMPOUT) --emit=obj=- - >/dev/null,$(3),$(4)) # rustc-option From 1b4194053f6b30556272ff11750dd518e067ea49 Mon Sep 17 00:00:00 2001 From: "Bird, Tim" Date: Fri, 11 Apr 2025 19:20:18 +0000 Subject: [PATCH 0626/2924] block: add SPDX header line to blk-throttle.h Add an SPDX license identifier line to blk-throttle.h Use 'GPL-2.0' as the identifier, since blk-throttle.c uses that, and blk.h (from which some material was copied when blk-throttle.h was created) also uses that identifier. Signed-off-by: Tim Bird Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/MW5PR13MB5632EE4645BCA24ED111EC0EFDB62@MW5PR13MB5632.namprd13.prod.outlook.com Signed-off-by: Jens Axboe --- block/blk-throttle.h | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-throttle.h b/block/blk-throttle.h index 7964cc041e0697..f9f8666891abc3 100644 --- a/block/blk-throttle.h +++ b/block/blk-throttle.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_THROTTLE_H #define BLK_THROTTLE_H From 40f2eb9b531475dd01b683fdaf61ca3cfd03a51e Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Sat, 12 Apr 2025 17:25:54 +0800 Subject: [PATCH 0627/2924] block: fix resource leak in blk_register_queue() error path When registering a queue fails after blk_mq_sysfs_register() is successful but the function later encounters an error, we need to clean up the blk_mq_sysfs resources. Add the missing blk_mq_sysfs_unregister() call in the error path to properly clean up these resources and prevent a memory leak. Fixes: 320ae51feed5 ("blk-mq: new multi-queue block IO queueing mechanism") Signed-off-by: Zheng Qixing Reviewed-by: Christoph Hellwig Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250412092554.475218-1-zhengqixing@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index a2882751f0d21c..1f9b45b0b9ee76 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -909,6 +909,8 @@ int blk_register_queue(struct gendisk *disk) out_debugfs_remove: blk_debugfs_remove(disk); mutex_unlock(&q->sysfs_lock); + if (queue_is_mq(q)) + blk_mq_sysfs_unregister(disk); out_put_queue_kobj: kobject_put(&disk->queue_kobj); return ret; From 2bd42b03ab6b04dde1753bd6b38eeca5c70f3941 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 20 Mar 2025 13:41:42 -0600 Subject: [PATCH 0628/2924] vfio/pci: Virtualize zero INTx PIN if no pdev->irq Typically pdev->irq is consistent with whether the device itself supports INTx, where device support is reported via the PIN register. Therefore the PIN register is often already zero if pdev->irq is zero. Recently virtualization of the PIN register was expanded to include the case where the device supports INTx but the platform does not route the interrupt. This is reported by a value of IRQ_NOTCONNECTED on some architectures. Other architectures just report zero for pdev->irq. We already disallow INTx setup if pdev->irq is zero, therefore add this to the PIN register virtualization criteria so that a consistent view is provided to userspace through virtualized config space and ioctls. Reported-by: Shivaprasad G Bhat Link: https://lore.kernel.org/all/174231895238.2295.12586708771396482526.stgit@linux.ibm.com/ Tested-by: Shivaprasad G Bhat Link: https://lore.kernel.org/r/20250320194145.2816379-1-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 14437396d72118..8f02f236b5b4ba 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1815,7 +1815,7 @@ int vfio_config_init(struct vfio_pci_core_device *vdev) } if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx || - vdev->pdev->irq == IRQ_NOTCONNECTED) + !vdev->pdev->irq || vdev->pdev->irq == IRQ_NOTCONNECTED) vconfig[PCI_INTERRUPT_PIN] = 0; ret = vfio_cap_init(vdev); From a3cd5f507b72c0532c3345b6913557efab34f405 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 13 Apr 2025 02:23:38 +0200 Subject: [PATCH 0629/2924] objtool/rust: add one more `noreturn` Rust function for Rust 1.86.0 Starting with Rust 1.86.0 (see upstream commit b151b513ba2b ("Insert null checks for pointer dereferences when debug assertions are enabled") [1]), under some kernel configurations with `CONFIG_RUST_DEBUG_ASSERTIONS=y`, one may trigger a new `objtool` warning: rust/kernel.o: warning: objtool: _R..._6kernel9workqueue6system() falls through to next function _R...9workqueue14system_highpri() due to a call to the `noreturn` symbol: core::panicking::panic_null_pointer_dereference Thus add it to the list so that `objtool` knows it is actually `noreturn`. See commit 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") for more details. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Fixes: 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") Link: https://github.com/rust-lang/rust/commit/b151b513ba2b65c7506ec1a80f2712bbd09154d1 [1] Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250413002338.1741593-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4a1f6c3169b3b6..67006eeb30c887 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -225,6 +225,7 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking14panic_nounwind") || str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || + str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || From 18605e9525ef4617582f73a2712fe9bc7c12149c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Apr 2025 17:33:25 +0200 Subject: [PATCH 0630/2924] cpufreq: intel_pstate: Fix hwp_get_cpu_scaling() Commit b52aaeeadfac ("cpufreq: intel_pstate: Avoid SMP calls to get cpu-type") introduced two issues into hwp_get_cpu_scaling(). First, it made that function use the CPU type of the CPU running the code even though the target CPU is passed as the argument to it and second, it used smp_processor_id() for that even though hwp_get_cpu_scaling() runs in preemptible context. Fix both of these problems by simply passing "cpu" to cpu_data(). Fixes: b52aaeeadfac ("cpufreq: intel_pstate: Avoid SMP calls to get cpu-type") Link: https://lore.kernel.org/linux-pm/20250412103434.5321-1-xry111@xry111.site/ Reported-by: Xi Ruoyao Acked-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/12659608.O9o76ZdvQC@rjwysocki.net --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4aad79d26c64f5..f41ed0b9e61015 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2209,7 +2209,7 @@ static int knl_get_turbo_pstate(int cpu) static int hwp_get_cpu_scaling(int cpu) { if (hybrid_scaling_factor) { - struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + struct cpuinfo_x86 *c = &cpu_data(cpu); u8 cpu_type = c->topo.intel_type; /* From b26c1a85f3fc3cc749380ff94199377fc2d0c203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 7 Apr 2025 10:58:03 +0200 Subject: [PATCH 0631/2924] kunit: qemu_configs: SH: Respect kunit cmdline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default SH kunit configuration sets CONFIG_CMDLINE_OVERWRITE which completely disregards the cmdline passed from the bootloader/QEMU in favor of the builtin CONFIG_CMDLINE. However the kunit tool needs to pass arguments to the in-kernel kunit core, for filters and other runtime parameters. Enable CONFIG_CMDLINE_EXTEND instead, so kunit arguments are respected. Link: https://lore.kernel.org/r/20250407-kunit-sh-v1-1-f5432a54cf2f@linutronix.de Fixes: 8110a3cab05e ("kunit: tool: Add support for SH under QEMU") Signed-off-by: Thomas Weißschuh Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/qemu_configs/sh.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/kunit/qemu_configs/sh.py b/tools/testing/kunit/qemu_configs/sh.py index 78a474a5b95f3a..f00cb89fdef6aa 100644 --- a/tools/testing/kunit/qemu_configs/sh.py +++ b/tools/testing/kunit/qemu_configs/sh.py @@ -7,7 +7,9 @@ CONFIG_MEMORY_START=0x0c000000 CONFIG_SH_RTS7751R2D=y CONFIG_RTS7751R2D_PLUS=y -CONFIG_SERIAL_SH_SCI=y''', +CONFIG_SERIAL_SH_SCI=y +CONFIG_CMDLINE_EXTEND=y +''', qemu_arch='sh4', kernel_path='arch/sh/boot/zImage', kernel_command_line='console=ttySC1', From b424bb88afb6719b30340f059bf50953424cdd9d Mon Sep 17 00:00:00 2001 From: Mathieu Dubois-Briand Date: Fri, 11 Apr 2025 16:52:09 +0200 Subject: [PATCH 0632/2924] gpiolib: Allow to use setters with return value for output-only gpios The gpiod_direction_output_raw_commit() function checks if any setter callback is present before doing anything. As the new GPIO setters with return values were introduced, make this check also succeed if one is present. Fixes: 98ce1eb1fd87 ("gpiolib: introduce gpio_chip setters that return values") Signed-off-by: Mathieu Dubois-Briand Link: https://lore.kernel.org/r/20250411-mdb-gpiolib-setters-fix-v2-1-9611280d8822@bootlin.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index b8197502a5ac59..cd4fecbb41f292 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2879,7 +2879,7 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) * output-only, but if there is then not even a .set() operation it * is pretty tricky to drive the output line. */ - if (!guard.gc->set && !guard.gc->direction_output) { + if (!guard.gc->set && !guard.gc->set_rv && !guard.gc->direction_output) { gpiod_warn(desc, "%s: missing set() and direction_output() operations\n", __func__); From f0433eea468810aebd61d0b9d095e9acd6bea2ed Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 12 Apr 2025 16:30:11 -0700 Subject: [PATCH 0633/2924] net: don't mix device locking in dev_close_many() calls Lockdep found the following dependency: &dev_instance_lock_key#3 --> &rdev->wiphy.mtx --> &net->xdp.lock --> &xs->mutex --> &dev_instance_lock_key#3 The first dependency is the problem. wiphy mutex should be outside the instance locks. The problem happens in notifiers (as always) for CLOSE. We only hold the instance lock for ops locked devices during CLOSE, and WiFi netdevs are not ops locked. Unfortunately, when we dev_close_many() during netns dismantle we may be holding the instance lock of _another_ netdev when issuing a CLOSE for a WiFi device. Lockdep's "Possible unsafe locking scenario" only prints 3 locks and we have 4, plus I think we'd need 3 CPUs, like this: CPU0 CPU1 CPU2 ---- ---- ---- lock(&xs->mutex); lock(&dev_instance_lock_key#3); lock(&rdev->wiphy.mtx); lock(&net->xdp.lock); lock(&xs->mutex); lock(&rdev->wiphy.mtx); lock(&dev_instance_lock_key#3); Tho, I don't think that's possible as CPU1 and CPU2 would be under rtnl_lock. Even if we have per-netns rtnl_lock and wiphy can span network namespaces - CPU0 and CPU1 must be in the same netns to see dev_instance_lock, so CPU0 can't be installing a socket as CPU1 is tearing the netns down. Regardless, our expected lock ordering is that wiphy lock is taken before instance locks, so let's fix this. Go over the ops locked and non-locked devices separately. Note that calling dev_close_many() on an empty list is perfectly fine. All processing (including RCU syncs) are conditional on the list not being empty, already. Fixes: 7e4d784f5810 ("net: hold netdev instance lock during rtnetlink operations") Reported-by: syzbot+6f588c78bf765b62b450@syzkaller.appspotmail.com Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250412233011.309762-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 75e104322ad527..5fcbc66d865e2f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11932,15 +11932,24 @@ void unregister_netdevice_many_notify(struct list_head *head, BUG_ON(dev->reg_state != NETREG_REGISTERED); } - /* If device is running, close it first. */ + /* If device is running, close it first. Start with ops locked... */ list_for_each_entry(dev, head, unreg_list) { - list_add_tail(&dev->close_list, &close_head); - netdev_lock_ops(dev); + if (netdev_need_ops_lock(dev)) { + list_add_tail(&dev->close_list, &close_head); + netdev_lock(dev); + } + } + dev_close_many(&close_head, true); + /* ... now unlock them and go over the rest. */ + list_for_each_entry(dev, head, unreg_list) { + if (netdev_need_ops_lock(dev)) + netdev_unlock(dev); + else + list_add_tail(&dev->close_list, &close_head); } dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { - netdev_unlock_ops(dev); /* And unlink it from device chain. */ unlist_netdevice(dev); netdev_lock(dev); From 747fb8413aaa36e4c988d45c4fe20d4c2b0778cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 9 Apr 2025 07:55:41 -0700 Subject: [PATCH 0634/2924] netlink: specs: ovs_vport: align with C codegen capabilities We started generating C code for OvS a while back, but actually C codegen only supports fixed headers specified at the family level right now (schema also allows specifying them per op). ovs_flow and ovs_datapath already specify the fixed header at the family level but ovs_vport does it per op. Move the property, all ops use the same header. This ensures YNL C sees the correct hdr_len: const struct ynl_family ynl_ovs_vport_family = { .name = "ovs_vport", - .hdr_len = sizeof(struct genlmsghdr), + .hdr_len = sizeof(struct genlmsghdr) + sizeof(struct ovs_header), }; Fixes: 7c59c9c8f202 ("tools: ynl: generate code for ovs families") Link: https://patch.msgid.link/20250409145541.580674-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ovs_vport.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/netlink/specs/ovs_vport.yaml b/Documentation/netlink/specs/ovs_vport.yaml index 86ba9ac2a52103..b538bb99ee9b5f 100644 --- a/Documentation/netlink/specs/ovs_vport.yaml +++ b/Documentation/netlink/specs/ovs_vport.yaml @@ -123,12 +123,12 @@ attribute-sets: operations: name-prefix: ovs-vport-cmd- + fixed-header: ovs-header list: - name: new doc: Create a new OVS vport attribute-set: vport - fixed-header: ovs-header do: request: attributes: @@ -141,7 +141,6 @@ operations: name: del doc: Delete existing OVS vport from a data path attribute-set: vport - fixed-header: ovs-header do: request: attributes: @@ -152,7 +151,6 @@ operations: name: get doc: Get / dump OVS vport configuration and state attribute-set: vport - fixed-header: ovs-header do: &vport-get-op request: attributes: From a727a83ef22591d47e2d219cd8e01bd3616f4611 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Wed, 9 Apr 2025 10:24:52 +0200 Subject: [PATCH 0635/2924] MAINTAINERS: update HUGETLB reviewers I have done quite some review on hugetlb code over the years, and some work on it as well, the latest being the hugetlb pagewalk unification which is a work in progress, and touches hugetlb code to great lengths. HugeTLB does not have many reviewers, so I would like to help out by offering myself as an official Reviewer. Signed-off-by: Oscar Salvador Link: https://lkml.kernel.org/r/20250409082452.269180-1-osalvador@suse.de Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: Muchun Song Cc: Michal Hocko Cc: Peter Xu Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index de97cd54ff241b..56ffc830e5f237 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10954,6 +10954,7 @@ F: include/linux/platform_data/huawei-gaokun-ec.h HUGETLB SUBSYSTEM M: Muchun Song +R: Oscar Salvador L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages From 65d91192aa66f05710cfddf6a14b5a25ee554dba Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sat, 12 Apr 2025 12:40:18 +0200 Subject: [PATCH 0636/2924] net: openvswitch: fix nested key length validation in the set() action It's not safe to access nla_len(ovs_key) if the data is smaller than the netlink header. Check that the attribute is OK first. Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Reported-by: syzbot+b07a9da40df1576b8048@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b07a9da40df1576b8048 Tested-by: syzbot+b07a9da40df1576b8048@syzkaller.appspotmail.com Signed-off-by: Ilya Maximets Reviewed-by: Eelco Chaudron Acked-by: Aaron Conole Link: https://patch.msgid.link/20250412104052.2073688-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- net/openvswitch/flow_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 95e0dd14dc1a32..518be23e48ea93 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2876,7 +2876,8 @@ static int validate_set(const struct nlattr *a, size_t key_len; /* There can be only one key in a action */ - if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) + if (!nla_ok(ovs_key, nla_len(a)) || + nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; key_len = nla_len(ovs_key); From 1e440d5b25b7efccb3defe542a73c51005799a5f Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Mon, 7 Apr 2025 11:26:50 +0000 Subject: [PATCH 0637/2924] ksmbd: Fix dangling pointer in krb_authenticate krb_authenticate frees sess->user and does not set the pointer to NULL. It calls ksmbd_krb5_authenticate to reinitialise sess->user but that function may return without doing so. If that happens then smb2_sess_setup, which calls krb_authenticate, will be accessing free'd memory when it later uses sess->user. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index d24d95d15d876b..57839f9708bb6c 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1602,8 +1602,10 @@ static int krb5_authenticate(struct ksmbd_work *work, if (prev_sess_id && prev_sess_id != sess->id) destroy_previous_session(conn, sess->user, prev_sess_id); - if (sess->state == SMB2_SESSION_VALID) + if (sess->state == SMB2_SESSION_VALID) { ksmbd_free_user(sess->user); + sess->user = NULL; + } retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); From 1df0d4c616138784e033ad337961b6e1a6bcd999 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 15 Apr 2025 09:26:10 +0900 Subject: [PATCH 0638/2924] ksmbd: fix WARNING "do not call blocking ops when !TASK_RUNNING" wait_event_timeout() will set the state of the current task to TASK_UNINTERRUPTIBLE, before doing the condition check. This means that ksmbd_durable_scavenger_alive() will try to acquire the mutex while already in a sleeping state. The scheduler warns us by giving the following warning: do not call blocking ops when !TASK_RUNNING; state=2 set at [<0000000061515a6f>] prepare_to_wait_event+0x9f/0x6c0 WARNING: CPU: 2 PID: 4147 at kernel/sched/core.c:10099 __might_sleep+0x12f/0x160 mutex lock is not needed in ksmbd_durable_scavenger_alive(). Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 8d1f30dcba7e8e..1f8fa3468173ab 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -713,12 +713,8 @@ static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon, static bool ksmbd_durable_scavenger_alive(void) { - mutex_lock(&durable_scavenger_lock); - if (!durable_scavenger_running) { - mutex_unlock(&durable_scavenger_lock); + if (!durable_scavenger_running) return false; - } - mutex_unlock(&durable_scavenger_lock); if (kthread_should_stop()) return false; @@ -799,9 +795,7 @@ static int ksmbd_durable_scavenger(void *dummy) break; } - mutex_lock(&durable_scavenger_lock); durable_scavenger_running = false; - mutex_unlock(&durable_scavenger_lock); module_put(THIS_MODULE); From 21a4e47578d44c6b37c4fc4aba8ed7cc8dbb13de Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 11 Apr 2025 15:19:46 +0900 Subject: [PATCH 0639/2924] ksmbd: fix use-after-free in __smb2_lease_break_noti() Move tcp_transport free to ksmbd_conn_free. If ksmbd connection is referenced when ksmbd server thread terminates, It will not be freed, but conn->tcp_transport is freed. __smb2_lease_break_noti can be performed asynchronously when the connection is disconnected. __smb2_lease_break_noti calls ksmbd_conn_write, which can cause use-after-free when conn->ksmbd_transport is already freed. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 4 +++- fs/smb/server/transport_tcp.c | 14 +++++++++----- fs/smb/server/transport_tcp.h | 1 + 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index c1f22c12911179..83764c230e9d4c 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -39,8 +39,10 @@ void ksmbd_conn_free(struct ksmbd_conn *conn) xa_destroy(&conn->sessions); kvfree(conn->request_buf); kfree(conn->preauth_info); - if (atomic_dec_and_test(&conn->refcnt)) + if (atomic_dec_and_test(&conn->refcnt)) { + ksmbd_free_transport(conn->transport); kfree(conn); + } } /** diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 7f38a3c3f5bd69..abedf510899a74 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -93,17 +93,21 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return t; } -static void free_transport(struct tcp_transport *t) +void ksmbd_free_transport(struct ksmbd_transport *kt) { - kernel_sock_shutdown(t->sock, SHUT_RDWR); - sock_release(t->sock); - t->sock = NULL; + struct tcp_transport *t = TCP_TRANS(kt); - ksmbd_conn_free(KSMBD_TRANS(t)->conn); + sock_release(t->sock); kfree(t->iov); kfree(t); } +static void free_transport(struct tcp_transport *t) +{ + kernel_sock_shutdown(t->sock, SHUT_RDWR); + ksmbd_conn_free(KSMBD_TRANS(t)->conn); +} + /** * kvec_array_init() - initialize a IO vector segment * @new: IO vector to be initialized diff --git a/fs/smb/server/transport_tcp.h b/fs/smb/server/transport_tcp.h index 8c9aa624cfe3ca..1e51675ee1b209 100644 --- a/fs/smb/server/transport_tcp.h +++ b/fs/smb/server/transport_tcp.h @@ -8,6 +8,7 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz); struct interface *ksmbd_find_netdev_name_iface_list(char *netdev_name); +void ksmbd_free_transport(struct ksmbd_transport *kt); int ksmbd_tcp_init(void); void ksmbd_tcp_destroy(void); From 18b4fac5ef17f77fed9417d22210ceafd6525fc7 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 15 Apr 2025 09:30:21 +0900 Subject: [PATCH 0640/2924] ksmbd: fix use-after-free in smb_break_all_levII_oplock() There is a room in smb_break_all_levII_oplock that can cause racy issues when unlocking in the middle of the loop. This patch use read lock to protect whole loop. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 29 +++++++++-------------------- fs/smb/server/oplock.h | 1 - 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index f103b1bd040040..81a29857b1e32f 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -129,14 +129,6 @@ static void free_opinfo(struct oplock_info *opinfo) kfree(opinfo); } -static inline void opinfo_free_rcu(struct rcu_head *rcu_head) -{ - struct oplock_info *opinfo; - - opinfo = container_of(rcu_head, struct oplock_info, rcu_head); - free_opinfo(opinfo); -} - struct oplock_info *opinfo_get(struct ksmbd_file *fp) { struct oplock_info *opinfo; @@ -157,8 +149,8 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci) if (list_empty(&ci->m_op_list)) return NULL; - rcu_read_lock(); - opinfo = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info, + down_read(&ci->m_lock); + opinfo = list_first_entry(&ci->m_op_list, struct oplock_info, op_entry); if (opinfo) { if (opinfo->conn == NULL || @@ -171,8 +163,7 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci) } } } - - rcu_read_unlock(); + up_read(&ci->m_lock); return opinfo; } @@ -185,7 +176,7 @@ void opinfo_put(struct oplock_info *opinfo) if (!atomic_dec_and_test(&opinfo->refcount)) return; - call_rcu(&opinfo->rcu_head, opinfo_free_rcu); + free_opinfo(opinfo); } static void opinfo_add(struct oplock_info *opinfo) @@ -193,7 +184,7 @@ static void opinfo_add(struct oplock_info *opinfo) struct ksmbd_inode *ci = opinfo->o_fp->f_ci; down_write(&ci->m_lock); - list_add_rcu(&opinfo->op_entry, &ci->m_op_list); + list_add(&opinfo->op_entry, &ci->m_op_list); up_write(&ci->m_lock); } @@ -207,7 +198,7 @@ static void opinfo_del(struct oplock_info *opinfo) write_unlock(&lease_list_lock); } down_write(&ci->m_lock); - list_del_rcu(&opinfo->op_entry); + list_del(&opinfo->op_entry); up_write(&ci->m_lock); } @@ -1347,8 +1338,8 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, ci = fp->f_ci; op = opinfo_get(fp); - rcu_read_lock(); - list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) { + down_read(&ci->m_lock); + list_for_each_entry(brk_op, &ci->m_op_list, op_entry) { if (brk_op->conn == NULL) continue; @@ -1358,7 +1349,6 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, if (ksmbd_conn_releasing(brk_op->conn)) continue; - rcu_read_unlock(); if (brk_op->is_lease && (brk_op->o_lease->state & (~(SMB2_LEASE_READ_CACHING_LE | SMB2_LEASE_HANDLE_CACHING_LE)))) { @@ -1388,9 +1378,8 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE, NULL); next: opinfo_put(brk_op); - rcu_read_lock(); } - rcu_read_unlock(); + up_read(&ci->m_lock); if (op) opinfo_put(op); diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 3f64f07872638e..9a56eaadd0dd8f 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -71,7 +71,6 @@ struct oplock_info { struct list_head lease_entry; wait_queue_head_t oplock_q; /* Other server threads */ wait_queue_head_t oplock_brk; /* oplock breaking wait */ - struct rcu_head rcu_head; }; struct lease_break_info { From b37f2f332b40ad1c27f18682a495850f2f04db0a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 15 Apr 2025 09:31:08 +0900 Subject: [PATCH 0641/2924] ksmbd: fix the warning from __kernel_write_iter [ 2110.972290] ------------[ cut here ]------------ [ 2110.972301] WARNING: CPU: 3 PID: 735 at fs/read_write.c:599 __kernel_write_iter+0x21b/0x280 This patch doesn't allow writing to directory. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 8554aa5a1059fd..391d07da586c73 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -479,7 +479,8 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp, int err = 0; if (work->conn->connection_type) { - if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE))) { + if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE)) || + S_ISDIR(file_inode(fp->filp)->i_mode)) { pr_err("no right to write(%pD)\n", fp->filp); err = -EACCES; goto out; From a93ff742820f75bf8bb3fcf21d9f25ca6eb3d4c6 Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Wed, 9 Apr 2025 12:04:49 +0300 Subject: [PATCH 0642/2924] ksmbd: Prevent integer overflow in calculation of deadtime The user can set any value for 'deadtime'. This affects the arithmetic expression 'req->deadtime * SMB_ECHO_INTERVAL', which is subject to overflow. The added check makes the server behavior more predictable. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Cc: stable@vger.kernel.org Signed-off-by: Denis Arefev Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_ipc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index 3f185ae60dc514..2a3e2b0ce5570a 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -310,7 +310,11 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) server_conf.signing = req->signing; server_conf.tcp_port = req->tcp_port; server_conf.ipc_timeout = req->ipc_timeout * HZ; - server_conf.deadtime = req->deadtime * SMB_ECHO_INTERVAL; + if (check_mul_overflow(req->deadtime, SMB_ECHO_INTERVAL, + &server_conf.deadtime)) { + ret = -EINVAL; + goto out; + } server_conf.share_fake_fscaps = req->share_fake_fscaps; ksmbd_init_domain(req->sub_auth); @@ -337,6 +341,7 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) server_conf.bind_interfaces_only = req->bind_interfaces_only; ret |= ksmbd_tcp_set_interfaces(KSMBD_STARTUP_CONFIG_INTERFACES(req), req->ifc_list_sz); +out: if (ret) { pr_err("Server configuration error: %s %s %s\n", req->netbios_name, req->server_string, From 29bdc1f1c1df80868fb35bc69d1f073183adc6de Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Mon, 10 Mar 2025 07:44:09 -0500 Subject: [PATCH 0643/2924] book3s64/radix: Fix compile errors when CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=n Fix compile errors when CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP=n Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Donet Tom Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/8231763344223c193e3452eab0ae8ea966aff466.1741609795.git.donettom@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 311e2112d782ea..bd6916419472ce 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -976,7 +976,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, return 0; } - +#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { if (radix_enabled()) @@ -984,6 +984,7 @@ bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) return false; } +#endif int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, unsigned long next) From 9cf7e13fecbab0894f6986fc6986ab2eba8de52e Mon Sep 17 00:00:00 2001 From: Donet Tom Date: Mon, 10 Mar 2025 07:44:10 -0500 Subject: [PATCH 0644/2924] book3s64/radix : Align section vmemmap start address to PAGE_SIZE A vmemmap altmap is a device-provided region used to provide backing storage for struct pages. For each namespace, the altmap should belong to that same namespace. If the namespaces are created unaligned, there is a chance that the section vmemmap start address could also be unaligned. If the section vmemmap start address is unaligned, the altmap page allocated from the current namespace might be used by the previous namespace also. During the free operation, since the altmap is shared between two namespaces, the previous namespace may detect that the page does not belong to its altmap and incorrectly assume that the page is a normal page. It then attempts to free the normal page, which leads to a kernel crash. Kernel attempted to read user page (18) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000018 Faulting instruction address: 0xc000000000530c7c Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries CPU: 32 PID: 2104 Comm: ndctl Kdump: loaded Tainted: G W NIP: c000000000530c7c LR: c000000000530e00 CTR: 0000000000007ffe REGS: c000000015e57040 TRAP: 0300 Tainted: G W MSR: 800000000280b033 CR: 84482404 CFAR: c000000000530dfc DAR: 0000000000000018 DSISR: 40000000 IRQMASK: 0 GPR00: c000000000530e00 c000000015e572e0 c000000002c5cb00 c00c000101008040 GPR04: 0000000000000000 0000000000000007 0000000000000001 000000000000001f GPR08: 0000000000000005 0000000000000000 0000000000000018 0000000000002000 GPR12: c0000000001d2fb0 c0000060de6b0080 0000000000000000 c0000060dbf90020 GPR16: c00c000101008000 0000000000000001 0000000000000000 c000000125b20f00 GPR20: 0000000000000001 0000000000000000 ffffffffffffffff c00c000101007fff GPR24: 0000000000000001 0000000000000000 0000000000000000 0000000000000000 GPR28: 0000000004040201 0000000000000001 0000000000000000 c00c000101008040 NIP [c000000000530c7c] get_pfnblock_flags_mask+0x7c/0xd0 LR [c000000000530e00] free_unref_page_prepare+0x130/0x4f0 Call Trace: free_unref_page+0x50/0x1e0 free_reserved_page+0x40/0x68 free_vmemmap_pages+0x98/0xe0 remove_pte_table+0x164/0x1e8 remove_pmd_table+0x204/0x2c8 remove_pud_table+0x1c4/0x288 remove_pagetable+0x1c8/0x310 vmemmap_free+0x24/0x50 section_deactivate+0x28c/0x2a0 __remove_pages+0x84/0x110 arch_remove_memory+0x38/0x60 memunmap_pages+0x18c/0x3d0 devm_action_release+0x30/0x50 release_nodes+0x68/0x140 devres_release_group+0x100/0x190 dax_pmem_compat_release+0x44/0x80 [dax_pmem_compat] device_for_each_child+0x8c/0x100 [dax_pmem_compat_remove+0x2c/0x50 [dax_pmem_compat] nvdimm_bus_remove+0x78/0x140 [libnvdimm] device_remove+0x70/0xd0 Another issue is that if there is no altmap, a PMD-sized vmemmap page will be allocated from RAM, regardless of the alignment of the section start address. If the section start address is not aligned to the PMD size, a VM_BUG_ON will be triggered when setting the PMD-sized page to page table. In this patch, we are aligning the section vmemmap start address to PAGE_SIZE. After alignment, the start address will not be part of the current namespace, and a normal page will be allocated for the vmemmap mapping of the current section. For the remaining sections, altmaps will be allocated. During the free operation, the normal page will be correctly freed. In the same way, a PMD_SIZE vmemmap page will be allocated only if the section start address is PMD_SIZE-aligned; otherwise, it will fall back to a PAGE-sized vmemmap allocation. Without this patch ================== NS1 start NS2 start _________________________________________________________ | NS1 | NS2 | --------------------------------------------------------- | Altmap| Altmap | .....|Altmap| Altmap | ........... | NS1 | NS1 | | NS2 | NS2 | In the above scenario, NS1 and NS2 are two namespaces. The vmemmap for NS1 comes from Altmap NS1, which belongs to NS1, and the vmemmap for NS2 comes from Altmap NS2, which belongs to NS2. The vmemmap start for NS2 is not aligned, so Altmap NS2 is shared by both NS1 and NS2. During the free operation in NS1, Altmap NS2 is not part of NS1's altmap, causing it to attempt to free an invalid page. With this patch =============== NS1 start NS2 start _________________________________________________________ | NS1 | NS2 | --------------------------------------------------------- | Altmap| Altmap | .....| Normal | Altmap | Altmap |....... | NS1 | NS1 | | Page | NS2 | NS2 | If the vmemmap start for NS2 is not aligned then we are allocating a normal page. NS1 and NS2 vmemmap will be freed correctly. Fixes: 368a0590d954 ("powerpc/book3s64/vmemmap: switch radix to use a different vmemmap handling function") Co-developed-by: Ritesh Harjani (IBM) Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Donet Tom Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/8f98ec2b442977c618f7256cec88eb17dde3f2b9.1741609795.git.donettom@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_pgtable.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index bd6916419472ce..9f764bc42b8cc8 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1121,6 +1121,19 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in pmd_t *pmd; pte_t *pte; + /* + * Make sure we align the start vmemmap addr so that we calculate + * the correct start_pfn in altmap boundary check to decided whether + * we should use altmap or RAM based backing memory allocation. Also + * the address need to be aligned for set_pte operation. + + * If the start addr is already PMD_SIZE aligned we will try to use + * a pmd mapping. We don't want to be too aggressive here beacause + * that will cause more allocations in RAM. So only if the namespace + * vmemmap start addr is PMD_SIZE aligned we will use PMD mapping. + */ + + start = ALIGN_DOWN(start, PAGE_SIZE); for (addr = start; addr < end; addr = next) { next = pmd_addr_end(addr, end); @@ -1146,8 +1159,8 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in * in altmap block allocation failures, in which case * we fallback to RAM for vmemmap allocation. */ - if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) || - altmap_cross_boundary(altmap, addr, PMD_SIZE))) { + if (!IS_ALIGNED(addr, PMD_SIZE) || (altmap && + altmap_cross_boundary(altmap, addr, PMD_SIZE))) { /* * make sure we don't create altmap mappings * covering things outside the device. From 534f5a8ba27863141e29766467a3e1f61bcb47ac Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Wed, 5 Feb 2025 00:18:21 +0100 Subject: [PATCH 0645/2924] powerpc64/ftrace: fix module loading without patchable function entries get_stubs_size assumes that there must always be at least one patchable function entry, which is not always the case (modules that export data but no code), otherwise it returns -ENOEXEC and thus the section header sh_size is set to that value. During module_memory_alloc() the size is passed to execmem_alloc() after being page-aligned and thus set to zero which will cause it to fail the allocation (and thus module loading) as __vmalloc_node_range() checks for zero-sized allocs and returns null: [ 115.466896] module_64: cast_common: doesn't contain __patchable_function_entries. [ 115.469189] ------------[ cut here ]------------ [ 115.469496] WARNING: CPU: 0 PID: 274 at mm/vmalloc.c:3778 __vmalloc_node_range_noprof+0x8b4/0x8f0 ... [ 115.478574] ---[ end trace 0000000000000000 ]--- [ 115.479545] execmem: unable to allocate memory Fix this by removing the check completely, since it is anyway not helpful to propagate this as an error upwards. Fixes: eec37961a56a ("powerpc64/ftrace: Move ftrace sequence out of line") Signed-off-by: Anthony Iliopoulos Acked-by: Naveen N Rao (AMD) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250204231821.39140-1-ailiop@suse.com --- arch/powerpc/kernel/module_64.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 34a5aec4908fba..126bf3b06ab7e2 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -258,10 +258,6 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, break; } } - if (i == hdr->e_shnum) { - pr_err("%s: doesn't contain __patchable_function_entries.\n", me->name); - return -ENOEXEC; - } #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); From 3700976f2ae8dfec4c17433f8a16c9e6c334cf89 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 7 Apr 2025 14:10:29 +0530 Subject: [PATCH 0646/2924] powerpc: Add check to select PPC_RADIX_BROADCAST_TLBIE Commit 3d45a3d0d2e6 ("powerpc: Define config option for processors with broadcast TLBIE") added a config option PPC_RADIX_BROADCAST_TLBIE to support processors with broadcast TLBIE. Since this option is relevant only for RADIX_MMU, add a check as a dependency to enable PPC_RADIX_BROADCAST_TLBIE in both powernv and pseries configs. This fixes the unmet config dependency warning reported WARNING: unmet direct dependencies detected for PPC_RADIX_BROADCAST_TLBIE Depends on [n]: PPC_RADIX_MMU [=n] Selected by [y]: - PPC_PSERIES [=y] && PPC64 [=y] && PPC_BOOK3S [=y] Reported-by: kernel test robot Tested-by: Venkat Rao Bagalkote Reviewed-by: Ritesh Harjani (IBM) Closes: https://lore.kernel.org/oe-kbuild-all/202504051857.jRqxM60c-lkp@intel.com/ Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250407084029.357710-1-maddy@linux.ibm.com --- arch/powerpc/platforms/powernv/Kconfig | 2 +- arch/powerpc/platforms/pseries/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 3fbe0295ce1418..95d7ba73d43d0d 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -17,7 +17,7 @@ config PPC_POWERNV select MMU_NOTIFIER select FORCE_SMP select ARCH_SUPPORTS_PER_VMA_LOCK - select PPC_RADIX_BROADCAST_TLBIE + select PPC_RADIX_BROADCAST_TLBIE if PPC_RADIX_MMU default y config OPAL_PRD diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index a934c2a262f66a..fa3c2fff082a87 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -23,7 +23,7 @@ config PPC_PSERIES select FORCE_SMP select SWIOTLB select ARCH_SUPPORTS_PER_VMA_LOCK - select PPC_RADIX_BROADCAST_TLBIE + select PPC_RADIX_BROADCAST_TLBIE if PPC_RADIX_MMU default y config PARAVIRT From cf761e3dacc6ad5f65a4886d00da1f9681e6805a Mon Sep 17 00:00:00 2001 From: Jonathan Currier Date: Sun, 17 Nov 2024 17:48:42 -0600 Subject: [PATCH 0647/2924] PCI/MSI: Add an option to write MSIX ENTRY_DATA before any reads Commit 7d5ec3d36123 ("PCI/MSI: Mask all unused MSI-X entries") introduced a readl() from ENTRY_VECTOR_CTRL before the writel() to ENTRY_DATA. This is correct, however some hardware, like the Sun Neptune chips, the NIU module, will cause an error and/or fatal trap if any MSIX table entry is read before the corresponding ENTRY_DATA field is written to. Add an optional early writel() in msix_prepare_msi_desc(). Fixes: 7d5ec3d36123 ("PCI/MSI: Mask all unused MSI-X entries") Signed-off-by: Jonathan Currier Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241117234843.19236-2-dullfire@yahoo.com --- drivers/pci/msi/msi.c | 3 +++ include/linux/pci.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 6569ba3577fe63..8b884878861842 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -615,6 +615,9 @@ void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc) void __iomem *addr = pci_msix_desc_addr(desc); desc->pci.msi_attrib.can_mask = 1; + /* Workaround for SUN NIU insanity, which requires write before read */ + if (dev->dev_flags & PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST) + writel(0, addr + PCI_MSIX_ENTRY_DATA); desc->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); } } diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e9671..51e2bd6405cda5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -245,6 +245,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), /* Device does honor MSI masking despite saying otherwise */ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), + /* Device requires write to PCI_MSIX_ENTRY_DATA before any MSIX reads */ + PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST = (__force pci_dev_flags_t) (1 << 13), }; enum pci_irq_reroute_variant { From fbb429ddff5c8e479edcc7dde5a542c9295944e6 Mon Sep 17 00:00:00 2001 From: Jonathan Currier Date: Sun, 17 Nov 2024 17:48:43 -0600 Subject: [PATCH 0648/2924] net/niu: Niu requires MSIX ENTRY_DATA fields touch before entry reads Fix niu_try_msix() to not cause a fatal trap on sparc systems. Set PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST on the struct pci_dev to work around a bug in the hardware or firmware. For each vector entry in the msix table, niu chips will cause a fatal trap if any registers in that entry are read before that entries' ENTRY_DATA register is written to. Testing indicates writes to other registers are not sufficient to prevent the fatal trap, however the value does not appear to matter. This only needs to happen once after power up, so simply rebooting into a kernel lacking this fix will NOT cause the trap. NON-RESUMABLE ERROR: Reporting on cpu 64 NON-RESUMABLE ERROR: TPC [0x00000000005f6900] NON-RESUMABLE ERROR: RAW [4010000000000016:00000e37f93e32ff:0000000202000080:ffffffffffffffff NON-RESUMABLE ERROR: 0000000800000000:0000000000000000:0000000000000000:0000000000000000] NON-RESUMABLE ERROR: handle [0x4010000000000016] stick [0x00000e37f93e32ff] NON-RESUMABLE ERROR: type [precise nonresumable] NON-RESUMABLE ERROR: attrs [0x02000080] < ASI sp-faulted priv > NON-RESUMABLE ERROR: raddr [0xffffffffffffffff] NON-RESUMABLE ERROR: insn effective address [0x000000c50020000c] NON-RESUMABLE ERROR: size [0x8] NON-RESUMABLE ERROR: asi [0x00] CPU: 64 UID: 0 PID: 745 Comm: kworker/64:1 Not tainted 6.11.5 #63 Workqueue: events work_for_cpu_fn TSTATE: 0000000011001602 TPC: 00000000005f6900 TNPC: 00000000005f6904 Y: 00000000 Not tainted TPC: g0: 00000000000002e9 g1: 000000000000000c g2: 000000c50020000c g3: 0000000000000100 g4: ffff8000470307c0 g5: ffff800fec5be000 g6: ffff800047a08000 g7: 0000000000000000 o0: ffff800014feb000 o1: ffff800047a0b620 o2: 0000000000000011 o3: ffff800047a0b620 o4: 0000000000000080 o5: 0000000000000011 sp: ffff800047a0ad51 ret_pc: 00000000005f7128 RPC: <__pci_enable_msix_range+0x3cc/0x460> l0: 000000000000000d l1: 000000000000c01f l2: ffff800014feb0a8 l3: 0000000000000020 l4: 000000000000c000 l5: 0000000000000001 l6: 0000000020000000 l7: ffff800047a0b734 i0: ffff800014feb000 i1: ffff800047a0b730 i2: 0000000000000001 i3: 000000000000000d i4: 0000000000000000 i5: 0000000000000000 i6: ffff800047a0ae81 i7: 00000000101888b0 I7: Call Trace: [<00000000101888b0>] niu_try_msix.constprop.0+0xc0/0x130 [niu] [<000000001018f840>] niu_get_invariants+0x183c/0x207c [niu] [<00000000101902fc>] niu_pci_init_one+0x27c/0x2fc [niu] [<00000000005ef3e4>] local_pci_probe+0x28/0x74 [<0000000000469240>] work_for_cpu_fn+0x8/0x1c [<000000000046b008>] process_scheduled_works+0x144/0x210 [<000000000046b518>] worker_thread+0x13c/0x1c0 [<00000000004710e0>] kthread+0xb8/0xc8 [<00000000004060c8>] ret_from_fork+0x1c/0x2c [<0000000000000000>] 0x0 Kernel panic - not syncing: Non-resumable error. Fixes: 7d5ec3d36123 ("PCI/MSI: Mask all unused MSI-X entries") Signed-off-by: Jonathan Currier Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241117234843.19236-3-dullfire@yahoo.com --- drivers/net/ethernet/sun/niu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 73c07f10f053a4..379b6e90121d9f 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9064,6 +9064,8 @@ static void niu_try_msix(struct niu *np, u8 *ldg_num_map) msi_vec[i].entry = i; } + pdev->dev_flags |= PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST; + num_irqs = pci_enable_msix_range(pdev, msi_vec, 1, num_irqs); if (num_irqs < 0) { np->flags &= ~NIU_FLAGS_MSIX; From ec0c7afa70d5ccec44e736b60ed2e7c191d054cb Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Mon, 14 Apr 2025 14:27:01 +0530 Subject: [PATCH 0649/2924] drm/i915/display: Add macro for checking 3 DSC engines 3 DSC engines per pipe is currently supported only for BMG. Add a macro to check whether a platform supports 3 DSC engines per pipe. v2:Fix Typo in macro argument. (Suraj). Added fixes tag. Bspec: 50175 Fixes: be7f5fcdf4a0 ("drm/i915/dp: Enable 3 DSC engines for 12 slices") Cc: Ankit Nautiyal Cc: Suraj Kandpal Cc: # v6.14+ Signed-off-by: Ankit Nautiyal Reviewed-by: Suraj Kandpal Link: https://lore.kernel.org/r/20250414085701.2802374-1-ankit.k.nautiyal@intel.com (cherry picked from commit 6998cfce0e1db58c730d08cadc6bfd71e26e2de0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 717286981687a2..7a3bb77c7af7c2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ -161,6 +161,7 @@ struct intel_display_platforms { #define HAS_DPT(__display) (DISPLAY_VER(__display) >= 13) #define HAS_DSB(__display) (DISPLAY_INFO(__display)->has_dsb) #define HAS_DSC(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dsc) +#define HAS_DSC_3ENGINES(__display) (DISPLAY_VERx100(__display) == 1401 && HAS_DSC(__display)) #define HAS_DSC_MST(__display) (DISPLAY_VER(__display) >= 12 && HAS_DSC(__display)) #define HAS_FBC(__display) (DISPLAY_RUNTIME_INFO(__display)->fbc_mask != 0) #define HAS_FBC_DIRTY_RECT(__display) (DISPLAY_VER(__display) >= 30) From 3a47280b768748992ee34bd52c394c60b2845af3 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Mon, 14 Apr 2025 08:12:56 +0530 Subject: [PATCH 0650/2924] drm/i915/dp: Check for HAS_DSC_3ENGINES while configuring DSC slices DSC 12 slices configuration is used for some specific cases with Ultrajoiner. This can be supported only when each of the 4 joined pipes have 3 DSC engines each. Add the missing check for 3 DSC engines support before using 3 DSC slices per pipe. Fixes: be7f5fcdf4a0 ("drm/i915/dp: Enable 3 DSC engines for 12 slices") Cc: Ankit Nautiyal Cc: Suraj Kandpal Cc: # v6.14+ Signed-off-by: Ankit Nautiyal Reviewed-by: Suraj Kandpal Link: https://lore.kernel.org/r/20250414024256.2782702-3-ankit.k.nautiyal@intel.com (cherry picked from commit da9b1c61e7f7b327dd70c5f073ba04d419a55ef8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 9476aaa9190056..392c3653d0d738 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1050,10 +1050,11 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, u8 test_slice_count = valid_dsc_slicecount[i] * num_joined_pipes; /* - * 3 DSC Slices per pipe need 3 DSC engines, - * which is supported only with Ultrajoiner. + * 3 DSC Slices per pipe need 3 DSC engines, which is supported only + * with Ultrajoiner only for some platforms. */ - if (valid_dsc_slicecount[i] == 3 && num_joined_pipes != 4) + if (valid_dsc_slicecount[i] == 3 && + (!HAS_DSC_3ENGINES(display) || num_joined_pipes != 4)) continue; if (test_slice_count > From e64c0ff0d5d85791fbcd126ee558100a06a24a97 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 27 Mar 2025 11:16:00 +0800 Subject: [PATCH 0651/2924] pinctrl: imx: Return NULL if no group is matched and found Currently if no group is matched and found, this function will return the last grp to the caller, this is not expected, it is supposed to return NULL in this case. Fixes: e566fc11ea76 ("pinctrl: imx: use generic pinctrl helpers for managing groups") Signed-off-by: Hui Wang Reviewed-by: Frank Li Link: https://lore.kernel.org/20250327031600.99723-1-hui.wang@canonical.com Signed-off-by: Linus Walleij --- drivers/pinctrl/freescale/pinctrl-imx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index 842a1e6cbfc41a..18de3132854045 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -37,16 +37,16 @@ static inline const struct group_desc *imx_pinctrl_find_group_by_name( struct pinctrl_dev *pctldev, const char *name) { - const struct group_desc *grp = NULL; + const struct group_desc *grp; int i; for (i = 0; i < pctldev->num_groups; i++) { grp = pinctrl_generic_get_group(pctldev, i); if (grp && !strcmp(grp->grp.name, name)) - break; + return grp; } - return grp; + return NULL; } static void imx_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s, From e56088a13708757da68ad035269d69b93ac8c389 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 29 Mar 2025 20:01:32 +0100 Subject: [PATCH 0652/2924] pinctrl: meson: define the pull up/down resistor value as 60 kOhm The public datasheets of the following Amlogic SoCs describe a typical resistor value for the built-in pull up/down resistor: - Meson8/8b/8m2: not documented - GXBB (S905): 60 kOhm - GXL (S905X): 60 kOhm - GXM (S912): 60 kOhm - G12B (S922X): 60 kOhm - SM1 (S905D3): 60 kOhm The public G12B and SM1 datasheets additionally state min and max values: - min value: 50 kOhm for both, pull-up and pull-down - max value for the pull-up: 70 kOhm - max value for the pull-down: 130 kOhm Use 60 kOhm in the pinctrl-meson driver as well so it's shown in the debugfs output. It may not be accurate for Meson8/8b/8m2 but in reality 60 kOhm is closer to the actual value than 1 Ohm. Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/20250329190132.855196-1-martin.blumenstingl@googlemail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 253a0cc57e396d..e5a32a0532eeec 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -487,7 +487,7 @@ static int meson_pinconf_get(struct pinctrl_dev *pcdev, unsigned int pin, case PIN_CONFIG_BIAS_PULL_DOWN: case PIN_CONFIG_BIAS_PULL_UP: if (meson_pinconf_get_pull(pc, pin) == param) - arg = 1; + arg = 60000; else return -EINVAL; break; From 457d9772e8a5cdae64f66b5f7d5b0247365191ec Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 1 Apr 2025 15:50:21 +0200 Subject: [PATCH 0653/2924] pinctrl: airoha: fix wrong PHY LED mapping and PHY2 LED defines The current PHY2 LED define are wrong and actually set BITs outside the related mask. Fix it and set the correct value. While at it, also use FIELD_PREP_CONST macro to make it simple to understand what values are actually applied for the mask. Also fix wrong PHY LED mapping. The SoC Switch supports up to 4 port but the register define mapping for 5 PHY port, starting from 0. The mapping was wrongly defined starting from PHY1. Reorder the function group to start from PHY0. PHY4 is actually never supported as we don't have a GPIO pin to assign. Cc: stable@vger.kernel.org Fixes: 1c8ace2d0725 ("pinctrl: airoha: Add support for EN7581 SoC") Reviewed-by: Benjamin Larsson Signed-off-by: Christian Marangi Acked-by: Lorenzo Bianconi Link: https://lore.kernel.org/20250401135026.18018-1-ansuelsmth@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-airoha.c | 159 ++++++++++------------ 1 file changed, 70 insertions(+), 89 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index 547a798b71c8ae..5d84a778683d05 100644 --- a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -112,39 +113,19 @@ #define REG_LAN_LED1_MAPPING 0x0280 #define LAN4_LED_MAPPING_MASK GENMASK(18, 16) -#define LAN4_PHY4_LED_MAP BIT(18) -#define LAN4_PHY2_LED_MAP BIT(17) -#define LAN4_PHY1_LED_MAP BIT(16) -#define LAN4_PHY0_LED_MAP 0 -#define LAN4_PHY3_LED_MAP GENMASK(17, 16) +#define LAN4_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN4_LED_MAPPING_MASK, (_n)) #define LAN3_LED_MAPPING_MASK GENMASK(14, 12) -#define LAN3_PHY4_LED_MAP BIT(14) -#define LAN3_PHY2_LED_MAP BIT(13) -#define LAN3_PHY1_LED_MAP BIT(12) -#define LAN3_PHY0_LED_MAP 0 -#define LAN3_PHY3_LED_MAP GENMASK(13, 12) +#define LAN3_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN3_LED_MAPPING_MASK, (_n)) #define LAN2_LED_MAPPING_MASK GENMASK(10, 8) -#define LAN2_PHY4_LED_MAP BIT(12) -#define LAN2_PHY2_LED_MAP BIT(11) -#define LAN2_PHY1_LED_MAP BIT(10) -#define LAN2_PHY0_LED_MAP 0 -#define LAN2_PHY3_LED_MAP GENMASK(11, 10) +#define LAN2_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN2_LED_MAPPING_MASK, (_n)) #define LAN1_LED_MAPPING_MASK GENMASK(6, 4) -#define LAN1_PHY4_LED_MAP BIT(6) -#define LAN1_PHY2_LED_MAP BIT(5) -#define LAN1_PHY1_LED_MAP BIT(4) -#define LAN1_PHY0_LED_MAP 0 -#define LAN1_PHY3_LED_MAP GENMASK(5, 4) +#define LAN1_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN1_LED_MAPPING_MASK, (_n)) #define LAN0_LED_MAPPING_MASK GENMASK(2, 0) -#define LAN0_PHY4_LED_MAP BIT(3) -#define LAN0_PHY2_LED_MAP BIT(2) -#define LAN0_PHY1_LED_MAP BIT(1) -#define LAN0_PHY0_LED_MAP 0 -#define LAN0_PHY3_LED_MAP GENMASK(2, 1) +#define LAN0_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN0_LED_MAPPING_MASK, (_n)) /* CONF */ #define REG_I2C_SDA_E2 0x001c @@ -1476,8 +1457,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY1_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1491,8 +1472,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY1_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1506,8 +1487,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY1_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1521,8 +1502,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY1_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(0) }, .regmap_size = 2, }, @@ -1540,8 +1521,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY2_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1555,8 +1536,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY2_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1570,8 +1551,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY2_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1585,8 +1566,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY2_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(1) }, .regmap_size = 2, }, @@ -1604,8 +1585,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY3_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1619,8 +1600,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY3_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1634,8 +1615,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY3_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1649,8 +1630,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY3_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(2) }, .regmap_size = 2, }, @@ -1668,8 +1649,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY4_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1683,8 +1664,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY4_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1698,8 +1679,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY4_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1713,8 +1694,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY4_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(3) }, .regmap_size = 2, }, @@ -1732,8 +1713,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY1_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1747,8 +1728,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY1_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1762,8 +1743,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY1_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1777,8 +1758,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY1_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(0) }, .regmap_size = 2, }, @@ -1796,8 +1777,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY2_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1811,8 +1792,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY2_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1826,8 +1807,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY2_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1841,8 +1822,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY2_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(1) }, .regmap_size = 2, }, @@ -1860,8 +1841,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY3_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1875,8 +1856,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY3_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1890,8 +1871,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY3_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1905,8 +1886,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY3_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(2) }, .regmap_size = 2, }, @@ -1924,8 +1905,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY4_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1939,8 +1920,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY4_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1954,8 +1935,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY4_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1969,8 +1950,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY4_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(3) }, .regmap_size = 2, }, From c443279a87d54bf3027925cb3eb2baf51c3b26c9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 15 Apr 2025 10:22:04 +0200 Subject: [PATCH 0654/2924] Kconfig: switch CONFIG_SYSFS_SYCALL default to n This odd system call will be removed in the future. Let's decouple it from CONFIG_EXPERT and switch the default to n as a first step. Link: https://lore.kernel.org/20250415-dezimieren-wertpapier-9fd18a211a41@brauner Signed-off-by: Christian Brauner --- init/Kconfig | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index dd2ea3b9a79920..63f5974b9fa6ea 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1555,6 +1555,16 @@ config SYSCTL_ARCH_UNALIGN_ALLOW the unaligned access emulation. see arch/parisc/kernel/unaligned.c for reference +config SYSFS_SYSCALL + bool "Sysfs syscall support" + default n + help + sys_sysfs is an obsolete system call no longer supported in libc. + Note that disabling this option is more secure but might break + compatibility with some systems. + + If unsure say N here. + config HAVE_PCSPKR_PLATFORM bool @@ -1599,16 +1609,6 @@ config SGETMASK_SYSCALL If unsure, leave the default option here. -config SYSFS_SYSCALL - bool "Sysfs syscall support" if EXPERT - default y - help - sys_sysfs is an obsolete system call no longer supported in libc. - Note that disabling this option is more secure but might break - compatibility with some systems. - - If unsure say Y here. - config FHANDLE bool "open by fhandle syscalls" if EXPERT select EXPORTFS From ddee68c499f76ae47c011549df5be53db0057402 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 15 Apr 2025 09:45:38 +0200 Subject: [PATCH 0655/2924] hfs{plus}: add deprecation warning Both the hfs and hfsplus filesystem have been orphaned since at least 2014, i.e., over 10 years. It's time to remove them from the kernel as they're exhibiting more and more issues and no one is stepping up to fixing them. Signed-off-by: Christian Brauner --- fs/hfs/super.c | 2 ++ fs/hfsplus/super.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index fe09c2093a93d9..4413cd8feb9e3a 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -404,6 +404,8 @@ static int hfs_init_fs_context(struct fs_context *fc) { struct hfs_sb_info *hsb; + pr_warn("The hfs filesystem is deprecated and scheduled to be removed from the kernel in 2025\n"); + hsb = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL); if (!hsb) return -ENOMEM; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 948b8aaee33e3a..58cff4b2a3b4d3 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -656,6 +656,8 @@ static int hfsplus_init_fs_context(struct fs_context *fc) { struct hfsplus_sb_info *sbi; + pr_warn("The hfsplus filesystem is deprecated and scheduled to be removed from the kernel in 2025\n"); + sbi = kzalloc(sizeof(struct hfsplus_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; From c86b300b1ea35959a6e2a63a6497226a6ea90b67 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 14 Apr 2025 22:13:33 +0200 Subject: [PATCH 0656/2924] fs: add kern_path_locked_negative() The audit code relies on the fact that kern_path_locked() returned a path even for a negative dentry. If it doesn't find a valid dentry it immediately calls: audit_find_parent(d_backing_inode(parent_path.dentry)); which assumes that parent_path.dentry is still valid. But it isn't since kern_path_locked() has been changed to path_put() also for a negative dentry. Fix this by adding a helper that implements the required audit semantics and allows us to fix the immediate bleeding. We can find a unified solution for this afterwards. Link: https://lore.kernel.org/20250414-rennt-wimmeln-f186c3a780f1@brauner Fixes: 1c3cb50b58c3 ("VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry") Reported-and-tested-by: Vlastimil Babka Signed-off-by: Christian Brauner --- fs/namei.c | 65 ++++++++++++++++++++++++++++++++----------- include/linux/namei.h | 1 + kernel/audit_watch.c | 16 +++++++---- 3 files changed, 60 insertions(+), 22 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 8510ff53f12e58..267ac9c22dd27f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1665,27 +1665,20 @@ static struct dentry *lookup_dcache(const struct qstr *name, return dentry; } -/* - * Parent directory has inode locked exclusive. This is one - * and only case when ->lookup() gets called on non in-lookup - * dentries - as the matter of fact, this only gets called - * when directory is guaranteed to have no in-lookup children - * at all. - * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. - * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. - */ -struct dentry *lookup_one_qstr_excl(const struct qstr *name, - struct dentry *base, - unsigned int flags) +static struct dentry *lookup_one_qstr_excl_raw(const struct qstr *name, + struct dentry *base, + unsigned int flags) { - struct dentry *dentry = lookup_dcache(name, base, flags); + struct dentry *dentry; struct dentry *old; - struct inode *dir = base->d_inode; + struct inode *dir; + dentry = lookup_dcache(name, base, flags); if (dentry) - goto found; + return dentry; /* Don't create child dentry for a dead directory. */ + dir = base->d_inode; if (unlikely(IS_DEADDIR(dir))) return ERR_PTR(-ENOENT); @@ -1698,7 +1691,24 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, dput(dentry); dentry = old; } -found: + return dentry; +} + +/* + * Parent directory has inode locked exclusive. This is one + * and only case when ->lookup() gets called on non in-lookup + * dentries - as the matter of fact, this only gets called + * when directory is guaranteed to have no in-lookup children + * at all. + * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. + * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. + */ +struct dentry *lookup_one_qstr_excl(const struct qstr *name, + struct dentry *base, unsigned int flags) +{ + struct dentry *dentry; + + dentry = lookup_one_qstr_excl_raw(name, base, flags); if (IS_ERR(dentry)) return dentry; if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) { @@ -2762,6 +2772,29 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct return d; } +struct dentry *kern_path_locked_negative(const char *name, struct path *path) +{ + struct filename *filename __free(putname) = getname_kernel(name); + struct dentry *d; + struct qstr last; + int type, error; + + error = filename_parentat(AT_FDCWD, filename, 0, path, &last, &type); + if (error) + return ERR_PTR(error); + if (unlikely(type != LAST_NORM)) { + path_put(path); + return ERR_PTR(-EINVAL); + } + inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); + d = lookup_one_qstr_excl_raw(&last, path->dentry, 0); + if (IS_ERR(d)) { + inode_unlock(path->dentry->d_inode); + path_put(path); + } + return d; +} + struct dentry *kern_path_locked(const char *name, struct path *path) { struct filename *filename = getname_kernel(name); diff --git a/include/linux/namei.h b/include/linux/namei.h index e3042176cdf489..bbaf55fb3101f2 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -62,6 +62,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); +extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 367eaf2c78b7ed..0ebbbe37a60f02 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -347,12 +347,17 @@ static void audit_remove_parent_watches(struct audit_parent *parent) /* Get path information necessary for adding watches. */ static int audit_get_nd(struct audit_watch *watch, struct path *parent) { - struct dentry *d = kern_path_locked(watch->path, parent); + struct dentry *d; + + d = kern_path_locked_negative(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - /* update watch filter fields */ - watch->dev = d->d_sb->s_dev; - watch->ino = d_backing_inode(d)->i_ino; + + if (d_is_positive(d)) { + /* update watch filter fields */ + watch->dev = d->d_sb->s_dev; + watch->ino = d_backing_inode(d)->i_ino; + } inode_unlock(d_backing_inode(parent->dentry)); dput(d); @@ -418,11 +423,10 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) /* caller expects mutex locked */ mutex_lock(&audit_filter_mutex); - if (ret && ret != -ENOENT) { + if (ret) { audit_put_watch(watch); return ret; } - ret = 0; /* either find an old parent or attach a new one */ parent = audit_find_parent(d_backing_inode(parent_path.dentry)); From 8b1879491472c145c58c3cbbaf0e05ea93ee5ddf Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 18 Mar 2025 11:21:41 +0100 Subject: [PATCH 0657/2924] can: fix missing decrement of j1939_proto.inuse_idx Like other protocols on top of AF_CAN family, also j1939_proto.inuse_idx needs to be decremented on socket dismantle. Fixes: 6bffe88452db ("can: add protocol counter for AF_CAN sockets") Reported-by: Oliver Hartkopp Closes: https://lore.kernel.org/linux-can/7e35b13f-bbc4-491e-9081-fb939e1b8df0@hartkopp.net/ Signed-off-by: Davide Caratti Acked-by: Oleksij Rempel Link: https://patch.msgid.link/09ce71f281b9e27d1e3d1104430bf3fceb8c7321.1742292636.git.dcaratti@redhat.com Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 17226b2341d03d..6fefe7a6876116 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -655,6 +655,7 @@ static int j1939_sk_release(struct socket *sock) sock->sk = NULL; release_sock(sk); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); sock_put(sk); return 0; From 6315d93541f8a5f77c5ef5c4f25233e66d189603 Mon Sep 17 00:00:00 2001 From: Weizhao Ouyang Date: Mon, 24 Mar 2025 19:44:16 +0800 Subject: [PATCH 0658/2924] can: rockchip_canfd: fix broken quirks checks First get the devtype_data then check quirks. Fixes: bbdffb341498 ("can: rockchip_canfd: add quirk for broken CAN-FD support") Signed-off-by: Weizhao Ouyang Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20250324114416.10160-1-o451686892@gmail.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rockchip/rockchip_canfd-core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index 46201c126703ce..7107a37da36c7f 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -902,15 +902,16 @@ static int rkcanfd_probe(struct platform_device *pdev) priv->can.data_bittiming_const = &rkcanfd_data_bittiming_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_BERR_REPORTING; - if (!(priv->devtype_data.quirks & RKCANFD_QUIRK_CANFD_BROKEN)) - priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; priv->can.do_set_mode = rkcanfd_set_mode; priv->can.do_get_berr_counter = rkcanfd_get_berr_counter; priv->ndev = ndev; match = device_get_match_data(&pdev->dev); - if (match) + if (match) { priv->devtype_data = *(struct rkcanfd_devtype_data *)match; + if (!(priv->devtype_data.quirks & RKCANFD_QUIRK_CANFD_BROKEN)) + priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; + } err = can_rx_offload_add_manual(ndev, &priv->offload, RKCANFD_NAPI_WEIGHT); From 8e553520596bbd5ce832e26e9d721e6a0c797b8b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 31 Mar 2025 13:56:08 +0100 Subject: [PATCH 0659/2924] intel_th: avoid using deprecated page->mapping, index fields The struct page->mapping, index fields are deprecated and soon to be only available as part of a folio. It is likely the intel_th code which sets page->mapping, index is was implemented out of concern that some aspect of the page fault logic may encounter unexpected problems should they not. However, the appropriate interface for inserting kernel-allocated memory is vm_insert_page() in a VM_MIXEDMAP. By using the helper function vmf_insert_mixed() we can do this with minimal churn in the existing fault handler. By doing so, we bypass the remainder of the faulting logic. The pages are still pinned so there is no possibility of anything unexpected being done with the pages once established. It would also be reasonable to pre-map everything on fault, however to minimise churn we retain the fault handler. We also eliminate all code which clears page->mapping on teardown as this has now become unnecessary. The MSU code relies on faulting to function correctly, so is by definition dependent on CONFIG_MMU. We avoid spurious reports about compilation failure for unsupported platforms by making this requirement explicit in Kconfig as part of this change too. Signed-off-by: Lorenzo Stoakes Acked-by: Alexander Shishkin Link: https://lore.kernel.org/r/20250331125608.60300-1-lorenzo.stoakes@oracle.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/Kconfig | 1 + drivers/hwtracing/intel_th/msu.c | 31 +++++++----------------------- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/drivers/hwtracing/intel_th/Kconfig b/drivers/hwtracing/intel_th/Kconfig index 4b6359326ede99..4f7d2b6d79e294 100644 --- a/drivers/hwtracing/intel_th/Kconfig +++ b/drivers/hwtracing/intel_th/Kconfig @@ -60,6 +60,7 @@ config INTEL_TH_STH config INTEL_TH_MSU tristate "Intel(R) Trace Hub Memory Storage Unit" + depends on MMU help Memory Storage Unit (MSU) trace output device enables storing STP traces to system memory. It supports single diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index bf99d79a419204..7163950eb3719c 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_X86 #include @@ -976,7 +977,6 @@ static void msc_buffer_contig_free(struct msc *msc) for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) { struct page *page = virt_to_page(msc->base + off); - page->mapping = NULL; __free_page(page); } @@ -1158,9 +1158,6 @@ static void __msc_buffer_win_free(struct msc *msc, struct msc_window *win) int i; for_each_sg(win->sgt->sgl, sg, win->nr_segs, i) { - struct page *page = msc_sg_page(sg); - - page->mapping = NULL; dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE, sg_virt(sg), sg_dma_address(sg)); } @@ -1601,22 +1598,10 @@ static void msc_mmap_close(struct vm_area_struct *vma) { struct msc_iter *iter = vma->vm_file->private_data; struct msc *msc = iter->msc; - unsigned long pg; if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex)) return; - /* drop page _refcounts */ - for (pg = 0; pg < msc->nr_pages; pg++) { - struct page *page = msc_buffer_get_page(msc, pg); - - if (WARN_ON_ONCE(!page)) - continue; - - if (page->mapping) - page->mapping = NULL; - } - /* last mapping -- drop user_count */ atomic_dec(&msc->user_count); mutex_unlock(&msc->buf_mutex); @@ -1626,16 +1611,14 @@ static vm_fault_t msc_mmap_fault(struct vm_fault *vmf) { struct msc_iter *iter = vmf->vma->vm_file->private_data; struct msc *msc = iter->msc; + struct page *page; - vmf->page = msc_buffer_get_page(msc, vmf->pgoff); - if (!vmf->page) + page = msc_buffer_get_page(msc, vmf->pgoff); + if (!page) return VM_FAULT_SIGBUS; - get_page(vmf->page); - vmf->page->mapping = vmf->vma->vm_file->f_mapping; - vmf->page->index = vmf->pgoff; - - return 0; + get_page(page); + return vmf_insert_mixed(vmf->vma, vmf->address, page_to_pfn_t(page)); } static const struct vm_operations_struct msc_mmap_ops = { @@ -1676,7 +1659,7 @@ static int intel_th_msc_mmap(struct file *file, struct vm_area_struct *vma) atomic_dec(&msc->user_count); vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY); + vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP); vma->vm_ops = &msc_mmap_ops; return ret; } From 332ec18d57de2f77f43a988cbf1cb7693409434a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 14 Apr 2025 19:40:48 +0200 Subject: [PATCH 0660/2924] MAINTAINERS: update the location of the driver-core git tree The driver core git tree has moved, so properly document it. Cc: Rafael J. Wysocki Cc: Danilo Krummrich Cc: Tejun Heo Cc: Dave Ertman Cc: Ira Weiny Link: https://lore.kernel.org/r/2025041447-showbiz-other-7130@gregkh Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 778d7d168be34c..01f86f5c5f81f2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3868,7 +3868,7 @@ M: Greg Kroah-Hartman R: Dave Ertman R: Ira Weiny S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: Documentation/driver-api/auxiliary_bus.rst F: drivers/base/auxiliary.c F: include/linux/auxiliary_bus.h @@ -7225,7 +7225,7 @@ M: Greg Kroah-Hartman M: "Rafael J. Wysocki" M: Danilo Krummrich S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: Documentation/core-api/kobject.rst F: drivers/base/ F: fs/debugfs/ @@ -13106,7 +13106,7 @@ KERNFS M: Greg Kroah-Hartman M: Tejun Heo S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: fs/kernfs/ F: include/linux/kernfs.h From 37ffdbd695c02189dbf23d6e7d2385e0299587ca Mon Sep 17 00:00:00 2001 From: Miao Li Date: Mon, 14 Apr 2025 14:29:35 +0800 Subject: [PATCH 0661/2924] usb: quirks: Add delay init quirk for SanDisk 3.2Gen1 Flash Drive The SanDisk 3.2Gen1 Flash Drive, which VID:PID is in 0781:55a3, just like Silicon Motion Flash Drive: https://lore.kernel.org/r/20250401023027.44894-1-limiao870622@163.com also needs the DELAY_INIT quirk, or it will randomly work incorrectly (e.g.: lsusb and can't list this device info) when connecting Huawei hisi platforms and doing thousand of reboot test circles. Cc: stable Signed-off-by: Miao Li Signed-off-by: Lei Huang Link: https://lore.kernel.org/r/20250414062935.159024-1-limiao870622@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 87b48c17616051..36d3df7d040c63 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -369,6 +369,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, + /* SanDisk Corp. SanDisk 3.2Gen1 */ + { USB_DEVICE(0x0781, 0x55a3), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Realforce 87U Keyboard */ { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, From 429a98abfc01d3d4378b7a00969437dc3e8f647c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Apr 2025 13:45:08 +0300 Subject: [PATCH 0662/2924] usb: typec: class: Unlocked on error in typec_register_partner() We recently added some locking to this function but this error path was accidentally missed. Unlock before returning. Fixes: ec27386de23a ("usb: typec: class: Fix NULL pointer access") Cc: stable Signed-off-by: Dan Carpenter Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/Z_44tOtmml89wQcM@stanley.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 3df3e373691620..67a533e3515064 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1056,6 +1056,7 @@ struct typec_partner *typec_register_partner(struct typec_port *port, ret = device_register(&partner->dev); if (ret) { dev_err(&port->dev, "failed to register partner (%d)\n", ret); + mutex_unlock(&port->partner_link_lock); put_device(&partner->dev); return ERR_PTR(ret); } From 63ec4baf725cbde506f0a9640ae6751622b81b0a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 15 Apr 2025 13:29:27 +0100 Subject: [PATCH 0663/2924] ASoC: Add Cirrus and Wolfson headers to ASoC section of MAINTAINERS Specifically list various Cirrus Logic and Wolfson Micro codec header files under include/sound/ within the ASoC section of MAINTAINERS. Note that not all the include/sound/cs* files are part of ASoC, so more-specific patterns are needed. These files are all part of ASoC codec drivers, and are owned by specific Cirrus Logic and Wolfson Micro sections of MAINTAINERS. But the overall include/sound/* maintainership is only Takashi Iwai and Jaroslav Kysela. So by default get_maintainer.pl would only show Takashi and Jaroslav as maintainers for any patch that changes these files without changing any code under sound/soc. There is a separate MAINTAINERS section for ASoC, so the headers must be added there to make the ASoC maintainers show up in get_maintainer.pl. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20250415122927.512200-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ad08ca0f423b85..4878b77f71ee29 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22656,9 +22656,15 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git F: Documentation/devicetree/bindings/sound/ F: Documentation/sound/soc/ F: include/dt-bindings/sound/ +F: include/sound/cs-amp-lib.h +F: include/sound/cs35l* +F: include/sound/cs4271.h +F: include/sound/cs42l* +F: include/sound/madera-pdata.h F: include/sound/soc* F: include/sound/sof.h F: include/sound/sof/ +F: include/sound/wm*.h F: include/trace/events/sof*.h F: include/uapi/sound/asoc.h F: sound/soc/ From e1ca3ff28ab1e2c1e70713ef3fa7943c725742c3 Mon Sep 17 00:00:00 2001 From: Ryo Takakura Date: Sat, 12 Apr 2025 09:18:47 +0900 Subject: [PATCH 0664/2924] serial: sifive: lock port in startup()/shutdown() callbacks startup()/shutdown() callbacks access SIFIVE_SERIAL_IE_OFFS. The register is also accessed from write() callback. If console were printing and startup()/shutdown() callback gets called, its access to the register could be overwritten. Add port->lock to startup()/shutdown() callbacks to make sure their access to SIFIVE_SERIAL_IE_OFFS is synchronized against write() callback. Fixes: 45c054d0815b ("tty: serial: add driver for the SiFive UART") Signed-off-by: Ryo Takakura Reviewed-by: Petr Mladek Cc: stable@vger.kernel.org Reviewed-by: John Ogness Rule: add Link: https://lore.kernel.org/stable/20250330003522.386632-1-ryotkkr98%40gmail.com Link: https://lore.kernel.org/r/20250412001847.183221-1-ryotkkr98@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sifive.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 5904a2d4cefa71..054a8e630aceac 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -563,8 +563,11 @@ static void sifive_serial_break_ctl(struct uart_port *port, int break_state) static int sifive_serial_startup(struct uart_port *port) { struct sifive_serial_port *ssp = port_to_sifive_serial_port(port); + unsigned long flags; + uart_port_lock_irqsave(&ssp->port, &flags); __ssp_enable_rxwm(ssp); + uart_port_unlock_irqrestore(&ssp->port, flags); return 0; } @@ -572,9 +575,12 @@ static int sifive_serial_startup(struct uart_port *port) static void sifive_serial_shutdown(struct uart_port *port) { struct sifive_serial_port *ssp = port_to_sifive_serial_port(port); + unsigned long flags; + uart_port_lock_irqsave(&ssp->port, &flags); __ssp_disable_rxwm(ssp); __ssp_disable_txwm(ssp); + uart_port_unlock_irqrestore(&ssp->port, flags); } /** From 170d1a3738908eef6a0dbf378ea77fb4ae8e294d Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 25 Mar 2025 18:49:00 +0000 Subject: [PATCH 0665/2924] binder: fix offset calculation in debug log The vma start address should be substracted from the buffer's user data address and not the other way around. Cc: Tiffany Y. Yang Cc: stable Fixes: 162c79731448 ("binder: avoid user addresses in debug logs") Signed-off-by: Carlos Llamas Reviewed-by: Tiffany Y. Yang Link: https://lore.kernel.org/r/20250325184902.587138-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 76052006bd8714..5fc2c8ee61b19b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6373,7 +6373,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m, seq_printf(m, " node %d", buffer->target_node->debug_id); seq_printf(m, " size %zd:%zd offset %lx\n", buffer->data_size, buffer->offsets_size, - proc->alloc.vm_start - buffer->user_data); + buffer->user_data - proc->alloc.vm_start); } static void print_binder_work_ilocked(struct seq_file *m, From 44d9b3f584c59a606b521e7274e658d5b866c699 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 15 Apr 2025 13:39:01 +0100 Subject: [PATCH 0666/2924] comedi: jr3_pci: Fix synchronous deletion of timer When `jr3_pci_detach()` is called during device removal, it calls `timer_delete_sync()` to stop the timer, but the timer expiry function always reschedules the timer, so the synchronization is ineffective. Call `timer_shutdown_sync()` instead. It does not matter that the timer expiry function pointer is cleared, because the device is being removed. Fixes: 07b509e6584a5 ("Staging: comedi: add jr3_pci driver") Cc: stable Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20250415123901.13483-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/jr3_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/comedi/drivers/jr3_pci.c b/drivers/comedi/drivers/jr3_pci.c index cdc842b32babb4..75dce1ff24193b 100644 --- a/drivers/comedi/drivers/jr3_pci.c +++ b/drivers/comedi/drivers/jr3_pci.c @@ -758,7 +758,7 @@ static void jr3_pci_detach(struct comedi_device *dev) struct jr3_pci_dev_private *devpriv = dev->private; if (devpriv) - timer_delete_sync(&devpriv->timer); + timer_shutdown_sync(&devpriv->timer); comedi_pci_detach(dev); } From 25744f849524e806a13ade17c4fb83f6888fe954 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 15 Apr 2025 14:09:45 +0100 Subject: [PATCH 0667/2924] io_uring/zcrx: return ifq id to the user IORING_OP_RECV_ZC requests take a zcrx object id via sqe::zcrx_ifq_idx, which binds it to the corresponding if / queue. However, we don't return that id back to the user. It's fine as currently there can be only one zcrx and the user assumes that its id should be 0, but as we'll need multiple zcrx objects in the future let's explicitly pass it back on registration. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8714667d370651962f7d1a169032e5f02682a73e.1744722517.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 4 +++- io_uring/zcrx.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ed2beb4def3f6a..8f1fc12bac4620 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -1010,7 +1010,9 @@ struct io_uring_zcrx_ifq_reg { __u64 region_ptr; /* struct io_uring_region_desc * */ struct io_uring_zcrx_offsets offsets; - __u64 __resv[4]; + __u32 zcrx_id; + __u32 __resv2; + __u64 __resv[3]; }; #ifdef __cplusplus diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 0f46e0404c0454..d0eccf277a20bb 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -354,7 +354,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, return -EFAULT; if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd))) return -EFAULT; - if (memchr_inv(®.__resv, 0, sizeof(reg.__resv))) + if (memchr_inv(®.__resv, 0, sizeof(reg.__resv)) || + reg.__resv2 || reg.zcrx_id) return -EINVAL; if (reg.if_rxq == -1 || !reg.rq_entries || reg.flags) return -EINVAL; From 70e4f9bfc13c9abcc97eb9f2feee51cc925524c8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 15 Apr 2025 14:10:16 +0100 Subject: [PATCH 0668/2924] io_uring/zcrx: add pp to ifq conversion helper It'll likely change how page pools store memory providers, so in preparation for that, keep accesses in one place in io_uring by introducing a helper. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/3522eb8fa9b4e21bcf32e7e9ae656c616b282210.1744722526.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/zcrx.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index d0eccf277a20bb..5defbe8f95f9f8 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -26,6 +26,11 @@ #include "zcrx.h" #include "rsrc.h" +static inline struct io_zcrx_ifq *io_pp_to_ifq(struct page_pool *pp) +{ + return pp->mp_priv; +} + #define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, @@ -586,7 +591,7 @@ static void io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq) static netmem_ref io_pp_zc_alloc_netmems(struct page_pool *pp, gfp_t gfp) { - struct io_zcrx_ifq *ifq = pp->mp_priv; + struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); /* pp should already be ensuring that */ if (unlikely(pp->alloc.count)) @@ -618,7 +623,7 @@ static bool io_pp_zc_release_netmem(struct page_pool *pp, netmem_ref netmem) static int io_pp_zc_init(struct page_pool *pp) { - struct io_zcrx_ifq *ifq = pp->mp_priv; + struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); if (WARN_ON_ONCE(!ifq)) return -EINVAL; @@ -637,7 +642,7 @@ static int io_pp_zc_init(struct page_pool *pp) static void io_pp_zc_destroy(struct page_pool *pp) { - struct io_zcrx_ifq *ifq = pp->mp_priv; + struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); struct io_zcrx_area *area = ifq->area; if (WARN_ON_ONCE(area->free_count != area->nia.num_niovs)) @@ -792,7 +797,7 @@ static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq, niov = netmem_to_net_iov(frag->netmem); if (niov->pp->mp_ops != &io_uring_pp_zc_ops || - niov->pp->mp_priv != ifq) + io_pp_to_ifq(niov->pp) != ifq) return -EFAULT; if (!io_zcrx_queue_cqe(req, niov, ifq, off + skb_frag_off(frag), len)) From 5ff79cabb23a2f14d2ed29e9596aec908905a0e6 Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Fri, 11 Apr 2025 11:14:35 -0300 Subject: [PATCH 0669/2924] platform/x86: alienware-wmi-wmax: Add G-Mode support to Alienware m16 R1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some users report the Alienware m16 R1 models, support G-Mode. This was manually verified by inspecting their ACPI tables. Cc: stable@vger.kernel.org Signed-off-by: Kurt Borja Link: https://lore.kernel.org/r/20250411-awcc-support-v1-1-09a130ec4560@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/alienware-wmi-wmax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 3d3014b5adf046..5b6a0c866be220 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -67,7 +67,7 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1 AMD"), }, - .driver_data = &generic_quirks, + .driver_data = &g_series_quirks, }, { .ident = "Alienware m17 R5", From 202a861205905629c5f10ce0a8358623485e1ae9 Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Fri, 11 Apr 2025 11:14:36 -0300 Subject: [PATCH 0670/2924] platform/x86: alienware-wmi-wmax: Extend support to more laptops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend thermal control support to: - Alienware Area-51m R2 - Alienware m16 R1 - Alienware m16 R2 - Dell G16 7630 - Dell G5 5505 SE Cc: stable@vger.kernel.org Signed-off-by: Kurt Borja Link: https://lore.kernel.org/r/20250411-awcc-support-v1-2-09a130ec4560@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../platform/x86/dell/alienware-wmi-wmax.c | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 5b6a0c866be220..0c3be03385f899 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -61,6 +61,22 @@ static struct awcc_quirks generic_quirks = { static struct awcc_quirks empty_quirks; static const struct dmi_system_id awcc_dmi_table[] __initconst = { + { + .ident = "Alienware Area-51m R2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware Area-51m R2"), + }, + .driver_data = &generic_quirks, + }, + { + .ident = "Alienware m16 R1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1"), + }, + .driver_data = &g_series_quirks, + }, { .ident = "Alienware m16 R1 AMD", .matches = { @@ -69,6 +85,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &g_series_quirks, }, + { + .ident = "Alienware m16 R2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R2"), + }, + .driver_data = &generic_quirks, + }, { .ident = "Alienware m17 R5", .matches = { @@ -93,6 +117,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &generic_quirks, }, + { + .ident = "Alienware x15 R2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware x15 R2"), + }, + .driver_data = &generic_quirks, + }, { .ident = "Alienware x17 R2", .matches = { @@ -125,6 +157,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &g_series_quirks, }, + { + .ident = "Dell Inc. G16 7630", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell G16 7630"), + }, + .driver_data = &g_series_quirks, + }, { .ident = "Dell Inc. G3 3500", .matches = { @@ -149,6 +189,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &g_series_quirks, }, + { + .ident = "Dell Inc. G5 5505", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "G5 5505"), + }, + .driver_data = &g_series_quirks, + }, }; enum WMAX_THERMAL_INFORMATION_OPERATIONS { From 912d614ac99e137fd2016777e4b090c46ce84898 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Mon, 14 Apr 2025 16:04:52 +0200 Subject: [PATCH 0671/2924] platform/x86: msi-wmi-platform: Rename "data" variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the "data" variable inside msi_wmi_platform_read() to avoid a name collision when the driver adds support for a state container struct (that is to be called "data" too) in the future. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20250414140453.7691-1-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/msi-wmi-platform.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/msi-wmi-platform.c b/drivers/platform/x86/msi-wmi-platform.c index 9b5c7f8c79b0dd..e15681dfca8d7e 100644 --- a/drivers/platform/x86/msi-wmi-platform.c +++ b/drivers/platform/x86/msi-wmi-platform.c @@ -173,7 +173,7 @@ static int msi_wmi_platform_read(struct device *dev, enum hwmon_sensor_types typ struct wmi_device *wdev = dev_get_drvdata(dev); u8 input[32] = { 0 }; u8 output[32]; - u16 data; + u16 value; int ret; ret = msi_wmi_platform_query(wdev, MSI_PLATFORM_GET_FAN, input, sizeof(input), output, @@ -181,11 +181,11 @@ static int msi_wmi_platform_read(struct device *dev, enum hwmon_sensor_types typ if (ret < 0) return ret; - data = get_unaligned_be16(&output[channel * 2 + 1]); - if (!data) + value = get_unaligned_be16(&output[channel * 2 + 1]); + if (!value) *val = 0; else - *val = 480000 / data; + *val = 480000 / value; return 0; } From 88fa80021b77732bc98f73fb69d69c7cc37b9f0d Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Sat, 12 Apr 2025 21:19:24 +0530 Subject: [PATCH 0672/2924] net: ngbe: fix memory leak in ngbe_probe() error path When ngbe_sw_init() is called, memory is allocated for wx->rss_key in wx_init_rss_key(). However, in ngbe_probe() function, the subsequent error paths after ngbe_sw_init() don't free the rss_key. Fix that by freeing it in error path along with wx->mac_table. Also change the label to which execution jumps when ngbe_sw_init() fails, because otherwise, it could lead to a double free for rss_key, when the mac_table allocation fails in wx_sw_init(). Fixes: 02338c484ab6 ("net: ngbe: Initialize sw info and register netdev") Signed-off-by: Abdun Nihaal Reviewed-by: Kory Maincent Reviewed-by: Jiawen Wu Link: https://patch.msgid.link/20250412154927.25908-1-abdun.nihaal@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index a6159214ec0a98..91b3055a5a9f59 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -625,7 +625,7 @@ static int ngbe_probe(struct pci_dev *pdev, /* setup the private structure */ err = ngbe_sw_init(wx); if (err) - goto err_free_mac_table; + goto err_pci_release_regions; /* check if flash load is done after hw power up */ err = wx_check_flash_load(wx, NGBE_SPI_ILDR_STATUS_PERST); @@ -719,6 +719,7 @@ static int ngbe_probe(struct pci_dev *pdev, err_clear_interrupt_scheme: wx_clear_interrupt_scheme(wx); err_free_mac_table: + kfree(wx->rss_key); kfree(wx->mac_table); err_pci_release_regions: pci_release_selected_regions(pdev, From 86ce5c0a1dec02e21b4c864b2bc0cc5880a2c13c Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 8 Apr 2025 16:00:05 +0300 Subject: [PATCH 0673/2924] mei: me: add panther lake H DID Add Panther Lake H device id. Cc: stable Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250408130005.1358140-1-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 1 + drivers/misc/mei/pci-me.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index a5f88ec97df753..bc40b940ae2145 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -117,6 +117,7 @@ #define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ +#define MEI_DEV_ID_PTL_H 0xE370 /* Panther Lake H */ #define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ /* diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index d6ff9d82ae94b3..3f9c60b579ae48 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -124,6 +124,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_H, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, /* required last entry */ From c876be906ce7e518d9ef9926478669c151999e69 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 17 Mar 2025 10:59:55 -0300 Subject: [PATCH 0674/2924] char: misc: register chrdev region with all possible minors register_chrdev will only register the first 256 minors of a major chrdev. That means that dynamically allocated misc devices with minor above 255 will fail to open with -ENXIO. This was found by kernel test robot when testing a different change that makes all dynamically allocated minors be above 255. This has, however, been separately tested by creating 256 serio_raw devices with the help of userio driver. Ever since allowing misc devices with minors above 128, this has been possible. Fix it by registering all minor numbers from 0 to MINORMASK + 1 for MISC_MAJOR. Reported-by: kernel test robot Cc: stable Closes: https://lore.kernel.org/oe-lkp/202503171507.6c8093d0-lkp@intel.com Fixes: ab760791c0cf ("char: misc: Increase the maximum number of dynamic misc devices to 1048448") Signed-off-by: Thadeu Lima de Souza Cascardo Tested-by: Hou Wenlong Link: https://lore.kernel.org/r/20250317-misc-chrdev-v1-1-6cd05da11aef@igalia.com Signed-off-by: Greg Kroah-Hartman --- drivers/char/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/misc.c b/drivers/char/misc.c index f7dd455dd0dd3c..dda466f9181acf 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -315,7 +315,7 @@ static int __init misc_init(void) goto fail_remove; err = -EIO; - if (register_chrdev(MISC_MAJOR, "misc", &misc_fops)) + if (__register_chrdev(MISC_MAJOR, 0, MINORMASK + 1, "misc", &misc_fops)) goto fail_printk; return 0; From 18eb77c75ed01439f96ae5c0f33461eb5134b907 Mon Sep 17 00:00:00 2001 From: Rengarajan S Date: Thu, 13 Mar 2025 22:38:55 +0530 Subject: [PATCH 0675/2924] misc: microchip: pci1xxxx: Fix Kernel panic during IRQ handler registration Resolve kernel panic while accessing IRQ handler associated with the generated IRQ. This is done by acquiring the spinlock and storing the current interrupt state before handling the interrupt request using generic_handle_irq. A previous fix patch was submitted where 'generic_handle_irq' was replaced with 'handle_nested_irq'. However, this change also causes the kernel panic where after determining which GPIO triggered the interrupt and attempting to call handle_nested_irq with the mapped IRQ number, leads to a failure in locating the registered handler. Fixes: 194f9f94a516 ("misc: microchip: pci1xxxx: Resolve kernel panic during GPIO IRQ handling") Cc: stable Signed-off-by: Rengarajan S Link: https://lore.kernel.org/r/20250313170856.20868-2-rengarajan.s@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c index 04756302b87805..21255cdb24c1eb 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c @@ -37,6 +37,7 @@ struct pci1xxxx_gpio { struct auxiliary_device *aux_dev; void __iomem *reg_base; + raw_spinlock_t wa_lock; struct gpio_chip gpio; spinlock_t lock; int irq_base; @@ -257,6 +258,7 @@ static irqreturn_t pci1xxxx_gpio_irq_handler(int irq, void *dev_id) struct pci1xxxx_gpio *priv = dev_id; struct gpio_chip *gc = &priv->gpio; unsigned long int_status = 0; + unsigned long wa_flags; unsigned long flags; u8 pincount; int bit; @@ -280,7 +282,9 @@ static irqreturn_t pci1xxxx_gpio_irq_handler(int irq, void *dev_id) writel(BIT(bit), priv->reg_base + INTR_STATUS_OFFSET(gpiobank)); spin_unlock_irqrestore(&priv->lock, flags); irq = irq_find_mapping(gc->irq.domain, (bit + (gpiobank * 32))); - handle_nested_irq(irq); + raw_spin_lock_irqsave(&priv->wa_lock, wa_flags); + generic_handle_irq(irq); + raw_spin_unlock_irqrestore(&priv->wa_lock, wa_flags); } } spin_lock_irqsave(&priv->lock, flags); From e9d7748a7468581859d2b85b378135f9688a0aff Mon Sep 17 00:00:00 2001 From: Rengarajan S Date: Thu, 13 Mar 2025 22:38:56 +0530 Subject: [PATCH 0676/2924] misc: microchip: pci1xxxx: Fix incorrect IRQ status handling during ack Under irq_ack, pci1xxxx_assign_bit reads the current interrupt status, modifies and writes the entire value back. Since, the IRQ status bit gets cleared on writing back, the better approach is to directly write the bitmask to the register in order to preserve the value. Fixes: 1f4d8ae231f4 ("misc: microchip: pci1xxxx: Add gpio irq handler and irq helper functions irq_ack, irq_mask, irq_unmask and irq_set_type of irq_chip.") Cc: stable Signed-off-by: Rengarajan S Link: https://lore.kernel.org/r/20250313170856.20868-3-rengarajan.s@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c index 21255cdb24c1eb..98d3d123004c88 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c @@ -168,7 +168,7 @@ static void pci1xxxx_gpio_irq_ack(struct irq_data *data) unsigned long flags; spin_lock_irqsave(&priv->lock, flags); - pci1xxx_assign_bit(priv->reg_base, INTR_STAT_OFFSET(gpio), (gpio % 32), true); + writel(BIT(gpio % 32), priv->reg_base + INTR_STAT_OFFSET(gpio)); spin_unlock_irqrestore(&priv->lock, flags); } From 1fdb8188c3d505452b40cdb365b1bb32be533a8e Mon Sep 17 00:00:00 2001 From: Yunlong Xing Date: Mon, 14 Apr 2025 11:01:59 +0800 Subject: [PATCH 0677/2924] loop: aio inherit the ioprio of original request Set cmd->iocb.ki_ioprio to the ioprio of loop device's request. The purpose is to inherit the original request ioprio in the aio flow. Signed-off-by: Yunlong Xing Signed-off-by: Zhiguo Niu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250414030159.501180-1-yunlong.xing@unisoc.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 674527d770dc66..dd7f33d47f4f82 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -447,7 +447,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_filp = file; cmd->iocb.ki_complete = lo_rw_aio_complete; cmd->iocb.ki_flags = IOCB_DIRECT; - cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); + cmd->iocb.ki_ioprio = req_get_ioprio(rq); if (rw == ITER_SOURCE) ret = file->f_op->write_iter(&cmd->iocb, &iter); From e7bc0010ceb403d025100698586c8e760921d471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 15 Apr 2025 10:51:47 +0200 Subject: [PATCH 0678/2924] loop: properly send KOBJ_CHANGED uevent for disk device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original commit message and the wording "uncork" in the code comment indicate that it is expected that the suppressed event instances are automatically sent after unsuppressing. This is not the case, instead they are discarded. In effect this means that no "changed" events are emitted on the device itself by default. While each discovered partition does trigger a changed event on the device, devices without partitions don't have any event emitted. This makes udev miss the device creation and prompted workarounds in userspace. See the linked util-linux/losetup bug. Explicitly emit the events and drop the confusingly worded comments. Link: https://github.com/util-linux/util-linux/issues/2434 Fixes: 498ef5c777d9 ("loop: suppress uevents while reconfiguring the device") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250415-loop-uevent-changed-v2-1-0c4e6a923b2a@linutronix.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index dd7f33d47f4f82..3be7f00e7fc740 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -667,8 +667,8 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, error = 0; done: - /* enable and uncork uevent now that we are done */ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); return error; out_err: @@ -1129,8 +1129,8 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); - /* enable and uncork uevent now that we are done */ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); loop_global_unlock(lo, is_loop); if (partscan) From dc1771f718548f7d4b93991b174c6e7b5e1ba410 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 10 Mar 2025 22:24:14 -0700 Subject: [PATCH 0679/2924] Revert "drivers: core: synchronize really_probe() and dev_uevent()" This reverts commit c0a40097f0bc81deafc15f9195d1fb54595cd6d0. Probing a device can take arbitrary long time. In the field we observed that, for example, probing a bad micro-SD cards in an external USB card reader (or maybe cards were good but cables were flaky) sometimes takes longer than 2 minutes due to multiple retries at various levels of the stack. We can not block uevent_show() method for that long because udev is reading that attribute very often and that blocks udev and interferes with booting of the system. The change that introduced locking was concerned with dev_uevent() racing with unbinding the driver. However we can handle it without locking (which will be done in subsequent patch). There was also claim that synchronization with probe() is needed to properly load USB drivers, however this is a red herring: the change adding the lock was introduced in May of last year and USB loading and probing worked properly for many years before that. Revert the harmful locking. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Reviewed-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250311052417.1846985-1-dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index d2f9d3a59d6b00..f9c1c623bca5e1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2726,11 +2726,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; - /* Synchronize with really_probe() */ - device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); - device_unlock(dev); if (retval) goto out; From 04d3e5461c1f5cf8eec964ab64948ebed826e95e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 10 Mar 2025 22:24:15 -0700 Subject: [PATCH 0680/2924] driver core: introduce device_set_driver() helper In preparation to closing a race when reading driver pointer in dev_uevent() code, instead of setting device->driver pointer directly introduce device_set_driver() helper. Signed-off-by: Dmitry Torokhov Reviewed-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250311052417.1846985-2-dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 6 ++++++ drivers/base/core.c | 2 +- drivers/base/dd.c | 7 +++---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 0042e4774b0ce7..eb203cf8370bc9 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -180,6 +180,12 @@ int driver_add_groups(const struct device_driver *drv, const struct attribute_gr void driver_remove_groups(const struct device_driver *drv, const struct attribute_group **groups); void device_driver_detach(struct device *dev); +static inline void device_set_driver(struct device *dev, const struct device_driver *drv) +{ + // FIXME - this cast should not be needed "soon" + dev->driver = (struct device_driver *)drv; +} + int devres_release_all(struct device *dev); void device_block_probing(void); void device_unblock_probing(void); diff --git a/drivers/base/core.c b/drivers/base/core.c index f9c1c623bca5e1..b000ee61c1494e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3697,7 +3697,7 @@ int device_add(struct device *dev) device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: - dev->driver = NULL; + device_set_driver(dev, NULL); bus_remove_device(dev); BusError: device_remove_attrs(dev); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f0e4b4aba885c6..b526e0e0f52d79 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -550,7 +550,7 @@ static void device_unbind_cleanup(struct device *dev) arch_teardown_dma_ops(dev); kfree(dev->dma_range_map); dev->dma_range_map = NULL; - dev->driver = NULL; + device_set_driver(dev, NULL); dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); @@ -629,8 +629,7 @@ static int really_probe(struct device *dev, const struct device_driver *drv) } re_probe: - // FIXME - this cast should not be needed "soon" - dev->driver = (struct device_driver *)drv; + device_set_driver(dev, drv); /* If using pinctrl, bind pins now before probing */ ret = pinctrl_bind_pins(dev); @@ -1014,7 +1013,7 @@ static int __device_attach(struct device *dev, bool allow_async) if (ret == 0) ret = 1; else { - dev->driver = NULL; + device_set_driver(dev, NULL); ret = 0; } } else { From 18daa52418e7e4629ed1703b64777294209d2622 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 10 Mar 2025 22:24:16 -0700 Subject: [PATCH 0681/2924] driver core: fix potential NULL pointer dereference in dev_uevent() If userspace reads "uevent" device attribute at the same time as another threads unbinds the device from its driver, change to dev->driver from a valid pointer to NULL may result in crash. Fix this by using READ_ONCE() when fetching the pointer, and take bus' drivers klist lock to make sure driver instance will not disappear while we access it. Use WRITE_ONCE() when setting the driver pointer to ensure there is no tearing. Signed-off-by: Dmitry Torokhov Reviewed-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20250311052417.1846985-3-dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 13 ++++++++++++- drivers/base/bus.c | 2 +- drivers/base/core.c | 33 +++++++++++++++++++++++++++++++-- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index eb203cf8370bc9..123031a757d916 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -73,6 +73,7 @@ static inline void subsys_put(struct subsys_private *sp) kset_put(&sp->subsys); } +struct subsys_private *bus_to_subsys(const struct bus_type *bus); struct subsys_private *class_to_subsys(const struct class *class); struct driver_private { @@ -182,8 +183,18 @@ void device_driver_detach(struct device *dev); static inline void device_set_driver(struct device *dev, const struct device_driver *drv) { + /* + * Majority (all?) read accesses to dev->driver happens either + * while holding device lock or in bus/driver code that is only + * invoked when the device is bound to a driver and there is no + * concern of the pointer being changed while it is being read. + * However when reading device's uevent file we read driver pointer + * without taking device lock (so we do not block there for + * arbitrary amount of time). We use WRITE_ONCE() here to prevent + * tearing so that READ_ONCE() can safely be used in uevent code. + */ // FIXME - this cast should not be needed "soon" - dev->driver = (struct device_driver *)drv; + WRITE_ONCE(dev->driver, (struct device_driver *)drv); } int devres_release_all(struct device *dev); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 5ea3b03af9ba6d..5e75e1bce5516d 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -57,7 +57,7 @@ static int __must_check bus_rescan_devices_helper(struct device *dev, * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. */ -static struct subsys_private *bus_to_subsys(const struct bus_type *bus) +struct subsys_private *bus_to_subsys(const struct bus_type *bus) { struct subsys_private *sp = NULL; struct kobject *kobj; diff --git a/drivers/base/core.c b/drivers/base/core.c index b000ee61c1494e..cbc0099d8ef246 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2624,6 +2624,35 @@ static const char *dev_uevent_name(const struct kobject *kobj) return NULL; } +/* + * Try filling "DRIVER=" uevent variable for a device. Because this + * function may race with binding and unbinding the device from a driver, + * we need to be careful. Binding is generally safe, at worst we miss the + * fact that the device is already bound to a driver (but the driver + * information that is delivered through uevents is best-effort, it may + * become obsolete as soon as it is generated anyways). Unbinding is more + * risky as driver pointer is transitioning to NULL, so READ_ONCE() should + * be used to make sure we are dealing with the same pointer, and to + * ensure that driver structure is not going to disappear from under us + * we take bus' drivers klist lock. The assumption that only registered + * driver can be bound to a device, and to unregister a driver bus code + * will take the same lock. + */ +static void dev_driver_uevent(const struct device *dev, struct kobj_uevent_env *env) +{ + struct subsys_private *sp = bus_to_subsys(dev->bus); + + if (sp) { + scoped_guard(spinlock, &sp->klist_drivers.k_lock) { + struct device_driver *drv = READ_ONCE(dev->driver); + if (drv) + add_uevent_var(env, "DRIVER=%s", drv->name); + } + + subsys_put(sp); + } +} + static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); @@ -2655,8 +2684,8 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Add "DRIVER=%s" variable if the device is bound to a driver */ + dev_driver_uevent(dev, env); /* Add common DT information about the device */ of_device_uevent(dev, env); From a06459657e4ecc4b35a2ee7a9bf5359ecfb077cf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 08:58:22 -0400 Subject: [PATCH 0682/2924] bcachefs: Silence extent_poisoned error messages extent poisoning is partly so that we don't keep spewing the dmesg log when we've got unreadable data - we don't want to print these. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 2 +- fs/bcachefs/extents.c | 2 +- fs/bcachefs/io_read.c | 18 ++++++++++-------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index c8696f01eb14f6..a615e4852deda8 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -287,7 +287,7 @@ x(EIO, mark_stripe) \ x(EIO, stripe_reconstruct) \ x(EIO, key_type_error) \ - x(EIO, extent_poisened) \ + x(EIO, extent_poisoned) \ x(EIO, missing_indirect_extent) \ x(EIO, invalidate_stripe_to_dev) \ x(EIO, no_encryption_key) \ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index ae7c7a177e10b2..dca2b8425cc05f 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -139,7 +139,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) - return -BCH_ERR_extent_poisened; + return -BCH_ERR_extent_poisoned; rcu_read_lock(); const union bch_extent_entry *entry; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index de8ccd593ec73c..def4a26a3b4590 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -1349,14 +1349,16 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, bch2_trans_iter_exit(trans, &iter); - if (ret) { - struct printbuf buf = PRINTBUF; - lockrestart_do(trans, - bch2_inum_offset_err_msg_trans(trans, &buf, inum, - bvec_iter.bi_sector << 9)); - prt_printf(&buf, "read error: %s", bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); + if (unlikely(ret)) { + if (ret != -BCH_ERR_extent_poisoned) { + struct printbuf buf = PRINTBUF; + lockrestart_do(trans, + bch2_inum_offset_err_msg_trans(trans, &buf, inum, + bvec_iter.bi_sector << 9)); + prt_printf(&buf, "data read error: %s", bch2_err_str(ret)); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + } rbio->bio.bi_status = BLK_STS_IOERR; rbio->ret = ret; From 14bcf982f428cd98bababe30d4059f93d39a2f14 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 08:44:40 -0400 Subject: [PATCH 0683/2924] bcachefs: Print version_incompat_allowed on startup Let users know if incompatible features aren't enabled Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 788e870bfef6a9..f1eef5e7698bf4 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1013,6 +1013,11 @@ static void print_mount_opts(struct bch_fs *c) bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE); } + if (c->sb.version_incompat_allowed != c->sb.version) { + prt_printf(&p, "\n allowing incompatible features above "); + bch2_version_to_text(&p, c->sb.version_incompat_allowed); + } + bch_info(c, "%s", p.buf); printbuf_exit(&p); } From c3b02e6d67acb9893b6c219d32cdc75a28ffea65 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 09:27:22 -0400 Subject: [PATCH 0684/2924] bcachefs: Log message when incompat version requested but not enabled Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 2 ++ fs/bcachefs/fs-ioctl.c | 2 +- fs/bcachefs/super-io.c | 20 ++++++++++++++++++-- fs/bcachefs/super.c | 1 + 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 5cb0fc384ac020..75f7408da1736b 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -788,6 +788,8 @@ struct bch_fs { unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)]; u64 btrees_lost_data; } sb; + DARRAY(enum bcachefs_metadata_version) + incompat_versions_requested; #ifdef CONFIG_UNICODE struct unicode_map *cf_encoding; diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index c1553e44e049a3..14886e1d4d6dff 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -69,7 +69,7 @@ static int bch2_inode_flags_set(struct btree_trans *trans, if (ret < 0) return ret; - ret = bch2_request_incompat_feature(c,bcachefs_metadata_version_casefolding); + ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); if (ret) return ret; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index e27422b6d9c6a9..25b6bce05c3cce 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -73,14 +73,30 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v ? 0 : -BCH_ERR_may_not_use_incompat_feature; + mutex_lock(&c->sb_lock); if (!ret) { - mutex_lock(&c->sb_lock); SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); bch2_write_super(c); - mutex_unlock(&c->sb_lock); + } else { + darray_for_each(c->incompat_versions_requested, i) + if (version == *i) + goto out; + + darray_push(&c->incompat_versions_requested, version); + struct printbuf buf = PRINTBUF; + prt_str(&buf, "requested incompat feature "); + bch2_version_to_text(&buf, version); + prt_str(&buf, " currently not enabled"); + prt_printf(&buf, "\n set version_upgrade=incompat to enable"); + + bch_notice(c, "%s", buf.buf); + printbuf_exit(&buf); } +out: + mutex_unlock(&c->sb_lock); + return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index f1eef5e7698bf4..e8a17ed1615d30 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -589,6 +589,7 @@ static void __bch2_fs_free(struct bch_fs *c) free_percpu(c->online_reserved); } + darray_exit(&c->incompat_versions_requested); darray_exit(&c->btree_roots_extra); free_percpu(c->pcpu); free_percpu(c->usage); From 10076ae01388caffec3b90abcce05f24a9e4b15e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 1 Apr 2025 15:28:46 +0300 Subject: [PATCH 0685/2924] drivers/base: Extend documentation with preferred way to use auxbus Document the preferred way to use auxiliary bus. Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/206e8c249f630abd3661deb36b84b26282241040.1743510317.git.leon@kernel.org [ reworded the text a bit - gregkh ] Signed-off-by: Greg Kroah-Hartman --- drivers/base/auxiliary.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index afa4df4c5a3f37..95717d509ca99d 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -156,6 +156,16 @@ * }, * .ops = my_custom_ops, * }; + * + * Please note that such custom ops approach is valid, but it is hard to implement + * it right without global locks per-device to protect from auxiliary_drv removal + * during call to that ops. In addition, this implementation lacks proper module + * dependency, which causes to load/unload races between auxiliary parent and devices + * modules. + * + * The most easiest way to provide these ops reliably without needing to + * have a lock is to EXPORT_SYMBOL*() them and rely on already existing + * modules infrastructure for validity and correct dependencies chains. */ static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id, From a8e858e29955175ab7587190d449fa6a566d90e5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 1 Apr 2025 15:28:47 +0300 Subject: [PATCH 0686/2924] drivers/base: Add myself as auxiliary bus reviewer As the one who participated in initial development of auxiliary bus and later reviewed many of existing auxiliary bus consumers, I would like to be CCed on all auxiliary bus changes. Add myself as a reviewer to do not miss new development in that area. Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/b60e74e286b1d3935de46092470f716701c924a1.1743510317.git.leon@kernel.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 01f86f5c5f81f2..29c9ea9c0a3c3e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3867,6 +3867,7 @@ AUXILIARY BUS DRIVER M: Greg Kroah-Hartman R: Dave Ertman R: Ira Weiny +R: Leon Romanovsky S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: Documentation/driver-api/auxiliary_bus.rst From 1ae5e4c0626d6954114d9725990ac9c498f3b1ab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 8 Apr 2025 12:48:55 +0300 Subject: [PATCH 0687/2924] device property: Add a note to the fwnode.h Add a note to the fwnode.h that the header should not be used directly in the leaf drivers, they all should use the higher level APIs and the respective headers. The purpose of this note is to give guidance to driver writers to avoid repeating a common mistake. Signed-off-by: Andy Shevchenko Reviewed-by: Sakari Ailus Reviewed-by: Rafael J. Wysocki Reviewed-by: Zijun Hu Link: https://lore.kernel.org/r/20250408095229.1298005-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 6fa0a268d53827..097be89487bf5c 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -2,6 +2,11 @@ /* * fwnode.h - Firmware device node object handle type definition. * + * This header file provides low-level data types and definitions for firmware + * and device property providers. The respective API header files supplied by + * them should contain all of the requisite data types and definitions for end + * users, so including it directly should not be necessary. + * * Copyright (C) 2015, Intel Corporation * Author: Rafael J. Wysocki */ From bc2c46426f2d95e58c82f394531afdd034c8706c Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Mon, 14 Apr 2025 15:11:23 +0800 Subject: [PATCH 0688/2924] software node: Prevent link creation failure from causing kobj reference count imbalance syzbot reported a uaf in software_node_notify_remove. [1] When any of the two sysfs_create_link() in software_node_notify() fails, the swnode->kobj reference count will not increase normally, which will cause swnode to be released incorrectly due to the imbalance of kobj reference count when executing software_node_notify_remove(). Increase the reference count of kobj before creating the link to avoid uaf. [1] BUG: KASAN: slab-use-after-free in software_node_notify_remove+0x1bc/0x1c0 drivers/base/swnode.c:1108 Read of size 1 at addr ffff888033c08908 by task syz-executor105/5844 Freed by task 5844: software_node_notify_remove+0x159/0x1c0 drivers/base/swnode.c:1106 device_platform_notify_remove drivers/base/core.c:2387 [inline] Fixes: 9eb59204d519 ("iommufd/selftest: Add set_dev_pasid in mock iommu") Reported-by: syzbot+2ff22910687ee0dfd48e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2ff22910687ee0dfd48e Tested-by: syzbot+2ff22910687ee0dfd48e@syzkaller.appspotmail.com Signed-off-by: Lizhi Xu Reviewed-by: Sakari Ailus Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250414071123.1228331-1-lizhi.xu@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/swnode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index b1726a3515f6fb..5c78fa6ae77257 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -1080,6 +1080,7 @@ void software_node_notify(struct device *dev) if (!swnode) return; + kobject_get(&swnode->kobj); ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node"); if (ret) return; @@ -1089,8 +1090,6 @@ void software_node_notify(struct device *dev) sysfs_remove_link(&dev->kobj, "software_node"); return; } - - kobject_get(&swnode->kobj); } void software_node_notify_remove(struct device *dev) From b9792abb76ae1649080b8d48092c52e24c7bbdc2 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Apr 2025 22:51:10 +1000 Subject: [PATCH 0689/2924] drivers/base/memory: Avoid overhead from for_each_present_section_nr() for_each_present_section_nr() was introduced to add_boot_memory_block() by commit 61659efdb35c ("drivers/base/memory: improve add_boot_memory_block()"). It causes unnecessary overhead when the present sections are really sparse. next_present_section_nr() called by the macro to find the next present section, which is far away from the spanning sections in the specified block. Too much time consumed by next_present_section_nr() in this case, which can lead to softlockup as observed by Aditya Gupta on IBM Power10 machine. watchdog: BUG: soft lockup - CPU#248 stuck for 22s! [swapper/248:1] Modules linked in: CPU: 248 UID: 0 PID: 1 Comm: swapper/248 Not tainted 6.15.0-rc1-next-20250408 #1 VOLUNTARY Hardware name: 9105-22A POWER10 (raw) 0x800200 opal:v7.1-107-gfda75d121942 PowerNV NIP: c00000000209218c LR: c000000002092204 CTR: 0000000000000000 REGS: c00040000418fa30 TRAP: 0900 Not tainted (6.15.0-rc1-next-20250408) MSR: 9000000002009033 CR: 28000428 XER: 00000000 CFAR: 0000000000000000 IRQMASK: 0 GPR00: c000000002092204 c00040000418fcd0 c000000001b08100 0000000000000040 GPR04: 0000000000013e00 c000c03ffebabb00 0000000000c03fff c000400fff587f80 GPR08: 0000000000000000 00000000001196f7 0000000000000000 0000000028000428 GPR12: 0000000000000000 c000000002e80000 c00000000001007c 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR28: c000000002df7f70 0000000000013dc0 c0000000011dd898 0000000008000000 NIP [c00000000209218c] memory_dev_init+0x114/0x1e0 LR [c000000002092204] memory_dev_init+0x18c/0x1e0 Call Trace: [c00040000418fcd0] [c000000002092204] memory_dev_init+0x18c/0x1e0 (unreliable) [c00040000418fd50] [c000000002091348] driver_init+0x78/0xa4 [c00040000418fd70] [c0000000020063ac] kernel_init_freeable+0x22c/0x370 [c00040000418fde0] [c0000000000100a8] kernel_init+0x34/0x25c [c00040000418fe50] [c00000000000cd94] ret_from_kernel_user_thread+0x14/0x1c Avoid the overhead by folding for_each_present_section_nr() to the outer loop. add_boot_memory_block() is dropped after that. Fixes: 61659efdb35c ("drivers/base/memory: improve add_boot_memory_block()") Closes: https://lore.kernel.org/linux-mm/20250409180344.477916-1-adityag@linux.ibm.com Reported-by: Aditya Gupta Signed-off-by: Gavin Shan Acked-by: Oscar Salvador Tested-by: Aditya Gupta Acked-by: David Hildenbrand Link: https://lore.kernel.org/r/20250410125110.1232329-1-gshan@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 8f3a41d9bfaa2c..19469e7f88c25e 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -816,21 +816,6 @@ static int add_memory_block(unsigned long block_id, unsigned long state, return 0; } -static int __init add_boot_memory_block(unsigned long base_section_nr) -{ - unsigned long nr; - - for_each_present_section_nr(base_section_nr, nr) { - if (nr >= (base_section_nr + sections_per_block)) - break; - - return add_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, NULL, NULL); - } - - return 0; -} - static int add_hotplug_memory_block(unsigned long block_id, struct vmem_altmap *altmap, struct memory_group *group) @@ -957,7 +942,7 @@ static const struct attribute_group *memory_root_attr_groups[] = { void __init memory_dev_init(void) { int ret; - unsigned long block_sz, nr; + unsigned long block_sz, block_id, nr; /* Validate the configured memory block size */ block_sz = memory_block_size_bytes(); @@ -970,15 +955,23 @@ void __init memory_dev_init(void) panic("%s() failed to register subsystem: %d\n", __func__, ret); /* - * Create entries for memory sections that were found - * during boot and have been initialized + * Create entries for memory sections that were found during boot + * and have been initialized. Use @block_id to track the last + * handled block and initialize it to an invalid value (ULONG_MAX) + * to bypass the block ID matching check for the first present + * block so that it can be covered. */ - for (nr = 0; nr <= __highest_present_section_nr; - nr += sections_per_block) { - ret = add_boot_memory_block(nr); - if (ret) - panic("%s() failed to add memory block: %d\n", __func__, - ret); + block_id = ULONG_MAX; + for_each_present_section_nr(0, nr) { + if (block_id != ULONG_MAX && memory_block_id(nr) == block_id) + continue; + + block_id = memory_block_id(nr); + ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL); + if (ret) { + panic("%s() failed to add memory block: %d\n", + __func__, ret); + } } } From 7c7f1bfdb2249f854a736d9b79778c7e5a29a150 Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Mon, 10 Mar 2025 09:46:57 +0100 Subject: [PATCH 0690/2924] mcb: fix a double free bug in chameleon_parse_gdd() In chameleon_parse_gdd(), if mcb_device_register() fails, 'mdev' would be released in mcb_device_register() via put_device(). Thus, goto 'err' label and free 'mdev' again causes a double free. Just return if mcb_device_register() fails. Fixes: 3764e82e5150 ("drivers: Introduce MEN Chameleon Bus") Cc: stable Signed-off-by: Haoxiang Li Signed-off-by: Johannes Thumshirn Link: https://lore.kernel.org/r/6201d09e2975ae5789879f79a6de4c38de9edd4a.1741596225.git.jth@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c index 02a680c73979b9..bf0d7d58c8b014 100644 --- a/drivers/mcb/mcb-parse.c +++ b/drivers/mcb/mcb-parse.c @@ -96,7 +96,7 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, ret = mcb_device_register(bus, mdev); if (ret < 0) - goto err; + return ret; return 0; From bcfb443557166287ba544be308ed44d788599afa Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Tue, 18 Mar 2025 17:10:38 +0530 Subject: [PATCH 0691/2924] pps: generators: tio: fix platform_set_drvdata() platform_set_drvdata() is setting a double pointer to struct pps_tio as driver_data, which will point to the local stack of probe function instead of intended data. Set driver_data correctly and fix illegal memory access by its user. BUG: unable to handle page fault for address: ffffc9000117b738 RIP: 0010:hrtimer_active+0x2b/0x60 Call Trace: ? hrtimer_active+0x2b/0x60 hrtimer_cancel+0x19/0x50 pps_gen_tio_remove+0x1e/0x80 [pps_gen_tio] Fixes: c89755d1111f ("pps: generators: Add PPS Generator TIO Driver") Signed-off-by: Raag Jadav Acked-by: Rodolfo Giometti Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250318114038.2058677-1-raag.jadav@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/pps/generators/pps_gen_tio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pps/generators/pps_gen_tio.c b/drivers/pps/generators/pps_gen_tio.c index 1d5ffe055463fc..de00a85bfafa2d 100644 --- a/drivers/pps/generators/pps_gen_tio.c +++ b/drivers/pps/generators/pps_gen_tio.c @@ -230,7 +230,7 @@ static int pps_gen_tio_probe(struct platform_device *pdev) hrtimer_setup(&tio->timer, hrtimer_callback, CLOCK_REALTIME, HRTIMER_MODE_ABS); spin_lock_init(&tio->lock); - platform_set_drvdata(pdev, &tio); + platform_set_drvdata(pdev, tio); return 0; } From 00f1cc14da0f06d2897b8c528df7c7dcf1b8da50 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 18 Mar 2025 15:12:02 +0100 Subject: [PATCH 0692/2924] mei: vsc: Fix fortify-panic caused by invalid counted_by() use gcc 15 honors the __counted_by(len) attribute on vsc_tp_packet.buf[] and the vsc-tp.c code is using this in a wrong way. len does not contain the available size in the buffer, it contains the actual packet length *without* the crc. So as soon as vsc_tp_xfer() tries to add the crc to buf[] the fortify-panic handler gets triggered: [ 80.842193] memcpy: detected buffer overflow: 4 byte write of buffer size 0 [ 80.842243] WARNING: CPU: 4 PID: 272 at lib/string_helpers.c:1032 __fortify_report+0x45/0x50 ... [ 80.843175] __fortify_panic+0x9/0xb [ 80.843186] vsc_tp_xfer.cold+0x67/0x67 [mei_vsc_hw] [ 80.843210] ? seqcount_lockdep_reader_access.constprop.0+0x82/0x90 [ 80.843229] ? lockdep_hardirqs_on+0x7c/0x110 [ 80.843250] mei_vsc_hw_start+0x98/0x120 [mei_vsc] [ 80.843270] mei_reset+0x11d/0x420 [mei] The easiest fix would be to just drop the counted-by but with the exception of the ack buffer in vsc_tp_xfer_helper() which only contains enough room for the packet-header, all other uses of vsc_tp_packet always use a buffer of VSC_TP_MAX_XFER_SIZE bytes for the packet. Instead of just dropping the counted-by, split the vsc_tp_packet struct definition into a header and a full-packet definition and use a fixed size buf[] in the packet definition, this way fortify-source buffer overrun checking still works when enabled. Fixes: 566f5ca97680 ("mei: Add transport driver for IVSC device") Cc: stable@kernel.org Signed-off-by: Hans de Goede Reviewed-by: Alexander Usyskin Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20250318141203.94342-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/vsc-tp.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index 7be1649b19725c..fa553d4914b6ed 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -36,20 +36,24 @@ #define VSC_TP_XFER_TIMEOUT_BYTES 700 #define VSC_TP_PACKET_PADDING_SIZE 1 #define VSC_TP_PACKET_SIZE(pkt) \ - (sizeof(struct vsc_tp_packet) + le16_to_cpu((pkt)->len) + VSC_TP_CRC_SIZE) + (sizeof(struct vsc_tp_packet_hdr) + le16_to_cpu((pkt)->hdr.len) + VSC_TP_CRC_SIZE) #define VSC_TP_MAX_PACKET_SIZE \ - (sizeof(struct vsc_tp_packet) + VSC_TP_MAX_MSG_SIZE + VSC_TP_CRC_SIZE) + (sizeof(struct vsc_tp_packet_hdr) + VSC_TP_MAX_MSG_SIZE + VSC_TP_CRC_SIZE) #define VSC_TP_MAX_XFER_SIZE \ (VSC_TP_MAX_PACKET_SIZE + VSC_TP_XFER_TIMEOUT_BYTES) #define VSC_TP_NEXT_XFER_LEN(len, offset) \ - (len + sizeof(struct vsc_tp_packet) + VSC_TP_CRC_SIZE - offset + VSC_TP_PACKET_PADDING_SIZE) + (len + sizeof(struct vsc_tp_packet_hdr) + VSC_TP_CRC_SIZE - offset + VSC_TP_PACKET_PADDING_SIZE) -struct vsc_tp_packet { +struct vsc_tp_packet_hdr { __u8 sync; __u8 cmd; __le16 len; __le32 seq; - __u8 buf[] __counted_by(len); +}; + +struct vsc_tp_packet { + struct vsc_tp_packet_hdr hdr; + __u8 buf[VSC_TP_MAX_XFER_SIZE - sizeof(struct vsc_tp_packet_hdr)]; }; struct vsc_tp { @@ -158,12 +162,12 @@ static int vsc_tp_dev_xfer(struct vsc_tp *tp, void *obuf, void *ibuf, size_t len static int vsc_tp_xfer_helper(struct vsc_tp *tp, struct vsc_tp_packet *pkt, void *ibuf, u16 ilen) { - int ret, offset = 0, cpy_len, src_len, dst_len = sizeof(struct vsc_tp_packet); + int ret, offset = 0, cpy_len, src_len, dst_len = sizeof(struct vsc_tp_packet_hdr); int next_xfer_len = VSC_TP_PACKET_SIZE(pkt) + VSC_TP_XFER_TIMEOUT_BYTES; u8 *src, *crc_src, *rx_buf = tp->rx_buf; int count_down = VSC_TP_MAX_XFER_COUNT; u32 recv_crc = 0, crc = ~0; - struct vsc_tp_packet ack; + struct vsc_tp_packet_hdr ack; u8 *dst = (u8 *)&ack; bool synced = false; @@ -280,10 +284,10 @@ int vsc_tp_xfer(struct vsc_tp *tp, u8 cmd, const void *obuf, size_t olen, guard(mutex)(&tp->mutex); - pkt->sync = VSC_TP_PACKET_SYNC; - pkt->cmd = cmd; - pkt->len = cpu_to_le16(olen); - pkt->seq = cpu_to_le32(++tp->seq); + pkt->hdr.sync = VSC_TP_PACKET_SYNC; + pkt->hdr.cmd = cmd; + pkt->hdr.len = cpu_to_le16(olen); + pkt->hdr.seq = cpu_to_le32(++tp->seq); memcpy(pkt->buf, obuf, olen); crc = ~crc32(~0, (u8 *)pkt, sizeof(pkt) + olen); From f88c0c72ffb014e5eba676ee337c4eb3b1d6a119 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 18 Mar 2025 15:12:03 +0100 Subject: [PATCH 0693/2924] mei: vsc: Use struct vsc_tp_packet as vsc-tp tx_buf and rx_buf type vsc_tp.tx_buf and vsc_tp.rx_buf point to a struct vsc_tp_packet, use the correct type instead of "void *" and use sizeof(*ptr) when allocating memory for these buffers. Signed-off-by: Hans de Goede Reviewed-by: Alexander Usyskin Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20250318141203.94342-3-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/vsc-tp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index fa553d4914b6ed..da26a080916c54 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -71,8 +71,8 @@ struct vsc_tp { u32 seq; /* command buffer */ - void *tx_buf; - void *rx_buf; + struct vsc_tp_packet *tx_buf; + struct vsc_tp_packet *rx_buf; atomic_t assert_cnt; wait_queue_head_t xfer_wait; @@ -164,7 +164,7 @@ static int vsc_tp_xfer_helper(struct vsc_tp *tp, struct vsc_tp_packet *pkt, { int ret, offset = 0, cpy_len, src_len, dst_len = sizeof(struct vsc_tp_packet_hdr); int next_xfer_len = VSC_TP_PACKET_SIZE(pkt) + VSC_TP_XFER_TIMEOUT_BYTES; - u8 *src, *crc_src, *rx_buf = tp->rx_buf; + u8 *src, *crc_src, *rx_buf = (u8 *)tp->rx_buf; int count_down = VSC_TP_MAX_XFER_COUNT; u32 recv_crc = 0, crc = ~0; struct vsc_tp_packet_hdr ack; @@ -324,7 +324,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) guard(mutex)(&tp->mutex); /* rom xfer is big endian */ - cpu_to_be32_array(tp->tx_buf, obuf, words); + cpu_to_be32_array((u32 *)tp->tx_buf, obuf, words); ret = read_poll_timeout(gpiod_get_value_cansleep, ret, !ret, VSC_TP_ROM_XFER_POLL_DELAY_US, @@ -340,7 +340,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) return ret; if (ibuf) - be32_to_cpu_array(ibuf, tp->rx_buf, words); + be32_to_cpu_array(ibuf, (u32 *)tp->rx_buf, words); return ret; } @@ -494,11 +494,11 @@ static int vsc_tp_probe(struct spi_device *spi) if (!tp) return -ENOMEM; - tp->tx_buf = devm_kzalloc(dev, VSC_TP_MAX_XFER_SIZE, GFP_KERNEL); + tp->tx_buf = devm_kzalloc(dev, sizeof(*tp->tx_buf), GFP_KERNEL); if (!tp->tx_buf) return -ENOMEM; - tp->rx_buf = devm_kzalloc(dev, VSC_TP_MAX_XFER_SIZE, GFP_KERNEL); + tp->rx_buf = devm_kzalloc(dev, sizeof(*tp->rx_buf), GFP_KERNEL); if (!tp->rx_buf) return -ENOMEM; From 4d239f447f96bd2cb646f89431e9db186c1ccfd4 Mon Sep 17 00:00:00 2001 From: Mahesh Rao Date: Wed, 26 Mar 2025 06:54:46 -0500 Subject: [PATCH 0694/2924] firmware: stratix10-svc: Add of_platform_default_populate() Add of_platform_default_populate() to stratix10-svc driver as the firmware/svc node was moved out of soc. This fixes the failed probing of child drivers of svc node. Cc: stable@vger.kernel.org Fixes: 23c3ebed382a ("arm64: dts: socfpga: agilex: move firmware out of soc node") Reviewed-by: Krzysztof Kozlowski Reviewed-by: Xu Yilun Signed-off-by: Mahesh Rao Signed-off-by: Dinh Nguyen Link: https://lore.kernel.org/r/20250326115446.36123-1-dinguyen@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/stratix10-svc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 3c52cb73237a43..e3f990d888d718 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -1224,22 +1224,28 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) if (!svc->intel_svc_fcs) { dev_err(dev, "failed to allocate %s device\n", INTEL_FCS); ret = -ENOMEM; - goto err_unregister_dev; + goto err_unregister_rsu_dev; } ret = platform_device_add(svc->intel_svc_fcs); if (ret) { platform_device_put(svc->intel_svc_fcs); - goto err_unregister_dev; + goto err_unregister_rsu_dev; } + ret = of_platform_default_populate(dev_of_node(dev), NULL, dev); + if (ret) + goto err_unregister_fcs_dev; + dev_set_drvdata(dev, svc); pr_info("Intel Service Layer Driver Initialized\n"); return 0; -err_unregister_dev: +err_unregister_fcs_dev: + platform_device_unregister(svc->intel_svc_fcs); +err_unregister_rsu_dev: platform_device_unregister(svc->stratix10_svc_rsu); err_free_kfifo: kfifo_free(&controller->svc_fifo); @@ -1253,6 +1259,8 @@ static void stratix10_svc_drv_remove(struct platform_device *pdev) struct stratix10_svc *svc = dev_get_drvdata(&pdev->dev); struct stratix10_svc_controller *ctrl = platform_get_drvdata(pdev); + of_platform_depopulate(ctrl->dev); + platform_device_unregister(svc->intel_svc_fcs); platform_device_unregister(svc->stratix10_svc_rsu); From 5ddcc657ba507e926b285394589a3a78603d5f55 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 10 Apr 2025 10:29:43 -0700 Subject: [PATCH 0695/2924] thermal: intel: int340x: Add missing DVFS support flags DVFS (Dynamic Voltage Frequency Scaling) is still supported for DDR memory on Lunar Lake and Panther Lake. Add the missing flag PROC_THERMAL_FEATURE_DVFS. Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250410172943.577913-1-srinivas.pandruvada@linux.intel.com [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki --- .../intel/int340x_thermal/processor_thermal_device_pci.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index a55aaa8cef422f..2097aae3994676 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -485,7 +485,7 @@ static const struct pci_device_id proc_thermal_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, ADL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_WT_REQ) }, { PCI_DEVICE_DATA(INTEL, LNLM_THERMAL, PROC_THERMAL_FEATURE_MSI_SUPPORT | - PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_DLVR | + PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_WT_HINT | PROC_THERMAL_FEATURE_POWER_FLOOR) }, { PCI_DEVICE_DATA(INTEL, MTLP_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_DLVR | @@ -495,8 +495,9 @@ static const struct pci_device_id proc_thermal_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, RPL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_WT_REQ) }, { PCI_DEVICE_DATA(INTEL, PTL_THERMAL, PROC_THERMAL_FEATURE_RAPL | - PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_MSI_SUPPORT | - PROC_THERMAL_FEATURE_WT_HINT | PROC_THERMAL_FEATURE_POWER_FLOOR) }, + PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_DVFS | + PROC_THERMAL_FEATURE_MSI_SUPPORT | PROC_THERMAL_FEATURE_WT_HINT | + PROC_THERMAL_FEATURE_POWER_FLOOR) }, { }, }; From 00c5ff5e9a55dca2e7ca29af4e5f8708731faf11 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 11 Apr 2025 04:54:38 -0700 Subject: [PATCH 0696/2924] thermal: intel: int340x: Fix Panther Lake DLVR support Panther Lake uses the same DLVR register offsets as Lunar Lake, but the driver uses the default register offsets table for it by mistake. Move the selection of register offsets table from the actual attribute read/write callbacks to proc_thermal_rfim_add() and make it handle Panther Lake the same way as Lunar Lake. This way it is clean and in the future such issues can be avoided. Fixes: e50eeababa94 ("thermal: intel: int340x: Panther Lake DLVR support") Signed-off-by: Srinivas Pandruvada Link: https://patch.msgid.link/20250411115438.594114-1-srinivas.pandruvada@linux.intel.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- .../int340x_thermal/processor_thermal_rfim.c | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c index dad63f2d5f90fc..3a028b78d9afc0 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c @@ -166,15 +166,18 @@ static const struct mmio_reg adl_dvfs_mmio_regs[] = { { 0, 0x5A40, 1, 0x1, 0}, /* rfi_disable */ }; +static const struct mapping_table *dlvr_mapping; +static const struct mmio_reg *dlvr_mmio_regs_table; + #define RFIM_SHOW(suffix, table)\ static ssize_t suffix##_show(struct device *dev,\ struct device_attribute *attr,\ char *buf)\ {\ - const struct mapping_table *mapping = NULL;\ + const struct mmio_reg *mmio_regs = dlvr_mmio_regs_table;\ + const struct mapping_table *mapping = dlvr_mapping;\ struct proc_thermal_device *proc_priv;\ struct pci_dev *pdev = to_pci_dev(dev);\ - const struct mmio_reg *mmio_regs;\ const char **match_strs;\ int ret, err;\ u32 reg_val;\ @@ -186,12 +189,6 @@ static ssize_t suffix##_show(struct device *dev,\ mmio_regs = adl_dvfs_mmio_regs;\ } else if (table == 2) { \ match_strs = (const char **)dlvr_strings;\ - if (pdev->device == PCI_DEVICE_ID_INTEL_LNLM_THERMAL) {\ - mmio_regs = lnl_dlvr_mmio_regs;\ - mapping = lnl_dlvr_mapping;\ - } else {\ - mmio_regs = dlvr_mmio_regs;\ - } \ } else {\ match_strs = (const char **)fivr_strings;\ mmio_regs = tgl_fivr_mmio_regs;\ @@ -214,12 +211,12 @@ static ssize_t suffix##_store(struct device *dev,\ struct device_attribute *attr,\ const char *buf, size_t count)\ {\ - const struct mapping_table *mapping = NULL;\ + const struct mmio_reg *mmio_regs = dlvr_mmio_regs_table;\ + const struct mapping_table *mapping = dlvr_mapping;\ struct proc_thermal_device *proc_priv;\ struct pci_dev *pdev = to_pci_dev(dev);\ unsigned int input;\ const char **match_strs;\ - const struct mmio_reg *mmio_regs;\ int ret, err;\ u32 reg_val;\ u32 mask;\ @@ -230,12 +227,6 @@ static ssize_t suffix##_store(struct device *dev,\ mmio_regs = adl_dvfs_mmio_regs;\ } else if (table == 2) { \ match_strs = (const char **)dlvr_strings;\ - if (pdev->device == PCI_DEVICE_ID_INTEL_LNLM_THERMAL) {\ - mmio_regs = lnl_dlvr_mmio_regs;\ - mapping = lnl_dlvr_mapping;\ - } else {\ - mmio_regs = dlvr_mmio_regs;\ - } \ } else {\ match_strs = (const char **)fivr_strings;\ mmio_regs = tgl_fivr_mmio_regs;\ @@ -448,6 +439,16 @@ int proc_thermal_rfim_add(struct pci_dev *pdev, struct proc_thermal_device *proc } if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DLVR) { + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_LNLM_THERMAL: + case PCI_DEVICE_ID_INTEL_PTL_THERMAL: + dlvr_mmio_regs_table = lnl_dlvr_mmio_regs; + dlvr_mapping = lnl_dlvr_mapping; + break; + default: + dlvr_mmio_regs_table = dlvr_mmio_regs; + break; + } ret = sysfs_create_group(&pdev->dev.kobj, &dlvr_attribute_group); if (ret) return ret; From 19e8019e06b478ab04683418c73ca297d114c425 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 10 Apr 2025 18:31:00 +0300 Subject: [PATCH 0697/2924] Documentation: PM: runtime: Fix a reference to pm_runtime_autosuspend() pm_runtime_autosuspend() got accidentally renamed as __pm_runtime_autosuspend() whereas the intention in the patch was to rename pm_runtime_put_autosuspend() only. Fix it. Fixes: b7d46644e554 ("PM: runtime: Add pm_runtime_put_autosuspend() replacement") Signed-off-by: Sakari Ailus Reviewed-by: Laurent Pinchart Link: https://patch.msgid.link/20250410153106.4146265-2-sakari.ailus@linux.intel.com Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index 12f429359a823e..63344bea839361 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -154,7 +154,7 @@ suspending the device are satisfied) and to queue up a suspend request for the device in that case. If there is no idle callback, or if the callback returns 0, then the PM core will attempt to carry out a runtime suspend of the device, also respecting devices configured for autosuspend. In essence this means a -call to __pm_runtime_autosuspend() (do note that drivers needs to update the +call to pm_runtime_autosuspend() (do note that drivers needs to update the device last busy mark, pm_runtime_mark_last_busy(), to control the delay under this circumstance). To prevent this (for example, if the callback routine has started a delayed suspend), the routine must return a non-zero value. Negative From 1c4494c14b4124f3a13a7f4912b84b633ff4f9ba Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 14 Apr 2025 19:12:41 +0200 Subject: [PATCH 0698/2924] rust: kbuild: use `pound` to support GNU Make < 4.3 GNU Make 4.3 changed the behavior of `#` inside commands in commit c6966b323811 ("[SV 20513] Un-escaped # are not comments in function invocations"): * WARNING: Backward-incompatibility! Number signs (#) appearing inside a macro reference or function invocation no longer introduce comments and should not be escaped with backslashes: thus a call such as: foo := $(shell echo '#') is legal. Previously the number sign needed to be escaped, for example: foo := $(shell echo '\#') Now this latter will resolve to "\#". If you want to write makefiles portable to both versions, assign the number sign to a variable: H := \# foo := $(shell echo '$H') This was claimed to be fixed in 3.81, but wasn't, for some reason. To detect this change search for 'nocomment' in the .FEATURES variable. Unlike other commits in the kernel about this issue, such as commit 633174a7046e ("lib/raid6/test/Makefile: Use $(pound) instead of \# for Make 4.3"), that fixed the issue for newer GNU Makes, in our case it was the opposite, i.e. we need to fix it for the older ones: someone building with e.g. 4.2.1 gets the following error: scripts/Makefile.compiler:81: *** unterminated call to function 'call': missing ')'. Stop. Thus use the existing variable to fix it. Reported-by: moyi geek <1441339168@qq.com> Closes: https://rust-for-linux.zulipchat.com/#narrow/channel/291565/topic/x/near/512001985 Cc: stable@vger.kernel.org Fixes: e72a076c620f ("kbuild: fix issues with rustc-option") Reviewed-by: Nicolas Schier Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250414171241.2126137-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/Makefile.compiler | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler index 7ed7f92a7daa80..f4fcc1eaaeaee8 100644 --- a/scripts/Makefile.compiler +++ b/scripts/Makefile.compiler @@ -79,7 +79,7 @@ ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) # Usage: MY_RUSTFLAGS += $(call __rustc-option,$(RUSTC),$(MY_RUSTFLAGS),-Cinstrument-coverage,-Zinstrument-coverage) # TODO: remove RUSTC_BOOTSTRAP=1 when we raise the minimum GNU Make version to 4.4 __rustc-option = $(call try-run,\ - echo '#![allow(missing_docs)]#![feature(no_core)]#![no_core]' | RUSTC_BOOTSTRAP=1\ + echo '$(pound)![allow(missing_docs)]$(pound)![feature(no_core)]$(pound)![no_core]' | RUSTC_BOOTSTRAP=1\ $(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null --target=%,$(2)) $(3)\ --crate-type=rlib --out-dir=$(TMPOUT) --emit=obj=- - >/dev/null,$(3),$(4)) From 41c721fc093938745d116c3a21326a0ee03bb491 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 1 Apr 2025 06:47:49 -0700 Subject: [PATCH 0699/2924] spi: tegra210-quad: use WARN_ON_ONCE instead of WARN_ON for timeouts Some machines with tegra_qspi_combined_seq_xfer hardware issues generate excessive kernel warnings, severely polluting the logs: dmesg | grep -i "WARNING:.*tegra_qspi_transfer_one_message" | wc -l 94451 This patch replaces WARN_ON with WARN_ON_ONCE for timeout conditions to reduce log spam. The subsequent error message still prints on each occurrence, providing sufficient information about the failure, while the stack trace is only needed once for debugging purposes. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250401-tegra-v2-1-126c293ec047@debian.org Signed-off-by: Mark Brown --- drivers/spi/spi-tegra210-quad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 08e49a8768943c..2d320fbb8875f0 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -1117,7 +1117,7 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, (&tqspi->xfer_completion, QSPI_DMA_TIMEOUT); - if (WARN_ON(ret == 0)) { + if (WARN_ON_ONCE(ret == 0)) { dev_err(tqspi->dev, "QSPI Transfer failed with timeout: %d\n", ret); if (tqspi->is_curr_dma_xfer && From 21f4314e66ed8d40b2ee24185d1a06a07a512eb1 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 1 Apr 2025 06:47:50 -0700 Subject: [PATCH 0700/2924] spi: tegra210-quad: add rate limiting and simplify timeout error message On malfunctioning hardware, timeout error messages can appear thousands of times, creating unnecessary system pressure and log bloat. This patch makes two improvements: 1. Replace dev_err() with dev_err_ratelimited() to prevent log flooding when hardware errors persist 2. Remove the redundant timeout value parameter from the error message, as 'ret' is always zero in this error path These changes reduce logging overhead while maintaining necessary error reporting for debugging purposes. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20250401-tegra-v2-2-126c293ec047@debian.org Signed-off-by: Mark Brown --- drivers/spi/spi-tegra210-quad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 2d320fbb8875f0..64e1b2f8a0001c 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -1118,8 +1118,8 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, QSPI_DMA_TIMEOUT); if (WARN_ON_ONCE(ret == 0)) { - dev_err(tqspi->dev, "QSPI Transfer failed with timeout: %d\n", - ret); + dev_err_ratelimited(tqspi->dev, + "QSPI Transfer failed with timeout\n"); if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_TX)) dmaengine_terminate_all From 0dba7a05b9e47d8b546399117b0ddf2426dc6042 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 15 Apr 2025 16:55:06 +0200 Subject: [PATCH 0701/2924] loop: LOOP_SET_FD: send uevents for partitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the suppression of the uevents before scanning for partitions. The partitions inherit their suppression settings from their parent device, which lead to the uevents being dropped. This is similar to the same changes for LOOP_CONFIGURE done in commit bb430b694226 ("loop: LOOP_CONFIGURE: send uevents for partitions"). Fixes: 498ef5c777d9 ("loop: suppress uevents while reconfiguring the device") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20250415-loop-uevent-changed-v3-1-60ff69ac6088@linutronix.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3be7f00e7fc740..e9ec7a45f3f2d1 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -662,12 +662,12 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, * dependency. */ fput(old_file); + dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); if (partscan) loop_reread_partitions(lo); error = 0; done: - dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); return error; @@ -675,6 +675,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_global_unlock(lo, is_loop); out_putf: fput(file); + dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); goto done; } From d94c12bd97d567de342fd32599e7cd9e50bfa140 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 2 Apr 2025 17:06:59 -0700 Subject: [PATCH 0702/2924] string: Add load_unaligned_zeropad() code path to sized_strscpy() The call to read_word_at_a_time() in sized_strscpy() is problematic with MTE because it may trigger a tag check fault when reading across a tag granule (16 bytes) boundary. To make this code MTE compatible, let's start using load_unaligned_zeropad() on architectures where it is available (i.e. architectures that define CONFIG_DCACHE_WORD_ACCESS). Because load_unaligned_zeropad() takes care of page boundaries as well as tag granule boundaries, also disable the code preventing crossing page boundaries when using load_unaligned_zeropad(). Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/If4b22e43b5a4ca49726b4bf98ada827fdf755548 Fixes: 94ab5b61ee16 ("kasan, arm64: enable CONFIG_KASAN_HW_TAGS") Cc: stable@vger.kernel.org Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20250403000703.2584581-2-pcc@google.com Signed-off-by: Kees Cook --- lib/string.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/string.c b/lib/string.c index eb4486ed40d259..b632c71df1a506 100644 --- a/lib/string.c +++ b/lib/string.c @@ -119,6 +119,7 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count) if (count == 0 || WARN_ON_ONCE(count > INT_MAX)) return -E2BIG; +#ifndef CONFIG_DCACHE_WORD_ACCESS #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS /* * If src is unaligned, don't cross a page boundary, @@ -133,12 +134,14 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count) /* If src or dest is unaligned, don't do word-at-a-time. */ if (((long) dest | (long) src) & (sizeof(long) - 1)) max = 0; +#endif #endif /* - * read_word_at_a_time() below may read uninitialized bytes after the - * trailing zero and use them in comparisons. Disable this optimization - * under KMSAN to prevent false positive reports. + * load_unaligned_zeropad() or read_word_at_a_time() below may read + * uninitialized bytes after the trailing zero and use them in + * comparisons. Disable this optimization under KMSAN to prevent + * false positive reports. */ if (IS_ENABLED(CONFIG_KMSAN)) max = 0; @@ -146,7 +149,11 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count) while (max >= sizeof(unsigned long)) { unsigned long c, data; +#ifdef CONFIG_DCACHE_WORD_ACCESS + c = load_unaligned_zeropad(src+res); +#else c = read_word_at_a_time(src+res); +#endif if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); From 62d32440ac127b79747ef930205052803a8efd0f Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 2 Apr 2025 17:07:00 -0700 Subject: [PATCH 0703/2924] kasan: Add strscpy() test to trigger tag fault on arm64 When we invoke strscpy() with a maximum size of N bytes, it assumes that: - It can always read N bytes from the source. - It always write N bytes (zero-padded) to the destination. On aarch64 with Memory Tagging Extension enabled if we pass an N that is bigger then the source buffer, it would previously trigger an MTE fault. Implement a KASAN KUnit test that triggers the issue with the previous implementation of read_word_at_a_time() on aarch64 with MTE enabled. Cc: Will Deacon Signed-off-by: Vincenzo Frascino Signed-off-by: Catalin Marinas Co-developed-by: Peter Collingbourne Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Link: https://linux-review.googlesource.com/id/If88e396b9e7c058c1a4b5a252274120e77b1898a Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20250403000703.2584581-3-pcc@google.com Signed-off-by: Kees Cook --- mm/kasan/kasan_test_c.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index 3ea317837c2d37..888912637b5409 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -1567,6 +1567,7 @@ static void kasan_memcmp(struct kunit *test) static void kasan_strings(struct kunit *test) { char *ptr; + char *src; size_t size = 24; /* @@ -1578,6 +1579,25 @@ static void kasan_strings(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + src = kmalloc(KASAN_GRANULE_SIZE, GFP_KERNEL | __GFP_ZERO); + strscpy(src, "f0cacc1a0000000", KASAN_GRANULE_SIZE); + + /* + * Make sure that strscpy() does not trigger KASAN if it overreads into + * poisoned memory. + * + * The expected size does not include the terminator '\0' + * so it is (KASAN_GRANULE_SIZE - 2) == + * KASAN_GRANULE_SIZE - ("initial removed character" + "\0"). + */ + KUNIT_EXPECT_EQ(test, KASAN_GRANULE_SIZE - 2, + strscpy(ptr, src + 1, KASAN_GRANULE_SIZE)); + + /* strscpy should fail if the first byte is unreadable. */ + KUNIT_EXPECT_KASAN_FAIL(test, strscpy(ptr, src + KASAN_GRANULE_SIZE, + KASAN_GRANULE_SIZE)); + + kfree(src); kfree(ptr); /* From f5c68a4e84f9feca3be578199ec648b676db2030 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 9 Apr 2025 08:11:58 -0700 Subject: [PATCH 0704/2924] hardening: Disable GCC randstruct for COMPILE_TEST There is a GCC crash bug in the randstruct for latest GCC versions that is being tickled by landlock[1]. Temporarily disable GCC randstruct for COMPILE_TEST builds to unbreak CI systems for the coming -rc2. This can be restored once the bug is fixed. Suggested-by: Mark Brown Link: https://lore.kernel.org/all/20250407-kbuild-disable-gcc-plugins-v1-1-5d46ae583f5e@kernel.org/ [1] Acked-by: Mark Brown Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250409151154.work.872-kees@kernel.org Signed-off-by: Kees Cook --- security/Kconfig.hardening | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index c17366ce8224ef..3fe9d7b945c43e 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -344,7 +344,7 @@ config CC_HAS_RANDSTRUCT choice prompt "Randomize layout of sensitive kernel structures" - default RANDSTRUCT_FULL if COMPILE_TEST && (GCC_PLUGINS || CC_HAS_RANDSTRUCT) + default RANDSTRUCT_FULL if COMPILE_TEST && CC_HAS_RANDSTRUCT default RANDSTRUCT_NONE help If you enable this, the layouts of structures that are entirely From cdc2e1d9d929d7f7009b3a5edca52388a2b0891f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 14 Apr 2025 15:00:59 -0700 Subject: [PATCH 0705/2924] lib/Kconfig.ubsan: Remove 'default UBSAN' from UBSAN_INTEGER_WRAP CONFIG_UBSAN_INTEGER_WRAP is 'default UBSAN', which is problematic for a couple of reasons. The first is that this sanitizer is under active development on the compiler side to come up with a solution that is maintainable on the compiler side and usable on the kernel side. As a result of this, there are many warnings when the sanitizer is enabled that have no clear path to resolution yet but users may see them and report them in the meantime. The second is that this option was renamed from CONFIG_UBSAN_SIGNED_WRAP, meaning that if a configuration has CONFIG_UBSAN=y but CONFIG_UBSAN_SIGNED_WRAP=n and it is upgraded via olddefconfig (common in non-interactive scenarios such as CI), CONFIG_UBSAN_INTEGER_WRAP will be silently enabled again. Remove 'default UBSAN' from CONFIG_UBSAN_INTEGER_WRAP until it is ready for regular usage and testing from a broader community than the folks actively working on the feature. Cc: stable@vger.kernel.org Fixes: 557f8c582a9b ("ubsan: Reintroduce signed overflow sanitizer") Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250414-drop-default-ubsan-integer-wrap-v1-1-392522551d6b@kernel.org Signed-off-by: Kees Cook --- lib/Kconfig.ubsan | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 4216b3a4ff218f..f6ea0c5b5da393 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -118,7 +118,6 @@ config UBSAN_UNREACHABLE config UBSAN_INTEGER_WRAP bool "Perform checking for integer arithmetic wrap-around" - default UBSAN depends on !COMPILE_TEST depends on $(cc-option,-fsanitize-undefined-ignore-overflow-pattern=all) depends on $(cc-option,-fsanitize=signed-integer-overflow) From 9b044614be12d78d3a93767708b8d02fb7dfa9b0 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Tue, 15 Apr 2025 20:33:54 +0000 Subject: [PATCH 0706/2924] ubsan: Fix panic from test_ubsan_out_of_bounds Running lib_ubsan.ko on arm64 (without CONFIG_UBSAN_TRAP) panics the kernel: [ 31.616546] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: test_ubsan_out_of_bounds+0x158/0x158 [test_ubsan] [ 31.646817] CPU: 3 UID: 0 PID: 179 Comm: insmod Not tainted 6.15.0-rc2 #1 PREEMPT [ 31.648153] Hardware name: linux,dummy-virt (DT) [ 31.648970] Call trace: [ 31.649345] show_stack+0x18/0x24 (C) [ 31.650960] dump_stack_lvl+0x40/0x84 [ 31.651559] dump_stack+0x18/0x24 [ 31.652264] panic+0x138/0x3b4 [ 31.652812] __ktime_get_real_seconds+0x0/0x10 [ 31.653540] test_ubsan_load_invalid_value+0x0/0xa8 [test_ubsan] [ 31.654388] init_module+0x24/0xff4 [test_ubsan] [ 31.655077] do_one_initcall+0xd4/0x280 [ 31.655680] do_init_module+0x58/0x2b4 That happens because the test corrupts other data in the stack: 400: d5384108 mrs x8, sp_el0 404: f9426d08 ldr x8, [x8, #1240] 408: f85f83a9 ldur x9, [x29, #-8] 40c: eb09011f cmp x8, x9 410: 54000301 b.ne 470 // b.any As there is no guarantee the compiler will order the local variables as declared in the module: volatile char above[4] = { }; /* Protect surrounding memory. */ volatile int arr[4]; volatile char below[4] = { }; /* Protect surrounding memory. */ There is another problem where the out-of-bound index is 5 which is larger than the extra surrounding memory for protection. So, use a struct to enforce the ordering, and fix the index to be 4. Also, remove some of the volatiles and rely on OPTIMIZER_HIDE_VAR() Signed-off-by: Mostafa Saleh Link: https://lore.kernel.org/r/20250415203354.4109415-1-smostafa@google.com Signed-off-by: Kees Cook --- lib/test_ubsan.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index 8772e5edaa4fa7..a4b6f52b9c5780 100644 --- a/lib/test_ubsan.c +++ b/lib/test_ubsan.c @@ -77,18 +77,22 @@ static void test_ubsan_shift_out_of_bounds(void) static void test_ubsan_out_of_bounds(void) { - volatile int i = 4, j = 5, k = -1; - volatile char above[4] = { }; /* Protect surrounding memory. */ - volatile int arr[4]; - volatile char below[4] = { }; /* Protect surrounding memory. */ + int i = 4, j = 4, k = -1; + volatile struct { + char above[4]; /* Protect surrounding memory. */ + int arr[4]; + char below[4]; /* Protect surrounding memory. */ + } data; - above[0] = below[0]; + OPTIMIZER_HIDE_VAR(i); + OPTIMIZER_HIDE_VAR(j); + OPTIMIZER_HIDE_VAR(k); UBSAN_TEST(CONFIG_UBSAN_BOUNDS, "above"); - arr[j] = i; + data.arr[j] = i; UBSAN_TEST(CONFIG_UBSAN_BOUNDS, "below"); - arr[k] = i; + data.arr[k] = i; } enum ubsan_test_enum { From 3f2925174f8bd811f9399cb4049f6b75fd2fba91 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 14 Apr 2025 16:35:00 +0200 Subject: [PATCH 0707/2924] lib/prime_numbers: KUnit test should not select PRIME_NUMBERS Enabling a (modular) test should not silently enable additional kernel functionality, as that may increase the attack vector of a product. Fix this by making PRIME_NUMBERS_KUNIT_TEST depend on PRIME_NUMBERS instead of selecting it. After this, one can safely enable CONFIG_KUNIT_ALL_TESTS=m to build modules for all appropriate tests for ones system, without pulling in extra unwanted functionality, while still allowing a tester to manually enable PRIME_NUMBERS and this test suite on a system where PRIME_NUMBERS is not enabled by default. Resurrect CONFIG_PRIME_NUMBERS=m in tools/testing/selftests/lib/config for the latter use case. Fixes: 313b38a6ecb46db4 ("lib/prime_numbers: convert self-test to KUnit") Signed-off-by: Geert Uytterhoeven Acked-by: Tamir Duberstein Link: https://lore.kernel.org/r/40f8a40eef4930d3ac9febd205bc171eb04e171c.1744641237.git.geert@linux-m68k.org Signed-off-by: Kees Cook --- lib/Kconfig.debug | 2 +- tools/testing/selftests/lib/config | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9fe4d8dfe57829..f9051ab610d543 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3290,7 +3290,7 @@ config GCD_KUNIT_TEST config PRIME_NUMBERS_KUNIT_TEST tristate "Prime number generator test" if !KUNIT_ALL_TESTS depends on KUNIT - select PRIME_NUMBERS + depends on PRIME_NUMBERS default KUNIT_ALL_TESTS help This option enables the KUnit test suite for the {is,next}_prime_number diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config index 81a1f64a22e860..377b3699ff3129 100644 --- a/tools/testing/selftests/lib/config +++ b/tools/testing/selftests/lib/config @@ -1,2 +1,3 @@ CONFIG_TEST_BITMAP=m +CONFIG_PRIME_NUMBERS=m CONFIG_TEST_BITOPS=m From 584e61452f75bfeac2cdd83730b4059526ec60c7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 12 Apr 2025 09:53:41 +0900 Subject: [PATCH 0708/2924] rust: helpers: Remove volatile qualifier from io helpers Remove the `volatile` qualifier used with __iomem in helper functions in io.c. These helper functions are just wrappers around the corresponding accessors so they are unnecessary. This fixes the following UML build error with CONFIG_RUST enabled: In file included from rust/helpers/helpers.c:19: rust/helpers/io.c:12:10: error: passing 'volatile void *' to parameter of type 'void *' discards qualifiers [-Werror,-Wincompatible-pointer-types-discards-qualifiers] 12 | iounmap(addr); | ^~~~ arch/um/include/asm/io.h:19:42: note: passing argument to parameter 'addr' here 19 | static inline void iounmap(void __iomem *addr) | ^ 1 error generated. [ Arnd explains [1] that removing the qualifier is the way forward (thanks!): Rihgt, I tried this last week when it came up first, removing the 'volatile' annotations in the asm-generic/io.h header and then all the ones that caused build regressions on arm/arm64/x86 randconfig and allmodconfig builds. This patch is a little longer than my original version as I did run into a few regressions later. As far as I can tell, none of these volatile annotations have any actual effect, and most of them date back to ancient kernels where this may have been required. Leaving it out of the rust interface is clearly the right way, and it shouldn't be too hard to upstream the changes below when we need to, but I also don't see any priority to send these. If anyone wants to help out, I can send them the whole patch. I created an issue [2] in case someone wants to help. - Miguel ] Fixes: ce30d94e6855 ("rust: add `io::{Io, IoRaw}` base types") Signed-off-by: FUJITA Tomonori Cc: stable@vger.kernel.org Reviewed-by: Danilo Krummrich Link: https://lore.kernel.org/rust-for-linux/0c844b70-19c7-4b14-ba29-fc99ae0d69f0@app.fastmail.com/ [1] Link: https://github.com/Rust-for-Linux/linux/issues/1156 [2] Link: https://lore.kernel.org/r/20250412005341.157150-1-fujita.tomonori@gmail.com [ Reworded for relative paths. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/helpers/io.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/rust/helpers/io.c b/rust/helpers/io.c index 4c2401ccd72078..15ea187c546625 100644 --- a/rust/helpers/io.c +++ b/rust/helpers/io.c @@ -7,94 +7,94 @@ void __iomem *rust_helper_ioremap(phys_addr_t offset, size_t size) return ioremap(offset, size); } -void rust_helper_iounmap(volatile void __iomem *addr) +void rust_helper_iounmap(void __iomem *addr) { iounmap(addr); } -u8 rust_helper_readb(const volatile void __iomem *addr) +u8 rust_helper_readb(const void __iomem *addr) { return readb(addr); } -u16 rust_helper_readw(const volatile void __iomem *addr) +u16 rust_helper_readw(const void __iomem *addr) { return readw(addr); } -u32 rust_helper_readl(const volatile void __iomem *addr) +u32 rust_helper_readl(const void __iomem *addr) { return readl(addr); } #ifdef CONFIG_64BIT -u64 rust_helper_readq(const volatile void __iomem *addr) +u64 rust_helper_readq(const void __iomem *addr) { return readq(addr); } #endif -void rust_helper_writeb(u8 value, volatile void __iomem *addr) +void rust_helper_writeb(u8 value, void __iomem *addr) { writeb(value, addr); } -void rust_helper_writew(u16 value, volatile void __iomem *addr) +void rust_helper_writew(u16 value, void __iomem *addr) { writew(value, addr); } -void rust_helper_writel(u32 value, volatile void __iomem *addr) +void rust_helper_writel(u32 value, void __iomem *addr) { writel(value, addr); } #ifdef CONFIG_64BIT -void rust_helper_writeq(u64 value, volatile void __iomem *addr) +void rust_helper_writeq(u64 value, void __iomem *addr) { writeq(value, addr); } #endif -u8 rust_helper_readb_relaxed(const volatile void __iomem *addr) +u8 rust_helper_readb_relaxed(const void __iomem *addr) { return readb_relaxed(addr); } -u16 rust_helper_readw_relaxed(const volatile void __iomem *addr) +u16 rust_helper_readw_relaxed(const void __iomem *addr) { return readw_relaxed(addr); } -u32 rust_helper_readl_relaxed(const volatile void __iomem *addr) +u32 rust_helper_readl_relaxed(const void __iomem *addr) { return readl_relaxed(addr); } #ifdef CONFIG_64BIT -u64 rust_helper_readq_relaxed(const volatile void __iomem *addr) +u64 rust_helper_readq_relaxed(const void __iomem *addr) { return readq_relaxed(addr); } #endif -void rust_helper_writeb_relaxed(u8 value, volatile void __iomem *addr) +void rust_helper_writeb_relaxed(u8 value, void __iomem *addr) { writeb_relaxed(value, addr); } -void rust_helper_writew_relaxed(u16 value, volatile void __iomem *addr) +void rust_helper_writew_relaxed(u16 value, void __iomem *addr) { writew_relaxed(value, addr); } -void rust_helper_writel_relaxed(u32 value, volatile void __iomem *addr) +void rust_helper_writel_relaxed(u32 value, void __iomem *addr) { writel_relaxed(value, addr); } #ifdef CONFIG_64BIT -void rust_helper_writeq_relaxed(u64 value, volatile void __iomem *addr) +void rust_helper_writeq_relaxed(u64 value, void __iomem *addr) { writeq_relaxed(value, addr); } From c1b4071ec3a6a594df6c49bf8f04a60a88072525 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 12 Apr 2025 09:05:06 +0900 Subject: [PATCH 0709/2924] rust: helpers: Add dma_alloc_attrs() and dma_free_attrs() Add dma_alloc_attrs() and dma_free_attrs() helpers to fix a build error when CONFIG_HAS_DMA is not enabled. Note that when CONFIG_HAS_DMA is enabled, dma_alloc_attrs() and dma_free_attrs() are included in both bindings_generated.rs and bindings_helpers_generated.rs. The former takes precedence so behavior remains unchanged in that case. This fixes the following build error on UML: error[E0425]: cannot find function `dma_alloc_attrs` in crate `bindings` --> rust/kernel/dma.rs:171:23 | 171 | bindings::dma_alloc_attrs( | ^^^^^^^^^^^^^^^ help: a function with a similar name exists: `dma_alloc_pages` | ::: rust/bindings/bindings_generated.rs:44568:5 | 44568 | / pub fn dma_alloc_pages( 44569 | | dev: *mut device, 44570 | | size: usize, 44571 | | dma_handle: *mut dma_addr_t, 44572 | | dir: dma_data_direction, 44573 | | gfp: gfp_t, 44574 | | ) -> *mut page; | |___________________- similarly named function `dma_alloc_pages` defined here error[E0425]: cannot find function `dma_free_attrs` in crate `bindings` --> rust/kernel/dma.rs:293:23 | 293 | bindings::dma_free_attrs( | ^^^^^^^^^^^^^^ help: a function with a similar name exists: `dma_free_pages` | ::: rust/bindings/bindings_generated.rs:44577:5 | 44577 | / pub fn dma_free_pages( 44578 | | dev: *mut device, 44579 | | size: usize, 44580 | | page: *mut page, 44581 | | dma_handle: dma_addr_t, 44582 | | dir: dma_data_direction, 44583 | | ); | |______- similarly named function `dma_free_pages` defined here Fixes: ad2907b4e308 ("rust: add dma coherent allocator abstraction") Signed-off-by: FUJITA Tomonori Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250412000507.157000-1-fujita.tomonori@gmail.com [ Reworded for relative paths. - Miguel ] Signed-off-by: Miguel Ojeda --- MAINTAINERS | 1 + rust/helpers/dma.c | 16 ++++++++++++++++ rust/helpers/helpers.c | 1 + 3 files changed, 18 insertions(+) create mode 100644 rust/helpers/dma.c diff --git a/MAINTAINERS b/MAINTAINERS index 96b82704950184..bec614ef35d96c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7020,6 +7020,7 @@ L: rust-for-linux@vger.kernel.org S: Supported W: https://rust-for-linux.com T: git https://github.com/Rust-for-Linux/linux.git alloc-next +F: rust/helpers/dma.c F: rust/kernel/dma.rs F: samples/rust/rust_dma.rs diff --git a/rust/helpers/dma.c b/rust/helpers/dma.c new file mode 100644 index 00000000000000..df8b8a77355a32 --- /dev/null +++ b/rust/helpers/dma.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +void *rust_helper_dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + unsigned long attrs) +{ + return dma_alloc_attrs(dev, size, dma_handle, flag, attrs); +} + +void rust_helper_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs) +{ + dma_free_attrs(dev, size, cpu_addr, dma_handle, attrs); +} diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index e1c21eba9b15b6..1e7c84df725211 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -14,6 +14,7 @@ #include "cpumask.c" #include "cred.c" #include "device.c" +#include "dma.c" #include "err.c" #include "fs.c" #include "io.c" From 72b5259053903552ee0314eb422c990c29eb8546 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 16:58:43 -0400 Subject: [PATCH 0710/2924] bcachefs: snapshot_node_missing is now autofix Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 512c56ee5d9436..dc53d25c7cbb00 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -291,7 +291,7 @@ enum bch_fsck_flags { x(btree_node_topology_empty_interior_node, 261, 0) \ x(btree_ptr_v2_min_key_bad, 262, 0) \ x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \ - x(snapshot_node_missing, 264, 0) \ + x(snapshot_node_missing, 264, FSCK_AUTOFIX) \ x(dup_backpointer_to_bad_csum_extent, 265, 0) \ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ x(sb_clean_entry_overrun, 267, 0) \ From bc0b828ef6e561081ebc4c758d0c4d166bb9829c Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 14 Apr 2025 15:18:23 -0600 Subject: [PATCH 0711/2924] Revert "PCI: Avoid reset when disabled via sysfs" This reverts commit 479380efe1625e251008d24b2810283db60d6fcd. The reset_method attribute on a PCI device is only intended to manage the availability of function scoped resets for a device. It was never intended to restrict resets targeting the bus or slot. In introducing a restriction that each device must support function level reset by testing pci_reset_supported(), we essentially create a catch-22, that a device must have a function scope reset in order to support bus/slot reset, when we use bus/slot reset to effect a reset of a device that does not support a function scoped reset, especially multi-function devices. This breaks the majority of uses cases where vfio-pci uses bus/slot resets to manage multifunction devices that do not support function scoped resets. Fixes: 479380efe162 ("PCI: Avoid reset when disabled via sysfs") Reported-by: Cal Peake Closes: https://lore.kernel.org/all/808e1111-27b7-f35b-6d5c-5b275e73677b@absolutedigital.net Reported-by: Athul Krishna Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220010 Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Reviewed-by: Kevin Tian Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250414211828.3530741-1-alex.williamson@redhat.com --- drivers/pci/pci.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 4d7c9f64ea24ec..e77d5b53c0cec9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5429,8 +5429,6 @@ static bool pci_bus_resettable(struct pci_bus *bus) return false; list_for_each_entry(dev, &bus->devices, bus_list) { - if (!pci_reset_supported(dev)) - return false; if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || (dev->subordinate && !pci_bus_resettable(dev->subordinate))) return false; @@ -5507,8 +5505,6 @@ static bool pci_slot_resettable(struct pci_slot *slot) list_for_each_entry(dev, &slot->bus->devices, bus_list) { if (!dev->slot || dev->slot != slot) continue; - if (!pci_reset_supported(dev)) - return false; if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || (dev->subordinate && !pci_bus_resettable(dev->subordinate))) return false; From 688abe1027d00b7d4b2ce2d8764a2ae5ec6d250b Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Sat, 12 Apr 2025 13:33:27 -0500 Subject: [PATCH 0712/2924] octeontx2-pf: handle otx2_mbox_get_rsp errors Adding error pointer check after calling otx2_mbox_get_rsp(). This is similar to the commit bd3110bc102a ("octeontx2-pf: handle otx2_mbox_get_rsp errors in otx2_flows.c"). Signed-off-by: Chenyuan Yang Fixes: 6c40ca957fe5 ("octeontx2-pf: Adds TC offload support") Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250412183327.3550970-1-chenyuan0y@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/rep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index 04e08e06f30ff2..7153a71dfc860e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -67,6 +67,8 @@ static int rvu_rep_mcam_flow_init(struct rep_dev *rep) rsp = (struct npc_mcam_alloc_entry_rsp *)otx2_mbox_get_rsp (&priv->mbox.mbox, 0, &req->hdr); + if (IS_ERR(rsp)) + goto exit; for (ent = 0; ent < rsp->count; ent++) rep->flow_cfg->flow_ent[ent + allocated] = rsp->entry_list[ent]; From 903d2b9f9efc5b3339d74015fcfc0d9fff276c4c Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 14 Apr 2025 10:39:42 +0200 Subject: [PATCH 0713/2924] net: ethernet: ti: am65-cpsw: fix port_np reference counting A reference to the device tree node is stored in a private struct, thus the reference count has to be incremented. Also, decrement the count on device removal and in the error path. Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver") Signed-off-by: Michael Walle Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250414083942.4015060-1-mwalle@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index c9fd34787c9986..1e6d2335293df6 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2666,7 +2666,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) of_property_read_bool(port_np, "ti,mac-only"); /* get phy/link info */ - port->slave.port_np = port_np; + port->slave.port_np = of_node_get(port_np); ret = of_get_phy_mode(port_np, &port->slave.phy_if); if (ret) { dev_err(dev, "%pOF read phy-mode err %d\n", @@ -2720,6 +2720,17 @@ static void am65_cpsw_nuss_phylink_cleanup(struct am65_cpsw_common *common) } } +static void am65_cpsw_remove_dt(struct am65_cpsw_common *common) +{ + struct am65_cpsw_port *port; + int i; + + for (i = 0; i < common->port_num; i++) { + port = &common->ports[i]; + of_node_put(port->slave.port_np); + } +} + static int am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) { @@ -3622,6 +3633,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) am65_cpsw_nuss_cleanup_ndev(common); am65_cpsw_nuss_phylink_cleanup(common); am65_cpts_release(common->cpts); + am65_cpsw_remove_dt(common); err_of_clear: if (common->mdio_dev) of_platform_device_destroy(common->mdio_dev, NULL); @@ -3661,6 +3673,7 @@ static void am65_cpsw_nuss_remove(struct platform_device *pdev) am65_cpsw_nuss_phylink_cleanup(common); am65_cpts_release(common->cpts); am65_cpsw_disable_serdes_phy(common); + am65_cpsw_remove_dt(common); if (common->mdio_dev) of_platform_device_destroy(common->mdio_dev, NULL); From 12f2d033fae957d84c2c0ce604d2a077e61fa2c0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 07:32:10 -0700 Subject: [PATCH 0714/2924] eth: bnxt: fix missing ring index trim on error path Commit under Fixes converted tx_prod to be free running but missed masking it on the Tx error path. This crashes on error conditions, for example when DMA mapping fails. Fixes: 6d1add95536b ("bnxt_en: Modify TX ring indexing logic.") Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250414143210.458625-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8725e1e1390825..c8e3468eee612a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -787,7 +787,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb_any(skb); tx_kick_pending: if (BNXT_TX_PTP_IS_SET(lflags)) { - txr->tx_buf_ring[txr->tx_prod].is_ts_pkt = 0; + txr->tx_buf_ring[RING_TX(bp, txr->tx_prod)].is_ts_pkt = 0; atomic64_inc(&bp->ptp_cfg->stats.ts_err); if (!(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) /* set SKB to err so PTP worker will clean up */ @@ -795,7 +795,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (txr->kick_pending) bnxt_txr_db_kick(bp, txr, txr->tx_prod); - txr->tx_buf_ring[txr->tx_prod].skb = NULL; + txr->tx_buf_ring[RING_TX(bp, txr->tx_prod)].skb = NULL; dev_core_stats_tx_dropped_inc(dev); return NETDEV_TX_OK; } From 2d300ce0b783ba74420052ed3a12010ac93bdb08 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Apr 2025 20:20:21 +0300 Subject: [PATCH 0715/2924] net: fib_rules: Fix iif / oif matching on L3 master device Before commit 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") it was possible to use FIB rules to match on a L3 domain. This was done by having a FIB rule match on iif / oif being a L3 master device. It worked because prior to the FIB rule lookup the iif / oif fields in the flow structure were reset to the index of the L3 master device to which the input / output device was enslaved to. The above scheme made it impossible to match on the original input / output device. Therefore, cited commit stopped overwriting the iif / oif fields in the flow structure and instead stored the index of the enslaving L3 master device in a new field ('flowi_l3mdev') in the flow structure. While the change enabled new use cases, it broke the original use case of matching on a L3 domain. Fix this by interpreting the iif / oif matching on a L3 master device as a match against the L3 domain. In other words, if the iif / oif in the FIB rule points to a L3 master device, compare the provided index against 'flowi_l3mdev' rather than 'flowi_{i,o}if'. Before cited commit, a FIB rule that matched on 'iif vrf1' would only match incoming traffic from devices enslaved to 'vrf1'. With the proposed change (i.e., comparing against 'flowi_l3mdev'), the rule would also match traffic originating from a socket bound to 'vrf1'. Avoid that by adding a new flow flag ('FLOWI_FLAG_L3MDEV_OIF') that indicates if the L3 domain was derived from the output interface or the input interface (when not set) and take this flag into account when evaluating the FIB rule against the flow structure. Avoid unnecessary checks in the data path by detecting that a rule matches on a L3 master device when the rule is installed and marking it as such. Tested using the following script [1]. Output before 40867d74c374 (v5.4.291): default dev dummy1 table 100 scope link default dev dummy1 table 200 scope link Output after 40867d74c374: default dev dummy1 table 300 scope link default dev dummy1 table 300 scope link Output with this patch: default dev dummy1 table 100 scope link default dev dummy1 table 200 scope link [1] #!/bin/bash ip link add name vrf1 up type vrf table 10 ip link add name dummy1 up master vrf1 type dummy sysctl -wq net.ipv4.conf.all.forwarding=1 sysctl -wq net.ipv4.conf.all.rp_filter=0 ip route add table 100 default dev dummy1 ip route add table 200 default dev dummy1 ip route add table 300 default dev dummy1 ip rule add prio 0 oif vrf1 table 100 ip rule add prio 1 iif vrf1 table 200 ip rule add prio 2 table 300 ip route get 192.0.2.1 oif dummy1 fibmatch ip route get 192.0.2.1 iif dummy1 from 198.51.100.1 fibmatch Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") Reported-by: hanhuihui Closes: https://lore.kernel.org/netdev/ec671c4f821a4d63904d0da15d604b75@huawei.com/ Signed-off-by: Ido Schimmel Acked-by: David Ahern Link: https://patch.msgid.link/20250414172022.242991-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/fib_rules.h | 2 ++ include/net/flow.h | 1 + include/net/l3mdev.h | 27 +++++++++++++++++++++++ net/core/fib_rules.c | 48 ++++++++++++++++++++++++++++++++++------- net/l3mdev/l3mdev.c | 4 +++- 5 files changed, 73 insertions(+), 9 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 5927910ec06e55..6e68e359ad18c2 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -45,6 +45,8 @@ struct fib_rule { struct fib_rule_port_range dport_range; u16 sport_mask; u16 dport_mask; + u8 iif_is_l3_master; + u8 oif_is_l3_master; struct rcu_head rcu; }; diff --git a/include/net/flow.h b/include/net/flow.h index 335bbc52171c10..2a3f0c42f09250 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -38,6 +38,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 +#define FLOWI_FLAG_L3MDEV_OIF 0x04 __u32 flowic_secid; kuid_t flowic_uid; __u32 flowic_multipath_hash; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index f7fe796e8429a5..1eb8dad18f7e35 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -59,6 +59,20 @@ int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); +static inline +bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) +{ + return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) && + fl->flowi_l3mdev == iifindex; +} + +static inline +bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) +{ + return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF && + fl->flowi_l3mdev == oifindex; +} + void l3mdev_update_flow(struct net *net, struct flowi *fl); int l3mdev_master_ifindex_rcu(const struct net_device *dev); @@ -327,6 +341,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, { return 1; } + +static inline +bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) +{ + return false; +} + +static inline +bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) +{ + return false; +} + static inline void l3mdev_update_flow(struct net *net, struct flowi *fl) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 4bc64d912a1c00..7af302080a6608 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -257,6 +257,24 @@ static int nla_put_port_range(struct sk_buff *skb, int attrtype, return nla_put(skb, attrtype, sizeof(*range), range); } +static bool fib_rule_iif_match(const struct fib_rule *rule, int iifindex, + const struct flowi *fl) +{ + u8 iif_is_l3_master = READ_ONCE(rule->iif_is_l3_master); + + return iif_is_l3_master ? l3mdev_fib_rule_iif_match(fl, iifindex) : + fl->flowi_iif == iifindex; +} + +static bool fib_rule_oif_match(const struct fib_rule *rule, int oifindex, + const struct flowi *fl) +{ + u8 oif_is_l3_master = READ_ONCE(rule->oif_is_l3_master); + + return oif_is_l3_master ? l3mdev_fib_rule_oif_match(fl, oifindex) : + fl->flowi_oif == oifindex; +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -264,11 +282,11 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, int iifindex, oifindex, ret = 0; iifindex = READ_ONCE(rule->iifindex); - if (iifindex && (iifindex != fl->flowi_iif)) + if (iifindex && !fib_rule_iif_match(rule, iifindex, fl)) goto out; oifindex = READ_ONCE(rule->oifindex); - if (oifindex && (oifindex != fl->flowi_oif)) + if (oifindex && !fib_rule_oif_match(rule, oifindex, fl)) goto out; if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) @@ -736,16 +754,20 @@ static int fib_nl2rule_rtnl(struct fib_rule *nlrule, struct net_device *dev; dev = __dev_get_by_name(nlrule->fr_net, nlrule->iifname); - if (dev) + if (dev) { nlrule->iifindex = dev->ifindex; + nlrule->iif_is_l3_master = netif_is_l3_master(dev); + } } if (tb[FRA_OIFNAME]) { struct net_device *dev; dev = __dev_get_by_name(nlrule->fr_net, nlrule->oifname); - if (dev) + if (dev) { nlrule->oifindex = dev->ifindex; + nlrule->oif_is_l3_master = netif_is_l3_master(dev); + } } return 0; @@ -1336,11 +1358,17 @@ static void attach_rules(struct list_head *rules, struct net_device *dev) list_for_each_entry(rule, rules, list) { if (rule->iifindex == -1 && - strcmp(dev->name, rule->iifname) == 0) + strcmp(dev->name, rule->iifname) == 0) { WRITE_ONCE(rule->iifindex, dev->ifindex); + WRITE_ONCE(rule->iif_is_l3_master, + netif_is_l3_master(dev)); + } if (rule->oifindex == -1 && - strcmp(dev->name, rule->oifname) == 0) + strcmp(dev->name, rule->oifname) == 0) { WRITE_ONCE(rule->oifindex, dev->ifindex); + WRITE_ONCE(rule->oif_is_l3_master, + netif_is_l3_master(dev)); + } } } @@ -1349,10 +1377,14 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) struct fib_rule *rule; list_for_each_entry(rule, rules, list) { - if (rule->iifindex == dev->ifindex) + if (rule->iifindex == dev->ifindex) { WRITE_ONCE(rule->iifindex, -1); - if (rule->oifindex == dev->ifindex) + WRITE_ONCE(rule->iif_is_l3_master, false); + } + if (rule->oifindex == dev->ifindex) { WRITE_ONCE(rule->oifindex, -1); + WRITE_ONCE(rule->oif_is_l3_master, false); + } } } diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ca10916340b098..5432a5f2dfc8a0 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -277,8 +277,10 @@ void l3mdev_update_flow(struct net *net, struct flowi *fl) if (fl->flowi_oif) { dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev) { - if (!fl->flowi_l3mdev) + if (!fl->flowi_l3mdev) { fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); + fl->flowi_flags |= FLOWI_FLAG_L3MDEV_OIF; + } /* oif set to L3mdev directs lookup to its table; * reset to avoid oif match in fib_lookup From f9c87590ed6ab78b69042ea31b7b8e37302d53f3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Apr 2025 20:20:22 +0300 Subject: [PATCH 0716/2924] selftests: fib_rule_tests: Add VRF match tests Add tests for FIB rules that match on iif / oif being a VRF device. Test both good and bad flows. With previous patch ("net: fib_rules: Fix iif / oif matching on L3 master device"): # ./fib_rule_tests.sh [...] Tests passed: 328 Tests failed: 0 Without it: # ./fib_rule_tests.sh [...] Tests passed: 324 Tests failed: 4 Signed-off-by: Ido Schimmel Acked-by: David Ahern Link: https://patch.msgid.link/20250414172022.242991-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_rule_tests.sh | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index b866bab1d92a1c..c7cea556b41694 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -359,6 +359,23 @@ fib_rule6_test() "$getnomatch" "iif flowlabel masked redirect to table" \ "iif flowlabel masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP6 iif vrf0" + getmatch="from $SRC_IP6 iif $DEV" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule6_vrf_test() @@ -635,6 +652,23 @@ fib_rule4_test() "$getnomatch" "iif dscp masked redirect to table" \ "iif dscp masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP iif vrf0" + getmatch="from $SRC_IP iif $DEV" + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule4_vrf_test() From 10a77965760c6e2b3eef483be33ae407004df894 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 14 Apr 2025 20:05:37 +0200 Subject: [PATCH 0717/2924] batman-adv: Fix double-hold of meshif when getting enabled It was originally meant to replace the dev_hold with netdev_hold. But this was missed in batadv_hardif_enable_interface(). As result, there was an imbalance and a hang when trying to remove the mesh-interface with (previously) active hard-interfaces: unregister_netdevice: waiting for batadv0 to become free. Usage count = 3 Fixes: 00b35530811f ("batman-adv: adopt netdev_hold() / netdev_put()") Suggested-by: Eric Dumazet Reported-by: syzbot+ff3aa851d46ab82953a3@syzkaller.appspotmail.com Reported-by: syzbot+4036165fc595a74b09b2@syzkaller.appspotmail.com Reported-by: syzbot+c35d73ce910d86c0026e@syzkaller.appspotmail.com Reported-by: syzbot+48c14f61594bdfadb086@syzkaller.appspotmail.com Reported-by: syzbot+f37372d86207b3bb2941@syzkaller.appspotmail.com Signed-off-by: Sven Eckelmann Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250414-double_hold_fix-v5-1-10e056324cde@narfation.org Signed-off-by: Jakub Kicinski --- net/batman-adv/hard-interface.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f145f966265310..7cd4bdcee43935 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -725,7 +725,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, kref_get(&hard_iface->refcount); - dev_hold(mesh_iface); netdev_hold(mesh_iface, &hard_iface->meshif_dev_tracker, GFP_ATOMIC); hard_iface->mesh_iface = mesh_iface; bat_priv = netdev_priv(hard_iface->mesh_iface); From f2fed441c69b9237760840a45a004730ff324faf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 Apr 2025 15:09:40 +0200 Subject: [PATCH 0718/2924] loop: stop using vfs_iter_{read,write} for buffered I/O vfs_iter_{read,write} always perform direct I/O when the file has the O_DIRECT flag set, which breaks disabling direct I/O using the LOOP_SET_STATUS / LOOP_SET_STATUS64 ioctls. This was recenly reported as a regression, but as far as I can tell was only uncovered by better checking for block sizes and has been around since the direct I/O support was added. Fix this by using the existing aio code that calls the raw read/write iter methods instead. Note that despite the comments there is no need for block drivers to ever call flush_dcache_page themselves, and the call is a left-over from prehistoric times. Fixes: ab1cb278bc70 ("block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO") Reported-by: Darrick J. Wong Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Tested-by: Darrick J. Wong Link: https://lore.kernel.org/r/20250409130940.3685677-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 112 +++++++------------------------------------ 1 file changed, 17 insertions(+), 95 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e9ec7a45f3f2d1..46cba261075f23 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -211,72 +211,6 @@ static void loop_set_size(struct loop_device *lo, loff_t size) kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); } -static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) -{ - struct iov_iter i; - ssize_t bw; - - iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len); - - bw = vfs_iter_write(file, &i, ppos, 0); - - if (likely(bw == bvec->bv_len)) - return 0; - - printk_ratelimited(KERN_ERR - "loop: Write error at byte offset %llu, length %i.\n", - (unsigned long long)*ppos, bvec->bv_len); - if (bw >= 0) - bw = -EIO; - return bw; -} - -static int lo_write_simple(struct loop_device *lo, struct request *rq, - loff_t pos) -{ - struct bio_vec bvec; - struct req_iterator iter; - int ret = 0; - - rq_for_each_segment(bvec, rq, iter) { - ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos); - if (ret < 0) - break; - cond_resched(); - } - - return ret; -} - -static int lo_read_simple(struct loop_device *lo, struct request *rq, - loff_t pos) -{ - struct bio_vec bvec; - struct req_iterator iter; - struct iov_iter i; - ssize_t len; - - rq_for_each_segment(bvec, rq, iter) { - iov_iter_bvec(&i, ITER_DEST, &bvec, 1, bvec.bv_len); - len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); - if (len < 0) - return len; - - flush_dcache_page(bvec.bv_page); - - if (len != bvec.bv_len) { - struct bio *bio; - - __rq_for_each_bio(bio, rq) - zero_fill_bio(bio); - break; - } - cond_resched(); - } - - return 0; -} - static void loop_clear_limits(struct loop_device *lo, int mode) { struct queue_limits lim = queue_limits_start_update(lo->lo_queue); @@ -342,7 +276,7 @@ static void lo_complete_rq(struct request *rq) struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); blk_status_t ret = BLK_STS_OK; - if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || + if (cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || req_op(rq) != REQ_OP_READ) { if (cmd->ret < 0) ret = errno_to_blk_status(cmd->ret); @@ -358,14 +292,13 @@ static void lo_complete_rq(struct request *rq) cmd->ret = 0; blk_mq_requeue_request(rq, true); } else { - if (cmd->use_aio) { - struct bio *bio = rq->bio; + struct bio *bio = rq->bio; - while (bio) { - zero_fill_bio(bio); - bio = bio->bi_next; - } + while (bio) { + zero_fill_bio(bio); + bio = bio->bi_next; } + ret = BLK_STS_IOERR; end_io: blk_mq_end_request(rq, ret); @@ -445,9 +378,14 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_pos = pos; cmd->iocb.ki_filp = file; - cmd->iocb.ki_complete = lo_rw_aio_complete; - cmd->iocb.ki_flags = IOCB_DIRECT; cmd->iocb.ki_ioprio = req_get_ioprio(rq); + if (cmd->use_aio) { + cmd->iocb.ki_complete = lo_rw_aio_complete; + cmd->iocb.ki_flags = IOCB_DIRECT; + } else { + cmd->iocb.ki_complete = NULL; + cmd->iocb.ki_flags = 0; + } if (rw == ITER_SOURCE) ret = file->f_op->write_iter(&cmd->iocb, &iter); @@ -458,7 +396,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, if (ret != -EIOCBQUEUED) lo_rw_aio_complete(&cmd->iocb, ret); - return 0; + return -EIOCBQUEUED; } static int do_req_filebacked(struct loop_device *lo, struct request *rq) @@ -466,15 +404,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; - /* - * lo_write_simple and lo_read_simple should have been covered - * by io submit style function like lo_rw_aio(), one blocker - * is that lo_read_simple() need to call flush_dcache_page after - * the page is written from kernel, and it isn't easy to handle - * this in io submit style function which submits all segments - * of the req at one time. And direct read IO doesn't need to - * run flush_dcache_page(). - */ switch (req_op(rq)) { case REQ_OP_FLUSH: return lo_req_flush(lo, rq); @@ -490,15 +419,9 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) case REQ_OP_DISCARD: return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE); case REQ_OP_WRITE: - if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, ITER_SOURCE); - else - return lo_write_simple(lo, rq, pos); + return lo_rw_aio(lo, cmd, pos, ITER_SOURCE); case REQ_OP_READ: - if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, ITER_DEST); - else - return lo_read_simple(lo, rq, pos); + return lo_rw_aio(lo, cmd, pos, ITER_DEST); default: WARN_ON_ONCE(1); return -EIO; @@ -1922,7 +1845,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd) struct loop_device *lo = rq->q->queuedata; int ret = 0; struct mem_cgroup *old_memcg = NULL; - const bool use_aio = cmd->use_aio; if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { ret = -EIO; @@ -1952,7 +1874,7 @@ static void loop_handle_cmd(struct loop_cmd *cmd) } failed: /* complete non-aio request */ - if (!use_aio || ret) { + if (ret != -EIOCBQUEUED) { if (ret == -EOPNOTSUPP) cmd->ret = ret; else From 0b7a4817756c7906d0a8112c953ce88d7cd8d4c6 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 15 Apr 2025 18:41:10 -0600 Subject: [PATCH 0719/2924] ublk: don't suggest CONFIG_BLK_DEV_UBLK=Y The CONFIG_BLK_DEV_UBLK help text suggests setting the config option to Y so task_work_add() can be used to dispatch I/O, improving performance. However, this mechanism was removed in commit 29dc5d06613f2 ("ublk: kill queuing request by task_work_add"). So remove this paragraph from the config help text. Signed-off-by: Caleb Sander Mateos Reviewed-by: Uday Shankar Link: https://lore.kernel.org/r/20250416004111.3242817-1-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index a97f2c40c640dd..9e129fec4bb795 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -388,12 +388,6 @@ config BLK_DEV_UBLK definition isn't finalized yet, and might change according to future requirement, so mark is as experimental now. - Say Y if you want to get better performance because task_work_add() - can be used in IO path for replacing io_uring cmd, which will become - shared between IO tasks and ubq daemon, meantime task_work_add() can - can handle batch more effectively, but task_work_add() isn't exported - for module, so ublk has to be built to kernel. - config BLKDEV_UBLK_LEGACY_OPCODES bool "Support legacy command opcode" depends on BLK_DEV_UBLK From 26d7fb4fd4ca1180e2fa96587dea544563b4962a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 14 Apr 2025 14:05:09 +0200 Subject: [PATCH 0720/2924] nvme: fixup scan failure for non-ANA multipath controllers Commit 62baf70c3274 caused the ANA log page to be re-read, even on controllers that do not support ANA. While this should generally harmless, some controllers hang on the unsupported log page and never finish probing. Fixes: 62baf70c3274 ("nvme: re-read ANA log page after ns scan completes") Signed-off-by: Hannes Reinecke Tested-by: Srikanth Aithal [hch: more detailed commit message] Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b502ac07483ba8..eb6ea8acb3cca7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4300,7 +4300,7 @@ static void nvme_scan_work(struct work_struct *work) if (test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) nvme_queue_scan(ctrl); #ifdef CONFIG_NVME_MULTIPATH - else + else if (ctrl->ana_log_buf) /* Re-read the ANA log page to not miss updates */ queue_work(nvme_wq, &ctrl->ana_work); #endif From 08937bcd4cfe11405d80b35041c38cf4a4b046ed Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 15 Apr 2025 08:47:37 +0200 Subject: [PATCH 0721/2924] nvme-multipath: sysfs links may not be created for devices When rapidly rescanning for new namespaces nvme_mpath_add_sysfs_link() may be called for a block device not added to sysfs. But NVME_NS_SYSFS_ATTR_LINK had already been set, so when checking this device a second time we will fail to create the link. Fix this by exchanging the order of the block device check and the NVME_NS_SYSFS_ATTR_LINK bit check. Fixes: 4dbd2b2ebe4c ("nvme-multipath: Add visibility for round-robin io-policy") Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg ** Reviewed-by: Nilay Shroff Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 94152cf423f165..61b1d267ffdaa9 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -1050,6 +1050,13 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head) srcu_idx = srcu_read_lock(&head->srcu); list_for_each_entry_rcu(ns, &head->list, siblings) { + /* + * Ensure that ns path disk node is already added otherwise we + * may get invalid kobj name for target + */ + if (!test_bit(GD_ADDED, &ns->disk->state)) + continue; + /* * Avoid creating link if it already exists for the given path. * When path ana state transitions from optimized to non- @@ -1065,13 +1072,6 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head) if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) continue; - /* - * Ensure that ns path disk node is already added otherwise we - * may get invalid kobj name for target - */ - if (!test_bit(GD_ADDED, &ns->disk->state)) - continue; - target = disk_to_dev(ns->disk); /* * Create sysfs link from head gendisk kobject @kobj to the From b1efcc470eb30073f3dedb9a88cffa71ea75d853 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 11 Apr 2025 10:00:15 +0900 Subject: [PATCH 0722/2924] nvmet: auth: use NULL to clear a pointer in nvmet_auth_sq_free() When compiling with C=1, the following sparse warning is generated: auth.c:243:23: warning: Using plain integer as NULL pointer Avoid this warning by using NULL to instead of 0 to set the sq tls_key pointer. Fixes: fa2e0f8bbc68 ("nvmet-tcp: support secure channel concatenation") Signed-off-by: Damien Le Moal Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 0b0645ac5df478..cef8d77f477b95 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -240,7 +240,7 @@ void nvmet_auth_sq_free(struct nvmet_sq *sq) { cancel_delayed_work(&sq->auth_expired_work); #ifdef CONFIG_NVME_TARGET_TCP_TLS - sq->tls_key = 0; + sq->tls_key = NULL; #endif kfree(sq->dhchap_c1); sq->dhchap_c1 = NULL; From ffe0398c7d6a38af0584d4668d3762b7a97e2275 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 11 Apr 2025 10:42:09 +0900 Subject: [PATCH 0723/2924] nvmet: pci-epf: always fully initialize completion entries For a command that is normally processed through the command request execute() function, the completion entry for the command is initialized by __nvmet_req_complete() and nvmet_pci_epf_cq_work() only needs to set the status field and the phase of the completion entry before posting the entry to the completion queue. However, for commands that are failed due to an internal error (e.g. the command data buffer allocation fails), the command request execute() function is not called and __nvmet_req_complete() is never executed for the command, leaving the command completion entry uninitialized. For such command failed before calling req->execute(), the host ends up seeing completion entries with an invalid submission queue ID and command ID. Avoid such issue by always fully initilizing a command completion entry in nvmet_pci_epf_cq_work(), setting the entry submission queue head, ID and command ID. Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Christoph Hellwig --- drivers/nvme/target/pci-epf.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index 51c27b32248d01..43296c05319c35 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1648,16 +1648,17 @@ static int nvmet_pci_epf_process_sq(struct nvmet_pci_epf_ctrl *ctrl, { struct nvmet_pci_epf_iod *iod; int ret, n = 0; + u16 head = sq->head; sq->tail = nvmet_pci_epf_bar_read32(ctrl, sq->db); - while (sq->head != sq->tail && (!ctrl->sq_ab || n < ctrl->sq_ab)) { + while (head != sq->tail && (!ctrl->sq_ab || n < ctrl->sq_ab)) { iod = nvmet_pci_epf_alloc_iod(sq); if (!iod) break; /* Get the NVMe command submitted by the host. */ ret = nvmet_pci_epf_transfer(ctrl, &iod->cmd, - sq->pci_addr + sq->head * sq->qes, + sq->pci_addr + head * sq->qes, sq->qes, DMA_FROM_DEVICE); if (ret) { /* Not much we can do... */ @@ -1666,12 +1667,13 @@ static int nvmet_pci_epf_process_sq(struct nvmet_pci_epf_ctrl *ctrl, } dev_dbg(ctrl->dev, "SQ[%u]: head %u, tail %u, command %s\n", - sq->qid, sq->head, sq->tail, + sq->qid, head, sq->tail, nvmet_pci_epf_iod_name(iod)); - sq->head++; - if (sq->head == sq->depth) - sq->head = 0; + head++; + if (head == sq->depth) + head = 0; + WRITE_ONCE(sq->head, head); n++; queue_work_on(WORK_CPU_UNBOUND, sq->iod_wq, &iod->work); @@ -1761,8 +1763,17 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work) if (!iod) break; - /* Post the IOD completion entry. */ + /* + * Post the IOD completion entry. If the IOD request was + * executed (req->execute() called), the CQE is already + * initialized. However, the IOD may have been failed before + * that, leaving the CQE not properly initialized. So always + * initialize it here. + */ cqe = &iod->cqe; + cqe->sq_head = cpu_to_le16(READ_ONCE(iod->sq->head)); + cqe->sq_id = cpu_to_le16(iod->sq->qid); + cqe->command_id = iod->cmd.common.command_id; cqe->status = cpu_to_le16((iod->status << 1) | cq->phase); dev_dbg(ctrl->dev, From f8e01fa93f3e4fc255d240cfa0c045ce0b5c97ea Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 11 Apr 2025 10:42:10 +0900 Subject: [PATCH 0724/2924] nvmet: pci-epf: clear CC and CSTS when disabling the controller When a host shuts down the controller when shutting down but does so without first disabling the controller, the enable bit remains set in the controller configuration register. When the host restarts and attempts to enable the controller again, the nvmet_pci_epf_poll_cc_work() function is unable to detect the change from 0 to 1 of the enable bit, and thus the controller is not enabled again, which result in a device scan timeout on the host. This problem also occurs if the host shuts down uncleanly or if the PCIe link goes down: as the CC.EN value is not reset, the controller is not enabled again when the host restarts. Fix this by introducing the function nvmet_pci_epf_clear_ctrl_config() to clear the CC and CSTS registers of the controller when the PCIe link is lost (nvmet_pci_epf_stop_ctrl() function), or when starting the controller fails (nvmet_pci_epf_enable_ctrl() fails). Also use this function in nvmet_pci_epf_init_bar() to simplify the initialization of the CC and CSTS registers. Furthermore, modify the function nvmet_pci_epf_disable_ctrl() to clear the CC.EN bit and write this updated value to the BAR register when the controller is shutdown by the host, to ensure that upon restart, we can detect the host setting CC.EN. Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Christoph Hellwig --- drivers/nvme/target/pci-epf.c | 49 +++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index 43296c05319c35..c3d30d34f8cedc 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1811,6 +1811,21 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work) NVMET_PCI_EPF_CQ_RETRY_INTERVAL); } +static void nvmet_pci_epf_clear_ctrl_config(struct nvmet_pci_epf_ctrl *ctrl) +{ + struct nvmet_ctrl *tctrl = ctrl->tctrl; + + /* Initialize controller status. */ + tctrl->csts = 0; + ctrl->csts = 0; + nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts); + + /* Initialize controller configuration and start polling. */ + tctrl->cc = 0; + ctrl->cc = 0; + nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc); +} + static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) { u64 pci_addr, asq, acq; @@ -1876,18 +1891,20 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) return 0; err: - ctrl->csts = 0; + nvmet_pci_epf_clear_ctrl_config(ctrl); return -EINVAL; } -static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) +static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl, + bool shutdown) { int qid; if (!ctrl->enabled) return; - dev_info(ctrl->dev, "Disabling controller\n"); + dev_info(ctrl->dev, "%s controller\n", + shutdown ? "Shutting down" : "Disabling"); ctrl->enabled = false; cancel_delayed_work_sync(&ctrl->poll_sqs); @@ -1904,6 +1921,11 @@ static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) nvmet_pci_epf_delete_cq(ctrl->tctrl, 0); ctrl->csts &= ~NVME_CSTS_RDY; + if (shutdown) { + ctrl->csts |= NVME_CSTS_SHST_CMPLT; + ctrl->cc &= ~NVME_CC_ENABLE; + nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc); + } } static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) @@ -1930,12 +1952,10 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) } if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc)) - nvmet_pci_epf_disable_ctrl(ctrl); + nvmet_pci_epf_disable_ctrl(ctrl, false); - if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) { - nvmet_pci_epf_disable_ctrl(ctrl); - ctrl->csts |= NVME_CSTS_SHST_CMPLT; - } + if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) + nvmet_pci_epf_disable_ctrl(ctrl, true); if (!nvmet_cc_shn(new_cc) && nvmet_cc_shn(old_cc)) ctrl->csts &= ~NVME_CSTS_SHST_CMPLT; @@ -1974,16 +1994,10 @@ static void nvmet_pci_epf_init_bar(struct nvmet_pci_epf_ctrl *ctrl) /* Clear Controller Memory Buffer Supported (CMBS). */ ctrl->cap &= ~(0x1ULL << 57); - /* Controller configuration. */ - ctrl->cc = tctrl->cc & (~NVME_CC_ENABLE); - - /* Controller status. */ - ctrl->csts = ctrl->tctrl->csts; - nvmet_pci_epf_bar_write64(ctrl, NVME_REG_CAP, ctrl->cap); nvmet_pci_epf_bar_write32(ctrl, NVME_REG_VS, tctrl->subsys->ver); - nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts); - nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc); + + nvmet_pci_epf_clear_ctrl_config(ctrl); } static int nvmet_pci_epf_create_ctrl(struct nvmet_pci_epf *nvme_epf, @@ -2088,7 +2102,8 @@ static void nvmet_pci_epf_stop_ctrl(struct nvmet_pci_epf_ctrl *ctrl) { cancel_delayed_work_sync(&ctrl->poll_cc); - nvmet_pci_epf_disable_ctrl(ctrl); + nvmet_pci_epf_disable_ctrl(ctrl, false); + nvmet_pci_epf_clear_ctrl_config(ctrl); } static void nvmet_pci_epf_destroy_ctrl(struct nvmet_pci_epf_ctrl *ctrl) From ad91308d3bdeb9d90ef4a400f379ce461f0fb6a7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 11 Apr 2025 10:42:11 +0900 Subject: [PATCH 0725/2924] nvmet: pci-epf: cleanup link state management Since the link_up boolean field of struct nvmet_pci_epf_ctrl is always set to true when nvmet_pci_epf_start_ctrl() is called, assign true to this field in nvmet_pci_epf_start_ctrl(). Conversely, since this field is set to false when nvmet_pci_epf_stop_ctrl() is called, set this field to false directly inside that function. While at it, also add information messages to notify the user of the PCI link state changes to help troubleshoot any link stability issues without needing to enable debug messages. Signed-off-by: Damien Le Moal Reviewed-by: Keith Busch Reviewed-by: Niklas Cassel Signed-off-by: Christoph Hellwig --- drivers/nvme/target/pci-epf.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index c3d30d34f8cedc..7fab7f3d79b743 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -2095,11 +2095,18 @@ static int nvmet_pci_epf_create_ctrl(struct nvmet_pci_epf *nvme_epf, static void nvmet_pci_epf_start_ctrl(struct nvmet_pci_epf_ctrl *ctrl) { + + dev_info(ctrl->dev, "PCI link up\n"); + ctrl->link_up = true; + schedule_delayed_work(&ctrl->poll_cc, NVMET_PCI_EPF_CC_POLL_INTERVAL); } static void nvmet_pci_epf_stop_ctrl(struct nvmet_pci_epf_ctrl *ctrl) { + dev_info(ctrl->dev, "PCI link down\n"); + ctrl->link_up = false; + cancel_delayed_work_sync(&ctrl->poll_cc); nvmet_pci_epf_disable_ctrl(ctrl, false); @@ -2326,10 +2333,8 @@ static int nvmet_pci_epf_epc_init(struct pci_epf *epf) if (ret) goto out_clear_bar; - if (!epc_features->linkup_notifier) { - ctrl->link_up = true; + if (!epc_features->linkup_notifier) nvmet_pci_epf_start_ctrl(&nvme_epf->ctrl); - } return 0; @@ -2345,7 +2350,6 @@ static void nvmet_pci_epf_epc_deinit(struct pci_epf *epf) struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf); struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl; - ctrl->link_up = false; nvmet_pci_epf_destroy_ctrl(ctrl); nvmet_pci_epf_deinit_dma(nvme_epf); @@ -2357,7 +2361,6 @@ static int nvmet_pci_epf_link_up(struct pci_epf *epf) struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf); struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl; - ctrl->link_up = true; nvmet_pci_epf_start_ctrl(ctrl); return 0; @@ -2368,7 +2371,6 @@ static int nvmet_pci_epf_link_down(struct pci_epf *epf) struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf); struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl; - ctrl->link_up = false; nvmet_pci_epf_stop_ctrl(ctrl); return 0; From 968e1cbb1f6293c3add9607f80b5ce3d29f57583 Mon Sep 17 00:00:00 2001 From: Adam Xue Date: Mon, 14 Apr 2025 14:14:37 -0700 Subject: [PATCH 0726/2924] USB: serial: option: add Sierra Wireless EM9291 Add Sierra Wireless EM9291. Interface 0: MBIM control 1: MBIM data 3: AT port 4: Diagnostic port T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1199 ProdID=90e3 Rev=00.06 S: Manufacturer=Sierra Wireless, Incorporated S: Product=Sierra Wireless EM9291 S: SerialNumber=xxxxxxxxxxxxxxxx C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=(none) E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Adam Xue Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5cd26dac2069fa..27879cc575365c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -611,6 +611,7 @@ static void option_instat_callback(struct urb *urb); /* Sierra Wireless products */ #define SIERRA_VENDOR_ID 0x1199 #define SIERRA_PRODUCT_EM9191 0x90d3 +#define SIERRA_PRODUCT_EM9291 0x90e3 /* UNISOC (Spreadtrum) products */ #define UNISOC_VENDOR_ID 0x1782 @@ -2432,6 +2433,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */ From b399078f882b6e5d32da18b6c696cc84b12f90d5 Mon Sep 17 00:00:00 2001 From: Michael Ehrenreich Date: Mon, 17 Mar 2025 06:17:15 +0100 Subject: [PATCH 0727/2924] USB: serial: ftdi_sio: add support for Abacus Electrics Optical Probe Abacus Electrics makes optical probes for interacting with smart meters over an optical interface. At least one version uses an FT232B chip (as detected by ftdi_sio) with a custom USB PID, which needs to be added to the list to make the device work in a plug-and-play fashion. Signed-off-by: Michael Ehrenreich Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 9b34e23b70919f..6ac7a0a5cf074e 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1093,6 +1093,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 2) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 3) }, + /* Abacus Electrics */ + { USB_DEVICE(FTDI_VID, ABACUS_OPTICAL_PROBE_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 52be47d684ea66..9acb6f83732763 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -442,6 +442,11 @@ #define LINX_FUTURE_1_PID 0xF44B /* Linx future device */ #define LINX_FUTURE_2_PID 0xF44C /* Linx future device */ +/* + * Abacus Electrics + */ +#define ABACUS_OPTICAL_PROBE_PID 0xf458 /* ABACUS ELECTRICS Optical Probe */ + /* * Oceanic product ids */ From 4cc01410e1c1dd075df10f750775c81d1cb6672b Mon Sep 17 00:00:00 2001 From: Craig Hesling Date: Tue, 8 Apr 2025 16:27:03 -0700 Subject: [PATCH 0728/2924] USB: serial: simple: add OWON HDS200 series oscilloscope support Add serial support for OWON HDS200 series oscilloscopes and likely many other pieces of OWON test equipment. OWON HDS200 series devices host two USB endpoints, designed to facilitate bidirectional SCPI. SCPI is a predominately ASCII text protocol for test/measurement equipment. Having a serial/tty interface for these devices lowers the barrier to entry for anyone trying to write programs to communicate with them. The following shows the USB descriptor for the OWON HDS272S running firmware V5.7.1: Bus 001 Device 068: ID 5345:1234 Owon PDS6062T Oscilloscope Negotiated speed: Full Speed (12Mbps) Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 [unknown] bDeviceSubClass 0 [unknown] bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x5345 Owon idProduct 0x1234 PDS6062T Oscilloscope bcdDevice 1.00 iManufacturer 1 oscilloscope iProduct 2 oscilloscope iSerial 3 oscilloscope bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 0x0029 bNumInterfaces 1 bConfigurationValue 1 iConfiguration 0 bmAttributes 0x80 (Bus Powered) MaxPower 100mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 5 Physical Interface Device bInterfaceSubClass 0 [unknown] bInterfaceProtocol 0 iInterface 0 ** UNRECOGNIZED: 09 21 11 01 00 01 22 5f 00 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 32 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 32 Device Status: 0x0000 (Bus Powered) OWON appears to be using the same USB Vendor and Product ID for many of their oscilloscopes. Looking at the discussion about the USB vendor/product ID, in the link bellow, suggests that this VID/PID is shared with VDS, SDS, PDS, and now the HDS series oscilloscopes. Available documentation for these devices seems to indicate that all use a similar SCPI protocol, some with RS232 options. It is likely that this same simple serial setup would work correctly for them all. Link: https://usb-ids.gowdy.us/read/UD/5345/1234 Signed-off-by: Craig Hesling Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/usb-serial-simple.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 2c12449ff60c51..a0afaf254d1229 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -100,6 +100,11 @@ DEVICE(nokia, NOKIA_IDS); { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ DEVICE_N(novatel_gps, NOVATEL_IDS, 3); +/* OWON electronic test and measurement equipment driver */ +#define OWON_IDS() \ + { USB_DEVICE(0x5345, 0x1234) } /* HDS200 oscilloscopes and others */ +DEVICE(owon, OWON_IDS); + /* Siemens USB/MPI adapter */ #define SIEMENS_IDS() \ { USB_DEVICE(0x908, 0x0004) } @@ -134,6 +139,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &motorola_tetra_device, &nokia_device, &novatel_gps_device, + &owon_device, &siemens_mpi_device, &suunto_device, &vivopay_device, @@ -153,6 +159,7 @@ static const struct usb_device_id id_table[] = { MOTOROLA_TETRA_IDS(), NOKIA_IDS(), NOVATEL_IDS(), + OWON_IDS(), SIEMENS_IDS(), SUUNTO_IDS(), VIVOPAY_IDS(), From d466304c4322ad391797437cd84cca7ce1660de0 Mon Sep 17 00:00:00 2001 From: Pi Xiange Date: Mon, 14 Apr 2025 11:28:39 +0800 Subject: [PATCH 0729/2924] x86/cpu: Add CPU model number for Bartlett Lake CPUs with Raptor Cove cores Bartlett Lake has a P-core only product with Raptor Cove. [ mingo: Switch around the define as pointed out by Christian Ludloff: Ratpr Cove is the core, Bartlett Lake is the product. Signed-off-by: Pi Xiange Signed-off-by: Ingo Molnar Cc: Christian Ludloff Cc: Peter Zijlstra Cc: Tony Luck Cc: Andrew Cooper Cc: "H. Peter Anvin" Cc: John Ogness Cc: "Ahmed S. Darwish" Cc: x86-cpuid@lists.linux.dev Link: https://lore.kernel.org/r/20250414032839.5368-1-xiange.pi@intel.com --- arch/x86/include/asm/intel-family.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 3a97a7eefb512e..be10c188614fe2 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -126,6 +126,8 @@ #define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) /* Redwood Cove */ #define INTEL_GRANITERAPIDS_D IFM(6, 0xAE) +#define INTEL_BARTLETTLAKE IFM(6, 0xD7) /* Raptor Cove */ + /* "Hybrid" Processors (P-Core/E-Core) */ #define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */ From a681b7c17dd21d5aa0da391ceb27a2007ba970a4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 15 Apr 2025 12:01:08 +0200 Subject: [PATCH 0730/2924] fs: ensure that *path_locked*() helpers leave passed path pristine The functions currently leaving dangling pointers in the passed-in path leading to hard to debug bugs in the long run. Ensure that the path is left in pristine state just like we do in e.g., path_parentat() and other helpers. Link: https://lore.kernel.org/20250414-rennt-wimmeln-f186c3a780f1@brauner Signed-off-by: Christian Brauner --- fs/namei.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 267ac9c22dd27f..84a0e0b0111c78 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2752,46 +2752,48 @@ static int filename_parentat(int dfd, struct filename *name, /* does lookup, returns the object with parent locked */ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct path *path) { + struct path parent_path __free(path_put) = {}; struct dentry *d; struct qstr last; int type, error; - error = filename_parentat(dfd, name, 0, path, &last, &type); + error = filename_parentat(dfd, name, 0, &parent_path, &last, &type); if (error) return ERR_PTR(error); - if (unlikely(type != LAST_NORM)) { - path_put(path); + if (unlikely(type != LAST_NORM)) return ERR_PTR(-EINVAL); - } - inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); - d = lookup_one_qstr_excl(&last, path->dentry, 0); + inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); + d = lookup_one_qstr_excl(&last, parent_path.dentry, 0); if (IS_ERR(d)) { - inode_unlock(path->dentry->d_inode); - path_put(path); + inode_unlock(parent_path.dentry->d_inode); + return d; } + path->dentry = no_free_ptr(parent_path.dentry); + path->mnt = no_free_ptr(parent_path.mnt); return d; } struct dentry *kern_path_locked_negative(const char *name, struct path *path) { + struct path parent_path __free(path_put) = {}; struct filename *filename __free(putname) = getname_kernel(name); struct dentry *d; struct qstr last; int type, error; - error = filename_parentat(AT_FDCWD, filename, 0, path, &last, &type); + error = filename_parentat(AT_FDCWD, filename, 0, &parent_path, &last, &type); if (error) return ERR_PTR(error); - if (unlikely(type != LAST_NORM)) { - path_put(path); + if (unlikely(type != LAST_NORM)) return ERR_PTR(-EINVAL); - } - inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); - d = lookup_one_qstr_excl_raw(&last, path->dentry, 0); + inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); + d = lookup_one_qstr_excl_raw(&last, parent_path.dentry, 0); if (IS_ERR(d)) { - inode_unlock(path->dentry->d_inode); - path_put(path); + inode_unlock(parent_path.dentry->d_inode); + return d; } + path->dentry = no_free_ptr(parent_path.dentry); + path->mnt = no_free_ptr(parent_path.mnt); return d; } From 2b8e6b58889c672e1ae3601d9b2b070be4dc2fbc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 13 Apr 2025 11:11:42 +0100 Subject: [PATCH 0731/2924] cpufreq: cppc: Fix invalid return value in .get() callback Returning a negative error code in a function with an unsigned return type is a pretty bad idea. It is probably worse when the justification for the change is "our static analisys tool found it". Fixes: cf7de25878a1 ("cppc_cpufreq: Fix possible null pointer dereference") Signed-off-by: Marc Zyngier Cc: "Rafael J. Wysocki" Cc: Viresh Kumar Reviewed-by: Lifeng Zheng Signed-off-by: Viresh Kumar --- drivers/cpufreq/cppc_cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index b3d74f9adcf0bc..cb93f00bafdbaf 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -747,7 +747,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) int ret; if (!policy) - return -ENODEV; + return 0; cpu_data = policy->driver_data; From baf2f2c2b4c8e1d398173acd4d2fa9131a86b84e Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Mon, 14 Apr 2025 16:04:53 +0200 Subject: [PATCH 0732/2924] platform/x86: msi-wmi-platform: Workaround a ACPI firmware bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ACPI byte code inside the ACPI control method responsible for handling the WMI method calls uses a global buffer for constructing the return value, yet the ACPI control method itself is not marked as "Serialized". This means that calling WMI methods on this WMI device is not thread-safe, as concurrent WMI method calls will corrupt the global buffer. Fix this by serializing the WMI method calls using a mutex. Cc: stable@vger.kernel.org # 6.x.x: 912d614ac99e: platform/x86: msi-wmi-platform: Rename "data" variable Fixes: 9c0beb6b29e7 ("platform/x86: wmi: Add MSI WMI Platform driver") Tested-by: Antheas Kapenekakis Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20250414140453.7691-2-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../wmi/devices/msi-wmi-platform.rst | 4 + drivers/platform/x86/msi-wmi-platform.c | 91 ++++++++++++------- 2 files changed, 63 insertions(+), 32 deletions(-) diff --git a/Documentation/wmi/devices/msi-wmi-platform.rst b/Documentation/wmi/devices/msi-wmi-platform.rst index 31a13694289238..73197b31926a57 100644 --- a/Documentation/wmi/devices/msi-wmi-platform.rst +++ b/Documentation/wmi/devices/msi-wmi-platform.rst @@ -138,6 +138,10 @@ input data, the meaning of which depends on the subfeature being accessed. The output buffer contains a single byte which signals success or failure (``0x00`` on failure) and 31 bytes of output data, the meaning if which depends on the subfeature being accessed. +.. note:: + The ACPI control method responsible for handling the WMI method calls is not thread-safe. + This is a firmware bug that needs to be handled inside the driver itself. + WMI method Get_EC() ------------------- diff --git a/drivers/platform/x86/msi-wmi-platform.c b/drivers/platform/x86/msi-wmi-platform.c index e15681dfca8d7e..dc5e9878cb6822 100644 --- a/drivers/platform/x86/msi-wmi-platform.c +++ b/drivers/platform/x86/msi-wmi-platform.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -76,8 +78,13 @@ enum msi_wmi_platform_method { MSI_PLATFORM_GET_WMI = 0x1d, }; -struct msi_wmi_platform_debugfs_data { +struct msi_wmi_platform_data { struct wmi_device *wdev; + struct mutex wmi_lock; /* Necessary when calling WMI methods */ +}; + +struct msi_wmi_platform_debugfs_data { + struct msi_wmi_platform_data *data; enum msi_wmi_platform_method method; struct rw_semaphore buffer_lock; /* Protects debugfs buffer */ size_t length; @@ -132,8 +139,9 @@ static int msi_wmi_platform_parse_buffer(union acpi_object *obj, u8 *output, siz return 0; } -static int msi_wmi_platform_query(struct wmi_device *wdev, enum msi_wmi_platform_method method, - u8 *input, size_t input_length, u8 *output, size_t output_length) +static int msi_wmi_platform_query(struct msi_wmi_platform_data *data, + enum msi_wmi_platform_method method, u8 *input, + size_t input_length, u8 *output, size_t output_length) { struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_buffer in = { @@ -147,9 +155,15 @@ static int msi_wmi_platform_query(struct wmi_device *wdev, enum msi_wmi_platform if (!input_length || !output_length) return -EINVAL; - status = wmidev_evaluate_method(wdev, 0x0, method, &in, &out); - if (ACPI_FAILURE(status)) - return -EIO; + /* + * The ACPI control method responsible for handling the WMI method calls + * is not thread-safe. Because of this we have to do the locking ourself. + */ + scoped_guard(mutex, &data->wmi_lock) { + status = wmidev_evaluate_method(data->wdev, 0x0, method, &in, &out); + if (ACPI_FAILURE(status)) + return -EIO; + } obj = out.pointer; if (!obj) @@ -170,13 +184,13 @@ static umode_t msi_wmi_platform_is_visible(const void *drvdata, enum hwmon_senso static int msi_wmi_platform_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { - struct wmi_device *wdev = dev_get_drvdata(dev); + struct msi_wmi_platform_data *data = dev_get_drvdata(dev); u8 input[32] = { 0 }; u8 output[32]; u16 value; int ret; - ret = msi_wmi_platform_query(wdev, MSI_PLATFORM_GET_FAN, input, sizeof(input), output, + ret = msi_wmi_platform_query(data, MSI_PLATFORM_GET_FAN, input, sizeof(input), output, sizeof(output)); if (ret < 0) return ret; @@ -231,7 +245,7 @@ static ssize_t msi_wmi_platform_write(struct file *fp, const char __user *input, return ret; down_write(&data->buffer_lock); - ret = msi_wmi_platform_query(data->wdev, data->method, payload, data->length, data->buffer, + ret = msi_wmi_platform_query(data->data, data->method, payload, data->length, data->buffer, data->length); up_write(&data->buffer_lock); @@ -277,17 +291,17 @@ static void msi_wmi_platform_debugfs_remove(void *data) debugfs_remove_recursive(dir); } -static void msi_wmi_platform_debugfs_add(struct wmi_device *wdev, struct dentry *dir, +static void msi_wmi_platform_debugfs_add(struct msi_wmi_platform_data *drvdata, struct dentry *dir, const char *name, enum msi_wmi_platform_method method) { struct msi_wmi_platform_debugfs_data *data; struct dentry *entry; - data = devm_kzalloc(&wdev->dev, sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(&drvdata->wdev->dev, sizeof(*data), GFP_KERNEL); if (!data) return; - data->wdev = wdev; + data->data = drvdata; data->method = method; init_rwsem(&data->buffer_lock); @@ -298,82 +312,82 @@ static void msi_wmi_platform_debugfs_add(struct wmi_device *wdev, struct dentry entry = debugfs_create_file(name, 0600, dir, data, &msi_wmi_platform_debugfs_fops); if (IS_ERR(entry)) - devm_kfree(&wdev->dev, data); + devm_kfree(&drvdata->wdev->dev, data); } -static void msi_wmi_platform_debugfs_init(struct wmi_device *wdev) +static void msi_wmi_platform_debugfs_init(struct msi_wmi_platform_data *data) { struct dentry *dir; char dir_name[64]; int ret, method; - scnprintf(dir_name, ARRAY_SIZE(dir_name), "%s-%s", DRIVER_NAME, dev_name(&wdev->dev)); + scnprintf(dir_name, ARRAY_SIZE(dir_name), "%s-%s", DRIVER_NAME, dev_name(&data->wdev->dev)); dir = debugfs_create_dir(dir_name, NULL); if (IS_ERR(dir)) return; - ret = devm_add_action_or_reset(&wdev->dev, msi_wmi_platform_debugfs_remove, dir); + ret = devm_add_action_or_reset(&data->wdev->dev, msi_wmi_platform_debugfs_remove, dir); if (ret < 0) return; for (method = MSI_PLATFORM_GET_PACKAGE; method <= MSI_PLATFORM_GET_WMI; method++) - msi_wmi_platform_debugfs_add(wdev, dir, msi_wmi_platform_debugfs_names[method - 1], + msi_wmi_platform_debugfs_add(data, dir, msi_wmi_platform_debugfs_names[method - 1], method); } -static int msi_wmi_platform_hwmon_init(struct wmi_device *wdev) +static int msi_wmi_platform_hwmon_init(struct msi_wmi_platform_data *data) { struct device *hdev; - hdev = devm_hwmon_device_register_with_info(&wdev->dev, "msi_wmi_platform", wdev, + hdev = devm_hwmon_device_register_with_info(&data->wdev->dev, "msi_wmi_platform", data, &msi_wmi_platform_chip_info, NULL); return PTR_ERR_OR_ZERO(hdev); } -static int msi_wmi_platform_ec_init(struct wmi_device *wdev) +static int msi_wmi_platform_ec_init(struct msi_wmi_platform_data *data) { u8 input[32] = { 0 }; u8 output[32]; u8 flags; int ret; - ret = msi_wmi_platform_query(wdev, MSI_PLATFORM_GET_EC, input, sizeof(input), output, + ret = msi_wmi_platform_query(data, MSI_PLATFORM_GET_EC, input, sizeof(input), output, sizeof(output)); if (ret < 0) return ret; flags = output[MSI_PLATFORM_EC_FLAGS_OFFSET]; - dev_dbg(&wdev->dev, "EC RAM version %lu.%lu\n", + dev_dbg(&data->wdev->dev, "EC RAM version %lu.%lu\n", FIELD_GET(MSI_PLATFORM_EC_MAJOR_MASK, flags), FIELD_GET(MSI_PLATFORM_EC_MINOR_MASK, flags)); - dev_dbg(&wdev->dev, "EC firmware version %.28s\n", + dev_dbg(&data->wdev->dev, "EC firmware version %.28s\n", &output[MSI_PLATFORM_EC_VERSION_OFFSET]); if (!(flags & MSI_PLATFORM_EC_IS_TIGERLAKE)) { if (!force) return -ENODEV; - dev_warn(&wdev->dev, "Loading on a non-Tigerlake platform\n"); + dev_warn(&data->wdev->dev, "Loading on a non-Tigerlake platform\n"); } return 0; } -static int msi_wmi_platform_init(struct wmi_device *wdev) +static int msi_wmi_platform_init(struct msi_wmi_platform_data *data) { u8 input[32] = { 0 }; u8 output[32]; int ret; - ret = msi_wmi_platform_query(wdev, MSI_PLATFORM_GET_WMI, input, sizeof(input), output, + ret = msi_wmi_platform_query(data, MSI_PLATFORM_GET_WMI, input, sizeof(input), output, sizeof(output)); if (ret < 0) return ret; - dev_dbg(&wdev->dev, "WMI interface version %u.%u\n", + dev_dbg(&data->wdev->dev, "WMI interface version %u.%u\n", output[MSI_PLATFORM_WMI_MAJOR_OFFSET], output[MSI_PLATFORM_WMI_MINOR_OFFSET]); @@ -381,7 +395,8 @@ static int msi_wmi_platform_init(struct wmi_device *wdev) if (!force) return -ENODEV; - dev_warn(&wdev->dev, "Loading despite unsupported WMI interface version (%u.%u)\n", + dev_warn(&data->wdev->dev, + "Loading despite unsupported WMI interface version (%u.%u)\n", output[MSI_PLATFORM_WMI_MAJOR_OFFSET], output[MSI_PLATFORM_WMI_MINOR_OFFSET]); } @@ -391,19 +406,31 @@ static int msi_wmi_platform_init(struct wmi_device *wdev) static int msi_wmi_platform_probe(struct wmi_device *wdev, const void *context) { + struct msi_wmi_platform_data *data; int ret; - ret = msi_wmi_platform_init(wdev); + data = devm_kzalloc(&wdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->wdev = wdev; + dev_set_drvdata(&wdev->dev, data); + + ret = devm_mutex_init(&wdev->dev, &data->wmi_lock); + if (ret < 0) + return ret; + + ret = msi_wmi_platform_init(data); if (ret < 0) return ret; - ret = msi_wmi_platform_ec_init(wdev); + ret = msi_wmi_platform_ec_init(data); if (ret < 0) return ret; - msi_wmi_platform_debugfs_init(wdev); + msi_wmi_platform_debugfs_init(data); - return msi_wmi_platform_hwmon_init(wdev); + return msi_wmi_platform_hwmon_init(data); } static const struct wmi_device_id msi_wmi_platform_id_table[] = { From 399eab7f92fb73ffe621294a2d6bec8fc9f3b36b Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 15 Apr 2025 09:30:15 +0200 Subject: [PATCH 0733/2924] ata: libata-sata: Save all fields from sense data descriptor When filling the taskfile result for a successful NCQ command, we use the SDB FIS from the FIS Receive Area, see e.g. ahci_qc_ncq_fill_rtf(). However, the SDB FIS only has fields STATUS and ERROR. For a successful NCQ command that has sense data, we will have a successful sense data descriptor, in the Sense Data for Successful NCQ Commands log. Since we have access to additional taskfile result fields, fill in these additional fields in qc->result_tf. This matches how for failing/aborted NCQ commands, we will use e.g. ahci_qc_fill_rtf() to fill in some fields, but then for the command that actually caused the NCQ error, we will use ata_eh_read_log_10h(), which provides additional fields, saving additional fields/overriding the qc->result_tf that was fetched using ahci_qc_fill_rtf(). Fixes: 18bd7718b5c4 ("scsi: ata: libata: Handle completion of CDL commands using policy 0xD") Signed-off-by: Niklas Cassel Reviewed-by: Igor Pylypiv Reviewed-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-sata.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index ba300cc0a3a327..2e4463d3a3561f 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1510,6 +1510,8 @@ int ata_eh_get_ncq_success_sense(struct ata_link *link) unsigned int err_mask, tag; u8 *sense, sk = 0, asc = 0, ascq = 0; u64 sense_valid, val; + u16 extended_sense; + bool aux_icc_valid; int ret = 0; err_mask = ata_read_log_page(dev, ATA_LOG_SENSE_NCQ, 0, buf, 2); @@ -1529,6 +1531,8 @@ int ata_eh_get_ncq_success_sense(struct ata_link *link) sense_valid = (u64)buf[8] | ((u64)buf[9] << 8) | ((u64)buf[10] << 16) | ((u64)buf[11] << 24); + extended_sense = get_unaligned_le16(&buf[14]); + aux_icc_valid = extended_sense & BIT(15); ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_EH) || @@ -1556,6 +1560,17 @@ int ata_eh_get_ncq_success_sense(struct ata_link *link) continue; } + qc->result_tf.nsect = sense[6]; + qc->result_tf.hob_nsect = sense[7]; + qc->result_tf.lbal = sense[8]; + qc->result_tf.lbam = sense[9]; + qc->result_tf.lbah = sense[10]; + qc->result_tf.hob_lbal = sense[11]; + qc->result_tf.hob_lbam = sense[12]; + qc->result_tf.hob_lbah = sense[13]; + if (aux_icc_valid) + qc->result_tf.auxiliary = get_unaligned_le32(&sense[16]); + /* Set sense without also setting scsicmd->result */ scsi_build_sense_buffer(dev->flags & ATA_DFLAG_D_SENSE, qc->scsicmd->sense_buffer, sk, From 1c406526bd84f1e0bd4bb4b50c6eeba0b135765a Mon Sep 17 00:00:00 2001 From: Zhang Xianwei Date: Sat, 15 Mar 2025 14:32:16 +0800 Subject: [PATCH 0734/2924] xfs: Fix spelling mistake "drity" -> "dirty" There is a spelling mistake in fs/xfs/xfs_log.c. Fix it. Signed-off-by: Zhang Xianwei Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6493bdb5735108..980aabc495128e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2888,7 +2888,7 @@ xlog_force_and_check_iclog( * * 1. the current iclog is active and has no data; the previous iclog * is in the active or dirty state. - * 2. the current iclog is drity, and the previous iclog is in the + * 2. the current iclog is dirty, and the previous iclog is in the * active or dirty state. * * We may sleep if: From b7c178d9e57c8fd4238ff77263b877f6f16182ba Mon Sep 17 00:00:00 2001 From: Meir Elisha Date: Tue, 8 Apr 2025 17:38:08 +0300 Subject: [PATCH 0735/2924] md/raid1: Add check for missing source disk in process_checks() During recovery/check operations, the process_checks function loops through available disks to find a 'primary' source with successfully read data. If no suitable source disk is found after checking all possibilities, the 'primary' index will reach conf->raid_disks * 2. Add an explicit check for this condition after the loop. If no source disk was found, print an error message and return early to prevent further processing without a valid primary source. Link: https://lore.kernel.org/linux-raid/20250408143808.1026534-1-meir.elisha@volumez.com Signed-off-by: Meir Elisha Suggested-and-reviewed-by: Yu Kuai Signed-off-by: Yu Kuai --- drivers/md/raid1.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0efc03cea24efe..de9bccbe7337b5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2200,14 +2200,9 @@ static int fix_sync_read_error(struct r1bio *r1_bio) if (!rdev_set_badblocks(rdev, sect, s, 0)) abort = 1; } - if (abort) { - conf->recovery_disabled = - mddev->recovery_disabled; - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - md_done_sync(mddev, r1_bio->sectors, 0); - put_buf(r1_bio); + if (abort) return 0; - } + /* Try next page */ sectors -= s; sect += s; @@ -2346,10 +2341,21 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) int disks = conf->raid_disks * 2; struct bio *wbio; - if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) - /* ouch - failed to read all of that. */ - if (!fix_sync_read_error(r1_bio)) + if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { + /* + * ouch - failed to read all of that. + * No need to fix read error for check/repair + * because all member disks are read. + */ + if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) || + !fix_sync_read_error(r1_bio)) { + conf->recovery_disabled = mddev->recovery_disabled; + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + md_done_sync(mddev, r1_bio->sectors, 0); + put_buf(r1_bio); return; + } + } if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) process_checks(r1_bio); From c6f1401b1d5fb3b83bd16ba8a0f89a8b1c805993 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 14 Apr 2025 17:33:45 -0700 Subject: [PATCH 0736/2924] xfs: fix fsmap for internal zoned devices Filesystems with an internal zoned rt section use xfs_rtblock_t values that are relative to the start of the data device. When fsmap reports on internal rt sections, it reports the space used by the data section as "OWN_FS". Unfortunately, the logic for resuming a query isn't quite right, so xfs/273 fails because it stress-tests GETFSMAP with a single-record buffer. If we enter the "report fake space as OWN_FS" block with a nonzero key[0].fmr_length, we should add that to key[0].fmr_physical and recheck if we still need to emit the fake record. We should /not/ just return 0 from the whole function because that prevents all rmap record iteration. If we don't enter that block, the resumption is still wrong. keys[*].fmr_physical is a reflection of what we copied out to userspace on a previous query, which means that it already accounts for rgstart. It is not correct to add rtstart_daddr when computing start_rtb or end_rtb, so stop that. While we're at it, add a xfs_has_zoned to make it clear that this is a zoned filesystem thing. Fixes: e50ec7fac81aa2 ("xfs: enable fsmap reporting for internal RT devices") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_fsmap.c | 51 ++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index a4bc1642fe5615..414b27a8645886 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -876,6 +876,7 @@ xfs_getfsmap_rtdev_rmapbt( const struct xfs_fsmap *keys, struct xfs_getfsmap_info *info) { + struct xfs_fsmap key0 = *keys; /* struct copy */ struct xfs_mount *mp = tp->t_mountp; struct xfs_rtgroup *rtg = NULL; struct xfs_btree_cur *bt_cur = NULL; @@ -887,32 +888,46 @@ xfs_getfsmap_rtdev_rmapbt( int error = 0; eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart + mp->m_sb.sb_rblocks); - if (keys[0].fmr_physical >= eofs) + if (key0.fmr_physical >= eofs) return 0; + /* + * On zoned filesystems with an internal rt volume, the volume comes + * immediately after the end of the data volume. However, the + * xfs_rtblock_t address space is relative to the start of the data + * device, which means that the first @rtstart fsblocks do not actually + * point anywhere. If a fsmap query comes in with the low key starting + * below @rtstart, report it as "owned by filesystem". + */ rtstart_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart); - if (keys[0].fmr_physical < rtstart_daddr) { + if (xfs_has_zoned(mp) && key0.fmr_physical < rtstart_daddr) { struct xfs_fsmap_irec frec = { .owner = XFS_RMAP_OWN_FS, .len_daddr = rtstart_daddr, }; - /* Adjust the low key if we are continuing from where we left off. */ - if (keys[0].fmr_length > 0) { - info->low_daddr = keys[0].fmr_physical + keys[0].fmr_length; - return 0; + /* + * Adjust the start of the query range if we're picking up from + * a previous round, and only emit the record if we haven't + * already gone past. + */ + key0.fmr_physical += key0.fmr_length; + if (key0.fmr_physical < rtstart_daddr) { + error = xfs_getfsmap_helper(tp, info, &frec); + if (error) + return error; + + key0.fmr_physical = rtstart_daddr; } - /* Fabricate an rmap entry for space occupied by the data dev */ - error = xfs_getfsmap_helper(tp, info, &frec); - if (error) - return error; + /* Zero the other fields to avoid further adjustments. */ + key0.fmr_owner = 0; + key0.fmr_offset = 0; + key0.fmr_length = 0; } - start_rtb = xfs_daddr_to_rtb(mp, rtstart_daddr + keys[0].fmr_physical); - end_rtb = xfs_daddr_to_rtb(mp, rtstart_daddr + - min(eofs - 1, keys[1].fmr_physical)); - + start_rtb = xfs_daddr_to_rtb(mp, key0.fmr_physical); + end_rtb = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical)); info->missing_owner = XFS_FMR_OWN_FREE; /* @@ -920,12 +935,12 @@ xfs_getfsmap_rtdev_rmapbt( * low to the fsmap low key and max out the high key to the end * of the rtgroup. */ - info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); - error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); + info->low.rm_offset = XFS_BB_TO_FSBT(mp, key0.fmr_offset); + error = xfs_fsmap_owner_to_rmap(&info->low, &key0); if (error) return error; - info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length); - xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, key0.fmr_length); + xfs_getfsmap_set_irec_flags(&info->low, &key0); /* Adjust the low key if we are continuing from where we left off. */ if (info->low.rm_blockcount == 0) { From 1413708f990cb7d025affd706ba9c23e2bfc1a27 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 9 Apr 2025 14:24:46 -0700 Subject: [PATCH 0737/2924] riscv: Avoid fortify warning in syscall_get_arguments() When building with CONFIG_FORTIFY_SOURCE=y and W=1, there is a warning because of the memcpy() in syscall_get_arguments(): In file included from include/linux/string.h:392, from include/linux/bitmap.h:13, from include/linux/cpumask.h:12, from arch/riscv/include/asm/processor.h:55, from include/linux/sched.h:13, from kernel/ptrace.c:13: In function 'fortify_memcpy_chk', inlined from 'syscall_get_arguments.isra' at arch/riscv/include/asm/syscall.h:66:2: include/linux/fortify-string.h:580:25: error: call to '__read_overflow2_field' declared with attribute warning: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Werror=attribute-warning] 580 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors The fortified memcpy() routine enforces that the source is not overread and the destination is not overwritten if the size of either field and the size of the copy are known at compile time. The memcpy() in syscall_get_arguments() intentionally overreads from a1 to a5 in 'struct pt_regs' but this is bigger than the size of a1. Normally, this could be solved by wrapping a1 through a5 with struct_group() but there was already a struct_group() applied to these members in commit bba547810c66 ("riscv: tracing: Fix __write_overflow_field in ftrace_partial_regs()"). Just avoid memcpy() altogether and write the copying of args from regs manually, which clears up the warning at the expense of three extra lines of code. Signed-off-by: Nathan Chancellor Reviewed-by: Alexandre Ghiti Reviewed-by: Dmitry V. Levin Link: https://lore.kernel.org/r/20250409-riscv-avoid-fortify-warning-syscall_get_arguments-v1-1-7853436d4755@kernel.org Signed-off-by: Alexandre Ghiti --- arch/riscv/include/asm/syscall.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 121fff429dce66..eceabf59ae482a 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -62,8 +62,11 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { args[0] = regs->orig_a0; - args++; - memcpy(args, ®s->a1, 5 * sizeof(args[0])); + args[1] = regs->a1; + args[2] = regs->a2; + args[3] = regs->a3; + args[4] = regs->a4; + args[5] = regs->a5; } static inline int syscall_get_arch(struct task_struct *task) From b2accfe7ca5bc9f9af28e603b79bdd5ad8df5c0b Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Tue, 1 Apr 2025 06:12:18 +0530 Subject: [PATCH 0738/2924] powerpc/boot: Check for ld-option support Commit 579aee9fc594 ("powerpc: suppress some linker warnings in recent linker versions") enabled support to add linker option "--no-warn-rwx-segments", if the version is greater than 2.39. Similar build warning were reported recently from linker version 2.35.2. ld: warning: arch/powerpc/boot/zImage.epapr has a LOAD segment with RWX permissions ld: warning: arch/powerpc/boot/zImage.pseries has a LOAD segment with RWX permissions Fix the warning by checking for "--no-warn-rwx-segments" option support in linker to enable it, instead of checking for the version range. Fixes: 579aee9fc594 ("powerpc: suppress some linker warnings in recent linker versions") Reported-by: Venkat Rao Bagalkote Suggested-by: Christophe Leroy Tested-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/linuxppc-dev/61cf556c-4947-4bd6-af63-892fc0966dad@linux.ibm.com/ Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250401004218.24869-1-maddy@linux.ibm.com --- arch/powerpc/boot/wrapper | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 1db60fe13802db..267ca6d4d9b387 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -234,10 +234,8 @@ fi # suppress some warnings in recent ld versions nowarn="-z noexecstack" -if ! ld_is_lld; then - if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then - nowarn="$nowarn --no-warn-rwx-segments" - fi +if [ $(${CROSS}ld -v --no-warn-rwx-segments &>/dev/null; echo $?) -eq 0 ]; then + nowarn="$nowarn --no-warn-rwx-segments" fi platformo=$object/"$platform".o From 28e89cdac6482f3c980df3e2e245db7366269124 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 15 Apr 2025 11:33:41 +0100 Subject: [PATCH 0739/2924] irqchip/renesas-rzv2h: Prevent TINT spurious interrupt A spurious TINT interrupt is seen during boot on RZ/G3E SMARC EVK. A glitch in the edge detection circuit can cause a spurious interrupt. Clear the status flag after setting the ICU_TSSRk registers, which is recommended in the hardware manual as a countermeasure. Fixes: 0d7605e75ac2 ("irqchip: Add RZ/V2H(P) Interrupt Control Unit (ICU) driver") Signed-off-by: Biju Das Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- drivers/irqchip/irq-renesas-rzv2h.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/irqchip/irq-renesas-rzv2h.c b/drivers/irqchip/irq-renesas-rzv2h.c index 3d5b5fdf9bde82..0f0fd7d4dfdf2c 100644 --- a/drivers/irqchip/irq-renesas-rzv2h.c +++ b/drivers/irqchip/irq-renesas-rzv2h.c @@ -170,6 +170,14 @@ static void rzv2h_tint_irq_endisable(struct irq_data *d, bool enable) else tssr &= ~ICU_TSSR_TIEN(tssel_n, priv->info->field_width); writel_relaxed(tssr, priv->base + priv->info->t_offs + ICU_TSSR(k)); + + /* + * A glitch in the edge detection circuit can cause a spurious + * interrupt. Clear the status flag after setting the ICU_TSSRk + * registers, which is recommended by the hardware manual as a + * countermeasure. + */ + writel_relaxed(BIT(tint_nr), priv->base + priv->info->t_offs + ICU_TSCLR); } static void rzv2h_icu_irq_disable(struct irq_data *d) From bbc9462f0cb0c8917a4908e856731708f0cee910 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:27 -0800 Subject: [PATCH 0740/2924] kernel: param: rename locate_module_kobject The locate_module_kobject() function looks up an existing module_kobject for a given module name. If it cannot find the corresponding module_kobject, it creates one for the given name. This commit renames locate_module_kobject() to lookup_or_create_module_kobject() to better describe its operations. This doesn't change anything functionality wise. Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Link: https://lore.kernel.org/r/20250227184930.34163-2-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu --- kernel/params.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index 2509f216c9f3cf..a2441ce059aedf 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -760,7 +760,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) params[i].ops->free(params[i].arg); } -static struct module_kobject * __init locate_module_kobject(const char *name) +static struct module_kobject * __init lookup_or_create_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; @@ -802,7 +802,7 @@ static void __init kernel_add_sysfs_param(const char *name, struct module_kobject *mk; int err; - mk = locate_module_kobject(name); + mk = lookup_or_create_module_kobject(name); if (!mk) return; @@ -873,7 +873,7 @@ static void __init version_sysfs_builtin(void) int err; for (vattr = __start___modver; vattr < __stop___modver; vattr++) { - mk = locate_module_kobject(vattr->module_name); + mk = lookup_or_create_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); WARN_ON_ONCE(err); From 9b3ae50cb902322a2b5922b9fcf8132d9b4c2a24 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 16 Apr 2025 09:25:17 +0100 Subject: [PATCH 0741/2924] irqchip/irq-bcm2712-mip: Enable driver when ARCH_BCM2835 is enabled The BCM2712 MIP driver is required for Raspberry PI5, but it's not automatically enabled when ARCH_BCM2835 is enabled and depends on ARCH_BRCMSTB. ARCH_BCM2835 shares drivers with ARCH_BRCMSTB platforms, but Raspberry PI5 does not require the BRCMSTB specific drivers, which are selected via ARCH_BRCMSTB. Enable the interrupt controller for both ARCH_BRCMSTB and ARCH_BCM2835. [ tglx: Massage changelog ] Fixes: 32c6c054661a ("irqchip: Add Broadcom BCM2712 MSI-X interrupt controller") Signed-off-by: Peter Robinson Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250416082523.179507-1-pbrobinson@gmail.com --- drivers/irqchip/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index cec05e443083b8..08bb3b031f2309 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -114,8 +114,8 @@ config I8259 config BCM2712_MIP tristate "Broadcom BCM2712 MSI-X Interrupt Peripheral support" - depends on ARCH_BRCMSTB || COMPILE_TEST - default m if ARCH_BRCMSTB + depends on ARCH_BRCMSTB || ARCH_BCM2835 || COMPILE_TEST + default m if ARCH_BRCMSTB || ARCH_BCM2835 depends on ARM_GIC select GENERIC_IRQ_CHIP select IRQ_DOMAIN_HIERARCHY From 1c7777feb0e2f5925908c489513656ebb443a699 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:28 -0800 Subject: [PATCH 0742/2924] kernel: refactor lookup_or_create_module_kobject() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the unlikely event of the allocation failing, it is better to let the machine boot with a not fully populated sysfs than to kill it with this BUG_ON(). All callers are already prepared for lookup_or_create_module_kobject() returning NULL. This is also preparation for calling this function from non __init code, where using BUG_ON for allocation failure handling is not acceptable. Since we are here, also start using IS_ENABLED instead of #ifdef construct. Suggested-by: Thomas Weißschuh Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Link: https://lore.kernel.org/r/20250227184930.34163-3-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu --- kernel/params.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/kernel/params.c b/kernel/params.c index a2441ce059aedf..787662663e3489 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -767,31 +767,28 @@ static struct module_kobject * __init lookup_or_create_module_kobject(const char int err; kobj = kset_find_obj(module_kset, name); - if (kobj) { - mk = to_module_kobject(kobj); - } else { - mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); - BUG_ON(!mk); - - mk->mod = THIS_MODULE; - mk->kobj.kset = module_kset; - err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, - "%s", name); -#ifdef CONFIG_MODULES - if (!err) - err = sysfs_create_file(&mk->kobj, &module_uevent.attr); -#endif - if (err) { - kobject_put(&mk->kobj); - pr_crit("Adding module '%s' to sysfs failed (%d), the system may be unstable.\n", - name, err); - return NULL; - } + if (kobj) + return to_module_kobject(kobj); - /* So that we hold reference in both cases. */ - kobject_get(&mk->kobj); + mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); + if (!mk) + return NULL; + + mk->mod = THIS_MODULE; + mk->kobj.kset = module_kset; + err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name); + if (IS_ENABLED(CONFIG_MODULES) && !err) + err = sysfs_create_file(&mk->kobj, &module_uevent.attr); + if (err) { + kobject_put(&mk->kobj); + pr_crit("Adding module '%s' to sysfs failed (%d), the system may be unstable.\n", + name, err); + return NULL; } + /* So that we hold reference in both cases. */ + kobject_get(&mk->kobj); + return mk; } From 7c76c813cfc42a7376378a0c4b7250db2eebab81 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:29 -0800 Subject: [PATCH 0743/2924] kernel: globalize lookup_or_create_module_kobject() lookup_or_create_module_kobject() is marked as static and __init, to make it global drop static keyword. Since this function can be called from non-init code, use __modinit instead of __init, __modinit marker will make it __init if CONFIG_MODULES is not defined. Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Link: https://lore.kernel.org/r/20250227184930.34163-4-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu --- include/linux/module.h | 2 ++ kernel/params.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/module.h b/include/linux/module.h index d94b196d5a34e1..b3329110d6686f 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -162,6 +162,8 @@ extern void cleanup_module(void); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ +struct module_kobject *lookup_or_create_module_kobject(const char *name); + /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) diff --git a/kernel/params.c b/kernel/params.c index 787662663e3489..e668fc90b83ec5 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -760,7 +760,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) params[i].ops->free(params[i].arg); } -static struct module_kobject * __init lookup_or_create_module_kobject(const char *name) +struct module_kobject __modinit * lookup_or_create_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; From 68715cb5c0e00284d93f976c6368809f64131b0b Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Tue, 15 Apr 2025 14:41:34 -0500 Subject: [PATCH 0744/2924] ASoC: Intel: sof_sdw: Add NULL check in asoc_sdw_rt_dmic_rtd_init() mic_name returned by devm_kasprintf() could be NULL. Add a check for it. Signed-off-by: Chenyuan Yang Fixes: bee2fe44679f ("ASoC: Intel: sof_sdw: use generic rtd_init function for Realtek SDW DMICs") Link: https://patch.msgid.link/20250415194134.292830-1-chenyuan0y@gmail.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_rt_dmic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sdw_utils/soc_sdw_rt_dmic.c b/sound/soc/sdw_utils/soc_sdw_rt_dmic.c index 46d917a99c51da..97be110a59b63a 100644 --- a/sound/soc/sdw_utils/soc_sdw_rt_dmic.c +++ b/sound/soc/sdw_utils/soc_sdw_rt_dmic.c @@ -29,6 +29,8 @@ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_da mic_name = devm_kasprintf(card->dev, GFP_KERNEL, "rt715-sdca"); else mic_name = devm_kasprintf(card->dev, GFP_KERNEL, "%s", component->name_prefix); + if (!mic_name) + return -ENOMEM; card->components = devm_kasprintf(card->dev, GFP_KERNEL, "%s mic:%s", card->components, From f95bbfe18512c5c018720468959edac056a17196 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 27 Feb 2025 10:49:30 -0800 Subject: [PATCH 0745/2924] drivers: base: handle module_kobject creation module_add_driver() relies on module_kset list for /sys/module//drivers directory creation. Since, commit 96a1a2412acba ("kernel/params.c: defer most of param_sysfs_init() to late_initcall time") drivers which are initialized from subsys_initcall() or any other higher precedence initcall couldn't find the related kobject entry in the module_kset list because module_kset is not fully populated by the time module_add_driver() refers it. As a consequence, module_add_driver() returns early without calling make_driver_name(). Therefore, /sys/module//drivers is never created. Fix this issue by letting module_add_driver() handle module_kobject creation itself. Fixes: 96a1a2412acb ("kernel/params.c: defer most of param_sysfs_init() to late_initcall time") Cc: stable@vger.kernel.org # requires all other patches from the series Suggested-by: Rasmus Villemoes Signed-off-by: Shyam Saini Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250227184930.34163-5-shyamsaini@linux.microsoft.com Signed-off-by: Petr Pavlu --- drivers/base/module.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/base/module.c b/drivers/base/module.c index 5bc71bea883a06..218aaa0964552f 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -42,16 +42,13 @@ int module_add_driver(struct module *mod, const struct device_driver *drv) if (mod) mk = &mod->mkobj; else if (drv->mod_name) { - struct kobject *mkobj; - - /* Lookup built-in module entry in /sys/modules */ - mkobj = kset_find_obj(module_kset, drv->mod_name); - if (mkobj) { - mk = container_of(mkobj, struct module_kobject, kobj); + /* Lookup or create built-in module entry in /sys/modules */ + mk = lookup_or_create_module_kobject(drv->mod_name); + if (mk) { /* remember our module structure */ drv->p->mkobj = mk; - /* kset_find_obj took a reference */ - kobject_put(mkobj); + /* lookup_or_create_module_kobject took a reference */ + kobject_put(&mk->kobj); } } From 3af4bec9c1db3f003be4d5ae09b6a737e4be1612 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 11 Apr 2025 15:32:21 +0800 Subject: [PATCH 0746/2924] riscv: KGDB: Do not inline arch_kgdb_breakpoint() The arch_kgdb_breakpoint() function defines the kgdb_compiled_break symbol using inline assembly. There's a potential issue where the compiler might inline arch_kgdb_breakpoint(), which would then define the kgdb_compiled_break symbol multiple times, leading to fail to link vmlinux.o. This isn't merely a potential compilation problem. The intent here is to determine the global symbol address of kgdb_compiled_break, and if this function is inlined multiple times, it would logically be a grave error. Link: https://lore.kernel.org/all/4b4187c1-77e5-44b7-885f-d6826723dd9a@sifive.com/ Link: https://lore.kernel.org/all/5b0adf9b-2b22-43fe-ab74-68df94115b9a@ghiti.fr/ Link: https://lore.kernel.org/all/23693e7f-4fff-40f3-a437-e06d827278a5@ghiti.fr/ Fixes: fe89bd2be866 ("riscv: Add KGDB support") Co-developed-by: Huacai Chen Signed-off-by: Huacai Chen Signed-off-by: WangYuli Link: https://lore.kernel.org/r/F22359AFB6FF9FD8+20250411073222.56820-1-wangyuli@uniontech.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kgdb.h | 9 +-------- arch/riscv/kernel/kgdb.c | 8 ++++++++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 46677daf708bd0..cc11c4544cffd1 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -19,16 +19,9 @@ #ifndef __ASSEMBLY__ +void arch_kgdb_breakpoint(void); extern unsigned long kgdb_compiled_break; -static inline void arch_kgdb_breakpoint(void) -{ - asm(".global kgdb_compiled_break\n" - ".option norvc\n" - "kgdb_compiled_break: ebreak\n" - ".option rvc\n"); -} - #endif /* !__ASSEMBLY__ */ #define DBG_REG_ZERO "zero" diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index 2e0266ae6bd728..5d1ce8dacaf58f 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -254,6 +254,14 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) regs->epc = pc; } +noinline void arch_kgdb_breakpoint(void) +{ + asm(".global kgdb_compiled_break\n" + ".option norvc\n" + "kgdb_compiled_break: ebreak\n" + ".option rvc\n"); +} + void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, char *remcom_out_buffer) { From 550c2aa787d1b06efcb11de1877354502a1237f2 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Fri, 11 Apr 2025 15:32:22 +0800 Subject: [PATCH 0747/2924] riscv: KGDB: Remove ".option norvc/.option rvc" for kgdb_compiled_break [ Quoting Samuel Holland: ] This is a separate issue, but using ".option rvc" here is a bug. It will unconditionally enable the C extension for the rest of the file, even if the kernel is being built with CONFIG_RISCV_ISA_C=n. [ Quoting Palmer Dabbelt: ] We're just looking at the address of kgdb_compiled_break, so it's fine if it ends up as a c.ebreak. [ Quoting Alexandre Ghiti: ] .option norvc is used to prevent the assembler from using compressed instructions, but it's generally used when we need to ensure the size of the instructions that are used, which is not the case here as noted by Palmer since we only care about the address. So yes it will work fine with C enabled :) So let's just remove them all. Link: https://lore.kernel.org/all/4b4187c1-77e5-44b7-885f-d6826723dd9a@sifive.com/ Link: https://lore.kernel.org/all/mhng-69513841-5068-441d-be8f-2aeebdc56a08@palmer-ri-x1c9a/ Link: https://lore.kernel.org/all/23693e7f-4fff-40f3-a437-e06d827278a5@ghiti.fr/ Fixes: fe89bd2be866 ("riscv: Add KGDB support") Cc: Samuel Holland Cc: Palmer Dabbelt Cc: Alexandre Ghiti Signed-off-by: WangYuli Link: https://lore.kernel.org/r/8B431C6A4626225C+20250411073222.56820-2-wangyuli@uniontech.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/kgdb.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index 5d1ce8dacaf58f..9f3db3503dabd6 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -257,9 +257,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) noinline void arch_kgdb_breakpoint(void) { asm(".global kgdb_compiled_break\n" - ".option norvc\n" - "kgdb_compiled_break: ebreak\n" - ".option rvc\n"); + "kgdb_compiled_break: ebreak\n"); } void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, From 75caec0c2aa3a7ec84348d438c74cb8a2eb4de97 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 31 Mar 2025 10:16:46 +0300 Subject: [PATCH 0748/2924] i2c: atr: Fix wrong include The fwnode.h is not supposed to be used by the drivers as it has the definitions for the core parts for different device property provider implementations. Drop it. Note, that fwnode API for drivers is provided in property.h which is included here. Fixes: a076a860acae ("media: i2c: add I2C Address Translator (ATR) support") Signed-off-by: Andy Shevchenko Acked-by: Mukesh Kumar Savaliya Reviewed-by: Luca Ceresoli Reviewed-by: Tomi Valkeinen [wsa: reworded subject] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-atr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-atr.c b/drivers/i2c/i2c-atr.c index 8fe9ddff8e96f6..783fb8df2ebee9 100644 --- a/drivers/i2c/i2c-atr.c +++ b/drivers/i2c/i2c-atr.c @@ -8,12 +8,12 @@ * Originally based on i2c-mux.c */ -#include #include #include #include #include #include +#include #include #include From 07be53cfa81afe94b14fb4bfee8243f2e0125d5e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 14 Apr 2025 21:09:00 -0400 Subject: [PATCH 0749/2924] selftests/ftrace: Differentiate bash and dash in dynevent_limitations.tc bash and dash evaluate variables differently. dash will evaluate '\\' every time it is read whereas bash does not. TEST_STRING="$TEST_STRING \\$i" echo $TEST_STRING With i=123 On bash, that will print "\123" but on dash, that will print the escape sequence of \123 as the \ will be interpreted again in the echo. The dynevent_limitations.tc test created a very large list of arguments to test the maximum number of arguments to pass to the dynamic events file. It had a loop of: TEST_STRING=$1 # Acceptable for i in `seq 1 $MAX_ARGS`; do TEST_STRING="$TEST_STRING \\$i" done echo "$TEST_STRING" >> dynamic_events This worked fine on bash, but when run on dash it failed. This was due to dash interpreting the "\\$i" twice. Once when it was assigned to TEST_STRING and a second time with the echo $TEST_STRING. bash does not process the backslash more than the first time. To solve this, assign a double backslash to a variable "bs" and then echo it to "ts". If "ts" changes, it is dash, if not, it is bash. Then update "bs" accordingly, and use that to assign TEST_STRING. Now this could possibly just check if "$BASH" is defined or not, but this is testing if the issue exists and not just which shell is being used. Link: https://lore.kernel.org/r/20250414210900.4de5e8b9@gandalf.local.home Fixes: 581a7b26ab364 ("selftests/ftrace: Add dynamic events argument limitation test case") Reported-by: Mark Brown Closes: https://lore.kernel.org/all/350786cc-9e40-4396-ab95-4f10d69122fb@sirena.org.uk/ Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Shuah Khan --- .../test.d/dynevent/dynevent_limitations.tc | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc index 6b94b678741a4c..f656bccb1a1490 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc @@ -7,11 +7,32 @@ MAX_ARGS=128 EXCEED_ARGS=$((MAX_ARGS + 1)) +# bash and dash evaluate variables differently. +# dash will evaluate '\\' every time it is read whereas bash does not. +# +# TEST_STRING="$TEST_STRING \\$i" +# echo $TEST_STRING +# +# With i=123 +# On bash, that will print "\123" +# but on dash, that will print the escape sequence of \123 as the \ will +# be interpreted again in the echo. +# +# Set a variable "bs" to save a double backslash, then echo that +# to "ts" to see if $ts changed or not. If it changed, it's dash, +# if not, it's bash, and then bs can equal a single backslash. +bs='\\' +ts=`echo $bs` +if [ "$ts" = '\\' ]; then + # this is bash + bs='\' +fi + check_max_args() { # event_header TEST_STRING=$1 # Acceptable for i in `seq 1 $MAX_ARGS`; do - TEST_STRING="$TEST_STRING \\$i" + TEST_STRING="$TEST_STRING $bs$i" done echo "$TEST_STRING" >> dynamic_events echo > dynamic_events From 8772cc49e0b8ab782e475ce5ef659eedab601a09 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 16 Apr 2025 20:37:56 +0200 Subject: [PATCH 0750/2924] batman-adv: fix duplicate MAC address check batadv_check_known_mac_addr() is both too lenient and too strict: - It is called from batadv_hardif_add_interface(), which means that it checked interfaces that are not used for batman-adv at all. Move it to batadv_hardif_enable_interface(). Also, restrict it to hardifs of the same mesh interface; different mesh interfaces should not interact at all. The batadv_check_known_mac_addr() argument is changed from `struct net_device` to `struct batadv_hard_iface` to achieve this. - The check only cares about hardifs in BATADV_IF_ACTIVE and BATADV_IF_TO_BE_ACTIVATED states, but interfaces in BATADV_IF_INACTIVE state should be checked as well, or the following steps will not result in a warning then they should: - Add two interfaces in down state with different MAC addresses to a mesh as hardifs - Change the MAC addresses so they conflict - Set interfaces to up state Now there will be two active hardifs with the same MAC address, but no warning. Fix by only ignoring hardifs in BATADV_IF_NOT_IN_USE state. The RCU lock can be dropped, as we're holding RTNL anyways when the function is called. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Matthias Schiffer Signed-off-by: Simon Wunderlich --- net/batman-adv/hard-interface.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f145f966265310..d099434d3dfabd 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -506,28 +506,32 @@ batadv_hardif_is_iface_up(const struct batadv_hard_iface *hard_iface) return false; } -static void batadv_check_known_mac_addr(const struct net_device *net_dev) +static void batadv_check_known_mac_addr(const struct batadv_hard_iface *hard_iface) { - const struct batadv_hard_iface *hard_iface; + const struct net_device *mesh_iface = hard_iface->mesh_iface; + const struct batadv_hard_iface *tmp_hard_iface; - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->if_status != BATADV_IF_ACTIVE && - hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) + if (!mesh_iface) + return; + + list_for_each_entry(tmp_hard_iface, &batadv_hardif_list, list) { + if (tmp_hard_iface == hard_iface) + continue; + + if (tmp_hard_iface->mesh_iface != mesh_iface) continue; - if (hard_iface->net_dev == net_dev) + if (tmp_hard_iface->if_status == BATADV_IF_NOT_IN_USE) continue; - if (!batadv_compare_eth(hard_iface->net_dev->dev_addr, - net_dev->dev_addr)) + if (!batadv_compare_eth(tmp_hard_iface->net_dev->dev_addr, + hard_iface->net_dev->dev_addr)) continue; pr_warn("The newly added mac address (%pM) already exists on: %s\n", - net_dev->dev_addr, hard_iface->net_dev->name); + hard_iface->net_dev->dev_addr, tmp_hard_iface->net_dev->name); pr_warn("It is strongly recommended to keep mac addresses unique to avoid problems!\n"); } - rcu_read_unlock(); } /** @@ -764,6 +768,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->net_dev->name, hardif_mtu, required_mtu); + batadv_check_known_mac_addr(hard_iface); + if (batadv_hardif_is_iface_up(hard_iface)) batadv_hardif_activate_interface(hard_iface); else @@ -902,7 +908,6 @@ batadv_hardif_add_interface(struct net_device *net_dev) batadv_v_hardif_init(hard_iface); - batadv_check_known_mac_addr(hard_iface->net_dev); kref_get(&hard_iface->refcount); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); batadv_hardif_generation++; @@ -989,7 +994,7 @@ static int batadv_hard_if_event(struct notifier_block *this, if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) goto hardif_put; - batadv_check_known_mac_addr(hard_iface->net_dev); + batadv_check_known_mac_addr(hard_iface); bat_priv = netdev_priv(hard_iface->mesh_iface); bat_priv->algo_ops->iface.update_mac(hard_iface); From 447fab30955cf7dba7dd563f42b67c02284860c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 28 Mar 2025 18:58:17 +0100 Subject: [PATCH 0751/2924] drm/amdgpu: use a dummy owner for sysfs triggered cleaner shaders v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise triggering sysfs multiple times without other submissions in between only runs the shader once. v2: add some comment v3: re-add missing cast v4: squash in semicolon fix Signed-off-by: Christian König Reviewed-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher (cherry picked from commit 8b2ae7d492675e8af8902f103364bef59382b935) --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 72af5e5a894a29..cf2df7790077d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1438,9 +1438,11 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; struct drm_gpu_scheduler *sched = &ring->sched; struct drm_sched_entity entity; + static atomic_t counter; struct dma_fence *f; struct amdgpu_job *job; struct amdgpu_ib *ib; + void *owner; int i, r; /* Initialize the scheduler entity */ @@ -1451,9 +1453,15 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) goto err; } - r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, - 64, 0, - &job); + /* + * Use some unique dummy value as the owner to make sure we execute + * the cleaner shader on each submission. The value just need to change + * for each submission and is otherwise meaningless. + */ + owner = (void *)(unsigned long)atomic_inc_return(&counter); + + r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, + 64, 0, &job); if (r) goto err; From 1657793def101dac7c9d3b2250391f6a3dd934ba Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 8 Apr 2025 13:09:57 -0500 Subject: [PATCH 0752/2924] drm/amd: Forbid suspending into non-default suspend states On systems that default to 'deep' some userspace software likes to try to suspend in 'deep' first. If there is a failure for any reason (such as -ENOMEM) the failure is ignored and then it will try to use 's2idle' as a fallback. This fails, but more importantly it leads to graphical problems. Forbid this behavior and only allow suspending in the last state supported by the system. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4093 Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20250408180957.4027643-1-superm1@kernel.org Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 2aabd44aa8a3c08da3d43264c168370f6da5e81d) --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2c04ae13384845..ef6e78224fdfa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1123,6 +1123,7 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + suspend_state_t last_suspend_state; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 26bf896f1444ed..24ee4710f807f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2548,8 +2548,20 @@ static int amdgpu_pmops_suspend(struct device *dev) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) adev->in_s3 = true; - if (!adev->in_s0ix && !adev->in_s3) + if (!adev->in_s0ix && !adev->in_s3) { + /* don't allow going deep first time followed by s2idle the next time */ + if (adev->last_suspend_state != PM_SUSPEND_ON && + adev->last_suspend_state != pm_suspend_target_state) { + drm_err_once(drm_dev, "Unsupported suspend state %d\n", + pm_suspend_target_state); + return -EINVAL; + } return 0; + } + + /* cache the state last used for suspend */ + adev->last_suspend_state = pm_suspend_target_state; + return amdgpu_device_suspend(drm_dev, true); } From e7afa85a0d0eba5bf2c0a446ff622ebdbc9812d6 Mon Sep 17 00:00:00 2001 From: ZhenGuo Yin Date: Tue, 8 Apr 2025 16:18:28 +0800 Subject: [PATCH 0753/2924] drm/amdgpu: fix warning of drm_mm_clean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel doorbell BOs needs to be freed before ttm_fini. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4145 Fixes: 54c30d2a8def ("drm/amdgpu: create kernel doorbell pages") Acked-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: ZhenGuo Yin Signed-off-by: Alex Deucher (cherry picked from commit 39938a8ed979e398faa3791a47e282c82bcc6f04) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b34b915203f250..7f354cd532dc14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3510,6 +3510,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_device_mem_scratch_fini(adev); amdgpu_ib_pool_fini(adev); amdgpu_seq64_fini(adev); + amdgpu_doorbell_fini(adev); } if (adev->ip_blocks[i].version->funcs->sw_fini) { r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); @@ -4858,7 +4859,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - amdgpu_doorbell_fini(adev); drm_dev_exit(idx); } From 2036be31741b00f030530381643a8b35a5a42b5c Mon Sep 17 00:00:00 2001 From: David Rosca Date: Mon, 7 Apr 2025 13:12:11 +0200 Subject: [PATCH 0754/2924] drm/amdgpu: Add back JPEG to video caps for carrizo and newer JPEG is not supported on Vega only. Fixes: 0a6e7b06bdbe ("drm/amdgpu: Remove JPEG from vega and carrizo video caps") Signed-off-by: David Rosca Reviewed-by: Leo Liu Signed-off-by: Alex Deucher (cherry picked from commit 0f4dfe86fe922c37bcec99dce80a15b4d5d4726d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 86d8bc10d90ab2..9b3510e5311275 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -239,6 +239,13 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = .max_pixels_per_frame = 4096 * 4096, .max_level = 186, }, + { + .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, + .max_width = 4096, + .max_height = 4096, + .max_pixels_per_frame = 4096 * 4096, + .max_level = 0, + }, }; static const struct amdgpu_video_codecs cz_video_codecs_decode = From cd9e6d6fdd2de60bfb4672387c17d4ee7157cf8e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 8 Apr 2025 21:27:15 -0400 Subject: [PATCH 0755/2924] drm/amd/display/dml2: use vzalloc rather than kzalloc The structures are large and they do not require contiguous memory so use vzalloc. Fixes: 70839da63605 ("drm/amd/display: Add new DCN401 sources") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4126 Cc: Aurabindo Pillai Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher (cherry picked from commit 20c50a9a793300a1fc82f3ddd0e3c68f8213fbef) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 11 ++++++----- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 94e99e540691cf..5d16f36ec95c8b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -2,6 +2,7 @@ // // Copyright 2024 Advanced Micro Devices, Inc. +#include #include "dml2_internal_types.h" #include "dml_top.h" @@ -13,11 +14,11 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) { - *dml_ctx = kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + *dml_ctx = vzalloc(sizeof(struct dml2_context)); if (!(*dml_ctx)) return false; - (*dml_ctx)->v21.dml_init.dml2_instance = kzalloc(sizeof(struct dml2_instance), GFP_KERNEL); + (*dml_ctx)->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance)); if (!((*dml_ctx)->v21.dml_init.dml2_instance)) return false; @@ -27,7 +28,7 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; - (*dml_ctx)->v21.mode_programming.programming = kzalloc(sizeof(struct dml2_display_cfg_programming), GFP_KERNEL); + (*dml_ctx)->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming)); if (!((*dml_ctx)->v21.mode_programming.programming)) return false; @@ -115,8 +116,8 @@ bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const s void dml21_destroy(struct dml2_context *dml2) { - kfree(dml2->v21.dml_init.dml2_instance); - kfree(dml2->v21.mode_programming.programming); + vfree(dml2->v21.dml_init.dml2_instance); + vfree(dml2->v21.mode_programming.programming); } static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index f549a778f6f1a9..e89571874185ee 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -24,6 +24,8 @@ * */ +#include + #include "display_mode_core.h" #include "dml2_internal_types.h" #include "dml2_utils.h" @@ -747,7 +749,7 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2 static inline struct dml2_context *dml2_allocate_memory(void) { - return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + return (struct dml2_context *) vzalloc(sizeof(struct dml2_context)); } static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) @@ -821,7 +823,7 @@ void dml2_destroy(struct dml2_context *dml2) if (dml2->architecture == dml2_architecture_21) dml21_destroy(dml2); - kfree(dml2); + vfree(dml2); } void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, From c235a7132258ac30bd43d228222986022d21f5de Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 11 Apr 2025 17:40:26 +0530 Subject: [PATCH 0756/2924] drm/amdgpu: Use the right function for hdp flush There are a few prechecks made before HDP flush like a flush is not required on APU bare metal. Using hdp callback directly bypasses those checks. Use amdgpu_device_flush_hdp which takes care of prechecks. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 1d9bff4cf8c53d33ee2ff1b11574e5da739ce61c) --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 2 +- 10 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a63ce747863f10..23e6a05359c24b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6114,7 +6114,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -6192,7 +6192,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); @@ -6269,7 +6269,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -6644,7 +6644,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d57db42f953600..2a5c2a1ae3c74f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -2428,7 +2428,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -2472,7 +2472,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -2517,7 +2517,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -3153,7 +3153,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -3371,7 +3371,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -4541,7 +4541,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index e7b58e47029271..62a257a4a3e9b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -2324,7 +2324,7 @@ static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -2468,7 +2468,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -3426,7 +3426,7 @@ static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 95d894a231fcfc..809b3a882d0d72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -969,7 +969,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) adev->hdp.funcs->init_registers(adev); /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index ad099f136f84eb..e74e26b6a4f23c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -229,7 +229,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -899,7 +899,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 05c026d0b0d96a..c6f290704d4731 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -297,7 +297,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -881,7 +881,7 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 783e0c3b86b4c4..5effe8327d29fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2435,7 +2435,7 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block) adev->hdp.funcs->init_registers(adev); /* After HDP is initialized, flush HDP.*/ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bb5dfc410a667f..215543575f477c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -533,7 +533,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index cc621064610f1d..afdf8ce3b4c59e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -610,7 +610,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 7c49c3f3c3881e..256288c6cd78ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -498,7 +498,7 @@ static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { From 39e160505198ff8c158f11bce2ba19809a756e8b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 16 Apr 2025 16:04:10 -0400 Subject: [PATCH 0757/2924] block: integrity: Do not call set_page_dirty_lock() Placing multiple protection information buffers inside the same page can lead to oopses because set_page_dirty_lock() can't be called from interrupt context. Since a protection information buffer is not backed by a file there is no point in setting its page dirty, there is nothing to synchronize. Drop the call to set_page_dirty_lock() and remove the last argument to bio_integrity_unpin_bvec(). Cc: stable@vger.kernel.org Fixes: 492c5d455969 ("block: bio-integrity: directly map user buffers") Signed-off-by: Martin K. Petersen Reviewed-by: Keith Busch Link: https://lore.kernel.org/r/yq1v7r3ev9g.fsf@ca-mkp.ca.oracle.com Signed-off-by: Jens Axboe --- block/bio-integrity.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 608594a154a5b9..43ef6bd06c85eb 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -66,16 +66,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, } EXPORT_SYMBOL(bio_integrity_alloc); -static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs, - bool dirty) +static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs) { int i; - for (i = 0; i < nr_vecs; i++) { - if (dirty && !PageCompound(bv[i].bv_page)) - set_page_dirty_lock(bv[i].bv_page); + for (i = 0; i < nr_vecs; i++) unpin_user_page(bv[i].bv_page); - } } static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip) @@ -91,7 +87,7 @@ static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip) ret = copy_to_iter(bvec_virt(bounce_bvec), bytes, &orig_iter); WARN_ON_ONCE(ret != bytes); - bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs, true); + bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs); } /** @@ -111,8 +107,7 @@ void bio_integrity_unmap_user(struct bio *bio) return; } - bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt, - bio_data_dir(bio) == READ); + bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt); } /** @@ -198,7 +193,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec, } if (write) - bio_integrity_unpin_bvec(bvec, nr_vecs, false); + bio_integrity_unpin_bvec(bvec, nr_vecs); else memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec)); @@ -319,7 +314,7 @@ int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter) return 0; release_pages: - bio_integrity_unpin_bvec(bvec, nr_bvecs, false); + bio_integrity_unpin_bvec(bvec, nr_bvecs); free_bvec: if (bvec != stack_vec) kfree(bvec); From e2e49e214145a8f6ece6ecd52fec63ebc2b27ce9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Mon, 14 Apr 2025 11:08:15 +0200 Subject: [PATCH 0758/2924] Bluetooth: l2cap: Process valid commands in too long frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required for passing PTS test cases: - L2CAP/COS/CED/BI-14-C Multiple Signaling Command in one PDU, Data Truncated, BR/EDR, Connection Request - L2CAP/COS/CED/BI-15-C Multiple Signaling Command in one PDU, Data Truncated, BR/EDR, Disconnection Request The test procedure defined in L2CAP.TS.p39 for both tests is: 1. The Lower Tester sends a C-frame to the IUT with PDU Length set to 8 and Channel ID set to the correct signaling channel for the logical link. The Information payload contains one L2CAP_ECHO_REQ packet with Data Length set to 0 with 0 octets of echo data and one command packet and Data Length set as specified in Table 4.6 and the correct command data. 2. The IUT sends an L2CAP_ECHO_RSP PDU to the Lower Tester. 3. Perform alternative 3A, 3B, 3C, or 3D depending on the IUT’s response. Alternative 3A (IUT terminates the link): 3A.1 The IUT terminates the link. 3A.2 The test ends with a Pass verdict. Alternative 3B (IUT discards the frame): 3B.1 The IUT does not send a reply to the Lower Tester. Alternative 3C (IUT rejects PDU): 3C.1 The IUT sends an L2CAP_COMMAND_REJECT_RSP PDU to the Lower Tester. Alternative 3D (Any other IUT response): 3D.1 The Upper Tester issues a warning and the test ends. 4. The Lower Tester sends a C-frame to the IUT with PDU Length set to 4 and Channel ID set to the correct signaling channel for the logical link. The Information payload contains Data Length set to 0 with an L2CAP_ECHO_REQ packet with 0 octets of echo data. 5. The IUT sends an L2CAP_ECHO_RSP PDU to the Lower Tester. With expected outcome: In Steps 2 and 5, the IUT responds with an L2CAP_ECHO_RSP. In Step 3A.1, the IUT terminates the link. In Step 3B.1, the IUT does not send a reply to the Lower Tester. In Step 3C.1, the IUT rejects the PDU. In Step 3D.1, the IUT sends any valid response. Currently PTS fails with the following logs: Failed to receive ECHO RESPONSE. And HCI logs: > ACL Data RX: Handle 11 flags 0x02 dlen 20 L2CAP: Information Response (0x0b) ident 2 len 12 Type: Fixed channels supported (0x0003) Result: Success (0x0000) Channels: 0x000000000000002e L2CAP Signaling (BR/EDR) Connectionless reception AMP Manager Protocol L2CAP Signaling (LE) > ACL Data RX: Handle 11 flags 0x02 dlen 13 frame too long 08 01 00 00 08 02 01 00 aa ......... Cc: stable@vger.kernel.org Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index f1c4b8bd7a8b35..5ca7ac43c58d4b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7539,8 +7539,24 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) if (skb->len > len) { BT_ERR("Frame is too long (len %u, expected len %d)", skb->len, len); + /* PTS test cases L2CAP/COS/CED/BI-14-C and BI-15-C + * (Multiple Signaling Command in one PDU, Data + * Truncated, BR/EDR) send a C-frame to the IUT with + * PDU Length set to 8 and Channel ID set to the + * correct signaling channel for the logical link. + * The Information payload contains one L2CAP_ECHO_REQ + * packet with Data Length set to 0 with 0 octets of + * echo data and one invalid command packet due to + * data truncated in PDU but present in HCI packet. + * + * Shorter the socket buffer to the PDU length to + * allow to process valid commands from the PDU before + * setting the socket unreliable. + */ + skb->len = len; + l2cap_recv_frame(conn, skb); l2cap_conn_unreliable(conn, ECOMM); - goto drop; + goto unlock; } /* Append fragment into frame (with header) */ From 875db86e1ec75fe633f1e85ed2f92c731cdbf760 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Apr 2025 09:15:19 -0700 Subject: [PATCH 0759/2924] Bluetooth: vhci: Avoid needless snprintf() calls Avoid double-copying of string literals. Use a "const char *" for each string instead of copying from .rodata into stack and then into the skb. We can go directly from .rodata to the skb. This also works around a Clang bug (that has since been fixed[1]). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401250927.1poZERd6-lkp@intel.com/ Fixes: ab4e4380d4e1 ("Bluetooth: Add vhci devcoredump support") Link: https://github.com/llvm/llvm-project/commit/ea2e66aa8b6e363b89df66dc44275a0d7ecd70ce [1] Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Nathan Chancellor Reviewed-by: Josh Poimboeuf Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_vhci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index a51935d37e5d71..59f4d7bdffdcb5 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -289,18 +289,18 @@ static void vhci_coredump(struct hci_dev *hdev) static void vhci_coredump_hdr(struct hci_dev *hdev, struct sk_buff *skb) { - char buf[80]; + const char *buf; - snprintf(buf, sizeof(buf), "Controller Name: vhci_ctrl\n"); + buf = "Controller Name: vhci_ctrl\n"; skb_put_data(skb, buf, strlen(buf)); - snprintf(buf, sizeof(buf), "Firmware Version: vhci_fw\n"); + buf = "Firmware Version: vhci_fw\n"; skb_put_data(skb, buf, strlen(buf)); - snprintf(buf, sizeof(buf), "Driver: vhci_drv\n"); + buf = "Driver: vhci_drv\n"; skb_put_data(skb, buf, strlen(buf)); - snprintf(buf, sizeof(buf), "Vendor: vhci\n"); + buf = "Vendor: vhci\n"; skb_put_data(skb, buf, strlen(buf)); } From 8dd3804bf409095901b2b44b70e2e082f726f556 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 16 Apr 2025 18:28:25 -0400 Subject: [PATCH 0760/2924] bcachefs: Add missing READ_ONCE() for metadata replicas If we race with the user changing the metadata_replicas setting, this could cause us to get an incorrectly sized disk reservation. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 55fbeeb8eaaa1c..44b5fe430370f7 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1221,7 +1221,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ret = bch2_disk_reservation_get(c, &as->disk_res, (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c), - c->opts.metadata_replicas, + READ_ONCE(c->opts.metadata_replicas), disk_res_flags); if (ret) goto err; From 00ffb3724ce743578163f5ade2884374554ca021 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Mon, 14 Apr 2025 22:36:46 +0530 Subject: [PATCH 0761/2924] cxgb4: fix memory leak in cxgb4_init_ethtool_filters() error path In the for loop used to allocate the loc_array and bmap for each port, a memory leak is possible when the allocation for loc_array succeeds, but the allocation for bmap fails. This is because when the control flow goes to the label free_eth_finfo, only the allocations starting from (i-1)th iteration are freed. Fix that by freeing the loc_array in the bmap allocation error path. Fixes: d915c299f1da ("cxgb4: add skeleton for ethtool n-tuple filters") Signed-off-by: Abdun Nihaal Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414170649.89156-1-abdun.nihaal@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 7f3f5afa864f4a..1546c3db08f093 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -2270,6 +2270,7 @@ int cxgb4_init_ethtool_filters(struct adapter *adap) eth_filter->port[i].bmap = bitmap_zalloc(nentries, GFP_KERNEL); if (!eth_filter->port[i].bmap) { ret = -ENOMEM; + kvfree(eth_filter->port[i].loc_array); goto free_eth_finfo; } } From 4d07bbf2d45683841e578a2f255e4c174534bf38 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:44 -0700 Subject: [PATCH 0762/2924] tools: ynl-gen: don't declare loop iterator in place The codegen tries to follow the "old" C style and declare loop iterators at the start of the block / function. Only nested request handling breaks this style, so adjust it. Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index a1427c53703025..305f5696bc4f28 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -654,10 +654,10 @@ def _attr_get(self, ri, var): def attr_put(self, ri, var): if self.attr['type'] in scalars: put_type = self.type - ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);") elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + f"{self.enum_name}, &{var}->{self.c_name}[i])") else: @@ -1644,11 +1644,23 @@ def put_req_nested_prototype(ri, struct, suffix=';'): def put_req_nested(ri, struct): + local_vars = [] + init_lines = [] + + local_vars.append('struct nlattr *nest;') + init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);") + + for _, arg in struct.member_list(): + if arg.presence_type() == 'count': + local_vars.append('unsigned int i;') + break + put_req_nested_prototype(ri, struct, suffix='') ri.cw.block_start() - ri.cw.write_func_lvar('struct nlattr *nest;') + ri.cw.write_func_lvar(local_vars) - ri.cw.p("nest = ynl_attr_nest_start(nlh, attr_type);") + for line in init_lines: + ri.cw.p(line) for _, arg in struct.member_list(): arg.attr_put(ri, "obj") @@ -1850,6 +1862,11 @@ def print_req(ri): local_vars += ['size_t hdr_len;', 'void *hdr;'] + for _, attr in ri.struct["request"].member_list(): + if attr.presence_type() == 'count': + local_vars += ['unsigned int i;'] + break + print_prototype(ri, direction, terminate=False) ri.cw.block_start() ri.cw.write_func_lvar(local_vars) From dfa464b4a603984d648a9beb9bce72df5858c1e2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:45 -0700 Subject: [PATCH 0763/2924] tools: ynl-gen: move local vars after the opening bracket The "function writing helper" tries to put local variables between prototype and the opening bracket. Clearly wrong, but up until now nothing actually uses it to write local vars so it wasn't noticed. Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 305f5696bc4f28..662a925bd9e11a 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -1399,9 +1399,9 @@ def write_func_lvar(self, local_vars): def write_func(self, qual_ret, name, body, args=None, local_vars=None): self.write_func_prot(qual_ret=qual_ret, name=name, args=args) + self.block_start() self.write_func_lvar(local_vars=local_vars) - self.block_start() for line in body: self.p(line) self.block_end() From ce6cb8113c842b94e77364b247c4f85c7b34e0c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:46 -0700 Subject: [PATCH 0764/2924] tools: ynl-gen: individually free previous values on double set When user calls request_attrA_set() multiple times (for the same attribute), and attrA is of type which allocates memory - we try to free the previously associated values. For array types (including multi-attr) we have only freed the array, but the array may have contained pointers. Refactor the code generation for free attr and reuse the generated lines in setters to flush out the previous state. Since setters are static inlines in the header we need to add forward declarations for the free helpers of pure nested structs. Track which types get used by arrays and include the right forwad declarations. At least ethtool string set and bit set would not be freed without this. Tho, admittedly, overriding already set attribute twice is likely a very very rare thing to do. Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 62 +++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 662a925bd9e11a..2d856ccc88f484 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -162,9 +162,15 @@ def _complex_member_type(self, ri): def free_needs_iter(self): return False - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): if self.is_multi_val() or self.presence_type() == 'len': - ri.cw.p(f'free({var}->{ref}{self.c_name});') + return [f'free({var}->{ref}{self.c_name});'] + return [] + + def free(self, ri, var, ref): + lines = self._free_lines(ri, var, ref) + for line in lines: + ri.cw.p(line) def arg_member(self, ri): member = self._complex_member_type(ri) @@ -263,6 +269,10 @@ def setter(self, ri, space, direction, deref=False, ref=None): var = "req" member = f"{var}->{'.'.join(ref)}" + local_vars = [] + if self.free_needs_iter(): + local_vars += ['unsigned int i;'] + code = [] presence = '' for i in range(0, len(ref)): @@ -272,6 +282,10 @@ def setter(self, ri, space, direction, deref=False, ref=None): if i == len(ref) - 1 and self.presence_type() != 'bit': continue code.append(presence + ' = 1;') + ref_path = '.'.join(ref[:-1]) + if ref_path: + ref_path += '.' + code += self._free_lines(ri, var, ref_path) code += self._setter_lines(ri, member, presence) func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" @@ -279,7 +293,8 @@ def setter(self, ri, space, direction, deref=False, ref=None): alloc = bool([x for x in code if 'alloc(' in x]) if free and not alloc: func_name = '__' + func_name - ri.cw.write_func('static inline void', func_name, body=code, + ri.cw.write_func('static inline void', func_name, local_vars=local_vars, + body=code, args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri)) @@ -482,8 +497,7 @@ def _attr_get(self, ri, var): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"free({member});", - f"{presence}_len = strlen({self.c_name});", + return [f"{presence}_len = strlen({self.c_name});", f"{member} = malloc({presence}_len + 1);", f'memcpy({member}, {self.c_name}, {presence}_len);', f'{member}[{presence}_len] = 0;'] @@ -536,8 +550,7 @@ def _attr_get(self, ri, var): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"free({member});", - f"{presence}_len = len;", + return [f"{presence}_len = len;", f"{member} = malloc({presence}_len);", f'memcpy({member}, {self.c_name}, {presence}_len);'] @@ -574,12 +587,14 @@ def is_recursive(self): def _complex_member_type(self, ri): return self.nested_struct_type - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): + lines = [] at = '&' if self.is_recursive_for_op(ri): at = '' - ri.cw.p(f'if ({var}->{ref}{self.c_name})') - ri.cw.p(f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});') + lines += [f'if ({var}->{ref}{self.c_name})'] + lines += [f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});'] + return lines def _attr_typol(self): return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' @@ -632,15 +647,19 @@ def _complex_member_type(self, ri): def free_needs_iter(self): return 'type' not in self.attr or self.attr['type'] == 'nest' - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): + lines = [] if self.attr['type'] in scalars: - ri.cw.p(f"free({var}->{ref}{self.c_name});") + lines += [f"free({var}->{ref}{self.c_name});"] elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)") - ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);') - ri.cw.p(f"free({var}->{ref}{self.c_name});") + lines += [ + f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)", + f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', + f"free({var}->{ref}{self.c_name});", + ] else: raise Exception(f"Free of MultiAttr sub-type {self.attr['type']} not supported yet") + return lines def _attr_policy(self, policy): return self.base_type._attr_policy(policy) @@ -666,8 +685,7 @@ def attr_put(self, ri, var): def _setter_lines(self, ri, member, presence): # For multi-attr we have a count, not presence, hack up the presence presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name - return [f"free({member});", - f"{member} = {self.c_name};", + return [f"{member} = {self.c_name};", f"{presence} = n_{self.c_name};"] @@ -755,6 +773,7 @@ def __init__(self, family, space_name, type_list=None, inherited=None): self.request = False self.reply = False self.recursive = False + self.in_multi_val = False # used by a MultiAttr or and legacy arrays self.attr_list = [] self.attrs = dict() @@ -1122,6 +1141,10 @@ def _load_nested_sets(self): if attr in rs_members['reply']: self.pure_nested_structs[nested].reply = True + if spec.is_multi_val(): + child = self.pure_nested_structs.get(nested) + child.in_multi_val = True + self._sort_pure_types() # Propagate the request / reply / recursive @@ -1136,6 +1159,8 @@ def _load_nested_sets(self): struct.child_nests.update(child.child_nests) child.request |= struct.request child.reply |= struct.reply + if spec.is_multi_val(): + child.in_multi_val = True if attr_set in struct.child_nests: struct.recursive = True @@ -2958,6 +2983,9 @@ def main(): for attr_set, struct in parsed.pure_nested_structs.items(): ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) print_type_full(ri, struct) + if struct.request and struct.in_multi_val: + free_rsp_nested_prototype(ri) + cw.nl() for op_name, op in parsed.ops.items(): cw.p(f"/* ============== {op.enum_name} ============== */") From 57e7dedf2b8c72caa6f04b9e08b19e4f370562fa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:47 -0700 Subject: [PATCH 0765/2924] tools: ynl-gen: make sure we validate subtype of array-nest ArrayNest AKA indexed-array support currently skips inner type validation. We count the attributes and then we parse them, make sure we call validate, too. Otherwise buggy / unexpected kernel response may lead to crashes. Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 2d856ccc88f484..30c0a34b2784ec 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -714,8 +714,11 @@ def _attr_typol(self): def _attr_get(self, ri, var): local_vars = ['const struct nlattr *attr2;'] get_lines = [f'attr_{self.c_name} = attr;', - 'ynl_attr_for_each_nested(attr2, attr)', - f'\t{var}->n_{self.c_name}++;'] + 'ynl_attr_for_each_nested(attr2, attr) {', + '\tif (ynl_attr_validate(yarg, attr2))', + '\t\treturn YNL_PARSE_CB_ERROR;', + f'\t{var}->n_{self.c_name}++;', + '}'] return get_lines, None, local_vars From acf4da17deada7f8b120e051aa6c9cac40dbd83b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:48 -0700 Subject: [PATCH 0766/2924] netlink: specs: rt-link: add an attr layer around alt-ifname alt-ifname attr is directly placed in requests (as an alternative to ifname) but in responses its wrapped up in IFLA_PROP_LIST and only there is may be multi-attr. See rtnl_fill_prop_list(). Fixes: b2f63d904e72 ("doc/netlink: Add spec for rt link messages") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 31238455f8e9d2..200e9a7e5b11d3 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -1113,11 +1113,10 @@ attribute-sets: - name: prop-list type: nest - nested-attributes: link-attrs + nested-attributes: prop-list-link-attrs - name: alt-ifname type: string - multi-attr: true - name: perm-address type: binary @@ -1163,6 +1162,13 @@ attribute-sets: - name: netns-immutable type: u8 + - + name: prop-list-link-attrs + subset-of: link-attrs + attributes: + - + name: alt-ifname + multi-attr: true - name: af-spec-attrs attributes: @@ -2453,7 +2459,6 @@ operations: - min-mtu - max-mtu - prop-list - - alt-ifname - perm-address - proto-down-reason - parent-dev-name From 540201c0ef7e8e7b169f68a238ade931a81a31a6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:49 -0700 Subject: [PATCH 0767/2924] netlink: specs: rtnetlink: attribute naming corrections Some attribute names diverge in very minor ways from the C names. These are most likely typos, and they prevent the C codegen from working. Fixes: bc515ed06652 ("netlink: specs: Add a spec for neighbor tables in rtnetlink") Fixes: b2f63d904e72 ("doc/netlink: Add spec for rt link messages") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 6 +++--- Documentation/netlink/specs/rt_neigh.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 200e9a7e5b11d3..03323d7f58dcd3 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -1591,7 +1591,7 @@ attribute-sets: name: nf-call-iptables type: u8 - - name: nf-call-ip6-tables + name: nf-call-ip6tables type: u8 - name: nf-call-arptables @@ -2083,7 +2083,7 @@ attribute-sets: name: id type: u16 - - name: flag + name: flags type: binary struct: ifla-vlan-flags - @@ -2171,7 +2171,7 @@ attribute-sets: type: binary struct: ifla-cacheinfo - - name: icmp6-stats + name: icmp6stats type: binary struct: ifla-icmp6-stats - diff --git a/Documentation/netlink/specs/rt_neigh.yaml b/Documentation/netlink/specs/rt_neigh.yaml index e670b6dc07be4f..a1e137a16abd56 100644 --- a/Documentation/netlink/specs/rt_neigh.yaml +++ b/Documentation/netlink/specs/rt_neigh.yaml @@ -189,7 +189,7 @@ attribute-sets: type: binary display-hint: ipv4 - - name: lladr + name: lladdr type: binary display-hint: mac - From beb3c5ad8829b52057f48a776a9d9558b98c157f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:50 -0700 Subject: [PATCH 0768/2924] netlink: specs: rt-link: adjust mctp attribute naming MCTP attribute naming is inconsistent. In C we have: IFLA_MCTP_NET, IFLA_MCTP_PHYS_BINDING, ^^^^ but in YAML: - mctp-net - phys-binding ^ no "mctp" It's unclear whether the "mctp" part of the name is supposed to be a prefix or part of attribute name. Make it a prefix, seems cleaner, even tho technically phys-binding was added later. Fixes: b2f63d904e72 ("doc/netlink: Add spec for rt link messages") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 03323d7f58dcd3..6b9d5ee87d93a8 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -2185,9 +2185,10 @@ attribute-sets: type: u32 - name: mctp-attrs + name-prefix: ifla-mctp- attributes: - - name: mctp-net + name: net type: u32 - name: phys-binding From e31f86ee4b9ccb844baf2131da8e5d4d6f23aa1d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:51 -0700 Subject: [PATCH 0769/2924] netlink: specs: rt-neigh: prefix struct nfmsg members with ndm Attach ndm- to all members of struct nfmsg. We could possibly use name-prefix just for C, but I don't think we have any precedent for using name-prefix on structs, and other rtnetlink sub-specs give full names for fixed header struct members. Fixes: bc515ed06652 ("netlink: specs: Add a spec for neighbor tables in rtnetlink") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_neigh.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/netlink/specs/rt_neigh.yaml b/Documentation/netlink/specs/rt_neigh.yaml index a1e137a16abd56..a843caa72259e1 100644 --- a/Documentation/netlink/specs/rt_neigh.yaml +++ b/Documentation/netlink/specs/rt_neigh.yaml @@ -13,25 +13,25 @@ definitions: type: struct members: - - name: family + name: ndm-family type: u8 - - name: pad + name: ndm-pad type: pad len: 3 - - name: ifindex + name: ndm-ifindex type: s32 - - name: state + name: ndm-state type: u16 enum: nud-state - - name: flags + name: ndm-flags type: u8 enum: ntf-flags - - name: type + name: ndm-type type: u8 enum: rtm-type - From 36355ddfe8955f226a88a543ed354b9f6b84cd70 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 14 Apr 2025 22:04:34 +0200 Subject: [PATCH 0770/2924] net: b53: enable BPDU reception for management port For STP to work, receiving BPDUs is essential, but the appropriate bit was never set. Without GC_RX_BPDU_EN, the switch chip will filter all BPDUs, even if an appropriate PVID VLAN was setup. Fixes: ff39c2d68679 ("net: dsa: b53: Add bridge support") Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250414200434.194422-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 61d164ffb3ae97..e5ba718979064b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -737,6 +737,15 @@ static void b53_enable_mib(struct b53_device *dev) b53_write8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, gc); } +static void b53_enable_stp(struct b53_device *dev) +{ + u8 gc; + + b53_read8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, &gc); + gc |= GC_RX_BPDU_EN; + b53_write8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, gc); +} + static u16 b53_default_pvid(struct b53_device *dev) { if (is5325(dev) || is5365(dev)) @@ -876,6 +885,7 @@ static int b53_switch_reset(struct b53_device *dev) } b53_enable_mib(dev); + b53_enable_stp(dev); return b53_flush_arl(dev, FAST_AGE_STATIC); } From eb25de13bd9cf025413a04f25e715d0e99847e30 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 14 Apr 2025 22:00:20 +0200 Subject: [PATCH 0771/2924] net: bridge: switchdev: do not notify new brentries as changed When adding a bridge vlan that is pvid or untagged after the vlan has already been added to any other switchdev backed port, the vlan change will be propagated as changed, since the flags change. This causes the vlan to not be added to the hardware for DSA switches, since the DSA handler ignores any vlans for the CPU or DSA ports that are changed. E.g. the following order of operations would work: $ ip link add swbridge type bridge vlan_filtering 1 vlan_default_pvid 0 $ ip link set lan1 master swbridge $ bridge vlan add dev swbridge vid 1 pvid untagged self $ bridge vlan add dev lan1 vid 1 pvid untagged but this order would break: $ ip link add swbridge type bridge vlan_filtering 1 vlan_default_pvid 0 $ ip link set lan1 master swbridge $ bridge vlan add dev lan1 vid 1 pvid untagged $ bridge vlan add dev swbridge vid 1 pvid untagged self Additionally, the vlan on the bridge itself would become undeletable: $ bridge vlan port vlan-id lan1 1 PVID Egress Untagged swbridge 1 PVID Egress Untagged $ bridge vlan del dev swbridge vid 1 self $ bridge vlan port vlan-id lan1 1 PVID Egress Untagged swbridge 1 Egress Untagged since the vlan was never added to DSA's vlan list, so deleting it will cause an error, causing the bridge code to not remove it. Fix this by checking if flags changed only for vlans that are already brentry and pass changed as false for those that become brentries, as these are a new vlan (member) from the switchdev point of view. Since *changed is set to true for becomes_brentry = true regardless of would_change's value, this will not change any rtnetlink notification delivery, just the value passed on to switchdev in vlan->changed. Fixes: 8d23a54f5bee ("net: bridge: switchdev: differentiate new VLANs from changed ones") Reviewed-by: Vladimir Oltean Signed-off-by: Jonas Gorski Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250414200020.192715-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- net/bridge/br_vlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index d9a69ec9affe59..939a3aa78d5c46 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -715,8 +715,8 @@ static int br_vlan_add_existing(struct net_bridge *br, u16 flags, bool *changed, struct netlink_ext_ack *extack) { - bool would_change = __vlan_flags_would_change(vlan, flags); bool becomes_brentry = false; + bool would_change = false; int err; if (!br_vlan_is_brentry(vlan)) { @@ -725,6 +725,8 @@ static int br_vlan_add_existing(struct net_bridge *br, return -EINVAL; becomes_brentry = true; + } else { + would_change = __vlan_flags_would_change(vlan, flags); } /* Master VLANs that aren't brentries weren't notified before, From b2727326d0a53709380aa147018085d71a6d4843 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 15 Apr 2025 08:59:09 +0530 Subject: [PATCH 0772/2924] net: txgbe: fix memory leak in txgbe_probe() error path When txgbe_sw_init() is called, memory is allocated for wx->rss_key in wx_init_rss_key(). However, in txgbe_probe() function, the subsequent error paths after txgbe_sw_init() don't free the rss_key. Fix that by freeing it in error path along with wx->mac_table. Also change the label to which execution jumps when txgbe_sw_init() fails, because otherwise, it could lead to a double free for rss_key, when the mac_table allocation fails in wx_sw_init(). Fixes: 937d46ecc5f9 ("net: wangxun: add ethtool_ops for channel number") Reported-by: Jiawen Wu Signed-off-by: Abdun Nihaal Reviewed-by: Jiawen Wu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250415032910.13139-1-abdun.nihaal@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index a2e245e3b01683..38206a46693bcd 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -611,7 +611,7 @@ static int txgbe_probe(struct pci_dev *pdev, /* setup the private structure */ err = txgbe_sw_init(wx); if (err) - goto err_free_mac_table; + goto err_pci_release_regions; /* check if flash load is done after hw power up */ err = wx_check_flash_load(wx, TXGBE_SPI_ILDR_STATUS_PERST); @@ -769,6 +769,7 @@ static int txgbe_probe(struct pci_dev *pdev, wx_clear_interrupt_scheme(wx); wx_control_hw(wx, false); err_free_mac_table: + kfree(wx->rss_key); kfree(wx->mac_table); err_pci_release_regions: pci_release_selected_regions(pdev, From c84f6ce918a9e6f4996597cbc62536bbf2247c96 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:28:50 +0300 Subject: [PATCH 0773/2924] net: dsa: mv88e6xxx: avoid unregistering devlink regions which were never registered Russell King reports that a system with mv88e6xxx dereferences a NULL pointer when unbinding this driver: https://lore.kernel.org/netdev/Z_lRkMlTJ1KQ0kVX@shell.armlinux.org.uk/ The crash seems to be in devlink_region_destroy(), which is not NULL tolerant but is given a NULL devlink global region pointer. At least on some chips, some devlink regions are conditionally registered since the blamed commit, see mv88e6xxx_setup_devlink_regions_global(): if (cond && !cond(chip)) continue; These are MV88E6XXX_REGION_STU and MV88E6XXX_REGION_PVT. If the chip does not have an STU or PVT, it should crash like this. To fix the issue, avoid unregistering those regions which are NULL, i.e. were skipped at mv88e6xxx_setup_devlink_regions_global() time. Fixes: 836021a2d0e0 ("net: dsa: mv88e6xxx: Export cross-chip PVT as devlink region") Tested-by: Russell King (Oracle) Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414212850.2953957-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/devlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 795c8df7b6a743..195460a0a0d418 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -736,7 +736,8 @@ void mv88e6xxx_teardown_devlink_regions_global(struct dsa_switch *ds) int i; for (i = 0; i < ARRAY_SIZE(mv88e6xxx_regions); i++) - dsa_devlink_region_destroy(chip->regions[i]); + if (chip->regions[i]) + dsa_devlink_region_destroy(chip->regions[i]); } void mv88e6xxx_teardown_devlink_regions_port(struct dsa_switch *ds, int port) From ea08dfc35f83cfc73493c52f63ae4f2e29edfe8d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:29:13 +0300 Subject: [PATCH 0774/2924] net: dsa: mv88e6xxx: fix -ENOENT when deleting VLANs and MST is unsupported Russell King reports that on the ZII dev rev B, deleting a bridge VLAN from a user port fails with -ENOENT: https://lore.kernel.org/netdev/Z_lQXNP0s5-IiJzd@shell.armlinux.org.uk/ This comes from mv88e6xxx_port_vlan_leave() -> mv88e6xxx_mst_put(), which tries to find an MST entry in &chip->msts associated with the SID, but fails and returns -ENOENT as such. But we know that this chip does not support MST at all, so that is not surprising. The question is why does the guard in mv88e6xxx_mst_put() not exit early: if (!sid) return 0; And the answer seems to be simple: the sid comes from vlan.sid which supposedly was previously populated by mv88e6xxx_vtu_get(). But some chip->info->ops->vtu_getnext() implementations do not populate vlan.sid, for example see mv88e6185_g1_vtu_getnext(). In that case, later in mv88e6xxx_port_vlan_leave() we are using a garbage sid which is just residual stack memory. Testing for sid == 0 covers all cases of a non-bridge VLAN or a bridge VLAN mapped to the default MSTI. For some chips, SID 0 is valid and installed by mv88e6xxx_stu_setup(). A chip which does not support the STU would implicitly only support mapping all VLANs to the default MSTI, so although SID 0 is not valid, it would be sufficient, if we were to zero-initialize the vlan structure, to fix the bug, due to the coincidence that a test for vlan.sid == 0 already exists and leads to the same (correct) behavior. Another option which would be sufficient would be to add a test for mv88e6xxx_has_stu() inside mv88e6xxx_mst_put(), symmetric to the one which already exists in mv88e6xxx_mst_get(). But that placement means the caller will have to dereference vlan.sid, which means it will access uninitialized memory, which is not nice even if it ignores it later. So we end up making both modifications, in order to not rely just on the sid == 0 coincidence, but also to avoid having uninitialized structure fields which might get temporarily accessed. Fixes: acaf4d2e36b3 ("net: dsa: mv88e6xxx: MST Offloading") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414212913.2955253-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 29a89ab4b78946..08db846cda8dec 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1852,6 +1852,8 @@ static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, if (!chip->info->ops->vtu_getnext) return -EOPNOTSUPP; + memset(entry, 0, sizeof(*entry)); + entry->vid = vid ? vid - 1 : mv88e6xxx_max_vid(chip); entry->valid = false; @@ -1960,7 +1962,16 @@ static int mv88e6xxx_mst_put(struct mv88e6xxx_chip *chip, u8 sid) struct mv88e6xxx_mst *mst, *tmp; int err; - if (!sid) + /* If the SID is zero, it is for a VLAN mapped to the default MSTI, + * and mv88e6xxx_stu_setup() made sure it is always present, and thus, + * should not be removed here. + * + * If the chip lacks STU support, numerically the "sid" variable will + * happen to also be zero, but we don't want to rely on that fact, so + * we explicitly test that first. In that case, there is also nothing + * to do here. + */ + if (!mv88e6xxx_has_stu(chip) || !sid) return 0; list_for_each_entry_safe(mst, tmp, &chip->msts, node) { From 7afb5fb42d4950f33af2732b8147c552659f79b7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:29:30 +0300 Subject: [PATCH 0775/2924] net: dsa: clean up FDB, MDB, VLAN entries on unbind As explained in many places such as commit b117e1e8a86d ("net: dsa: delete dsa_legacy_fdb_add and dsa_legacy_fdb_del"), DSA is written given the assumption that higher layers have balanced additions/deletions. As such, it only makes sense to be extremely vocal when those assumptions are violated and the driver unbinds with entries still present. But Ido Schimmel points out a very simple situation where that is wrong: https://lore.kernel.org/netdev/ZDazSM5UsPPjQuKr@shredder/ (also briefly discussed by me in the aforementioned commit). Basically, while the bridge bypass operations are not something that DSA explicitly documents, and for the majority of DSA drivers this API simply causes them to go to promiscuous mode, that isn't the case for all drivers. Some have the necessary requirements for bridge bypass operations to do something useful - see dsa_switch_supports_uc_filtering(). Although in tools/testing/selftests/net/forwarding/local_termination.sh, we made an effort to popularize better mechanisms to manage address filters on DSA interfaces from user space - namely macvlan for unicast, and setsockopt(IP_ADD_MEMBERSHIP) - through mtools - for multicast, the fact is that 'bridge fdb add ... self static local' also exists as kernel UAPI, and might be useful to someone, even if only for a quick hack. It seems counter-productive to block that path by implementing shim .ndo_fdb_add and .ndo_fdb_del operations which just return -EOPNOTSUPP in order to prevent the ndo_dflt_fdb_add() and ndo_dflt_fdb_del() from running, although we could do that. Accepting that cleanup is necessary seems to be the only option. Especially since we appear to be coming back at this from a different angle as well. Russell King is noticing that the WARN_ON() triggers even for VLANs: https://lore.kernel.org/netdev/Z_li8Bj8bD4-BYKQ@shell.armlinux.org.uk/ What happens in the bug report above is that dsa_port_do_vlan_del() fails, then the VLAN entry lingers on, and then we warn on unbind and leak it. This is not a straight revert of the blamed commit, but we now add an informational print to the kernel log (to still have a way to see that bugs exist), and some extra comments gathered from past years' experience, to justify the logic. Fixes: 0832cd9f1f02 ("net: dsa: warn if port lists aren't empty in dsa_port_teardown") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414212930.2956310-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index e827775baf2ee1..e7e32956070aaa 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1478,12 +1478,44 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) static void dsa_switch_release_ports(struct dsa_switch *ds) { + struct dsa_mac_addr *a, *tmp; struct dsa_port *dp, *next; + struct dsa_vlan *v, *n; dsa_switch_for_each_port_safe(dp, next, ds) { - WARN_ON(!list_empty(&dp->fdbs)); - WARN_ON(!list_empty(&dp->mdbs)); - WARN_ON(!list_empty(&dp->vlans)); + /* These are either entries that upper layers lost track of + * (probably due to bugs), or installed through interfaces + * where one does not necessarily have to remove them, like + * ndo_dflt_fdb_add(). + */ + list_for_each_entry_safe(a, tmp, &dp->fdbs, list) { + dev_info(ds->dev, + "Cleaning up unicast address %pM vid %u from port %d\n", + a->addr, a->vid, dp->index); + list_del(&a->list); + kfree(a); + } + + list_for_each_entry_safe(a, tmp, &dp->mdbs, list) { + dev_info(ds->dev, + "Cleaning up multicast address %pM vid %u from port %d\n", + a->addr, a->vid, dp->index); + list_del(&a->list); + kfree(a); + } + + /* These are entries that upper layers have lost track of, + * probably due to bugs, but also due to dsa_port_do_vlan_del() + * having failed and the VLAN entry still lingering on. + */ + list_for_each_entry_safe(v, n, &dp->vlans, list) { + dev_info(ds->dev, + "Cleaning up vid %u from port %d\n", + v->vid, dp->index); + list_del(&v->list); + kfree(v); + } + list_del(&dp->list); kfree(dp); } From 8bf108d7161ffc6880ad13a0cc109de3cf631727 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:30:01 +0300 Subject: [PATCH 0776/2924] net: dsa: free routing table on probe failure If complete = true in dsa_tree_setup(), it means that we are the last switch of the tree which is successfully probing, and we should be setting up all switches from our probe path. After "complete" becomes true, dsa_tree_setup_cpu_ports() or any subsequent function may fail. If that happens, the entire tree setup is in limbo: the first N-1 switches have successfully finished probing (doing nothing but having allocated persistent memory in the tree's dst->ports, and maybe dst->rtable), and switch N failed to probe, ending the tree setup process before anything is tangible from the user's PoV. If switch N fails to probe, its memory (ports) will be freed and removed from dst->ports. However, the dst->rtable elements pointing to its ports, as created by dsa_link_touch(), will remain there, and will lead to use-after-free if dereferenced. If dsa_tree_setup_switches() returns -EPROBE_DEFER, which is entirely possible because that is where ds->ops->setup() is, we get a kasan report like this: ================================================================== BUG: KASAN: slab-use-after-free in mv88e6xxx_setup_upstream_port+0x240/0x568 Read of size 8 at addr ffff000004f56020 by task kworker/u8:3/42 Call trace: __asan_report_load8_noabort+0x20/0x30 mv88e6xxx_setup_upstream_port+0x240/0x568 mv88e6xxx_setup+0xebc/0x1eb0 dsa_register_switch+0x1af4/0x2ae0 mv88e6xxx_register_switch+0x1b8/0x2a8 mv88e6xxx_probe+0xc4c/0xf60 mdio_probe+0x78/0xb8 really_probe+0x2b8/0x5a8 __driver_probe_device+0x164/0x298 driver_probe_device+0x78/0x258 __device_attach_driver+0x274/0x350 Allocated by task 42: __kasan_kmalloc+0x84/0xa0 __kmalloc_cache_noprof+0x298/0x490 dsa_switch_touch_ports+0x174/0x3d8 dsa_register_switch+0x800/0x2ae0 mv88e6xxx_register_switch+0x1b8/0x2a8 mv88e6xxx_probe+0xc4c/0xf60 mdio_probe+0x78/0xb8 really_probe+0x2b8/0x5a8 __driver_probe_device+0x164/0x298 driver_probe_device+0x78/0x258 __device_attach_driver+0x274/0x350 Freed by task 42: __kasan_slab_free+0x48/0x68 kfree+0x138/0x418 dsa_register_switch+0x2694/0x2ae0 mv88e6xxx_register_switch+0x1b8/0x2a8 mv88e6xxx_probe+0xc4c/0xf60 mdio_probe+0x78/0xb8 really_probe+0x2b8/0x5a8 __driver_probe_device+0x164/0x298 driver_probe_device+0x78/0x258 __device_attach_driver+0x274/0x350 The simplest way to fix the bug is to delete the routing table in its entirety. dsa_tree_setup_routing_table() has no problem in regenerating it even if we deleted links between ports other than those of switch N, because dsa_link_touch() first checks whether the port pair already exists in dst->rtable, allocating if not. The deletion of the routing table in its entirety already exists in dsa_tree_teardown(), so refactor that into a function that can also be called from the tree setup error path. In my analysis of the commit to blame, it is the one which added dsa_link elements to dst->rtable. Prior to that, each switch had its own ds->rtable which is freed when the switch fails to probe. But the tree is potentially persistent memory. Fixes: c5f51765a1f6 ("net: dsa: list DSA links in the fabric") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414213001.2957964-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index e7e32956070aaa..436a7e1b412ade 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -862,6 +862,16 @@ static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst) kfree(dst->lags); } +static void dsa_tree_teardown_routing_table(struct dsa_switch_tree *dst) +{ + struct dsa_link *dl, *next; + + list_for_each_entry_safe(dl, next, &dst->rtable, list) { + list_del(&dl->list); + kfree(dl); + } +} + static int dsa_tree_setup(struct dsa_switch_tree *dst) { bool complete; @@ -879,7 +889,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) err = dsa_tree_setup_cpu_ports(dst); if (err) - return err; + goto teardown_rtable; err = dsa_tree_setup_switches(dst); if (err) @@ -911,14 +921,14 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) dsa_tree_teardown_switches(dst); teardown_cpu_ports: dsa_tree_teardown_cpu_ports(dst); +teardown_rtable: + dsa_tree_teardown_routing_table(dst); return err; } static void dsa_tree_teardown(struct dsa_switch_tree *dst) { - struct dsa_link *dl, *next; - if (!dst->setup) return; @@ -932,10 +942,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_cpu_ports(dst); - list_for_each_entry_safe(dl, next, &dst->rtable, list) { - list_del(&dl->list); - kfree(dl); - } + dsa_tree_teardown_routing_table(dst); pr_info("DSA: tree %d torn down\n", dst->index); From 514eff7b0aa1c5eb645ddbb8676ef3e2d88a8b99 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:30:20 +0300 Subject: [PATCH 0777/2924] net: dsa: avoid refcount warnings when ds->ops->tag_8021q_vlan_del() fails This is very similar to the problem and solution from commit 232deb3f9567 ("net: dsa: avoid refcount warnings when ->port_{fdb,mdb}_del returns error"), except for the dsa_port_do_tag_8021q_vlan_del() operation. Fixes: c64b9c05045a ("net: dsa: tag_8021q: add proper cross-chip notifier support") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414213020.2959021-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_8021q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 3ee53e28ec2e9f..53e03fd8071b4a 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -197,7 +197,7 @@ static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) err = ds->ops->tag_8021q_vlan_del(ds, port, vid); if (err) { - refcount_inc(&v->refcount); + refcount_set(&v->refcount, 1); return err; } From 2a5970d5aaff8f3e33ce3bfaa403ae88c40de40d Mon Sep 17 00:00:00 2001 From: Sagi Maimon Date: Tue, 15 Apr 2025 08:31:31 +0300 Subject: [PATCH 0778/2924] ptp: ocp: fix start time alignment in ptp_ocp_signal_set In ptp_ocp_signal_set, the start time for periodic signals is not aligned to the next period boundary. The current code rounds up the start time and divides by the period but fails to multiply back by the period, causing misaligned signal starts. Fix this by multiplying the rounded-up value by the period to ensure the start time is the closest next period. Fixes: 4bd46bb037f8e ("ptp: ocp: Use DIV64_U64_ROUND_UP for rounding.") Signed-off-by: Sagi Maimon Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250415053131.129413-1-maimon.sagi@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 7945c6be1f7ce4..faf6e027f89ab4 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -2067,6 +2067,7 @@ ptp_ocp_signal_set(struct ptp_ocp *bp, int gen, struct ptp_ocp_signal *s) if (!s->start) { /* roundup() does not work on 32-bit systems */ s->start = DIV64_U64_ROUND_UP(start_ns, s->period); + s->start *= s->period; s->start = ktime_add(s->start, s->phase); } From 4798cfa2097f0833d54d8f5ce20ef14631917839 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Apr 2025 08:15:52 -0700 Subject: [PATCH 0779/2924] net: don't try to ops lock uninitialized devs We need to be careful when operating on dev while in rtnl_create_link(). Some devices (vxlan) initialize netdev_ops in ->newlink, so later on. Avoid using netdev_lock_ops(), the device isn't registered so we cannot legally call its ops or generate any notifications for it. netdev_ops_assert_locked_or_invisible() is safe to use, it checks registration status first. Reported-by: syzbot+de1c7d68a10e3f123bdd@syzkaller.appspotmail.com Fixes: 04efcee6ef8d ("net: hold instance lock during NETDEV_CHANGE") Acked-by: Stanislav Fomichev Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250415151552.768373-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 ++ net/core/rtnetlink.c | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 5fcbc66d865e2f..1be7cb73a6024f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1520,6 +1520,8 @@ EXPORT_SYMBOL(netdev_features_change); void netif_state_change(struct net_device *dev) { + netdev_ops_assert_locked_or_invisible(dev); + if (dev->flags & IFF_UP) { struct netdev_notifier_change_info change_info = { .info.dev = dev, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 39a5b72e861f68..c5a7f41982a575 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3676,11 +3676,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, nla_len(tb[IFLA_BROADCAST])); if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_OPERSTATE]) { - netdev_lock_ops(dev); + if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); - netdev_unlock_ops(dev); - } if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); if (tb[IFLA_GROUP]) From ec120093180b9d92b0c84cb89a205876f9a4cb40 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:17 +0800 Subject: [PATCH 0780/2924] selftests: ublk: fix ublk_find_tgt() Bounds check for iterator variable `i` is missed, so add it and fix ublk_find_tgt(). Cc: Johannes Thumshirn Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 3 +-- tools/testing/selftests/ublk/kublk.h | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 91c282bc767449..74cf70b2f28e51 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -14,13 +14,12 @@ static const struct ublk_tgt_ops *tgt_ops_list[] = { static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) { - const struct ublk_tgt_ops *ops; int i; if (name == NULL) return NULL; - for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++) + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) if (strcmp(tgt_ops_list[i]->name, name) == 0) return tgt_ops_list[i]; return NULL; diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 760ff8ffb81070..73294f6e3e49f1 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -30,6 +30,8 @@ #define min(a, b) ((a) < (b) ? (a) : (b)) #endif +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + /****************** part 1: libublk ********************/ #define CTRL_DEV "/dev/ublk-control" From 9cad26d66b7a6306fa1e3cf64e30941afdadf6c8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:18 +0800 Subject: [PATCH 0781/2924] selftests: ublk: add io_uring uapi header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add io_uring UAPI header so that ublk can work with latest uapi definition. Fix the following build failure: stripe.c: In function ‘stripe_to_uring_op’: stripe.c:120:29: error: ‘IORING_OP_READV_FIXED’ undeclared (first use in this function); did you mean ‘IORING_OP_READ_FIXED’? 120 | return zc ? IORING_OP_READV_FIXED : IORING_OP_READV; | ^~~~~~~~~~~~~~~~~~~~~ | IORING_OP_READ_FIXED Reviewed-by: Johannes Thumshirn Fixes: 57ed58c13256 ("selftests: ublk: enable zero copy for stripe target") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 73294f6e3e49f1..eccf12360a14a2 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include "ublk_dep.h" From 8d31a7e505340a69528cbccb0894ef530f123cbb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:19 +0800 Subject: [PATCH 0782/2924] selftests: ublk: cleanup backfile automatically Use global array of $UBLK_BACKFILES for storing all backfile name, then clean them automatically. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_common.sh | 36 ++++++++++++------- tools/testing/selftests/ublk/test_loop_01.sh | 8 ++--- tools/testing/selftests/ublk/test_loop_02.sh | 8 ++--- tools/testing/selftests/ublk/test_loop_03.sh | 8 ++--- tools/testing/selftests/ublk/test_loop_04.sh | 9 +++-- tools/testing/selftests/ublk/test_loop_05.sh | 8 ++--- .../testing/selftests/ublk/test_stress_01.sh | 16 ++++----- .../testing/selftests/ublk/test_stress_02.sh | 16 ++++----- .../testing/selftests/ublk/test_stripe_01.sh | 12 +++---- .../testing/selftests/ublk/test_stripe_02.sh | 13 +++---- .../testing/selftests/ublk/test_stripe_03.sh | 12 +++---- .../testing/selftests/ublk/test_stripe_04.sh | 13 +++---- 12 files changed, 70 insertions(+), 89 deletions(-) diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index a88b359432279b..c7d04da7235aab 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -30,18 +30,26 @@ _run_fio_verify_io() { } _create_backfile() { - local my_size=$1 - local my_file + local index=$1 + local new_size=$2 + local old_file + local new_file - my_file=$(mktemp ublk_file_"${my_size}"_XXXXX) - truncate -s "${my_size}" "${my_file}" - echo "$my_file" + old_file="${UBLK_BACKFILES[$index]}" + [ -f "$old_file" ] && rm -f "$old_file" + + new_file=$(mktemp ublk_file_"${new_size}"_XXXXX) + truncate -s "${new_size}" "${new_file}" + UBLK_BACKFILES["$index"]="$new_file" } -_remove_backfile() { - local file=$1 +_remove_files() { + local file - [ -f "$file" ] && rm -f "$file" + for file in "${UBLK_BACKFILES[@]}"; do + [ -f "$file" ] && rm -f "$file" + done + [ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP" } _create_tmp_dir() { @@ -129,7 +137,10 @@ _show_result() echo "$1 : [FAIL]" fi fi - [ "$2" -ne 0 ] && exit "$2" + if [ "$2" -ne 0 ]; then + _remove_files + exit "$2" + fi return 0 } @@ -138,16 +149,16 @@ _check_add_dev() { local tid=$1 local code=$2 - shift 2 + if [ "${code}" -ne 0 ]; then - _remove_test_files "$@" _show_result "${tid}" "${code}" fi } _cleanup_test() { "${UBLK_PROG}" del -a - rm -f "$UBLK_TMP" + + _remove_files } _have_feature() @@ -247,6 +258,7 @@ UBLK_TMP=$(mktemp ublk_test_XXXXX) UBLK_PROG=$(_ublk_test_top_dir)/kublk UBLK_TEST_QUIET=1 UBLK_TEST_SHOW_RESULT=1 +UBLK_BACKFILES=() export UBLK_PROG export UBLK_TEST_QUIET export UBLK_TEST_SHOW_RESULT diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh index 1ef8b6044777cb..833fa0dbc7009d 100755 --- a/tools/testing/selftests/ublk/test_loop_01.sh +++ b/tools/testing/selftests/ublk/test_loop_01.sh @@ -12,10 +12,10 @@ fi _prep_test "loop" "write and verify test" -backfile_0=$(_create_backfile 256M) +_create_backfile 0 256M -dev_id=$(_add_ublk_dev -t loop "$backfile_0") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -23,6 +23,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh index 03863d825e07da..874568b3646b62 100755 --- a/tools/testing/selftests/ublk/test_loop_02.sh +++ b/tools/testing/selftests/ublk/test_loop_02.sh @@ -8,15 +8,13 @@ ERR_CODE=0 _prep_test "loop" "mkfs & mount & umount" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh index e9ca744de8b134..c30f797c642955 100755 --- a/tools/testing/selftests/ublk/test_loop_03.sh +++ b/tools/testing/selftests/ublk/test_loop_03.sh @@ -12,9 +12,9 @@ fi _prep_test "loop" "write and verify over zero copy" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -22,6 +22,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh index 1435422c38ec8c..b01d75b3214d21 100755 --- a/tools/testing/selftests/ublk/test_loop_04.sh +++ b/tools/testing/selftests/ublk/test_loop_04.sh @@ -8,15 +8,14 @@ ERR_CODE=0 _prep_test "loop" "mkfs & mount & umount with zero copy" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M + +dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh index 2e6e2e6978fca5..de21415330742e 100755 --- a/tools/testing/selftests/ublk/test_loop_05.sh +++ b/tools/testing/selftests/ublk/test_loop_05.sh @@ -12,10 +12,10 @@ fi _prep_test "loop" "write and verify test" -backfile_0=$(_create_backfile 256M) +_create_backfile 0 256M -dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -23,6 +23,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index a8be24532b24b7..4c37a2cf13a377 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -10,17 +10,13 @@ ublk_io_and_remove() { local size=$1 shift 1 - local backfile="" - if echo "$@" | grep -q "loop"; then - backfile=${*: -1} - fi + DEV_ID=$(_add_ublk_dev "$@") - _check_add_dev $TID $? "${backfile}" + _check_add_dev $TID $? [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then echo "/dev/ublkc${DEV_ID} isn't removed" - _remove_backfile "${backfile}" exit 255 fi } @@ -33,15 +29,15 @@ if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -BACK_FILE=$(_create_backfile 256M) -ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}" +_create_backfile 0 256M + +ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}" +ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" ERR_CODE=$? _cleanup_test "stress" -_remove_backfile "${BACK_FILE}" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 2159e4cc8140ec..4b6ad441d5000c 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -10,17 +10,13 @@ ublk_io_and_kill_daemon() { local size=$1 shift 1 - local backfile="" - if echo "$@" | grep -q "loop"; then - backfile=${*: -1} - fi + DEV_ID=$(_add_ublk_dev "$@") - _check_add_dev $TID $? "${backfile}" + _check_add_dev $TID $? [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then echo "/dev/ublkc${DEV_ID} isn't removed res ${res}" - _remove_backfile "${backfile}" exit 255 fi } @@ -33,15 +29,15 @@ if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -BACK_FILE=$(_create_backfile 256M) -ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}" +_create_backfile 0 256M + +ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}" +ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" ERR_CODE=$? _cleanup_test "stress" -_remove_backfile "${BACK_FILE}" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh index 7e387ef656ea89..4e4f0fdf3c9b5b 100755 --- a/tools/testing/selftests/ublk/test_stripe_01.sh +++ b/tools/testing/selftests/ublk/test_stripe_01.sh @@ -12,19 +12,15 @@ fi _prep_test "stripe" "write and verify test" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) +_create_backfile 0 256M +_create_backfile 1 256M -dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh index e8a45fa82dde04..5820ab2efba474 100755 --- a/tools/testing/selftests/ublk/test_stripe_02.sh +++ b/tools/testing/selftests/ublk/test_stripe_02.sh @@ -8,17 +8,14 @@ ERR_CODE=0 _prep_test "stripe" "mkfs & mount & umount" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "$backfile_0" "$backfile_1" +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh index c1b34af36145a8..20b977e27814c4 100755 --- a/tools/testing/selftests/ublk/test_stripe_03.sh +++ b/tools/testing/selftests/ublk/test_stripe_03.sh @@ -12,19 +12,15 @@ fi _prep_test "stripe" "write and verify test" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) +_create_backfile 0 256M +_create_backfile 1 256M -dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh index 1f2b642381d179..1b51ed2f1d843e 100755 --- a/tools/testing/selftests/ublk/test_stripe_04.sh +++ b/tools/testing/selftests/ublk/test_stripe_04.sh @@ -8,17 +8,14 @@ ERR_CODE=0 _prep_test "stripe" "mkfs & mount & umount on zero copy" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "$backfile_0" "$backfile_1" +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE From 573840ab90ad5bfc8711f0252cf88db028ad473e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:20 +0800 Subject: [PATCH 0783/2924] selftests: ublk: make sure _add_ublk_dev can return in sub-shell Detach ublk daemon from the starting process completely by double-fork and clearing its process group, so that `_add_ublk_dev` can return from sub-shell. Then it is more friendly for writing shell test script for adding/recovering ublk device. Prepare for running ublk test in parallel. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 30 +++++++++++++++---- tools/testing/selftests/ublk/test_common.sh | 15 +++++----- .../testing/selftests/ublk/test_stress_01.sh | 8 ++--- .../testing/selftests/ublk/test_stress_02.sh | 8 ++--- 4 files changed, 39 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 74cf70b2f28e51..381e31acaad94f 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -654,6 +654,8 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) if (write(evtfd, &id, sizeof(id)) != sizeof(id)) return -EINVAL; + close(evtfd); + return 0; } @@ -889,24 +891,40 @@ static int cmd_dev_add(struct dev_ctx *ctx) exit(-1); } - setsid(); res = fork(); if (res == 0) { + int res2; + + setsid(); + res2 = fork(); + if (res2 == 0) { + /* prepare for detaching */ + close(STDIN_FILENO); + close(STDOUT_FILENO); + close(STDERR_FILENO); run: - res = __cmd_dev_add(ctx); - return res; + res = __cmd_dev_add(ctx); + return res; + } else { + /* detached from the foreground task */ + exit(EXIT_SUCCESS); + } } else if (res > 0) { uint64_t id; + int exit_code = EXIT_FAILURE; res = read(ctx->_evtfd, &id, sizeof(id)); close(ctx->_evtfd); if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) { ctx->dev_id = id - 1; - return __cmd_dev_list(ctx); + if (__cmd_dev_list(ctx) >= 0) + exit_code = EXIT_SUCCESS; } - exit(EXIT_FAILURE); + /* wait for child and detach from it */ + wait(NULL); + exit(exit_code); } else { - return res; + exit(EXIT_FAILURE); } } diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index c7d04da7235aab..c43bd1d5c9c085 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -170,7 +170,6 @@ _have_feature() } _add_ublk_dev() { - local kublk_temp; local dev_id; if [ ! -c /dev/ublk-control ]; then @@ -182,17 +181,17 @@ _add_ublk_dev() { fi fi - kublk_temp=$(mktemp /tmp/kublk-XXXXXX) - if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then + if ! dev_id=$("${UBLK_PROG}" add "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then echo "fail to add ublk dev $*" - rm -f "${kublk_temp}" return 255 fi - - dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}') udevadm settle - rm -f "${kublk_temp}" - echo "${dev_id}" + + if [[ "$dev_id" =~ ^[0-9]+$ ]]; then + echo "${dev_id}" + else + return 255 + fi } # kill the ublk daemon and return ublk device state diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index 4c37a2cf13a377..61fdbdfe70bc47 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -4,19 +4,19 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh TID="stress_01" ERR_CODE=0 -DEV_ID=-1 ublk_io_and_remove() { local size=$1 + local dev_id shift 1 - DEV_ID=$(_add_ublk_dev "$@") + dev_id=$(_add_ublk_dev "$@") _check_add_dev $TID $? [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" - if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then - echo "/dev/ublkc${DEV_ID} isn't removed" + if ! __run_io_and_remove "$dev_id" "${size}" "no"; then + echo "/dev/ublkc$dev_id isn't removed" exit 255 fi } diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 4b6ad441d5000c..7643e58637c8b8 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -4,19 +4,19 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh TID="stress_02" ERR_CODE=0 -DEV_ID=-1 ublk_io_and_kill_daemon() { local size=$1 + local dev_id shift 1 - DEV_ID=$(_add_ublk_dev "$@") + dev_id=$(_add_ublk_dev "$@") _check_add_dev $TID $? [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" - if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then - echo "/dev/ublkc${DEV_ID} isn't removed res ${res}" + if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then + echo "/dev/ublkc$dev_id isn't removed res ${res}" exit 255 fi } From bb2cabf23568d74407a3881e81f43777f490299b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:21 +0800 Subject: [PATCH 0784/2924] selftests: ublk: run stress tests in parallel Run stress tests in parallel, meantime add shell local function to simplify the two stress tests. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-6-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_common.sh | 34 +++++++++++++++- .../testing/selftests/ublk/test_stress_01.sh | 39 +++++++------------ .../testing/selftests/ublk/test_stress_02.sh | 39 +++++++------------ 3 files changed, 63 insertions(+), 49 deletions(-) diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index c43bd1d5c9c085..87fd0c824b770b 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -230,7 +230,7 @@ __run_io_and_remove() local kill_server=$3 fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ - --rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \ + --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ --runtime=20 --time_based > /dev/null 2>&1 & sleep 2 if [ "${kill_server}" = "yes" ]; then @@ -248,6 +248,38 @@ __run_io_and_remove() wait } +run_io_and_remove() +{ + local size=$1 + local dev_id + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + if ! __run_io_and_remove "$dev_id" "${size}" "no"; then + echo "/dev/ublkc$dev_id isn't removed" + exit 255 + fi +} + +run_io_and_kill_daemon() +{ + local size=$1 + local dev_id + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" + if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then + echo "/dev/ublkc$dev_id isn't removed res ${res}" + exit 255 + fi +} + _ublk_test_top_dir() { cd "$(dirname "$0")" && pwd diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index 61fdbdfe70bc47..7d3150f057d440 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -7,37 +7,28 @@ ERR_CODE=0 ublk_io_and_remove() { - local size=$1 - local dev_id - shift 1 - - dev_id=$(_add_ublk_dev "$@") - _check_add_dev $TID $? - - [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" - if ! __run_io_and_remove "$dev_id" "${size}" "no"; then - echo "/dev/ublkc$dev_id isn't removed" - exit 255 + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE fi } -_prep_test "stress" "run IO and remove device" - -ublk_io_and_remove 8G -t null -q 4 -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" fi +_prep_test "stress" "run IO and remove device" + _create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M -ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +ublk_io_and_remove 8G -t null -q 4 & +ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait -ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" -ERR_CODE=$? _cleanup_test "stress" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 7643e58637c8b8..1a9065125ae1b2 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -5,39 +5,30 @@ TID="stress_02" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + ublk_io_and_kill_daemon() { - local size=$1 - local dev_id - shift 1 - - dev_id=$(_add_ublk_dev "$@") - _check_add_dev $TID $? - - [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" - if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then - echo "/dev/ublkc$dev_id isn't removed res ${res}" - exit 255 + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE fi } _prep_test "stress" "run IO and kill ublk server" -ublk_io_and_kill_daemon 8G -t null -q 4 -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi - _create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M -ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +ublk_io_and_kill_daemon 8G -t null -q 4 & +ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait -ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" -ERR_CODE=$? _cleanup_test "stress" _show_result $TID $ERR_CODE From d836590d9a9e1d822667e2720ef0d5e69a566aef Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:22 +0800 Subject: [PATCH 0785/2924] selftests: ublk: add two stress tests for zero copy feature Add stress_03 & stress_04 for covering zero copy feature. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-7-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 2 + .../testing/selftests/ublk/test_stress_03.sh | 38 +++++++++++++++++++ .../testing/selftests/ublk/test_stress_04.sh | 37 ++++++++++++++++++ 3 files changed, 77 insertions(+) create mode 100755 tools/testing/selftests/ublk/test_stress_03.sh create mode 100755 tools/testing/selftests/ublk/test_stress_04.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index c7781efea0f33c..7311e8f6bee7aa 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -21,6 +21,8 @@ TEST_PROGS += test_stripe_04.sh TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh +TEST_PROGS += test_stress_03.sh +TEST_PROGS += test_stress_04.sh TEST_GEN_PROGS_EXTENDED = kublk diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh new file mode 100755 index 00000000000000..e0854f71d35b9d --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_03" +ERR_CODE=0 + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and remove device(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_remove 8G -t null -q 4 -z & +ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "stress" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh new file mode 100755 index 00000000000000..1798a98387e887 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_04" +ERR_CODE=0 + +ublk_io_and_kill_daemon() +{ + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and kill ublk server(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_kill_daemon 8G -t null -q 4 -z & +ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "stress" +_show_result $TID $ERR_CODE From 62867a046a223e6eb771e23d2048e839c1d949d7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:23 +0800 Subject: [PATCH 0786/2924] selftests: ublk: setup ring with IORING_SETUP_SINGLE_ISSUER/IORING_SETUP_DEFER_TASKRUN It is observed that this way is more efficient for fast nvme backing file. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-8-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 381e31acaad94f..c2acd874f9af2e 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -346,7 +346,9 @@ static int ublk_queue_init(struct ublk_queue *q) } ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth, - IORING_SETUP_COOP_TASKRUN); + IORING_SETUP_COOP_TASKRUN | + IORING_SETUP_SINGLE_ISSUER | + IORING_SETUP_DEFER_TASKRUN); if (ret < 0) { ublk_err("ublk dev %d queue %d setup io_uring failed %d\n", q->dev->dev_info.dev_id, q->q_id, ret); From 2f0a692a93a585ead9ccffd0642694946d74411f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:24 +0800 Subject: [PATCH 0787/2924] selftests: ublk: set queue pthread's cpu affinity In NUMA machine, ublk IO performance is very sensitive with queue pthread's affinity setting. Retrieve queue's affinity and select the 1st cpu as queue thread's sched affinity, and it is observed that single cpu task affinity can get stable & good performance if client application is put on proper cpu. Dump this info when adding one ublk device. Use shmem to communicate queue's tid between parent and daemon. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-9-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 156 +++++++++++++++++++++++++-- tools/testing/selftests/ublk/kublk.h | 11 +- 2 files changed, 159 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index c2acd874f9af2e..1e21e1401a0808 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -206,10 +206,73 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev) }; } +static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len) +{ + unsigned done = 0; + int i; + + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, set)) + done += snprintf(&buf[done], len - done, "%d ", i); + } +} + +static void ublk_adjust_affinity(cpu_set_t *set) +{ + int j, updated = 0; + + /* + * Just keep the 1st CPU now. + * + * In future, auto affinity selection can be tried. + */ + for (j = 0; j < CPU_SETSIZE; j++) { + if (CPU_ISSET(j, set)) { + if (!updated) { + updated = 1; + continue; + } + CPU_CLR(j, set); + } + } +} + +/* Caller must free the allocated buffer */ +static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY, + .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF, + }; + cpu_set_t *buf; + int i, ret; + + buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues); + if (!buf) + return -ENOMEM; + + for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) { + data.data[0] = i; + data.len = sizeof(cpu_set_t); + data.addr = (__u64)&buf[i]; + + ret = __ublk_ctrl_cmd(ctrl_dev, &data); + if (ret < 0) { + free(buf); + return ret; + } + ublk_adjust_affinity(&buf[i]); + } + + *ptr_buf = buf; + return 0; +} + static void ublk_ctrl_dump(struct ublk_dev *dev) { struct ublksrv_ctrl_dev_info *info = &dev->dev_info; struct ublk_params p; + cpu_set_t *affinity; int ret; ret = ublk_ctrl_get_params(dev, &p); @@ -218,12 +281,31 @@ static void ublk_ctrl_dump(struct ublk_dev *dev) return; } + ret = ublk_ctrl_get_affinity(dev, &affinity); + if (ret < 0) { + ublk_err("failed to get affinity %m\n"); + return; + } + ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", info->dev_id, info->nr_hw_queues, info->queue_depth, 1 << p.basic.logical_bs_shift, p.basic.dev_sectors); ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", info->max_io_buf_bytes, info->ublksrv_pid, info->flags, ublk_dev_state_desc(dev)); + + if (affinity) { + char buf[512]; + int i; + + for (i = 0; i < info->nr_hw_queues; i++) { + ublk_print_cpu_set(&affinity[i], buf, sizeof(buf)); + printf("\tqueue %u: tid %d affinity(%s)\n", + i, dev->q[i].tid, buf); + } + free(affinity); + } + fflush(stdout); } @@ -603,9 +685,24 @@ static int ublk_process_io(struct ublk_queue *q) return reapped; } +static void ublk_queue_set_sched_affinity(const struct ublk_queue *q, + cpu_set_t *cpuset) +{ + if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0) + ublk_err("ublk dev %u queue %u set affinity failed", + q->dev->dev_info.dev_id, q->q_id); +} + +struct ublk_queue_info { + struct ublk_queue *q; + sem_t *queue_sem; + cpu_set_t *affinity; +}; + static void *ublk_io_handler_fn(void *data) { - struct ublk_queue *q = data; + struct ublk_queue_info *info = data; + struct ublk_queue *q = info->q; int dev_id = q->dev->dev_info.dev_id; int ret; @@ -615,6 +712,10 @@ static void *ublk_io_handler_fn(void *data) dev_id, q->q_id); return NULL; } + /* IO perf is sensitive with queue pthread affinity on NUMA machine*/ + ublk_queue_set_sched_affinity(q, info->affinity); + sem_post(info->queue_sem); + ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n", q->tid, dev_id, q->q_id); @@ -640,7 +741,7 @@ static void ublk_set_parameters(struct ublk_dev *dev) dev->dev_info.dev_id, ret); } -static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) +static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id) { uint64_t id; int evtfd = ctx->_evtfd; @@ -653,10 +754,14 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) else id = ERROR_EVTFD_DEVID; + if (dev && ctx->shadow_dev) + memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q)); + if (write(evtfd, &id, sizeof(id)) != sizeof(id)) return -EINVAL; close(evtfd); + shmdt(ctx->shadow_dev); return 0; } @@ -664,24 +769,46 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) { - int ret, i; - void *thread_ret; const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; + struct ublk_queue_info *qinfo; + cpu_set_t *affinity_buf; + void *thread_ret; + sem_t queue_sem; + int ret, i; ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__); + qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info), + dinfo->nr_hw_queues); + if (!qinfo) + return -ENOMEM; + + sem_init(&queue_sem, 0, 0); ret = ublk_dev_prep(ctx, dev); if (ret) return ret; + ret = ublk_ctrl_get_affinity(dev, &affinity_buf); + if (ret) + return ret; + for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; dev->q[i].q_id = i; + + qinfo[i].q = &dev->q[i]; + qinfo[i].queue_sem = &queue_sem; + qinfo[i].affinity = &affinity_buf[i]; pthread_create(&dev->q[i].thread, NULL, ublk_io_handler_fn, - &dev->q[i]); + &qinfo[i]); } + for (i = 0; i < dinfo->nr_hw_queues; i++) + sem_wait(&queue_sem); + free(qinfo); + free(affinity_buf); + /* everything is fine now, start us */ ublk_set_parameters(dev); ret = ublk_ctrl_start_dev(dev, getpid()); @@ -694,7 +821,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) if (ctx->fg) ublk_ctrl_dump(dev); else - ublk_send_dev_event(ctx, dev->dev_info.dev_id); + ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id); /* wait until we are terminated */ for (i = 0; i < dinfo->nr_hw_queues; i++) @@ -873,7 +1000,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) fail: if (ret < 0) - ublk_send_dev_event(ctx, -1); + ublk_send_dev_event(ctx, dev, -1); ublk_ctrl_deinit(dev); return ret; } @@ -887,6 +1014,16 @@ static int cmd_dev_add(struct dev_ctx *ctx) if (ctx->fg) goto run; + ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666); + if (ctx->_shmid < 0) { + ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno)); + exit(-1); + } + ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0); + if (ctx->shadow_dev == (struct ublk_dev *)-1) { + ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno)); + exit(-1); + } ctx->_evtfd = eventfd(0, 0); if (ctx->_evtfd < 0) { ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno)); @@ -922,6 +1059,8 @@ static int cmd_dev_add(struct dev_ctx *ctx) if (__cmd_dev_list(ctx) >= 0) exit_code = EXIT_SUCCESS; } + shmdt(ctx->shadow_dev); + shmctl(ctx->_shmid, IPC_RMID, NULL); /* wait for child and detach from it */ wait(NULL); exit(exit_code); @@ -988,6 +1127,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx) ublk_err("%s: can't get dev info from %d: %d\n", __func__, ctx->dev_id, ret); } else { + if (ctx->shadow_dev) + memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q)); + ublk_ctrl_dump(dev); } diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index eccf12360a14a2..85295d3e36cb71 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -20,10 +20,15 @@ #include #include #include +#include +#include #include #include -#include +#include + +/* allow ublk_dep.h to override ublk_cmd.h */ #include "ublk_dep.h" +#include #define __maybe_unused __attribute__((unused)) #define MAX_BACK_FILES 4 @@ -74,6 +79,10 @@ struct dev_ctx { unsigned int chunk_size; int _evtfd; + int _shmid; + + /* built from shmem, only for ublk_dump_dev() */ + struct ublk_dev *shadow_dev; }; struct ublk_ctrl_cmd_data { From 6c62fd04e8bfc06f37ccda0d12fd367591445954 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:25 +0800 Subject: [PATCH 0788/2924] selftests: ublk: increase max nr_queues and queue depth Increase max nr_queues to 32, and queue depth to 1024. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-10-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 2 +- tools/testing/selftests/ublk/kublk.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 1e21e1401a0808..5e805d358739f9 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1203,7 +1203,7 @@ static int cmd_dev_get_features(void) static int cmd_dev_help(char *exe) { printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe); - printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n"); + printf("\t default: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); printf("%s del [-n dev_id] -a \n", exe); printf("\t -a delete all devices -n delete specified device\n"); printf("%s list [-n dev_id] -a \n", exe); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 85295d3e36cb71..9b77137b87008b 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -50,8 +50,8 @@ #define UBLKSRV_IO_IDLE_SECS 20 #define UBLK_IO_MAX_BYTES (1 << 20) -#define UBLK_MAX_QUEUES 4 -#define UBLK_QUEUE_DEPTH 128 +#define UBLK_MAX_QUEUES 32 +#define UBLK_QUEUE_DEPTH 1024 #define UBLK_DBG_DEV (1U << 0) #define UBLK_DBG_QUEUE (1U << 1) From 810b88f3dcb6d04e274b37d05f421330e20a3714 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:26 +0800 Subject: [PATCH 0789/2924] selftests: ublk: support target specific command line Support target specific command line for making related command line code handling more readable & clean. Also helps for adding new features. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-11-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 59 +++++++++++++++++++++++---- tools/testing/selftests/ublk/kublk.h | 20 +++++++-- tools/testing/selftests/ublk/stripe.c | 28 ++++++++++++- 3 files changed, 95 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 5e805d358739f9..03b3d6427775f7 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -5,6 +5,8 @@ #include "kublk.h" +#define MAX_NR_TGT_ARG 64 + unsigned int ublk_dbg_mask = UBLK_LOG; static const struct ublk_tgt_ops *tgt_ops_list[] = { &null_tgt_ops, @@ -1202,12 +1204,25 @@ static int cmd_dev_get_features(void) static int cmd_dev_help(char *exe) { - printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe); - printf("\t default: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); + int i; + + printf("%s add -t [null|loop|stripe] [-q nr_queues] [-d depth] [-n dev_id]\n", exe); + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask]\n"); + printf("\t[target options] [backfile1] [backfile2] ...\n"); + printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); + + for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { + const struct ublk_tgt_ops *ops = tgt_ops_list[i]; + + if (ops->usage) + ops->usage(ops); + } + printf("\n"); + printf("%s del [-n dev_id] -a \n", exe); - printf("\t -a delete all devices -n delete specified device\n"); + printf("\t -a delete all devices -n delete specified device\n\n"); printf("%s list [-n dev_id] -a \n", exe); - printf("\t -a list all devices, -n list specified device, default -a \n"); + printf("\t -a list all devices, -n list specified device, default -a \n\n"); printf("%s features\n", exe); return 0; } @@ -1224,9 +1239,9 @@ int main(int argc, char *argv[]) { "quiet", 0, NULL, 0 }, { "zero_copy", 0, NULL, 'z' }, { "foreground", 0, NULL, 0 }, - { "chunk_size", 1, NULL, 0 }, { 0, 0, 0, 0 } }; + const struct ublk_tgt_ops *ops = NULL; int option_idx, opt; const char *cmd = argv[1]; struct dev_ctx ctx = { @@ -1234,13 +1249,15 @@ int main(int argc, char *argv[]) .nr_hw_queues = 2, .dev_id = -1, .tgt_type = "unknown", - .chunk_size = 65536, /* def chunk size is 64K */ }; int ret = -EINVAL, i; + int tgt_argc = 1; + char *tgt_argv[MAX_NR_TGT_ARG] = { NULL }; if (argc == 1) return ret; + opterr = 0; optind = 2; while ((opt = getopt_long(argc, argv, "t:n:d:q:az", longopts, &option_idx)) != -1) { @@ -1271,8 +1288,26 @@ int main(int argc, char *argv[]) ublk_dbg_mask = 0; if (!strcmp(longopts[option_idx].name, "foreground")) ctx.fg = 1; - if (!strcmp(longopts[option_idx].name, "chunk_size")) - ctx.chunk_size = strtol(optarg, NULL, 10); + break; + case '?': + /* + * target requires every option must have argument + */ + if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') { + fprintf(stderr, "every target option requires argument: %s %s\n", + argv[optind - 1], argv[optind]); + exit(EXIT_FAILURE); + } + + if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) { + tgt_argv[tgt_argc++] = argv[optind - 1]; + tgt_argv[tgt_argc++] = argv[optind]; + } else { + fprintf(stderr, "too many target options\n"); + exit(EXIT_FAILURE); + } + optind += 1; + break; } } @@ -1281,6 +1316,14 @@ int main(int argc, char *argv[]) ctx.files[ctx.nr_files++] = argv[i++]; } + ops = ublk_find_tgt(ctx.tgt_type); + if (ops && ops->parse_cmd_line) { + optind = 0; + + tgt_argv[0] = ctx.tgt_type; + ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv); + } + if (!strcmp(cmd, "add")) ret = cmd_dev_add(&ctx); else if (!strcmp(cmd, "del")) diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 9b77137b87008b..7b7446359c8f11 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -63,6 +63,11 @@ struct ublk_dev; struct ublk_queue; +struct stripe_ctx { + /* stripe */ + unsigned int chunk_size; +}; + struct dev_ctx { char tgt_type[16]; unsigned long flags; @@ -75,14 +80,15 @@ struct dev_ctx { unsigned int all:1; unsigned int fg:1; - /* stripe */ - unsigned int chunk_size; - int _evtfd; int _shmid; /* built from shmem, only for ublk_dump_dev() */ struct ublk_dev *shadow_dev; + + union { + struct stripe_ctx stripe; + }; }; struct ublk_ctrl_cmd_data { @@ -119,6 +125,14 @@ struct ublk_tgt_ops { int (*queue_io)(struct ublk_queue *, int tag); void (*tgt_io_done)(struct ublk_queue *, int tag, const struct io_uring_cqe *); + + /* + * Target specific command line handling + * + * each option requires argument for target command line + */ + void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); + void (*usage)(const struct ublk_tgt_ops *ops); }; struct ublk_tgt { diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 179731c3dd6fec..5dbd6392d83de2 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -281,7 +281,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, }, }; - unsigned chunk_size = ctx->chunk_size; + unsigned chunk_size = ctx->stripe.chunk_size; struct stripe_conf *conf; unsigned chunk_shift; loff_t bytes = 0; @@ -344,10 +344,36 @@ static void ublk_stripe_tgt_deinit(struct ublk_dev *dev) backing_file_tgt_deinit(dev); } +static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "chunk_size", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + + ctx->stripe.chunk_size = 65536; + while ((opt = getopt_long(argc, argv, "", + longopts, &option_idx)) != -1) { + switch (opt) { + case 0: + if (!strcmp(longopts[option_idx].name, "chunk_size")) + ctx->stripe.chunk_size = strtol(optarg, NULL, 10); + } + } +} + +static void ublk_stripe_usage(const struct ublk_tgt_ops *ops) +{ + printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n"); +} + const struct ublk_tgt_ops stripe_tgt_ops = { .name = "stripe", .init_tgt = ublk_stripe_tgt_init, .deinit_tgt = ublk_stripe_tgt_deinit, .queue_io = ublk_stripe_queue_io, .tgt_io_done = ublk_stripe_io_done, + .parse_cmd_line = ublk_stripe_cmd_line, + .usage = ublk_stripe_usage, }; From 57e13a2e8cd208db254968631820fc1353da9db0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:27 +0800 Subject: [PATCH 0790/2924] selftests: ublk: support user recovery Add user recovery feature. Meantime add user recovery test: generic_04 and generic_05(zero copy) Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-12-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 2 + tools/testing/selftests/ublk/kublk.c | 96 +++++++++++++++++-- tools/testing/selftests/ublk/kublk.h | 1 + tools/testing/selftests/ublk/test_common.sh | 57 ++++++++++- .../testing/selftests/ublk/test_generic_04.sh | 40 ++++++++ .../testing/selftests/ublk/test_generic_05.sh | 44 +++++++++ 6 files changed, 230 insertions(+), 10 deletions(-) create mode 100755 tools/testing/selftests/ublk/test_generic_04.sh create mode 100755 tools/testing/selftests/ublk/test_generic_05.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 7311e8f6bee7aa..d93373384e931f 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -6,6 +6,8 @@ LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh TEST_PROGS += test_generic_02.sh TEST_PROGS += test_generic_03.sh +TEST_PROGS += test_generic_04.sh +TEST_PROGS += test_generic_05.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 03b3d6427775f7..0cd6dce3f30392 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -119,6 +119,27 @@ static int ublk_ctrl_start_dev(struct ublk_dev *dev, return __ublk_ctrl_cmd(dev, &data); } +static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_START_USER_RECOVERY, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_END_USER_RECOVERY, + .flags = CTRL_CMD_HAS_DATA, + }; + + dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; + + return __ublk_ctrl_cmd(dev, &data); +} + static int ublk_ctrl_add_dev(struct ublk_dev *dev) { struct ublk_ctrl_cmd_data data = { @@ -812,8 +833,12 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) free(affinity_buf); /* everything is fine now, start us */ - ublk_set_parameters(dev); - ret = ublk_ctrl_start_dev(dev, getpid()); + if (ctx->recovery) + ret = ublk_ctrl_end_user_recovery(dev, getpid()); + else { + ublk_set_parameters(dev); + ret = ublk_ctrl_start_dev(dev, getpid()); + } if (ret < 0) { ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret); goto fail; @@ -988,7 +1013,10 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) } } - ret = ublk_ctrl_add_dev(dev); + if (ctx->recovery) + ret = ublk_ctrl_start_user_recovery(dev); + else + ret = ublk_ctrl_add_dev(dev); if (ret < 0) { ublk_err("%s: can't add dev id %d, type %s ret %d\n", __func__, dev_id, tgt_type, ret); @@ -1202,12 +1230,14 @@ static int cmd_dev_get_features(void) return ret; } -static int cmd_dev_help(char *exe) +static void __cmd_create_help(char *exe, bool recovery) { int i; - printf("%s add -t [null|loop|stripe] [-q nr_queues] [-d depth] [-n dev_id]\n", exe); - printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask]\n"); + printf("%s %s -t [null|loop|stripe] [-q nr_queues] [-d depth] [-n dev_id]\n", + exe, recovery ? "recover" : "add"); + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n"); + printf("\t[-e 0|1 ] [-i 0|1]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1217,7 +1247,25 @@ static int cmd_dev_help(char *exe) if (ops->usage) ops->usage(ops); } +} + +static void cmd_add_help(char *exe) +{ + __cmd_create_help(exe, false); + printf("\n"); +} + +static void cmd_recover_help(char *exe) +{ + __cmd_create_help(exe, true); + printf("\tPlease provide exact command line for creating this device with real dev_id\n"); printf("\n"); +} + +static int cmd_dev_help(char *exe) +{ + cmd_add_help(exe); + cmd_recover_help(exe); printf("%s del [-n dev_id] -a \n", exe); printf("\t -a delete all devices -n delete specified device\n\n"); @@ -1239,6 +1287,10 @@ int main(int argc, char *argv[]) { "quiet", 0, NULL, 0 }, { "zero_copy", 0, NULL, 'z' }, { "foreground", 0, NULL, 0 }, + { "recovery", 1, NULL, 'r' }, + { "recovery_fail_io", 1, NULL, 'e'}, + { "recovery_reissue", 1, NULL, 'i'}, + { "get_data", 1, NULL, 'g'}, { 0, 0, 0, 0 } }; const struct ublk_tgt_ops *ops = NULL; @@ -1253,13 +1305,14 @@ int main(int argc, char *argv[]) int ret = -EINVAL, i; int tgt_argc = 1; char *tgt_argv[MAX_NR_TGT_ARG] = { NULL }; + int value; if (argc == 1) return ret; opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:az", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:az", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1281,6 +1334,25 @@ int main(int argc, char *argv[]) case 'z': ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY; break; + case 'r': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY; + break; + case 'e': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO; + break; + case 'i': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; + break; + case 'g': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_NEED_GET_DATA; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) ublk_dbg_mask = strtol(optarg, NULL, 16); @@ -1326,7 +1398,15 @@ int main(int argc, char *argv[]) if (!strcmp(cmd, "add")) ret = cmd_dev_add(&ctx); - else if (!strcmp(cmd, "del")) + else if (!strcmp(cmd, "recover")) { + if (ctx.dev_id < 0) { + fprintf(stderr, "device id isn't provided for recovering\n"); + ret = -EINVAL; + } else { + ctx.recovery = 1; + ret = cmd_dev_add(&ctx); + } + } else if (!strcmp(cmd, "del")) ret = cmd_dev_del(&ctx); else if (!strcmp(cmd, "list")) { ctx.all = 1; diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 7b7446359c8f11..3d2b9f14491c33 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -79,6 +79,7 @@ struct dev_ctx { unsigned int logging:1; unsigned int all:1; unsigned int fg:1; + unsigned int recovery:1; int _evtfd; int _shmid; diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 87fd0c824b770b..e822b2a2729a85 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -169,8 +169,11 @@ _have_feature() return 1 } -_add_ublk_dev() { +_create_ublk_dev() { local dev_id; + local cmd=$1 + + shift 1 if [ ! -c /dev/ublk-control ]; then return ${UBLK_SKIP_CODE} @@ -181,7 +184,7 @@ _add_ublk_dev() { fi fi - if ! dev_id=$("${UBLK_PROG}" add "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then + if ! dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then echo "fail to add ublk dev $*" return 255 fi @@ -194,6 +197,23 @@ _add_ublk_dev() { fi } +_add_ublk_dev() { + _create_ublk_dev "add" "$@" +} + +_recover_ublk_dev() { + local dev_id + local state + + dev_id=$(_create_ublk_dev "recover" "$@") + for ((j=0;j<20;j++)); do + state=$(_get_ublk_dev_state "${dev_id}") + [ "$state" == "LIVE" ] && break + sleep 1 + done + echo "$state" +} + # kill the ublk daemon and return ublk device state __ublk_kill_daemon() { @@ -280,6 +300,39 @@ run_io_and_kill_daemon() fi } +run_io_and_recover() +{ + local state + local dev_id + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ + --runtime=20 --time_based > /dev/null 2>&1 & + sleep 4 + + state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") + if [ "$state" != "QUIESCED" ]; then + echo "device isn't quiesced($state) after killing daemon" + return 255 + fi + + state=$(_recover_ublk_dev -n "$dev_id" "$@") + if [ "$state" != "LIVE" ]; then + echo "faile to recover to LIVE($state)" + return 255 + fi + + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi + wait +} + + _ublk_test_top_dir() { cd "$(dirname "$0")" && pwd diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh new file mode 100755 index 00000000000000..8a3bc080c5771e --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_04.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_04" +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 & +ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "recover" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh new file mode 100755 index 00000000000000..714630b4b3295f --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_04" +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification (zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 -z & +ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "recover" +_show_result $TID $ERR_CODE From 2f9a30bd16643d842da0921dc37bf00c750b0a8b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:28 +0800 Subject: [PATCH 0791/2924] selftests: ublk: add test_stress_05.sh Add test_stress_05.sh for covering removing device with recovery enabled. io-hang has been observed with the following patch: https://lore.kernel.org/linux-block/20250403-ublk_timeout-v3-1-aa09f76c7451@purestorage.com/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-13-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 1 + .../testing/selftests/ublk/test_stress_05.sh | 64 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100755 tools/testing/selftests/ublk/test_stress_05.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index d93373384e931f..dddc64036aa1b6 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -25,6 +25,7 @@ TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh TEST_PROGS += test_stress_03.sh TEST_PROGS += test_stress_04.sh +TEST_PROGS += test_stress_05.sh TEST_GEN_PROGS_EXTENDED = kublk diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh new file mode 100755 index 00000000000000..a7071b10224d9f --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_05" +ERR_CODE=0 + +run_io_and_remove() +{ + local size=$1 + local dev_id + local dev_pid + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev $TID $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \ + --runtime=40 --time_based > /dev/null 2>&1 & + sleep 4 + + dev_pid=$(_get_ublk_daemon_pid "$dev_id") + kill -9 "$dev_pid" + + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi +} + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +_prep_test "stress" "run IO and remove device with recovery enabled" + +_create_backfile 0 256M +_create_backfile 1 256M + +for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g 1 -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g 1 -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + wait +done + +if _have_feature "ZERO_COPY"; then + for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g 1 -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g 1 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + wait + done +fi + +_cleanup_test "stress" +_show_result $TID $ERR_CODE From 3bf540609cab0402a7c3e40c1425532f3376318a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 12 Apr 2025 10:30:29 +0800 Subject: [PATCH 0792/2924] selftests: ublk: move creating UBLK_TMP into _prep_test() test may exit early because of missing program or not having required feature before calling _prep_test(), then $UBLK_TMP isn't cleaned. Fix it by moving creating $UBLK_TMP into _prep_test(), any resources created since _prep_test() will be cleaned by _cleanup_test(). Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250412023035.2649275-14-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_common.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index e822b2a2729a85..9fc111f64576f9 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -114,6 +114,7 @@ _prep_test() { local type=$1 shift 1 modprobe ublk_drv > /dev/null 2>&1 + UBLK_TMP=$(mktemp ublk_test_XXXXX) [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*" } @@ -338,7 +339,6 @@ _ublk_test_top_dir() cd "$(dirname "$0")" && pwd } -UBLK_TMP=$(mktemp ublk_test_XXXXX) UBLK_PROG=$(_ublk_test_top_dir)/kublk UBLK_TEST_QUIET=1 UBLK_TEST_SHOW_RESULT=1 From b69b8edfb27dfa563cd53f590ec42b481f9eb174 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Wed, 16 Apr 2025 11:54:35 +0800 Subject: [PATCH 0793/2924] ublk: properly serialize all FETCH_REQs Most uring_cmds issued against ublk character devices are serialized because each command affects only one queue, and there is an early check which only allows a single task (the queue's ubq_daemon) to issue uring_cmds against that queue. However, this mechanism does not work for FETCH_REQs, since they are expected before ubq_daemon is set. Since FETCH_REQs are only used at initialization and not in the fast path, serialize them using the per-ublk-device mutex. This fixes a number of data races that were previously possible if a badly behaved ublk server decided to issue multiple FETCH_REQs against the same qid/tag concurrently. Reported-by: Caleb Sander Mateos Signed-off-by: Uday Shankar Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250416035444.99569-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 77 +++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index cdb1543fa4a981..bf47f9cb83293e 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1832,8 +1832,8 @@ static void ublk_nosrv_work(struct work_struct *work) /* device can only be started after all IOs are ready */ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) + __must_hold(&ub->mutex) { - mutex_lock(&ub->mutex); ubq->nr_io_ready++; if (ublk_queue_ready(ubq)) { ubq->ubq_daemon = current; @@ -1845,7 +1845,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) } if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) complete_all(&ub->completion); - mutex_unlock(&ub->mutex); } static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, @@ -1929,6 +1928,52 @@ static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, return io_buffer_unregister_bvec(cmd, index, issue_flags); } +static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, + struct ublk_io *io, __u64 buf_addr) +{ + struct ublk_device *ub = ubq->dev; + int ret = 0; + + /* + * When handling FETCH command for setting up ublk uring queue, + * ub->mutex is the innermost lock, and we won't block for handling + * FETCH, so it is fine even for IO_URING_F_NONBLOCK. + */ + mutex_lock(&ub->mutex); + /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ + if (ublk_queue_ready(ubq)) { + ret = -EBUSY; + goto out; + } + + /* allow each command to be FETCHed at most once */ + if (io->flags & UBLK_IO_FLAG_ACTIVE) { + ret = -EINVAL; + goto out; + } + + WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV); + + if (ublk_need_map_io(ubq)) { + /* + * FETCH_RQ has to provide IO buffer if NEED GET + * DATA is not enabled + */ + if (!buf_addr && !ublk_need_get_data(ubq)) + goto out; + } else if (buf_addr) { + /* User copy requires addr to be unset */ + ret = -EINVAL; + goto out; + } + + ublk_fill_io_cmd(io, cmd, buf_addr); + ublk_mark_io_ready(ub, ubq); +out: + mutex_unlock(&ub->mutex); + return ret; +} + static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags, const struct ublksrv_io_cmd *ub_cmd) @@ -1985,33 +2030,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, case UBLK_IO_UNREGISTER_IO_BUF: return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags); case UBLK_IO_FETCH_REQ: - /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ - if (ublk_queue_ready(ubq)) { - ret = -EBUSY; - goto out; - } - /* - * The io is being handled by server, so COMMIT_RQ is expected - * instead of FETCH_REQ - */ - if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) - goto out; - - if (ublk_need_map_io(ubq)) { - /* - * FETCH_RQ has to provide IO buffer if NEED GET - * DATA is not enabled - */ - if (!ub_cmd->addr && !ublk_need_get_data(ubq)) - goto out; - } else if (ub_cmd->addr) { - /* User copy requires addr to be unset */ - ret = -EINVAL; + ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); + if (ret) goto out; - } - - ublk_fill_io_cmd(io, cmd, ub_cmd->addr); - ublk_mark_io_ready(ub, ubq); break; case UBLK_IO_COMMIT_AND_FETCH_REQ: req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); From 00b3b0d7cb454d614117c93f33351cdcd20b5b93 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Apr 2025 11:54:36 +0800 Subject: [PATCH 0794/2924] ublk: add ublk_force_abort_dev() Add ublk_force_abort_dev() for handling ublk_nosrv_dev_should_queue_io() in ublk_stop_dev(). Then queue quiesce and unquiesce can be paired in single function. Meantime not change device state to QUIESCED any more, since the disk is going to be removed soon. Reviewed-by: Uday Shankar Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250416035444.99569-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index bf47f9cb83293e..e1b4db2f8a56ec 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1743,22 +1743,20 @@ static void __ublk_quiesce_dev(struct ublk_device *ub) ub->dev_info.state = UBLK_S_DEV_QUIESCED; } -static void ublk_unquiesce_dev(struct ublk_device *ub) +static void ublk_force_abort_dev(struct ublk_device *ub) { int i; - pr_devel("%s: unquiesce ub: dev_id %d state %s\n", + pr_devel("%s: force abort ub: dev_id %d state %s\n", __func__, ub->dev_info.dev_id, ub->dev_info.state == UBLK_S_DEV_LIVE ? "LIVE" : "QUIESCED"); - /* quiesce_work has run. We let requeued rqs be aborted - * before running fallback_wq. "force_abort" must be seen - * after request queue is unqiuesced. Then del_gendisk() - * can move on. - */ + blk_mq_quiesce_queue(ub->ub_disk->queue); + if (ub->dev_info.state == UBLK_S_DEV_LIVE) + ublk_wait_tagset_rqs_idle(ub); + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) ublk_get_queue(ub, i)->force_abort = true; - blk_mq_unquiesce_queue(ub->ub_disk->queue); /* We may have requeued some rqs in ublk_quiesce_queue() */ blk_mq_kick_requeue_list(ub->ub_disk->queue); @@ -1786,11 +1784,8 @@ static void ublk_stop_dev(struct ublk_device *ub) mutex_lock(&ub->mutex); if (ub->dev_info.state == UBLK_S_DEV_DEAD) goto unlock; - if (ublk_nosrv_dev_should_queue_io(ub)) { - if (ub->dev_info.state == UBLK_S_DEV_LIVE) - __ublk_quiesce_dev(ub); - ublk_unquiesce_dev(ub); - } + if (ublk_nosrv_dev_should_queue_io(ub)) + ublk_force_abort_dev(ub); del_gendisk(ub->ub_disk); disk = ublk_detach_disk(ub); put_disk(disk); From 7e26cb69c5e62152a6f05a2ae23605a983a8ef31 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Apr 2025 11:54:37 +0800 Subject: [PATCH 0795/2924] ublk: rely on ->canceling for dealing with ublk_nosrv_dev_should_queue_io Now ublk deals with ublk_nosrv_dev_should_queue_io() by keeping request queue as quiesced. This way is fragile because queue quiesce crosses syscalls or process contexts. Switch to rely on ubq->canceling for dealing with ublk_nosrv_dev_should_queue_io(), because it has been used for this purpose during io_uring context exiting, and it can be reused before recovering too. In ublk_queue_rq(), the request will be added to requeue list without kicking off requeue in case of ubq->canceling, and finally requests added in requeue list will be dispatched from either ublk_stop_dev() or ublk_ctrl_end_recovery(). Meantime we have to move reset of ubq->canceling from ublk_ctrl_start_recovery() to ublk_ctrl_end_recovery(), when IO handling can be recovered completely. Then blk_mq_quiesce_queue() and blk_mq_unquiesce_queue() are always used in same context. Signed-off-by: Ming Lei Reviewed-by: Uday Shankar Link: https://lore.kernel.org/r/20250416035444.99569-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index e1b4db2f8a56ec..a479969fd77e4d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1734,13 +1734,19 @@ static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub) static void __ublk_quiesce_dev(struct ublk_device *ub) { + int i; + pr_devel("%s: quiesce ub: dev_id %d state %s\n", __func__, ub->dev_info.dev_id, ub->dev_info.state == UBLK_S_DEV_LIVE ? "LIVE" : "QUIESCED"); blk_mq_quiesce_queue(ub->ub_disk->queue); + /* mark every queue as canceling */ + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_get_queue(ub, i)->canceling = true; ublk_wait_tagset_rqs_idle(ub); ub->dev_info.state = UBLK_S_DEV_QUIESCED; + blk_mq_unquiesce_queue(ub->ub_disk->queue); } static void ublk_force_abort_dev(struct ublk_device *ub) @@ -2961,7 +2967,6 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */ ubq->ubq_daemon = NULL; ubq->timeout = false; - ubq->canceling = false; for (i = 0; i < ubq->q_depth; i++) { struct ublk_io *io = &ubq->ios[i]; @@ -3048,20 +3053,18 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, pr_devel("%s: new ublksrv_pid %d, dev id %d\n", __func__, ublksrv_pid, header->dev_id); - if (ublk_nosrv_dev_should_queue_io(ub)) { - ub->dev_info.state = UBLK_S_DEV_LIVE; - blk_mq_unquiesce_queue(ub->ub_disk->queue); - pr_devel("%s: queue unquiesced, dev id %d.\n", - __func__, header->dev_id); - blk_mq_kick_requeue_list(ub->ub_disk->queue); - } else { - blk_mq_quiesce_queue(ub->ub_disk->queue); - ub->dev_info.state = UBLK_S_DEV_LIVE; - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { - ublk_get_queue(ub, i)->fail_io = false; - } - blk_mq_unquiesce_queue(ub->ub_disk->queue); + blk_mq_quiesce_queue(ub->ub_disk->queue); + ub->dev_info.state = UBLK_S_DEV_LIVE; + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + ubq->canceling = false; + ubq->fail_io = false; } + blk_mq_unquiesce_queue(ub->ub_disk->queue); + pr_devel("%s: queue unquiesced, dev id %d.\n", + __func__, header->dev_id); + blk_mq_kick_requeue_list(ub->ub_disk->queue); ret = 0; out_unlock: From 728cbac5fe219d3b8a21a0688a08f2b7f8aeda2b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Apr 2025 11:54:38 +0800 Subject: [PATCH 0796/2924] ublk: move device reset into ublk_ch_release() ublk_ch_release() is called after ublk char device is closed, when all uring_cmd are done, so it is perfect fine to move ublk device reset to ublk_ch_release() from ublk_ctrl_start_recovery(). This way can avoid to grab the exiting daemon task_struct too long. However, reset of the following ublk IO flags has to be moved until ublk io_uring queues are ready: - ubq->canceling For requeuing IO in case of ublk_nosrv_dev_should_queue_io() before device is recovered - ubq->fail_io For failing IO in case of UBLK_F_USER_RECOVERY_FAIL_IO before device is recovered - ublk_io->flags For preventing using io->cmd With this way, recovery is simplified a lot. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250416035444.99569-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 121 +++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 49 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index a479969fd77e4d..1fe39cf85b2f4d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1074,7 +1074,7 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu( static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq) { - return ubq->ubq_daemon->flags & PF_EXITING; + return !ubq->ubq_daemon || ubq->ubq_daemon->flags & PF_EXITING; } /* todo: handle partial completion */ @@ -1470,6 +1470,37 @@ static const struct blk_mq_ops ublk_mq_ops = { .timeout = ublk_timeout, }; +static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) +{ + int i; + + /* All old ioucmds have to be completed */ + ubq->nr_io_ready = 0; + + /* + * old daemon is PF_EXITING, put it now + * + * It could be NULL in case of closing one quisced device. + */ + if (ubq->ubq_daemon) + put_task_struct(ubq->ubq_daemon); + /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */ + ubq->ubq_daemon = NULL; + ubq->timeout = false; + + for (i = 0; i < ubq->q_depth; i++) { + struct ublk_io *io = &ubq->ios[i]; + + /* + * UBLK_IO_FLAG_CANCELED is kept for avoiding to touch + * io->cmd + */ + io->flags &= UBLK_IO_FLAG_CANCELED; + io->cmd = NULL; + io->addr = 0; + } +} + static int ublk_ch_open(struct inode *inode, struct file *filp) { struct ublk_device *ub = container_of(inode->i_cdev, @@ -1481,10 +1512,26 @@ static int ublk_ch_open(struct inode *inode, struct file *filp) return 0; } +static void ublk_reset_ch_dev(struct ublk_device *ub) +{ + int i; + + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_queue_reinit(ub, ublk_get_queue(ub, i)); + + /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */ + ub->mm = NULL; + ub->nr_queues_ready = 0; + ub->nr_privileged_daemon = 0; +} + static int ublk_ch_release(struct inode *inode, struct file *filp) { struct ublk_device *ub = filp->private_data; + /* all uring_cmd has been done now, reset device & ubq */ + ublk_reset_ch_dev(ub); + clear_bit(UB_STATE_OPEN, &ub->state); return 0; } @@ -1831,6 +1878,24 @@ static void ublk_nosrv_work(struct work_struct *work) ublk_cancel_dev(ub); } +/* reset ublk io_uring queue & io flags */ +static void ublk_reset_io_flags(struct ublk_device *ub) +{ + int i, j; + + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + /* UBLK_IO_FLAG_CANCELED can be cleared now */ + spin_lock(&ubq->cancel_lock); + for (j = 0; j < ubq->q_depth; j++) + ubq->ios[j].flags &= ~UBLK_IO_FLAG_CANCELED; + spin_unlock(&ubq->cancel_lock); + ubq->canceling = false; + ubq->fail_io = false; + } +} + /* device can only be started after all IOs are ready */ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) __must_hold(&ub->mutex) @@ -1844,8 +1909,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) if (capable(CAP_SYS_ADMIN)) ub->nr_privileged_daemon++; } - if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) + + if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) { + /* now we are ready for handling ublk io request */ + ublk_reset_io_flags(ub); complete_all(&ub->completion); + } } static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, @@ -2954,41 +3023,14 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, return ret; } -static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) -{ - int i; - - WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq))); - - /* All old ioucmds have to be completed */ - ubq->nr_io_ready = 0; - /* old daemon is PF_EXITING, put it now */ - put_task_struct(ubq->ubq_daemon); - /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */ - ubq->ubq_daemon = NULL; - ubq->timeout = false; - - for (i = 0; i < ubq->q_depth; i++) { - struct ublk_io *io = &ubq->ios[i]; - - /* forget everything now and be ready for new FETCH_REQ */ - io->flags = 0; - io->cmd = NULL; - io->addr = 0; - } -} - static int ublk_ctrl_start_recovery(struct ublk_device *ub, const struct ublksrv_ctrl_cmd *header) { int ret = -EINVAL; - int i; mutex_lock(&ub->mutex); if (ublk_nosrv_should_stop_dev(ub)) goto out_unlock; - if (!ub->nr_queues_ready) - goto out_unlock; /* * START_RECOVERY is only allowd after: * @@ -3012,12 +3054,6 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, goto out_unlock; } pr_devel("%s: start recovery for dev id %d.\n", __func__, header->dev_id); - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_queue_reinit(ub, ublk_get_queue(ub, i)); - /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */ - ub->mm = NULL; - ub->nr_queues_ready = 0; - ub->nr_privileged_daemon = 0; init_completion(&ub->completion); ret = 0; out_unlock: @@ -3030,7 +3066,6 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, { int ublksrv_pid = (int)header->data[0]; int ret = -EINVAL; - int i; pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n", __func__, ub->dev_info.nr_hw_queues, header->dev_id); @@ -3050,22 +3085,10 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, goto out_unlock; } ub->dev_info.ublksrv_pid = ublksrv_pid; + ub->dev_info.state = UBLK_S_DEV_LIVE; pr_devel("%s: new ublksrv_pid %d, dev id %d\n", __func__, ublksrv_pid, header->dev_id); - - blk_mq_quiesce_queue(ub->ub_disk->queue); - ub->dev_info.state = UBLK_S_DEV_LIVE; - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { - struct ublk_queue *ubq = ublk_get_queue(ub, i); - - ubq->canceling = false; - ubq->fail_io = false; - } - blk_mq_unquiesce_queue(ub->ub_disk->queue); - pr_devel("%s: queue unquiesced, dev id %d.\n", - __func__, header->dev_id); blk_mq_kick_requeue_list(ub->ub_disk->queue); - ret = 0; out_unlock: mutex_unlock(&ub->mutex); From 82a8a30c581bbbe653d33c6ce2ef67e3072c7f12 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Wed, 16 Apr 2025 11:54:39 +0800 Subject: [PATCH 0797/2924] ublk: improve detection and handling of ublk server exit There are currently two ways in which ublk server exit is detected by ublk_drv: 1. uring_cmd cancellation. If there are any outstanding uring_cmds which have not been completed to the ublk server when it exits, io_uring calls the uring_cmd callback with a special cancellation flag as the issuing task is exiting. 2. I/O timeout. This is needed in addition to the above to handle the "saturated queue" case, when all I/Os for a given queue are in the ublk server, and therefore there are no outstanding uring_cmds to cancel when the ublk server exits. There are a couple of issues with this approach: - It is complex and inelegant to have two methods to detect the same condition - The second method detects ublk server exit only after a long delay (~30s, the default timeout assigned by the block layer). This delays the nosrv behavior from kicking in and potential subsequent recovery of the device. The second issue is brought to light with the new test_generic_06 which will be added in following patch. It fails before this fix: selftests: ublk: test_generic_06.sh dev id is 0 dd: error writing '/dev/ublkb0': Input/output error 1+0 records in 0+0 records out 0 bytes copied, 30.0611 s, 0.0 kB/s DEAD dd took 31 seconds to exit (>= 5s tolerance)! generic_06 : [FAIL] Fix this by instead detecting and handling ublk server exit in the character file release callback. This has several advantages: - This one place can handle both saturated and unsaturated queues. Thus, it replaces both preexisting methods of detecting ublk server exit. - It runs quickly on ublk server exit - there is no 30s delay. - It starts the process of removing task references in ublk_drv. This is needed if we want to relax restrictions in the driver like letting only one thread serve each queue There is also the disadvantage that the character file release callback can also be triggered by intentional close of the file, which is a significant behavior change. Preexisting ublk servers (libublksrv) are dependent on the ability to open/close the file multiple times. To address this, only transition to a nosrv state if the file is released while the ublk device is live. This allows for programs to open/close the file multiple times during setup. It is still a behavior change if a ublk server decides to close/reopen the file while the device is LIVE (i.e. while it is responsible for serving I/O), but that would be highly unusual. This behavior is in line with what is done by FUSE, which is very similar to ublk in that a userspace daemon is providing services traditionally provided by the kernel. With this change in, the new test (and all other selftests, and all ublksrv tests) pass: selftests: ublk: test_generic_06.sh dev id is 0 dd: error writing '/dev/ublkb0': Input/output error 1+0 records in 0+0 records out 0 bytes copied, 0.0376731 s, 0.0 kB/s DEAD generic_04 : [PASS] Signed-off-by: Uday Shankar Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250416035444.99569-6-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 223 +++++++++++++++++++++------------------ 1 file changed, 123 insertions(+), 100 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 1fe39cf85b2f4d..b0a7e5acb2eb1c 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -199,8 +199,6 @@ struct ublk_device { struct completion completion; unsigned int nr_queues_ready; unsigned int nr_privileged_daemon; - - struct work_struct nosrv_work; }; /* header of ublk_params */ @@ -209,8 +207,9 @@ struct ublk_params_header { __u32 types; }; -static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq); - +static void ublk_stop_dev_unlocked(struct ublk_device *ub); +static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); +static void __ublk_quiesce_dev(struct ublk_device *ub); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, struct ublk_queue *ubq, int tag, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); @@ -1336,8 +1335,6 @@ static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) static enum blk_eh_timer_return ublk_timeout(struct request *rq) { struct ublk_queue *ubq = rq->mq_hctx->driver_data; - unsigned int nr_inflight = 0; - int i; if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) { if (!ubq->timeout) { @@ -1348,26 +1345,6 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq) return BLK_EH_DONE; } - if (!ubq_daemon_is_dying(ubq)) - return BLK_EH_RESET_TIMER; - - for (i = 0; i < ubq->q_depth; i++) { - struct ublk_io *io = &ubq->ios[i]; - - if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) - nr_inflight++; - } - - /* cancelable uring_cmd can't help us if all commands are in-flight */ - if (nr_inflight == ubq->q_depth) { - struct ublk_device *ub = ubq->dev; - - if (ublk_abort_requests(ub, ubq)) { - schedule_work(&ub->nosrv_work); - } - return BLK_EH_DONE; - } - return BLK_EH_RESET_TIMER; } @@ -1525,13 +1502,105 @@ static void ublk_reset_ch_dev(struct ublk_device *ub) ub->nr_privileged_daemon = 0; } +static struct gendisk *ublk_get_disk(struct ublk_device *ub) +{ + struct gendisk *disk; + + spin_lock(&ub->lock); + disk = ub->ub_disk; + if (disk) + get_device(disk_to_dev(disk)); + spin_unlock(&ub->lock); + + return disk; +} + +static void ublk_put_disk(struct gendisk *disk) +{ + if (disk) + put_device(disk_to_dev(disk)); +} + static int ublk_ch_release(struct inode *inode, struct file *filp) { struct ublk_device *ub = filp->private_data; + struct gendisk *disk; + int i; + + /* + * disk isn't attached yet, either device isn't live, or it has + * been removed already, so we needn't to do anything + */ + disk = ublk_get_disk(ub); + if (!disk) + goto out; + + /* + * All uring_cmd are done now, so abort any request outstanding to + * the ublk server + * + * This can be done in lockless way because ublk server has been + * gone + * + * More importantly, we have to provide forward progress guarantee + * without holding ub->mutex, otherwise control task grabbing + * ub->mutex triggers deadlock + * + * All requests may be inflight, so ->canceling may not be set, set + * it now. + */ + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + ubq->canceling = true; + ublk_abort_queue(ub, ubq); + } + blk_mq_kick_requeue_list(disk->queue); + + /* + * All infligh requests have been completed or requeued and any new + * request will be failed or requeued via `->canceling` now, so it is + * fine to grab ub->mutex now. + */ + mutex_lock(&ub->mutex); + + /* double check after grabbing lock */ + if (!ub->ub_disk) + goto unlock; + + /* + * Transition the device to the nosrv state. What exactly this + * means depends on the recovery flags + */ + blk_mq_quiesce_queue(disk->queue); + if (ublk_nosrv_should_stop_dev(ub)) { + /* + * Allow any pending/future I/O to pass through quickly + * with an error. This is needed because del_gendisk + * waits for all pending I/O to complete + */ + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_get_queue(ub, i)->force_abort = true; + blk_mq_unquiesce_queue(disk->queue); + + ublk_stop_dev_unlocked(ub); + } else { + if (ublk_nosrv_dev_should_queue_io(ub)) { + __ublk_quiesce_dev(ub); + } else { + ub->dev_info.state = UBLK_S_DEV_FAIL_IO; + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_get_queue(ub, i)->fail_io = true; + } + blk_mq_unquiesce_queue(disk->queue); + } +unlock: + mutex_unlock(&ub->mutex); + ublk_put_disk(disk); /* all uring_cmd has been done now, reset device & ubq */ ublk_reset_ch_dev(ub); - +out: clear_bit(UB_STATE_OPEN, &ub->state); return 0; } @@ -1627,37 +1696,22 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) } /* Must be called when queue is frozen */ -static bool ublk_mark_queue_canceling(struct ublk_queue *ubq) +static void ublk_mark_queue_canceling(struct ublk_queue *ubq) { - bool canceled; - spin_lock(&ubq->cancel_lock); - canceled = ubq->canceling; - if (!canceled) + if (!ubq->canceling) ubq->canceling = true; spin_unlock(&ubq->cancel_lock); - - return canceled; } -static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) +static void ublk_start_cancel(struct ublk_queue *ubq) { - bool was_canceled = ubq->canceling; - struct gendisk *disk; - - if (was_canceled) - return false; - - spin_lock(&ub->lock); - disk = ub->ub_disk; - if (disk) - get_device(disk_to_dev(disk)); - spin_unlock(&ub->lock); + struct ublk_device *ub = ubq->dev; + struct gendisk *disk = ublk_get_disk(ub); /* Our disk has been dead */ if (!disk) - return false; - + return; /* * Now we are serialized with ublk_queue_rq() * @@ -1666,15 +1720,9 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) * touch completed uring_cmd */ blk_mq_quiesce_queue(disk->queue); - was_canceled = ublk_mark_queue_canceling(ubq); - if (!was_canceled) { - /* abort queue is for making forward progress */ - ublk_abort_queue(ub, ubq); - } + ublk_mark_queue_canceling(ubq); blk_mq_unquiesce_queue(disk->queue); - put_device(disk_to_dev(disk)); - - return !was_canceled; + ublk_put_disk(disk); } static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, @@ -1698,6 +1746,17 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, /* * The ublk char device won't be closed when calling cancel fn, so both * ublk device and queue are guaranteed to be live + * + * Two-stage cancel: + * + * - make every active uring_cmd done in ->cancel_fn() + * + * - aborting inflight ublk IO requests in ublk char device release handler, + * which depends on 1st stage because device can only be closed iff all + * uring_cmd are done + * + * Do _not_ try to acquire ub->mutex before all inflight requests are + * aborted, otherwise deadlock may be caused. */ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, unsigned int issue_flags) @@ -1705,8 +1764,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; struct task_struct *task; - struct ublk_device *ub; - bool need_schedule; struct ublk_io *io; if (WARN_ON_ONCE(!ubq)) @@ -1719,16 +1776,12 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, if (WARN_ON_ONCE(task && task != ubq->ubq_daemon)) return; - ub = ubq->dev; - need_schedule = ublk_abort_requests(ub, ubq); + if (!ubq->canceling) + ublk_start_cancel(ubq); io = &ubq->ios[pdu->tag]; WARN_ON_ONCE(io->cmd != cmd); ublk_cancel_cmd(ubq, io, issue_flags); - - if (need_schedule) { - schedule_work(&ub->nosrv_work); - } } static inline bool ublk_queue_ready(struct ublk_queue *ubq) @@ -1787,13 +1840,11 @@ static void __ublk_quiesce_dev(struct ublk_device *ub) __func__, ub->dev_info.dev_id, ub->dev_info.state == UBLK_S_DEV_LIVE ? "LIVE" : "QUIESCED"); - blk_mq_quiesce_queue(ub->ub_disk->queue); /* mark every queue as canceling */ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) ublk_get_queue(ub, i)->canceling = true; ublk_wait_tagset_rqs_idle(ub); ub->dev_info.state = UBLK_S_DEV_QUIESCED; - blk_mq_unquiesce_queue(ub->ub_disk->queue); } static void ublk_force_abort_dev(struct ublk_device *ub) @@ -1830,50 +1881,25 @@ static struct gendisk *ublk_detach_disk(struct ublk_device *ub) return disk; } -static void ublk_stop_dev(struct ublk_device *ub) +static void ublk_stop_dev_unlocked(struct ublk_device *ub) + __must_hold(&ub->mutex) { struct gendisk *disk; - mutex_lock(&ub->mutex); if (ub->dev_info.state == UBLK_S_DEV_DEAD) - goto unlock; + return; + if (ublk_nosrv_dev_should_queue_io(ub)) ublk_force_abort_dev(ub); del_gendisk(ub->ub_disk); disk = ublk_detach_disk(ub); put_disk(disk); - unlock: - mutex_unlock(&ub->mutex); - ublk_cancel_dev(ub); } -static void ublk_nosrv_work(struct work_struct *work) +static void ublk_stop_dev(struct ublk_device *ub) { - struct ublk_device *ub = - container_of(work, struct ublk_device, nosrv_work); - int i; - - if (ublk_nosrv_should_stop_dev(ub)) { - ublk_stop_dev(ub); - return; - } - mutex_lock(&ub->mutex); - if (ub->dev_info.state != UBLK_S_DEV_LIVE) - goto unlock; - - if (ublk_nosrv_dev_should_queue_io(ub)) { - __ublk_quiesce_dev(ub); - } else { - blk_mq_quiesce_queue(ub->ub_disk->queue); - ub->dev_info.state = UBLK_S_DEV_FAIL_IO; - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { - ublk_get_queue(ub, i)->fail_io = true; - } - blk_mq_unquiesce_queue(ub->ub_disk->queue); - } - - unlock: + ublk_stop_dev_unlocked(ub); mutex_unlock(&ub->mutex); ublk_cancel_dev(ub); } @@ -2502,7 +2528,6 @@ static void ublk_remove(struct ublk_device *ub) bool unprivileged; ublk_stop_dev(ub); - cancel_work_sync(&ub->nosrv_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV; ublk_put_device(ub); @@ -2787,7 +2812,6 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) goto out_unlock; mutex_init(&ub->mutex); spin_lock_init(&ub->lock); - INIT_WORK(&ub->nosrv_work, ublk_nosrv_work); ret = ublk_alloc_dev_number(ub, header->dev_id); if (ret < 0) @@ -2919,7 +2943,6 @@ static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) static int ublk_ctrl_stop_dev(struct ublk_device *ub) { ublk_stop_dev(ub); - cancel_work_sync(&ub->nosrv_work); return 0; } From 736b005b413a172670711ee17cab3c8ccab83223 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Apr 2025 11:54:40 +0800 Subject: [PATCH 0798/2924] ublk: remove __ublk_quiesce_dev() Remove __ublk_quiesce_dev() and open code for updating device state as QUIESCED. We needn't to drain inflight requests in __ublk_quiesce_dev() any more, because all inflight requests are aborted in ublk char device release handler. Also we needn't to set ->canceling in __ublk_quiesce_dev() any more because it is done unconditionally now in ublk_ch_release(). Reviewed-by: Uday Shankar Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250416035444.99569-7-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index b0a7e5acb2eb1c..bf708e9e9a1269 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -209,7 +209,6 @@ struct ublk_params_header { static void ublk_stop_dev_unlocked(struct ublk_device *ub); static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); -static void __ublk_quiesce_dev(struct ublk_device *ub); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, struct ublk_queue *ubq, int tag, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); @@ -1586,7 +1585,8 @@ static int ublk_ch_release(struct inode *inode, struct file *filp) ublk_stop_dev_unlocked(ub); } else { if (ublk_nosrv_dev_should_queue_io(ub)) { - __ublk_quiesce_dev(ub); + /* ->canceling is set and all requests are aborted */ + ub->dev_info.state = UBLK_S_DEV_QUIESCED; } else { ub->dev_info.state = UBLK_S_DEV_FAIL_IO; for (i = 0; i < ub->dev_info.nr_hw_queues; i++) @@ -1832,21 +1832,6 @@ static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub) } } -static void __ublk_quiesce_dev(struct ublk_device *ub) -{ - int i; - - pr_devel("%s: quiesce ub: dev_id %d state %s\n", - __func__, ub->dev_info.dev_id, - ub->dev_info.state == UBLK_S_DEV_LIVE ? - "LIVE" : "QUIESCED"); - /* mark every queue as canceling */ - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_get_queue(ub, i)->canceling = true; - ublk_wait_tagset_rqs_idle(ub); - ub->dev_info.state = UBLK_S_DEV_QUIESCED; -} - static void ublk_force_abort_dev(struct ublk_device *ub) { int i; From e63d2228ef831af36f963b3ab8604160cfff84c1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Apr 2025 11:54:41 +0800 Subject: [PATCH 0799/2924] ublk: simplify aborting ublk request Now ublk_abort_queue() is moved to ublk char device release handler, meantime our request queue is "quiesced" because either ->canceling was set from uring_cmd cancel function or all IOs are inflight and can't be completed by ublk server, things becomes easy much: - all uring_cmd are done, so we needn't to mark io as UBLK_IO_FLAG_ABORTED for handling completion from uring_cmd - ublk char device is closed, no one can hold IO request reference any more, so we can simply complete this request or requeue it for ublk_nosrv_should_reissue_outstanding. Reviewed-by: Uday Shankar Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250416035444.99569-8-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 82 ++++++++++------------------------------ 1 file changed, 20 insertions(+), 62 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index bf708e9e9a1269..2de7b2bd409ddf 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -122,15 +122,6 @@ struct ublk_uring_cmd_pdu { */ #define UBLK_IO_FLAG_OWNED_BY_SRV 0x02 -/* - * IO command is aborted, so this flag is set in case of - * !UBLK_IO_FLAG_ACTIVE. - * - * After this flag is observed, any pending or new incoming request - * associated with this io command will be failed immediately - */ -#define UBLK_IO_FLAG_ABORTED 0x04 - /* * UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires * get data buffer address from ublksrv. @@ -1083,12 +1074,6 @@ static inline void __ublk_complete_rq(struct request *req) unsigned int unmapped_bytes; blk_status_t res = BLK_STS_OK; - /* called from ublk_abort_queue() code path */ - if (io->flags & UBLK_IO_FLAG_ABORTED) { - res = BLK_STS_IOERR; - goto exit; - } - /* failed read IO if nothing is read */ if (!io->res && req_op(req) == REQ_OP_READ) io->res = -EIO; @@ -1138,47 +1123,6 @@ static void ublk_complete_rq(struct kref *ref) __ublk_complete_rq(req); } -static void ublk_do_fail_rq(struct request *req) -{ - struct ublk_queue *ubq = req->mq_hctx->driver_data; - - if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) - blk_mq_requeue_request(req, false); - else - __ublk_complete_rq(req); -} - -static void ublk_fail_rq_fn(struct kref *ref) -{ - struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data, - ref); - struct request *req = blk_mq_rq_from_pdu(data); - - ublk_do_fail_rq(req); -} - -/* - * Since ublk_rq_task_work_cb always fails requests immediately during - * exiting, __ublk_fail_req() is only called from abort context during - * exiting. So lock is unnecessary. - * - * Also aborting may not be started yet, keep in mind that one failed - * request may be issued by block layer again. - */ -static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, - struct request *req) -{ - WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); - - if (ublk_need_req_ref(ubq)) { - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - - kref_put(&data->ref, ublk_fail_rq_fn); - } else { - ublk_do_fail_rq(req); - } -} - static void ubq_complete_io_cmd(struct ublk_io *io, int res, unsigned issue_flags) { @@ -1667,10 +1611,26 @@ static void ublk_commit_completion(struct ublk_device *ub, ublk_put_req_ref(ubq, req); } +static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, + struct request *req) +{ + WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); + + if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) + blk_mq_requeue_request(req, false); + else { + io->res = -EIO; + __ublk_complete_rq(req); + } +} + /* - * Called from ubq_daemon context via cancel fn, meantime quiesce ublk - * blk-mq queue, so we are called exclusively with blk-mq and ubq_daemon - * context, so everything is serialized. + * Called from ublk char device release handler, when any uring_cmd is + * done, meantime request queue is "quiesced" since all inflight requests + * can't be completed because ublk server is dead. + * + * So no one can hold our request IO reference any more, simply ignore the + * reference, and complete the request immediately */ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) { @@ -1687,10 +1647,8 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) * will do it */ rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i); - if (rq && blk_mq_request_started(rq)) { - io->flags |= UBLK_IO_FLAG_ABORTED; + if (rq && blk_mq_request_started(rq)) __ublk_fail_req(ubq, io, rq); - } } } } From 81586652bb1f6c797159161db8d59c18d66b9eb3 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Wed, 16 Apr 2025 11:54:42 +0800 Subject: [PATCH 0800/2924] selftests: ublk: add generic_06 for covering fault inject Add one simple fault inject target, and verify if an application using ublk device sees an I/O error quickly after the ublk server dies. Signed-off-by: Uday Shankar Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250416035444.99569-9-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 4 +- tools/testing/selftests/ublk/fault_inject.c | 98 +++++++++++++++++++ tools/testing/selftests/ublk/kublk.c | 3 +- tools/testing/selftests/ublk/kublk.h | 12 ++- .../testing/selftests/ublk/test_generic_06.sh | 41 ++++++++ 5 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/ublk/fault_inject.c create mode 100755 tools/testing/selftests/ublk/test_generic_06.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index dddc64036aa1b6..ec4624a283bce2 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -8,6 +8,7 @@ TEST_PROGS += test_generic_02.sh TEST_PROGS += test_generic_03.sh TEST_PROGS += test_generic_04.sh TEST_PROGS += test_generic_05.sh +TEST_PROGS += test_generic_06.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh @@ -31,7 +32,8 @@ TEST_GEN_PROGS_EXTENDED = kublk include ../lib.mk -$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c +$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \ + fault_inject.c check: shellcheck -x -f gcc *.sh diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c new file mode 100644 index 00000000000000..94a8e729ba4c8f --- /dev/null +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Fault injection ublk target. Hack this up however you like for + * testing specific behaviors of ublk_drv. Currently is a null target + * with a configurable delay before completing each I/O. This delay can + * be used to test ublk_drv's handling of I/O outstanding to the ublk + * server when it dies. + */ + +#include "kublk.h" + +static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, + struct ublk_dev *dev) +{ + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + unsigned long dev_size = 250UL << 30; + + dev->tgt.dev_size = dev_size; + dev->tgt.params = (struct ublk_params) { + .types = UBLK_PARAM_TYPE_BASIC, + .basic = { + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = info->max_io_buf_bytes >> 9, + .dev_sectors = dev_size >> 9, + }, + }; + + dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000); + return 0; +} + +static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe; + struct __kernel_timespec ts = { + .tv_nsec = (long long)q->dev->private_data, + }; + + ublk_queue_alloc_sqes(q, &sqe, 1); + io_uring_prep_timeout(sqe, &ts, 1, 0); + sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1); + + ublk_queued_tgt_io(q, tag, 1); + + return 0; +} + +static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag, + const struct io_uring_cqe *cqe) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + + if (cqe->res != -ETIME) + ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res); + + if (ublk_completed_tgt_io(q, tag)) + ublk_complete_io(q, tag, iod->nr_sectors << 9); + else + ublk_err("%s: io not complete after 1 cqe\n", __func__); +} + +static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "delay_us", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + + ctx->fault_inject.delay_us = 0; + while ((opt = getopt_long(argc, argv, "", + longopts, &option_idx)) != -1) { + switch (opt) { + case 0: + if (!strcmp(longopts[option_idx].name, "delay_us")) + ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10); + } + } +} + +static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops) +{ + printf("\tfault_inject: [--delay_us us (default 0)]\n"); +} + +const struct ublk_tgt_ops fault_inject_tgt_ops = { + .name = "fault_inject", + .init_tgt = ublk_fault_inject_tgt_init, + .queue_io = ublk_fault_inject_queue_io, + .tgt_io_done = ublk_fault_inject_tgt_io_done, + .parse_cmd_line = ublk_fault_inject_cmd_line, + .usage = ublk_fault_inject_usage, +}; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 0cd6dce3f30392..759f0663714613 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -12,6 +12,7 @@ static const struct ublk_tgt_ops *tgt_ops_list[] = { &null_tgt_ops, &loop_tgt_ops, &stripe_tgt_ops, + &fault_inject_tgt_ops, }; static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) @@ -1234,7 +1235,7 @@ static void __cmd_create_help(char *exe, bool recovery) { int i; - printf("%s %s -t [null|loop|stripe] [-q nr_queues] [-d depth] [-n dev_id]\n", + printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n"); printf("\t[-e 0|1 ] [-i 0|1]\n"); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 3d2b9f14491c33..29571eb296f166 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -68,6 +68,11 @@ struct stripe_ctx { unsigned int chunk_size; }; +struct fault_inject_ctx { + /* fault_inject */ + unsigned long delay_us; +}; + struct dev_ctx { char tgt_type[16]; unsigned long flags; @@ -81,6 +86,9 @@ struct dev_ctx { unsigned int fg:1; unsigned int recovery:1; + /* fault_inject */ + long long delay_us; + int _evtfd; int _shmid; @@ -88,7 +96,8 @@ struct dev_ctx { struct ublk_dev *shadow_dev; union { - struct stripe_ctx stripe; + struct stripe_ctx stripe; + struct fault_inject_ctx fault_inject; }; }; @@ -384,6 +393,7 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q) extern const struct ublk_tgt_ops null_tgt_ops; extern const struct ublk_tgt_ops loop_tgt_ops; extern const struct ublk_tgt_ops stripe_tgt_ops; +extern const struct ublk_tgt_ops fault_inject_tgt_ops; void backing_file_tgt_deinit(struct ublk_dev *dev); int backing_file_tgt_init(struct ublk_dev *dev); diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh new file mode 100755 index 00000000000000..b67230c42c847c --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_06.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_06" +ERR_CODE=0 + +_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server" + +# configure ublk server to sleep 2s before completing each I/O +dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000) +_check_add_dev $TID $? + +STARTTIME=${SECONDS} + +dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 & +dd_pid=$! + +__ublk_kill_daemon ${dev_id} "DEAD" + +wait $dd_pid +dd_exitcode=$? + +ENDTIME=${SECONDS} +ELAPSED=$(($ENDTIME - $STARTTIME)) + +# assert that dd sees an error and exits quickly after ublk server is +# killed. previously this relied on seeing an I/O timeout and so would +# take ~30s +if [ $dd_exitcode -eq 0 ]; then + echo "dd unexpectedly exited successfully!" + ERR_CODE=255 +fi +if [ $ELAPSED -ge 5 ]; then + echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!" + ERR_CODE=255 +fi + +_cleanup_test "fault_inject" +_show_result $TID $ERR_CODE From c9b19ea63036fc537a69265acea1b18dabd1cbd3 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 15 Apr 2025 09:56:59 +0200 Subject: [PATCH 0801/2924] dma-mapping: avoid potential unused data compilation warning When CONFIG_NEED_DMA_MAP_STATE is not defined, dma-mapping clients might report unused data compilation warnings for dma_unmap_*() calls arguments. Redefine macros for those calls to let compiler to notice that it is okay when the provided arguments are not used. Reported-by: Andy Shevchenko Suggested-by: Jakub Kicinski Signed-off-by: Marek Szyprowski Tested-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250415075659.428549-1-m.szyprowski@samsung.com --- include/linux/dma-mapping.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index b79925b1c4333c..85ab710ec0e724 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -629,10 +629,14 @@ static inline int dma_mmap_wc(struct device *dev, #else #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) -#define dma_unmap_addr(PTR, ADDR_NAME) (0) -#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define dma_unmap_len(PTR, LEN_NAME) (0) -#define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) +#define dma_unmap_addr(PTR, ADDR_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) +#define dma_unmap_len(PTR, LEN_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_len_set(PTR, LEN_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) #endif #endif /* _LINUX_DMA_MAPPING_H */ From c7b67ddc3c999aa2f8d77be7ef1913298fe78f0e Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Wed, 9 Apr 2025 12:39:56 +0000 Subject: [PATCH 0802/2924] xfs: document zoned rt specifics in admin-guide Document the lifetime, nolifetime and max_open_zones mount options added for zoned rt file systems. Also add documentation describing the max_open_zones sysfs attribute exposed in /sys/fs/xfs//zoned/ Fixes: 4e4d52075577 ("xfs: add the zoned space allocator") Signed-off-by: Hans Holmberg Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- Documentation/admin-guide/xfs.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst index 7b0811d650f919..3e76276bd488b2 100644 --- a/Documentation/admin-guide/xfs.rst +++ b/Documentation/admin-guide/xfs.rst @@ -124,6 +124,14 @@ When mounting an XFS filesystem, the following options are accepted. controls the size of each buffer and so is also relevant to this case. + lifetime (default) or nolifetime + Enable data placement based on write life time hints provided + by the user. This turns on co-allocation of data of similar + life times when statistically favorable to reduce garbage + collection cost. + + These options are only available for zoned rt file systems. + logbsize=value Set the size of each in-memory log buffer. The size may be specified in bytes, or in kilobytes with a "k" suffix. @@ -143,6 +151,14 @@ When mounting an XFS filesystem, the following options are accepted. optional, and the log section can be separate from the data section or contained within it. + max_open_zones=value + Specify the max number of zones to keep open for writing on a + zoned rt device. Many open zones aids file data separation + but may impact performance on HDDs. + + If ``max_open_zones`` is not specified, the value is determined + by the capabilities and the size of the zoned rt device. + noalign Data allocations will not be aligned at stripe unit boundaries. This is only relevant to filesystems created @@ -546,6 +562,19 @@ The interesting knobs for XFS workqueues are as follows: Zoned Filesystems ================= +For zoned file systems, the following attribute is exposed in: + + /sys/fs/xfs//zoned/ + + max_open_zones (Min: 1 Default: Varies Max: UINTMAX) + This read-only attribute exposes the maximum number of open zones + available for data placement. The value is determined at mount time and + is limited by the capabilities of the backing zoned device, file system + size and the max_open_zones mount option. + +Zoned Filesystems +================= + For zoned file systems, the following attributes are exposed in: /sys/fs/xfs//zoned/ From 97994333de2b8062d2df4e6ce0dc65c2dc0f40dc Mon Sep 17 00:00:00 2001 From: Purva Yeshi Date: Thu, 10 Apr 2025 16:32:16 +0530 Subject: [PATCH 0803/2924] dmaengine: idxd: cdev: Fix uninitialized use of sva in idxd_cdev_open Fix Smatch-detected issue: drivers/dma/idxd/cdev.c:321 idxd_cdev_open() error: uninitialized symbol 'sva'. 'sva' pointer may be used uninitialized in error handling paths. Specifically, if PASID support is enabled and iommu_sva_bind_device() returns an error, the code jumps to the cleanup label and attempts to call iommu_sva_unbind_device(sva) without ensuring that sva was successfully assigned. This triggers a Smatch warning about an uninitialized symbol. Initialize sva to NULL at declaration and add a check using IS_ERR_OR_NULL() before unbinding the device. This ensures the function does not use an invalid or uninitialized pointer during cleanup. Signed-off-by: Purva Yeshi Reviewed-by: Dave Jiang Acked-by: Vinicius Costa Gomes Link: https://lore.kernel.org/r/20250410110216.21592-1-purvayeshi550@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index ff94ee892339d5..7bd031a608943c 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -222,7 +222,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) struct idxd_wq *wq; struct device *dev, *fdev; int rc = 0; - struct iommu_sva *sva; + struct iommu_sva *sva = NULL; unsigned int pasid; struct idxd_cdev *idxd_cdev; @@ -317,7 +317,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) if (device_user_pasid_enabled(idxd)) idxd_xa_pasid_remove(ctx); failed_get_pasid: - if (device_user_pasid_enabled(idxd)) + if (device_user_pasid_enabled(idxd) && !IS_ERR_OR_NULL(sva)) iommu_sva_unbind_device(sva); failed: mutex_unlock(&wq->wq_lock); From fe412e3a6c97cfe49ecf4564a278122f7d78e3f0 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 15 Apr 2025 19:23:37 +0800 Subject: [PATCH 0804/2924] pinctrl: mediatek: common-v1: Fix EINT breakage on older controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When EINT support for multiple addresses was introduced, the driver library for the older generations (pinctrl-mtk-common) was not fixed together. This resulted in invalid pointer accesses. Fix up the filled in |struct mtk_eint| in pinctrl-mtk-common to match what is now expected by the mtk-eint library. Reported-by: Uwe Kleine-König Tested-by: Uwe Kleine-König Closes: https://lore.kernel.org/all/43nd5jxpk7b7fv46frqlfjnqfh5jlpqsemeoakqzd4wdi3df6y@w7ycd3k5ezvn/ Fixes: 3ef9f710efcb ("pinctrl: mediatek: Add EINT support for multiple addresses") Cc: Hao Chang Cc: Qingliang Li Signed-off-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/20250415112339.2385454-1-wenst@chromium.org Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 91edb539925a49..7585de11854bec 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1015,7 +1015,13 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) if (!pctl->eint) return -ENOMEM; - pctl->eint->base = devm_platform_ioremap_resource(pdev, 0); + pctl->eint->nbase = 1; + /* mtk-eint expects an array */ + pctl->eint->base = devm_kzalloc(pctl->dev, sizeof(pctl->eint->base), GFP_KERNEL); + if (IS_ERR(pctl->eint->base)) + return -ENOMEM; + + pctl->eint->base[0] = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(pctl->eint->base)) return PTR_ERR(pctl->eint->base); From 8260731ccad0451207b45844bb66eb161a209218 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 16 Apr 2025 08:57:45 +0200 Subject: [PATCH 0805/2924] drm/gem: Internally test import_attach for imported objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test struct drm_gem_object.import_attach to detect imported objects. During object clenanup, the dma_buf field might be NULL. Testing it in an object's free callback then incorrectly does a cleanup as for native objects. Happens for calls to drm_mode_destroy_dumb_ioctl() that clears the dma_buf field in drm_gem_object_exported_dma_buf_free(). v3: - only test for import_attach (Boris) v2: - use import_attach.dmabuf instead of dma_buf (Christian) Signed-off-by: Thomas Zimmermann Fixes: b57aa47d39e9 ("drm/gem: Test for imported GEM buffers with helper") Reported-by: Andy Yan Closes: https://lore.kernel.org/dri-devel/38d09d34.4354.196379aa560.Coremail.andyshrk@163.com/ Tested-by: Andy Yan Cc: Thomas Zimmermann Cc: Anusha Srivatsa Cc: Christian König Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Simona Vetter Cc: Sumit Semwal Cc: "Christian König" Cc: dri-devel@lists.freedesktop.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Reviewed-by: Boris Brezillon Reviewed-by: Simona Vetter Link: https://lore.kernel.org/r/20250416065820.26076-1-tzimmermann@suse.de --- include/drm/drm_gem.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 2bf893eabb4b23..bcd54020d6ba51 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -585,8 +585,7 @@ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_obje */ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) { - /* The dma-buf's priv field points to the original GEM object. */ - return obj->dma_buf && (obj->dma_buf->priv != obj); + return !!obj->import_attach; } #ifdef CONFIG_LOCKDEP From 0a65bc27bd645894175c059397b4916e31955fb2 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 16 Apr 2025 18:58:25 +0000 Subject: [PATCH 0806/2924] eventpoll: Set epoll timeout if it's in the future Avoid an edge case where epoll_wait arms a timer and calls schedule() even if the timer will expire immediately. For example: if the user has specified an epoll busy poll usecs which is equal or larger than the epoll_wait/epoll_pwait2 timeout, it is unnecessary to call schedule_hrtimeout_range; the busy poll usecs have consumed the entire timeout duration so it is unnecessary to induce scheduling latency by calling schedule() (via schedule_hrtimeout_range). This can be measured using a simple bpftrace script: tracepoint:sched:sched_switch / args->prev_pid == $1 / { print(kstack()); print(ustack()); } Before this patch is applied: Testing an epoll_wait app with busy poll usecs set to 1000, and epoll_wait timeout set to 1ms using the script above shows: __traceiter_sched_switch+69 __schedule+1495 schedule+32 schedule_hrtimeout_range+159 do_epoll_wait+1424 __x64_sys_epoll_wait+97 do_syscall_64+95 entry_SYSCALL_64_after_hwframe+118 epoll_wait+82 Which is unexpected; the busy poll usecs should have consumed the entire timeout and there should be no reason to arm a timer. After this patch is applied: the same test scenario does not generate a call to schedule() in the above edge case. If the busy poll usecs are reduced (for example usecs: 100, epoll_wait timeout 1ms) the timer is armed as expected. Fixes: bf3b9f6372c4 ("epoll: Add busy poll support to epoll with socket fds.") Signed-off-by: Joe Damato Link: https://lore.kernel.org/20250416185826.26375-1-jdamato@fastly.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/eventpoll.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 100376863a4451..4bc264b854c436 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1996,6 +1996,14 @@ static int ep_try_send_events(struct eventpoll *ep, return res; } +static int ep_schedule_timeout(ktime_t *to) +{ + if (to) + return ktime_after(*to, ktime_get()); + else + return 1; +} + /** * ep_poll - Retrieves ready events, and delivers them to the caller-supplied * event buffer. @@ -2103,7 +2111,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, write_unlock_irq(&ep->lock); - if (!eavail) + if (!eavail && ep_schedule_timeout(to)) timed_out = !schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); From 76c332d119f9048c6e16b52359f401510f18b2ff Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 16 Apr 2025 10:38:05 +0200 Subject: [PATCH 0807/2924] drm/mgag200: Fix value in register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix an off-by-one error when setting the vblanking start in . Commit d6460bd52c27 ("drm/mgag200: Add dedicated variables for blanking fields") switched the value from crtc_vdisplay to crtc_vblank_start, which DRM helpers copy from the former. The commit missed to subtract one though. Reported-by: Wakko Warner Closes: https://lore.kernel.org/dri-devel/CAMwc25rKPKooaSp85zDq2eh-9q4UPZD=RqSDBRp1fAagDnmRmA@mail.gmail.com/ Reported-by: Сергей Closes: https://lore.kernel.org/all/5b193b75-40b1-4342-a16a-ae9fc62f245a@gmail.com/ Closes: https://bbs.archlinux.org/viewtopic.php?id=303819 Signed-off-by: Thomas Zimmermann Fixes: d6460bd52c27 ("drm/mgag200: Add dedicated variables for blanking fields") Cc: Thomas Zimmermann Cc: Jocelyn Falempe Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: # v6.12+ Reviewed-by: Jocelyn Falempe Tested-by: Wakko Warner Link: https://lore.kernel.org/r/20250416083847.51764-1-tzimmermann@suse.de --- drivers/gpu/drm/mgag200/mgag200_mode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index fb71658c3117b2..6067d08aeee34b 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -223,7 +223,7 @@ void mgag200_set_mode_regs(struct mga_device *mdev, const struct drm_display_mod vsyncstr = mode->crtc_vsync_start - 1; vsyncend = mode->crtc_vsync_end - 1; vtotal = mode->crtc_vtotal - 2; - vblkstr = mode->crtc_vblank_start; + vblkstr = mode->crtc_vblank_start - 1; vblkend = vtotal + 1; linecomp = vdispend; From a374f28700abd20e8a7d026f89aa26f759445918 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Apr 2025 09:28:38 +0200 Subject: [PATCH 0808/2924] cpufreq: fix compile-test defaults Commit 3f66425a4fc8 ("cpufreq: Enable COMPILE_TEST on Arm drivers") enabled compile testing of most Arm CPUFreq drivers but left the existing default values unchanged so that many drivers are enabled by default whenever COMPILE_TEST is selected. This specifically results in the S3C64XX CPUFreq driver being enabled and initialised during boot of non-S3C64XX platforms with the following error logged: cpufreq: Unable to obtain ARMCLK: -2 Commit d4f610a9bafd ("cpufreq: Do not enable by default during compile testing") recently fixed most of the default values, but two entries were missed and two could use a more specific default condition. Fix the default values for drivers that can be compile tested and that should be enabled by default when not compile testing. Fixes: 3f66425a4fc8 ("cpufreq: Enable COMPILE_TEST on Arm drivers") Cc: Rob Herring (Arm) Signed-off-by: Johan Hovold Reviewed-by: Krzysztof Kozlowski Signed-off-by: Viresh Kumar --- drivers/cpufreq/Kconfig.arm | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index d4d625ded285f0..0d46402e30942e 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ config ARM_BRCMSTB_AVS_CPUFREQ tristate "Broadcom STB AVS CPUfreq driver" depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || COMPILE_TEST - default ARCH_BRCMSTB + default y if ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ help Some Broadcom STB SoCs use a co-processor running proprietary firmware ("AVS") to handle voltage and frequency scaling. This driver provides @@ -88,7 +88,7 @@ config ARM_HIGHBANK_CPUFREQ tristate "Calxeda Highbank-based" depends on ARCH_HIGHBANK || COMPILE_TEST depends on CPUFREQ_DT && REGULATOR && PL320_MBOX - default m + default m if ARCH_HIGHBANK help This adds the CPUFreq driver for Calxeda Highbank SoC based boards. @@ -133,7 +133,7 @@ config ARM_MEDIATEK_CPUFREQ config ARM_MEDIATEK_CPUFREQ_HW tristate "MediaTek CPUFreq HW driver" depends on ARCH_MEDIATEK || COMPILE_TEST - default m + default m if ARCH_MEDIATEK help Support for the CPUFreq HW driver. Some MediaTek chipsets have a HW engine to offload the steps @@ -256,7 +256,7 @@ config ARM_TEGRA194_CPUFREQ tristate "Tegra194 CPUFreq support" depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST) depends on TEGRA_BPMP - default ARCH_TEGRA + default ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC help This adds CPU frequency driver support for Tegra194 SOCs. From 58db1c3cd0ce857e7210b0a95908900c25c28c3e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Apr 2025 15:16:55 -0700 Subject: [PATCH 0809/2924] netfs: Mark __nonstring lookup tables GCC 15's new -Wunterminated-string-initialization notices that the character lookup tables "fscache_cache_states" and "fscache_cookie_states" (which are not used as a C-String) need to be marked as "nonstring": fs/netfs/fscache_cache.c:375:67: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (6 chars into 5 available) [-Wunterminated-string-initialization] 375 | static const char fscache_cache_states[NR__FSCACHE_CACHE_STATE] = "-PAEW"; | ^~~~~~~ fs/netfs/fscache_cookie.c:32:69: warning: initializer-string for array of 'char' truncates NUL terminator but destination lacks 'nonstring' attribute (11 chars into 10 available) [-Wunterminated-string-initialization] 32 | static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] = "-LCAIFUWRD"; | ^~~~~~~~~~~~ Annotate the arrays. Signed-off-by: Kees Cook Link: https://lore.kernel.org/20250416221654.work.028-kees@kernel.org Signed-off-by: Christian Brauner --- fs/netfs/fscache_cache.c | 2 +- fs/netfs/fscache_cookie.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c index 9397ed39b0b4ec..8f70f8da064b50 100644 --- a/fs/netfs/fscache_cache.c +++ b/fs/netfs/fscache_cache.c @@ -372,7 +372,7 @@ void fscache_withdraw_cache(struct fscache_cache *cache) EXPORT_SYMBOL(fscache_withdraw_cache); #ifdef CONFIG_PROC_FS -static const char fscache_cache_states[NR__FSCACHE_CACHE_STATE] = "-PAEW"; +static const char fscache_cache_states[NR__FSCACHE_CACHE_STATE] __nonstring = "-PAEW"; /* * Generate a list of caches in /proc/fs/fscache/caches diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c index d4d4b3a8b10603..3d56fc73435ffb 100644 --- a/fs/netfs/fscache_cookie.c +++ b/fs/netfs/fscache_cookie.c @@ -29,7 +29,7 @@ static LIST_HEAD(fscache_cookie_lru); static DEFINE_SPINLOCK(fscache_cookie_lru_lock); DEFINE_TIMER(fscache_cookie_lru_timer, fscache_cookie_lru_timed_out); static DECLARE_WORK(fscache_cookie_lru_work, fscache_cookie_lru_worker); -static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] = "-LCAIFUWRD"; +static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] __nonstring = "-LCAIFUWRD"; static unsigned int fscache_lru_cookie_timeout = 10 * HZ; void fscache_print_cookie(struct fscache_cookie *cookie, char prefix) From 777d0961ff95b26d5887fdae69900374364976f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Apr 2025 08:40:42 +0200 Subject: [PATCH 0810/2924] fs: move the bdex_statx call to vfs_getattr_nosec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently bdex_statx is only called from the very high-level vfs_statx_path function, and thus bypassing it for in-kernel calls to vfs_getattr or vfs_getattr_nosec. This breaks querying the block ѕize of the underlying device in the loop driver and also is a pitfall for any other new kernel caller. Move the call into the lowest level helper to ensure all callers get the right results. Fixes: 2d985f8c6b91 ("vfs: support STATX_DIOALIGN on block devices") Fixes: f4774e92aab8 ("loop: take the file system minimum dio alignment into account") Reported-by: "Darrick J. Wong" Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250417064042.712140-1-hch@lst.de Signed-off-by: Christian Brauner --- block/bdev.c | 3 +-- fs/stat.c | 32 ++++++++++++++++++-------------- include/linux/blkdev.h | 6 +++--- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/block/bdev.c b/block/bdev.c index 4844d1e27b6fbc..6a34179192c913 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -1272,8 +1272,7 @@ void sync_bdevs(bool wait) /* * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices. */ -void bdev_statx(struct path *path, struct kstat *stat, - u32 request_mask) +void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask) { struct inode *backing_inode; struct block_device *bdev; diff --git a/fs/stat.c b/fs/stat.c index f13308bfdc983a..3d9222807214a7 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -204,12 +204,25 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, STATX_ATTR_DAX); idmap = mnt_idmap(path->mnt); - if (inode->i_op->getattr) - return inode->i_op->getattr(idmap, path, stat, - request_mask, - query_flags); + if (inode->i_op->getattr) { + int ret; + + ret = inode->i_op->getattr(idmap, path, stat, request_mask, + query_flags); + if (ret) + return ret; + } else { + generic_fillattr(idmap, request_mask, inode, stat); + } + + /* + * If this is a block device inode, override the filesystem attributes + * with the block device specific parameters that need to be obtained + * from the bdev backing inode. + */ + if (S_ISBLK(stat->mode)) + bdev_statx(path, stat, request_mask); - generic_fillattr(idmap, request_mask, inode, stat); return 0; } EXPORT_SYMBOL(vfs_getattr_nosec); @@ -295,15 +308,6 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat, if (path_mounted(path)) stat->attributes |= STATX_ATTR_MOUNT_ROOT; stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; - - /* - * If this is a block device inode, override the filesystem - * attributes with the block device specific parameters that need to be - * obtained from the bdev backing inode. - */ - if (S_ISBLK(stat->mode)) - bdev_statx(path, stat, request_mask); - return 0; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e39c45bc0a97ee..678dc38442bfdd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1685,7 +1685,7 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); -void bdev_statx(struct path *, struct kstat *, u32); +void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask); void printk_all_partitions(void); int __init early_lookup_bdev(const char *pathname, dev_t *dev); #else @@ -1703,8 +1703,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -static inline void bdev_statx(struct path *path, struct kstat *stat, - u32 request_mask) +static inline void bdev_statx(const struct path *path, struct kstat *stat, + u32 request_mask) { } static inline void printk_all_partitions(void) From e3fd0577768584ece824c8b661c40fb3d912812a Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Tue, 15 Apr 2025 13:13:18 +0200 Subject: [PATCH 0811/2924] xfrm: Fix UDP GRO handling for some corner cases This fixes an issue that's caused if there is a mismatch between the data offset in the GRO header and the length fields in the regular sk_buff due to the pskb_pull()/skb_push() calls. That's because the UDP GRO layer stripped off the UDP header via skb_gro_pull() already while the UDP header was explicitly not pulled/pushed in this function. For example, an IKE packet that triggered this had len=data_len=1268 and the data_offset in the GRO header was 28 (IPv4 + UDP). So pskb_pull() was called with an offset of 28-8=20, which reduced len to 1248 and via pskb_may_pull() and __pskb_pull_tail() it also set data_len to 1248. As the ESP offload module was not loaded, the function bailed out and called skb_push(), which restored len to 1268, however, data_len remained at 1248. So while skb_headlen() was 0 before, it was now 20. The latter caused a difference of 8 instead of 28 (or 0 if pskb_pull()/skb_push() was called with the complete GRO data_offset) in gro_try_pull_from_frag0() that triggered a call to gro_pull_from_frag0() that corrupted the packet. This change uses a more GRO-like approach seen in other GRO receivers via skb_gro_header() to just read the actual data we are interested in and does not try to "restore" the UDP header at this point to call the existing function. If the offload module is not loaded, it immediately bails out, otherwise, it only does a quick check to see if the packet is an IKE or keepalive packet instead of calling the existing function. Fixes: 172bf009c18d ("xfrm: Support GRO for IPv4 ESP in UDP encapsulation") Fixes: 221ddb723d90 ("xfrm: Support GRO for IPv6 ESP in UDP encapsulation") Signed-off-by: Tobias Brunner Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_input.c | 18 ++++++++++-------- net/ipv6/xfrm6_input.c | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index b5b06323cfd94a..0d31a8c108d4f6 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -182,11 +182,15 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, int offset = skb_gro_offset(skb); const struct net_offload *ops; struct sk_buff *pp = NULL; - int ret; - - offset = offset - sizeof(struct udphdr); + int len, dlen; + __u8 *udpdata; + __be32 *udpdata32; - if (!pskb_pull(skb, offset)) + len = skb->len - offset; + dlen = offset + min(len, 8); + udpdata = skb_gro_header(skb, dlen, offset); + udpdata32 = (__be32 *)udpdata; + if (unlikely(!udpdata)) return NULL; rcu_read_lock(); @@ -194,11 +198,10 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (!ops || !ops->callbacks.gro_receive) goto out; - ret = __xfrm4_udp_encap_rcv(sk, skb, false); - if (ret) + /* check if it is a keepalive or IKE packet */ + if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; - skb_push(skb, offset); NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); @@ -208,7 +211,6 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, out: rcu_read_unlock(); - skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4abc5e9d63227a..841c81abaaf4ff 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -179,14 +179,18 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, int offset = skb_gro_offset(skb); const struct net_offload *ops; struct sk_buff *pp = NULL; - int ret; + int len, dlen; + __u8 *udpdata; + __be32 *udpdata32; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_gro_udp_encap_rcv(sk, head, skb); - offset = offset - sizeof(struct udphdr); - - if (!pskb_pull(skb, offset)) + len = skb->len - offset; + dlen = offset + min(len, 8); + udpdata = skb_gro_header(skb, dlen, offset); + udpdata32 = (__be32 *)udpdata; + if (unlikely(!udpdata)) return NULL; rcu_read_lock(); @@ -194,11 +198,10 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (!ops || !ops->callbacks.gro_receive) goto out; - ret = __xfrm6_udp_encap_rcv(sk, skb, false); - if (ret) + /* check if it is a keepalive or IKE packet */ + if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; - skb_push(skb, offset); NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); @@ -208,7 +211,6 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, out: rcu_read_unlock(); - skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; From 50492f942c281af4a48f8028f8409d7b8f2655d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Wed, 16 Apr 2025 17:47:11 +0200 Subject: [PATCH 0812/2924] landlock: Fix documentation for landlock_create_ruleset(2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move and fix the flags documentation, and improve formatting. It makes more sense and it eases maintenance to document syscall flags in landlock.h, where they are defined. This is already the case for landlock_restrict_self(2)'s flags. The flags are now rendered like the syscall's parameters and description. Cc: Günther Noack Cc: Paul Moore Link: https://lore.kernel.org/r/20250416154716.1799902-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 14 +++++++++----- security/landlock/syscalls.c | 15 +++++++-------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index d9d0cb827117db..9a4b64be9869be 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -53,12 +53,16 @@ struct landlock_ruleset_attr { __u64 scoped; }; -/* - * sys_landlock_create_ruleset() flags: +/** + * DOC: landlock_create_ruleset_flags + * + * **Flags** + * + * %LANDLOCK_CREATE_RULESET_VERSION + * Get the highest supported Landlock ABI version (starting at 1). * - * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI - * version. - * - %LANDLOCK_CREATE_RULESET_ERRATA: Get a bitmask of fixed issues. + * %LANDLOCK_CREATE_RULESET_ERRATA + * Get a bitmask of fixed issues for the current Landlock ABI version. */ /* clang-format off */ #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0) diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 54a9f29e6ebb11..9515dc92b99f13 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -169,20 +169,16 @@ const int landlock_abi_version = 7; * the new ruleset. * @size: Size of the pointed &struct landlock_ruleset_attr (needed for * backward and forward compatibility). - * @flags: Supported value: + * @flags: Supported values: + * * - %LANDLOCK_CREATE_RULESET_VERSION * - %LANDLOCK_CREATE_RULESET_ERRATA * * This system call enables to create a new Landlock ruleset, and returns the * related file descriptor on success. * - * If @flags is %LANDLOCK_CREATE_RULESET_VERSION and @attr is NULL and @size is - * 0, then the returned value is the highest supported Landlock ABI version - * (starting at 1). - * - * If @flags is %LANDLOCK_CREATE_RULESET_ERRATA and @attr is NULL and @size is - * 0, then the returned value is a bitmask of fixed issues for the current - * Landlock ABI version. + * If %LANDLOCK_CREATE_RULESET_VERSION or %LANDLOCK_CREATE_RULESET_ERRATA is + * set, then @attr must be NULL and @size must be 0. * * Possible returned errors are: * @@ -191,6 +187,9 @@ const int landlock_abi_version = 7; * - %E2BIG: @attr or @size inconsistencies; * - %EFAULT: @attr or @size inconsistencies; * - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs. + * + * .. kernel-doc:: include/uapi/linux/landlock.h + * :identifiers: landlock_create_ruleset_flags */ SYSCALL_DEFINE3(landlock_create_ruleset, const struct landlock_ruleset_attr __user *const, attr, From 25b1fc1cdc8931cf26e8d169f65ad07dfd653ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Wed, 16 Apr 2025 17:47:12 +0200 Subject: [PATCH 0813/2924] landlock: Fix documentation for landlock_restrict_self(2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix, deduplicate, and improve rendering of landlock_restrict_self(2)'s flags documentation. The flags are now rendered like the syscall's parameters and description. Cc: Günther Noack Cc: Paul Moore Link: https://lore.kernel.org/r/20250416154716.1799902-2-mic@digikod.net Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 61 +++++++++++++++++++++-------------- security/landlock/syscalls.c | 12 +++---- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 9a4b64be9869be..8b2a1dc5c70b52 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -69,31 +69,42 @@ struct landlock_ruleset_attr { #define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1) /* clang-format on */ -/* - * sys_landlock_restrict_self() flags: - * - * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF: Do not create any log related to the - * enforced restrictions. This should only be set by tools launching unknown - * or untrusted programs (e.g. a sandbox tool, container runtime, system - * service manager). Because programs sandboxing themselves should fix any - * denied access, they should not set this flag to be aware of potential - * issues reported by system's logs (i.e. audit). - * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON: Explicitly ask to continue - * logging denied access requests even after an :manpage:`execve(2)` call. - * This flag should only be set if all the programs than can legitimately be - * executed will not try to request a denied access (which could spam audit - * logs). - * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF: Do not create any log related - * to the enforced restrictions coming from future nested domains created by - * the caller or its descendants. This should only be set according to a - * runtime configuration (i.e. not hardcoded) by programs launching other - * unknown or untrusted programs that may create their own Landlock domains - * and spam logs. The main use case is for container runtimes to enable users - * to mute buggy sandboxed programs for a specific container image. Other use - * cases include sandboxer tools and init systems. Unlike - * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF, - * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF does not impact the requested - * restriction (if any) but only the future nested domains. +/** + * DOC: landlock_restrict_self_flags + * + * **Flags** + * + * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF + * Do not create any log related to the enforced restrictions. This should + * only be set by tools launching unknown or untrusted programs (e.g. a + * sandbox tool, container runtime, system service manager). Because + * programs sandboxing themselves should fix any denied access, they should + * not set this flag to be aware of potential issues reported by system's + * logs (i.e. audit). + * + * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON + * Explicitly ask to continue logging denied access requests even after an + * :manpage:`execve(2)` call. This flag should only be set if all the + * programs than can legitimately be executed will not try to request a + * denied access (which could spam audit logs). + * + * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * Do not create any log related to the enforced restrictions coming from + * future nested domains created by the caller or its descendants. This + * should only be set according to a runtime configuration (i.e. not + * hardcoded) by programs launching other unknown or untrusted programs that + * may create their own Landlock domains and spam logs. The main use case + * is for container runtimes to enable users to mute buggy sandboxed + * programs for a specific container image. Other use cases include + * sandboxer tools and init systems. Unlike + * ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, + * ``LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF`` does not impact the + * requested restriction (if any) but only the future nested domains. + * + * It is allowed to only pass the + * ``LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF`` flag with a @ruleset_fd + * value of -1. + * */ /* clang-format off */ #define LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF (1U << 0) diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 9515dc92b99f13..b9561e3417aecb 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -451,18 +451,15 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, * @ruleset_fd: File descriptor tied to the ruleset to merge with the target. * @flags: Supported values: * - * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF - * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON - * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF + * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON + * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF * * This system call enables to enforce a Landlock ruleset on the current * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its * namespace or is running with no_new_privs. This avoids scenarios where * unprivileged tasks can affect the behavior of privileged children. * - * It is allowed to only pass the %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF - * flag with a @ruleset_fd value of -1. - * * Possible returned errors are: * * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; @@ -474,6 +471,9 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, * %CAP_SYS_ADMIN in its namespace. * - %E2BIG: The maximum number of stacked rulesets is reached for the current * thread. + * + * .. kernel-doc:: include/uapi/linux/landlock.h + * :identifiers: landlock_restrict_self_flags */ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, flags) From 47ce2af848b7301d8571f0e01a0d7c7162d51e4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Wed, 16 Apr 2025 17:47:13 +0200 Subject: [PATCH 0814/2924] landlock: Update log documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix and improve documentation related to landlock_restrict_self(2)'s flags. Update the LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF documentation according to the current semantic. Cc: Günther Noack Cc: Paul Moore Link: https://lore.kernel.org/r/20250416154716.1799902-3-mic@digikod.net Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 64 +++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 8b2a1dc5c70b52..f030adc462ee74 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -74,37 +74,49 @@ struct landlock_ruleset_attr { * * **Flags** * + * By default, denied accesses originating from programs that sandbox themselves + * are logged via the audit subsystem. Such events typically indicate unexpected + * behavior, such as bugs or exploitation attempts. However, to avoid excessive + * logging, access requests denied by a domain not created by the originating + * program are not logged by default. The rationale is that programs should know + * their own behavior, but not necessarily the behavior of other programs. This + * default configuration is suitable for most programs that sandbox themselves. + * For specific use cases, the following flags allow programs to modify this + * default logging behavior. + * + * The %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF and + * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON flags apply to the newly created + * Landlock domain. + * * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF - * Do not create any log related to the enforced restrictions. This should - * only be set by tools launching unknown or untrusted programs (e.g. a - * sandbox tool, container runtime, system service manager). Because - * programs sandboxing themselves should fix any denied access, they should - * not set this flag to be aware of potential issues reported by system's - * logs (i.e. audit). + * Disables logging of denied accesses originating from the thread creating + * the Landlock domain, as well as its children, as long as they continue + * running the same executable code (i.e., without an intervening + * :manpage:`execve(2)` call). This is intended for programs that execute + * unknown code without invoking :manpage:`execve(2)`, such as script + * interpreters. Programs that only sandbox themselves should not set this + * flag, so users can be notified of unauthorized access attempts via system + * logs. * * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON - * Explicitly ask to continue logging denied access requests even after an - * :manpage:`execve(2)` call. This flag should only be set if all the - * programs than can legitimately be executed will not try to request a - * denied access (which could spam audit logs). + * Enables logging of denied accesses after an :manpage:`execve(2)` call, + * providing visibility into unauthorized access attempts by newly executed + * programs within the created Landlock domain. This flag is recommended + * only when all potential executables in the domain are expected to comply + * with the access restrictions, as excessive audit log entries could make + * it more difficult to identify critical events. * * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF - * Do not create any log related to the enforced restrictions coming from - * future nested domains created by the caller or its descendants. This - * should only be set according to a runtime configuration (i.e. not - * hardcoded) by programs launching other unknown or untrusted programs that - * may create their own Landlock domains and spam logs. The main use case - * is for container runtimes to enable users to mute buggy sandboxed - * programs for a specific container image. Other use cases include - * sandboxer tools and init systems. Unlike - * ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, - * ``LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF`` does not impact the - * requested restriction (if any) but only the future nested domains. - * - * It is allowed to only pass the - * ``LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF`` flag with a @ruleset_fd - * value of -1. - * + * Disables logging of denied accesses originating from nested Landlock + * domains created by the caller or its descendants. This flag should be set + * according to runtime configuration, not hardcoded, to avoid suppressing + * important security events. It is useful for container runtimes or + * sandboxing tools that may launch programs which themselves create + * Landlock domains and could otherwise generate excessive logs. Unlike + * ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects + * future nested domains, not the one being created. It can also be used + * with a @ruleset_fd value of -1 to mute subdomain logs without creating a + * domain. */ /* clang-format off */ #define LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF (1U << 0) From d2d31ea8cd80b9830cdab624e94f9d41178fc99d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Apr 2025 15:53:48 +0200 Subject: [PATCH 0815/2924] netfilter: conntrack: fix erronous removal of offload bit The blamed commit exposes a possible issue with flow_offload_teardown(): We might remove the offload bit of a conntrack entry that has been offloaded again. 1. conntrack entry c1 is offloaded via flow f1 (f1->ct == c1). 2. f1 times out and is pushed back to slowpath, c1 offload bit is removed. Due to bug, f1 is not unlinked from rhashtable right away. 3. a new packet arrives for the flow and re-offload is triggered, i.e. f2->ct == c1. This is because lookup in flowtable skip entries with teardown bit set. 4. Next flowtable gc cycle finds f1 again 5. flow_offload_teardown() is called again for f1 and c1 offload bit is removed again, even though we have f2 referencing the same entry. This is harmless, but clearly not correct. Fix the bug that exposes this: set 'teardown = true' to have the gc callback unlink the flowtable entry from the table right away instead of the unintentional defer to the next round. Also prevent flow_offload_teardown() from fixing up the ct state more than once: We could also be called from the data path or a notifier, not only from the flowtable gc callback. NF_FLOW_TEARDOWN can never be unset, so we can use it as synchronization point: if we observe did not see a 0 -> 1 transition, then another CPU is already doing the ct state fixups for us. Fixes: 03428ca5cee9 ("netfilter: conntrack: rework offload nf_conn timeout extension logic") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 9d8361526f82ac..9441ac3d8c1a2e 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -383,8 +383,8 @@ static void flow_offload_del(struct nf_flowtable *flow_table, void flow_offload_teardown(struct flow_offload *flow) { clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); - set_bit(NF_FLOW_TEARDOWN, &flow->flags); - flow_offload_fixup_ct(flow); + if (!test_and_set_bit(NF_FLOW_TEARDOWN, &flow->flags)) + flow_offload_fixup_ct(flow); } EXPORT_SYMBOL_GPL(flow_offload_teardown); @@ -558,10 +558,12 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || - nf_flow_custom_gc(flow_table, flow)) + nf_flow_custom_gc(flow_table, flow)) { flow_offload_teardown(flow); - else if (!teardown) + teardown = true; + } else if (!teardown) { nf_flow_table_extend_ct_timeout(flow->ct); + } if (teardown) { if (test_bit(NF_FLOW_HW, &flow->flags)) { From df180e65305f8c1e020d54bfc2132349fd693de1 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 3 Apr 2025 11:24:19 -0500 Subject: [PATCH 0816/2924] dmaengine: Revert "dmaengine: dmatest: Fix dmatest waiting less when interrupted" Several issues with this change: * The analysis is flawed and it's unclear what problem is being fixed. There is no difference between wait_event_freezable_timeout() and wait_event_timeout() with respect to device interrupts. And of course "the interrupt notifying the finish of an operation happens during wait_event_freezable_timeout()" -- that's how it's supposed to work. * The link at the "Closes:" tag appears to be an unrelated use-after-free in idxd. * It introduces a regression: dmatest threads are meant to be freezable and this change breaks that. See discussion here: https://lore.kernel.org/dmaengine/878qpa13fe.fsf@AUSNATLYNCH.amd.com/ Fixes: e87ca16e9911 ("dmaengine: dmatest: Fix dmatest waiting less when interrupted") Signed-off-by: Nathan Lynch Link: https://lore.kernel.org/r/20250403-dmaengine-dmatest-revert-waiting-less-v1-1-8227c5a3d7c8@amd.com Signed-off-by: Vinod Koul --- drivers/dma/dmatest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index d891dfca358e20..91b2fbc0b86471 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -841,9 +841,9 @@ static int dmatest_func(void *data) } else { dma_async_issue_pending(chan); - wait_event_timeout(thread->done_wait, - done->done, - msecs_to_jiffies(params->timeout)); + wait_event_freezable_timeout(thread->done_wait, + done->done, + msecs_to_jiffies(params->timeout)); status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); From cf6ae7ed091059a8d1a70cf184f18ebfd18ab4af Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 26 Mar 2025 14:41:13 +1030 Subject: [PATCH 0817/2924] btrfs: subpage: access correct object when reading bitmap start in subpage_calc_start_bit() Inside the macro, subpage_calc_start_bit(), we need to calculate the offset to the beginning of the folio. But we're using offset_in_page(), on systems with 4K page size and 4K fs block size, this means we will always return offset 0 for a large folio, causing all kinds of errors. Fix it by using offset_in_folio() instead. Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/subpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 11dbd7be6a3b3b..bd252c78a261d4 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -204,7 +204,7 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, btrfs_blocks_per_folio(fs_info, folio); \ \ btrfs_subpage_assert(fs_info, folio, start, len); \ - __start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \ + __start_bit = offset_in_folio(folio, start) >> fs_info->sectorsize_bits; \ __start_bit += blocks_per_folio * btrfs_bitmap_nr_##name; \ __start_bit; \ }) From bc2dbc4983afedd198490cca043798f57c93e9bf Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 29 Mar 2025 17:46:35 +1030 Subject: [PATCH 0818/2924] btrfs: avoid page_lockend underflow in btrfs_punch_hole_lock_range() [BUG] When running btrfs/004 with 4K fs block size and 64K page size, sometimes fsstress workload can take 100% CPU for a while, but not long enough to trigger a 120s hang warning. [CAUSE] When such 100% CPU usage happens, btrfs_punch_hole_lock_range() is always in the call trace. One example when this problem happens, the function btrfs_punch_hole_lock_range() got the following parameters: lock_start = 4096, lockend = 20469 Then we calculate @page_lockstart by rounding up lock_start to page boundary, which is 64K (page size is 64K). For @page_lockend, we round down the value towards page boundary, which result 0. Then since we need to pass an inclusive end to filemap_range_has_page(), we subtract 1 from the rounded down value, resulting in (u64)-1. In the above case, the range is inside the same page, and we do not even need to call filemap_range_has_page(), not to mention to call it with (u64)-1 at the end. This behavior will cause btrfs_punch_hole_lock_range() to busy loop waiting for irrelevant range to have its pages dropped. [FIX] Calculate @page_lockend by just rounding down @lockend, without decreasing the value by one. So @page_lockend will no longer overflow. Then exit early if @page_lockend is no larger than @page_lockstart. As it means either the range is inside the same page, or the two pages are adjacent already. Finally only decrease @page_lockend when calling filemap_range_has_page(). Fixes: 0528476b6ac7 ("btrfs: fix the filemap_range_has_page() call in btrfs_punch_hole_lock_range()") Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/file.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 262a707d899064..71b8a825c4479a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2104,15 +2104,20 @@ static void btrfs_punch_hole_lock_range(struct inode *inode, * will always return true. * So here we need to do extra page alignment for * filemap_range_has_page(). + * + * And do not decrease page_lockend right now, as it can be 0. */ const u64 page_lockstart = round_up(lockstart, PAGE_SIZE); - const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1; + const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE); while (1) { truncate_pagecache_range(inode, lockstart, lockend); lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, cached_state); + /* The same page or adjacent pages. */ + if (page_lockend <= page_lockstart) + break; /* * We can't have ordered extents in the range, nor dirty/writeback * pages, because we have locked the inode's VFS lock in exclusive @@ -2124,7 +2129,7 @@ static void btrfs_punch_hole_lock_range(struct inode *inode, * we do, unlock the range and retry. */ if (!filemap_range_has_page(inode->i_mapping, page_lockstart, - page_lockend)) + page_lockend - 1)) break; unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, From 7d82240c457fc15abdf7dedf15104cea774b005b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 1 Apr 2025 18:20:28 +1030 Subject: [PATCH 0819/2924] btrfs: fix the ASSERT() inside GET_SUBPAGE_BITMAP() After enabling large data folios for tests, I hit the ASSERT() inside GET_SUBPAGE_BITMAP() where blocks_per_folio matches BITS_PER_LONG. The ASSERT() itself is only based on the original subpage fs block size, where we have at most 16 blocks per page, thus "ASSERT(blocks_per_folio < BITS_PER_LONG)". However the experimental large data folio support will set the max folio order according to the BITS_PER_LONG, so we can have a case where a large folio contains exactly BITS_PER_LONG blocks. So the ASSERT() is too strict, change it to "ASSERT(blocks_per_folio <= BITS_PER_LONG)" to avoid the false alert. Reviewed-by: Filipe Manana Reviewed-by: Sweet Tea Dorminy Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/subpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index bd252c78a261d4..c0a0b8b063d085 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -666,7 +666,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, btrfs_blocks_per_folio(fs_info, folio); \ const struct btrfs_subpage *subpage = folio_get_private(folio); \ \ - ASSERT(blocks_per_folio < BITS_PER_LONG); \ + ASSERT(blocks_per_folio <= BITS_PER_LONG); \ *dst = bitmap_read(subpage->bitmaps, \ blocks_per_folio * btrfs_bitmap_nr_##name, \ blocks_per_folio); \ From b0c26f47992672661340dd6ea931240213016609 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 17 Mar 2025 16:04:01 +0100 Subject: [PATCH 0820/2924] btrfs: zoned: return EIO on RAID1 block group write pointer mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There was a bug report about a NULL pointer dereference in __btrfs_add_free_space_zoned() that ultimately happens because a conversion from the default metadata profile DUP to a RAID1 profile on two disks. The stack trace has the following signature: BTRFS error (device sdc): zoned: write pointer offset mismatch of zones in raid1 profile BUG: kernel NULL pointer dereference, address: 0000000000000058 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:__btrfs_add_free_space_zoned.isra.0+0x61/0x1a0 RSP: 0018:ffffa236b6f3f6d0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff96c8132f3400 RCX: 0000000000000001 RDX: 0000000010000000 RSI: 0000000000000000 RDI: ffff96c8132f3410 RBP: 0000000010000000 R08: 0000000000000003 R09: 0000000000000000 R10: 0000000000000000 R11: 00000000ffffffff R12: 0000000000000000 R13: ffff96c758f65a40 R14: 0000000000000001 R15: 000011aac0000000 FS: 00007fdab1cb2900(0000) GS:ffff96e60ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000058 CR3: 00000001a05ae000 CR4: 0000000000350ef0 Call Trace: ? __die_body.cold+0x19/0x27 ? page_fault_oops+0x15c/0x2f0 ? exc_page_fault+0x7e/0x180 ? asm_exc_page_fault+0x26/0x30 ? __btrfs_add_free_space_zoned.isra.0+0x61/0x1a0 btrfs_add_free_space_async_trimmed+0x34/0x40 btrfs_add_new_free_space+0x107/0x120 btrfs_make_block_group+0x104/0x2b0 btrfs_create_chunk+0x977/0xf20 btrfs_chunk_alloc+0x174/0x510 ? srso_return_thunk+0x5/0x5f btrfs_inc_block_group_ro+0x1b1/0x230 btrfs_relocate_block_group+0x9e/0x410 btrfs_relocate_chunk+0x3f/0x130 btrfs_balance+0x8ac/0x12b0 ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? __kmalloc_cache_noprof+0x14c/0x3e0 btrfs_ioctl+0x2686/0x2a80 ? srso_return_thunk+0x5/0x5f ? ioctl_has_perm.constprop.0.isra.0+0xd2/0x120 __x64_sys_ioctl+0x97/0xc0 do_syscall_64+0x82/0x160 ? srso_return_thunk+0x5/0x5f ? __memcg_slab_free_hook+0x11a/0x170 ? srso_return_thunk+0x5/0x5f ? kmem_cache_free+0x3f0/0x450 ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? syscall_exit_to_user_mode+0x10/0x210 ? srso_return_thunk+0x5/0x5f ? do_syscall_64+0x8e/0x160 ? sysfs_emit+0xaf/0xc0 ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? seq_read_iter+0x207/0x460 ? srso_return_thunk+0x5/0x5f ? vfs_read+0x29c/0x370 ? srso_return_thunk+0x5/0x5f ? srso_return_thunk+0x5/0x5f ? syscall_exit_to_user_mode+0x10/0x210 ? srso_return_thunk+0x5/0x5f ? do_syscall_64+0x8e/0x160 ? srso_return_thunk+0x5/0x5f ? exc_page_fault+0x7e/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7fdab1e0ca6d RSP: 002b:00007ffeb2b60c80 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fdab1e0ca6d RDX: 00007ffeb2b60d80 RSI: 00000000c4009420 RDI: 0000000000000003 RBP: 00007ffeb2b60cd0 R08: 0000000000000000 R09: 0000000000000013 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffeb2b6343b R14: 00007ffeb2b60d80 R15: 0000000000000001 CR2: 0000000000000058 ---[ end trace 0000000000000000 ]--- The 1st line is the most interesting here: BTRFS error (device sdc): zoned: write pointer offset mismatch of zones in raid1 profile When a RAID1 block-group is created and a write pointer mismatch between the disks in the RAID set is detected, btrfs sets the alloc_offset to the length of the block group marking it as full. Afterwards the code expects that a balance operation will evacuate the data in this block-group and repair the problems. But before this is possible, the new space of this block-group will be accounted in the free space cache. But in __btrfs_add_free_space_zoned() it is being checked if it is a initial creation of a block group and if not a reclaim decision will be made. But the decision if a block-group's free space accounting is done for an initial creation depends on if the size of the added free space is the whole length of the block-group and the allocation offset is 0. But as btrfs_load_block_group_zone_info() sets the allocation offset to the zone capacity (i.e. marking the block-group as full) this initial decision is not met, and the space_info pointer in the 'struct btrfs_block_group' has not yet been assigned. Fail creation of the block group and rely on manual user intervention to re-balance the filesystem. Afterwards the filesystem can be unmounted, mounted in degraded mode and the missing device can be removed after a full balance of the filesystem. Reported-by: 西木野羰基 Link: https://lore.kernel.org/linux-btrfs/CAB_b4sBhDe3tscz=duVyhc9hNE+gu=B8CrgLO152uMyanR8BEA@mail.gmail.com/ Fixes: b1934cd60695 ("btrfs: zoned: handle broken write pointer on zones") Reviewed-by: Anand Jain Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index fb8b8b29c169ca..7c502192cd6bef 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1659,7 +1659,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) * stripe. */ cache->alloc_offset = cache->zone_capacity; - ret = 0; } out: From 50fecb8cf069f0814642ce0bde965bdc1f35a79e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 5 Apr 2025 18:40:14 +0100 Subject: [PATCH 0821/2924] btrfs: fix invalid inode pointer after failure to create reloc inode If we have a failure at create_reloc_inode(), under the 'out' label we assign an error pointer to the 'inode' variable and then return a weird pointer because we return the expression "&inode->vfs_inode": static noinline_for_stack struct inode *create_reloc_inode( const struct btrfs_block_group *group) { (...) out: (...) if (ret) { if (inode) iput(&inode->vfs_inode); inode = ERR_PTR(ret); } return &inode->vfs_inode; } This can make us return a pointer that is not an error pointer and make the caller proceed as if an error didn't happen and later result in an invalid memory access when dereferencing the inode pointer. Syzbot reported reported such a case with the following stack trace: R10: 0000000000000002 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 431bde82d7b634db R15: 00007ffc55de5790 BTRFS info (device loop0): relocating block group 6881280 flags data|metadata Oops: general protection fault, probably for non-canonical address 0xdffffc0000000045: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000228-0x000000000000022f] CPU: 0 UID: 0 PID: 5332 Comm: syz-executor215 Not tainted 6.14.0-syzkaller-13423-ga8662bcd2ff1 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:relocate_file_extent_cluster+0xe7/0x1750 fs/btrfs/relocation.c:2971 Code: 00 74 08 (...) RSP: 0018:ffffc9000d3375e0 EFLAGS: 00010203 RAX: 0000000000000045 RBX: 000000000000022c RCX: ffff888000562440 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8880452db000 RBP: ffffc9000d337870 R08: ffffffff84089251 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: dffffc0000000000 R13: ffffffff9368a020 R14: 0000000000000394 R15: ffff8880452db000 FS: 000055558bc7b380(0000) GS:ffff88808c596000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a7a192e740 CR3: 0000000036e2e000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: relocate_block_group+0xa1e/0xd50 fs/btrfs/relocation.c:3657 btrfs_relocate_block_group+0x777/0xd80 fs/btrfs/relocation.c:4011 btrfs_relocate_chunk+0x12c/0x3b0 fs/btrfs/volumes.c:3511 __btrfs_balance+0x1a93/0x25e0 fs/btrfs/volumes.c:4292 btrfs_balance+0xbde/0x10c0 fs/btrfs/volumes.c:4669 btrfs_ioctl_balance+0x3f5/0x660 fs/btrfs/ioctl.c:3586 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl+0xf1/0x160 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fb4ef537dd9 Code: 28 00 00 (...) RSP: 002b:00007ffc55de5728 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffc55de5750 RCX: 00007fb4ef537dd9 RDX: 0000200000000440 RSI: 00000000c4009420 RDI: 0000000000000003 RBP: 0000000000000002 R08: 00007ffc55de54c6 R09: 00007ffc55de5770 R10: 0000000000000002 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 431bde82d7b634db R15: 00007ffc55de5790 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:relocate_file_extent_cluster+0xe7/0x1750 fs/btrfs/relocation.c:2971 Code: 00 74 08 (...) RSP: 0018:ffffc9000d3375e0 EFLAGS: 00010203 RAX: 0000000000000045 RBX: 000000000000022c RCX: ffff888000562440 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8880452db000 RBP: ffffc9000d337870 R08: ffffffff84089251 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: dffffc0000000000 R13: ffffffff9368a020 R14: 0000000000000394 R15: ffff8880452db000 FS: 000055558bc7b380(0000) GS:ffff88808c596000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a7a192e740 CR3: 0000000036e2e000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 ---------------- Code disassembly (best guess): 0: 00 74 08 48 add %dh,0x48(%rax,%rcx,1) 4: 89 df mov %ebx,%edi 6: e8 f8 36 24 fe call 0xfe243703 b: 48 89 9c 24 30 01 00 mov %rbx,0x130(%rsp) 12: 00 13: 4c 89 74 24 28 mov %r14,0x28(%rsp) 18: 4d 8b 76 10 mov 0x10(%r14),%r14 1c: 49 8d 9e 98 fe ff ff lea -0x168(%r14),%rbx 23: 48 89 d8 mov %rbx,%rax 26: 48 c1 e8 03 shr $0x3,%rax * 2a: 42 80 3c 20 00 cmpb $0x0,(%rax,%r12,1) <-- trapping instruction 2f: 74 08 je 0x39 31: 48 89 df mov %rbx,%rdi 34: e8 ca 36 24 fe call 0xfe243703 39: 4c 8b 3b mov (%rbx),%r15 3c: 48 rex.W 3d: 8b .byte 0x8b 3e: 44 rex.R 3f: 24 .byte 0x24 So fix this by returning the error immediately. Reported-by: syzbot+7481815bb47ef3e702e2@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/67f14ee9.050a0220.0a13.023e.GAE@google.com/ Fixes: b204e5c7d4dc ("btrfs: make btrfs_iget() return a btrfs inode instead") Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f948f4f6431c6c..e17bcb03459519 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3803,7 +3803,7 @@ static noinline_for_stack struct inode *create_reloc_inode( if (ret) { if (inode) iput(&inode->vfs_inode); - inode = ERR_PTR(ret); + return ERR_PTR(ret); } return &inode->vfs_inode; } From f1ab0171e9be96fd530329fa54761cff5e09ea95 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 4 Apr 2025 20:19:41 +0200 Subject: [PATCH 0822/2924] btrfs: tree-checker: adjust error code for header level check The whole tree checker returns EUCLEAN, except the one check in btrfs_verify_level_key(). This was inherited from the function that was moved from disk-io.c in 2cac5af16537 ("btrfs: move btrfs_verify_level_key into tree-checker.c") but this should be unified with the rest. Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 43979891f7c895..2b66a6130269a3 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -2235,7 +2235,7 @@ int btrfs_verify_level_key(struct extent_buffer *eb, btrfs_err(fs_info, "tree level mismatch detected, bytenr=%llu level expected=%u has=%u", eb->start, check->level, found_level); - return -EIO; + return -EUCLEAN; } if (!check->has_first_key) From c1a79b1a583654f24b17da81ba868b0064077243 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 19 Mar 2025 10:49:16 +0900 Subject: [PATCH 0823/2924] block: introduce zone capacity helper {bdev,disk}_zone_capacity() takes block_device or gendisk and sector position and returns the zone capacity of the corresponding zone. With that, move disk_nr_zones() and blk_zone_plug_bio() to consolidate them in the same #ifdef block. Signed-off-by: Naohiro Aota Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Chaitanya Kulkarni Signed-off-by: David Sterba --- include/linux/blkdev.h | 67 ++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d37751789bf58c..c57babb0adb9c5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -691,23 +691,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) (q->limits.features & BLK_FEAT_ZONED); } -#ifdef CONFIG_BLK_DEV_ZONED -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return disk->nr_zones; -} -bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return 0; -} -static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) -{ - return false; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { if (!blk_queue_is_zoned(disk->queue)) @@ -715,11 +698,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return disk_nr_zones(bdev->bd_disk); -} - static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; @@ -826,6 +804,51 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) (sb->s_blocksize_bits - SECTOR_SHIFT); } +#ifdef CONFIG_BLK_DEV_ZONED +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return disk->nr_zones; +} +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); + +/** + * disk_zone_capacity - returns the zone capacity of zone containing @sector + * @disk: disk to work with + * @sector: sector number within the querying zone + * + * Returns the zone capacity of a zone containing @sector. @sector can be any + * sector in the zone. + */ +static inline unsigned int disk_zone_capacity(struct gendisk *disk, + sector_t sector) +{ + sector_t zone_sectors = disk->queue->limits.chunk_sectors; + + if (sector + zone_sectors >= get_capacity(disk)) + return disk->last_zone_capacity; + return disk->zone_capacity; +} +static inline unsigned int bdev_zone_capacity(struct block_device *bdev, + sector_t pos) +{ + return disk_zone_capacity(bdev->bd_disk, pos); +} +#else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return 0; +} +static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +{ + return false; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return disk_nr_zones(bdev->bd_disk); +} + int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); From 866bafae59ecffcf1840d846cd79740be29f21d6 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 19 Mar 2025 10:49:17 +0900 Subject: [PATCH 0824/2924] btrfs: zoned: skip reporting zone for new block group There is a potential deadlock if we do report zones in an IO context, detailed in below lockdep report. When one process do a report zones and another process freezes the block device, the report zones side cannot allocate a tag because the freeze is already started. This can thus result in new block group creation to hang forever, blocking the write path. Thankfully, a new block group should be created on empty zones. So, reporting the zones is not necessary and we can set the write pointer = 0 and load the zone capacity from the block layer using bdev_zone_capacity() helper. ====================================================== WARNING: possible circular locking dependency detected 6.14.0-rc1 #252 Not tainted ------------------------------------------------------ modprobe/1110 is trying to acquire lock: ffff888100ac83e0 ((work_completion)(&(&wb->dwork)->work)){+.+.}-{0:0}, at: __flush_work+0x38f/0xb60 but task is already holding lock: ffff8881205b6f20 (&q->q_usage_counter(queue)#16){++++}-{0:0}, at: sd_remove+0x85/0x130 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&q->q_usage_counter(queue)#16){++++}-{0:0}: blk_queue_enter+0x3d9/0x500 blk_mq_alloc_request+0x47d/0x8e0 scsi_execute_cmd+0x14f/0xb80 sd_zbc_do_report_zones+0x1c1/0x470 sd_zbc_report_zones+0x362/0xd60 blkdev_report_zones+0x1b1/0x2e0 btrfs_get_dev_zones+0x215/0x7e0 [btrfs] btrfs_load_block_group_zone_info+0x6d2/0x2c10 [btrfs] btrfs_make_block_group+0x36b/0x870 [btrfs] btrfs_create_chunk+0x147d/0x2320 [btrfs] btrfs_chunk_alloc+0x2ce/0xcf0 [btrfs] start_transaction+0xce6/0x1620 [btrfs] btrfs_uuid_scan_kthread+0x4ee/0x5b0 [btrfs] kthread+0x39d/0x750 ret_from_fork+0x30/0x70 ret_from_fork_asm+0x1a/0x30 -> #2 (&fs_info->dev_replace.rwsem){++++}-{4:4}: down_read+0x9b/0x470 btrfs_map_block+0x2ce/0x2ce0 [btrfs] btrfs_submit_chunk+0x2d4/0x16c0 [btrfs] btrfs_submit_bbio+0x16/0x30 [btrfs] btree_write_cache_pages+0xb5a/0xf90 [btrfs] do_writepages+0x17f/0x7b0 __writeback_single_inode+0x114/0xb00 writeback_sb_inodes+0x52b/0xe00 wb_writeback+0x1a7/0x800 wb_workfn+0x12a/0xbd0 process_one_work+0x85a/0x1460 worker_thread+0x5e2/0xfc0 kthread+0x39d/0x750 ret_from_fork+0x30/0x70 ret_from_fork_asm+0x1a/0x30 -> #1 (&fs_info->zoned_meta_io_lock){+.+.}-{4:4}: __mutex_lock+0x1aa/0x1360 btree_write_cache_pages+0x252/0xf90 [btrfs] do_writepages+0x17f/0x7b0 __writeback_single_inode+0x114/0xb00 writeback_sb_inodes+0x52b/0xe00 wb_writeback+0x1a7/0x800 wb_workfn+0x12a/0xbd0 process_one_work+0x85a/0x1460 worker_thread+0x5e2/0xfc0 kthread+0x39d/0x750 ret_from_fork+0x30/0x70 ret_from_fork_asm+0x1a/0x30 -> #0 ((work_completion)(&(&wb->dwork)->work)){+.+.}-{0:0}: __lock_acquire+0x2f52/0x5ea0 lock_acquire+0x1b1/0x540 __flush_work+0x3ac/0xb60 wb_shutdown+0x15b/0x1f0 bdi_unregister+0x172/0x5b0 del_gendisk+0x841/0xa20 sd_remove+0x85/0x130 device_release_driver_internal+0x368/0x520 bus_remove_device+0x1f1/0x3f0 device_del+0x3bd/0x9c0 __scsi_remove_device+0x272/0x340 scsi_forget_host+0xf7/0x170 scsi_remove_host+0xd2/0x2a0 sdebug_driver_remove+0x52/0x2f0 [scsi_debug] device_release_driver_internal+0x368/0x520 bus_remove_device+0x1f1/0x3f0 device_del+0x3bd/0x9c0 device_unregister+0x13/0xa0 sdebug_do_remove_host+0x1fb/0x290 [scsi_debug] scsi_debug_exit+0x17/0x70 [scsi_debug] __do_sys_delete_module.isra.0+0x321/0x520 do_syscall_64+0x93/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e other info that might help us debug this: Chain exists of: (work_completion)(&(&wb->dwork)->work) --> &fs_info->dev_replace.rwsem --> &q->q_usage_counter(queue)#16 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&q->q_usage_counter(queue)#16); lock(&fs_info->dev_replace.rwsem); lock(&q->q_usage_counter(queue)#16); lock((work_completion)(&(&wb->dwork)->work)); *** DEADLOCK *** 5 locks held by modprobe/1110: #0: ffff88811f7bc108 (&dev->mutex){....}-{4:4}, at: device_release_driver_internal+0x8f/0x520 #1: ffff8881022ee0e0 (&shost->scan_mutex){+.+.}-{4:4}, at: scsi_remove_host+0x20/0x2a0 #2: ffff88811b4c4378 (&dev->mutex){....}-{4:4}, at: device_release_driver_internal+0x8f/0x520 #3: ffff8881205b6f20 (&q->q_usage_counter(queue)#16){++++}-{0:0}, at: sd_remove+0x85/0x130 #4: ffffffffa3284360 (rcu_read_lock){....}-{1:3}, at: __flush_work+0xda/0xb60 stack backtrace: CPU: 0 UID: 0 PID: 1110 Comm: modprobe Not tainted 6.14.0-rc1 #252 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 Call Trace: dump_stack_lvl+0x6a/0x90 print_circular_bug.cold+0x1e0/0x274 check_noncircular+0x306/0x3f0 ? __pfx_check_noncircular+0x10/0x10 ? mark_lock+0xf5/0x1650 ? __pfx_check_irq_usage+0x10/0x10 ? lockdep_lock+0xca/0x1c0 ? __pfx_lockdep_lock+0x10/0x10 __lock_acquire+0x2f52/0x5ea0 ? __pfx___lock_acquire+0x10/0x10 ? __pfx_mark_lock+0x10/0x10 lock_acquire+0x1b1/0x540 ? __flush_work+0x38f/0xb60 ? __pfx_lock_acquire+0x10/0x10 ? __pfx_lock_release+0x10/0x10 ? mark_held_locks+0x94/0xe0 ? __flush_work+0x38f/0xb60 __flush_work+0x3ac/0xb60 ? __flush_work+0x38f/0xb60 ? __pfx_mark_lock+0x10/0x10 ? __pfx___flush_work+0x10/0x10 ? __pfx_wq_barrier_func+0x10/0x10 ? __pfx___might_resched+0x10/0x10 ? mark_held_locks+0x94/0xe0 wb_shutdown+0x15b/0x1f0 bdi_unregister+0x172/0x5b0 ? __pfx_bdi_unregister+0x10/0x10 ? up_write+0x1ba/0x510 del_gendisk+0x841/0xa20 ? __pfx_del_gendisk+0x10/0x10 ? _raw_spin_unlock_irqrestore+0x35/0x60 ? __pm_runtime_resume+0x79/0x110 sd_remove+0x85/0x130 device_release_driver_internal+0x368/0x520 ? kobject_put+0x5d/0x4a0 bus_remove_device+0x1f1/0x3f0 device_del+0x3bd/0x9c0 ? __pfx_device_del+0x10/0x10 __scsi_remove_device+0x272/0x340 scsi_forget_host+0xf7/0x170 scsi_remove_host+0xd2/0x2a0 sdebug_driver_remove+0x52/0x2f0 [scsi_debug] ? kernfs_remove_by_name_ns+0xc0/0xf0 device_release_driver_internal+0x368/0x520 ? kobject_put+0x5d/0x4a0 bus_remove_device+0x1f1/0x3f0 device_del+0x3bd/0x9c0 ? __pfx_device_del+0x10/0x10 ? __pfx___mutex_unlock_slowpath+0x10/0x10 device_unregister+0x13/0xa0 sdebug_do_remove_host+0x1fb/0x290 [scsi_debug] scsi_debug_exit+0x17/0x70 [scsi_debug] __do_sys_delete_module.isra.0+0x321/0x520 ? __pfx___do_sys_delete_module.isra.0+0x10/0x10 ? __pfx_slab_free_after_rcu_debug+0x10/0x10 ? kasan_save_stack+0x2c/0x50 ? kasan_record_aux_stack+0xa3/0xb0 ? __call_rcu_common.constprop.0+0xc4/0xfb0 ? kmem_cache_free+0x3a0/0x590 ? __x64_sys_close+0x78/0xd0 do_syscall_64+0x93/0x180 ? lock_is_held_type+0xd5/0x130 ? __call_rcu_common.constprop.0+0x3c0/0xfb0 ? lockdep_hardirqs_on+0x78/0x100 ? __call_rcu_common.constprop.0+0x3c0/0xfb0 ? __pfx___call_rcu_common.constprop.0+0x10/0x10 ? kmem_cache_free+0x3a0/0x590 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 ? __pfx___x64_sys_openat+0x10/0x10 ? lockdep_hardirqs_on_prepare+0x16d/0x400 ? do_syscall_64+0x9f/0x180 ? lockdep_hardirqs_on+0x78/0x100 ? do_syscall_64+0x9f/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f436712b68b RSP: 002b:00007ffe9f1a8658 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 00005559b367fd80 RCX: 00007f436712b68b RDX: 0000000000000000 RSI: 0000000000000800 RDI: 00005559b367fde8 RBP: 00007ffe9f1a8680 R08: 1999999999999999 R09: 0000000000000000 R10: 00007f43671a5fe0 R11: 0000000000000206 R12: 0000000000000000 R13: 00007ffe9f1a86b0 R14: 0000000000000000 R15: 0000000000000000 Reported-by: Shin'ichiro Kawasaki CC: # 6.13+ Tested-by: Shin'ichiro Kawasaki Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 7c502192cd6bef..4a3e02b49f2957 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1277,7 +1277,7 @@ struct zone_info { static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, struct zone_info *info, unsigned long *active, - struct btrfs_chunk_map *map) + struct btrfs_chunk_map *map, bool new) { struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct btrfs_device *device; @@ -1307,6 +1307,8 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, return 0; } + ASSERT(!new || btrfs_dev_is_empty_zone(device, info->physical)); + /* This zone will be used for allocation, so mark this zone non-empty. */ btrfs_dev_clear_zone_empty(device, info->physical); @@ -1319,6 +1321,18 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, * to determine the allocation offset within the zone. */ WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); + + if (new) { + sector_t capacity; + + capacity = bdev_zone_capacity(device->bdev, info->physical >> SECTOR_SHIFT); + up_read(&dev_replace->rwsem); + info->alloc_offset = 0; + info->capacity = capacity << SECTOR_SHIFT; + + return 0; + } + nofs_flag = memalloc_nofs_save(); ret = btrfs_get_dev_zone(device, info->physical, &zone); memalloc_nofs_restore(nofs_flag); @@ -1588,7 +1602,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } for (i = 0; i < map->num_stripes; i++) { - ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map); + ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map, new); if (ret) goto out; From 75bc744466444ef417b5f709f72b91c83301bcd1 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 15 Apr 2025 14:35:41 +0530 Subject: [PATCH 0825/2924] net: ti: icssg-prueth: Fix kernel warning while bringing down network interface During network interface initialization, the NIC driver needs to register its Rx queue with the XDP, to ensure the incoming XDP buffer carries a pointer reference to this info and is stored inside xdp_rxq_info. While this struct isn't tied to XDP prog, if there are any changes in Rx queue, the NIC driver needs to stop the Rx queue by unregistering with XDP before purging and reallocating memory. Drop page_pool destroy during Rx channel reset as this is already handled by XDP during xdp_rxq_info_unreg (Rx queue unregister), failing to do will cause the following warning: warning logs: https://gist.github.com/MeghanaMalladiTI/eb627e5dc8de24e42d7d46572c13e576 Fixes: 46eeb90f03e0 ("net: ti: icssg-prueth: Use page_pool API for RX buffer allocation") Signed-off-by: Meghana Malladi Reviewed-by: Simon Horman Reviewed-by: Roger Quadros Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250415090543.717991-2-m-malladi@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_common.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 14002b0264528c..ec643fb69d30d8 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -1215,9 +1215,6 @@ void prueth_reset_rx_chan(struct prueth_rx_chn *chn, prueth_rx_cleanup); if (disable) k3_udma_glue_disable_rx_chn(chn->rx_chn); - - page_pool_destroy(chn->pg_pool); - chn->pg_pool = NULL; } EXPORT_SYMBOL_GPL(prueth_reset_rx_chan); From 8ed2fa661350f0b49edb765d18173b5c766c3686 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 15 Apr 2025 14:35:42 +0530 Subject: [PATCH 0826/2924] net: ti: icssg-prueth: Fix possible NULL pointer dereference inside emac_xmit_xdp_frame() There is an error check inside emac_xmit_xdp_frame() function which is called when the driver wants to transmit XDP frame, to check if the allocated tx descriptor is NULL, if true to exit and return ICSSG_XDP_CONSUMED implying failure in transmission. In this case trying to free a descriptor which is NULL will result in kernel crash due to NULL pointer dereference. Fix this error handling and increase netdev tx_dropped stats in the caller of this function if the function returns ICSSG_XDP_CONSUMED. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/70d8dd76-0c76-42fc-8611-9884937c82f5@stanley.mountain/ Signed-off-by: Meghana Malladi Reviewed-by: Simon Horman Reviewed-by: Roger Quadros Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250415090543.717991-3-m-malladi@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index ec643fb69d30d8..b4be76e13a2fb7 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -583,7 +583,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); if (!first_desc) { netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n"); - goto drop_free_descs; /* drop */ + return ICSSG_XDP_CONSUMED; /* drop */ } if (page) { /* already DMA mapped by page_pool */ @@ -671,8 +671,10 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, q_idx = smp_processor_id() % emac->tx_ch_num; result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); - if (result == ICSSG_XDP_CONSUMED) + if (result == ICSSG_XDP_CONSUMED) { + ndev->stats.tx_dropped++; goto drop; + } dev_sw_netstats_rx_add(ndev, xdpf->len); return result; From 7349c9e9979333abfce42da5f9025598083b59c9 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 15 Apr 2025 14:35:43 +0530 Subject: [PATCH 0827/2924] net: ti: icss-iep: Fix possible NULL pointer dereference for perout request The ICSS IEP driver tracks perout and pps enable state with flags. Currently when disabling pps and perout signals during icss_iep_exit(), results in NULL pointer dereference for perout. To fix the null pointer dereference issue, the icss_iep_perout_enable_hw function can be modified to directly clear the IEP CMP registers when disabling PPS or PEROUT, without referencing the ptp_perout_request structure, as its contents are irrelevant in this case. Fixes: 9b115361248d ("net: ti: icssg-prueth: Fix clearing of IEP_CMP_CFG registers during iep_init") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/7b1c7c36-363a-4085-b26c-4f210bee1df6@stanley.mountain/ Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250415090543.717991-4-m-malladi@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icss_iep.c | 121 +++++++++++------------ 1 file changed, 58 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index b4a34c57b7b482..2a1c43316f462b 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -412,6 +412,22 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, int ret; u64 cmp; + if (!on) { + /* Disable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), 0); + + /* clear CMP regs */ + regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); + if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) + regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); + + /* Disable sync */ + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); + + return 0; + } + /* Calculate width of the signal for PPS/PEROUT handling */ ts.tv_sec = req->on.sec; ts.tv_nsec = req->on.nsec; @@ -430,64 +446,39 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, if (ret) return ret; - if (on) { - /* Configure CMP */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(cmp)); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(cmp)); - /* Configure SYNC, based on req on width */ - regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, - div_u64(ns_width, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0); - regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, - div_u64(ns_start, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); /* one-shot mode */ - /* Enable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); - } else { - /* Disable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), 0); - - /* clear regs */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); - } + /* Configure CMP */ + regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(cmp)); + if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) + regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(cmp)); + /* Configure SYNC, based on req on width */ + regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, + div_u64(ns_width, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0); + regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, + div_u64(ns_start, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); /* one-shot mode */ + /* Enable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); } else { - if (on) { - u64 start_ns; - - iep->period = ((u64)req->period.sec * NSEC_PER_SEC) + - req->period.nsec; - start_ns = ((u64)req->period.sec * NSEC_PER_SEC) - + req->period.nsec; - icss_iep_update_to_next_boundary(iep, start_ns); - - regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, - div_u64(ns_width, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, - div_u64(ns_start, iep->def_inc)); - /* Enable Sync in single shot mode */ - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, - IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN); - /* Enable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); - } else { - /* Disable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), 0); - - /* clear CMP regs */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); - - /* Disable sync */ - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); - } + u64 start_ns; + + iep->period = ((u64)req->period.sec * NSEC_PER_SEC) + + req->period.nsec; + start_ns = ((u64)req->period.sec * NSEC_PER_SEC) + + req->period.nsec; + icss_iep_update_to_next_boundary(iep, start_ns); + + regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, + div_u64(ns_width, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, + div_u64(ns_start, iep->def_inc)); + /* Enable Sync in single shot mode */ + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, + IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN); + /* Enable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); } return 0; @@ -498,11 +489,21 @@ static int icss_iep_perout_enable(struct icss_iep *iep, { int ret = 0; + if (!on) + goto disable; + /* Reject requests with unsupported flags */ if (req->flags & ~(PTP_PEROUT_DUTY_CYCLE | PTP_PEROUT_PHASE)) return -EOPNOTSUPP; + /* Set default "on" time (1ms) for the signal if not passed by the app */ + if (!(req->flags & PTP_PEROUT_DUTY_CYCLE)) { + req->on.sec = 0; + req->on.nsec = NSEC_PER_MSEC; + } + +disable: mutex_lock(&iep->ptp_clk_mutex); if (iep->pps_enabled) { @@ -513,12 +514,6 @@ static int icss_iep_perout_enable(struct icss_iep *iep, if (iep->perout_enabled == !!on) goto exit; - /* Set default "on" time (1ms) for the signal if not passed by the app */ - if (!(req->flags & PTP_PEROUT_DUTY_CYCLE)) { - req->on.sec = 0; - req->on.nsec = NSEC_PER_MSEC; - } - ret = icss_iep_perout_enable_hw(iep, req, on); if (!ret) iep->perout_enabled = !!on; From 96a720db59ab330c8562b2437153faa45dac705f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Apr 2025 07:24:24 -0700 Subject: [PATCH 0828/2924] perf/x86/intel/uncore: Fix the scale of IIO free running counters on SNR There was a mistake in the SNR uncore spec. The counter increments for every 32 bytes of data sent from the IO agent to the SOC, not 4 bytes which was documented in the spec. The event list has been updated: "EventName": "UNC_IIO_BANDWIDTH_IN.PART0_FREERUN", "BriefDescription": "Free running counter that increments for every 32 bytes of data sent from the IO agent to the SOC", Update the scale of the IIO bandwidth in free running counters as well. Fixes: 210cc5f9db7a ("perf/x86/intel/uncore: Add uncore support for Snow Ridge server") Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250416142426.3933977-1-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 60973c209c0e64..35da2c486e8d8a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -4891,28 +4891,28 @@ static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), /* Free-Running IIO BANDWIDTH IN Counters */ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), { /* end: all zeroes */ }, }; From 32c7f1150225694d95a51110a93be25db03bb5db Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Apr 2025 07:24:25 -0700 Subject: [PATCH 0829/2924] perf/x86/intel/uncore: Fix the scale of IIO free running counters on ICX There was a mistake in the ICX uncore spec too. The counter increments for every 32 bytes rather than 4 bytes. The same as SNR, there are 1 ioclk and 8 IIO bandwidth in free running counters. Reuse the snr_uncore_iio_freerunning_events(). Fixes: 2b3b76b5ec67 ("perf/x86/intel/uncore: Add Ice Lake server uncore support") Reported-by: Tang Jun Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250416142426.3933977-2-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 33 +--------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 35da2c486e8d8a..fb08911a1cf64e 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -5485,37 +5485,6 @@ static struct freerunning_counters icx_iio_freerunning[] = { [ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets }, }; -static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type icx_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 9, @@ -5523,7 +5492,7 @@ static struct intel_uncore_type icx_uncore_iio_free_running = { .num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX, .freerunning = icx_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = icx_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; From 506f981ab40f0b03a11a640cfd77f48b09aff330 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Apr 2025 07:24:26 -0700 Subject: [PATCH 0830/2924] perf/x86/intel/uncore: Fix the scale of IIO free running counters on SPR The scale of IIO bandwidth in free running counters is inherited from the ICX. The counter increments for every 32 bytes rather than 4 bytes. The IIO bandwidth out free running counters don't increment with a consistent size. The increment depends on the requested size. It's impossible to find a fixed increment. Remove it from the event_descs. Fixes: 0378c93a92e2 ("perf/x86/intel/uncore: Support IIO free-running counters on Sapphire Rapids server") Reported-by: Tang Jun Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250416142426.3933977-3-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 58 +--------------------------- 1 file changed, 1 insertion(+), 57 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index fb08911a1cf64e..76d96df1475a1c 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6289,69 +6289,13 @@ static struct freerunning_counters spr_iio_freerunning[] = { [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 }, }; -static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - /* Free-Running IIO BANDWIDTH OUT Counters */ - INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"), - INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type spr_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 17, .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX, .freerunning = spr_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = spr_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; From 9f5595d5f03fd4dc640607a71e89a1daa68fd19d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Apr 2025 11:24:00 -0500 Subject: [PATCH 0831/2924] platform/x86/amd: pmc: Require at least 2.5 seconds between HW sleep cycles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an APU exits HW sleep with no active wake sources the Linux kernel will rapidly assert that the APU can enter back into HW sleep. This happens in a few ms. Contrasting this to Windows, Windows can take 10s of seconds to enter back into the resiliency phase for Modern Standby. For some situations this can be problematic because it can cause leakage from VDDCR_SOC to VDD_MISC and force VDD_MISC outside of the electrical design guide specifications. On some designs this will trip the over voltage protection feature (OVP) of the voltage regulator module, but it could cause APU damage as well. To prevent this risk, add an explicit sleep call so that future attempts to enter into HW sleep will have enough time to settle. This will occur while the screen is dark and only on cases that the APU should enter HW sleep again, so it shouldn't be noticeable to any user. Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Acked-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20250414162446.3853194-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index d789d6cab79486..0329fafe14ebcd 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -644,10 +644,9 @@ static void amd_pmc_s2idle_check(void) struct smu_metrics table; int rc; - /* CZN: Ensure that future s0i3 entry attempts at least 10ms passed */ - if (pdev->cpu_id == AMD_CPU_ID_CZN && !get_metrics_table(pdev, &table) && - table.s0i3_last_entry_status) - usleep_range(10000, 20000); + /* Avoid triggering OVP */ + if (!get_metrics_table(pdev, &table) && table.s0i3_last_entry_status) + msleep(2500); /* Dump the IdleMask before we add to the STB */ amd_pmc_idlemask_read(pdev, pdev->dev, NULL); From 8d6955ed76e8a47115f2ea1d9c263ee6f505d737 Mon Sep 17 00:00:00 2001 From: Shouye Liu Date: Thu, 17 Apr 2025 11:23:21 +0800 Subject: [PATCH 0832/2924] platform/x86/intel-uncore-freq: Fix missing uncore sysfs during CPU hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In certain situations, the sysfs for uncore may not be present when all CPUs in a package are offlined and then brought back online after boot. This issue can occur if there is an error in adding the sysfs entry due to a memory allocation failure. Retrying to bring the CPUs online will not resolve the issue, as the uncore_cpu_mask is already set for the package before the failure condition occurs. This issue does not occur if the failure happens during module initialization, as the module will fail to load in the event of any error. To address this, ensure that the uncore_cpu_mask is not set until the successful return of uncore_freq_add_entry(). Fixes: dbce412a7733 ("platform/x86/intel-uncore-freq: Split common and enumeration part") Signed-off-by: Shouye Liu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250417032321.75580-1-shouyeliu@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../x86/intel/uncore-frequency/uncore-frequency.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index 40bbf8e45fa4bb..bdee5d00f30b80 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -146,15 +146,13 @@ static int uncore_event_cpu_online(unsigned int cpu) { struct uncore_data *data; int target; + int ret; /* Check if there is an online cpu in the package for uncore MSR */ target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0; - /* Use this CPU on this die as a control CPU */ - cpumask_set_cpu(cpu, &uncore_cpu_mask); - data = uncore_get_instance(cpu); if (!data) return 0; @@ -163,7 +161,14 @@ static int uncore_event_cpu_online(unsigned int cpu) data->die_id = topology_die_id(cpu); data->domain_id = UNCORE_DOMAIN_ID_INVALID; - return uncore_freq_add_entry(data, cpu); + ret = uncore_freq_add_entry(data, cpu); + if (ret) + return ret; + + /* Use this CPU on this die as a control CPU */ + cpumask_set_cpu(cpu, &uncore_cpu_mask); + + return 0; } static int uncore_event_cpu_offline(unsigned int cpu) From 4a8e04e2bdcb98d513e97b039899bda03b07bcf2 Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Wed, 16 Apr 2025 13:50:23 -0300 Subject: [PATCH 0833/2924] platform/x86: alienware-wmi-wmax: Fix uninitialized variable due to bad error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wmax_thermal_information() may also return -ENOMSG, which would leave `id` uninitialized in thermal_profile_probe. Reorder and modify logic to catch all errors. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/Z_-KVqNbD9ygvE2X@stanley.mountain Fixes: 27e9e6339896 ("platform/x86: alienware-wmi: Refactor thermal control methods") Signed-off-by: Kurt Borja Link: https://lore.kernel.org/r/20250416-smatch-fix-v1-1-35491b462d8f@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/alienware-wmi-wmax.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 0c3be03385f899..3f9e1e986ecf0a 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -655,12 +655,10 @@ static int thermal_profile_probe(void *drvdata, unsigned long *choices) for (u32 i = 0; i < sys_desc[3]; i++) { ret = wmax_thermal_information(priv->wdev, WMAX_OPERATION_LIST_IDS, i + first_mode, &out_data); - - if (ret == -EIO) - return ret; - if (ret == -EBADRQC) break; + if (ret) + return ret; if (!is_wmax_thermal_code(out_data)) continue; From 951a04ab3a2db4029debfa48d380ef834b93207e Mon Sep 17 00:00:00 2001 From: Tamura Dai Date: Thu, 17 Apr 2025 10:16:05 +0900 Subject: [PATCH 0834/2924] spi: spi-imx: Add check for spi_imx_setupxfer() Add check for the return value of spi_imx_setupxfer(). spi_imx->rx and spi_imx->tx function pointer can be NULL when spi_imx_setupxfer() return error, and make NULL pointer dereference. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Call trace: 0x0 spi_imx_pio_transfer+0x50/0xd8 spi_imx_transfer_one+0x18c/0x858 spi_transfer_one_message+0x43c/0x790 __spi_pump_transfer_message+0x238/0x5d4 __spi_sync+0x2b0/0x454 spi_write_then_read+0x11c/0x200 Signed-off-by: Tamura Dai Reviewed-by: Carlos Song Link: https://patch.msgid.link/20250417011700.14436-1-kirinode0@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 832d6e9009ebe7..c93d80a4d734ec 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1695,9 +1695,12 @@ static int spi_imx_transfer_one(struct spi_controller *controller, struct spi_device *spi, struct spi_transfer *transfer) { + int ret; struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller); - spi_imx_setupxfer(spi, transfer); + ret = spi_imx_setupxfer(spi, transfer); + if (ret < 0) + return ret; transfer->effective_speed_hz = spi_imx->spi_bus_clk; /* flush rxfifo before transfer */ From 45e00e36718902d81bdaebb37b3a8244e685bc48 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V (Arm)" Date: Tue, 8 Apr 2025 09:03:51 +0530 Subject: [PATCH 0835/2924] iommu/arm-smmu-v3: Add missing S2FWB feature detection Commit 67e4fe398513 ("iommu/arm-smmu-v3: Use S2FWB for NESTED domains") introduced S2FWB usage but omitted the corresponding feature detection. As a result, vIOMMU allocation fails on FVP in arm_vsmmu_alloc(), due to the following check: if (!arm_smmu_master_canwbs(master) && !(smmu->features & ARM_SMMU_FEAT_S2FWB)) return ERR_PTR(-EOPNOTSUPP); This patch adds the missing detection logic to prevent allocation failure when S2FWB is supported. Fixes: 67e4fe398513 ("iommu/arm-smmu-v3: Use S2FWB for NESTED domains") Signed-off-by: Aneesh Kumar K.V (Arm) Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20250408033351.1012411-1-aneesh.kumar@kernel.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b4c21aaed1266a..5467f85dd4631a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4429,6 +4429,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); if (FIELD_GET(IDR3_RIL, reg)) smmu->features |= ARM_SMMU_FEAT_RANGE_INV; + if (FIELD_GET(IDR3_FWB, reg)) + smmu->features |= ARM_SMMU_FEAT_S2FWB; /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); From 12f78021973ae422564b234136c702a305932d73 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Sat, 12 Apr 2025 10:23:54 +1000 Subject: [PATCH 0836/2924] iommu/arm-smmu-v3: Fix pgsize_bit for sva domains UBSan caught a bug with IOMMU SVA domains, where the reported exponent value in __arm_smmu_tlb_inv_range() was >= 64. __arm_smmu_tlb_inv_range() uses the domain's pgsize_bitmap to compute the number of pages to invalidate and the invalidation range. Currently arm_smmu_sva_domain_alloc() does not setup the iommu domain's pgsize_bitmap. This leads to __ffs() on the value returning 64 and that leads to undefined behaviour w.r.t. shift operations Fix this by initializing the iommu_domain's pgsize_bitmap to PAGE_SIZE. Effectively the code needs to use the smallest page size for invalidation Cc: stable@vger.kernel.org Fixes: eb6c97647be2 ("iommu/arm-smmu-v3: Avoid constructing invalid range commands") Suggested-by: Jason Gunthorpe Signed-off-by: Balbir Singh Cc: Jean-Philippe Brucker Cc: Will Deacon Cc: Robin Murphy Cc: Joerg Roedel Cc: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250412002354.3071449-1-balbirs@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 9ba596430e7cf9..980cc6b33c430f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -411,6 +411,12 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, return ERR_CAST(smmu_domain); smmu_domain->domain.type = IOMMU_DOMAIN_SVA; smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + + /* + * Choose page_size as the leaf page size for invalidation when + * ARM_SMMU_FEAT_RANGE_INV is present + */ + smmu_domain->domain.pgsize_bitmap = PAGE_SIZE; smmu_domain->smmu = smmu; ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, From b00d24997a11c10d3e420614f0873b83ce358a34 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 15 Apr 2025 11:56:20 -0700 Subject: [PATCH 0837/2924] iommu/arm-smmu-v3: Fix iommu_device_probe bug due to duplicated stream ids ASPEED VGA card has two built-in devices: 0008:06:00.0 PCI bridge: ASPEED Technology, Inc. AST1150 PCI-to-PCI Bridge (rev 06) 0008:07:00.0 VGA compatible controller: ASPEED Technology, Inc. ASPEED Graphics Family (rev 52) Its toplogy looks like this: +-[0008:00]---00.0-[01-09]--+-00.0-[02-09]--+-00.0-[03]----00.0 Sandisk Corp Device 5017 | +-01.0-[04]-- | +-02.0-[05]----00.0 NVIDIA Corporation Device | +-03.0-[06-07]----00.0-[07]----00.0 ASPEED Technology, Inc. ASPEED Graphics Family | +-04.0-[08]----00.0 Renesas Technology Corp. uPD720201 USB 3.0 Host Controller | \-05.0-[09]----00.0 Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller \-00.1 PMC-Sierra Inc. Device 4028 The IORT logic populaties two identical IDs into the fwspec->ids array via DMA aliasing in iort_pci_iommu_init() called by pci_for_each_dma_alias(). Though the SMMU driver had been able to handle this situation since commit 563b5cbe334e ("iommu/arm-smmu-v3: Cope with duplicated Stream IDs"), that got broken by the later commit cdf315f907d4 ("iommu/arm-smmu-v3: Maintain a SID->device structure"), which ended up with allocating separate streams with the same stuffing. On a kernel prior to v6.15-rc1, there has been an overlooked warning: pci 0008:07:00.0: vgaarb: setting as boot VGA device pci 0008:07:00.0: vgaarb: bridge control possible pci 0008:07:00.0: vgaarb: VGA device added: decodes=io+mem,owns=none,locks=none pcieport 0008:06:00.0: Adding to iommu group 14 ast 0008:07:00.0: stream 67328 already in tree <===== WARNING ast 0008:07:00.0: enabling device (0002 -> 0003) ast 0008:07:00.0: Using default configuration ast 0008:07:00.0: AST 2600 detected ast 0008:07:00.0: [drm] Using analog VGA ast 0008:07:00.0: [drm] dram MCLK=396 Mhz type=1 bus_width=16 [drm] Initialized ast 0.1.0 for 0008:07:00.0 on minor 0 ast 0008:07:00.0: [drm] fb0: astdrmfb frame buffer device With v6.15-rc, since the commit bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path"), the error returned with the warning is moved to the SMMU device probe flow: arm_smmu_probe_device+0x15c/0x4c0 __iommu_probe_device+0x150/0x4f8 probe_iommu_group+0x44/0x80 bus_for_each_dev+0x7c/0x100 bus_iommu_probe+0x48/0x1a8 iommu_device_register+0xb8/0x178 arm_smmu_device_probe+0x1350/0x1db0 which then fails the entire SMMU driver probe: pci 0008:06:00.0: Adding to iommu group 21 pci 0008:07:00.0: stream 67328 already in tree arm-smmu-v3 arm-smmu-v3.9.auto: Failed to register iommu arm-smmu-v3 arm-smmu-v3.9.auto: probe with driver arm-smmu-v3 failed with error -22 Since SMMU driver had been already expecting a potential duplicated Stream ID in arm_smmu_install_ste_for_dev(), change the arm_smmu_insert_master() routine to ignore a duplicated ID from the fwspec->sids array as well. Note: this has been failing the iommu_device_probe() since 2021, although a recent iommu commit in v6.15-rc1 that moves iommu_device_probe() started to fail the SMMU driver probe. Since nobody has cared about DMA Alias support, leave that as it was but fix the fundamental iommu_device_probe() breakage. Fixes: cdf315f907d4 ("iommu/arm-smmu-v3: Maintain a SID->device structure") Cc: stable@vger.kernel.org Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20250415185620.504299-1-nicolinc@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5467f85dd4631a..0826b6bdf327fd 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3388,6 +3388,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { struct arm_smmu_stream *new_stream = &master->streams[i]; + struct rb_node *existing; u32 sid = fwspec->ids[i]; new_stream->id = sid; @@ -3398,10 +3399,20 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - if (rb_find_add(&new_stream->node, &smmu->streams, - arm_smmu_streams_cmp_node)) { - dev_warn(master->dev, "stream %u already in tree\n", - sid); + existing = rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node); + if (existing) { + struct arm_smmu_master *existing_master = + rb_entry(existing, struct arm_smmu_stream, node) + ->master; + + /* Bridged PCI devices may end up with duplicated IDs */ + if (existing_master == master) + continue; + + dev_warn(master->dev, + "stream %u already in tree from dev %s\n", sid, + dev_name(existing_master->dev)); ret = -EINVAL; break; } From 2d00c34d665bc23f5200962dbc4ac1919317036c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 11 Apr 2025 15:09:14 +0100 Subject: [PATCH 0838/2924] iommu/arm-smmu-v3: Fail aliasing StreamIDs more gracefully We've never supported StreamID aliasing between devices, and as such they will never have had functioning DMA, but this is not fatal to the SMMU itself. Although aliasing between hard-wired platform device StreamIDs would tend to raise questions about the whole system, in practice it's far more likely to occur relatively innocently due to legacy PCI bridges, where the underlying StreamID mappings are still perfectly reasonable. As such, return a more benign -ENODEV when failing probe for such an unsupported device (and log a more obvious error message), so that it doesn't break the entire SMMU probe now that bus_iommu_probe() runs in the right order and can propagate that error back. The end result is still that the device doesn't get an IOMMU group and probably won't work, same as before. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/39d54e49c8476efc4653e352150d44b185d6d50f.1744380554.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 0826b6bdf327fd..48d910399a1ba6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3411,9 +3411,9 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, continue; dev_warn(master->dev, - "stream %u already in tree from dev %s\n", sid, - dev_name(existing_master->dev)); - ret = -EINVAL; + "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n", + sid, dev_name(existing_master->dev)); + ret = -ENODEV; break; } } From a5f5e1238f4ff919816f69e77d2537a48911767b Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 15 Apr 2025 10:41:34 +0000 Subject: [PATCH 0839/2924] perf/x86/intel: Don't clear perf metrics overflow bit unconditionally The below code would always unconditionally clear other status bits like perf metrics overflow bit once PEBS buffer overflows: status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; This is incorrect. Perf metrics overflow bit should be cleared only when fixed counter 3 in PEBS counter group. Otherwise perf metrics overflow could be missed to handle. Closes: https://lore.kernel.org/all/20250225110012.GK31462@noisy.programming.kicks-ass.net/ Fixes: 7b2c05a15d29 ("perf/x86/intel: Generic support for hardware TopDown metrics") Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250415104135.318169-1-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 09d2d66c9f21fa..2b70a3adde2f83 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3049,7 +3049,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int bit; int handled = 0; - u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); inc_irq_stat(apic_perf_irqs); @@ -3093,7 +3092,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) handled++; x86_pmu_handle_guest_pebs(regs, &data); static_call(x86_pmu_drain_pebs)(regs, &data); - status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; /* * PMI throttle may be triggered, which stops the PEBS event. @@ -3104,6 +3102,15 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (pebs_enabled != cpuc->pebs_enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + + /* + * Above PEBS handler (PEBS counters snapshotting) has updated fixed + * counter 3 and perf metrics counts if they are in counter group, + * unnecessary to update again. + */ + if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] && + is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS])) + status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT; } /* @@ -3123,6 +3130,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) static_call(intel_pmu_update_topdown_event)(NULL, NULL); } + status &= hybrid(cpuc->pmu, intel_ctrl); + /* * Checkpointed counters can lead to 'spurious' PMIs because the * rollback caused by the PMI will have cleared the overflow status From 71dcc11c2cd9e434c34a63154ecadca21c135ddd Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 15 Apr 2025 10:41:35 +0000 Subject: [PATCH 0840/2924] perf/x86/intel: Allow to update user space GPRs from PEBS records Currently when a user samples user space GPRs (--user-regs option) with PEBS, the user space GPRs actually always come from software PMI instead of from PEBS hardware. This leads to the sampled GPRs to possibly be inaccurate for single PEBS record case because of the skid between counter overflow and GPRs sampling on PMI. For the large PEBS case, it is even worse. If user sets the exclude_kernel attribute, large PEBS would be used to sample user space GPRs, but since PEBS GPRs group is not really enabled, it leads to all samples in the large PEBS record to share the same piece of user space GPRs, like this reproducer shows: $ perf record -e branches:pu --user-regs=ip,ax -c 100000 ./foo $ perf report -D | grep "AX" .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead So enable GPRs group for user space GPRs sampling and prioritize reading GPRs from PEBS. If the PEBS sampled GPRs is not user space GPRs (single PEBS record case), perf_sample_regs_user() modifies them to user space GPRs. [ mingo: Clarified the changelog. ] Fixes: c22497f5838c ("perf/x86/intel: Support adaptive PEBS v4") Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250415104135.318169-2-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/ds.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 1f7e1a692a7a48..18c3ab579b8b3e 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1399,8 +1399,10 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) * + precise_ip < 2 for the non event IP * + For RTM TSX weight we need GPRs for the abort code. */ - gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_GP_REGS); + gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_GP_REGS)) || + ((sample_type & PERF_SAMPLE_REGS_USER) && + (attr->sample_regs_user & PEBS_GP_REGS)); tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && ((attr->config & INTEL_ARCH_EVENT_MASK) == @@ -2123,7 +2125,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, regs->flags &= ~PERF_EFLAGS_EXACT; } - if (sample_type & PERF_SAMPLE_REGS_INTR) + if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) adaptive_pebs_save_regs(regs, gprs); } From 7950de14ff5fd8da355d872b887ee8b7b5a1f327 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Apr 2025 11:44:07 +0000 Subject: [PATCH 0841/2924] perf/x86/intel: Add Panther Lake support From PMU's perspective, Panther Lake is similar to the previous generation Lunar Lake. Both are hybrid platforms, with e-core and p-core. The key differences are the ARCH PEBS feature and several new events. The ARCH PEBS is supported in the following patches. The new events will be supported later in perf tool. Share the code path with the Lunar Lake. Only update the name. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20250415114428.341182-2-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2b70a3adde2f83..00dfe487bd00ea 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -7314,8 +7314,17 @@ __init int intel_pmu_init(void) name = "meteorlake_hybrid"; break; + case INTEL_PANTHERLAKE_L: + pr_cont("Pantherlake Hybrid events, "); + name = "pantherlake_hybrid"; + goto lnl_common; + case INTEL_LUNARLAKE_M: case INTEL_ARROWLAKE: + pr_cont("Lunarlake Hybrid events, "); + name = "lunarlake_hybrid"; + + lnl_common: intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = lnl_latency_data; @@ -7337,8 +7346,6 @@ __init int intel_pmu_init(void) intel_pmu_init_skt(&pmu->pmu); intel_pmu_pebs_data_source_lnl(); - pr_cont("Lunarlake Hybrid events, "); - name = "lunarlake_hybrid"; break; case INTEL_ARROWLAKE_H: From 1ac571288822253db32196c49f240739148417e3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Apr 2025 10:32:31 +0100 Subject: [PATCH 0842/2924] io_uring/rsrc: don't skip offset calculation Don't optimise for requests with offset=0. Large registered buffers are the preference and hence the user is likely to pass an offset, and the adjustments are not expensive and will be made even cheaper in following patches. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1c2beb20470ee3c886a363d4d8340d3790db19f3.1744882081.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 75 ++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index b36c8825550eff..4d62897d1c89e9 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1036,6 +1036,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu, u64 buf_addr, size_t len) { + const struct bio_vec *bvec; size_t offset; int ret; @@ -1054,47 +1055,45 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, offset = buf_addr - imu->ubuf; iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len); - if (offset) { - /* - * Don't use iov_iter_advance() here, as it's really slow for - * using the latter parts of a big fixed buffer - it iterates - * over each segment manually. We can cheat a bit here for user - * registered nodes, because we know that: - * - * 1) it's a BVEC iter, we set it up - * 2) all bvecs are the same in size, except potentially the - * first and last bvec - * - * So just find our index, and adjust the iterator afterwards. - * If the offset is within the first bvec (or the whole first - * bvec, just use iov_iter_advance(). This makes it easier - * since we can just skip the first segment, which may not - * be folio_size aligned. - */ - const struct bio_vec *bvec = imu->bvec; + /* + * Don't use iov_iter_advance() here, as it's really slow for + * using the latter parts of a big fixed buffer - it iterates + * over each segment manually. We can cheat a bit here for user + * registered nodes, because we know that: + * + * 1) it's a BVEC iter, we set it up + * 2) all bvecs are the same in size, except potentially the + * first and last bvec + * + * So just find our index, and adjust the iterator afterwards. + * If the offset is within the first bvec (or the whole first + * bvec, just use iov_iter_advance(). This makes it easier + * since we can just skip the first segment, which may not + * be folio_size aligned. + */ + bvec = imu->bvec; - /* - * Kernel buffer bvecs, on the other hand, don't necessarily - * have the size property of user registered ones, so we have - * to use the slow iter advance. - */ - if (offset < bvec->bv_len) { - iter->count -= offset; - iter->iov_offset = offset; - } else if (imu->is_kbuf) { - iov_iter_advance(iter, offset); - } else { - unsigned long seg_skip; + /* + * Kernel buffer bvecs, on the other hand, don't necessarily + * have the size property of user registered ones, so we have + * to use the slow iter advance. + */ + if (offset < bvec->bv_len) { + iter->count -= offset; + iter->iov_offset = offset; + } else if (imu->is_kbuf) { + iov_iter_advance(iter, offset); + } else { + unsigned long seg_skip; - /* skip first vec */ - offset -= bvec->bv_len; - seg_skip = 1 + (offset >> imu->folio_shift); + /* skip first vec */ + offset -= bvec->bv_len; + seg_skip = 1 + (offset >> imu->folio_shift); - iter->bvec += seg_skip; - iter->nr_segs -= seg_skip; - iter->count -= bvec->bv_len + offset; - iter->iov_offset = offset & ((1UL << imu->folio_shift) - 1); - } + iter->bvec += seg_skip; + iter->nr_segs -= seg_skip; + iter->count -= bvec->bv_len + offset; + iter->iov_offset = offset & ((1UL << imu->folio_shift) - 1); } return 0; From 50169d07548441e3033b9bbaa06e573e7224f140 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Apr 2025 10:32:32 +0100 Subject: [PATCH 0843/2924] io_uring/rsrc: separate kbuf offset adjustments Kernel registered buffers are special because segments are not uniform in size, and we have a bunch of optimisations based on that uniformity for normal buffers. Handle kbuf separately, it'll be cleaner this way. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4e9e5990b0ab5aee723c0be5cd9b5bcf810375f9.1744882081.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 4d62897d1c89e9..fddde8ffe81e98 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1048,11 +1048,14 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, if (!(imu->dir & (1 << ddir))) return -EFAULT; - /* - * Might not be a start of buffer, set size appropriately - * and advance us to the beginning. - */ offset = buf_addr - imu->ubuf; + + if (imu->is_kbuf) { + iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len); + iov_iter_advance(iter, offset); + return 0; + } + iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len); /* @@ -1072,17 +1075,9 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, * be folio_size aligned. */ bvec = imu->bvec; - - /* - * Kernel buffer bvecs, on the other hand, don't necessarily - * have the size property of user registered ones, so we have - * to use the slow iter advance. - */ if (offset < bvec->bv_len) { iter->count -= offset; iter->iov_offset = offset; - } else if (imu->is_kbuf) { - iov_iter_advance(iter, offset); } else { unsigned long seg_skip; From 59852ebad954c8a3ac8b746930c2ea60febe797a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Apr 2025 10:32:33 +0100 Subject: [PATCH 0844/2924] io_uring/rsrc: refactor io_import_fixed io_import_fixed is a mess. Even though we know the final len of the iterator, we still assign offset + len and do some magic after to correct for that. Do offset calculation first and finalise it with iov_iter_bvec at the end. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/2d5107fed24f8b23245ef2ede9a5a7f7c426df61.1744882081.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index fddde8ffe81e98..5cf854318b1dd0 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1037,6 +1037,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, u64 buf_addr, size_t len) { const struct bio_vec *bvec; + unsigned nr_segs; size_t offset; int ret; @@ -1056,8 +1057,6 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, return 0; } - iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len); - /* * Don't use iov_iter_advance() here, as it's really slow for * using the latter parts of a big fixed buffer - it iterates @@ -1067,30 +1066,21 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, * 1) it's a BVEC iter, we set it up * 2) all bvecs are the same in size, except potentially the * first and last bvec - * - * So just find our index, and adjust the iterator afterwards. - * If the offset is within the first bvec (or the whole first - * bvec, just use iov_iter_advance(). This makes it easier - * since we can just skip the first segment, which may not - * be folio_size aligned. */ bvec = imu->bvec; - if (offset < bvec->bv_len) { - iter->count -= offset; - iter->iov_offset = offset; - } else { + if (offset >= bvec->bv_len) { unsigned long seg_skip; /* skip first vec */ offset -= bvec->bv_len; seg_skip = 1 + (offset >> imu->folio_shift); - - iter->bvec += seg_skip; - iter->nr_segs -= seg_skip; - iter->count -= bvec->bv_len + offset; - iter->iov_offset = offset & ((1UL << imu->folio_shift) - 1); + bvec += seg_skip; + offset &= (1UL << imu->folio_shift) - 1; } + nr_segs = imu->nr_bvecs - (bvec - imu->bvec); + iov_iter_bvec(iter, ddir, bvec, nr_segs, len); + iter->iov_offset = offset; return 0; } From 80c7378f94cf193cb3bd2101bbcd5aea78d0e211 Mon Sep 17 00:00:00 2001 From: Nitesh Shetty Date: Thu, 17 Apr 2025 10:32:34 +0100 Subject: [PATCH 0845/2924] io_uring/rsrc: send exact nr_segs for fixed buffer Sending exact nr_segs, avoids bio split check and processing in block layer, which takes around 5%[1] of overall CPU utilization. In our setup, we see overall improvement of IOPS from 7.15M to 7.65M [2] and 5% less CPU utilization. [1] 3.52% io_uring [kernel.kallsyms] [k] bio_split_rw_at 1.42% io_uring [kernel.kallsyms] [k] bio_split_rw 0.62% io_uring [kernel.kallsyms] [k] bio_submit_split [2] sudo taskset -c 0,1 ./t/io_uring -b512 -d128 -c32 -s32 -p1 -F1 -B1 -n2 -r4 /dev/nvme0n1 /dev/nvme1n1 Signed-off-by: Nitesh Shetty [Pavel: fixed for kbuf, rebased and reworked on top of cleanups] Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7a1a49a8d053bd617c244291d63dbfbc07afde36.1744882081.git.asml.silence@gmail.com [axboe: fold in fix factoring in buf reg offset] Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 5cf854318b1dd0..0c6d7e7415c8b7 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1037,6 +1037,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, u64 buf_addr, size_t len) { const struct bio_vec *bvec; + size_t folio_mask; unsigned nr_segs; size_t offset; int ret; @@ -1067,6 +1068,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, * 2) all bvecs are the same in size, except potentially the * first and last bvec */ + folio_mask = (1UL << imu->folio_shift) - 1; bvec = imu->bvec; if (offset >= bvec->bv_len) { unsigned long seg_skip; @@ -1075,10 +1077,9 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, offset -= bvec->bv_len; seg_skip = 1 + (offset >> imu->folio_shift); bvec += seg_skip; - offset &= (1UL << imu->folio_shift) - 1; + offset &= folio_mask; } - - nr_segs = imu->nr_bvecs - (bvec - imu->bvec); + nr_segs = (offset + len + bvec->bv_offset + folio_mask) >> imu->folio_shift; iov_iter_bvec(iter, ddir, bvec, nr_segs, len); iter->iov_offset = offset; return 0; From 8dee308e4c01dea48fc104d37f92d5b58c50b96c Mon Sep 17 00:00:00 2001 From: Pavel Paklov Date: Tue, 25 Mar 2025 09:22:44 +0000 Subject: [PATCH 0846/2924] iommu/amd: Fix potential buffer overflow in parse_ivrs_acpihid There is a string parsing logic error which can lead to an overflow of hid or uid buffers. Comparing ACPIID_LEN against a total string length doesn't take into account the lengths of individual hid and uid buffers so the check is insufficient in some cases. For example if the length of hid string is 4 and the length of the uid string is 260, the length of str will be equal to ACPIID_LEN + 1 but uid string will overflow uid buffer which size is 256. The same applies to the hid string with length 13 and uid string with length 250. Check the length of hid and uid strings separately to prevent buffer overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter") Cc: stable@vger.kernel.org Signed-off-by: Pavel Paklov Link: https://lore.kernel.org/r/20250325092259.392844-1-Pavel.Paklov@cyberprotect.ru Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index dd9e26b7b71848..14aa0d77df26df 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3664,6 +3664,14 @@ static int __init parse_ivrs_acpihid(char *str) while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); From 30a3f2f3e4bd6335b727c83c08a982d969752bc1 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 14 Apr 2025 12:16:35 -0700 Subject: [PATCH 0847/2924] iommu: Fix two issues in iommu_copy_struct_from_user() In the review for iommu_copy_struct_to_user() helper, Matt pointed out that a NULL pointer should be rejected prior to dereferencing it: https://lore.kernel.org/all/86881827-8E2D-461C-BDA3-FA8FD14C343C@nvidia.com And Alok pointed out a typo at the same time: https://lore.kernel.org/all/480536af-6830-43ce-a327-adbd13dc3f1d@oracle.com Since both issues were copied from iommu_copy_struct_from_user(), fix them first in the current header. Fixes: e9d36c07bb78 ("iommu: Add iommu_copy_struct_from_user helper") Cc: stable@vger.kernel.org Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Acked-by: Alok Tiwari Reviewed-by: Matthew R. Ochs Link: https://lore.kernel.org/r/20250414191635.450472-1-nicolinc@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ccce8a751e2a55..3a8d35d41fdad5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -440,10 +440,10 @@ static inline int __iommu_copy_struct_from_user( void *dst_data, const struct iommu_user_data *src_data, unsigned int data_type, size_t data_len, size_t min_len) { - if (src_data->type != data_type) - return -EINVAL; if (WARN_ON(!dst_data || !src_data)) return -EINVAL; + if (src_data->type != data_type) + return -EINVAL; if (src_data->len < min_len || data_len < src_data->len) return -EINVAL; return copy_struct_from_user(dst_data, data_len, src_data->uptr, @@ -456,8 +456,8 @@ static inline int __iommu_copy_struct_from_user( * include/uapi/linux/iommufd.h * @user_data: Pointer to a struct iommu_user_data for user space data info * @data_type: The data type of the @kdst. Must match with @user_data->type - * @min_last: The last memember of the data structure @kdst points in the - * initial version. + * @min_last: The last member of the data structure @kdst points in the initial + * version. * Return 0 for success, otherwise -error. */ #define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ From 4f1492efb495bcef34c9ee8a94af81e6cea5abf4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 16 Apr 2025 15:36:08 +0800 Subject: [PATCH 0848/2924] iommu/vt-d: Revert ATS timing change to fix boot failure Commit <5518f239aff1> ("iommu/vt-d: Move scalable mode ATS enablement to probe path") changed the PCI ATS enablement logic to run earlier, specifically before the default domain attachment. On some client platforms, this change resulted in boot failures, causing the kernel to panic with the following message and call trace: Kernel panic - not syncing: DMAR hardware is malfunctioning CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc3+ #175 Call Trace: dump_stack_lvl+0x6f/0xb0 dump_stack+0x10/0x16 panic+0x10a/0x2b7 iommu_enable_translation.cold+0xc/0xc intel_iommu_init+0xe39/0xec0 ? trace_hardirqs_on+0x1e/0xd0 ? __pfx_pci_iommu_init+0x10/0x10 pci_iommu_init+0xd/0x40 do_one_initcall+0x5b/0x390 kernel_init_freeable+0x26d/0x2b0 ? __pfx_kernel_init+0x10/0x10 kernel_init+0x15/0x120 ret_from_fork+0x35/0x60 ? __pfx_kernel_init+0x10/0x10 ret_from_fork_asm+0x1a/0x30 RIP: 1f0f:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. RSP: 0000:0000000000000000 EFLAGS: 841f0f2e66 ORIG_RAX: 1f0f2e6600000000 RAX: 0000000000000000 RBX: 1f0f2e6600000000 RCX: 2e66000000000084 RDX: 0000000000841f0f RSI: 000000841f0f2e66 RDI: 00841f0f2e660000 RBP: 00841f0f2e660000 R08: 00841f0f2e660000 R09: 000000841f0f2e66 R10: 0000000000841f0f R11: 2e66000000000084 R12: 000000841f0f2e66 R13: 0000000000841f0f R14: 2e66000000000084 R15: 1f0f2e6600000000 ---[ end Kernel panic - not syncing: DMAR hardware is malfunctioning ]--- Fix this by reverting the timing change for ATS enablement introduced by the offending commit and restoring the previous behavior. Fixes: 5518f239aff1 ("iommu/vt-d: Move scalable mode ATS enablement to probe path") Reported-by: Jarkko Nikula Closes: https://lore.kernel.org/linux-iommu/01b9c72f-460d-4f77-b696-54c6825babc9@linux.intel.com/ Signed-off-by: Lu Baolu Tested-by: Jarkko Nikula Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250416073608.1799578-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index b29da2d96d0b18..e60b699e7b8cee 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3785,6 +3785,22 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) intel_iommu_debugfs_create_dev(info); + return &iommu->iommu; +free_table: + intel_pasid_free_table(dev); +clear_rbtree: + device_rbtree_remove(info); +free: + kfree(info); + + return ERR_PTR(ret); +} + +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + /* * The PCIe spec, in its wisdom, declares that the behaviour of the * device is undefined if you enable PASID support after ATS support. @@ -3792,22 +3808,12 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) * we can't yet know if we're ever going to use it. */ if (info->pasid_supported && - !pci_enable_pasid(pdev, info->pasid_supported & ~1)) + !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (sm_supported(iommu)) + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) iommu_enable_pci_ats(info); iommu_enable_pci_pri(info); - - return &iommu->iommu; -free_table: - intel_pasid_free_table(dev); -clear_rbtree: - device_rbtree_remove(info); -free: - kfree(info); - - return ERR_PTR(ret); } static void intel_iommu_release_device(struct device *dev) @@ -4391,6 +4397,7 @@ const struct iommu_ops intel_iommu_ops = { .domain_alloc_sva = intel_svm_domain_alloc, .domain_alloc_nested = intel_iommu_domain_alloc_nested, .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .device_group = intel_iommu_device_group, From 8ca9590c39b69b55a8de63d2b21b0d44f523b43a Mon Sep 17 00:00:00 2001 From: Yemike Abhilash Chandra Date: Thu, 17 Apr 2025 13:25:21 +0530 Subject: [PATCH 0849/2924] dmaengine: ti: k3-udma: Use cap_mask directly from dma_device structure instead of a local copy Currently, a local dma_cap_mask_t variable is used to store device cap_mask within udma_of_xlate(). However, the DMA_PRIVATE flag in the device cap_mask can get cleared when the last channel is released. This can happen right after storing the cap_mask locally in udma_of_xlate(), and subsequent dma_request_channel() can fail due to mismatch in the cap_mask. Fix this by removing the local dma_cap_mask_t variable and directly using the one from the dma_device structure. Fixes: 25dcb5dd7b7c ("dmaengine: ti: New driver for K3 UDMA") Cc: stable@vger.kernel.org Signed-off-by: Vaishnav Achath Acked-by: Peter Ujfalusi Reviewed-by: Udit Kumar Signed-off-by: Yemike Abhilash Chandra Link: https://lore.kernel.org/r/20250417075521.623651-1-y-abhilashchandra@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index b223a7aacb0cf1..08ed8cd7f1dd03 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4246,7 +4246,6 @@ static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { struct udma_dev *ud = ofdma->of_dma_data; - dma_cap_mask_t mask = ud->ddev.cap_mask; struct udma_filter_param filter_param; struct dma_chan *chan; @@ -4278,7 +4277,7 @@ static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec, } } - chan = __dma_request_channel(&mask, udma_dma_filter_fn, &filter_param, + chan = __dma_request_channel(&ud->ddev.cap_mask, udma_dma_filter_fn, &filter_param, ofdma->of_node); if (!chan) { dev_err(ud->dev, "get channel fail in %s.\n", __func__); From fca280992af8c2fbd511bc43f65abb4a17363f2f Mon Sep 17 00:00:00 2001 From: Ronald Wahl Date: Mon, 14 Apr 2025 19:31:13 +0200 Subject: [PATCH 0850/2924] dmaengine: ti: k3-udma: Add missing locking Recent kernels complain about a missing lock in k3-udma.c when the lock validator is enabled: [ 4.128073] WARNING: CPU: 0 PID: 746 at drivers/dma/ti/../virt-dma.h:169 udma_start.isra.0+0x34/0x238 [ 4.137352] CPU: 0 UID: 0 PID: 746 Comm: kworker/0:3 Not tainted 6.12.9-arm64 #28 [ 4.144867] Hardware name: pp-v12 (DT) [ 4.148648] Workqueue: events udma_check_tx_completion [ 4.153841] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 4.160834] pc : udma_start.isra.0+0x34/0x238 [ 4.165227] lr : udma_start.isra.0+0x30/0x238 [ 4.169618] sp : ffffffc083cabcf0 [ 4.172963] x29: ffffffc083cabcf0 x28: 0000000000000000 x27: ffffff800001b005 [ 4.180167] x26: ffffffc0812f0000 x25: 0000000000000000 x24: 0000000000000000 [ 4.187370] x23: 0000000000000001 x22: 00000000e21eabe9 x21: ffffff8000fa0670 [ 4.194571] x20: ffffff8001b6bf00 x19: ffffff8000fa0430 x18: ffffffc083b95030 [ 4.201773] x17: 0000000000000000 x16: 00000000f0000000 x15: 0000000000000048 [ 4.208976] x14: 0000000000000048 x13: 0000000000000000 x12: 0000000000000001 [ 4.216179] x11: ffffffc08151a240 x10: 0000000000003ea1 x9 : ffffffc08046ab68 [ 4.223381] x8 : ffffffc083cabac0 x7 : ffffffc081df3718 x6 : 0000000000029fc8 [ 4.230583] x5 : ffffffc0817ee6d8 x4 : 0000000000000bc0 x3 : 0000000000000000 [ 4.237784] x2 : 0000000000000000 x1 : 00000000001fffff x0 : 0000000000000000 [ 4.244986] Call trace: [ 4.247463] udma_start.isra.0+0x34/0x238 [ 4.251509] udma_check_tx_completion+0xd0/0xdc [ 4.256076] process_one_work+0x244/0x3fc [ 4.260129] process_scheduled_works+0x6c/0x74 [ 4.264610] worker_thread+0x150/0x1dc [ 4.268398] kthread+0xd8/0xe8 [ 4.271492] ret_from_fork+0x10/0x20 [ 4.275107] irq event stamp: 220 [ 4.278363] hardirqs last enabled at (219): [] _raw_spin_unlock_irq+0x38/0x50 [ 4.287183] hardirqs last disabled at (220): [] el1_dbg+0x24/0x50 [ 4.294879] softirqs last enabled at (182): [] handle_softirqs+0x1c0/0x3cc [ 4.303437] softirqs last disabled at (177): [] __do_softirq+0x1c/0x28 [ 4.311559] ---[ end trace 0000000000000000 ]--- This commit adds the missing locking. Fixes: 25dcb5dd7b7c ("dmaengine: ti: New driver for K3 UDMA") Cc: Peter Ujfalusi Cc: Vignesh Raghavendra Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ronald Wahl Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20250414173113.80677-1-rwahl@gmx.de Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 08ed8cd7f1dd03..b6255c0601bb29 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -1091,8 +1091,11 @@ static void udma_check_tx_completion(struct work_struct *work) u32 residue_diff; ktime_t time_diff; unsigned long delay; + unsigned long flags; while (1) { + spin_lock_irqsave(&uc->vc.lock, flags); + if (uc->desc) { /* Get previous residue and time stamp */ residue_diff = uc->tx_drain.residue; @@ -1127,6 +1130,8 @@ static void udma_check_tx_completion(struct work_struct *work) break; } + spin_unlock_irqrestore(&uc->vc.lock, flags); + usleep_range(ktime_to_us(delay), ktime_to_us(delay) + 10); continue; @@ -1143,6 +1148,8 @@ static void udma_check_tx_completion(struct work_struct *work) break; } + + spin_unlock_irqrestore(&uc->vc.lock, flags); } static irqreturn_t udma_ring_irq_handler(int irq, void *data) From 6bc2b6c6f16d8e60de518d26da1bc6bc436cf71d Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Wed, 16 Apr 2025 01:50:46 +0100 Subject: [PATCH 0851/2924] net: ethernet: mtk_eth_soc: reapply mdc divider on reset In the current method, the MDC divider was reset to the default setting of 2.5MHz after the NETSYS SER. Therefore, we need to reapply the MDC divider configuration function in mtk_hw_init() after reset. Fixes: c0a440031d431 ("net: ethernet: mtk_eth_soc: set MDIO bus clock frequency") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Link: https://patch.msgid.link/8ab7381447e6cdcb317d5b5a6ddd90a1734efcb0.1744764277.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 39 +++++++++++++-------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 + 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 43197b28b3e745..1a235283b0e9bb 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -871,9 +871,25 @@ static const struct phylink_mac_ops mtk_phylink_ops = { .mac_enable_tx_lpi = mtk_mac_enable_tx_lpi, }; +static void mtk_mdio_config(struct mtk_eth *eth) +{ + u32 val; + + /* Configure MDC Divider */ + val = FIELD_PREP(PPSC_MDC_CFG, eth->mdc_divider); + + /* Configure MDC Turbo Mode */ + if (mtk_is_netsys_v3_or_greater(eth)) + mtk_m32(eth, 0, MISC_MDC_TURBO, MTK_MAC_MISC_V3); + else + val |= PPSC_MDC_TURBO; + + mtk_m32(eth, PPSC_MDC_CFG, val, MTK_PPSC); +} + static int mtk_mdio_init(struct mtk_eth *eth) { - unsigned int max_clk = 2500000, divider; + unsigned int max_clk = 2500000; struct device_node *mii_np; int ret; u32 val; @@ -908,20 +924,9 @@ static int mtk_mdio_init(struct mtk_eth *eth) } max_clk = val; } - divider = min_t(unsigned int, DIV_ROUND_UP(MDC_MAX_FREQ, max_clk), 63); - - /* Configure MDC Turbo Mode */ - if (mtk_is_netsys_v3_or_greater(eth)) - mtk_m32(eth, 0, MISC_MDC_TURBO, MTK_MAC_MISC_V3); - - /* Configure MDC Divider */ - val = FIELD_PREP(PPSC_MDC_CFG, divider); - if (!mtk_is_netsys_v3_or_greater(eth)) - val |= PPSC_MDC_TURBO; - mtk_m32(eth, PPSC_MDC_CFG, val, MTK_PPSC); - - dev_dbg(eth->dev, "MDC is running on %d Hz\n", MDC_MAX_FREQ / divider); - + eth->mdc_divider = min_t(unsigned int, DIV_ROUND_UP(MDC_MAX_FREQ, max_clk), 63); + mtk_mdio_config(eth); + dev_dbg(eth->dev, "MDC is running on %d Hz\n", MDC_MAX_FREQ / eth->mdc_divider); ret = of_mdiobus_register(eth->mii_bus, mii_np); err_put_node: @@ -3974,6 +3979,10 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) else mtk_hw_reset(eth); + /* No MT7628/88 support yet */ + if (reset && !MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) + mtk_mdio_config(eth); + if (mtk_is_netsys_v3_or_greater(eth)) { /* Set FE to PDMAv2 if necessary */ val = mtk_r32(eth, MTK_FE_GLO_MISC); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 90a377ab4359ea..39709649ea8d14 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -1271,6 +1271,7 @@ struct mtk_eth { struct clk *clks[MTK_CLK_MAX]; struct mii_bus *mii_bus; + unsigned int mdc_divider; struct work_struct pending_work; unsigned long state; From 6b02eb372c6776c9abb8bc81cf63f96039c24664 Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Wed, 16 Apr 2025 01:51:07 +0100 Subject: [PATCH 0852/2924] net: ethernet: mtk_eth_soc: correct the max weight of the queue limit for 100Mbps Without this patch, the maximum weight of the queue limit will be incorrect when linked at 100Mbps due to an apparent typo. Fixes: f63959c7eec31 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Link: https://patch.msgid.link/74111ba0bdb13743313999ed467ce564e8189006.1744764277.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1a235283b0e9bb..5a3cfb8908a17e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -734,7 +734,7 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, case SPEED_100: val |= MTK_QTX_SCH_MAX_RATE_EN | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 103) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3); + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); break; case SPEED_1000: @@ -757,7 +757,7 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, case SPEED_100: val |= MTK_QTX_SCH_MAX_RATE_EN | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5); + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); break; case SPEED_1000: From 1b66124135f5f8640bd540fadda4b20cdd23114b Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Wed, 16 Apr 2025 01:51:25 +0100 Subject: [PATCH 0853/2924] net: ethernet: mtk_eth_soc: revise QDMA packet scheduler settings The QDMA packet scheduler suffers from a performance issue. Fix this by picking up changes from MediaTek's SDK which change to use Token Bucket instead of Leaky Bucket and fix the SPEED_1000 configuration. Fixes: 160d3a9b1929 ("net: ethernet: mtk_eth_soc: introduce MTK_NETSYS_V2 support") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Link: https://patch.msgid.link/18040f60f9e2f5855036b75b28c4332a2d2ebdd8.1744764277.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 5a3cfb8908a17e..bdb98c9d8b1c19 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -762,8 +762,8 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, break; case SPEED_1000: val |= MTK_QTX_SCH_MAX_RATE_EN | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 10) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 6) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 10); break; default: @@ -3320,7 +3320,7 @@ static int mtk_start_dma(struct mtk_eth *eth) if (mtk_is_netsys_v2_or_greater(eth)) val |= MTK_MUTLI_CNT | MTK_RESV_BUF | MTK_WCOMP_EN | MTK_DMAD_WR_WDONE | - MTK_CHK_DDONE_EN | MTK_LEAKY_BUCKET_EN; + MTK_CHK_DDONE_EN; else val |= MTK_RX_BT_32DWORDS; mtk_w32(eth, val, reg_map->qdma.glo_cfg); From cfde542df7dd51d26cf667f4af497878ddffd85a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Apr 2025 11:58:08 +0200 Subject: [PATCH 0854/2924] cpufreq/sched: Fix the usage of CPUFREQ_NEED_UPDATE_LIMITS Commit 8e461a1cb43d ("cpufreq: schedutil: Fix superfluous updates caused by need_freq_update") modified sugov_should_update_freq() to set the need_freq_update flag only for drivers with CPUFREQ_NEED_UPDATE_LIMITS set, but that flag generally needs to be set when the policy limits change because the driver callback may need to be invoked for the new limits to take effect. However, if the return value of cpufreq_driver_resolve_freq() after applying the new limits is still equal to the previously selected frequency, the driver callback needs to be invoked only in the case when CPUFREQ_NEED_UPDATE_LIMITS is set (which means that the driver specifically wants its callback to be invoked every time the policy limits change). Update the code accordingly to avoid missing policy limits changes for drivers without CPUFREQ_NEED_UPDATE_LIMITS. Fixes: 8e461a1cb43d ("cpufreq: schedutil: Fix superfluous updates caused by need_freq_update") Closes: https://lore.kernel.org/lkml/Z_Tlc6Qs-tYpxWYb@linaro.org/ Reported-by: Stephan Gerhold Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Link: https://patch.msgid.link/3010358.e9J7NaK4W3@rjwysocki.net --- kernel/sched/cpufreq_schedutil.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 1a19d69b91ed3c..b713ce0a570206 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -83,7 +83,7 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) if (unlikely(sg_policy->limits_changed)) { sg_policy->limits_changed = false; - sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); + sg_policy->need_freq_update = true; return true; } @@ -95,10 +95,22 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { - if (sg_policy->need_freq_update) + if (sg_policy->need_freq_update) { sg_policy->need_freq_update = false; - else if (sg_policy->next_freq == next_freq) + /* + * The policy limits have changed, but if the return value of + * cpufreq_driver_resolve_freq() after applying the new limits + * is still equal to the previously selected frequency, the + * driver callback need not be invoked unless the driver + * specifically wants that to happen on every update of the + * policy limits. + */ + if (sg_policy->next_freq == next_freq && + !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS)) + return false; + } else if (sg_policy->next_freq == next_freq) { return false; + } sg_policy->next_freq = next_freq; sg_policy->last_freq_update_time = time; From 79443a7e9da3c9f68290a8653837e23aba0fa89f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Apr 2025 11:59:15 +0200 Subject: [PATCH 0855/2924] cpufreq/sched: Explicitly synchronize limits_changed flag handling The handling of the limits_changed flag in struct sugov_policy needs to be explicitly synchronized to ensure that cpufreq policy limits updates will not be missed in some cases. Without that synchronization it is theoretically possible that the limits_changed update in sugov_should_update_freq() will be reordered with respect to the reads of the policy limits in cpufreq_driver_resolve_freq() and in that case, if the limits_changed update in sugov_limits() clobbers the one in sugov_should_update_freq(), the new policy limits may not take effect for a long time. Likewise, the limits_changed update in sugov_limits() may theoretically get reordered with respect to the updates of the policy limits in cpufreq_set_policy() and if sugov_should_update_freq() runs between them, the policy limits change may be missed. To ensure that the above situations will not take place, add memory barriers preventing the reordering in question from taking place and add READ_ONCE() and WRITE_ONCE() annotations around all of the limits_changed flag updates to prevent the compiler from messing up with that code. Fixes: 600f5badb78c ("cpufreq: schedutil: Don't skip freq update when limits change") Cc: 5.3+ # 5.3+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Link: https://patch.msgid.link/3376719.44csPzL39Z@rjwysocki.net --- kernel/sched/cpufreq_schedutil.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index b713ce0a570206..bcab867575bb5b 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -81,9 +81,20 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) if (!cpufreq_this_cpu_can_update(sg_policy->policy)) return false; - if (unlikely(sg_policy->limits_changed)) { - sg_policy->limits_changed = false; + if (unlikely(READ_ONCE(sg_policy->limits_changed))) { + WRITE_ONCE(sg_policy->limits_changed, false); sg_policy->need_freq_update = true; + + /* + * The above limits_changed update must occur before the reads + * of policy limits in cpufreq_driver_resolve_freq() or a policy + * limits update might be missed, so use a memory barrier to + * ensure it. + * + * This pairs with the write memory barrier in sugov_limits(). + */ + smp_mb(); + return true; } @@ -377,7 +388,7 @@ static inline bool sugov_hold_freq(struct sugov_cpu *sg_cpu) { return false; } static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) { if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min) - sg_cpu->sg_policy->limits_changed = true; + WRITE_ONCE(sg_cpu->sg_policy->limits_changed, true); } static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, @@ -883,7 +894,16 @@ static void sugov_limits(struct cpufreq_policy *policy) mutex_unlock(&sg_policy->work_lock); } - sg_policy->limits_changed = true; + /* + * The limits_changed update below must take place before the updates + * of policy limits in cpufreq_set_policy() or a policy limits update + * might be missed, so use a memory barrier to ensure it. + * + * This pairs with the memory barrier in sugov_should_update_freq(). + */ + smp_wmb(); + + WRITE_ONCE(sg_policy->limits_changed, true); } struct cpufreq_governor schedutil_gov = { From 75da043d8f880bde8616fd81638c4e2cdb186a08 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Apr 2025 12:00:52 +0200 Subject: [PATCH 0856/2924] cpufreq/sched: Set need_freq_update in ignore_dl_rate_limit() Notice that ignore_dl_rate_limit() need not piggy back on the limits_changed handling to achieve its goal (which is to enforce a frequency update before its due time). Namely, if sugov_should_update_freq() is updated to check sg_policy->need_freq_update and return 'true' if it is set when sg_policy->limits_changed is not set, ignore_dl_rate_limit() may set the former directly instead of setting the latter, so it can avoid hitting the memory barrier in sugov_should_update_freq(). Update the code accordingly. Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Link: https://patch.msgid.link/10666429.nUPlyArG6x@rjwysocki.net --- kernel/sched/cpufreq_schedutil.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index bcab867575bb5b..816f07f9d30f1a 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -95,6 +95,9 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) */ smp_mb(); + return true; + } else if (sg_policy->need_freq_update) { + /* ignore_dl_rate_limit() wants a new frequency to be found. */ return true; } @@ -388,7 +391,7 @@ static inline bool sugov_hold_freq(struct sugov_cpu *sg_cpu) { return false; } static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) { if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min) - WRITE_ONCE(sg_cpu->sg_policy->limits_changed, true); + sg_cpu->sg_policy->need_freq_update = true; } static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, From 7491cdf46b5cbdf123fc84fbe0a07e9e3d7b7620 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 16 Apr 2025 16:12:37 +0200 Subject: [PATCH 0857/2924] cpufreq: Avoid using inconsistent policy->min and policy->max Since cpufreq_driver_resolve_freq() can run in parallel with cpufreq_set_policy() and there is no synchronization between them, the former may access policy->min and policy->max while the latter is updating them and it may see intermediate values of them due to the way the update is carried out. Also the compiler is free to apply any optimizations it wants both to the stores in cpufreq_set_policy() and to the loads in cpufreq_driver_resolve_freq() which may result in additional inconsistencies. To address this, use WRITE_ONCE() when updating policy->min and policy->max in cpufreq_set_policy() and use READ_ONCE() for reading them in cpufreq_driver_resolve_freq(). Moreover, rearrange the update in cpufreq_set_policy() to avoid storing intermediate values in policy->min and policy->max with the help of the observation that their new values are expected to be properly ordered upfront. Also modify cpufreq_driver_resolve_freq() to take the possible reverse ordering of policy->min and policy->max, which may happen depending on the ordering of operations when this function and cpufreq_set_policy() run concurrently, into account by always honoring the max when it turns out to be less than the min (in case it comes from thermal throttling or similar). Fixes: 151717690694 ("cpufreq: Make policy min/max hard requirements") Cc: 5.16+ # 5.16+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Acked-by: Viresh Kumar Link: https://patch.msgid.link/5907080.DvuYhMxLoT@rjwysocki.net --- drivers/cpufreq/cpufreq.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3841c9da6cac36..acf19b0042bb6a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -540,8 +540,6 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, { unsigned int idx; - target_freq = clamp_val(target_freq, policy->min, policy->max); - if (!policy->freq_table) return target_freq; @@ -565,7 +563,22 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq) { - return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE); + unsigned int min = READ_ONCE(policy->min); + unsigned int max = READ_ONCE(policy->max); + + /* + * If this function runs in parallel with cpufreq_set_policy(), it may + * read policy->min before the update and policy->max after the update + * or the other way around, so there is no ordering guarantee. + * + * Resolve this by always honoring the max (in case it comes from + * thermal throttling or similar). + */ + if (unlikely(min > max)) + min = max; + + return __resolve_freq(policy, clamp_val(target_freq, min, max), + CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2384,6 +2397,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; + target_freq = clamp_val(target_freq, policy->min, policy->max); target_freq = __resolve_freq(policy, target_freq, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", @@ -2708,11 +2722,15 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * Resolve policy min/max to available frequencies. It ensures * no frequency resolution will neither overshoot the requested maximum * nor undershoot the requested minimum. + * + * Avoid storing intermediate values in policy->max or policy->min and + * compiler optimizations around them because they may be accessed + * concurrently by cpufreq_driver_resolve_freq() during the update. */ - policy->min = new_data.min; - policy->max = new_data.max; - policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L); - policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min); + trace_cpu_frequency_limits(policy); cpufreq_update_pressure(policy); From 87c259a7a359e73e6c52c68fcbec79988999b4e6 Mon Sep 17 00:00:00 2001 From: gaoxu Date: Thu, 17 Apr 2025 07:30:00 +0000 Subject: [PATCH 0858/2924] cgroup: Fix compilation issue due to cgroup_mutex not being exported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When adding folio_memcg function call in the zram module for Android16-6.12, the following error occurs during compilation: ERROR: modpost: "cgroup_mutex" [../soc-repo/zram.ko] undefined! This error is caused by the indirect call to lockdep_is_held(&cgroup_mutex) within folio_memcg. The export setting for cgroup_mutex is controlled by the CONFIG_PROVE_RCU macro. If CONFIG_LOCKDEP is enabled while CONFIG_PROVE_RCU is not, this compilation error will occur. To resolve this issue, add a parallel macro CONFIG_LOCKDEP control to ensure cgroup_mutex is properly exported when needed. Signed-off-by: gao xu Acked-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 3caf2cd86e65f8..9c1bf7f7c812fc 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -90,7 +90,7 @@ DEFINE_MUTEX(cgroup_mutex); DEFINE_SPINLOCK(css_set_lock); -#ifdef CONFIG_PROVE_RCU +#if (defined CONFIG_PROVE_RCU || defined CONFIG_LOCKDEP) EXPORT_SYMBOL_GPL(cgroup_mutex); EXPORT_SYMBOL_GPL(css_set_lock); #endif From 1bf67c8fdbda21fadd564a12dbe2b13c1ea5eda7 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 16 Apr 2025 21:17:51 +0000 Subject: [PATCH 0859/2924] cgroup/cpuset-v1: Add missing support for cpuset_v2_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Android has mounted the v1 cpuset controller using filesystem type "cpuset" (not "cgroup") since 2015 [1], and depends on the resulting behavior where the controller name is not added as a prefix for cgroupfs files. [2] Later, a problem was discovered where cpu hotplug onlining did not affect the cpuset/cpus files, which Android carried an out-of-tree patch to address for a while. An attempt was made to upstream this patch, but the recommendation was to use the "cpuset_v2_mode" mount option instead. [3] An effort was made to do so, but this fails with "cgroup: Unknown parameter 'cpuset_v2_mode'" because commit e1cba4b85daa ("cgroup: Add mount flag to enable cpuset to use v2 behavior in v1 cgroup") did not update the special cased cpuset_mount(), and only the cgroup (v1) filesystem type was updated. Add parameter parsing to the cpuset filesystem type so that cpuset_v2_mode works like the cgroup filesystem type: $ mkdir /dev/cpuset $ mount -t cpuset -ocpuset_v2_mode none /dev/cpuset $ mount|grep cpuset none on /dev/cpuset type cgroup (rw,relatime,cpuset,noprefix,cpuset_v2_mode,release_agent=/sbin/cpuset_release_agent) [1] https://cs.android.com/android/_/android/platform/system/core/+/b769c8d24fd7be96f8968aa4c80b669525b930d3 [2] https://cs.android.com/android/platform/superproject/main/+/main:system/core/libprocessgroup/setup/cgroup_map_write.cpp;drc=2dac5d89a0f024a2d0cc46a80ba4ee13472f1681;l=192 [3] https://lore.kernel.org/lkml/f795f8be-a184-408a-0b5a-553d26061385@redhat.com/T/ Fixes: e1cba4b85daa ("cgroup: Add mount flag to enable cpuset to use v2 behavior in v1 cgroup") Signed-off-by: T.J. Mercier Acked-by: Waiman Long Reviewed-by: Kamalesh Babulal Acked-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 9c1bf7f7c812fc..63e5b90da1f303 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2353,9 +2353,37 @@ static struct file_system_type cgroup2_fs_type = { }; #ifdef CONFIG_CPUSETS_V1 +enum cpuset_param { + Opt_cpuset_v2_mode, +}; + +static const struct fs_parameter_spec cpuset_fs_parameters[] = { + fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), + {} +}; + +static int cpuset_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, cpuset_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_cpuset_v2_mode: + ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; + return 0; + } + return -EINVAL; +} + static const struct fs_context_operations cpuset_fs_context_ops = { .get_tree = cgroup1_get_tree, .free = cgroup_fs_context_free, + .parse_param = cpuset_parse_param, }; /* @@ -2392,6 +2420,7 @@ static int cpuset_init_fs_context(struct fs_context *fc) static struct file_system_type cpuset_fs_type = { .name = "cpuset", .init_fs_context = cpuset_init_fs_context, + .parameters = cpuset_fs_parameters, .fs_flags = FS_USERNS_MOUNT, }; #endif From b419bed4f0a62c65a57dd495185821dd56bc435c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Apr 2025 16:48:26 -0600 Subject: [PATCH 0860/2924] io_uring/rsrc: ensure segments counts are correct on kbuf buffers kbuf imports have the front offset adjusted and segments removed, but the tail segments are still included in the segment count that gets passed in the iov_iter. As the segments aren't necessarily all the same size, move importing to a separate helper and iterate the mapped length to get an exact count. Reviewed-by: Nitesh Shetty Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 0c6d7e7415c8b7..f80a77c4973f30 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1032,6 +1032,26 @@ static int validate_fixed_range(u64 buf_addr, size_t len, return 0; } +static int io_import_kbuf(int ddir, struct iov_iter *iter, + struct io_mapped_ubuf *imu, size_t len, size_t offset) +{ + size_t count = len + offset; + + iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, count); + iov_iter_advance(iter, offset); + + if (count < imu->len) { + const struct bio_vec *bvec = iter->bvec; + + while (len > bvec->bv_len) { + len -= bvec->bv_len; + bvec++; + } + iter->nr_segs = 1 + bvec - iter->bvec; + } + return 0; +} + static int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu, u64 buf_addr, size_t len) @@ -1052,11 +1072,8 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, offset = buf_addr - imu->ubuf; - if (imu->is_kbuf) { - iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len); - iov_iter_advance(iter, offset); - return 0; - } + if (imu->is_kbuf) + return io_import_kbuf(ddir, iter, imu, len, offset); /* * Don't use iov_iter_advance() here, as it's really slow for From 261592ba06aa44001ab95fd47bafa4225bab25cf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 17 Apr 2025 14:09:56 -0400 Subject: [PATCH 0861/2924] bcachefs: Fix snapshotting a subvolume, then renaming it Subvolume roots and the dirents that point to them are special; they don't obey the normal snapshot versioning rules because they cross snapshot boundaries. We don't keep around older versions of subvolume dirents on rename - we don't need to, because subvolume dirents are only visible in the parent subvolume, and we wouldn't be able to match up the different dirent and inode versions due to crossing the snapshot ID boundary. That means that when we rename a subvolume, that's been snapshotted, the older version of the subvolume root will become dangling - it won't have a dirent that points to it. That's expected, we just need to tell fsck that this is ok. Fixes: https://github.com/koverstreet/bcachefs/issues/856 Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 18308f3d64a1cc..7b25cedd3e40b5 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -321,6 +321,31 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode) inode->bi_subvol == BCACHEFS_ROOT_SUBVOL) return false; + /* + * Subvolume roots are special: older versions of subvolume roots may be + * disconnected, it's only the newest version that matters. + * + * We only keep a single dirent pointing to a subvolume root, i.e. + * older versions of snapshots will not have a different dirent pointing + * to the same subvolume root. + * + * This is because dirents that point to subvolumes are only visible in + * the parent subvolume - versioning is not needed - and keeping them + * around would break fsck, because when we're crossing subvolumes we + * don't have a consistent snapshot ID to do check the inode <-> dirent + * relationships. + * + * Thus, a subvolume root that's been renamed after a snapshot will have + * a disconnected older version - that's expected. + * + * Note that taking a snapshot always updates the root inode (to update + * the dirent backpointer), so a subvolume root inode with + * BCH_INODE_has_child_snapshot is never visible. + */ + if (inode->bi_subvol && + (inode->bi_flags & BCH_INODE_has_child_snapshot)) + return false; + return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked); } @@ -1007,6 +1032,23 @@ static int check_inode_dirent_inode(struct btree_trans *trans, if (ret && !bch2_err_matches(ret, ENOENT)) return ret; + if ((ret || dirent_points_to_inode_nowarn(d, inode)) && + inode->bi_subvol && + (inode->bi_flags & BCH_INODE_has_child_snapshot)) { + /* Older version of a renamed subvolume root: we won't have a + * correct dirent for it. That's expected, see + * inode_should_reattach(). + * + * We don't clear the backpointer field when doing the rename + * because there might be arbitrarily many versions in older + * snapshots. + */ + inode->bi_dir = 0; + inode->bi_dir_offset = 0; + *write_inode = true; + goto out; + } + if (fsck_err_on(ret, trans, inode_points_to_missing_dirent, "inode points to missing dirent\n%s", @@ -1027,7 +1069,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, inode->bi_dir_offset = 0; *write_inode = true; } - +out: ret = 0; fsck_err: bch2_trans_iter_exit(trans, &dirent_iter); From 31d1139956112dd047a70b263f4d578921de779a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 17 Apr 2025 10:40:17 -0400 Subject: [PATCH 0862/2924] ftrace: Initialize variables for ftrace_startup/shutdown_subops() The reworking to fix and simplify the ftrace_startup_subops() and the ftrace_shutdown_subops() made it possible for the filter_hash and notrace_hash variables to be used uninitialized in a way that the compiler did not catch it. Initialize both filter_hash and notrace_hash to the EMPTY_HASH as that is what they should be if they never are used. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250417104017.3aea66c2@gandalf.local.home Reported-by: Venkat Rao Bagalkote Tested-by: Venkat Rao Bagalkote Fixes: 0ae6b8ce200d ("ftrace: Fix accounting of subop hashes") Closes: https://lore.kernel.org/all/1db64a42-626d-4b3a-be08-c65e47333ce2@linux.ibm.com/ Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a8a02868b43588..43394445390cd9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3490,8 +3490,8 @@ static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash ** */ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) { - struct ftrace_hash *filter_hash; - struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash = EMPTY_HASH; + struct ftrace_hash *notrace_hash = EMPTY_HASH; struct ftrace_hash *save_filter_hash; struct ftrace_hash *save_notrace_hash; int ret; @@ -3625,8 +3625,8 @@ static int rebuild_hashes(struct ftrace_hash **filter_hash, struct ftrace_hash * */ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) { - struct ftrace_hash *filter_hash; - struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash = EMPTY_HASH; + struct ftrace_hash *notrace_hash = EMPTY_HASH; int ret; if (unlikely(ftrace_disabled)) From 08275e59a75047ba8fc0b9853bfdfc88a124763d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 17 Apr 2025 11:09:33 -0400 Subject: [PATCH 0863/2924] ftrace: Reinitialize hash to EMPTY_HASH after freeing There's several locations that free a ftrace hash pointer but may be referenced again. Reset them to EMPTY_HASH so that a u-a-f bug doesn't happen. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250417110933.20ab718b@gandalf.local.home Fixes: 0ae6b8ce200d ("ftrace: Fix accounting of subop hashes") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 43394445390cd9..d0e4a902bb4028 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1297,6 +1297,8 @@ void ftrace_free_filter(struct ftrace_ops *ops) return; free_ftrace_hash(ops->func_hash->filter_hash); free_ftrace_hash(ops->func_hash->notrace_hash); + ops->func_hash->filter_hash = EMPTY_HASH; + ops->func_hash->notrace_hash = EMPTY_HASH; } EXPORT_SYMBOL_GPL(ftrace_free_filter); @@ -3443,6 +3445,7 @@ static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash ** size_bits); if (ret < 0) { free_ftrace_hash(*filter_hash); + *filter_hash = EMPTY_HASH; return ret; } } @@ -3472,6 +3475,7 @@ static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash ** subops_hash->notrace_hash); if (ret < 0) { free_ftrace_hash(*notrace_hash); + *notrace_hash = EMPTY_HASH; return ret; } } From c45c585dde535e5ae2c363594bde3e05ce94a296 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 17 Apr 2025 13:59:39 -0400 Subject: [PATCH 0864/2924] ftrace: Free ftrace hashes after they are replaced in the subops code The subops processing creates new hashes when adding and removing subops. There were some places that the old hashes that were replaced were not freed and this caused some memory leaks. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250417135939.245b128d@gandalf.local.home Fixes: 0ae6b8ce200d ("ftrace: Fix accounting of subop hashes") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d0e4a902bb4028..41dcfcf8b40ae7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3609,6 +3609,9 @@ static int rebuild_hashes(struct ftrace_hash **filter_hash, struct ftrace_hash * } } + free_ftrace_hash(temp_hash.filter_hash); + free_ftrace_hash(temp_hash.notrace_hash); + temp_hash.filter_hash = *filter_hash; temp_hash.notrace_hash = *notrace_hash; } @@ -3703,8 +3706,11 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, } ret = rebuild_hashes(&filter_hash, ¬race_hash, ops); - if (!ret) + if (!ret) { ret = ftrace_update_ops(ops, filter_hash, notrace_hash); + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + } if (ret) { /* Put back the original hash */ From 92f1d3b40179b15630d72e2c6e4e25a899b67ba9 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 13 Apr 2025 09:44:44 +0800 Subject: [PATCH 0865/2924] ftrace: fix incorrect hash size in register_ftrace_direct() The maximum of the ftrace hash bits is made fls(32) in register_ftrace_direct(), which seems illogical. So, we fix it by making the max hash bits FTRACE_HASH_MAX_BITS instead. Link: https://lore.kernel.org/20250413014444.36724-1-dongml2@chinatelecom.cn Fixes: d05cb470663a ("ftrace: Fix modification of direct_function hash while in use") Signed-off-by: Menglong Dong Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 41dcfcf8b40ae7..61130bb34d6c34 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5964,9 +5964,10 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) /* Make a copy hash to place the new and the old entries in */ size = hash->count + direct_functions->count; - if (size > 32) - size = 32; - new_hash = alloc_ftrace_hash(fls(size)); + size = fls(size); + if (size > FTRACE_HASH_MAX_BITS) + size = FTRACE_HASH_MAX_BITS; + new_hash = alloc_ftrace_hash(size); if (!new_hash) goto out_unlock; From 3b4e87e6a593d571183c414d81758624da01f2b9 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Sun, 13 Apr 2025 00:10:43 +0200 Subject: [PATCH 0866/2924] ftrace: Fix type of ftrace_graph_ent_entry.depth ftrace_graph_ent.depth is int, but ftrace_graph_ent_entry.depth is unsigned long. This confuses trace-cmd on 64-bit big-endian systems and makes it print a huge amount of spaces. Fix this by using unsigned int, which has a matching size, instead. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Sven Schnelle Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250412221847.17310-2-iii@linux.ibm.com Fixes: ff5c9c576e75 ("ftrace: Add support for function argument to graph tracer") Signed-off-by: Ilya Leoshkevich Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_entries.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index ee40d4e6ad1cc8..4ef4df6623a8d3 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -80,11 +80,11 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, F_STRUCT( __field_struct( struct ftrace_graph_ent, graph_ent ) __field_packed( unsigned long, graph_ent, func ) - __field_packed( unsigned long, graph_ent, depth ) + __field_packed( unsigned int, graph_ent, depth ) __dynamic_array(unsigned long, args ) ), - F_printk("--> %ps (%lu)", (void *)__entry->func, __entry->depth) + F_printk("--> %ps (%u)", (void *)__entry->func, __entry->depth) ); #ifdef CONFIG_FUNCTION_GRAPH_RETADDR From 6405f5b70b1c240ffddef01c7a140498f47d4fe7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 9 Apr 2025 21:59:34 -0700 Subject: [PATCH 0867/2924] drm/xe: Set LRC addresses before guc load The metadata saved in the ADS is read by GuC when it's initialized. Saving the addresses to the LRCs when they are populated is too late as GuC will keep using the old ones. This was causing GuC to use the RCS LRC for any engine class. It's not a big problem on a Linux-only scenario since the they are used by GuC only on media engines when the watchdog is triggered. However, in a virtualization scenario with Windows as the VF, it causes the wrong LRCs to be loaded as the watchdog is used for all engines. Fix it by letting guc_golden_lrc_init() initialize the metadata, like other *_init() functions, and later guc_golden_lrc_populate() to copy the LRCs to the right places. The former is called before the second GuC load, while the latter is called after LRCs have been recorded. Cc: Chee Yin Wong Cc: John Harrison Cc: Matt Roper Cc: Matthew Brost Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: # v6.11+ Reviewed-by: Matthew Brost Tested-by: Chee Yin Wong Link: https://lore.kernel.org/r/20250409-fix-guc-ads-v1-1-494135f7a5d0@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit c31a0b6402d15b530514eee9925adfcb8cfbb1c9) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_ads.c | 75 ++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index e7c9e095a19f05..7031542a70cebc 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -490,24 +490,52 @@ static void fill_engine_enable_masks(struct xe_gt *gt, engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); } -static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) +/* + * Write the offsets corresponding to the golden LRCs. The actual data is + * populated later by guc_golden_lrc_populate() + */ +static void guc_golden_lrc_init(struct xe_guc_ads *ads) { struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), offsetof(struct __guc_ads_blob, system_info)); - u8 guc_class; + size_t alloc_size, real_size; + u32 addr_ggtt, offset; + int class; + + offset = guc_ads_golden_lrc_offset(ads); + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + u8 guc_class; + + guc_class = xe_engine_class_to_guc_class(class); - for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) { if (!info_map_read(xe, &info_map, engine_enabled_masks[guc_class])) continue; + real_size = xe_gt_lrc_size(gt, class); + alloc_size = PAGE_ALIGN(real_size); + + /* + * This interface is slightly confusing. We need to pass the + * base address of the full golden context and the size of just + * the engine state, which is the section of the context image + * that starts after the execlists LRC registers. This is + * required to allow the GuC to restore just the engine state + * when a watchdog reset occurs. + * We calculate the engine state size by removing the size of + * what comes before it in the context image (which is identical + * on all engines). + */ ads_blob_write(ads, ads.eng_state_size[guc_class], - guc_ads_golden_lrc_size(ads) - - xe_lrc_skip_size(xe)); + real_size - xe_lrc_skip_size(xe)); ads_blob_write(ads, ads.golden_context_lrca[guc_class], - xe_bo_ggtt_addr(ads->bo) + - guc_ads_golden_lrc_offset(ads)); + addr_ggtt); + + addr_ggtt += alloc_size; } } @@ -857,7 +885,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); guc_policies_init(ads); - guc_prep_golden_lrc_null(ads); + guc_golden_lrc_init(ads); guc_mapping_table_init_invalid(gt, &info_map); guc_doorbell_init(ads); @@ -883,7 +911,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) guc_policies_init(ads); fill_engine_enable_masks(gt, &info_map); guc_mmio_reg_state_init(ads); - guc_prep_golden_lrc_null(ads); + guc_golden_lrc_init(ads); guc_mapping_table_init(gt, &info_map); guc_capture_prep_lists(ads); guc_doorbell_init(ads); @@ -903,18 +931,22 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) guc_ads_private_data_offset(ads)); } -static void guc_populate_golden_lrc(struct xe_guc_ads *ads) +/* + * After the golden LRC's are recorded for each engine class by the first + * submission, copy them to the ADS, as initialized earlier by + * guc_golden_lrc_init(). + */ +static void guc_golden_lrc_populate(struct xe_guc_ads *ads) { struct xe_device *xe = ads_to_xe(ads); struct xe_gt *gt = ads_to_gt(ads); struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), offsetof(struct __guc_ads_blob, system_info)); size_t total_size = 0, alloc_size, real_size; - u32 addr_ggtt, offset; + u32 offset; int class; offset = guc_ads_golden_lrc_offset(ads); - addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { u8 guc_class; @@ -931,26 +963,9 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) alloc_size = PAGE_ALIGN(real_size); total_size += alloc_size; - /* - * This interface is slightly confusing. We need to pass the - * base address of the full golden context and the size of just - * the engine state, which is the section of the context image - * that starts after the execlists LRC registers. This is - * required to allow the GuC to restore just the engine state - * when a watchdog reset occurs. - * We calculate the engine state size by removing the size of - * what comes before it in the context image (which is identical - * on all engines). - */ - ads_blob_write(ads, ads.eng_state_size[guc_class], - real_size - xe_lrc_skip_size(xe)); - ads_blob_write(ads, ads.golden_context_lrca[guc_class], - addr_ggtt); - xe_map_memcpy_to(xe, ads_to_map(ads), offset, gt->default_lrc[class], real_size); - addr_ggtt += alloc_size; offset += alloc_size; } @@ -959,7 +974,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) { - guc_populate_golden_lrc(ads); + guc_golden_lrc_populate(ads); } static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset) From 2577b202458cddff85cc154b1fe7f313e0d1f418 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 14 Apr 2025 14:25:40 +0100 Subject: [PATCH 0868/2924] drm/xe/userptr: fix notifier vs folio deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User is reporting what smells like notifier vs folio deadlock, where migrate_pages_batch() on core kernel side is holding folio lock(s) and then interacting with the mappings of it, however those mappings are tied to some userptr, which means calling into the notifier callback and grabbing the notifier lock. With perfect timing it looks possible that the pages we pulled from the hmm fault can get sniped by migrate_pages_batch() at the same time that we are holding the notifier lock to mark the pages as accessed/dirty, but at this point we also want to grab the folio locks(s) to mark them as dirty, but if they are contended from notifier/migrate_pages_batch side then we deadlock since folio lock won't be dropped until we drop the notifier lock. Fortunately the mark_page_accessed/dirty is not really needed in the first place it seems and should have already been done by hmm fault, so just remove it. Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4765 Fixes: 0a98219bcc96 ("drm/xe/hmm: Don't dereference struct page pointers without notifier lock") Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Cc: # v6.10+ Reviewed-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250414132539.26654-2-matthew.auld@intel.com (cherry picked from commit bd7c0cb695e87c0e43247be8196b4919edbe0e85) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hmm.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index c3cc0fa105e84a..57b71956ddf42a 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -19,29 +19,6 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end) return (end - start) >> PAGE_SHIFT; } -/** - * xe_mark_range_accessed() - mark a range is accessed, so core mm - * have such information for memory eviction or write back to - * hard disk - * @range: the range to mark - * @write: if write to this range, we mark pages in this range - * as dirty - */ -static void xe_mark_range_accessed(struct hmm_range *range, bool write) -{ - struct page *page; - u64 i, npages; - - npages = xe_npages_in_range(range->start, range->end); - for (i = 0; i < npages; i++) { - page = hmm_pfn_to_page(range->hmm_pfns[i]); - if (write) - set_page_dirty_lock(page); - - mark_page_accessed(page); - } -} - static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, struct hmm_range *range, struct rw_semaphore *notifier_sem) { @@ -331,7 +308,6 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, if (ret) goto out_unlock; - xe_mark_range_accessed(&hmm_range, write); userptr->sg = &userptr->sgt; xe_hmm_userptr_set_mapped(uvma); userptr->notifier_seq = hmm_range.notifier_seq; From 25583ad42d091819157832e894179200ba8b54ee Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 10 Apr 2025 17:27:17 +0100 Subject: [PATCH 0869/2924] drm/xe/dma_buf: stop relying on placement in unmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The is_vram() is checking the current placement, however if we consider exported VRAM with dynamic dma-buf, it looks possible for the xe driver to async evict the memory, notifying the importer, however importer does not have to call unmap_attachment() immediately, but rather just as "soon as possible", like when the dma-resv idles. Following from this we would then pipeline the move, attaching the fence to the manager, and then update the current placement. But when the unmap_attachment() runs at some later point we might see that is_vram() is now false, and take the complete wrong path when dma-unmapping the sg, leading to explosions. To fix this check if the sgl was mapping a struct page. v2: - The attachment can be mapped multiple times it seems, so we can't really rely on encoding something in the attachment->priv. Instead see if the page_link has an encoded struct page. For vram we expect this to be NULL. Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4563 Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Cc: # v6.8+ Acked-by: Christian König Link: https://lore.kernel.org/r/20250410162716.159403-2-matthew.auld@intel.com (cherry picked from commit d755887f8e5a2a18e15e6632a5193e5feea18499) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_dma_buf.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index f67803e15a0e66..f7a20264ea3305 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -145,10 +145,7 @@ static void xe_dma_buf_unmap(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { - struct dma_buf *dma_buf = attach->dmabuf; - struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv); - - if (!xe_bo_is_vram(bo)) { + if (sg_page(sgt->sgl)) { dma_unmap_sgtable(attach->dev, sgt, dir, 0); sg_free_table(sgt); kfree(sgt); From 78600df8f593407a3df2d6c48c35d0ad203d7fb4 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 16 Apr 2025 13:16:22 -0700 Subject: [PATCH 0870/2924] drm/xe/pxp: do not queue unneeded terminations from debugfs The PXP terminate debugfs currently unconditionally simulates a termination, no matter what the HW status is. This is unneeded if PXP is not in use and can cause errors if the HW init hasn't completed yet. To solve these issues, we can simply limit the terminations to the cases where PXP is fully initialized and in use. v2: s/pxp_status/ready/ to avoid confusion with pxp->status (John) Fixes: 385a8015b214 ("drm/xe/pxp: Add PXP debugfs support") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4749 Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Link: https://lore.kernel.org/r/20250416201622.1295369-1-daniele.ceraolospurio@intel.com (cherry picked from commit ba1f62a0cac84757ca35f4217e3cd3a2654233ae) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pxp_debugfs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pxp_debugfs.c b/drivers/gpu/drm/xe/xe_pxp_debugfs.c index ccfbacf08efc16..525a2f6bb0767c 100644 --- a/drivers/gpu/drm/xe/xe_pxp_debugfs.c +++ b/drivers/gpu/drm/xe/xe_pxp_debugfs.c @@ -66,9 +66,18 @@ static int pxp_terminate(struct seq_file *m, void *data) { struct xe_pxp *pxp = node_to_pxp(m->private); struct drm_printer p = drm_seq_file_printer(m); + int ready = xe_pxp_get_readiness_status(pxp); - if (!xe_pxp_is_enabled(pxp)) - return -ENODEV; + if (ready < 0) + return ready; /* disabled or error occurred */ + else if (!ready) + return -EBUSY; /* init still in progress */ + + /* no need for a termination if PXP is not active */ + if (pxp->status != XE_PXP_ACTIVE) { + drm_printf(&p, "PXP not active\n"); + return 0; + } /* simulate a termination interrupt */ spin_lock_irq(&pxp->xe->irq.lock); From 750d0ac001e85b754404178ee8ce01cbc76a03be Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 16 Apr 2025 14:54:48 +0200 Subject: [PATCH 0871/2924] MAINTAINERS: Add entry for Socfpga DWMAC ethernet glue driver Socfpga's DWMAC glue comes in a variety of flavours with multiple options when it comes to physical interfaces, making it not so easy to test. Having access to a Cyclone5 with RGMII as well as Lynx PCS variants, add myself as a maintainer to help with reviews and testing. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250416125453.306029-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8c7d796131a811..067f72e8af10ed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3191,6 +3191,12 @@ M: Dinh Nguyen S: Maintained F: drivers/clk/socfpga/ +ARM/SOCFPGA DWMAC GLUE LAYER +M: Maxime Chevallier +S: Maintained +F: Documentation/devicetree/bindings/net/socfpga-dwmac.txt +F: drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c + ARM/SOCFPGA EDAC BINDINGS M: Matthew Gerlach S: Maintained From a8c5b0ed89a3f2c81c6ae0b041394e6eea0e7024 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 17 Apr 2025 18:30:03 -0400 Subject: [PATCH 0872/2924] tracing: Fix filter string testing The filter string testing uses strncpy_from_kernel/user_nofault() to retrieve the string to test the filter against. The if() statement was incorrect as it considered 0 as a fault, when it is only negative that it faulted. Running the following commands: # cd /sys/kernel/tracing # echo "filename.ustring ~ \"/proc*\"" > events/syscalls/sys_enter_openat/filter # echo 1 > events/syscalls/sys_enter_openat/enable # ls /proc/$$/maps # cat trace Would produce nothing, but with the fix it will produce something like: ls-1192 [007] ..... 8169.828333: sys_openat(dfd: ffffffffffffff9c, filename: 7efc18359904, flags: 80000, mode: 0) Link: https://lore.kernel.org/all/CAEf4BzbVPQ=BjWztmEwBPRKHUwNfKBkS3kce-Rzka6zvbQeVpg@mail.gmail.com/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250417183003.505835fb@gandalf.local.home Fixes: 77360f9bbc7e5 ("tracing: Add test for user space strings when filtering on string pointers") Reported-by: Andrii Nakryiko Reported-by: Mykyta Yatsenko Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 0993dfc1c5c165..2048560264bb48 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -808,7 +808,7 @@ static __always_inline char *test_string(char *str) kstr = ubuf->buffer; /* For safety, do not trust the string pointer */ - if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) + if (strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE) < 0) return NULL; return kstr; } @@ -827,7 +827,7 @@ static __always_inline char *test_ustring(char *str) /* user space address? */ ustr = (char __user *)str; - if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) + if (strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE) < 0) return NULL; return kstr; From 4067196a52278156d18d8d6fa7f43970611b1b49 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Sat, 29 Mar 2025 19:10:29 +0200 Subject: [PATCH 0873/2924] mm/page_alloc: fix deadlock on cpu_hotplug_lock in __accept_page() When the last page in the zone is accepted, __accept_page() calls static_branch_dec(). This function takes cpu_hotplug_lock, which can lead to a deadlock if the allocation occurs during CPU bringup path as _cpu_up() also takes the lock. To prevent this deadlock, defer static_branch_dec() to a workqueue. Call static_branch_dec() only when the workqueue is not yet initialized. Workqueues are initialized before CPU bring up, so this will not conflict with the first scenario. Link: https://lkml.kernel.org/r/20250329171030.3942298-1-kirill.shutemov@linux.intel.com Fixes: 55ad43e8ba0f ("mm: add a helper to accept page") Signed-off-by: Kirill A. Shutemov Reported-by: Srikanth Aithal Tested-by: Srikanth Aithal Cc: Dave Hansen Cc: Ashish Kalra Cc: David Hildenbrand Cc: "Edgecombe, Rick P" Cc: Mel Gorman Cc: "Mike Rapoport (IBM)" Cc: Thomas Lendacky Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 3 +++ mm/internal.h | 1 + mm/mm_init.c | 1 + mm/page_alloc.c | 28 ++++++++++++++++++++++++++-- 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 25e80b2ca7f41a..4c95fcc9e9df0c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -967,6 +967,9 @@ struct zone { #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; + + /* To be called once the last page in the zone is accepted */ + struct work_struct unaccepted_cleanup; #endif /* zone flags, see below */ diff --git a/mm/internal.h b/mm/internal.h index 50c2f590b2d04b..e9695baa592266 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1595,6 +1595,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc); #ifdef CONFIG_UNACCEPTED_MEMORY void accept_page(struct page *page); +void unaccepted_cleanup_work(struct work_struct *work); #else /* CONFIG_UNACCEPTED_MEMORY */ static inline void accept_page(struct page *page) { diff --git a/mm/mm_init.c b/mm/mm_init.c index 84f14fa12d0ddb..9659689b8ace01 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1441,6 +1441,7 @@ static void __meminit zone_init_free_lists(struct zone *zone) #ifdef CONFIG_UNACCEPTED_MEMORY INIT_LIST_HEAD(&zone->unaccepted_pages); + INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work); #endif } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1715e34b91af4d..e506e365d6f18b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7191,6 +7191,11 @@ static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages); static bool lazy_accept = true; +void unaccepted_cleanup_work(struct work_struct *work) +{ + static_branch_dec(&zones_with_unaccepted_pages); +} + static int __init accept_memory_parse(char *p) { if (!strcmp(p, "lazy")) { @@ -7229,8 +7234,27 @@ static void __accept_page(struct zone *zone, unsigned long *flags, __free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL); - if (last) - static_branch_dec(&zones_with_unaccepted_pages); + if (last) { + /* + * There are two corner cases: + * + * - If allocation occurs during the CPU bring up, + * static_branch_dec() cannot be used directly as + * it causes a deadlock on cpu_hotplug_lock. + * + * Instead, use schedule_work() to prevent deadlock. + * + * - If allocation occurs before workqueues are initialized, + * static_branch_dec() should be called directly. + * + * Workqueues are initialized before CPU bring up, so this + * will not conflict with the first scenario. + */ + if (system_wq) + schedule_work(&zone->unaccepted_cleanup); + else + unaccepted_cleanup_work(&zone->unaccepted_cleanup); + } } void accept_page(struct page *page) From 98b1917cdef92c29fc9a14060d5606c619050c2c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 10 Apr 2025 11:10:20 +0200 Subject: [PATCH 0874/2924] fs/dax: fix folio splitting issue by resetting old folio order + _nr_pages Alison reports an issue with fsdax when large extends end up using large ZONE_DEVICE folios: [ 417.796271] BUG: kernel NULL pointer dereference, address: 0000000000000b00 [ 417.796982] #PF: supervisor read access in kernel mode [ 417.797540] #PF: error_code(0x0000) - not-present page [ 417.798123] PGD 2a5c5067 P4D 2a5c5067 PUD 2a5c6067 PMD 0 [ 417.798690] Oops: Oops: 0000 [#1] SMP NOPTI [ 417.799178] CPU: 5 UID: 0 PID: 1515 Comm: mmap Tainted: ... [ 417.800150] Tainted: [O]=OOT_MODULE [ 417.800583] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [ 417.801358] RIP: 0010:__lruvec_stat_mod_folio+0x7e/0x250 [ 417.801948] Code: ... [ 417.803662] RSP: 0000:ffffc90002be3a08 EFLAGS: 00010206 [ 417.804234] RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000000002 [ 417.804984] RDX: ffffffff815652d7 RSI: 0000000000000000 RDI: ffffffff82a2beae [ 417.805689] RBP: ffffc90002be3a28 R08: 0000000000000000 R09: 0000000000000000 [ 417.806384] R10: ffffea0007000040 R11: ffff888376ffe000 R12: 0000000000000001 [ 417.807099] R13: 0000000000000012 R14: ffff88807fe4ab40 R15: ffff888029210580 [ 417.807801] FS: 00007f339fa7a740(0000) GS:ffff8881fa9b9000(0000) knlGS:0000000000000000 [ 417.808570] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 417.809193] CR2: 0000000000000b00 CR3: 000000002a4f0004 CR4: 0000000000370ef0 [ 417.809925] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 417.810622] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 417.811353] Call Trace: [ 417.811709] [ 417.812038] folio_add_file_rmap_ptes+0x143/0x230 [ 417.812566] insert_page_into_pte_locked+0x1ee/0x3c0 [ 417.813132] insert_page+0x78/0xf0 [ 417.813558] vmf_insert_page_mkwrite+0x55/0xa0 [ 417.814088] dax_fault_iter+0x484/0x7b0 [ 417.814542] dax_iomap_pte_fault+0x1ca/0x620 [ 417.815055] dax_iomap_fault+0x39/0x40 [ 417.815499] __xfs_write_fault+0x139/0x380 [ 417.815995] ? __handle_mm_fault+0x5e5/0x1a60 [ 417.816483] xfs_write_fault+0x41/0x50 [ 417.816966] xfs_filemap_fault+0x3b/0xe0 [ 417.817424] __do_fault+0x31/0x180 [ 417.817859] __handle_mm_fault+0xee1/0x1a60 [ 417.818325] ? debug_smp_processor_id+0x17/0x20 [ 417.818844] handle_mm_fault+0xe1/0x2b0 [...] The issue is that when we split a large ZONE_DEVICE folio to order-0 ones, we don't reset the order/_nr_pages. As folio->_nr_pages overlays page[1]->memcg_data, once page[1] is a folio, it suddenly looks like it has folio->memcg_data set. And we never manually initialize folio->memcg_data in fsdax code, because we never expect it to be set at all. When __lruvec_stat_mod_folio() then stumbles over such a folio, it tries to use folio->memcg_data (because it's non-NULL) but it does not actually point at a memcg, resulting in the problem. Alison also observed that these folios sometimes have "locked" set, which is rather concerning (folios locked from the beginning ...). The reason is that the order for large folios is stored in page[1]->flags, which become the folio->flags of a new small folio. Let's fix it by adding a folio helper to clear order/_nr_pages for splitting purposes. Maybe we should reinitialize other large folio flags / folio members as well when splitting, because they might similarly cause harm once page[1] becomes a folio? At least other flags in PAGE_FLAGS_SECOND should not be set for fsdax, so at least page[1]->flags might be as expected with this fix. From a quick glimpse, initializing ->mapping, ->pgmap and ->share should re-initialize most things from a previous page[1] used by large folios that fsdax cares about. For example folio->private might not get reinitialized, but maybe that's not relevant -- no traces of it's use in fsdax code. Needs a closer look. Another thing that should be considered in the future is performing similar checks as we perform in free_tail_page_prepare() -- checking pincount etc. -- when freeing a large fsdax folio. Link: https://lkml.kernel.org/r/20250410091020.119116-1-david@redhat.com Fixes: 4996fc547f5b ("mm: let _folio_nr_pages overlay memcg_data in first tail page") Fixes: 38607c62b34b ("fs/dax: properly refcount fs dax pages") Signed-off-by: David Hildenbrand Reported-by: Alison Schofield Closes: https://lkml.kernel.org/r/Z_W9Oeg-D9FhImf3@aschofie-mobl2.lan Tested-by: Alison Schofield Reviewed-by: Dan Williams Tested-by: "Darrick J. Wong" Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: Matthew Wilcox Cc: Alistair Popple Cc: Christoph Hellwig Signed-off-by: Andrew Morton --- fs/dax.c | 1 + include/linux/mm.h | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index af5045b0f476e0..676303419e9e8a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -396,6 +396,7 @@ static inline unsigned long dax_folio_put(struct folio *folio) order = folio_order(folio); if (!order) return 0; + folio_reset_order(folio); for (i = 0; i < (1UL << order); i++) { struct dev_pagemap *pgmap = page_pgmap(&folio->page); diff --git a/include/linux/mm.h b/include/linux/mm.h index b7f13f087954bd..bf55206935c467 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1218,6 +1218,23 @@ static inline unsigned int folio_order(const struct folio *folio) return folio_large_order(folio); } +/** + * folio_reset_order - Reset the folio order and derived _nr_pages + * @folio: The folio. + * + * Reset the order and derived _nr_pages to 0. Must only be used in the + * process of splitting large folios. + */ +static inline void folio_reset_order(struct folio *folio) +{ + if (WARN_ON_ONCE(!folio_test_large(folio))) + return; + folio->_flags_1 &= ~0xffUL; +#ifdef NR_PAGES_IN_LARGE_FOLIO + folio->_nr_pages = 0; +#endif +} + #include /* From 8ad5ac8f4fc4848d17db809038773ee0bee76b0b Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 10 Apr 2025 11:00:22 +0200 Subject: [PATCH 0875/2924] MAINTAINERS: update SLAB ALLOCATOR maintainers With permission, reduce the number of maintainers. Create a CREDITS entry for Joonsoo (Pekka already has one). Thanks for all the work! Link: https://lkml.kernel.org/r/20250410090021.72296-3-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Harry Yoo Acked-by: Christoph Lameter (Ampere) Acked-by: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Roman Gushchin Cc: Brendan Jackman Cc: Johannes Weiner Cc: Michal Hocko Cc: Suren Baghdasaryan Cc: Zi Yan Signed-off-by: Andrew Morton --- CREDITS | 4 ++++ MAINTAINERS | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index 1b77fba6c27ecb..f74d230992d6cb 100644 --- a/CREDITS +++ b/CREDITS @@ -2071,6 +2071,10 @@ S: 660 Harvard Ave. #7 S: Santa Clara, CA 95051 S: USA +N: Joonsoo Kim +E: iamjoonsoo.kim@lge.com +D: Slab allocators + N: Kukjin Kim E: kgene@kernel.org D: Samsung S3C, S5P and Exynos ARM architectures diff --git a/MAINTAINERS b/MAINTAINERS index 8c7d796131a811..16c9e10622dfc9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22249,9 +22249,7 @@ F: drivers/nvmem/layouts/sl28vpd.c SLAB ALLOCATOR M: Christoph Lameter -M: Pekka Enberg M: David Rientjes -M: Joonsoo Kim M: Andrew Morton M: Vlastimil Babka R: Roman Gushchin From 5e610c8c09990dc4bfdfdc56138838a41a718967 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 10 Apr 2025 11:00:23 +0200 Subject: [PATCH 0876/2924] MAINTAINERS: add MM subsection for the page allocator Add a subsection for the page allocator, including compaction as it's crucial for high-order allocations and works together with the anti-fragmentation features. Add reviewers (including myself) who voluteered. Link: https://lkml.kernel.org/r/20250410090021.72296-4-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Zi Yan Acked-by: Brendan Jackman Acked-by: Johannes Weiner Cc: Suren Baghdasaryan Cc: Christoph Lameter (Ampere) Cc: David Rientjes Cc: Harry Yoo Cc: Joonsoo Kim Cc: Pekka Enberg Cc: Roman Gushchin Signed-off-by: Andrew Morton --- MAINTAINERS | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 16c9e10622dfc9..2f7da8cc290a99 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15516,6 +15516,21 @@ F: mm/numa.c F: mm/numa_emulation.c F: mm/numa_memblks.c +MEMORY MANAGEMENT - PAGE ALLOCATOR +M: Andrew Morton +R: Vlastimil Babka +R: Suren Baghdasaryan +R: Michal Hocko +R: Brendan Jackman +R: Johannes Weiner +R: Zi Yan +L: linux-mm@kvack.org +S: Maintained +F: mm/compaction.c +F: mm/page_alloc.c +F: include/linux/gfp.h +F: include/linux/compaction.h + MEMORY MANAGEMENT - SECRETMEM M: Andrew Morton M: Mike Rapoport From 6b956934ad6d9f76c66c8fab570b07536c6ca472 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 10 Apr 2025 16:18:12 +0800 Subject: [PATCH 0877/2924] mm: memcontrol: fix swap counter leak from offline cgroup commit 73f839b6d2ed addressed an issue regarding the swap counter leak that occurred from an offline cgroup. However, commit 89ce924f0bd4 modified the parameter from @swap_memcg to @memcg (presumably this alteration was introduced while resolving conflicts). Fix this problem by reverting this minor change. Link: https://lkml.kernel.org/r/20250410081812.10073-1-songmuchun@bytedance.com Fixes: 89ce924f0bd4 ("mm: memcontrol: move memsw charge callbacks to v1") Signed-off-by: Muchun Song Acked-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Michal Hocko Signed-off-by: Andrew Morton --- mm/memcontrol-v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 8660908850dc81..4a9cf27a70af05 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -620,7 +620,7 @@ void memcg1_swapout(struct folio *folio, swp_entry_t entry) mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); - swap_cgroup_record(folio, mem_cgroup_id(memcg), entry); + swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry); folio_unqueue_deferred_split(folio); folio->memcg_data = 0; From 1413efdb254f41c05ae5c13aa975ecd9733f37a7 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 11 Apr 2025 13:33:28 -0400 Subject: [PATCH 0878/2924] MAINTAINERS: add mmap trace events to MEMORY MAPPING MEMORY MAPPING does not list the mmap.h trace point file, but does list the mmap.c file. Couple the trace points with the users and authors of the trace points for notifications of updates. Link: https://lkml.kernel.org/r/20250411173328.8172-1-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Acked-by: SeongJae Park Acked-by: Steven Rostedt (Google) Acked-by: Vlastimil Babka Cc: Lorenzo Stoakes Cc: Jann Horn Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2f7da8cc290a99..78006958e7c76d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15562,6 +15562,7 @@ L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: include/trace/events/mmap.h F: mm/mlock.c F: mm/mmap.c F: mm/mprotect.c From 86fba6127e197c7d646e8ee771df6026e14211dc Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 11 Apr 2025 08:27:24 +0100 Subject: [PATCH 0879/2924] MAINTAINERS: add memory advice section The madvise code straddles both VMA and page table manipulation. As a result, separate it out into its own section and add maintainers/reviewers as appropriate. We additionally include the mman-common.h file as this contains the shared madvise flags and it is important we maintain this alongside madvise.c. Link: https://lkml.kernel.org/r/20250411072724.10841-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Liam R. Howlett Acked-by: Vlastimil Babka Acked-by: Jann Horn Acked-by: David Hildenbrand Signed-off-by: Andrew Morton --- MAINTAINERS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 78006958e7c76d..aaeb4d7113c348 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15573,6 +15573,20 @@ F: mm/vma.h F: mm/vma_internal.h F: tools/testing/vma/ +MEMORY MAPPING - MADVISE (MEMORY ADVICE) +M: Andrew Morton +M: Liam R. Howlett +M: Lorenzo Stoakes +M: David Hildenbrand +R: Vlastimil Babka +R: Jann Horn +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: include/uapi/asm-generic/mman-common.h +F: mm/madvise.c + MEMORY TECHNOLOGY DEVICES (MTD) M: Miquel Raynal M: Richard Weinberger From 8c03ebd7cdc06bd0d2fecb4d1a609ef1dbb7d0aa Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 10 Apr 2025 11:57:14 +0800 Subject: [PATCH 0880/2924] mm/gup: fix wrongly calculated returned value in fault_in_safe_writeable() Not like fault_in_readable() or fault_in_writeable(), in fault_in_safe_writeable() local variable 'start' is increased page by page to loop till the whole address range is handled. However, it mistakenly calculates the size of the handled range with 'uaddr - start'. Fix it here. Andreas said: : In gfs2, fault_in_iov_iter_writeable() is used in : gfs2_file_direct_read() and gfs2_file_read_iter(), so this potentially : affects buffered as well as direct reads. This bug could cause those : gfs2 functions to spin in a loop. Link: https://lkml.kernel.org/r/20250410035717.473207-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20250410035717.473207-2-bhe@redhat.com Signed-off-by: Baoquan He Fixes: fe673d3f5bf1 ("mm: gup: make fault_in_safe_writeable() use fixup_user_fault()") Reviewed-by: Oscar Salvador Acked-by: David Hildenbrand Cc: Andreas Gruenbacher Cc: Yanjun.Zhu Cc: Signed-off-by: Andrew Morton --- mm/gup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 92351e2fa876bf..84461d384ae2be 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2207,8 +2207,8 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) } while (start != end); mmap_read_unlock(mm); - if (size > (unsigned long)uaddr - start) - return size - ((unsigned long)uaddr - start); + if (size > start - (unsigned long)uaddr) + return size - (start - (unsigned long)uaddr); return 0; } EXPORT_SYMBOL(fault_in_safe_writeable); From fd0ad5e9d158436b4a9b34c60582488585e1d90d Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 14 Apr 2025 09:35:31 +0200 Subject: [PATCH 0881/2924] docs: ABI: replace mcroce@microsoft.com with new Meta address The Microsoft email address is bouncing: 550 5.4.1 Recipient address rejected: Access denied. So let's replace it with Matteo's current mail address. Link: https://lkml.kernel.org/r/20250414-fix-mcroce-mail-bounce-v3-1-0aed2d71f3d7@pengutronix.de Signed-off-by: Ahmad Fatoum Acked-by: Matteo Croce Link: https://lore.kernel.org/all/BYAPR15MB2504E4B02DFFB1E55871955DA1062@BYAPR15MB2504.namprd15.prod.outlook.com/ Cc: Daniel Lezcano Cc: Jens Axboe Cc: Matteo Croce Cc: Sascha Hauer Signed-off-by: Andrew Morton --- Documentation/ABI/stable/sysfs-block | 2 +- Documentation/ABI/testing/sysfs-kernel-reboot | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index 3879963f0f01e5..11545c9e2e93f2 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -77,7 +77,7 @@ Description: What: /sys/block//diskseq Date: February 2021 -Contact: Matteo Croce +Contact: Matteo Croce Description: The /sys/block//diskseq files reports the disk sequence number, which is a monotonically increasing diff --git a/Documentation/ABI/testing/sysfs-kernel-reboot b/Documentation/ABI/testing/sysfs-kernel-reboot index e117aba46be0e8..52571fd5ddba51 100644 --- a/Documentation/ABI/testing/sysfs-kernel-reboot +++ b/Documentation/ABI/testing/sysfs-kernel-reboot @@ -1,7 +1,7 @@ What: /sys/kernel/reboot Date: November 2020 KernelVersion: 5.11 -Contact: Matteo Croce +Contact: Matteo Croce Description: Interface to set the kernel reboot behavior, similarly to what can be done via the reboot= cmdline option. (see Documentation/admin-guide/kernel-parameters.txt) @@ -9,25 +9,25 @@ Description: Interface to set the kernel reboot behavior, similarly to What: /sys/kernel/reboot/mode Date: November 2020 KernelVersion: 5.11 -Contact: Matteo Croce +Contact: Matteo Croce Description: Reboot mode. Valid values are: cold warm hard soft gpio What: /sys/kernel/reboot/type Date: November 2020 KernelVersion: 5.11 -Contact: Matteo Croce +Contact: Matteo Croce Description: Reboot type. Valid values are: bios acpi kbd triple efi pci What: /sys/kernel/reboot/cpu Date: November 2020 KernelVersion: 5.11 -Contact: Matteo Croce +Contact: Matteo Croce Description: CPU number to use to reboot. What: /sys/kernel/reboot/force Date: November 2020 KernelVersion: 5.11 -Contact: Matteo Croce +Contact: Matteo Croce Description: Don't wait for any other CPUs on reboot and avoid anything that could hang. From 9e888998ea4d22257b07ce911576509486fa0667 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 12 Apr 2025 18:39:12 +0200 Subject: [PATCH 0882/2924] writeback: fix false warning in inode_to_wb() inode_to_wb() is used also for filesystems that don't support cgroup writeback. For these filesystems inode->i_wb is stable during the lifetime of the inode (it points to bdi->wb) and there's no need to hold locks protecting the inode->i_wb dereference. Improve the warning in inode_to_wb() to not trigger for these filesystems. Link: https://lkml.kernel.org/r/20250412163914.3773459-3-agruenba@redhat.com Fixes: aaa2cacf8184 ("writeback: add lockdep annotation to inode_to_wb()") Signed-off-by: Jan Kara Signed-off-by: Andreas Gruenbacher Reviewed-by: Andreas Gruenbacher Cc: Signed-off-by: Andrew Morton --- include/linux/backing-dev.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 8e7af9a03b41dd..e721148c95d07d 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -249,6 +249,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(debug_locks && + (inode->i_sb->s_iflags & SB_I_CGROUPWB) && (!lockdep_is_held(&inode->i_lock) && !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) && !lockdep_is_held(&inode->i_wb->list_lock))); From 274fe92de2c4e50dbfd1b30070b4f6d8a27b388a Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 15 Apr 2025 13:18:59 +0200 Subject: [PATCH 0883/2924] mm, hugetlb: increment the number of pages to be reset on HVO commit 4eeec8c89a0c ("mm: move hugetlb specific things in folio to page[3]") shifted hugetlb specific stuff, and now mapping overlaps _hugetlb_cgroup field. Upon restoring the vmemmap for HVO, only the first two tail pages are reset, and this causes the check in free_tail_page_prepare() to fail as it finds an unexpected mapping value in some tails. Increment the number of pages to be reset to 4 (head + 3 tail pages) Link: https://lkml.kernel.org/r/20250415111859.376302-1-osalvador@suse.de Fixes: 4eeec8c89a0c ("mm: move hugetlb specific things in folio to page[3]") Signed-off-by: Oscar Salvador Suggested-by: David Hildenbrand Reviewed-by: David Hildenbrand Reviewed-by: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb_vmemmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 9a99dfa3c49583..27245e86df2500 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -238,11 +238,11 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, * struct page, the special metadata (e.g. page->flags or page->mapping) * cannot copy to the tail struct page structs. The invalid value will be * checked in the free_tail_page_prepare(). In order to avoid the message - * of "corrupted mapping in tail page". We need to reset at least 3 (one - * head struct page struct and two tail struct page structs) struct page + * of "corrupted mapping in tail page". We need to reset at least 4 (one + * head struct page struct and three tail struct page structs) struct page * structs. */ -#define NR_RESET_STRUCT_PAGE 3 +#define NR_RESET_STRUCT_PAGE 4 static inline void reset_struct_pages(struct page *start) { From 8bdea2fce98033d392db16da246843cadeddef39 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 15 Apr 2025 11:50:07 +0200 Subject: [PATCH 0884/2924] mm/memory: move sanity checks in do_wp_page() after mapcount vs. refcount stabilization In __folio_remove_rmap() for RMAP_LEVEL_PMD/RMAP_LEVEL_PUD and with CONFIG_PAGE_MAPCOUNT we first decrement the folio mapcount (and recompute mapped shared vs. mapped exclusively) to then adjust the entire mapcount. This means that another process might stumble in do_wp_page() over a PTE-mapped PMD folio that is indicated as "exclusively mapped", but still has an entire mapcount (PMD mapping), because it is racing with the process that is unmapping the folio (PMD mapping). Note that do_wp_page() will back off once it detects the remaining folio reference from the process that is in the process of unmapping the folio. This will trigger the early VM_WARN_ON_ONCE(folio_entire_mapcount(folio)) check in do_wp_page(), that can easily be reproduced by looping a couple of times over allocating a PMD THP, forking a child where we immediately unmap it again, and writing in the parent concurrently to the THP. [ 252.738129][T16470] ------------[ cut here ]------------ [ 252.739267][T16470] WARNING: CPU: 3 PID: 16470 at mm/memory.c:3738 do_wp_page+0x2a75/0x2c00 [ 252.740968][T16470] Modules linked in: [ 252.741958][T16470] CPU: 3 UID: 0 PID: 16470 Comm: ... ... [ 252.765841][T16470] [ 252.766419][T16470] ? srso_alias_return_thunk+0x5/0xfbef5 [ 252.767558][T16470] ? rcu_is_watching+0x12/0x60 [ 252.768525][T16470] ? srso_alias_return_thunk+0x5/0xfbef5 [ 252.769645][T16470] ? srso_alias_return_thunk+0x5/0xfbef5 [ 252.770778][T16470] ? lock_acquire+0x33/0x80 [ 252.771697][T16470] ? __handle_mm_fault+0x5e8/0x3e40 [ 252.772735][T16470] ? __handle_mm_fault+0x5e8/0x3e40 [ 252.773781][T16470] __handle_mm_fault+0x1869/0x3e40 [ 252.774839][T16470] handle_mm_fault+0x22a/0x640 [ 252.775808][T16470] do_user_addr_fault+0x618/0x1000 [ 252.776847][T16470] exc_page_fault+0x68/0xd0 [ 252.777775][T16470] asm_exc_page_fault+0x26/0x30 While we could adjust the sequence in __folio_remove_rmap(), let's rater move the mapcount sanity checks after the mapcount vs. refcount stabilization phase. With this fix, a simple reproducer is happy. While at it, convert the two VM_WARN_ON_ONCE() we are moving to VM_WARN_ON_ONCE_FOLIO(). Link: https://lkml.kernel.org/r/20250415095007.569836-1-david@redhat.com Fixes: 1da190f4d0a6 ("mm: Copy-on-Write (COW) reuse support for PTE-mapped THP") Signed-off-by: David Hildenbrand Reported-by: syzbot+5e8feb543ca8e12e0ede@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/67fab4fe.050a0220.2c5fcf.0011.GAE@google.com Reviewed-by: Oscar Salvador Signed-off-by: Andrew Morton --- mm/memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 44481fe7c629ec..ba3ea0a82f7f77 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3734,8 +3734,6 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio, return false; VM_WARN_ON_ONCE(folio_test_ksm(folio)); - VM_WARN_ON_ONCE(folio_mapcount(folio) > folio_nr_pages(folio)); - VM_WARN_ON_ONCE(folio_entire_mapcount(folio)); if (unlikely(folio_test_swapcache(folio))) { /* @@ -3760,6 +3758,8 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio, if (folio_large_mapcount(folio) != folio_ref_count(folio)) goto unlock; + VM_WARN_ON_ONCE_FOLIO(folio_large_mapcount(folio) > folio_nr_pages(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_entire_mapcount(folio), folio); VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != vma->vm_mm->mm_id && folio_mm_id(folio, 1) != vma->vm_mm->mm_id); From 2db93a896fec7109302598cf45de3831340d9f53 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 16 Apr 2025 14:53:01 +0100 Subject: [PATCH 0885/2924] MAINTAINERS: add Pedro as reviewer to the MEMORY MAPPING section Pedro has offered to review memory mapping code. He has good experience in this area and has provided excellent feedback on memory mapping series in the past so I feel he'll be a great addition. Link: https://lkml.kernel.org/r/20250416135301.43513-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Acked-by: Pedro Falcato Acked-by: Liam R. Howlett Cc: Jann Horn Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index aaeb4d7113c348..40e1999fd21b6f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15558,6 +15558,7 @@ M: Liam R. Howlett M: Lorenzo Stoakes R: Vlastimil Babka R: Jann Horn +R: Pedro Falcato L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org From 38448181459e24257b40d5258afdbaa3565e8cfc Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 16 Apr 2025 09:45:39 -0400 Subject: [PATCH 0886/2924] mm: vmscan: restore high-cpu watermark safety in kswapd Vlastimil points out that commit a211c6550efc ("mm: page_alloc: defrag_mode kswapd/kcompactd watermarks") switched kswapd from zone_watermark_ok_safe() to the standard, percpu-cached version of reading free pages, thus dropping the watermark safety precautions for systems with high CPU counts (e.g. >212 cpus on 64G). Restore them. Since zone_watermark_ok_safe() is no longer the right interface, and this was the last caller of the function anyway, open-code the zone_page_state_snapshot() conditional and delete the function. Link: https://lkml.kernel.org/r/20250416135142.778933-2-hannes@cmpxchg.org Fixes: a211c6550efc ("mm: page_alloc: defrag_mode kswapd/kcompactd watermarks") Signed-off-by: Johannes Weiner Reported-by: Vlastimil Babka Reviewed-by: Vlastimil Babka Cc: Brendan Jackman Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 -- mm/page_alloc.c | 12 ------------ mm/vmscan.c | 21 +++++++++++++++++++-- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4c95fcc9e9df0c..6ccec1bf2896ff 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1502,8 +1502,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); -bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int highest_zoneidx); /* * Memory initialization context, use to differentiate memory added by * the platform statically or via memory hotplug interface. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e506e365d6f18b..5669baf2a6fea7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3470,18 +3470,6 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, return false; } -bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int highest_zoneidx) -{ - long free_pages = zone_page_state(z, NR_FREE_PAGES); - - if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) - free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES); - - return __zone_watermark_ok(z, order, mark, highest_zoneidx, 0, - free_pages); -} - #ifdef CONFIG_NUMA int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; diff --git a/mm/vmscan.c b/mm/vmscan.c index b620d74b0f66e3..cc422ad830d635 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -6736,6 +6736,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) * meet watermarks. */ for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) { + enum zone_stat_item item; unsigned long free_pages; if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) @@ -6748,9 +6749,25 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) * blocks to avoid polluting allocator fallbacks. */ if (defrag_mode) - free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS); + item = NR_FREE_PAGES_BLOCKS; else - free_pages = zone_page_state(zone, NR_FREE_PAGES); + item = NR_FREE_PAGES; + + /* + * When there is a high number of CPUs in the system, + * the cumulative error from the vmstat per-cpu cache + * can blur the line between the watermarks. In that + * case, be safe and get an accurate snapshot. + * + * TODO: NR_FREE_PAGES_BLOCKS moves in steps of + * pageblock_nr_pages, while the vmstat pcp threshold + * is limited to 125. On many configurations that + * counter won't actually be per-cpu cached. But keep + * things simple for now; revisit when somebody cares. + */ + free_pages = zone_page_state(zone, item); + if (zone->percpu_drift_mark && free_pages < zone->percpu_drift_mark) + free_pages = zone_page_state_snapshot(zone, item); if (__zone_watermark_ok(zone, order, mark, highest_zoneidx, 0, free_pages)) From a1f0220f3319057b364d871659ef7c10ab78f795 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 16 Apr 2025 09:45:40 -0400 Subject: [PATCH 0887/2924] mm: vmscan: fix kswapd exit condition in defrag_mode Vlastimil points out an issue with kswapd in defrag_mode not waking up kcompactd reliably. Background: When kswapd is woken for any higher-order request, it initially checks those high-order watermarks to decide if work is necesary. However, it cannot (efficiently) meet the contiguity goal of such a request by itself. So once it has reclaimed a compaction gap, it adjusts the request down to check for free order-0 pages, then wakes kcompactd to coalesce them into larger blocks. In defrag_mode, the initial watermark check needs to be analogously against free pageblocks. However, once kswapd drops the high-order to hand off contiguity work, it also needs to fall back to base page watermarks - otherwise it'll keep reclaiming until blocks are freed. While it appears kcompactd is woken up frequently enough to do most of the compaction work, kswapd ends up overreclaiming by quite a bit: DEFRAGMODE DEFRAGMODE-thispatch Hugealloc Time mean 79381.34 ( +0.00%) 88126.12 ( +11.02%) Hugealloc Time stddev 85852.16 ( +0.00%) 135366.75 ( +57.67%) Kbuild Real time 249.35 ( +0.00%) 226.71 ( -9.04%) Kbuild User time 1249.16 ( +0.00%) 1249.37 ( +0.02%) Kbuild System time 171.76 ( +0.00%) 166.93 ( -2.79%) THP fault alloc 51666.87 ( +0.00%) 52685.60 ( +1.97%) THP fault fallback 16970.00 ( +0.00%) 15951.87 ( -6.00%) Direct compact fail 166.53 ( +0.00%) 178.93 ( +7.40%) Direct compact success 17.13 ( +0.00%) 4.13 ( -71.69%) Compact daemon scanned migrate 3095413.33 ( +0.00%) 9231239.53 ( +198.22%) Compact daemon scanned free 2155966.53 ( +0.00%) 7053692.87 ( +227.17%) Compact direct scanned migrate 265642.47 ( +0.00%) 68388.33 ( -74.26%) Compact direct scanned free 130252.60 ( +0.00%) 55634.87 ( -57.29%) Compact total migrate scanned 3361055.80 ( +0.00%) 9299627.87 ( +176.69%) Compact total free scanned 2286219.13 ( +0.00%) 7109327.73 ( +210.96%) Alloc stall 1890.80 ( +0.00%) 6297.60 ( +232.94%) Pages kswapd scanned 9043558.80 ( +0.00%) 5952576.73 ( -34.18%) Pages kswapd reclaimed 1891708.67 ( +0.00%) 1030645.00 ( -45.52%) Pages direct scanned 1017090.60 ( +0.00%) 2688047.60 ( +164.29%) Pages direct reclaimed 92682.60 ( +0.00%) 309770.53 ( +234.22%) Pages total scanned 10060649.40 ( +0.00%) 8640624.33 ( -14.11%) Pages total reclaimed 1984391.27 ( +0.00%) 1340415.53 ( -32.45%) Swap out 884585.73 ( +0.00%) 417781.93 ( -52.77%) Swap in 287106.27 ( +0.00%) 95589.73 ( -66.71%) File refaults 551697.60 ( +0.00%) 426474.80 ( -22.70%) Link: https://lkml.kernel.org/r/20250416135142.778933-3-hannes@cmpxchg.org Fixes: a211c6550efc ("mm: page_alloc: defrag_mode kswapd/kcompactd watermarks") Signed-off-by: Johannes Weiner Reported-by: Vlastimil Babka Reviewed-by: Vlastimil Babka Cc: Brendan Jackman Signed-off-by: Andrew Morton --- mm/vmscan.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index cc422ad830d635..3783e45bfc92e2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -6747,8 +6747,14 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) /* * In defrag_mode, watermarks must be met in whole * blocks to avoid polluting allocator fallbacks. + * + * However, kswapd usually cannot accomplish this on + * its own and needs kcompactd support. Once it's + * reclaimed a compaction gap, and kswapd_shrink_node + * has dropped order, simply ensure there are enough + * base pages for compaction, wake kcompactd & sleep. */ - if (defrag_mode) + if (defrag_mode && order) item = NR_FREE_PAGES_BLOCKS; else item = NR_FREE_PAGES; From ea21641b6a79f9cdd64f8339983c71c89949dcb5 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 16 Apr 2025 11:38:37 +0100 Subject: [PATCH 0888/2924] MAINTAINERS: add section for locking of mm's and VMAs We place this under memory mapping as related to memory mapping abstractions in the form of mm_struct and vm_area_struct (VMA). Now we have separated out mmap/vma locking logic into the mmap_lock.c and mmap_lock.h files, so this should encapsulate the majority of the mm locking logic in the kernel. Suren is best placed to maintain this logic as the core architect of VMA locking as a whole. Link: https://lkml.kernel.org/r/e6ed679a184ca444b20dfa77af96913fd8b5efa0.1744799282.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Suren Baghdasaryan Reviewed-by: Shakeel Butt Acked-by: David Hildenbrand Reviewed-by: Liam R. Howlett Acked-by: Vlastimil Babka Cc: Matthew Wilcox (Oracle) Cc: "Paul E . McKenney" Cc: SeongJae Park Signed-off-by: Andrew Morton --- MAINTAINERS | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 40e1999fd21b6f..c70f16ed62083a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15574,6 +15574,22 @@ F: mm/vma.h F: mm/vma_internal.h F: tools/testing/vma/ +MEMORY MAPPING - LOCKING +M: Andrew Morton +M: Suren Baghdasaryan +M: Liam R. Howlett +M: Lorenzo Stoakes +R: Vlastimil Babka +R: Shakeel Butt +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: Documentation/mm/process_addrs.rst +F: include/linux/mmap_lock.h +F: include/trace/events/mmap_lock.h +F: mm/mmap_lock.c + MEMORY MAPPING - MADVISE (MEMORY ADVICE) M: Andrew Morton M: Liam R. Howlett From f12ecf5e1c5eca48b8652e893afcdb730384a6aa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 18 Apr 2025 13:02:27 +0100 Subject: [PATCH 0889/2924] io_uring/zcrx: fix late dma unmap for a dead dev There is a problem with page pools not dma-unmapping immediately when the device is going down, and delaying it until the page pool is destroyed, which is not allowed (see links). That just got fixed for normal page pools, and we need to address memory providers as well. Unmap pages in the memory provider uninstall callback, and protect it with a new lock. There is also a gap between when a dma mapping is created and the mp is installed, so if the device is killed in between, io_uring would be holding on to dma mappings to a dead device with no one to call ->uninstall. Move it to page pool init and rely on ->is_mapped to make sure it's only done once. Link: https://lore.kernel.org/lkml/8067f204-1380-4d37-8ffd-007fc6f26738@kernel.org/T/ Link: https://lore.kernel.org/all/20250409-page-pool-track-dma-v9-0-6a9ef2e0cba8@redhat.com/ Fixes: 34a3e60821ab9 ("io_uring/zcrx: implement zerocopy receive pp memory provider") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/ef9b7db249b14f6e0b570a1bb77ff177389f881c.1744965853.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/zcrx.c | 21 +++++++++++++++++---- io_uring/zcrx.h | 1 + 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 5defbe8f95f9f8..fe86606b9f304d 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -51,14 +51,21 @@ static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) { + guard(mutex)(&ifq->dma_lock); + if (area->is_mapped) __io_zcrx_unmap_area(ifq, area, area->nia.num_niovs); + area->is_mapped = false; } static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) { int i; + guard(mutex)(&ifq->dma_lock); + if (area->is_mapped) + return 0; + for (i = 0; i < area->nia.num_niovs; i++) { struct net_iov *niov = &area->nia.niovs[i]; dma_addr_t dma; @@ -280,6 +287,7 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) ifq->ctx = ctx; spin_lock_init(&ifq->lock); spin_lock_init(&ifq->rq_lock); + mutex_init(&ifq->dma_lock); return ifq; } @@ -329,6 +337,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) put_device(ifq->dev); io_free_rbuf_ring(ifq); + mutex_destroy(&ifq->dma_lock); kfree(ifq); } @@ -400,10 +409,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, goto err; get_device(ifq->dev); - ret = io_zcrx_map_area(ifq, ifq->area); - if (ret) - goto err; - mp_param.mp_ops = &io_uring_pp_zc_ops; mp_param.mp_priv = ifq; ret = net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param); @@ -624,6 +629,7 @@ static bool io_pp_zc_release_netmem(struct page_pool *pp, netmem_ref netmem) static int io_pp_zc_init(struct page_pool *pp) { struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); + int ret; if (WARN_ON_ONCE(!ifq)) return -EINVAL; @@ -636,6 +642,10 @@ static int io_pp_zc_init(struct page_pool *pp) if (pp->p.dma_dir != DMA_FROM_DEVICE) return -EOPNOTSUPP; + ret = io_zcrx_map_area(ifq, ifq->area); + if (ret) + return ret; + percpu_ref_get(&ifq->ctx->refs); return 0; } @@ -671,6 +681,9 @@ static void io_pp_uninstall(void *mp_priv, struct netdev_rx_queue *rxq) struct io_zcrx_ifq *ifq = mp_priv; io_zcrx_drop_netdev(ifq); + if (ifq->area) + io_zcrx_unmap_area(ifq, ifq->area); + p->mp_ops = NULL; p->mp_priv = NULL; } diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 47f1c0e8c197ca..f2bc811f022c67 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -38,6 +38,7 @@ struct io_zcrx_ifq { struct net_device *netdev; netdevice_tracker netdev_tracker; spinlock_t lock; + struct mutex dma_lock; }; #if defined(CONFIG_IO_URING_ZCRX) From 263e55949d8902a6a09bdb92a1ab6a3f67231abe Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Fri, 18 Apr 2025 11:49:40 +0530 Subject: [PATCH 0890/2924] x86/cpu/amd: Fix workaround for erratum 1054 Erratum 1054 affects AMD Zen processors that are a part of Family 17h Models 00-2Fh and the workaround is to not set HWCR[IRPerfEn]. However, when X86_FEATURE_ZEN1 was introduced, the condition to detect unaffected processors was incorrectly changed in a way that the IRPerfEn bit gets set only for unaffected Zen 1 processors. Ensure that HWCR[IRPerfEn] is set for all unaffected processors. This includes a subset of Zen 1 (Family 17h Models 30h and above) and all later processors. Also clear X86_FEATURE_IRPERF on affected processors so that the IRPerfCount register is not used by other entities like the MSR PMU driver. Fixes: 232afb557835 ("x86/CPU/AMD: Add X86_FEATURE_ZEN1") Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/caa057a9d6f8ad579e2f1abaa71efbd5bd4eaf6d.1744956467.git.sandipan.das@amd.com --- arch/x86/kernel/cpu/amd.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a839ff506f4548..2b36379ff675dc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -869,6 +869,16 @@ static void init_amd_zen1(struct cpuinfo_x86 *c) pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); setup_force_cpu_bug(X86_BUG_DIV0); + + /* + * Turn off the Instructions Retired free counter on machines that are + * susceptible to erratum #1054 "Instructions Retired Performance + * Counter May Be Inaccurate". + */ + if (c->x86_model < 0x30) { + msr_clear_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); + clear_cpu_cap(c, X86_FEATURE_IRPERF); + } } static bool cpu_has_zenbleed_microcode(void) @@ -1052,13 +1062,8 @@ static void init_amd(struct cpuinfo_x86 *c) if (!cpu_feature_enabled(X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - /* - * Turn on the Instructions Retired free counter on machines not - * susceptible to erratum #1054 "Instructions Retired Performance - * Counter May Be Inaccurate". - */ - if (cpu_has(c, X86_FEATURE_IRPERF) && - (boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model > 0x2f)) + /* Enable the Instructions Retired free counter */ + if (cpu_has(c, X86_FEATURE_IRPERF)) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); check_null_seg_clears_base(c); From d54d610243a4508183978871e5faff5502786cd4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 17 Apr 2025 22:21:21 +0200 Subject: [PATCH 0891/2924] x86/boot/sev: Avoid shared GHCB page for early memory acceptance Communicating with the hypervisor using the shared GHCB page requires clearing the C bit in the mapping of that page. When executing in the context of the EFI boot services, the page tables are owned by the firmware, and this manipulation is not possible. So switch to a different API for accepting memory in SEV-SNP guests, one which is actually supported at the point during boot where the EFI stub may need to accept memory, but the SEV-SNP init code has not executed yet. For simplicity, also switch the memory acceptance carried out by the decompressor when not booting via EFI - this only involves the allocation for the decompressed kernel, and is generally only called after kexec, as normal boot will jump straight into the kernel from the EFI stub. Fixes: 6c3211796326 ("x86/sev: Add SNP-specific unaccepted memory support") Tested-by: Tom Lendacky Co-developed-by: Tom Lendacky Signed-off-by: Tom Lendacky Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Cc: Dionna Amalie Glaze Cc: Kevin Loughlin Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: linux-efi@vger.kernel.org Link: https://lore.kernel.org/r/20250404082921.2767593-8-ardb+git@google.com # discussion thread #1 Link: https://lore.kernel.org/r/20250410132850.3708703-2-ardb+git@google.com # discussion thread #2 Link: https://lore.kernel.org/r/20250417202120.1002102-2-ardb+git@google.com # final submission --- arch/x86/boot/compressed/mem.c | 5 ++- arch/x86/boot/compressed/sev.c | 67 ++++++++-------------------------- arch/x86/boot/compressed/sev.h | 2 + 3 files changed, 21 insertions(+), 53 deletions(-) diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c index dbba332e4a12d7..f676156d9f3db4 100644 --- a/arch/x86/boot/compressed/mem.c +++ b/arch/x86/boot/compressed/mem.c @@ -34,11 +34,14 @@ static bool early_is_tdx_guest(void) void arch_accept_memory(phys_addr_t start, phys_addr_t end) { + static bool sevsnp; + /* Platform-specific memory-acceptance call goes here */ if (early_is_tdx_guest()) { if (!tdx_accept_memory(start, end)) panic("TDX: Failed to accept memory\n"); - } else if (sev_snp_enabled()) { + } else if (sevsnp || (sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) { + sevsnp = true; snp_accept_memory(start, end); } else { error("Cannot accept memory: unknown platform\n"); diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index bb55934c1cee70..89ba168f4f0f02 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -164,10 +164,7 @@ bool sev_snp_enabled(void) static void __page_state_change(unsigned long paddr, enum psc_op op) { - u64 val; - - if (!sev_snp_enabled()) - return; + u64 val, msr; /* * If private -> shared then invalidate the page before requesting the @@ -176,6 +173,9 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) if (op == SNP_PAGE_STATE_SHARED) pvalidate_4k_page(paddr, paddr, false); + /* Save the current GHCB MSR value */ + msr = sev_es_rd_ghcb_msr(); + /* Issue VMGEXIT to change the page state in RMP table. */ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); VMGEXIT(); @@ -185,6 +185,9 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + /* Restore the GHCB MSR value */ + sev_es_wr_ghcb_msr(msr); + /* * Now that page state is changed in the RMP table, validate it so that it is * consistent with the RMP entry. @@ -195,11 +198,17 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) void snp_set_page_private(unsigned long paddr) { + if (!sev_snp_enabled()) + return; + __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE); } void snp_set_page_shared(unsigned long paddr) { + if (!sev_snp_enabled()) + return; + __page_state_change(paddr, SNP_PAGE_STATE_SHARED); } @@ -223,56 +232,10 @@ static bool early_setup_ghcb(void) return true; } -static phys_addr_t __snp_accept_memory(struct snp_psc_desc *desc, - phys_addr_t pa, phys_addr_t pa_end) -{ - struct psc_hdr *hdr; - struct psc_entry *e; - unsigned int i; - - hdr = &desc->hdr; - memset(hdr, 0, sizeof(*hdr)); - - e = desc->entries; - - i = 0; - while (pa < pa_end && i < VMGEXIT_PSC_MAX_ENTRY) { - hdr->end_entry = i; - - e->gfn = pa >> PAGE_SHIFT; - e->operation = SNP_PAGE_STATE_PRIVATE; - if (IS_ALIGNED(pa, PMD_SIZE) && (pa_end - pa) >= PMD_SIZE) { - e->pagesize = RMP_PG_SIZE_2M; - pa += PMD_SIZE; - } else { - e->pagesize = RMP_PG_SIZE_4K; - pa += PAGE_SIZE; - } - - e++; - i++; - } - - if (vmgexit_psc(boot_ghcb, desc)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); - - pvalidate_pages(desc); - - return pa; -} - void snp_accept_memory(phys_addr_t start, phys_addr_t end) { - struct snp_psc_desc desc = {}; - unsigned int i; - phys_addr_t pa; - - if (!boot_ghcb && !early_setup_ghcb()) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); - - pa = start; - while (pa < end) - pa = __snp_accept_memory(&desc, pa, end); + for (phys_addr_t pa = start; pa < end; pa += PAGE_SIZE) + __page_state_change(pa, SNP_PAGE_STATE_PRIVATE); } void sev_es_shutdown_ghcb(void) diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h index fc725a981b093b..4e463f33186df4 100644 --- a/arch/x86/boot/compressed/sev.h +++ b/arch/x86/boot/compressed/sev.h @@ -12,11 +12,13 @@ bool sev_snp_enabled(void); void snp_accept_memory(phys_addr_t start, phys_addr_t end); +u64 sev_get_status(void); #else static inline bool sev_snp_enabled(void) { return false; } static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } +static inline u64 sev_get_status(void) { return 0; } #endif From a34d74877c66ce484ad586d806002ceaedd58657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 3 Apr 2025 12:31:37 +0300 Subject: [PATCH 0892/2924] PCI: Restore assigned resources fully after release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI resource fitting code in __assign_resources_sorted() runs in multiple steps. A resource that was successfully assigned may have to be released before the next step attempts assignment again. The assign+release cycle is destructive to a start-aligned struct resource (bridge window or IOV resource) because the start field is overwritten with the real address when the resource got assigned. One symptom: pci 0002:00:00.0: bridge window [mem size 0x00100000]: can't assign; bogus alignment Properly restore the resource after releasing it. The start, end, and flags fields must be stored into the related struct pci_dev_resource in order to be able to restore the resource to its original state. Fixes: 96336ec70264 ("PCI: Perform reset_resource() and build fail list in sync") Reported-by: Guenter Roeck Closes: https://lore.kernel.org/r/01eb7d40-f5b5-4ec5-b390-a5c042c30aff@roeck-us.net/ Reported-by: Nicolas Frattaroli Closes: https://lore.kernel.org/r/3578030.5fSG56mABF@workhorse Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Tested-by: Nicolas Frattaroli Tested-by: Guenter Roeck Tested-by: Ondrej Jirman Link: https://patch.msgid.link/20250403093137.1481-1-ilpo.jarvinen@linux.intel.com --- drivers/pci/setup-bus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 54d6f4fa3ce166..e994c546422c9e 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -187,6 +187,9 @@ static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head) panic("%s: kzalloc() failed!\n", __func__); tmp->res = r; tmp->dev = dev; + tmp->start = r->start; + tmp->end = r->end; + tmp->flags = r->flags; /* Fallback is smallest one or list is empty */ n = head; @@ -545,6 +548,7 @@ static void __assign_resources_sorted(struct list_head *head, pci_dbg(dev, "%s %pR: releasing\n", res_name, res); release_resource(res); + restore_dev_resource(dev_res); } /* Restore start/end/flags from saved list */ list_for_each_entry(save_res, &save_head, list) From 39e703ed3b48c4262be141072d4f42a8b89a10cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 17 Apr 2025 15:45:29 +0300 Subject: [PATCH 0893/2924] selftests/pcie_bwctrl: Fix test progs list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit df6f8c4d72ae ("selftests/pcie_bwctrl: Add 'set_pcie_speed.sh' to TEST_PROGS") added set_pcie_speed.sh into TEST_PROGS but that script is a helper that is only being called by set_pcie_cooling_state.sh, not a test case itself. When set_pcie_speed.sh is in TEST_PROGS, selftest harness will execute also it leading to bwctrl selftest errors: # selftests: pcie_bwctrl: set_pcie_speed.sh # cat: /cur_state: No such file or directory not ok 2 selftests: pcie_bwctrl: set_pcie_speed.sh # exit=1 Place set_pcie_speed.sh into TEST_FILES instead to have it included into installed test files but not execute it from the test harness. Fixes: df6f8c4d72ae ("selftests/pcie_bwctrl: Add 'set_pcie_speed.sh' to TEST_PROGS") Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250417124529.11391-1-ilpo.jarvinen@linux.intel.com --- tools/testing/selftests/pcie_bwctrl/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile index 48ec048f47afda..277f92f9d7537a 100644 --- a/tools/testing/selftests/pcie_bwctrl/Makefile +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -1,2 +1,3 @@ -TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh +TEST_PROGS = set_pcie_cooling_state.sh +TEST_FILES = set_pcie_speed.sh include ../lib.mk From 183a08715af1491d381b4e22efd61578fbe05fa5 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 10 Apr 2025 03:16:26 -0400 Subject: [PATCH 0894/2924] virtgpu: don't reset on shutdown It looks like GPUs are used after shutdown is invoked. Thus, breaking virtio gpu in the shutdown callback is not a good idea - guest hangs attempting to finish console drawing, with these warnings: [ 20.504464] WARNING: CPU: 0 PID: 568 at drivers/gpu/drm/virtio/virtgpu_vq.c:358 virtio_gpu_queue_ctrl_sgs+0x236/0x290 [virtio_gpu] [ 20.505685] Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 rfkill ip_set nf_tables nfnetlink vfat fat intel_rapl_msr intel_rapl_common intel_uncore_frequency_common nfit libnvdimm kvm_intel kvm rapl iTCO_wdt iTCO_vendor_support virtio_gpu virtio_dma_buf pcspkr drm_shmem_helper i2c_i801 drm_kms_helper lpc_ich i2c_smbus virtio_balloon joydev drm fuse xfs libcrc32c ahci libahci crct10dif_pclmul crc32_pclmul crc32c_intel libata virtio_net ghash_clmulni_intel net_failover virtio_blk failover serio_raw dm_mirror dm_region_hash dm_log dm_mod [ 20.511847] CPU: 0 PID: 568 Comm: kworker/0:3 Kdump: loaded Tainted: G W ------- --- 5.14.0-578.6675_1757216455.el9.x86_64 #1 [ 20.513157] Hardware name: Red Hat KVM/RHEL, BIOS edk2-20241117-3.el9 11/17/2024 [ 20.513918] Workqueue: events drm_fb_helper_damage_work [drm_kms_helper] [ 20.514626] RIP: 0010:virtio_gpu_queue_ctrl_sgs+0x236/0x290 [virtio_gpu] [ 20.515332] Code: 00 00 48 85 c0 74 0c 48 8b 78 08 48 89 ee e8 51 50 00 00 65 ff 0d 42 e3 74 3f 0f 85 69 ff ff ff 0f 1f 44 00 00 e9 5f ff ff ff <0f> 0b e9 3f ff ff ff 48 83 3c 24 00 74 0e 49 8b 7f 40 48 85 ff 74 [ 20.517272] RSP: 0018:ff34f0a8c0787ad8 EFLAGS: 00010282 [ 20.517820] RAX: 00000000fffffffb RBX: 0000000000000000 RCX: 0000000000000820 [ 20.518565] RDX: 0000000000000000 RSI: ff34f0a8c0787be0 RDI: ff218bef03a26300 [ 20.519308] RBP: ff218bef03a26300 R08: 0000000000000001 R09: ff218bef07224360 [ 20.520059] R10: 0000000000008dc0 R11: 0000000000000002 R12: ff218bef02630028 [ 20.520806] R13: ff218bef0263fb48 R14: ff218bef00cb8000 R15: ff218bef07224360 [ 20.521555] FS: 0000000000000000(0000) GS:ff218bef7ba00000(0000) knlGS:0000000000000000 [ 20.522397] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 20.522996] CR2: 000055ac4f7871c0 CR3: 000000010b9f2002 CR4: 0000000000771ef0 [ 20.523740] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 20.524477] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 [ 20.525223] PKRU: 55555554 [ 20.525515] Call Trace: [ 20.525777] [ 20.526003] ? show_trace_log_lvl+0x1c4/0x2df [ 20.526464] ? show_trace_log_lvl+0x1c4/0x2df [ 20.526925] ? virtio_gpu_queue_fenced_ctrl_buffer+0x82/0x2c0 [virtio_gpu] [ 20.527643] ? virtio_gpu_queue_ctrl_sgs+0x236/0x290 [virtio_gpu] [ 20.528282] ? __warn+0x7e/0xd0 [ 20.528621] ? virtio_gpu_queue_ctrl_sgs+0x236/0x290 [virtio_gpu] [ 20.529256] ? report_bug+0x100/0x140 [ 20.529643] ? handle_bug+0x3c/0x70 [ 20.530010] ? exc_invalid_op+0x14/0x70 [ 20.530421] ? asm_exc_invalid_op+0x16/0x20 [ 20.530862] ? virtio_gpu_queue_ctrl_sgs+0x236/0x290 [virtio_gpu] [ 20.531506] ? virtio_gpu_queue_ctrl_sgs+0x174/0x290 [virtio_gpu] [ 20.532148] virtio_gpu_queue_fenced_ctrl_buffer+0x82/0x2c0 [virtio_gpu] [ 20.532843] virtio_gpu_primary_plane_update+0x3e2/0x460 [virtio_gpu] [ 20.533520] drm_atomic_helper_commit_planes+0x108/0x320 [drm_kms_helper] [ 20.534233] drm_atomic_helper_commit_tail+0x45/0x80 [drm_kms_helper] [ 20.534914] commit_tail+0xd2/0x130 [drm_kms_helper] [ 20.535446] drm_atomic_helper_commit+0x11b/0x140 [drm_kms_helper] [ 20.536097] drm_atomic_commit+0xa4/0xe0 [drm] [ 20.536588] ? __pfx___drm_printfn_info+0x10/0x10 [drm] [ 20.537162] drm_atomic_helper_dirtyfb+0x192/0x270 [drm_kms_helper] [ 20.537823] drm_fbdev_shmem_helper_fb_dirty+0x43/0xa0 [drm_shmem_helper] [ 20.538536] drm_fb_helper_damage_work+0x87/0x160 [drm_kms_helper] [ 20.539188] process_one_work+0x194/0x380 [ 20.539612] worker_thread+0x2fe/0x410 [ 20.540007] ? __pfx_worker_thread+0x10/0x10 [ 20.540456] kthread+0xdd/0x100 [ 20.540791] ? __pfx_kthread+0x10/0x10 [ 20.541190] ret_from_fork+0x29/0x50 [ 20.541566] [ 20.541802] ---[ end trace 0000000000000000 ]--- It looks like the shutdown is called in the middle of console drawing, so we should either wait for it to finish, or let drm handle the shutdown. This patch implements this second option: Add an option for drivers to bypass the common break+reset handling. As DRM is careful to flush/synchronize outstanding buffers, it looks like GPU can just have a NOP there. Reviewed-by: Eric Auger Tested-by: Eric Auger Fixes: 8bd2fa086a04 ("virtio: break and reset virtio devices on device_shutdown()") Cc: Eric Auger Cc: Jocelyn Falempe Signed-off-by: Michael S. Tsirkin Message-Id: <8490dbeb6f79ed039e6c11d121002618972538a3.1744293540.git.mst@redhat.com> --- drivers/gpu/drm/virtio/virtgpu_drv.c | 9 +++++++++ drivers/virtio/virtio.c | 6 ++++++ include/linux/virtio.h | 3 +++ 3 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 2d88e390feb468..e32e680c71979e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -128,6 +128,14 @@ static void virtio_gpu_remove(struct virtio_device *vdev) drm_dev_put(dev); } +static void virtio_gpu_shutdown(struct virtio_device *vdev) +{ + /* + * drm does its own synchronization on shutdown. + * Do nothing here, opt out of device reset. + */ +} + static void virtio_gpu_config_changed(struct virtio_device *vdev) { struct drm_device *dev = vdev->priv; @@ -162,6 +170,7 @@ static struct virtio_driver virtio_gpu_driver = { .id_table = id_table, .probe = virtio_gpu_probe, .remove = virtio_gpu_remove, + .shutdown = virtio_gpu_shutdown, .config_changed = virtio_gpu_config_changed }; diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 150753c3b5782c..95d5d7993e5b1b 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -407,6 +407,12 @@ static void virtio_dev_shutdown(struct device *_d) if (!drv) return; + /* If the driver has its own shutdown method, use that. */ + if (drv->shutdown) { + drv->shutdown(dev); + return; + } + /* * Some devices get wedged if you kick them after they are * reset. Mark all vqs as broken to make sure we don't. diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4d16c13d0df580..64cb4b04be7add 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -220,6 +220,8 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); * occurs. * @reset_done: optional function to call after transport specific reset * operation has finished. + * @shutdown: synchronize with the device on shutdown. If provided, replaces + * the virtio core implementation. */ struct virtio_driver { struct device_driver driver; @@ -237,6 +239,7 @@ struct virtio_driver { int (*restore)(struct virtio_device *dev); int (*reset_prepare)(struct virtio_device *dev); int (*reset_done)(struct virtio_device *dev); + void (*shutdown)(struct virtio_device *dev); }; #define drv_to_virtio(__drv) container_of_const(__drv, struct virtio_driver, driver) From fbd3039a64b01b769040677c4fc68badeca8e3b2 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Sat, 22 Mar 2025 01:29:54 +0100 Subject: [PATCH 0895/2924] virtio_console: fix missing byte order handling for cols and rows As per virtio spec the fields cols and rows are specified as little endian. Although there is no legacy interface requirement that would state that cols and rows need to be handled as native endian when legacy interface is used, unlike for the fields of the adjacent struct virtio_console_control, I decided to err on the side of caution based on some non-conclusive virtio spec repo archaeology and opt for using virtio16_to_cpu() much like for virtio_console_control.event. Strictly by the letter of the spec virtio_le_to_cpu() would have been sufficient. But when the legacy interface is not used, it boils down to the same. And when using the legacy interface, the device formatting these as little endian when the guest is big endian would surprise me more than it using guest native byte order (which would make it compatible with the current implementation). Nevertheless somebody trying to implement the spec following it to the letter could end up forcing little endian byte order when the legacy interface is in use. So IMHO this ultimately needs a judgement call by the maintainers. Fixes: 8345adbf96fc1 ("virtio: console: Accept console size along with resize control message") Signed-off-by: Halil Pasic Cc: stable@vger.kernel.org # v2.6.35+ Message-Id: <20250322002954.3129282-1-pasic@linux.ibm.com> Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 5f04951d0dd469..216c5115637d39 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1576,8 +1576,8 @@ static void handle_control_message(struct virtio_device *vdev, break; case VIRTIO_CONSOLE_RESIZE: { struct { - __u16 rows; - __u16 cols; + __virtio16 rows; + __virtio16 cols; } size; if (!is_console_port(port)) @@ -1585,7 +1585,8 @@ static void handle_control_message(struct virtio_device *vdev, memcpy(&size, buf->buf + buf->offset + sizeof(*cpkt), sizeof(size)); - set_console_size(port, size.rows, size.cols); + set_console_size(port, virtio16_to_cpu(vdev, size.rows), + virtio16_to_cpu(vdev, size.cols)); port->cons.hvc->irq_requested = 1; resize_console(port); From 5326ab737a47278dbd16ed3ee7380b26c7056ddd Mon Sep 17 00:00:00 2001 From: Maximilian Immanuel Brandtner Date: Mon, 24 Mar 2025 15:42:46 +0100 Subject: [PATCH 0896/2924] virtio_console: fix order of fields cols and rows According to section 5.3.6.2 (Multiport Device Operation) of the virtio spec(version 1.2) a control buffer with the event VIRTIO_CONSOLE_RESIZE is followed by a virtio_console_resize struct containing cols then rows. The kernel implements this the wrong way around (rows then cols) resulting in the two values being swapped. Signed-off-by: Maximilian Immanuel Brandtner Message-Id: <20250324144300.905535-1-maxbr@linux.ibm.com> Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 216c5115637d39..088182e54debd6 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1576,8 +1576,8 @@ static void handle_control_message(struct virtio_device *vdev, break; case VIRTIO_CONSOLE_RESIZE: { struct { - __virtio16 rows; __virtio16 cols; + __virtio16 rows; } size; if (!is_console_port(port)) From fec0abf52609c20279243699d08b660c142ce0aa Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 27 Mar 2025 13:44:35 +0100 Subject: [PATCH 0897/2924] vhost_task: fix vhost_task_create() documentation Commit cb380909ae3b ("vhost: return task creation error instead of NULL") changed the return value of vhost_task_create(), but did not update the documentation. Reflect the change in the documentation: on an error, vhost_task_create() returns an ERR_PTR() and no longer NULL. Signed-off-by: Stefano Garzarella Message-Id: <20250327124435.142831-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin --- kernel/vhost_task.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 2ef2e1b8009165..2f844c279a3e01 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(vhost_task_stop); * @arg: data to be passed to fn and handled_kill * @name: the thread's name * - * This returns a specialized task for use by the vhost layer or NULL on + * This returns a specialized task for use by the vhost layer or ERR_PTR() on * failure. The returned task is inactive, and the caller must fire it up * through vhost_task_start(). */ From f591cf9fce724e5075cc67488c43c6e39e8cbe27 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 2 Apr 2025 23:29:46 -0700 Subject: [PATCH 0898/2924] vhost-scsi: protect vq->log_used with vq->mutex The vhost-scsi completion path may access vq->log_base when vq->log_used is already set to false. vhost-thread QEMU-thread vhost_scsi_complete_cmd_work() -> vhost_add_used() -> vhost_add_used_n() if (unlikely(vq->log_used)) QEMU disables vq->log_used via VHOST_SET_VRING_ADDR. mutex_lock(&vq->mutex); vq->log_used = false now! mutex_unlock(&vq->mutex); QEMU gfree(vq->log_base) log_used() -> log_write(vq->log_base) Assuming the VMM is QEMU. The vq->log_base is from QEMU userpace and can be reclaimed via gfree(). As a result, this causes invalid memory writes to QEMU userspace. The control queue path has the same issue. Signed-off-by: Dongli Zhang Acked-by: Jason Wang Reviewed-by: Mike Christie Message-Id: <20250403063028.16045-2-dongli.zhang@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index f6f5a7ac789455..f846f2aa7c8702 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -627,6 +627,9 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) int ret; llnode = llist_del_all(&svq->completion_list); + + mutex_lock(&svq->vq.mutex); + llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { se_cmd = &cmd->tvc_se_cmd; @@ -660,6 +663,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) vhost_scsi_release_cmd_res(se_cmd); } + mutex_unlock(&svq->vq.mutex); + if (signal) vhost_signal(&svq->vs->dev, &svq->vq); } @@ -1432,8 +1437,11 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work) else resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED; + mutex_lock(&tmf->svq->vq.mutex); vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs, tmf->vq_desc, &tmf->resp_iov, resp_code); + mutex_unlock(&tmf->svq->vq.mutex); + vhost_scsi_release_tmf_res(tmf); } From b182687135474d7ed905a07cc6cb2734b359e13e Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 2 Apr 2025 23:29:47 -0700 Subject: [PATCH 0899/2924] vhost-scsi: Fix vhost_scsi_send_bad_target() Although the support of VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 was signaled by the commit 664ed90e621c ("vhost/scsi: Set VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits"), vhost_scsi_send_bad_target() still assumes the response in a single descriptor. In addition, although vhost_scsi_send_bad_target() is used by both I/O queue and control queue, the response header is always virtio_scsi_cmd_resp. It is required to use virtio_scsi_ctrl_tmf_resp or virtio_scsi_ctrl_an_resp for control queue. Fixes: 664ed90e621c ("vhost/scsi: Set VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits") Signed-off-by: Dongli Zhang Acked-by: Jason Wang Reviewed-by: Mike Christie Message-Id: <20250403063028.16045-3-dongli.zhang@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 48 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index f846f2aa7c8702..59d907b94c5e5b 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1015,23 +1015,46 @@ vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq, pr_err("Faulted on virtio_scsi_cmd_resp\n"); } +#define TYPE_IO_CMD 0 +#define TYPE_CTRL_TMF 1 +#define TYPE_CTRL_AN 2 + static void vhost_scsi_send_bad_target(struct vhost_scsi *vs, struct vhost_virtqueue *vq, - int head, unsigned out) + struct vhost_scsi_ctx *vc, int type) { - struct virtio_scsi_cmd_resp __user *resp; - struct virtio_scsi_cmd_resp rsp; + union { + struct virtio_scsi_cmd_resp cmd; + struct virtio_scsi_ctrl_tmf_resp tmf; + struct virtio_scsi_ctrl_an_resp an; + } rsp; + struct iov_iter iov_iter; + size_t rsp_size; int ret; memset(&rsp, 0, sizeof(rsp)); - rsp.response = VIRTIO_SCSI_S_BAD_TARGET; - resp = vq->iov[out].iov_base; - ret = __copy_to_user(resp, &rsp, sizeof(rsp)); - if (!ret) - vhost_add_used_and_signal(&vs->dev, vq, head, 0); + + if (type == TYPE_IO_CMD) { + rsp_size = sizeof(struct virtio_scsi_cmd_resp); + rsp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; + } else if (type == TYPE_CTRL_TMF) { + rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp); + rsp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; + } else { + rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp); + rsp.an.response = VIRTIO_SCSI_S_BAD_TARGET; + } + + iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in, + rsp_size); + + ret = copy_to_iter(&rsp, rsp_size, &iov_iter); + + if (likely(ret == rsp_size)) + vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); else - pr_err("Faulted on virtio_scsi_cmd_resp\n"); + pr_err("Faulted on virtio scsi type=%d\n", type); } static int @@ -1395,7 +1418,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (ret == -ENXIO) break; else if (ret == -EIO) - vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); + vhost_scsi_send_bad_target(vs, vq, &vc, TYPE_IO_CMD); else if (ret == -ENOMEM) vhost_scsi_send_status(vs, vq, vc.head, vc.out, SAM_STAT_TASK_SET_FULL); @@ -1631,7 +1654,10 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (ret == -ENXIO) break; else if (ret == -EIO) - vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); + vhost_scsi_send_bad_target(vs, vq, &vc, + v_req.type == VIRTIO_SCSI_T_TMF ? + TYPE_CTRL_TMF : + TYPE_CTRL_AN); } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); out: mutex_unlock(&vq->mutex); From 58465d86071b61415e25fb054201f61e83d21465 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 2 Apr 2025 23:29:48 -0700 Subject: [PATCH 0900/2924] vhost-scsi: Fix vhost_scsi_send_status() Although the support of VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 was signaled by the commit 664ed90e621c ("vhost/scsi: Set VIRTIO_F_ANY_LAYOUT + VIRTIO_F_VERSION_1 feature bits"), vhost_scsi_send_bad_target() still assumes the response in a single descriptor. Similar issue in vhost_scsi_send_bad_target() has been fixed in previous commit. In addition, similar issue for vhost_scsi_complete_cmd_work() has been fixed by the commit 6dd88fd59da8 ("vhost-scsi: unbreak any layout for response"). Fixes: 3ca51662f818 ("vhost-scsi: Add better resource allocation failure handling") Signed-off-by: Dongli Zhang Acked-by: Jason Wang Reviewed-by: Mike Christie Message-Id: <20250403063028.16045-4-dongli.zhang@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/scsi.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 59d907b94c5e5b..26bcf3a7f70cb2 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -999,18 +999,22 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_nexus *nexus, static void vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq, - int head, unsigned int out, u8 status) + struct vhost_scsi_ctx *vc, u8 status) { - struct virtio_scsi_cmd_resp __user *resp; struct virtio_scsi_cmd_resp rsp; + struct iov_iter iov_iter; int ret; memset(&rsp, 0, sizeof(rsp)); rsp.status = status; - resp = vq->iov[out].iov_base; - ret = __copy_to_user(resp, &rsp, sizeof(rsp)); - if (!ret) - vhost_add_used_and_signal(&vs->dev, vq, head, 0); + + iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in, + sizeof(rsp)); + + ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter); + + if (likely(ret == sizeof(rsp))) + vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); else pr_err("Faulted on virtio_scsi_cmd_resp\n"); } @@ -1420,7 +1424,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) else if (ret == -EIO) vhost_scsi_send_bad_target(vs, vq, &vc, TYPE_IO_CMD); else if (ret == -ENOMEM) - vhost_scsi_send_status(vs, vq, vc.head, vc.out, + vhost_scsi_send_status(vs, vq, &vc, SAM_STAT_TASK_SET_FULL); } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); out: From 1d2d8524eaffc4d9a116213520d2c650e07c9cc6 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 17 Apr 2025 11:52:39 -0500 Subject: [PATCH 0901/2924] iio: imu: inv_mpu6050: align buffer for timestamp Align the buffer used with iio_push_to_buffers_with_timestamp() to ensure the s64 timestamp is aligned to 8 bytes. Fixes: 0829edc43e0a ("iio: imu: inv_mpu6050: read the full fifo when processing data") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250417-iio-more-timestamp-alignment-v1-7-eafac1e22318@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index 3d3b27f28c9d1c..273196e647a2b5 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -50,7 +50,7 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p) u16 fifo_count; u32 fifo_period; s64 timestamp; - u8 data[INV_MPU6050_OUTPUT_DATA_SIZE]; + u8 data[INV_MPU6050_OUTPUT_DATA_SIZE] __aligned(8); size_t i, nb; mutex_lock(&st->lock); From bb49d940344bcb8e2b19e69d7ac86f567887ea9a Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 17 Apr 2025 11:52:37 -0500 Subject: [PATCH 0902/2924] iio: chemical: sps30: use aligned_s64 for timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow the pattern of other drivers and use aligned_s64 for the timestamp. This will ensure that the timestamp is correctly aligned on all architectures. Fixes: a5bf6fdd19c3 ("iio:chemical:sps30: Fix timestamp alignment") Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250417-iio-more-timestamp-alignment-v1-5-eafac1e22318@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/sps30.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/chemical/sps30.c b/drivers/iio/chemical/sps30.c index 6f4f2ba2c09d5e..a7888146188d09 100644 --- a/drivers/iio/chemical/sps30.c +++ b/drivers/iio/chemical/sps30.c @@ -108,7 +108,7 @@ static irqreturn_t sps30_trigger_handler(int irq, void *p) int ret; struct { s32 data[4]; /* PM1, PM2P5, PM4, PM10 */ - s64 ts; + aligned_s64 ts; } scan; mutex_lock(&state->lock); From 6ffa698674053e82e811520642db2650d00d2c01 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 17 Apr 2025 11:52:36 -0500 Subject: [PATCH 0903/2924] iio: chemical: pms7003: use aligned_s64 for timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow the pattern of other drivers and use aligned_s64 for the timestamp. This will ensure that the timestamp is correctly aligned on all architectures. Also move the unaligned.h header while touching this since it was the only one not in alphabetical order. Fixes: 13e945631c2f ("iio:chemical:pms7003: Fix timestamp alignment and prevent data leak.") Signed-off-by: David Lechner Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250417-iio-more-timestamp-alignment-v1-4-eafac1e22318@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/pms7003.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/chemical/pms7003.c b/drivers/iio/chemical/pms7003.c index d0bd94912e0a34..e05ce1f12065c6 100644 --- a/drivers/iio/chemical/pms7003.c +++ b/drivers/iio/chemical/pms7003.c @@ -5,7 +5,6 @@ * Copyright (c) Tomasz Duszynski */ -#include #include #include #include @@ -19,6 +18,8 @@ #include #include #include +#include +#include #define PMS7003_DRIVER_NAME "pms7003" @@ -76,7 +77,7 @@ struct pms7003_state { /* Used to construct scan to push to the IIO buffer */ struct { u16 data[3]; /* PM1, PM2P5, PM10 */ - s64 ts; + aligned_s64 ts; } scan; }; From f79aeb6c631b57395f37acbfbe59727e355a714c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:36 +0100 Subject: [PATCH 0904/2924] iio: temp: maxim-thermocouple: Fix potential lack of DMA safe buffer. The trick of using __aligned(IIO_DMA_MINALIGN) ensures that there is no overlap between buffers used for DMA and those used for driver state storage that are before the marking. It doesn't ensure anything above state variables found after the marking. Hence move this particular bit of state earlier in the structure. Fixes: 10897f34309b ("iio: temp: maxim_thermocouple: Fix alignment for DMA safety") Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-14-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/maxim_thermocouple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c index c28a7a6dea5f12..555a61e2f3fdd1 100644 --- a/drivers/iio/temperature/maxim_thermocouple.c +++ b/drivers/iio/temperature/maxim_thermocouple.c @@ -121,9 +121,9 @@ static const struct maxim_thermocouple_chip maxim_thermocouple_chips[] = { struct maxim_thermocouple_data { struct spi_device *spi; const struct maxim_thermocouple_chip *chip; + char tc_type; u8 buffer[16] __aligned(IIO_DMA_MINALIGN); - char tc_type; }; static int maxim_thermocouple_read(struct maxim_thermocouple_data *data, From 1bb942287e05dc4c304a003ea85e6dd9a5e7db39 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:27 +0100 Subject: [PATCH 0905/2924] iio: accel: adxl355: Make timestamp 64-bit aligned using aligned_s64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The IIO ABI requires 64-bit aligned timestamps. In this case insufficient padding would have been added on architectures where an s64 is only 32-bit aligned. Use aligned_s64 to enforce the correct alignment. Fixes: 327a0eaf19d5 ("iio: accel: adxl355: Add triggered buffer support") Reported-by: David Lechner Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-5-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl355_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c index e8cd21fa77a698..cbac622ef82117 100644 --- a/drivers/iio/accel/adxl355_core.c +++ b/drivers/iio/accel/adxl355_core.c @@ -231,7 +231,7 @@ struct adxl355_data { u8 transf_buf[3]; struct { u8 buf[14]; - s64 ts; + aligned_s64 ts; } buffer; } __aligned(IIO_DMA_MINALIGN); }; From 5097eaae98e53f9ab9d35801c70da819b92ca907 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:26 +0100 Subject: [PATCH 0906/2924] iio: adc: dln2: Use aligned_s64 for timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here the lack of marking allows the overall structure to not be sufficiently aligned resulting in misplacement of the timestamp in iio_push_to_buffers_with_timestamp(). Use aligned_s64 to force the alignment on all architectures. Fixes: 7c0299e879dd ("iio: adc: Add support for DLN2 ADC") Reported-by: David Lechner Reviewed-by: Andy Shevchenko Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-4-jic23@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/dln2-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index a1e48a756a7b51..359e26e3f5bcfe 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -466,7 +466,7 @@ static irqreturn_t dln2_adc_trigger_h(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct { __le16 values[DLN2_ADC_MAX_CHANNELS]; - int64_t timestamp_space; + aligned_s64 timestamp_space; } data; struct dln2_adc_get_all_vals dev_data; struct dln2_adc *dln2 = iio_priv(indio_dev); From ffbc26bc91c1f1eb3dcf5d8776e74cbae21ee13a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:25 +0100 Subject: [PATCH 0907/2924] iio: adc: ad7768-1: Fix insufficient alignment of timestamp. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On architectures where an s64 is not 64-bit aligned, this may result insufficient alignment of the timestamp and the structure being too small. Use aligned_s64 to force the alignment. Fixes: a1caeebab07e ("iio: adc: ad7768-1: Fix too small buffer passed to iio_push_to_buffers_with_timestamp()") # aligned_s64 newer Reported-by: David Lechner Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-3-jic23@kernel.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7768-1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index 5a863005aca6d1..5e0be36af0c5c2 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -168,7 +168,7 @@ struct ad7768_state { union { struct { __be32 chan; - s64 timestamp; + aligned_s64 timestamp; } scan; __be32 d32; u8 d8[2]; From 52d349884738c346961e153f195f4c7fe186fcf4 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 13 Apr 2025 11:34:24 +0100 Subject: [PATCH 0908/2924] iio: adc: ad7266: Fix potential timestamp alignment issue. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On architectures where an s64 is only 32-bit aligned insufficient padding would be left between the earlier elements and the timestamp. Use aligned_s64 to enforce the correct placement and ensure the storage is large enough. Fixes: 54e018da3141 ("iio:ad7266: Mark transfer buffer as __be16") # aligned_s64 is much newer. Reported-by: David Lechner Reviewed-by: Nuno Sá Reviewed-by: David Lechner Link: https://patch.msgid.link/20250413103443.2420727-2-jic23@kernel.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7266.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 18559757f9085c..7fef2727f89e98 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -45,7 +45,7 @@ struct ad7266_state { */ struct { __be16 sample[2]; - s64 timestamp; + aligned_s64 timestamp; } data __aligned(IIO_DMA_MINALIGN); }; From 2e922956277187655ed9bedf7b5c28906e51708f Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Mon, 14 Apr 2025 11:40:49 -0400 Subject: [PATCH 0909/2924] staging: iio: adc: ad7816: Correct conditional logic for store mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mode setting logic in ad7816_store_mode was reversed due to incorrect handling of the strcmp return value. strcmp returns 0 on match, so the `if (strcmp(buf, "full"))` block executed when the input was not "full". This resulted in "full" setting the mode to AD7816_PD (power-down) and other inputs setting it to AD7816_FULL. Fix this by checking it against 0 to correctly check for "full" and "power-down", mapping them to AD7816_FULL and AD7816_PD respectively. Fixes: 7924425db04a ("staging: iio: adc: new driver for AD7816 devices") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Acked-by: Nuno Sá Link: https://lore.kernel.org/stable/20250414152920.467505-1-gshahrouzi%40gmail.com Link: https://patch.msgid.link/20250414154050.469482-1-gshahrouzi@gmail.com Signed-off-by: Jonathan Cameron --- drivers/staging/iio/adc/ad7816.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/adc/ad7816.c b/drivers/staging/iio/adc/ad7816.c index 6c14d7bcdd6750..081b17f498638b 100644 --- a/drivers/staging/iio/adc/ad7816.c +++ b/drivers/staging/iio/adc/ad7816.c @@ -136,7 +136,7 @@ static ssize_t ad7816_store_mode(struct device *dev, struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct ad7816_chip_info *chip = iio_priv(indio_dev); - if (strcmp(buf, "full")) { + if (strcmp(buf, "full") == 0) { gpiod_set_value(chip->rdwr_pin, 1); chip->mode = AD7816_FULL; } else { From d481ee35247d2a01764667a25f6f512c292ba42d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 18 Apr 2025 10:12:08 -0400 Subject: [PATCH 0910/2924] tracing: selftests: Add testing a user string to filters Running the following commands was broken: # cd /sys/kernel/tracing # echo "filename.ustring ~ \"/proc*\"" > events/syscalls/sys_enter_openat/filter # echo 1 > events/syscalls/sys_enter_openat/enable # ls /proc/$$/maps # cat trace And would produce nothing when it should have produced something like: ls-1192 [007] ..... 8169.828333: sys_openat(dfd: ffffffffffffff9c, filename: 7efc18359904, flags: 80000, mode: 0) Add a test to check this case so that it will be caught if it breaks again. Link: https://lore.kernel.org/linux-trace-kernel/20250417183003.505835fb@gandalf.local.home/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Shuah Khan Link: https://lore.kernel.org/20250418101208.38dc81f5@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- .../test.d/filter/event-filter-function.tc | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index 118247b8dd84d8..c62165fabd0ce1 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -80,6 +80,26 @@ if [ $misscnt -gt 0 ]; then exit_fail fi +# Check strings too +if [ -f events/syscalls/sys_enter_openat/filter ]; then + DIRNAME=`basename $TMPDIR` + echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter + echo 1 > events/syscalls/sys_enter_openat/enable + echo 1 > tracing_on + ls /bin/sh + nocnt=`grep openat trace | wc -l` + ls $TMPDIR + echo 0 > tracing_on + hitcnt=`grep openat trace | wc -l`; + echo 0 > events/syscalls/sys_enter_openat/enable + if [ $nocnt -gt 0 ]; then + exit_fail + fi + if [ $hitcnt -eq 0 ]; then + exit_fail + fi +fi + reset_events_filter exit 0 From dc915672f9176799e48ac23a155f48742b15ec6c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 17 Apr 2025 17:29:33 -0700 Subject: [PATCH 0911/2924] cxl: Fix devm host device for CXL fwctl initialization Testing revealed the following error message for a CXL memdev that has Feature support: [ 56.690430] cxl mem0: Resources present before probing Attach the allocation of cxl_fwctl to the parent device of cxl_memdev. devm_add_* calls for cxl_memdev should not happen before the memdev probe function or outside the scope of the memdev driver. cxl_test missed this bug because cxl_test always arranges for the cxl_mem driver to be loaded before cxl_mock_mem runs. So the driver core always finds the devres list idle in that case. [DJ: Updated subject title and added commit log suggestion from djbw] Fixes: 858ce2f56b52 ("cxl: Add FWCTL support to CXL") Reviewed-by: Dan Williams Reviewed-by: Alison Schofield Link: https://lore.kernel.org/linux-cxl/6801aea053466_71fe2944c@dwillia2-xfh.jf.intel.com.notmuch/ Link: https://patch.msgid.link/20250418002933.406439-1-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/features.c | 4 ++-- drivers/cxl/pci.c | 2 +- include/cxl/features.h | 5 +++-- tools/testing/cxl/test/mem.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index f4daefe3180e52..150a1776480ab0 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -677,7 +677,7 @@ static void free_memdev_fwctl(void *_fwctl_dev) fwctl_put(fwctl_dev); } -int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) +int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_features_state *cxlfs; @@ -700,7 +700,7 @@ int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) if (rc) return rc; - return devm_add_action_or_reset(&cxlmd->dev, free_memdev_fwctl, + return devm_add_action_or_reset(host, free_memdev_fwctl, no_free_ptr(fwctl_dev)); } EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fwctl, "CXL"); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 7b14a154463c59..785aa2af5eaac4 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1018,7 +1018,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = devm_cxl_setup_fwctl(cxlmd); + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); if (rc) dev_dbg(&pdev->dev, "No CXL FWCTL setup\n"); diff --git a/include/cxl/features.h b/include/cxl/features.h index a3bb34694c06d8..5f7f842765a5f3 100644 --- a/include/cxl/features.h +++ b/include/cxl/features.h @@ -66,7 +66,7 @@ struct cxl_memdev; #ifdef CONFIG_CXL_FEATURES inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds); int devm_cxl_setup_features(struct cxl_dev_state *cxlds); -int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd); +int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd); #else static inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds) { @@ -78,7 +78,8 @@ static inline int devm_cxl_setup_features(struct cxl_dev_state *cxlds) return -EOPNOTSUPP; } -static inline int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) +static inline int devm_cxl_setup_fwctl(struct device *host, + struct cxl_memdev *cxlmd) { return -EOPNOTSUPP; } diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index f2957a3e36fe7f..bf9caa908f894c 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1780,7 +1780,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - rc = devm_cxl_setup_fwctl(cxlmd); + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); if (rc) dev_dbg(dev, "No CXL FWCTL setup\n"); From 25174d5cd22f0977034892672a0287f7febcec1c Mon Sep 17 00:00:00 2001 From: Li Ming Date: Thu, 10 Apr 2025 10:45:21 +0800 Subject: [PATCH 0912/2924] cxl/feature: Update out_len in set feature failure case CXL subsystem supports userspace to configure features via fwctl interface, it will configure features by using Set Feature command. Whatever Set Feature succeeds or fails, CXL driver always needs to return a structure fwctl_rpc_cxl_out to caller, and returned size is updated in a out_len parameter. The out_len should be updated not only when the set feature succeeds, but also when the set feature fails. Fixes: eb5dfcb9e36d ("cxl: Add support to handle user feature commands for set feature") Signed-off-by: Li Ming Reviewed-by: Dave Jiang Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250410024521.514095-1-ming.li@zohomail.com Signed-off-by: Dave Jiang --- drivers/cxl/core/features.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index 150a1776480ab0..1498e2369c3702 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -528,13 +528,13 @@ static void *cxlctl_set_feature(struct cxl_features_state *cxlfs, rc = cxl_set_feature(cxl_mbox, &feat_in->uuid, feat_in->version, feat_in->feat_data, data_size, flags, offset, &return_code); + *out_len = sizeof(*rpc_out); if (rc) { rpc_out->retval = return_code; return no_free_ptr(rpc_out); } rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; - *out_len = sizeof(*rpc_out); return no_free_ptr(rpc_out); } From 117c3b21d3c79af56750f18a54f2c468f30c8a45 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Apr 2025 10:31:29 +0100 Subject: [PATCH 0913/2924] arm64: Rework checks for broken Cavium HW in the PI code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling into the MIDR checking framework from the PI code has recently become much harder, due to the new fancy "multi-MIDR" support that relies on tables being populated at boot time, but not that early that they are available to the PI code. There are additional issues with this framework, as the code really isn't position independend *at all*. This leads to some ugly breakages, as reported by Ada. It so appears that the only reason for the PI code to call into the MIDR checking code is to cope with The Most Broken ARM64 System Ever, aka Cavium ThunderX, which cannot deal with nG attributes that result of the combination of KASLR and KPTI as a consequence of Erratum 27456. Duplicate the check for the erratum in the PI code, removing the dependency on the bulk of the MIDR checking framework. This allows dropping that same check from kaslr_requires_kpti(), as the KPTI code already relies on the ARM64_WORKAROUND_CAVIUM_27456 cap. Fixes: c8c2647e69bed ("arm64: Make  _midr_in_range_list() an exported function") Reported-by: Ada Couprie Diaz Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/3d97e45a-23cf-419b-9b6f-140b4d88de7b@arm.com Cc: Catalin Marinas Cc: Will Deacon Cc: Shameer Kolothum Cc: Oliver Upton Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20250418093129.1755739-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/mmu.h | 11 ----------- arch/arm64/kernel/cpu_errata.c | 2 +- arch/arm64/kernel/image-vars.h | 4 ---- arch/arm64/kernel/pi/map_kernel.c | 25 ++++++++++++++++++++++++- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 30a29e88994ba3..6e8aa8e726015e 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -94,17 +94,6 @@ static inline bool kaslr_requires_kpti(void) return false; } - /* - * Systems affected by Cavium erratum 24756 are incompatible - * with KPTI. - */ - if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { - extern const struct midr_range cavium_erratum_27456_cpus[]; - - if (is_midr_in_range_list(cavium_erratum_27456_cpus)) - return false; - } - return true; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b55f5f7057502c..6b0ad5070d3e00 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -335,7 +335,7 @@ static const struct midr_range cavium_erratum_23154_cpus[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 -const struct midr_range cavium_erratum_27456_cpus[] = { +static const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), /* Cavium ThunderX, T81 pass 1.0 */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 5e3c4b58f27904..2004b4f41ade68 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -47,10 +47,6 @@ PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override); PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override); PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override); PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings); -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus); -PROVIDE(__pi_is_midr_in_range_list = is_midr_in_range_list); -#endif PROVIDE(__pi__ctype = _ctype); PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed); diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e57b043f324b51..c6650cfe706c34 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -207,6 +207,29 @@ static void __init map_fdt(u64 fdt) dsb(ishst); } +/* + * PI version of the Cavium Eratum 27456 detection, which makes it + * impossible to use non-global mappings. + */ +static bool __init ng_mappings_allowed(void) +{ + static const struct midr_range cavium_erratum_27456_cpus[] __initconst = { + /* Cavium ThunderX, T88 pass 1.x - 2.1 */ + MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), + /* Cavium ThunderX, T81 pass 1.0 */ + MIDR_REV(MIDR_THUNDERX_81XX, 0, 0), + {}, + }; + + for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) { + if (midr_is_cpu_model_range(read_cpuid_id(), r->model, + r->rv_min, r->rv_max)) + return false; + } + + return true; +} + asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) { static char const chosen_str[] __initconst = "/chosen"; @@ -246,7 +269,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) u64 kaslr_seed = kaslr_early_init(fdt, chosen); if (kaslr_seed && kaslr_requires_kpti()) - arm64_use_ng_mappings = true; + arm64_use_ng_mappings = ng_mappings_allowed(); kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1); } From 0747c136753ef44a3b1434a235492ef54081b96e Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 18 Apr 2025 15:19:02 +0530 Subject: [PATCH 0914/2924] MAINTAINERS: Move Manivannan Sadhasivam as PCI Native host bridge and endpoint maintainer I'm currently maintaining the PCI endpoint subsystem and reviewing the native host bridge and endpoint drivers. However, this affects my endpoint maintainership role since I cannot merge endpoint patches that depend on the controller drivers (which is more common). Moreover, the controller driver patches would also benefit from a helping hand in maintaining them. So I'd like to step up to maintain the native host bridge and endpoint drivers together with the endpoint subsystem. Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20250418094905.9983-1-manivannan.sadhasivam@linaro.org --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 96b82704950184..7f8961d3e57c18 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18633,7 +18633,7 @@ F: drivers/pci/controller/pci-xgene-msi.c PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS M: Lorenzo Pieralisi M: Krzysztof Wilczyński -R: Manivannan Sadhasivam +M: Manivannan Sadhasivam R: Rob Herring L: linux-pci@vger.kernel.org S: Supported From 9d78f02503227d3554d26cf8ca73276105c98f3e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 17 Mar 2025 08:00:06 -0700 Subject: [PATCH 0915/2924] drm/msm/a6xx+: Don't let IB_SIZE overflow IB_SIZE is only b0..b19. Starting with a6xx gen3, additional fields were added above the IB_SIZE. Accidentially setting them can cause badness. Fix this by properly defining the CP_INDIRECT_BUFFER packet and using the generated builder macro to ensure unintended bits are not set. v2: add missing type attribute for IB_BASE v3: fix offset attribute in xml Reported-by: Connor Abbott Fixes: a83366ef19ea ("drm/msm/a6xx: add A640/A650 to gpulist") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/643396/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 8 ++++---- drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 1820c167fcee60..28c659c72493ae 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -242,10 +242,10 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -377,10 +377,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 55a35182858cca..5a6ae9fc319451 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -2259,5 +2259,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + + + + + + + From 936a25ef11f5d6c3e3e6736bb8b28e28dfb77918 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Apr 2025 10:45:17 -0700 Subject: [PATCH 0916/2924] input/joystick: magellan: Mark __nonstring look-up table GCC 15's new -Wunterminated-string-initialization notices that the 16 character lookup table "nibbles" (which is not used as a C-String) needs to be marked as "nonstring": drivers/input/joystick/magellan.c: In function 'magellan_crunch_nibbles': drivers/input/joystick/magellan.c:51:44: warning: initializer-string for array of 'unsigned char' truncates NUL terminator but destination lacks 'nonstring' attribute (17 chars into 16 available) [-Wunterminated-string-initialization] 51 | static unsigned char nibbles[16] = "0AB3D56GH9:K Link: https://lore.kernel.org/r/20250416174513.work.662-kees@kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/magellan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c index 2eaa25c9c68c25..7622638e5bb8e0 100644 --- a/drivers/input/joystick/magellan.c +++ b/drivers/input/joystick/magellan.c @@ -48,7 +48,7 @@ struct magellan { static int magellan_crunch_nibbles(unsigned char *data, int count) { - static unsigned char nibbles[16] = "0AB3D56GH9:K Date: Fri, 18 Apr 2025 18:37:18 -0700 Subject: [PATCH 0917/2924] Input: sparcspkr - avoid unannotated fall-through Fix follow warnings with clang-21i (and reformat for clarity): drivers/input/misc/sparcspkr.c:78:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] 78 | case SND_TONE: break; | ^ drivers/input/misc/sparcspkr.c:78:3: note: insert 'break;' to avoid fall-through 78 | case SND_TONE: break; | ^ | break; drivers/input/misc/sparcspkr.c:113:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough] 113 | case SND_TONE: break; | ^ drivers/input/misc/sparcspkr.c:113:3: note: insert 'break;' to avoid fall-through 113 | case SND_TONE: break; | ^ | break; 2 warnings generated. Signed-off-by: WangYuli Link: https://lore.kernel.org/r/6730E40353C76908+20250415052439.155051-1-wangyuli@uniontech.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/sparcspkr.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c index 8d7303fc13bce3..1cfadd73829f83 100644 --- a/drivers/input/misc/sparcspkr.c +++ b/drivers/input/misc/sparcspkr.c @@ -74,9 +74,14 @@ static int bbc_spkr_event(struct input_dev *dev, unsigned int type, unsigned int return -1; switch (code) { - case SND_BELL: if (value) value = 1000; - case SND_TONE: break; - default: return -1; + case SND_BELL: + if (value) + value = 1000; + break; + case SND_TONE: + break; + default: + return -1; } if (value > 20 && value < 32767) @@ -109,9 +114,14 @@ static int grover_spkr_event(struct input_dev *dev, unsigned int type, unsigned return -1; switch (code) { - case SND_BELL: if (value) value = 1000; - case SND_TONE: break; - default: return -1; + case SND_BELL: + if (value) + value = 1000; + break; + case SND_TONE: + break; + default: + return -1; } if (value > 20 && value < 32767) From c6cb8bf79466ae66bd0d07338c7c505ce758e9d7 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 10 Apr 2025 14:46:32 -0400 Subject: [PATCH 0918/2924] Input: cyttsp5 - ensure minimum reset pulse width The current reset pulse width is measured to be 5us on a Renesas RZ/G2L SOM. The manufacturer's minimum reset pulse width is specified as 10us. Extend reset pulse width to make sure it is long enough on all platforms. Also reword confusing comments about reset pin assertion. Fixes: 5b0c03e24a06 ("Input: Add driver for Cypress Generation 5 touchscreen") Cc: stable@vger.kernel.org Acked-by: Alistair Francis Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20250410184633.1164837-1-hugo@hugovil.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/cyttsp5.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c index eafe5a9b896484..14c43f0a6c217d 100644 --- a/drivers/input/touchscreen/cyttsp5.c +++ b/drivers/input/touchscreen/cyttsp5.c @@ -870,13 +870,16 @@ static int cyttsp5_probe(struct device *dev, struct regmap *regmap, int irq, ts->input->phys = ts->phys; input_set_drvdata(ts->input, ts); - /* Reset the gpio to be in a reset state */ + /* Assert gpio to be in a reset state */ ts->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(ts->reset_gpio)) { error = PTR_ERR(ts->reset_gpio); dev_err(dev, "Failed to request reset gpio, error %d\n", error); return error; } + + fsleep(10); /* Ensure long-enough reset pulse (minimum 10us). */ + gpiod_set_value_cansleep(ts->reset_gpio, 0); /* Need a delay to have device up */ From aece1cf146741761a1243746db5b72f5ece68290 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Apr 2025 13:36:51 +0800 Subject: [PATCH 0919/2924] Revert "crypto: testmgr - Add multibuffer acomp testing" This reverts commit 99585c2192cb1ce212876e82ef01d1c98c7f4699. Remove the acomp multibuffer tests as they are buggy. Reported-by: Dmitry Antipov Signed-off-by: Herbert Xu --- crypto/testmgr.c | 147 +++++++++++++++++++++-------------------------- 1 file changed, 64 insertions(+), 83 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index abd609d4c8efee..82977ea25db390 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -58,9 +58,6 @@ module_param(fuzz_iterations, uint, 0644); MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations"); #endif -/* Multibuffer is unlimited. Set arbitrary limit for testing. */ -#define MAX_MB_MSGS 16 - #ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS /* a perfect nop */ @@ -3329,48 +3326,27 @@ static int test_acomp(struct crypto_acomp *tfm, int ctcount, int dtcount) { const char *algo = crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)); - struct scatterlist *src = NULL, *dst = NULL; - struct acomp_req *reqs[MAX_MB_MSGS] = {}; - char *decomp_out[MAX_MB_MSGS] = {}; - char *output[MAX_MB_MSGS] = {}; - struct crypto_wait wait; - struct acomp_req *req; - int ret = -ENOMEM; unsigned int i; + char *output, *decomp_out; + int ret; + struct scatterlist src, dst; + struct acomp_req *req; + struct crypto_wait wait; - src = kmalloc_array(MAX_MB_MSGS, sizeof(*src), GFP_KERNEL); - if (!src) - goto out; - dst = kmalloc_array(MAX_MB_MSGS, sizeof(*dst), GFP_KERNEL); - if (!dst) - goto out; - - for (i = 0; i < MAX_MB_MSGS; i++) { - reqs[i] = acomp_request_alloc(tfm); - if (!reqs[i]) - goto out; - - acomp_request_set_callback(reqs[i], - CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &wait); - if (i) - acomp_request_chain(reqs[i], reqs[0]); - - output[i] = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); - if (!output[i]) - goto out; + output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); + if (!output) + return -ENOMEM; - decomp_out[i] = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); - if (!decomp_out[i]) - goto out; + decomp_out = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); + if (!decomp_out) { + kfree(output); + return -ENOMEM; } for (i = 0; i < ctcount; i++) { unsigned int dlen = COMP_BUF_SIZE; int ilen = ctemplate[i].inlen; void *input_vec; - int j; input_vec = kmemdup(ctemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { @@ -3378,61 +3354,70 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } + memset(output, 0, dlen); crypto_init_wait(&wait); - sg_init_one(src, input_vec, ilen); + sg_init_one(&src, input_vec, ilen); + sg_init_one(&dst, output, dlen); - for (j = 0; j < MAX_MB_MSGS; j++) { - sg_init_one(dst + j, output[j], dlen); - acomp_request_set_params(reqs[j], src, dst + j, ilen, dlen); + req = acomp_request_alloc(tfm); + if (!req) { + pr_err("alg: acomp: request alloc failed for %s\n", + algo); + kfree(input_vec); + ret = -ENOMEM; + goto out; } - req = reqs[0]; + acomp_request_set_params(req, &src, &dst, ilen, dlen); + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + ret = crypto_wait_req(crypto_acomp_compress(req), &wait); if (ret) { pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n", i + 1, algo, -ret); kfree(input_vec); + acomp_request_free(req); goto out; } ilen = req->dlen; dlen = COMP_BUF_SIZE; + sg_init_one(&src, output, ilen); + sg_init_one(&dst, decomp_out, dlen); crypto_init_wait(&wait); - for (j = 0; j < MAX_MB_MSGS; j++) { - sg_init_one(src + j, output[j], ilen); - sg_init_one(dst + j, decomp_out[j], dlen); - acomp_request_set_params(reqs[j], src + j, dst + j, ilen, dlen); - } - - crypto_wait_req(crypto_acomp_decompress(req), &wait); - for (j = 0; j < MAX_MB_MSGS; j++) { - ret = reqs[j]->base.err; - if (ret) { - pr_err("alg: acomp: compression failed on test %d (%d) for %s: ret=%d\n", - i + 1, j, algo, -ret); - kfree(input_vec); - goto out; - } + acomp_request_set_params(req, &src, &dst, ilen, dlen); - if (reqs[j]->dlen != ctemplate[i].inlen) { - pr_err("alg: acomp: Compression test %d (%d) failed for %s: output len = %d\n", - i + 1, j, algo, reqs[j]->dlen); - ret = -EINVAL; - kfree(input_vec); - goto out; - } + ret = crypto_wait_req(crypto_acomp_decompress(req), &wait); + if (ret) { + pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n", + i + 1, algo, -ret); + kfree(input_vec); + acomp_request_free(req); + goto out; + } - if (memcmp(input_vec, decomp_out[j], reqs[j]->dlen)) { - pr_err("alg: acomp: Compression test %d (%d) failed for %s\n", - i + 1, j, algo); - hexdump(output[j], reqs[j]->dlen); - ret = -EINVAL; - kfree(input_vec); - goto out; - } + if (req->dlen != ctemplate[i].inlen) { + pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n", + i + 1, algo, req->dlen); + ret = -EINVAL; + kfree(input_vec); + acomp_request_free(req); + goto out; + } + + if (memcmp(input_vec, decomp_out, req->dlen)) { + pr_err("alg: acomp: Compression test %d failed for %s\n", + i + 1, algo); + hexdump(output, req->dlen); + ret = -EINVAL; + kfree(input_vec); + acomp_request_free(req); + goto out; } kfree(input_vec); + acomp_request_free(req); } for (i = 0; i < dtcount; i++) { @@ -3446,9 +3431,10 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } + memset(output, 0, dlen); crypto_init_wait(&wait); - sg_init_one(src, input_vec, ilen); - sg_init_one(dst, output[0], dlen); + sg_init_one(&src, input_vec, ilen); + sg_init_one(&dst, output, dlen); req = acomp_request_alloc(tfm); if (!req) { @@ -3459,7 +3445,7 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } - acomp_request_set_params(req, src, dst, ilen, dlen); + acomp_request_set_params(req, &src, &dst, ilen, dlen); acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &wait); @@ -3481,10 +3467,10 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } - if (memcmp(output[0], dtemplate[i].output, req->dlen)) { + if (memcmp(output, dtemplate[i].output, req->dlen)) { pr_err("alg: acomp: Decompression test %d failed for %s\n", i + 1, algo); - hexdump(output[0], req->dlen); + hexdump(output, req->dlen); ret = -EINVAL; kfree(input_vec); acomp_request_free(req); @@ -3498,13 +3484,8 @@ static int test_acomp(struct crypto_acomp *tfm, ret = 0; out: - acomp_request_free(reqs[0]); - for (i = 0; i < MAX_MB_MSGS; i++) { - kfree(output[i]); - kfree(decomp_out[i]); - } - kfree(dst); - kfree(src); + kfree(decomp_out); + kfree(output); return ret; } From 712dc3e7b979ae9ae1afeb1e87ec92ebb72b9529 Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Wed, 9 Apr 2025 18:11:21 +0900 Subject: [PATCH 0920/2924] mailmap: Update email for Asahi Lina Add an alias so I can more easily filter kernel-related emails. Signed-off-by: Asahi Lina Link: https://lore.kernel.org/r/20250409-mailmap-lina-email-v1-1-265d05848ae3@asahilina.net Signed-off-by: Sven Peter --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 4f7cd8e231778c..8f0cc0b08ed0c8 100644 --- a/.mailmap +++ b/.mailmap @@ -102,6 +102,7 @@ Ard Biesheuvel Arnaud Patard Arnd Bergmann Arun Kumar Neelakantam +Asahi Lina Ashok Raj Nagarajan Ashwin Chaugule Asutosh Das From dc5befecbe2683ac49fc8dc76aade35e62f4cf30 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Wed, 16 Apr 2025 20:06:18 +0200 Subject: [PATCH 0921/2924] arm64: dts: apple: touchbar: Mark ps_dispdfr_be as always-on The driver depends on boot loader initialized state which resets when the ps_dispdfr_be power-domain is powered off. This happens on suspend or when the driver is missing during boot. Mark the domain as always on until the driver can handle this. Fixes: 7275e795e520 ("arm64: dts: apple: Add touchbar screen nodes") Signed-off-by: Janne Grunau Reviewed-by: Alyssa Rosenzweig Link: https://lore.kernel.org/r/20250416-arm64_dts_apple_touchbar-v1-1-e1c0b53b9125@jannau.net Signed-off-by: Sven Peter --- arch/arm64/boot/dts/apple/t8103-j293.dts | 10 ++++++++++ arch/arm64/boot/dts/apple/t8112-j493.dts | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/arm64/boot/dts/apple/t8103-j293.dts b/arch/arm64/boot/dts/apple/t8103-j293.dts index 2dfe7b895b2bc0..e2d9439397f71a 100644 --- a/arch/arm64/boot/dts/apple/t8103-j293.dts +++ b/arch/arm64/boot/dts/apple/t8103-j293.dts @@ -77,6 +77,16 @@ }; }; +/* + * The driver depends on boot loader initialized state which resets when this + * power-domain is powered off. This happens on suspend or when the driver is + * missing during boot. Mark the domain as always on until the driver can + * handle this. + */ +&ps_dispdfr_be { + apple,always-on; +}; + &display_dfr { status = "okay"; }; diff --git a/arch/arm64/boot/dts/apple/t8112-j493.dts b/arch/arm64/boot/dts/apple/t8112-j493.dts index 3d73f9ee2f46a3..be86d34c6696cb 100644 --- a/arch/arm64/boot/dts/apple/t8112-j493.dts +++ b/arch/arm64/boot/dts/apple/t8112-j493.dts @@ -40,6 +40,16 @@ }; }; +/* + * The driver depends on boot loader initialized state which resets when this + * power-domain is powered off. This happens on suspend or when the driver is + * missing during boot. Mark the domain as always on until the driver can + * handle this. + */ +&ps_dispdfr_be { + apple,always-on; +}; + &display_dfr { status = "okay"; }; From 83b2d345e1786fdab96fc2b52942eebde125e7cd Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 13 Apr 2025 11:08:58 +0300 Subject: [PATCH 0922/2924] x86/e820: Discard high memory that can't be addressed by 32-bit systems Dave Hansen reports the following crash on a 32-bit system with CONFIG_HIGHMEM=y and CONFIG_X86_PAE=y: > 0xf75fe000 is the mem_map[] entry for the first page >4GB. It > obviously wasn't allocated, thus the oops. BUG: unable to handle page fault for address: f75fe000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page *pdpt = 0000000002da2001 *pde = 000000000300c067 *pte = 0000000000000000 Oops: Oops: 0002 [#1] SMP NOPTI CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.15.0-rc1-00288-ge618ee89561b-dirty #311 PREEMPT(undef) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 EIP: __free_pages_core+0x3c/0x74 ... Call Trace: memblock_free_pages+0x11/0x2c memblock_free_all+0x2ce/0x3a0 mm_core_init+0xf5/0x320 start_kernel+0x296/0x79c i386_start_kernel+0xad/0xb0 startup_32_smp+0x151/0x154 The mem_map[] is allocated up to the end of ZONE_HIGHMEM which is defined by max_pfn. The bug was introduced by this recent commit: 6faea3422e3b ("arch, mm: streamline HIGHMEM freeing") Previously, freeing of high memory was also clamped to the end of ZONE_HIGHMEM but after this change, memblock_free_all() tries to free memory above the of ZONE_HIGHMEM as well and that causes access to mem_map[] entries beyond the end of the memory map. To fix this, discard the memory after max_pfn from memblock on 32-bit systems so that core MM would be aware only of actually usable memory. Fixes: 6faea3422e3b ("arch, mm: streamline HIGHMEM freeing") Reported-by: Dave Hansen Tested-by: Arnd Bergmann Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Ingo Molnar Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Davide Ciminaghi Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matthew Wilcox Cc: Paolo Bonzini Cc: Sean Christopherson Cc: kvm@vger.kernel.org Link: https://lore.kernel.org/r/20250413080858.743221-1-rppt@kernel.org # discussion and submission --- arch/x86/kernel/e820.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9d8dd8deb2a702..9920122018a0b3 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1299,6 +1299,14 @@ void __init e820__memblock_setup(void) memblock_add(entry->addr, entry->size); } + /* + * 32-bit systems are limited to 4BG of memory even with HIGHMEM and + * to even less without it. + * Discard memory after max_pfn - the actual limit detected at runtime. + */ + if (IS_ENABLED(CONFIG_X86_32)) + memblock_remove(PFN_PHYS(max_pfn), -1); + /* Throw away partial pages: */ memblock_trim_memory(PAGE_SIZE); From 408e4504f97c0aa510330f0a04b7ed028fdf3154 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 19 Apr 2025 22:48:59 +0200 Subject: [PATCH 0923/2924] Revert "hfs{plus}: add deprecation warning" This reverts commit ddee68c499f76ae47c011549df5be53db0057402. There's ongoing discussion about better maintenance of at least hfsplus. Rever the deprecation warning for now. Signed-off-by: Christian Brauner --- fs/hfs/super.c | 2 -- fs/hfsplus/super.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4413cd8feb9e3a..fe09c2093a93d9 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -404,8 +404,6 @@ static int hfs_init_fs_context(struct fs_context *fc) { struct hfs_sb_info *hsb; - pr_warn("The hfs filesystem is deprecated and scheduled to be removed from the kernel in 2025\n"); - hsb = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL); if (!hsb) return -ENOMEM; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 58cff4b2a3b4d3..948b8aaee33e3a 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -656,8 +656,6 @@ static int hfsplus_init_fs_context(struct fs_context *fc) { struct hfsplus_sb_info *sbi; - pr_warn("The hfsplus filesystem is deprecated and scheduled to be removed from the kernel in 2025\n"); - sbi = kzalloc(sizeof(struct hfsplus_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; From efabefb05aa1fe534ddb1839980824a763a7f1b0 Mon Sep 17 00:00:00 2001 From: Sahil Siddiq Date: Sat, 19 Apr 2025 21:18:17 +0530 Subject: [PATCH 0924/2924] openrisc: Refactor struct cpuinfo_or1k to reduce duplication The "cpuinfo_or1k" structure currently has identical data members for different cache components. Remove these fields out of struct cpuinfo_or1k and into its own struct. This reduces duplication while keeping cpuinfo_or1k extensible so more cache descriptors can be added in the future. Also add a new field "sets" to the new structure. Signed-off-by: Sahil Siddiq Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/cpuinfo.h | 16 +++++----- arch/openrisc/kernel/setup.c | 45 ++++++++++++++--------------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h index 5e4744153d0ec5..82f5d4c06314af 100644 --- a/arch/openrisc/include/asm/cpuinfo.h +++ b/arch/openrisc/include/asm/cpuinfo.h @@ -15,16 +15,18 @@ #ifndef __ASM_OPENRISC_CPUINFO_H #define __ASM_OPENRISC_CPUINFO_H +struct cache_desc { + u32 size; + u32 sets; + u32 block_size; + u32 ways; +}; + struct cpuinfo_or1k { u32 clock_frequency; - u32 icache_size; - u32 icache_block_size; - u32 icache_ways; - - u32 dcache_size; - u32 dcache_block_size; - u32 dcache_ways; + struct cache_desc icache; + struct cache_desc dcache; u16 coreid; }; diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index be56eaafc8b957..66207cd7bb9e98 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -115,16 +115,16 @@ static void print_cpuinfo(void) if (upr & SPR_UPR_DCP) printk(KERN_INFO - "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo->dcache_size, cpuinfo->dcache_block_size, - cpuinfo->dcache_ways); + "-- dcache: %4d bytes total, %2d bytes/line, %d set(s), %d way(s)\n", + cpuinfo->dcache.size, cpuinfo->dcache.block_size, + cpuinfo->dcache.sets, cpuinfo->dcache.ways); else printk(KERN_INFO "-- dcache disabled\n"); if (upr & SPR_UPR_ICP) printk(KERN_INFO - "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo->icache_size, cpuinfo->icache_block_size, - cpuinfo->icache_ways); + "-- icache: %4d bytes total, %2d bytes/line, %d set(s), %d way(s)\n", + cpuinfo->icache.size, cpuinfo->icache.block_size, + cpuinfo->icache.sets, cpuinfo->icache.ways); else printk(KERN_INFO "-- icache disabled\n"); @@ -156,7 +156,6 @@ void __init setup_cpuinfo(void) { struct device_node *cpu; unsigned long iccfgr, dccfgr; - unsigned long cache_set_size; int cpu_id = smp_processor_id(); struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id]; @@ -165,18 +164,18 @@ void __init setup_cpuinfo(void) panic("Couldn't find CPU%d in device tree...\n", cpu_id); iccfgr = mfspr(SPR_ICCFGR); - cpuinfo->icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); - cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); - cpuinfo->icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); - cpuinfo->icache_size = - cache_set_size * cpuinfo->icache_ways * cpuinfo->icache_block_size; + cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW); + cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); + cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); + cpuinfo->icache.size = + cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size; dccfgr = mfspr(SPR_DCCFGR); - cpuinfo->dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); - cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); - cpuinfo->dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); - cpuinfo->dcache_size = - cache_set_size * cpuinfo->dcache_ways * cpuinfo->dcache_block_size; + cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW); + cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); + cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); + cpuinfo->dcache.size = + cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size; if (of_property_read_u32(cpu, "clock-frequency", &cpuinfo->clock_frequency)) { @@ -320,14 +319,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV); } seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ); - seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache_size); + seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache.size); seq_printf(m, "dcache block size\t: %d bytes\n", - cpuinfo->dcache_block_size); - seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache_ways); - seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache_size); + cpuinfo->dcache.block_size); + seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache.ways); + seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache.size); seq_printf(m, "icache block size\t: %d bytes\n", - cpuinfo->icache_block_size); - seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache_ways); + cpuinfo->icache.block_size); + seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache.ways); seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n", 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW)); From 0c4a6e79ef522554bc509294dfe69b24ee78205d Mon Sep 17 00:00:00 2001 From: Sahil Siddiq Date: Sat, 19 Apr 2025 21:18:18 +0530 Subject: [PATCH 0925/2924] openrisc: Introduce new utility functions to flush and invalidate caches According to the OpenRISC architecture manual, the dcache and icache may not be present. When these caches are present, the invalidate and flush registers may be absent. The current implementation does not perform checks to verify their presence before utilizing cache registers, or invalidating and flushing cache blocks. Introduce new functions to detect the presence of cache components and related special-purpose registers. There are a few places where a range of addresses have to be flushed or invalidated and the implementation is duplicated. Introduce new utility functions and macros that generalize this implementation and reduce duplication. Signed-off-by: Sahil Siddiq Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/cacheflush.h | 17 ++++++++ arch/openrisc/include/asm/cpuinfo.h | 8 ++++ arch/openrisc/kernel/dma.c | 18 ++------- arch/openrisc/mm/cache.c | 56 +++++++++++++++++++++----- arch/openrisc/mm/init.c | 5 ++- 5 files changed, 79 insertions(+), 25 deletions(-) diff --git a/arch/openrisc/include/asm/cacheflush.h b/arch/openrisc/include/asm/cacheflush.h index 984c331ff5f474..0e60af486ec155 100644 --- a/arch/openrisc/include/asm/cacheflush.h +++ b/arch/openrisc/include/asm/cacheflush.h @@ -23,6 +23,9 @@ */ extern void local_dcache_page_flush(struct page *page); extern void local_icache_page_inv(struct page *page); +extern void local_dcache_range_flush(unsigned long start, unsigned long end); +extern void local_dcache_range_inv(unsigned long start, unsigned long end); +extern void local_icache_range_inv(unsigned long start, unsigned long end); /* * Data cache flushing always happen on the local cpu. Instruction cache @@ -38,6 +41,20 @@ extern void local_icache_page_inv(struct page *page); extern void smp_icache_page_inv(struct page *page); #endif /* CONFIG_SMP */ +/* + * Even if the actual block size is larger than L1_CACHE_BYTES, paddr + * can be incremented by L1_CACHE_BYTES. When paddr is written to the + * invalidate register, the entire cache line encompassing this address + * is invalidated. Each subsequent reference to the same cache line will + * not affect the invalidation process. + */ +#define local_dcache_block_flush(addr) \ + local_dcache_range_flush(addr, addr + L1_CACHE_BYTES) +#define local_dcache_block_inv(addr) \ + local_dcache_range_inv(addr, addr + L1_CACHE_BYTES) +#define local_icache_block_inv(addr) \ + local_icache_range_inv(addr, addr + L1_CACHE_BYTES) + /* * Synchronizes caches. Whenever a cpu writes executable code to memory, this * should be called to make sure the processor sees the newly written code. diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h index 82f5d4c06314af..3cfc4cf0b01934 100644 --- a/arch/openrisc/include/asm/cpuinfo.h +++ b/arch/openrisc/include/asm/cpuinfo.h @@ -15,6 +15,9 @@ #ifndef __ASM_OPENRISC_CPUINFO_H #define __ASM_OPENRISC_CPUINFO_H +#include +#include + struct cache_desc { u32 size; u32 sets; @@ -34,4 +37,9 @@ struct cpuinfo_or1k { extern struct cpuinfo_or1k cpuinfo_or1k[NR_CPUS]; extern void setup_cpuinfo(void); +/* + * Check if the cache component exists. + */ +extern bool cpu_cache_is_present(const unsigned int cache_type); + #endif /* __ASM_OPENRISC_CPUINFO_H */ diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index b3edbb33b621d0..3a7b5baaa45066 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -24,9 +25,6 @@ static int page_set_nocache(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { - unsigned long cl; - struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; - pte_val(*pte) |= _PAGE_CI; /* @@ -36,8 +34,7 @@ page_set_nocache(pte_t *pte, unsigned long addr, flush_tlb_kernel_range(addr, addr + PAGE_SIZE); /* Flush page out of dcache */ - for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBFR, cl); + local_dcache_range_flush(__pa(addr), __pa(next)); return 0; } @@ -98,21 +95,14 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size) void arch_sync_dma_for_device(phys_addr_t addr, size_t size, enum dma_data_direction dir) { - unsigned long cl; - struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; - switch (dir) { case DMA_TO_DEVICE: /* Flush the dcache for the requested range */ - for (cl = addr; cl < addr + size; - cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBFR, cl); + local_dcache_range_flush(addr, addr + size); break; case DMA_FROM_DEVICE: /* Invalidate the dcache for the requested range */ - for (cl = addr; cl < addr + size; - cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBIR, cl); + local_dcache_range_inv(addr, addr + size); break; default: /* diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c index eb43b73f385580..0f265b8e73ec22 100644 --- a/arch/openrisc/mm/cache.c +++ b/arch/openrisc/mm/cache.c @@ -14,31 +14,70 @@ #include #include #include +#include #include -static __always_inline void cache_loop(struct page *page, const unsigned int reg) +/* + * Check if the cache component exists. + */ +bool cpu_cache_is_present(const unsigned int cache_type) { - unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; - unsigned long line = paddr & ~(L1_CACHE_BYTES - 1); + unsigned long upr = mfspr(SPR_UPR); + unsigned long mask = SPR_UPR_UP | cache_type; + + return !((upr & mask) ^ mask); +} + +static __always_inline void cache_loop(unsigned long paddr, unsigned long end, + const unsigned short reg, const unsigned int cache_type) +{ + if (!cpu_cache_is_present(cache_type)) + return; - while (line < paddr + PAGE_SIZE) { - mtspr(reg, line); - line += L1_CACHE_BYTES; + while (paddr < end) { + mtspr(reg, paddr); + paddr += L1_CACHE_BYTES; } } +static __always_inline void cache_loop_page(struct page *page, const unsigned short reg, + const unsigned int cache_type) +{ + unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; + unsigned long end = paddr + PAGE_SIZE; + + paddr &= ~(L1_CACHE_BYTES - 1); + + cache_loop(paddr, end, reg, cache_type); +} + void local_dcache_page_flush(struct page *page) { - cache_loop(page, SPR_DCBFR); + cache_loop_page(page, SPR_DCBFR, SPR_UPR_DCP); } EXPORT_SYMBOL(local_dcache_page_flush); void local_icache_page_inv(struct page *page) { - cache_loop(page, SPR_ICBIR); + cache_loop_page(page, SPR_ICBIR, SPR_UPR_ICP); } EXPORT_SYMBOL(local_icache_page_inv); +void local_dcache_range_flush(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_DCBFR, SPR_UPR_DCP); +} + +void local_dcache_range_inv(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_DCBIR, SPR_UPR_DCP); +} + +void local_icache_range_inv(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_ICBIR, SPR_UPR_ICP); +} + void update_cache(struct vm_area_struct *vma, unsigned long address, pte_t *pte) { @@ -58,4 +97,3 @@ void update_cache(struct vm_area_struct *vma, unsigned long address, sync_icache_dcache(folio_page(folio, nr)); } } - diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index d0cb1a0126f95d..46b8720db08e23 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -35,6 +35,7 @@ #include #include #include +#include int mem_init_done; @@ -176,8 +177,8 @@ void __init paging_init(void) barrier(); /* Invalidate instruction caches after code modification */ - mtspr(SPR_ICBIR, 0x900); - mtspr(SPR_ICBIR, 0xa00); + local_icache_block_inv(0x900); + local_icache_block_inv(0xa00); /* New TLB miss handlers and kernel page tables are in now place. * Make sure that page flags get updated for all pages in TLB by From 4e6d24a309e60251439f08f15de37b489465f17b Mon Sep 17 00:00:00 2001 From: Sahil Siddiq Date: Sat, 19 Apr 2025 21:18:19 +0530 Subject: [PATCH 0926/2924] openrisc: Add cacheinfo support Add cacheinfo support for OpenRISC. Currently, a few CPU cache attributes pertaining to OpenRISC processors are exposed along with other unrelated CPU attributes in the procfs file system (/proc/cpuinfo). However, a few cache attributes remain unexposed. Provide a mechanism that the generic cacheinfo infrastructure can employ to expose these attributes via the sysfs file system. These attributes can then be exposed in /sys/devices/system/cpu/cpuX/cache/indexN. Move the implementation to pull cache attributes from the processor's registers from arch/openrisc/kernel/setup.c with a few modifications. This implementation is based on similar work done for MIPS and LoongArch. Link: https://raw.githubusercontent.com/openrisc/doc/master/openrisc-arch-1.4-rev0.pdf Signed-off-by: Sahil Siddiq Signed-off-by: Stafford Horne --- arch/openrisc/kernel/Makefile | 2 +- arch/openrisc/kernel/cacheinfo.c | 104 +++++++++++++++++++++++++++++++ arch/openrisc/kernel/setup.c | 44 +------------ 3 files changed, 108 insertions(+), 42 deletions(-) create mode 100644 arch/openrisc/kernel/cacheinfo.c diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile index 79129161f3e031..e4c7d9bdd598d2 100644 --- a/arch/openrisc/kernel/Makefile +++ b/arch/openrisc/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := vmlinux.lds obj-y := head.o setup.o or32_ksyms.o process.o dma.o \ traps.o time.o irq.o entry.o ptrace.o signal.o \ - sys_call_table.o unwinder.o + sys_call_table.o unwinder.o cacheinfo.o obj-$(CONFIG_SMP) += smp.o sync-timer.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/openrisc/kernel/cacheinfo.c b/arch/openrisc/kernel/cacheinfo.c new file mode 100644 index 00000000000000..61230545e4ff6f --- /dev/null +++ b/arch/openrisc/kernel/cacheinfo.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * OpenRISC cacheinfo support + * + * Based on work done for MIPS and LoongArch. All original copyrights + * apply as per the original source declaration. + * + * OpenRISC implementation: + * Copyright (C) 2025 Sahil Siddiq + */ + +#include +#include +#include +#include + +static inline void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, + unsigned int level, struct cache_desc *cache, int cpu) +{ + this_leaf->type = type; + this_leaf->level = level; + this_leaf->coherency_line_size = cache->block_size; + this_leaf->number_of_sets = cache->sets; + this_leaf->ways_of_associativity = cache->ways; + this_leaf->size = cache->size; + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); +} + +int init_cache_level(unsigned int cpu) +{ + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + int leaves = 0, levels = 0; + unsigned long upr = mfspr(SPR_UPR); + unsigned long iccfgr, dccfgr; + + if (!(upr & SPR_UPR_UP)) { + printk(KERN_INFO + "-- no UPR register... unable to detect configuration\n"); + return -ENOENT; + } + + if (cpu_cache_is_present(SPR_UPR_DCP)) { + dccfgr = mfspr(SPR_DCCFGR); + cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW); + cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); + cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); + cpuinfo->dcache.size = + cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size; + leaves += 1; + printk(KERN_INFO + "-- dcache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n", + cpuinfo->dcache.size, cpuinfo->dcache.block_size, + cpuinfo->dcache.sets, cpuinfo->dcache.ways); + } else + printk(KERN_INFO "-- dcache disabled\n"); + + if (cpu_cache_is_present(SPR_UPR_ICP)) { + iccfgr = mfspr(SPR_ICCFGR); + cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW); + cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); + cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); + cpuinfo->icache.size = + cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size; + leaves += 1; + printk(KERN_INFO + "-- icache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n", + cpuinfo->icache.size, cpuinfo->icache.block_size, + cpuinfo->icache.sets, cpuinfo->icache.ways); + } else + printk(KERN_INFO "-- icache disabled\n"); + + if (!leaves) + return -ENOENT; + + levels = 1; + + this_cpu_ci->num_leaves = leaves; + this_cpu_ci->num_levels = levels; + + return 0; +} + +int populate_cache_leaves(unsigned int cpu) +{ + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + int level = 1; + + if (cpu_cache_is_present(SPR_UPR_DCP)) { + ci_leaf_init(this_leaf, CACHE_TYPE_DATA, level, &cpuinfo->dcache, cpu); + this_leaf->attributes = ((mfspr(SPR_DCCFGR) & SPR_DCCFGR_CWS) >> 8) ? + CACHE_WRITE_BACK : CACHE_WRITE_THROUGH; + this_leaf++; + } + + if (cpu_cache_is_present(SPR_UPR_ICP)) + ci_leaf_init(this_leaf, CACHE_TYPE_INST, level, &cpuinfo->icache, cpu); + + this_cpu_ci->cpu_map_populated = true; + + return 0; +} diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 66207cd7bb9e98..a9fb9cc6779ebd 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -113,21 +113,6 @@ static void print_cpuinfo(void) return; } - if (upr & SPR_UPR_DCP) - printk(KERN_INFO - "-- dcache: %4d bytes total, %2d bytes/line, %d set(s), %d way(s)\n", - cpuinfo->dcache.size, cpuinfo->dcache.block_size, - cpuinfo->dcache.sets, cpuinfo->dcache.ways); - else - printk(KERN_INFO "-- dcache disabled\n"); - if (upr & SPR_UPR_ICP) - printk(KERN_INFO - "-- icache: %4d bytes total, %2d bytes/line, %d set(s), %d way(s)\n", - cpuinfo->icache.size, cpuinfo->icache.block_size, - cpuinfo->icache.sets, cpuinfo->icache.ways); - else - printk(KERN_INFO "-- icache disabled\n"); - if (upr & SPR_UPR_DMP) printk(KERN_INFO "-- dmmu: %4d entries, %lu way(s)\n", 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), @@ -155,7 +140,6 @@ static void print_cpuinfo(void) void __init setup_cpuinfo(void) { struct device_node *cpu; - unsigned long iccfgr, dccfgr; int cpu_id = smp_processor_id(); struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id]; @@ -163,20 +147,6 @@ void __init setup_cpuinfo(void) if (!cpu) panic("Couldn't find CPU%d in device tree...\n", cpu_id); - iccfgr = mfspr(SPR_ICCFGR); - cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW); - cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); - cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); - cpuinfo->icache.size = - cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size; - - dccfgr = mfspr(SPR_DCCFGR); - cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW); - cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); - cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); - cpuinfo->dcache.size = - cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size; - if (of_property_read_u32(cpu, "clock-frequency", &cpuinfo->clock_frequency)) { printk(KERN_WARNING @@ -293,14 +263,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned int vr, cpucfgr; unsigned int avr; unsigned int version; +#ifdef CONFIG_SMP struct cpuinfo_or1k *cpuinfo = v; + seq_printf(m, "processor\t\t: %d\n", cpuinfo->coreid); +#endif vr = mfspr(SPR_VR); cpucfgr = mfspr(SPR_CPUCFGR); -#ifdef CONFIG_SMP - seq_printf(m, "processor\t\t: %d\n", cpuinfo->coreid); -#endif if (vr & SPR_VR_UVRP) { vr = mfspr(SPR_VR2); version = vr & SPR_VR2_VER; @@ -319,14 +289,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV); } seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ); - seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache.size); - seq_printf(m, "dcache block size\t: %d bytes\n", - cpuinfo->dcache.block_size); - seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache.ways); - seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache.size); - seq_printf(m, "icache block size\t: %d bytes\n", - cpuinfo->icache.block_size); - seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache.ways); seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n", 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW)); From 20a43732736ac270c35601f7f22a0bcd2db4cba4 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Fri, 28 Feb 2025 21:37:06 +0000 Subject: [PATCH 0927/2924] Documentation: openrisc: Update mailing list The librecores.org mailing list was replaced with vger.kernel.org last year after the old mail server went offline. Update the docs to reflect the new list. Signed-off-by: Stafford Horne --- Documentation/arch/openrisc/openrisc_port.rst | 6 +++--- .../translations/zh_CN/arch/openrisc/openrisc_port.rst | 6 +++--- .../translations/zh_TW/arch/openrisc/openrisc_port.rst | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/arch/openrisc/openrisc_port.rst b/Documentation/arch/openrisc/openrisc_port.rst index 1565b9546e38d7..a31ae49605763d 100644 --- a/Documentation/arch/openrisc/openrisc_port.rst +++ b/Documentation/arch/openrisc/openrisc_port.rst @@ -7,10 +7,10 @@ target architecture, specifically, is the 32-bit OpenRISC 1000 family (or1k). For information about OpenRISC processors and ongoing development: - ======= ============================= + ======= ============================== website https://openrisc.io - email openrisc@lists.librecores.org - ======= ============================= + email linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- diff --git a/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst index cadc580fa23b3b..d2e4ca8a46c76f 100644 --- a/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst +++ b/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst @@ -17,10 +17,10 @@ OpenRISC 1000系列(或1k)。 关于OpenRISC处理器和正在进行中的开发的信息: - ======= ============================= + ======= ============================== 网站 https://openrisc.io - 邮箱 openrisc@lists.librecores.org - ======= ============================= + 邮箱 linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- diff --git a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst index 422fe9f7a3f23d..86590b016d5616 100644 --- a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst +++ b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst @@ -17,10 +17,10 @@ OpenRISC 1000系列(或1k)。 關於OpenRISC處理器和正在進行中的開發的信息: - ======= ============================= + ======= ============================== 網站 https://openrisc.io - 郵箱 openrisc@lists.librecores.org - ======= ============================= + 郵箱 linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- From 66ffd2f3161124f2f5019b55d8ef3add26a002a5 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Thu, 17 Apr 2025 08:36:02 +0100 Subject: [PATCH 0928/2924] Documentation: openrisc: Update toolchain binaries URL The old development toolchain binaries were hosted in the or1k-gcc development github repo release page. However, now that we have all code upstream I cut releases from stable upstream tarballs. It does not make sense to tag the or1k-gcc github repo releases for these stable releases. Update the toolchain binaries URL to point to where they are now hosted on the or1k-toolchain-build github release page. Signed-off-by: Stafford Horne --- Documentation/arch/openrisc/openrisc_port.rst | 6 +++--- .../translations/zh_CN/arch/openrisc/openrisc_port.rst | 6 +++--- .../translations/zh_TW/arch/openrisc/openrisc_port.rst | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Documentation/arch/openrisc/openrisc_port.rst b/Documentation/arch/openrisc/openrisc_port.rst index a31ae49605763d..a8f307a3b499cf 100644 --- a/Documentation/arch/openrisc/openrisc_port.rst +++ b/Documentation/arch/openrisc/openrisc_port.rst @@ -27,11 +27,11 @@ Toolchain binaries can be obtained from openrisc.io or our github releases page. Instructions for building the different toolchains can be found on openrisc.io or Stafford's toolchain build and release scripts. - ========== ================================================= - binaries https://github.com/openrisc/or1k-gcc/releases + ========== ========================================================== + binaries https://github.com/stffrdhrn/or1k-toolchain-build/releases toolchains https://openrisc.io/software building https://github.com/stffrdhrn/or1k-toolchain-build - ========== ================================================= + ========== ========================================================== 2) Building diff --git a/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst index d2e4ca8a46c76f..d728e4db0b857a 100644 --- a/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst +++ b/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst @@ -36,11 +36,11 @@ OpenRISC工具链和Linux的构建指南 工具链的构建指南可以在openrisc.io或Stafford的工具链构建和发布脚本 中找到。 - ====== ================================================= - 二进制 https://github.com/openrisc/or1k-gcc/releases + ====== ========================================================== + 二进制 https://github.com/stffrdhrn/or1k-toolchain-build/releases 工具链 https://openrisc.io/software 构建 https://github.com/stffrdhrn/or1k-toolchain-build - ====== ================================================= + ====== ========================================================== 2) 构建 diff --git a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst index 86590b016d5616..a1e4517dc601a1 100644 --- a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst +++ b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst @@ -36,11 +36,11 @@ OpenRISC工具鏈和Linux的構建指南 工具鏈的構建指南可以在openrisc.io或Stafford的工具鏈構建和發佈腳本 中找到。 - ====== ================================================= - 二進制 https://github.com/openrisc/or1k-gcc/releases + ====== ========================================================== + 二進制 https://github.com/stffrdhrn/or1k-toolchain-build/releases 工具鏈 https://openrisc.io/software 構建 https://github.com/stffrdhrn/or1k-toolchain-build - ====== ================================================= + ====== ========================================================== 2) 構建 From 494d0939b1bda4d4ddca7d52a6ce6f808ff2c9a5 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 1 Apr 2025 15:04:02 +0800 Subject: [PATCH 0929/2924] ALSA: hda/realtek - Enable speaker for HP platform The speaker doesn't mute when plugged headphone. This platform support 4ch speakers. The speaker pin 0x14 wasn't fill verb table. After assigned model ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX. The speaker can mute when headphone was plugged. Fixes: aa8e3ef4fe53 ("ALSA: hda/realtek: Add quirks for various HP ENVY models") Signed-off-by: Kailang Yang Link: https://lore.kernel.org/eb4c14a4d85740069c909e756bbacb0e@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 877137cb09aca4..8ed613932c5b0e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -441,6 +441,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); fallthrough; case 0x10ec0215: + case 0x10ec0236: + case 0x10ec0245: + case 0x10ec0256: + case 0x10ec0257: case 0x10ec0285: case 0x10ec0289: alc_update_coef_idx(codec, 0x36, 1<<13, 0); @@ -448,12 +452,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0233: case 0x10ec0235: - case 0x10ec0236: - case 0x10ec0245: case 0x10ec0255: - case 0x10ec0256: case 0x19e58326: - case 0x10ec0257: case 0x10ec0282: case 0x10ec0283: case 0x10ec0286: @@ -10768,8 +10768,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8caf, "HP Elite mt645 G8 Mobile Thin Client", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8cbd, "HP Pavilion Aero Laptop 13-bg0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), - SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), + SND_PCI_QUIRK(0x103c, 0x8cde, "HP OmniBook Ultra Flip Laptop 14t", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), From f406005e162b660dc405b4f18bf7bcb93a515608 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Thu, 17 Apr 2025 04:19:23 +0930 Subject: [PATCH 0930/2924] ALSA: usb-audio: Add retry on -EPROTO from usb_set_interface() During initialisation of Focusrite USB audio interfaces, -EPROTO is sometimes returned from usb_set_interface(), which sometimes prevents the device from working: subsequent usb_set_interface() and uac_clock_source_is_valid() calls fail. This patch adds up to 5 retries in endpoint_set_interface(), with a delay starting at 5ms and doubling each time. 5 retries was chosen to allow for longer than expected waits for the interface to start responding correctly; in testing, a single 5ms delay was sufficient to fix the issue. Closes: https://github.com/geoffreybennett/fcp-support/issues/2 Cc: stable@vger.kernel.org Signed-off-by: Geoffrey D. Bennett Link: https://patch.msgid.link/Z//7s9dKsmVxHzY2@m.b4.vu Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index a29f28eb7d0c64..f36ec98da4601d 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -926,6 +926,8 @@ static int endpoint_set_interface(struct snd_usb_audio *chip, { int altset = set ? ep->altsetting : 0; int err; + int retries = 0; + const int max_retries = 5; if (ep->iface_ref->altset == altset) return 0; @@ -935,8 +937,13 @@ static int endpoint_set_interface(struct snd_usb_audio *chip, usb_audio_dbg(chip, "Setting usb interface %d:%d for EP 0x%x\n", ep->iface, altset, ep->ep_num); +retry: err = usb_set_interface(chip->dev, ep->iface, altset); if (err < 0) { + if (err == -EPROTO && ++retries <= max_retries) { + msleep(5 * (1 << (retries - 1))); + goto retry; + } usb_audio_err_ratelimited( chip, "%d:%d: usb_set_interface failed (%d)\n", ep->iface, altset, err); From f81b33582f9339d2dc17c69b92040d3650bb4bae Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sat, 12 Apr 2025 09:57:14 +0200 Subject: [PATCH 0931/2924] RDMA/rxe: Fix slab-use-after-free Read in rxe_queue_cleanup bug Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x7d/0xa0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xcf/0x610 mm/kasan/report.c:489 kasan_report+0xb5/0xe0 mm/kasan/report.c:602 rxe_queue_cleanup+0xd0/0xe0 drivers/infiniband/sw/rxe/rxe_queue.c:195 rxe_cq_cleanup+0x3f/0x50 drivers/infiniband/sw/rxe/rxe_cq.c:132 __rxe_cleanup+0x168/0x300 drivers/infiniband/sw/rxe/rxe_pool.c:232 rxe_create_cq+0x22e/0x3a0 drivers/infiniband/sw/rxe/rxe_verbs.c:1109 create_cq+0x658/0xb90 drivers/infiniband/core/uverbs_cmd.c:1052 ib_uverbs_create_cq+0xc7/0x120 drivers/infiniband/core/uverbs_cmd.c:1095 ib_uverbs_write+0x969/0xc90 drivers/infiniband/core/uverbs_main.c:679 vfs_write fs/read_write.c:677 [inline] vfs_write+0x26a/0xcc0 fs/read_write.c:659 ksys_write+0x1b8/0x200 fs/read_write.c:731 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xaa/0x1b0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f In the function rxe_create_cq, when rxe_cq_from_init fails, the function rxe_cleanup will be called to handle the allocated resources. In fact, some memory resources have already been freed in the function rxe_cq_from_init. Thus, this problem will occur. The solution is to let rxe_cleanup do all the work. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://paste.ubuntu.com/p/tJgC42wDf6/ Tested-by: liuyi Signed-off-by: Zhu Yanjun Link: https://patch.msgid.link/20250412075714.3257358-1-yanjun.zhu@linux.dev Reviewed-by: Daisuke Matsuda Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_cq.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index fec87c9030abdc..fffd144d509eb0 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -56,11 +56,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); - if (err) { - vfree(cq->queue->buf); - kfree(cq->queue); + if (err) return err; - } cq->is_user = uresp; From 80f2ab46c2ee16f046b55306dc4db4be53125016 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 14 Apr 2025 18:42:30 -0500 Subject: [PATCH 0932/2924] irdma: free iwdev->rf after removing MSI-X Currently iwdev->rf is allocated in irdma_probe(), but free in irdma_ib_dealloc_device(). It can be misleading. Move the free to irdma_remove() to be more obvious. Freeing in irdma_ib_dealloc_device() leads to KASAN use-after-free issue. Which can also lead to NULL pointer dereference. Fix this. irdma_deinit_interrupts() can't be moved before freeing iwdef->rf, because in this case deinit interrupts will be done before freeing irqs. The simplest solution is to move kfree(iwdev->rf) to irdma_remove(). Reproducer: sudo rmmod irdma Minified splat(s): BUG: KASAN: use-after-free in irdma_remove+0x257/0x2d0 [irdma] Call Trace: ? __pfx__raw_spin_lock_irqsave+0x10/0x10 ? kfree+0x253/0x450 ? irdma_remove+0x257/0x2d0 [irdma] kasan_report+0xed/0x120 ? irdma_remove+0x257/0x2d0 [irdma] irdma_remove+0x257/0x2d0 [irdma] auxiliary_bus_remove+0x56/0x80 device_release_driver_internal+0x371/0x530 ? kernfs_put.part.0+0x147/0x310 driver_detach+0xbf/0x180 bus_remove_driver+0x11b/0x2a0 auxiliary_driver_unregister+0x1a/0x50 irdma_exit_module+0x40/0x4c [irdma] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] RIP: 0010:ice_free_rdma_qvector+0x2a/0xa0 [ice] Call Trace: ? ice_free_rdma_qvector+0x2a/0xa0 [ice] irdma_remove+0x179/0x2d0 [irdma] auxiliary_bus_remove+0x56/0x80 device_release_driver_internal+0x371/0x530 ? kobject_put+0x61/0x4b0 driver_detach+0xbf/0x180 bus_remove_driver+0x11b/0x2a0 auxiliary_driver_unregister+0x1a/0x50 irdma_exit_module+0x40/0x4c [irdma] Reported-by: Marcin Szycik Closes: https://lore.kernel.org/netdev/8e533834-4564-472f-b29b-4f1cb7730053@linux.intel.com/ Fixes: 3e0d3cb3fbe0 ("ice, irdma: move interrupts code to irdma") Reviewed-by: Marcin Szycik Signed-off-by: Michal Swiatkowski Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250414234231.523-1-tatyana.e.nikolova@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/main.c | 2 ++ drivers/infiniband/hw/irdma/verbs.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 1ee8969595d3d6..d10fd16dcec3d8 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -255,6 +255,8 @@ static void irdma_remove(struct auxiliary_device *aux_dev) ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false); irdma_deinit_interrupts(iwdev->rf, pf); + kfree(iwdev->rf); + pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index eeb932e5873036..1e8c92826de221 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -4871,5 +4871,4 @@ void irdma_ib_dealloc_device(struct ib_device *ibdev) irdma_rt_deinit_hw(iwdev); irdma_ctrl_deinit_hw(iwdev->rf); - kfree(iwdev->rf); } From 4bcc063939a560f05b05b34be68d20045a646e6e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 14 Apr 2025 18:42:31 -0500 Subject: [PATCH 0933/2924] ice, irdma: fix an off by one in error handling code If we don't allocate the MIN number of IRQs then we need to free what we have and return -ENOMEM. The problem is this loop is off by one so it frees an entry that wasn't allocated and it doesn't free the first entry where i == 0. Fixes: 3e0d3cb3fbe0 ("ice, irdma: move interrupts code to irdma") Signed-off-by: Dan Carpenter Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20250414234231.523-2-tatyana.e.nikolova@intel.com Reviewed-by: Michal Swiatkowski Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index d10fd16dcec3d8..7599e31b574369 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -221,7 +221,7 @@ static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) break; if (i < IRDMA_MIN_MSIX) { - for (; i > 0; i--) + while (--i >= 0) ice_free_rdma_qvector(pf, &rf->msix_entries[i]); kfree(rf->msix_entries); From d5d45a7f26194460964eb5677a9226697f7b7fdd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 10:33:23 -0700 Subject: [PATCH 0934/2924] gcc-15: make 'unterminated string initialization' just a warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-15 enabling -Wunterminated-string-initialization in -Wextra by default was done with the best intentions, but the warning is still quite broken. What annoys me about the warning is that this is a very traditional AND CORRECT way to initialize fixed byte arrays in C: unsigned char hex[16] = "0123456789abcdef"; and we use this all over the kernel. And the warning is fine, but gcc developers apparently never made a reasonable way to disable it. As is (sadly) tradition with these things. Yes, there's "__attribute__((nonstring))", and we have a macro to make that absolutely disgusting syntax more palatable (ie the kernel syntax for that monstrosity is just "__nonstring"). But that attribute is misdesigned. What you'd typically want to do is tell the compiler that you are using a type that isn't a string but a byte array, but that doesn't work at all: warning: ‘nonstring’ attribute does not apply to types [-Wattributes] and because of this fundamental mis-design, you then have to mark each instance of that pattern. This is particularly noticeable in our ACPI code, because ACPI has this notion of a 4-byte "type name" that gets used all over, and is exactly this kind of byte array. This is a sad oversight, because the warning is useful, but really would be so much better if gcc had also given a sane way to indicate that we really just want a byte array type at a type level, not the broken "each and every array definition" level. So now instead of creating a nice "ACPI name" type using something like typedef char acpi_name_t[4] __nonstring; we have to do things like char name[ACPI_NAMESEG_SIZE] __nonstring; in every place that uses this concept and then happens to have the typical initializers. This is annoying me mainly because I think the warning _is_ a good warning, which is why I'm not just turning it off in disgust. But it is hampered by this bad implementation detail. [ And obviously I'm doing this now because system upgrades for me are something that happen in the middle of the release cycle: don't do it before or during travel, or just before or during the busy merge window period. ] Signed-off-by: Linus Torvalds --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index e65f8735c7bf64..0a9992db4fe026 100644 --- a/Makefile +++ b/Makefile @@ -1056,6 +1056,9 @@ KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) +#Currently, disable -Wunterminated-string-initialization as an error +KBUILD_CFLAGS += $(call cc-option, -Wno-error=unterminated-string-initialization) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow From 4b4bd8c50f4836ba7d3fcfd6c90f96d2605779fe Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:02:18 -0700 Subject: [PATCH 0935/2924] gcc-15: acpi: sprinkle random '__nonstring' crumbles around This is not great: I'd much rather introduce a typedef that is a "ACPI name byte buffer", and use that to mark these special 4-byte ACPI names that do not use NUL termination. But as noted in the previous commit ("gcc-15: make 'unterminated string initialization' just a warning") gcc doesn't actually seem to support that notion, so instead you have to just mark every single array declaration individually. So this is not pretty, but this gets rid of the bulk of the annoying warnings during an allmodconfig build for me. Signed-off-by: Linus Torvalds --- drivers/acpi/acpica/aclocal.h | 4 ++-- drivers/acpi/acpica/nsrepair2.c | 2 +- drivers/acpi/tables.c | 2 +- include/acpi/actbl.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 6f4fe47c955bd0..6481c48c22bb7e 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -293,7 +293,7 @@ acpi_status (*acpi_internal_method) (struct acpi_walk_state * walk_state); * expected_return_btypes - Allowed type(s) for the return value */ struct acpi_name_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] __nonstring; u16 argument_list; u8 expected_btypes; }; @@ -370,7 +370,7 @@ typedef acpi_status (*acpi_object_converter) (struct acpi_namespace_node * converted_object); struct acpi_simple_repair_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] __nonstring; u32 unexpected_btypes; u32 package_index; acpi_object_converter object_converter; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 1bb7b71f07f1f6..330b5e4711daca 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -25,7 +25,7 @@ acpi_status (*acpi_repair_function) (struct acpi_evaluate_info * info, return_object_ptr); typedef struct acpi_repair_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] __nonstring; acpi_repair_function repair_function; } acpi_repair_info; diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 2295abbecd14f5..b5205d464a8a2d 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -396,7 +396,7 @@ static u8 __init acpi_table_checksum(u8 *buffer, u32 length) } /* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ -static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = { +static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst __nonstring = { ACPI_SIG_BERT, ACPI_SIG_BGRT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 451f6276da494f..2fc89704be1797 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -66,7 +66,7 @@ ******************************************************************************/ struct acpi_table_header { - char signature[ACPI_NAMESEG_SIZE]; /* ASCII table signature */ + char signature[ACPI_NAMESEG_SIZE] __nonstring; /* ASCII table signature */ u32 length; /* Length of table in bytes, including this header */ u8 revision; /* ACPI Specification minor version number */ u8 checksum; /* To make sum of entire table == 0 */ From be913e7c4034bd7a5cbfc3d53188344dc588d45c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:04:00 -0700 Subject: [PATCH 0936/2924] gcc-15: get rid of misc extra NUL character padding This removes two cases of explicit NUL padding that now causes warnings because of '-Wunterminated-string-initialization' being part of -Wextra in gcc-15. Gcc is being silly in this case when it says that it truncates a NUL terminator, because in these cases there were _multiple_ NUL characters. But we can get rid of the warning by just simplifying the two initializers that trigger the warning for me, so this does exactly that. I'm not sure why the power supply code did that odd .attr_name = #_name "\0", pattern: it was introduced in commit 2cabeaf15129 ("power: supply: core: Cleanup power supply sysfs attribute list"), but that 'attr_name[]' field is an explicitly sized character array in a statically initialized variable, and a string initializer always has a terminating NUL _and_ statically initialized character arrays are zero-padded anyway, so it really seems to be rather extraneous belt-and-suspenders. The zero_uuid[16] initialization in drivers/md/bcache/super.c makes perfect sense, but it isn't necessary for the same reasons, and not worth the new gcc warning noise. Signed-off-by: Linus Torvalds --- drivers/md/bcache/super.c | 2 +- drivers/power/supply/power_supply_sysfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e42f1400cea9d7..813b38aec3e4e0 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -546,7 +546,7 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid) static struct uuid_entry *uuid_find_empty(struct cache_set *c) { - static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + static const char zero_uuid[16] = { 0 }; return uuid_find(c, zero_uuid); } diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index edb058c19c9c44..439dd0bf8644e5 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -33,7 +33,7 @@ struct power_supply_attr { [POWER_SUPPLY_PROP_ ## _name] = \ { \ .prop_name = #_name, \ - .attr_name = #_name "\0", \ + .attr_name = #_name, \ .text_values = _text, \ .text_values_len = _len, \ } From 05e8d261a34e5c637e37be55c26e42cf5c75ee5c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:18:55 -0700 Subject: [PATCH 0937/2924] gcc-15: add '__nonstring' markers to byte arrays All of these cases are perfectly valid and good traditional C, but hit by the "you're not NUL-terminating your byte array" warning. And none of the cases want any terminating NUL character. Mark them __nonstring to shut up gcc-15 (and in the case of the ak8974 magnetometer driver, I just removed the explicit array size and let gcc expand the 3-byte and 6-byte arrays by one extra byte, because it was the simpler change). Signed-off-by: Linus Torvalds --- drivers/iio/magnetometer/ak8974.c | 4 ++-- drivers/input/joystick/magellan.c | 2 +- drivers/net/wireless/ath/carl9170/fw.c | 2 +- fs/cachefiles/key.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index 08975c60e325cb..7bc341c6969762 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -535,8 +535,8 @@ static int ak8974_detect(struct ak8974 *ak8974) fab_data2, sizeof(fab_data2)); for (i = 0; i < 3; ++i) { - static const char axis[3] = "XYZ"; - static const char pgaxis[6] = "ZYZXYX"; + static const char axis[] = "XYZ"; + static const char pgaxis[] = "ZYZXYX"; unsigned offz = le16_to_cpu(fab_data2[i]) & 0x7F; unsigned fine = le16_to_cpu(fab_data1[i]); unsigned sens = le16_to_cpu(fab_data1[i + 3]); diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c index 2eaa25c9c68c25..d73389af4dd5eb 100644 --- a/drivers/input/joystick/magellan.c +++ b/drivers/input/joystick/magellan.c @@ -48,7 +48,7 @@ struct magellan { static int magellan_crunch_nibbles(unsigned char *data, int count) { - static unsigned char nibbles[16] = "0AB3D56GH9:K #include "internal.h" -static const char cachefiles_charmap[64] = +static const char cachefiles_charmap[64] __nonstring = "0123456789" /* 0 - 9 */ "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */ From ac71fabf15679fc7bc56c51bc92bd4b626564c37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:30:11 -0700 Subject: [PATCH 0938/2924] gcc-15: work around sequence-point warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The C sequence points are complicated things, and gcc-15 has apparently added a warning for the case where an object is both used and modified multiple times within the same sequence point. That's a great warning. Or rather, it would be a great warning, except gcc-15 seems to not really be very exact about it, and doesn't notice that the modification are to two entirely different members of the same object: the array counter and the array entries. So that seems kind of silly. That said, the code that gcc complains about is unnecessarily complicated, so moving the array counter update into a separate statement seems like the most straightforward fix for these warnings: drivers/net/wireless/intel/iwlwifi/mld/d3.c: In function ‘iwl_mld_set_netdetect_info’: drivers/net/wireless/intel/iwlwifi/mld/d3.c:1102:66: error: operation on ‘netdetect_info->n_matches’ may be undefined [-Werror=sequence-point] 1102 | netdetect_info->matches[netdetect_info->n_matches++] = match; | ~~~~~~~~~~~~~~~~~~~~~~~~~^~ drivers/net/wireless/intel/iwlwifi/mld/d3.c:1120:58: error: operation on ‘match->n_channels’ may be undefined [-Werror=sequence-point] 1120 | match->channels[match->n_channels++] = | ~~~~~~~~~~~~~~~~~^~ side note: the code at that second warning is actively buggy, and only works on little-endian machines that don't do strict alignment checks. The code casts an array of integers into an array of unsigned long in order to use our bitmap iterators. That happens to work fine on any sane architecture, but it's still wrong. This does *not* fix that more serious problem. This only splits the two assignments into two statements and fixes the compiler warning. I need to get rid of the new warnings in order to be able to actually do any build testing. Signed-off-by: Linus Torvalds --- drivers/net/wireless/intel/iwlwifi/mld/d3.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/d3.c b/drivers/net/wireless/intel/iwlwifi/mld/d3.c index 2c6e8ecd93b7ba..ee99298eebf595 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/d3.c @@ -1099,7 +1099,8 @@ iwl_mld_set_netdetect_info(struct iwl_mld *mld, if (!match) return; - netdetect_info->matches[netdetect_info->n_matches++] = match; + netdetect_info->matches[netdetect_info->n_matches] = match; + netdetect_info->n_matches++; /* We inverted the order of the SSIDs in the scan * request, so invert the index here. @@ -1116,9 +1117,11 @@ iwl_mld_set_netdetect_info(struct iwl_mld *mld, for_each_set_bit(j, (unsigned long *)&matches[i].matching_channels[0], - sizeof(matches[i].matching_channels)) - match->channels[match->n_channels++] = + sizeof(matches[i].matching_channels)) { + match->channels[match->n_channels] = netdetect_cfg->channels[j]->center_freq; + match->n_channels++; + } } } From 9c32cda43eb78f78c73aee4aa344b777714e259b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 13:43:47 -0700 Subject: [PATCH 0939/2924] Linux 6.15-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0a9992db4fe026..3dcad231966222 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* From 9d7a0577c9db35c4cc52db90bc415ea248446472 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 15:30:53 -0700 Subject: [PATCH 0940/2924] gcc-15: disable '-Wunterminated-string-initialization' entirely for now I had left the warning around but as a non-fatal error to get my gcc-15 builds going, but fixed up some of the most annoying warning cases so that it wouldn't be *too* verbose. Because I like the _concept_ of the warning, even if I detested the implementation to shut it up. It turns out the implementation to shut it up is even more broken than I thought, and my "shut up most of the warnings" patch just caused fatal errors on gcc-14 instead. I had tested with clang, but when I upgrade my development environment, I try to do it on all machines because I hate having different systems to maintain, and hadn't realized that gcc-14 now had issues. The ACPI case is literally why I wanted to have a *type* that doesn't trigger the warning (see commit d5d45a7f2619: "gcc-15: make 'unterminated string initialization' just a warning"), instead of marking individual places as "__nonstring". But gcc-14 doesn't like that __nonstring location that shut gcc-15 up, because it's on an array of char arrays, not on one single array: drivers/acpi/tables.c:399:1: error: 'nonstring' attribute ignored on objects of type 'const char[][4]' [-Werror=attributes] 399 | static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst __nonstring = { | ^~~~~~ and my attempts to nest it properly with a type had failed, because of how gcc doesn't like marking the types as having attributes, only symbols. There may be some trick to it, but I was already annoyed by the bad attribute design, now I'm just entirely fed up with it. I wish gcc had a proper way to say "this type is a *byte* array, not a string". The obvious thing would be to distinguish between "char []" and an explicitly signed "unsigned char []" (as opposed to an implicitly unsigned char, which is typically an architecture-specific default, but for the kernel is universal thanks to '-funsigned-char'). But any "we can typedef a 8-bit type to not become a string just because it's an array" model would be fine. But "__attribute__((nonstring))" is sadly not that sane model. Reported-by: Chris Clayton Fixes: 4b4bd8c50f48 ("gcc-15: acpi: sprinkle random '__nonstring' crumbles around") Fixes: d5d45a7f2619 ("gcc-15: make 'unterminated string initialization' just a warning") Signed-off-by: Linus Torvalds --- Makefile | 4 ++-- drivers/acpi/tables.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 3dcad231966222..e94bbb2298c8d6 100644 --- a/Makefile +++ b/Makefile @@ -1056,8 +1056,8 @@ KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) -#Currently, disable -Wunterminated-string-initialization as an error -KBUILD_CFLAGS += $(call cc-option, -Wno-error=unterminated-string-initialization) +#Currently, disable -Wunterminated-string-initialization as broken +KBUILD_CFLAGS += $(call cc-option, -Wno-unterminated-string-initialization) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index b5205d464a8a2d..2295abbecd14f5 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -396,7 +396,7 @@ static u8 __init acpi_table_checksum(u8 *buffer, u32 length) } /* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ -static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst __nonstring = { +static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = { ACPI_SIG_BERT, ACPI_SIG_BGRT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, From 4c0d2c67ac6d54ba71bb3438147b144c25fdee2c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 17 Apr 2025 20:30:18 -0400 Subject: [PATCH 0941/2924] bcachefs: Fix early startup error path Don't set JOURNAL_running until we're also calling journal_space_available() for the first time. If JOURNAL_running is set, shutdown will write an empty journal entry - but this will hit an assert in journal_entry_open() if we've never called journal_space_available(). Reported-by: syzbot+53bb24d476ef8368a7f0@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 17 +++++++++++++++-- fs/bcachefs/journal.h | 7 +------ fs/bcachefs/recovery.c | 6 +++--- fs/bcachefs/super.c | 19 +++++++++---------- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index d8f74b6d0a75a8..84cb74ba91e621 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1462,8 +1462,6 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->last_empty_seq = cur_seq - 1; /* to match j->seq */ spin_lock(&j->lock); - - set_bit(JOURNAL_running, &j->flags); j->last_flush_write = jiffies; j->reservations.idx = journal_cur_seq(j); @@ -1474,6 +1472,21 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) return 0; } +void bch2_journal_set_replay_done(struct journal *j) +{ + /* + * journal_space_available must happen before setting JOURNAL_running + * JOURNAL_running must happen before JOURNAL_replay_done + */ + spin_lock(&j->lock); + bch2_journal_space_available(j); + + set_bit(JOURNAL_need_flush_write, &j->flags); + set_bit(JOURNAL_running, &j->flags); + set_bit(JOURNAL_replay_done, &j->flags); + spin_unlock(&j->lock); +} + /* init/exit: */ void bch2_dev_journal_exit(struct bch_dev *ca) diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 47828771f9c239..641e20c05a147d 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -437,12 +437,6 @@ static inline int bch2_journal_error(struct journal *j) struct bch_dev; -static inline void bch2_journal_set_replay_done(struct journal *j) -{ - BUG_ON(!test_bit(JOURNAL_running, &j->flags)); - set_bit(JOURNAL_replay_done, &j->flags); -} - void bch2_journal_unblock(struct journal *); void bch2_journal_block(struct journal *); struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u64, bool *); @@ -459,6 +453,7 @@ void bch2_dev_journal_stop(struct journal *, struct bch_dev *); void bch2_fs_journal_stop(struct journal *); int bch2_fs_journal_start(struct journal *, u64); +void bch2_journal_set_replay_done(struct journal *); void bch2_dev_journal_exit(struct bch_dev *); int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 606d684e6f23fc..bea578e339886c 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1129,13 +1129,13 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - set_bit(BCH_FS_accounting_replay_done, &c->flags); - bch2_journal_set_replay_done(&c->journal); - ret = bch2_fs_read_write_early(c); if (ret) goto err; + set_bit(BCH_FS_accounting_replay_done, &c->flags); + bch2_journal_set_replay_done(&c->journal); + for_each_member_device(c, ca) { ret = bch2_dev_usage_init(ca, false); if (ret) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e8a17ed1615d30..a8bc02540cce75 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -466,29 +466,28 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) clear_bit(BCH_FS_clean_shutdown, &c->flags); + __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) { + bch2_dev_allocator_add(c, ca); + percpu_ref_reinit(&ca->io_ref[WRITE]); + } + bch2_recalc_capacity(c); + /* * First journal write must be a flush write: after a clean shutdown we * don't read the journal, so the first journal write may end up * overwriting whatever was there previously, and there must always be * at least one non-flush write in the journal or recovery will fail: */ + spin_lock(&c->journal.lock); set_bit(JOURNAL_need_flush_write, &c->journal.flags); set_bit(JOURNAL_running, &c->journal.flags); - - __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) { - bch2_dev_allocator_add(c, ca); - percpu_ref_reinit(&ca->io_ref[WRITE]); - } - bch2_recalc_capacity(c); + bch2_journal_space_available(&c->journal); + spin_unlock(&c->journal.lock); ret = bch2_fs_mark_dirty(c); if (ret) goto err; - spin_lock(&c->journal.lock); - bch2_journal_space_available(&c->journal); - spin_unlock(&c->journal.lock); - ret = bch2_journal_reclaim_start(&c->journal); if (ret) goto err; From aa6a591f0fd740e27c54110f8425b53133ad4165 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 17 Apr 2025 22:38:14 -0400 Subject: [PATCH 0942/2924] bcachefs: Fix null ptr deref in bch2_snapshot_tree_oldest_subvol() Reported-by: syzbot+baee8591f336cab0958b@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index b7de29aed83917..fec569c7deb1c7 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -396,7 +396,7 @@ u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) u32 subvol = 0, s; rcu_read_lock(); - while (id) { + while (id && bch2_snapshot_exists(c, id)) { s = snapshot_t(c, id)->subvol; if (s && (!subvol || s < subvol)) From 417f01e726036b564e2e14c39b2be58e93bf7971 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 17 Apr 2025 22:44:16 -0400 Subject: [PATCH 0943/2924] bcachefs: Error ratelimiting is no longer only during fsck We now more often do repair automatically, without the user invoking fsck - and sometimes that can involve fixing lots of errors, so let's avoid flooding the dmesg log. Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 17 ++++++++++++----- fs/bcachefs/error.h | 1 + fs/bcachefs/super.c | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index baf5dfb3229843..925b0b54ea2f93 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -272,9 +272,6 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, { struct fsck_err_state *s; - if (!test_bit(BCH_FS_fsck_running, &c->flags)) - return NULL; - list_for_each_entry(s, &c->fsck_error_msgs, list) if (s->id == id) { /* @@ -639,14 +636,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, return ret; } -void bch2_flush_fsck_errs(struct bch_fs *c) +static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) { struct fsck_err_state *s, *n; mutex_lock(&c->fsck_error_msgs_lock); list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { - if (s->ratelimited && s->last_msg) + if (print && s->ratelimited && s->last_msg) bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); list_del(&s->list); @@ -657,6 +654,16 @@ void bch2_flush_fsck_errs(struct bch_fs *c) mutex_unlock(&c->fsck_error_msgs_lock); } +void bch2_flush_fsck_errs(struct bch_fs *c) +{ + __bch2_flush_fsck_errs(c, true); +} + +void bch2_free_fsck_errs(struct bch_fs *c) +{ + __bch2_flush_fsck_errs(c, false); +} + int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum, u64 offset) { diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index d0d024dc714b11..4a364fd44abe0e 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -93,6 +93,7 @@ int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, _flags, BCH_FSCK_ERR_##_err_type, __VA_ARGS__) void bch2_flush_fsck_errs(struct bch_fs *); +void bch2_free_fsck_errs(struct bch_fs *); #define fsck_err_wrap(_do) \ ({ \ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index a8bc02540cce75..adbf3bbed6bce2 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -552,6 +552,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); + bch2_free_fsck_errs(c); bch2_fs_accounting_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); From 71f8e806a5e4edada72456ee3b2e2d7eab6fadee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 18 Apr 2025 12:49:00 -0400 Subject: [PATCH 0944/2924] bcachefs: Stricter checks on "key allowed in this btree" Syzbot managed to come up with a filesystem where check/repair got rather confused at finding a reflink pointer in the inodes btree. Currently, the "key allowed in this btree" checks only apply at commit time, not read time - for forwards compatibility. It seems this is too loose. Now, strict key type allowed checks apply: - at commit time (no forward compatibility issues) - for btree node pointers - if it's a known btree, known key type, and the key type has the "BKEY_TYPE_strict_btree_checks" flag. This means we still have the option of using generic key types - e.g. KEY_TYPE_error, KEY_TYPE_set - on more existing btrees in the future, while most key types that are intended for only a specific btree get stricter checks. Reported-by: syzbot+baee8591f336cab0958b@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 80 ++++++++++++++++++----------------- fs/bcachefs/bkey_methods.c | 24 +++++++++-- 2 files changed, 62 insertions(+), 42 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index a3db328dee31f5..377f04d92a1443 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -366,6 +366,10 @@ static inline void bkey_init(struct bkey *k) #define __BKEY_PADDED(key, pad) \ struct bkey_i key; __u64 key ## _pad[pad] +enum bch_bkey_type_flags { + BKEY_TYPE_strict_btree_checks = BIT(0), +}; + /* * - DELETED keys are used internally to mark keys that should be ignored but * override keys in composition order. Their version number is ignored. @@ -383,46 +387,46 @@ static inline void bkey_init(struct bkey *k) * * - WHITEOUT: for hash table btrees */ -#define BCH_BKEY_TYPES() \ - x(deleted, 0) \ - x(whiteout, 1) \ - x(error, 2) \ - x(cookie, 3) \ - x(hash_whiteout, 4) \ - x(btree_ptr, 5) \ - x(extent, 6) \ - x(reservation, 7) \ - x(inode, 8) \ - x(inode_generation, 9) \ - x(dirent, 10) \ - x(xattr, 11) \ - x(alloc, 12) \ - x(quota, 13) \ - x(stripe, 14) \ - x(reflink_p, 15) \ - x(reflink_v, 16) \ - x(inline_data, 17) \ - x(btree_ptr_v2, 18) \ - x(indirect_inline_data, 19) \ - x(alloc_v2, 20) \ - x(subvolume, 21) \ - x(snapshot, 22) \ - x(inode_v2, 23) \ - x(alloc_v3, 24) \ - x(set, 25) \ - x(lru, 26) \ - x(alloc_v4, 27) \ - x(backpointer, 28) \ - x(inode_v3, 29) \ - x(bucket_gens, 30) \ - x(snapshot_tree, 31) \ - x(logged_op_truncate, 32) \ - x(logged_op_finsert, 33) \ - x(accounting, 34) \ - x(inode_alloc_cursor, 35) +#define BCH_BKEY_TYPES() \ + x(deleted, 0, 0) \ + x(whiteout, 1, 0) \ + x(error, 2, 0) \ + x(cookie, 3, 0) \ + x(hash_whiteout, 4, BKEY_TYPE_strict_btree_checks) \ + x(btree_ptr, 5, BKEY_TYPE_strict_btree_checks) \ + x(extent, 6, BKEY_TYPE_strict_btree_checks) \ + x(reservation, 7, BKEY_TYPE_strict_btree_checks) \ + x(inode, 8, BKEY_TYPE_strict_btree_checks) \ + x(inode_generation, 9, BKEY_TYPE_strict_btree_checks) \ + x(dirent, 10, BKEY_TYPE_strict_btree_checks) \ + x(xattr, 11, BKEY_TYPE_strict_btree_checks) \ + x(alloc, 12, BKEY_TYPE_strict_btree_checks) \ + x(quota, 13, BKEY_TYPE_strict_btree_checks) \ + x(stripe, 14, BKEY_TYPE_strict_btree_checks) \ + x(reflink_p, 15, BKEY_TYPE_strict_btree_checks) \ + x(reflink_v, 16, BKEY_TYPE_strict_btree_checks) \ + x(inline_data, 17, BKEY_TYPE_strict_btree_checks) \ + x(btree_ptr_v2, 18, BKEY_TYPE_strict_btree_checks) \ + x(indirect_inline_data, 19, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v2, 20, BKEY_TYPE_strict_btree_checks) \ + x(subvolume, 21, BKEY_TYPE_strict_btree_checks) \ + x(snapshot, 22, BKEY_TYPE_strict_btree_checks) \ + x(inode_v2, 23, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v3, 24, BKEY_TYPE_strict_btree_checks) \ + x(set, 25, 0) \ + x(lru, 26, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v4, 27, BKEY_TYPE_strict_btree_checks) \ + x(backpointer, 28, BKEY_TYPE_strict_btree_checks) \ + x(inode_v3, 29, BKEY_TYPE_strict_btree_checks) \ + x(bucket_gens, 30, BKEY_TYPE_strict_btree_checks) \ + x(snapshot_tree, 31, BKEY_TYPE_strict_btree_checks) \ + x(logged_op_truncate, 32, BKEY_TYPE_strict_btree_checks) \ + x(logged_op_finsert, 33, BKEY_TYPE_strict_btree_checks) \ + x(accounting, 34, BKEY_TYPE_strict_btree_checks) \ + x(inode_alloc_cursor, 35, BKEY_TYPE_strict_btree_checks) enum bch_bkey_type { -#define x(name, nr) KEY_TYPE_##name = nr, +#define x(name, nr, ...) KEY_TYPE_##name = nr, BCH_BKEY_TYPES() #undef x KEY_TYPE_MAX, diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 15c93576b5c204..00d05ccfaf73bb 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -21,7 +21,7 @@ #include "xattr.h" const char * const bch2_bkey_types[] = { -#define x(name, nr) #name, +#define x(name, nr, ...) #name, BCH_BKEY_TYPES() #undef x NULL @@ -115,7 +115,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ }) const struct bkey_ops bch2_bkey_ops[] = { -#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name, +#define x(name, nr, ...) [KEY_TYPE_##name] = bch2_bkey_ops_##name, BCH_BKEY_TYPES() #undef x }; @@ -155,6 +155,12 @@ static u64 bch2_key_types_allowed[] = { #undef x }; +static const enum bch_bkey_type_flags bch2_bkey_type_flags[] = { +#define x(name, nr, flags) [KEY_TYPE_##name] = flags, + BCH_BKEY_TYPES() +#undef x +}; + const char *bch2_btree_node_type_str(enum btree_node_type type) { return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); @@ -177,8 +183,18 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, if (type >= BKEY_TYPE_NR) return 0; - bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && - (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) && + enum bch_bkey_type_flags bkey_flags = k.k->type < KEY_TYPE_MAX + ? bch2_bkey_type_flags[k.k->type] + : 0; + + bool strict_key_type_allowed = + (from.flags & BCH_VALIDATE_commit) || + type == BKEY_TYPE_btree || + (from.btree < BTREE_ID_NR && + (bkey_flags & BKEY_TYPE_strict_btree_checks)); + + bkey_fsck_err_on(strict_key_type_allowed && + k.k->type < KEY_TYPE_MAX && !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", From 6468aef231890806ccc4e921b111ff9275880832 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 18 Apr 2025 16:42:50 -0400 Subject: [PATCH 0945/2924] bcachefs: Ensure journal space is block size aligned We don't require that bucket size is block size aligned (although it should be!) - so we need to handle this in the journal code. This fixes an assertion pop in jorunal_entry_close(), where the journal entry overruns available space - after rounding it up to block size. Fixes: https://github.com/koverstreet/bcachefs/issues/854 Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 5d1547aa118ac8..ea670c3c43d8a0 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -252,7 +252,10 @@ void bch2_journal_space_available(struct journal *j) bch2_journal_set_watermark(j); out: - j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0; + j->cur_entry_sectors = !ret + ? round_down(j->space[journal_space_discarded].next_entry, + block_sectors(c)) + : 0; j->cur_entry_error = ret; if (!ret) From 4c327d03d7c9d5e815a2aada112c442e4a2f8665 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 18 Apr 2025 13:38:23 -0400 Subject: [PATCH 0946/2924] bcachefs: Change __journal_entry_close() assert to ERO We've got some reports of this happening in the wild, and need a bit more info to debug it: https://github.com/koverstreet/bcachefs/issues/854 https://www.reddit.com/r/bcachefs/comments/1k28kjm/surprise_soft_lockup/ Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 84cb74ba91e621..bb45d363419488 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -281,7 +281,24 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t sectors = vstruct_blocks_plus(buf->data, c->block_bits, buf->u64s_reserved) << c->block_bits; - BUG_ON(sectors > buf->sectors); + if (unlikely(sectors > buf->sectors)) { + struct printbuf err = PRINTBUF; + err.atomic++; + + prt_printf(&err, "journal entry overran reserved space: %u > %u\n", + sectors, buf->sectors); + prt_printf(&err, "buf u64s %u u64s reserved %u cur_entry_u64s %u block_bits %u\n", + le32_to_cpu(buf->data->u64s), buf->u64s_reserved, + j->cur_entry_u64s, + c->block_bits); + prt_printf(&err, "fatal error - emergency read only"); + bch2_journal_halt_locked(j); + + bch_err(c, "%s", err.buf); + printbuf_exit(&err); + return; + } + buf->sectors = sectors; /* From bfbb76ec9808ce80e661dae77018b77488bb56d0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 19 Apr 2025 02:50:29 -0400 Subject: [PATCH 0947/2924] bcachefs: Fix ref leak in write_super() found with the new enumerated_ref code Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 25b6bce05c3cce..cb5d960aed92a6 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -1102,7 +1102,8 @@ int bch2_write_super(struct bch_fs *c) prt_str(&buf, ")"); bch2_fs_fatal_error(c, ": %s", buf.buf); printbuf_exit(&buf); - return -BCH_ERR_sb_not_downgraded; + ret = -BCH_ERR_sb_not_downgraded; + goto out; } darray_for_each(online_devices, ca) { From 10e42b6f25995c6ce7c462e1bcb9684acc0f09a0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 10:26:05 -0400 Subject: [PATCH 0948/2924] bcachefs: bch2_copygc_wakeup() Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.h | 9 +++++++++ fs/bcachefs/rebalance.c | 4 ++-- fs/bcachefs/rebalance.h | 2 +- fs/bcachefs/super.c | 4 ++-- fs/bcachefs/sysfs.c | 7 +++---- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h index ea181fef5bc932..d1885cf67a4574 100644 --- a/fs/bcachefs/movinggc.h +++ b/fs/bcachefs/movinggc.h @@ -5,6 +5,15 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *); void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *); +static inline void bch2_copygc_wakeup(struct bch_fs *c) +{ + rcu_read_lock(); + struct task_struct *p = rcu_dereference(c->copygc_thread); + if (p) + wake_up_process(p); + rcu_read_unlock(); +} + void bch2_copygc_stop(struct bch_fs *); int bch2_copygc_start(struct bch_fs *); void bch2_fs_copygc_init(struct bch_fs *); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index c63fa53f30d219..39006e6affe3db 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -262,7 +262,7 @@ int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_set_rebalance_needs_scan_trans(trans, inum)); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return ret; } @@ -664,7 +664,7 @@ void bch2_rebalance_stop(struct bch_fs *c) c->rebalance.thread = NULL; if (p) { - /* for sychronizing with rebalance_wakeup() */ + /* for sychronizing with bch2_rebalance_wakeup() */ synchronize_rcu(); kthread_stop(p); diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 62a3859d3823f3..e5e8eb4a2dd150 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -37,7 +37,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); int bch2_set_fs_needs_rebalance(struct bch_fs *); -static inline void rebalance_wakeup(struct bch_fs *c) +static inline void bch2_rebalance_wakeup(struct bch_fs *c) { struct task_struct *p; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index adbf3bbed6bce2..ee27734f350f8e 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1500,7 +1500,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) printbuf_exit(&name); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return 0; } @@ -1646,7 +1646,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, if (new_state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return ret; } diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index e5f003c29369e0..82ee333ddd216c 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -654,11 +654,10 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, bch2_set_rebalance_needs_scan(c, 0); if (v && id == Opt_rebalance_enabled) - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); - if (v && id == Opt_copygc_enabled && - c->copygc_thread) - wake_up_process(c->copygc_thread); + if (v && id == Opt_copygc_enabled) + bch2_copygc_wakeup(c); if (id == Opt_discard && !ca) { mutex_lock(&c->sb_lock); From e4570f4bb231f01e32d44fd38841665f340d6914 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 18 Apr 2025 11:17:13 -0500 Subject: [PATCH 0949/2924] iio: imu: adis16550: align buffers for timestamp Align the buffers used with iio_push_to_buffers_with_timestamp() to ensure the s64 timestamp is aligned to 8 bytes. Fixes: bac4368fab62 ("iio: imu: adis16550: add adis16550 support") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250418-iio-more-timestamp-alignment-v2-1-d6a5d2b1c9fe@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis16550.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16550.c b/drivers/iio/imu/adis16550.c index b14ea8937c7f5a..28f0dbd0226cbe 100644 --- a/drivers/iio/imu/adis16550.c +++ b/drivers/iio/imu/adis16550.c @@ -836,7 +836,7 @@ static irqreturn_t adis16550_trigger_handler(int irq, void *p) u16 dummy; bool valid; struct iio_poll_func *pf = p; - __be32 data[ADIS16550_MAX_SCAN_DATA]; + __be32 data[ADIS16550_MAX_SCAN_DATA] __aligned(8); struct iio_dev *indio_dev = pf->indio_dev; struct adis16550 *st = iio_priv(indio_dev); struct adis *adis = iio_device_get_drvdata(indio_dev); From ffcd19e9f4cca0c8f9e23e88f968711acefbb37b Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 18 Apr 2025 11:17:14 -0500 Subject: [PATCH 0950/2924] iio: pressure: mprls0025pa: use aligned_s64 for timestamp Follow the pattern of other drivers and use aligned_s64 for the timestamp. This will ensure the struct itself it also 8-byte aligned. While touching this, convert struct mpr_chan to an anonymous struct to consolidate the code a bit to make it easier for future readers. Fixes: 713337d9143e ("iio: pressure: Honeywell mprls0025pa pressure sensor") Signed-off-by: David Lechner Link: https://patch.msgid.link/20250418-iio-more-timestamp-alignment-v2-2-d6a5d2b1c9fe@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/mprls0025pa.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/iio/pressure/mprls0025pa.h b/drivers/iio/pressure/mprls0025pa.h index 9d5c30afa9d69a..d62a018eaff32b 100644 --- a/drivers/iio/pressure/mprls0025pa.h +++ b/drivers/iio/pressure/mprls0025pa.h @@ -34,16 +34,6 @@ struct iio_dev; struct mpr_data; struct mpr_ops; -/** - * struct mpr_chan - * @pres: pressure value - * @ts: timestamp - */ -struct mpr_chan { - s32 pres; - s64 ts; -}; - enum mpr_func_id { MPR_FUNCTION_A, MPR_FUNCTION_B, @@ -69,6 +59,8 @@ enum mpr_func_id { * reading in a loop until data is ready * @completion: handshake from irq to read * @chan: channel values for buffered mode + * @chan.pres: pressure value + * @chan.ts: timestamp * @buffer: raw conversion data */ struct mpr_data { @@ -87,7 +79,10 @@ struct mpr_data { struct gpio_desc *gpiod_reset; int irq; struct completion completion; - struct mpr_chan chan; + struct { + s32 pres; + aligned_s64 ts; + } chan; u8 buffer[MPR_MEASUREMENT_RD_SIZE] __aligned(IIO_DMA_MINALIGN); }; From f083f8a21cc785ebe3a33f756a3fa3660611f8db Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Fri, 18 Apr 2025 20:37:53 +0200 Subject: [PATCH 0951/2924] iio: adc: ad7606: fix serial register access Fix register read/write routine as per datasheet. When reading multiple consecutive registers, only the first one is read properly. This is due to missing chip select deassert and assert again between first and second 16bit transfer, as shown in the datasheet AD7606C-16, rev 0, figure 110. Fixes: f2a22e1e172f ("iio: adc: ad7606: Add support for software mode for ad7616") Reviewed-by: David Lechner Signed-off-by: Angelo Dureghello Link: https://patch.msgid.link/20250418-wip-bl-ad7606-fix-reg-access-v3-1-d5eeb440c738@baylibre.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7606_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7606_spi.c b/drivers/iio/adc/ad7606_spi.c index 885bf0b68e7775..179115e909888b 100644 --- a/drivers/iio/adc/ad7606_spi.c +++ b/drivers/iio/adc/ad7606_spi.c @@ -131,7 +131,7 @@ static int ad7606_spi_reg_read(struct ad7606_state *st, unsigned int addr) { .tx_buf = &st->d16[0], .len = 2, - .cs_change = 0, + .cs_change = 1, }, { .rx_buf = &st->d16[1], .len = 2, From 609bc31eca06c7408e6860d8b46311ebe45c1fef Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Mon, 21 Apr 2025 09:15:39 -0400 Subject: [PATCH 0952/2924] iio: adis16201: Correct inclinometer channel resolution The inclinometer channels were previously defined with 14 realbits. However, the ADIS16201 datasheet states the resolution for these output channels is 12 bits (Page 14, text description; Page 15, table 7). Correct the realbits value to 12 to accurately reflect the hardware. Fixes: f7fe1d1dd5a5 ("staging: iio: new adis16201 driver") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/20250421131539.912966-1-gshahrouzi@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adis16201.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/adis16201.c b/drivers/iio/accel/adis16201.c index 8601b9a8b8e75c..5127e58eebc7d9 100644 --- a/drivers/iio/accel/adis16201.c +++ b/drivers/iio/accel/adis16201.c @@ -211,9 +211,9 @@ static const struct iio_chan_spec adis16201_channels[] = { BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), ADIS_AUX_ADC_CHAN(ADIS16201_AUX_ADC_REG, ADIS16201_SCAN_AUX_ADC, 0, 12), ADIS_INCLI_CHAN(X, ADIS16201_XINCL_OUT_REG, ADIS16201_SCAN_INCLI_X, - BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 12), ADIS_INCLI_CHAN(Y, ADIS16201_YINCL_OUT_REG, ADIS16201_SCAN_INCLI_Y, - BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 12), IIO_CHAN_SOFT_TIMESTAMP(7) }; From 078d3ee7c162cd66d76171579c02d7890bd77daf Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Mon, 7 Apr 2025 19:27:34 +0000 Subject: [PATCH 0953/2924] cxl/core/regs.c: Skip Memory Space Enable check for RCD and RCH Ports According to CXL r3.2 section 8.2.1.2, the PCI_COMMAND register fields, including Memory Space Enable bit, have no effect on the behavior of an RCD Upstream Port. Retaining this check may incorrectly cause cxl_pci_probe() to fail on a valid RCD upstream Port. While the specification is explicit only for RCD Upstream Ports, this check is solely for accessing the RCRB, which is always mapped through memory space. Therefore, its safe to remove the check entirely. In practice, firmware reliably enables the Memory Space Enable bit for RCH Downstream Ports and no failures have been observed. Removing the check simplifies the code and avoids unnecessary special-casing, while relying on BIOS/firmware to configure devices correctly. Moreover, any failures due to inaccessible RCRB regions will still be caught either in __rcrb_to_component() or while parsing the component register block. The following failure was observed in dmesg when the check was present: cxl_pci 0000:7f:00.0: No component registers (-6) Fixes: d5b1a27143cb ("cxl/acpi: Extract component registers of restricted hosts from RCRB") Signed-off-by: Smita Koralahalli Cc: Reviewed-by: Ira Weiny Reviewed-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Robert Richter Link: https://patch.msgid.link/20250407192734.70631-1-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dave Jiang --- drivers/cxl/core/regs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 117c2e94c761d9..5ca7b0eed568b3 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -581,7 +581,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri resource_size_t rcrb = ri->base; void __iomem *addr; u32 bar0, bar1; - u16 cmd; u32 id; if (which == CXL_RCRB_UPSTREAM) @@ -603,7 +602,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri } id = readl(addr + PCI_VENDOR_ID); - cmd = readw(addr + PCI_COMMAND); bar0 = readl(addr + PCI_BASE_ADDRESS_0); bar1 = readl(addr + PCI_BASE_ADDRESS_1); iounmap(addr); @@ -618,8 +616,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri dev_err(dev, "Failed to access Downstream Port RCRB\n"); return CXL_RESOURCE_NONE; } - if (!(cmd & PCI_COMMAND_MEMORY)) - return CXL_RESOURCE_NONE; /* The RCRB is a Memory Window, and the MEM_TYPE_1M bit is obsolete */ if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO)) return CXL_RESOURCE_NONE; From d64e8e842bb18403d03a85b29caa5cb5f3bd4c7d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 21 Apr 2025 11:52:35 -0400 Subject: [PATCH 0954/2924] bcachefs: Refactor bch2_run_recovery_passes() Don't use a continue; this simplifies the next patch where run_recovery_passes() will be responsible for waking up copygc and rebalance at the appropriate time. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery_passes.c | 63 +++++++++++++++++------------------ 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 593ff142530dfb..8f769804cb5906 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -12,6 +12,7 @@ #include "journal.h" #include "lru.h" #include "logged_ops.h" +#include "movinggc.h" #include "rebalance.h" #include "recovery.h" #include "recovery_passes.h" @@ -262,49 +263,45 @@ int bch2_run_recovery_passes(struct bch_fs *c) */ c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; - while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { - c->next_recovery_pass = c->curr_recovery_pass + 1; + spin_lock_irq(&c->recovery_pass_lock); - spin_lock_irq(&c->recovery_pass_lock); + while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { unsigned pass = c->curr_recovery_pass; + c->next_recovery_pass = pass + 1; + if (c->opts.recovery_pass_last && - c->curr_recovery_pass > c->opts.recovery_pass_last) { - spin_unlock_irq(&c->recovery_pass_lock); + c->curr_recovery_pass > c->opts.recovery_pass_last) break; - } - if (!should_run_recovery_pass(c, pass)) { - c->curr_recovery_pass++; - c->recovery_pass_done = max(c->recovery_pass_done, pass); + if (should_run_recovery_pass(c, pass)) { spin_unlock_irq(&c->recovery_pass_lock); - continue; - } - spin_unlock_irq(&c->recovery_pass_lock); - - ret = bch2_run_recovery_pass(c, pass) ?: - bch2_journal_flush(&c->journal); - - if (!ret && !test_bit(BCH_FS_error, &c->flags)) - bch2_clear_recovery_pass_required(c, pass); - - spin_lock_irq(&c->recovery_pass_lock); - if (c->next_recovery_pass < c->curr_recovery_pass) { - /* - * bch2_run_explicit_recovery_pass() was called: we - * can't always catch -BCH_ERR_restart_recovery because - * it may have been called from another thread (btree - * node read completion) - */ - ret = 0; - c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass); - } else { - c->recovery_passes_complete |= BIT_ULL(pass); - c->recovery_pass_done = max(c->recovery_pass_done, pass); + ret = bch2_run_recovery_pass(c, pass) ?: + bch2_journal_flush(&c->journal); + + if (!ret && !test_bit(BCH_FS_error, &c->flags)) + bch2_clear_recovery_pass_required(c, pass); + spin_lock_irq(&c->recovery_pass_lock); + + if (c->next_recovery_pass < c->curr_recovery_pass) { + /* + * bch2_run_explicit_recovery_pass() was called: we + * can't always catch -BCH_ERR_restart_recovery because + * it may have been called from another thread (btree + * node read completion) + */ + ret = 0; + c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass); + } else { + c->recovery_passes_complete |= BIT_ULL(pass); + c->recovery_pass_done = max(c->recovery_pass_done, pass); + } } + c->curr_recovery_pass = c->next_recovery_pass; - spin_unlock_irq(&c->recovery_pass_lock); } + spin_unlock_irq(&c->recovery_pass_lock); + return ret; } From 387df331298eaa9d6dbb4e30f376d632dd798b46 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Apr 2025 06:44:23 -0400 Subject: [PATCH 0955/2924] bcachefs: Start copygc, rebalance threads earlier Previously, copygc and rebalance weren't started until the very end of mounting, after all recvoery passes have finished. But copygc really should be started earlier, since it may be needed for allocations to make forward progress. Additionally, we've been seeing occasional bug reports where starting the kthread fails due to a pending signal - i.e. we're getting timed out by systemd (during a version upgrade), but we're not seeing the signal until mount is about to complete. Additionally, we now have copygc/rebalance explicitly wait for check_snapshots to complete (if being run); they require that for snapshot_is_ancestor() in the data move path. Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 7 +++++ fs/bcachefs/rebalance.c | 7 +++++ fs/bcachefs/recovery.c | 4 +++ fs/bcachefs/recovery_passes.c | 7 +++++ fs/bcachefs/super.c | 50 ++++++++++------------------------- 5 files changed, 39 insertions(+), 36 deletions(-) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 159410c50861b7..96873372b51600 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -356,6 +356,13 @@ static int bch2_copygc_thread(void *arg) set_freezable(); + /* + * Data move operations can't run until after check_snapshots has + * completed, and bch2_snapshot_is_ancestor() is available. + */ + kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots || + kthread_should_stop()); + bch2_move_stats_init(&move_stats, "copygc"); bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, writepoint_ptr(&c->copygc_write_point), diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 39006e6affe3db..4ccdfc1f34aa35 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -581,6 +581,13 @@ static int bch2_rebalance_thread(void *arg) set_freezable(); + /* + * Data move operations can't run until after check_snapshots has + * completed, and bch2_snapshot_is_ancestor() is available. + */ + kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots || + kthread_should_stop()); + bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats, writepoint_ptr(&c->rebalance_write_point), true); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index bea578e339886c..d6c4ef819d40ac 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -18,6 +18,7 @@ #include "journal_seq_blacklist.h" #include "logged_ops.h" #include "move.h" +#include "movinggc.h" #include "namei.h" #include "quota.h" #include "rebalance.h" @@ -1194,6 +1195,9 @@ int bch2_fs_initialize(struct bch_fs *c) c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1; + bch2_copygc_wakeup(c); + bch2_rebalance_wakeup(c); + if (enabled_qtypes(c)) { ret = bch2_fs_quota_read(c); if (ret) diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 8f769804cb5906..22f72bb5b8536b 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -266,6 +266,7 @@ int bch2_run_recovery_passes(struct bch_fs *c) spin_lock_irq(&c->recovery_pass_lock); while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { + unsigned prev_done = c->recovery_pass_done; unsigned pass = c->curr_recovery_pass; c->next_recovery_pass = pass + 1; @@ -299,6 +300,12 @@ int bch2_run_recovery_passes(struct bch_fs *c) } c->curr_recovery_pass = c->next_recovery_pass; + + if (prev_done <= BCH_RECOVERY_PASS_check_snapshots && + c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) { + bch2_copygc_wakeup(c); + bch2_rebalance_wakeup(c); + } } spin_unlock_irq(&c->recovery_pass_lock); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index ee27734f350f8e..4060af4692f928 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -418,32 +418,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c) return ret; } -static int bch2_fs_read_write_late(struct bch_fs *c) -{ - int ret; - - /* - * Data move operations can't run until after check_snapshots has - * completed, and bch2_snapshot_is_ancestor() is available. - * - * Ideally we'd start copygc/rebalance earlier instead of waiting for - * all of recovery/fsck to complete: - */ - ret = bch2_copygc_start(c); - if (ret) { - bch_err(c, "error starting copygc thread"); - return ret; - } - - ret = bch2_rebalance_start(c); - if (ret) { - bch_err(c, "error starting rebalance thread"); - return ret; - } - - return 0; -} - static int __bch2_fs_read_write(struct bch_fs *c, bool early) { int ret; @@ -503,10 +477,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) atomic_long_inc(&c->writes[i]); } #endif - if (!early) { - ret = bch2_fs_read_write_late(c); - if (ret) - goto err; + + ret = bch2_copygc_start(c); + if (ret) { + bch_err_msg(c, ret, "error starting copygc thread"); + goto err; + } + + ret = bch2_rebalance_start(c); + if (ret) { + bch_err_msg(c, ret, "error starting rebalance thread"); + goto err; } bch2_do_discards(c); @@ -1082,13 +1063,10 @@ int bch2_fs_start(struct bch_fs *c) wake_up(&c->ro_ref_wait); down_write(&c->state_lock); - if (c->opts.read_only) { + if (c->opts.read_only) bch2_fs_read_only(c); - } else { - ret = !test_bit(BCH_FS_rw, &c->flags) - ? bch2_fs_read_write(c) - : bch2_fs_read_write_late(c); - } + else if (!test_bit(BCH_FS_rw, &c->flags)) + ret = bch2_fs_read_write(c); up_write(&c->state_lock); err: From 4ede80a9a860968f9e63c6b9262683d3139194e0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 21 Apr 2025 18:37:12 -0400 Subject: [PATCH 0956/2924] bcachefs: Allocator now copes with unaligned buckets We had a buggy release of bcachefs-tools that wasn't properly aligning bucket sizes. We can't ask users to reformat - and it's easy to teach the allocator to make sure writes are properly aligned. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 2 ++ fs/bcachefs/alloc_foreground.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 7c930ef7738040..effafc3e0ced44 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1425,6 +1425,8 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, open_bucket_for_each(c, &wp->ptrs, ob, i) wp->sectors_free = min(wp->sectors_free, ob->sectors_free); + wp->sectors_free = rounddown(wp->sectors_free, block_sectors(c)); + BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); return 0; diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 69ec6a012898e6..4c1e33cf57c03b 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -110,7 +110,9 @@ static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct writ unsigned i; open_bucket_for_each(c, &wp->ptrs, ob, i) - ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob); + ob_push(c, ob->sectors_free < block_sectors(c) + ? &ptrs + : &keep, ob); wp->ptrs = keep; mutex_unlock(&wp->lock); From 7a4a86618eb7bceac17878e484a8f98da1395d68 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Mar 2025 12:26:24 -0400 Subject: [PATCH 0957/2924] bcachefs: Implement fileattr_(get|set) inode_operations.fileattr_(get|set) didn't exist when the various flag ioctls where implemented - but they do now, which means we can delete a bunch of ioctl code in favor of standard VFS level wrappers. Closes: https://lore.kernel.org/linux-bcachefs/7ltgrgqgfummyrlvw7hnfhnu42rfiamoq3lpcvrjnlyytldmzp@yazbhusnztqn/ Cc: Petr Vorel Cc: Andrea Cervesato Cc: Dave Chinner Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 217 ----------------------------------------- fs/bcachefs/fs-ioctl.h | 75 -------------- fs/bcachefs/fs.c | 173 ++++++++++++++++++++++++++++++++ fs/bcachefs/util.h | 38 ++++++++ 4 files changed, 211 insertions(+), 292 deletions(-) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 14886e1d4d6dff..a82dfce9e4ad37 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -21,206 +21,6 @@ #define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ #define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ -struct flags_set { - unsigned mask; - unsigned flags; - - unsigned projid; - - bool set_projinherit; - bool projinherit; -}; - -static int bch2_inode_flags_set(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - /* - * We're relying on btree locking here for exclusion with other ioctl - * calls - use the flags in the btree (@bi), not inode->i_flags: - */ - struct flags_set *s = p; - unsigned newflags = s->flags; - unsigned oldflags = bi->bi_flags & s->mask; - - if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) && - !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - - if (!S_ISREG(bi->bi_mode) && - !S_ISDIR(bi->bi_mode) && - (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) - return -EINVAL; - - if ((newflags ^ oldflags) & BCH_INODE_casefolded) { -#ifdef CONFIG_UNICODE - int ret = 0; - /* Not supported on individual files. */ - if (!S_ISDIR(bi->bi_mode)) - return -EOPNOTSUPP; - - /* - * Make sure the dir is empty, as otherwise we'd need to - * rehash everything and update the dirent keys. - */ - ret = bch2_empty_dir_trans(trans, inode_inum(inode)); - if (ret < 0) - return ret; - - ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); - if (ret) - return ret; - - bch2_check_set_feature(c, BCH_FEATURE_casefolding); -#else - printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); - return -EOPNOTSUPP; -#endif - } - - if (s->set_projinherit) { - bi->bi_fields_set &= ~(1 << Inode_opt_project); - bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project); - } - - bi->bi_flags &= ~s->mask; - bi->bi_flags |= newflags; - - bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); - return 0; -} - -static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) -{ - unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); - - return put_user(flags, arg); -} - -static int bch2_ioc_setflags(struct bch_fs *c, - struct file *file, - struct bch_inode_info *inode, - void __user *arg) -{ - struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; - unsigned uflags; - int ret; - - if (get_user(uflags, (int __user *) arg)) - return -EFAULT; - - s.flags = map_flags_rev(bch_flags_to_uflags, uflags); - if (uflags) - return -EOPNOTSUPP; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - inode_lock(&inode->v); - if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { - ret = -EACCES; - goto setflags_out; - } - - mutex_lock(&inode->ei_update_lock); - ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: - bch2_write_inode(c, inode, bch2_inode_flags_set, &s, - ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); - -setflags_out: - inode_unlock(&inode->v); - mnt_drop_write_file(file); - return ret; -} - -static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, - struct fsxattr __user *arg) -{ - struct fsxattr fa = { 0 }; - - fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); - - if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) - fa.fsx_xflags |= FS_XFLAG_PROJINHERIT; - - fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; - - if (copy_to_user(arg, &fa, sizeof(fa))) - return -EFAULT; - - return 0; -} - -static int fssetxattr_inode_update_fn(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct flags_set *s = p; - - if (s->projid != bi->bi_project) { - bi->bi_fields_set |= 1U << Inode_opt_project; - bi->bi_project = s->projid; - } - - return bch2_inode_flags_set(trans, inode, bi, p); -} - -static int bch2_ioc_fssetxattr(struct bch_fs *c, - struct file *file, - struct bch_inode_info *inode, - struct fsxattr __user *arg) -{ - struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; - struct fsxattr fa; - int ret; - - if (copy_from_user(&fa, arg, sizeof(fa))) - return -EFAULT; - - s.set_projinherit = true; - s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0; - fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT; - - s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); - if (fa.fsx_xflags) - return -EOPNOTSUPP; - - if (fa.fsx_projid >= U32_MAX) - return -EINVAL; - - /* - * inode fields accessible via the xattr interface are stored with a +1 - * bias, so that 0 means unset: - */ - s.projid = fa.fsx_projid + 1; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - inode_lock(&inode->v); - if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { - ret = -EACCES; - goto err; - } - - mutex_lock(&inode->ei_update_lock); - ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: - bch2_set_projid(c, inode, fa.fsx_projid) ?: - bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, - ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); -err: - inode_unlock(&inode->v); - mnt_drop_write_file(file); - return ret; -} - static int bch2_reinherit_attrs_fn(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -558,23 +358,6 @@ long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) long ret; switch (cmd) { - case FS_IOC_GETFLAGS: - ret = bch2_ioc_getflags(inode, (int __user *) arg); - break; - - case FS_IOC_SETFLAGS: - ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg); - break; - - case FS_IOC_FSGETXATTR: - ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg); - break; - - case FS_IOC_FSSETXATTR: - ret = bch2_ioc_fssetxattr(c, file, inode, - (void __user *) arg); - break; - case BCHFS_IOC_REINHERIT_ATTRS: ret = bch2_ioc_reinherit_attrs(c, file, inode, (void __user *) arg); diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h index ecd3bfdcde2183..a657e4994b7153 100644 --- a/fs/bcachefs/fs-ioctl.h +++ b/fs/bcachefs/fs-ioctl.h @@ -2,81 +2,6 @@ #ifndef _BCACHEFS_FS_IOCTL_H #define _BCACHEFS_FS_IOCTL_H -/* Inode flags: */ - -/* bcachefs inode flags -> vfs inode flags: */ -static const __maybe_unused unsigned bch_flags_to_vfs[] = { - [__BCH_INODE_sync] = S_SYNC, - [__BCH_INODE_immutable] = S_IMMUTABLE, - [__BCH_INODE_append] = S_APPEND, - [__BCH_INODE_noatime] = S_NOATIME, - [__BCH_INODE_casefolded] = S_CASEFOLD, -}; - -/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ -static const __maybe_unused unsigned bch_flags_to_uflags[] = { - [__BCH_INODE_sync] = FS_SYNC_FL, - [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, - [__BCH_INODE_append] = FS_APPEND_FL, - [__BCH_INODE_nodump] = FS_NODUMP_FL, - [__BCH_INODE_noatime] = FS_NOATIME_FL, - [__BCH_INODE_casefolded] = FS_CASEFOLD_FL, -}; - -/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ -static const __maybe_unused unsigned bch_flags_to_xflags[] = { - [__BCH_INODE_sync] = FS_XFLAG_SYNC, - [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, - [__BCH_INODE_append] = FS_XFLAG_APPEND, - [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, - [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, - //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; -}; - -#define set_flags(_map, _in, _out) \ -do { \ - unsigned _i; \ - \ - for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ - if ((_in) & (1 << _i)) \ - (_out) |= _map[_i]; \ - else \ - (_out) &= ~_map[_i]; \ -} while (0) - -#define map_flags(_map, _in) \ -({ \ - unsigned _out = 0; \ - \ - set_flags(_map, _in, _out); \ - _out; \ -}) - -#define map_flags_rev(_map, _in) \ -({ \ - unsigned _i, _out = 0; \ - \ - for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ - if ((_in) & _map[_i]) { \ - (_out) |= 1 << _i; \ - (_in) &= ~_map[_i]; \ - } \ - (_out); \ -}) - -#define map_defined(_map) \ -({ \ - unsigned _in = ~0; \ - \ - map_flags_rev(_map, _in); \ -}) - -/* Set VFS inode flags from bcachefs inode: */ -static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) -{ - set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); -} - long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long); long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5a41b1a8e54fd8..e220e3cba29c73 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,19 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_subvolume *); +/* Set VFS inode flags from bcachefs inode: */ +static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) +{ + static const __maybe_unused unsigned bch_flags_to_vfs[] = { + [__BCH_INODE_sync] = S_SYNC, + [__BCH_INODE_immutable] = S_IMMUTABLE, + [__BCH_INODE_append] = S_APPEND, + [__BCH_INODE_noatime] = S_NOATIME, + [__BCH_INODE_casefolded] = S_CASEFOLD, + }; + set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); +} + void bch2_inode_update_after_write(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -1449,6 +1463,157 @@ static int bch2_open(struct inode *vinode, struct file *file) return generic_file_open(vinode, file); } +/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ +static const __maybe_unused unsigned bch_flags_to_uflags[] = { + [__BCH_INODE_sync] = FS_SYNC_FL, + [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, + [__BCH_INODE_append] = FS_APPEND_FL, + [__BCH_INODE_nodump] = FS_NODUMP_FL, + [__BCH_INODE_noatime] = FS_NOATIME_FL, + [__BCH_INODE_casefolded] = FS_CASEFOLD_FL, +}; + +/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ +static const __maybe_unused unsigned bch_flags_to_xflags[] = { + [__BCH_INODE_sync] = FS_XFLAG_SYNC, + [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, + [__BCH_INODE_append] = FS_XFLAG_APPEND, + [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, + [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, +}; + +static int bch2_fileattr_get(struct dentry *dentry, + struct fileattr *fa) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + + fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags)); + + if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) + fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; + + if (inode->ei_inode.bi_flags & BCH_INODE_casefolded) + fa->flags |= FS_CASEFOLD_FL; + + fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ]; + return 0; +} + +struct flags_set { + unsigned mask; + unsigned flags; + unsigned projid; + bool set_project; +}; + +static int fssetxattr_inode_update_fn(struct btree_trans *trans, + struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = trans->c; + struct flags_set *s = p; + + /* + * We're relying on btree locking here for exclusion with other ioctl + * calls - use the flags in the btree (@bi), not inode->i_flags: + */ + unsigned newflags = s->flags; + unsigned oldflags = bi->bi_flags & s->mask; + + if (!S_ISREG(bi->bi_mode) && + !S_ISDIR(bi->bi_mode) && + (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) + return -EINVAL; + + if ((newflags ^ oldflags) & BCH_INODE_casefolded) { +#ifdef CONFIG_UNICODE + int ret = 0; + /* Not supported on individual files. */ + if (!S_ISDIR(bi->bi_mode)) + return -EOPNOTSUPP; + + /* + * Make sure the dir is empty, as otherwise we'd need to + * rehash everything and update the dirent keys. + */ + ret = bch2_empty_dir_trans(trans, inode_inum(inode)); + if (ret < 0) + return ret; + + ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); + if (ret) + return ret; + + bch2_check_set_feature(c, BCH_FEATURE_casefolding); +#else + printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); + return -EOPNOTSUPP; +#endif + } + + if (s->set_project) { + bi->bi_project = s->projid; + bi->bi_fields_set |= BIT(Inode_opt_project); + } + + bi->bi_flags &= ~s->mask; + bi->bi_flags |= newflags; + + bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); + return 0; +} + +static int bch2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, + struct fileattr *fa) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct flags_set s = {}; + int ret; + + if (fa->fsx_valid) { + fa->fsx_xflags &= ~FS_XFLAG_PROJINHERIT; + + s.mask = map_defined(bch_flags_to_xflags); + s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags); + if (fa->fsx_xflags) + return -EOPNOTSUPP; + + if (fa->fsx_projid >= U32_MAX) + return -EINVAL; + + /* + * inode fields accessible via the xattr interface are stored with a +1 + * bias, so that 0 means unset: + */ + if ((inode->ei_inode.bi_project || + fa->fsx_projid) && + inode->ei_inode.bi_project != fa->fsx_projid + 1) { + s.projid = fa->fsx_projid + 1; + s.set_project = true; + } + } + + if (fa->flags_valid) { + s.mask = map_defined(bch_flags_to_uflags); + s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags); + if (fa->flags) + return -EOPNOTSUPP; + } + + mutex_lock(&inode->ei_update_lock); + ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: + (s.set_project + ? bch2_set_projid(c, inode, fa->fsx_projid) + : 0) ?: + bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, + ATTR_CTIME); + mutex_unlock(&inode->ei_update_lock); + return ret; +} + static const struct file_operations bch_file_operations = { .open = bch2_open, .llseek = bch2_llseek, @@ -1476,6 +1641,8 @@ static const struct inode_operations bch_file_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct inode_operations bch_dir_inode_operations = { @@ -1496,6 +1663,8 @@ static const struct inode_operations bch_dir_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct file_operations bch_dir_file_operations = { @@ -1518,6 +1687,8 @@ static const struct inode_operations bch_symlink_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct inode_operations bch_special_inode_operations = { @@ -1528,6 +1699,8 @@ static const struct inode_operations bch_special_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct address_space_operations bch_address_space_operations = { diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 6ba5071ab6ddaa..3e52c7f8ddd225 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -739,4 +739,42 @@ static inline void memcpy_swab(void *_dst, void *_src, size_t len) *--dst = *src++; } +#define set_flags(_map, _in, _out) \ +do { \ + unsigned _i; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & (1 << _i)) \ + (_out) |= _map[_i]; \ + else \ + (_out) &= ~_map[_i]; \ +} while (0) + +#define map_flags(_map, _in) \ +({ \ + unsigned _out = 0; \ + \ + set_flags(_map, _in, _out); \ + _out; \ +}) + +#define map_flags_rev(_map, _in) \ +({ \ + unsigned _i, _out = 0; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & _map[_i]) { \ + (_out) |= 1 << _i; \ + (_in) &= ~_map[_i]; \ + } \ + (_out); \ +}) + +#define map_defined(_map) \ +({ \ + unsigned _in = ~0; \ + \ + map_flags_rev(_map, _in); \ +}) + #endif /* _BCACHEFS_UTIL_H */ From 91037037ee3d611ce17f39d75f79c7de394b122a Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Fri, 18 Apr 2025 10:38:13 +0800 Subject: [PATCH 0958/2924] net/mlx5: Fix null-ptr-deref in mlx5_create_{inner_,}ttc_table() Add NULL check for mlx5_get_flow_namespace() returns in mlx5_create_inner_ttc_table() and mlx5_create_ttc_table() to prevent NULL pointer dereference. Fixes: 137f3d50ad2a ("net/mlx5: Support matching on l4_type for ttc_table") Signed-off-by: Henry Martin Reviewed-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250418023814.71789-2-bsdhenrymartin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index eb3bd9c7f66ebe..228d0f6570d4c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -655,6 +655,11 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, } ns = mlx5_get_flow_namespace(dev, params->ns_type); + if (!ns) { + kvfree(ttc); + return ERR_PTR(-EOPNOTSUPP); + } + groups = use_l4_type ? &inner_ttc_groups[TTC_GROUPS_USE_L4_TYPE] : &inner_ttc_groups[TTC_GROUPS_DEFAULT]; @@ -728,6 +733,11 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, } ns = mlx5_get_flow_namespace(dev, params->ns_type); + if (!ns) { + kvfree(ttc); + return ERR_PTR(-EOPNOTSUPP); + } + groups = use_l4_type ? &ttc_groups[TTC_GROUPS_USE_L4_TYPE] : &ttc_groups[TTC_GROUPS_DEFAULT]; From fa8fd315127ca48c65e7e6692a84ffcf3d07168e Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Fri, 18 Apr 2025 10:38:14 +0800 Subject: [PATCH 0959/2924] net/mlx5: Move ttc allocation after switch case to prevent leaks Relocate the memory allocation for ttc table after the switch statement that validates params->ns_type in both mlx5_create_inner_ttc_table() and mlx5_create_ttc_table(). This ensures memory is only allocated after confirming valid input, eliminating potential memory leaks when invalid ns_type cases occur. Fixes: 137f3d50ad2a ("net/mlx5: Support matching on l4_type for ttc_table") Signed-off-by: Henry Martin Reviewed-by: Michal Swiatkowski Reviewed-by: Mark Bloch Link: https://patch.msgid.link/20250418023814.71789-3-bsdhenrymartin@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index 228d0f6570d4c2..ca9ecec358b20d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -637,10 +637,6 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, bool use_l4_type; int err; - ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); - if (!ttc) - return ERR_PTR(-ENOMEM); - switch (params->ns_type) { case MLX5_FLOW_NAMESPACE_PORT_SEL: use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && @@ -654,6 +650,10 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, return ERR_PTR(-EINVAL); } + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + ns = mlx5_get_flow_namespace(dev, params->ns_type); if (!ns) { kvfree(ttc); @@ -715,10 +715,6 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, bool use_l4_type; int err; - ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); - if (!ttc) - return ERR_PTR(-ENOMEM); - switch (params->ns_type) { case MLX5_FLOW_NAMESPACE_PORT_SEL: use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && @@ -732,6 +728,10 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, return ERR_PTR(-EINVAL); } + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + ns = mlx5_get_flow_namespace(dev, params->ns_type); if (!ns) { kvfree(ttc); From d3153c3b42707d26c81083b426f2ef0951bce545 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 17 Apr 2025 18:53:17 -0700 Subject: [PATCH 0960/2924] net: fix the missing unlock for detached devices The combined condition was left as is when we converted from __dev_get_by_index() to netdev_get_by_index_lock(). There was no need to undo anything with the former, for the latter we need an unlock. Fixes: 1d22d3060b9b ("net: drop rtnl_lock for queue_mgmt operations") Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250418015317.1954107-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/netdev-genl.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 5d7af50fe70272..230743bdbb1407 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -861,14 +861,17 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) mutex_lock(&priv->lock); + err = 0; netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); - if (!netdev || !netif_device_present(netdev)) { + if (!netdev) { err = -ENODEV; goto err_unlock_sock; } - - if (!netdev_need_ops_lock(netdev)) { + if (!netif_device_present(netdev)) + err = -ENODEV; + else if (!netdev_need_ops_lock(netdev)) err = -EOPNOTSUPP; + if (err) { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_DEV_IFINDEX]); goto err_unlock; From 54bebe46871d4e56e05fcf55c1a37e7efa24e0a8 Mon Sep 17 00:00:00 2001 From: Anastasia Kovaleva Date: Mon, 24 Mar 2025 11:49:33 +0300 Subject: [PATCH 0961/2924] scsi: core: Clear flags for scsi_cmnd that did not complete Commands that have not been completed with scsi_done() do not clear the SCMD_INITIALIZED flag and therefore will not be properly reinitialized. Thus, the next time the scsi_cmnd structure is used, the command may fail in scsi_cmd_runtime_exceeded() due to the old jiffies_at_alloc value: kernel: sd 16:0:1:84: [sdts] tag#405 timing out command, waited 720s kernel: sd 16:0:1:84: [sdts] tag#405 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=66636s Clear flags for commands that have not been completed by SCSI. Fixes: 4abafdc4360d ("block: remove the initialize_rq_fn blk_mq_ops method") Signed-off-by: Anastasia Kovaleva Link: https://lore.kernel.org/r/20250324084933.15932-2-a.kovaleva@yadro.com Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0d29470e86b0b8..1b43013d72c03b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1253,8 +1253,12 @@ EXPORT_SYMBOL_GPL(scsi_alloc_request); */ static void scsi_cleanup_rq(struct request *rq) { + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); + + cmd->flags = 0; + if (rq->rq_flags & RQF_DONTPREP) { - scsi_mq_uninit_cmd(blk_mq_rq_to_pdu(rq)); + scsi_mq_uninit_cmd(cmd); rq->rq_flags &= ~RQF_DONTPREP; } } From 08a966a917fe3d92150fa3cc15793ad5e57051eb Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Sat, 12 Apr 2025 14:59:09 -0500 Subject: [PATCH 0962/2924] scsi: ufs: core: Add NULL check in ufshcd_mcq_compl_pending_transfer() Add a NULL check for the returned hwq pointer by ufshcd_mcq_req_to_hwq(). This is similar to the fix in commit 74736103fb41 ("scsi: ufs: core: Fix ufshcd_abort_one racing issue"). Signed-off-by: Chenyuan Yang Link: https://lore.kernel.org/r/20250412195909.315418-1-chenyuan0y@gmail.com Fixes: ab248643d3d6 ("scsi: ufs: core: Add error handling for MCQ mode") Reviewed-by: Peter Wang Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index ca2b0aae9d11cc..5cb6132b8147a3 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5678,6 +5678,8 @@ static void ufshcd_mcq_compl_pending_transfer(struct ufs_hba *hba, continue; hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) + continue; if (force_compl) { ufshcd_mcq_compl_all_cqes_lock(hba, hwq); From b0b7ee3b574a72283399b9232f6190be07f220c0 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Tue, 15 Apr 2025 15:45:46 +0530 Subject: [PATCH 0963/2924] scsi: mpi3mr: Add level check to control event logging Ensure event logs are only generated when the debug logging level MPI3_DEBUG_EVENT is enabled. This prevents unnecessary logging. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20250415101546.204018-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 003e1f7005c45a..1d7901a8f0e406 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -174,6 +174,9 @@ static void mpi3mr_print_event_data(struct mpi3mr_ioc *mrioc, char *desc = NULL; u16 event; + if (!(mrioc->logging_level & MPI3_DEBUG_EVENT)) + return; + event = event_reply->event; switch (event) { From c083da15f06c808c44444bfcef3c939a07ad394b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 17 Apr 2025 11:15:01 +0100 Subject: [PATCH 0964/2924] MAINTAINERS: Add ism.h to S390 NETWORKING DRIVERS ism.h appears to be part of s390 networking drivers so add it to the corresponding section in MAINTAINERS. This aids developers, and tooling such as get_maintainer.pl alike to CC patches to the appropriate people and mailing lists. And is in keeping with an ongoing effort for NETWORKING entries in MAINTAINERS to more accurately reflect the way code is maintained. Signed-off-by: Simon Horman Link: https://patch.msgid.link/20250417-ism-maint-v1-1-b001be8545ce@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 067f72e8af10ed..aef5f9a8acf548 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21270,6 +21270,7 @@ L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported F: drivers/s390/net/ +F: include/linux/ism.h S390 PCI SUBSYSTEM M: Niklas Schnelle From e00c1517f2bc73186a18ac2cb1d6c5fee7e95239 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 17 Apr 2025 11:15:02 +0100 Subject: [PATCH 0965/2924] MAINTAINERS: Add s390 networking drivers to NETWORKING DRIVERS These files are already correctly covered by the S390 NETWORKING DRIVERS section. In practice commits for these drivers feed into the Networking subsystem. So it seems appropriate to also list them under NETWORKING DRIVERS. This aids developers, and tooling such as get_maintainer.pl alike to CC patches to all the appropriate people and mailing lists. And is in keeping with an ongoing effort for NETWORKING entries in MAINTAINERS to more accurately reflect the way code is maintained. Signed-off-by: Simon Horman Link: https://patch.msgid.link/20250417-ism-maint-v1-2-b001be8545ce@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index aef5f9a8acf548..d980da72ec6030 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16770,6 +16770,7 @@ F: Documentation/networking/net_cachelines/net_device.rst F: drivers/connector/ F: drivers/net/ F: drivers/ptp/ +F: drivers/s390/net/ F: include/dt-bindings/net/ F: include/linux/cn_proc.h F: include/linux/etherdevice.h @@ -16779,6 +16780,7 @@ F: include/linux/fddidevice.h F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h +F: include/linux/ism.h F: include/linux/netdev* F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h From cc3628dcd851ddd8d418bf0c897024b4621ddc92 Mon Sep 17 00:00:00 2001 From: Alexey Nepomnyashih Date: Thu, 17 Apr 2025 12:21:17 +0000 Subject: [PATCH 0966/2924] xen-netfront: handle NULL returned by xdp_convert_buff_to_frame() The function xdp_convert_buff_to_frame() may return NULL if it fails to correctly convert the XDP buffer into an XDP frame due to memory constraints, internal errors, or invalid data. Failing to check for NULL may lead to a NULL pointer dereference if the result is used later in processing, potentially causing crashes, data corruption, or undefined behavior. On XDP redirect failure, the associated page must be released explicitly if it was previously retained via get_page(). Failing to do so may result in a memory leak, as the pages reference count is not decremented. Cc: stable@vger.kernel.org # v5.9+ Fixes: 6c5aa6fc4def ("xen networking: add basic XDP support for xen-netfront") Signed-off-by: Alexey Nepomnyashih Link: https://patch.msgid.link/20250417122118.1009824-1-sdl@nppct.ru Signed-off-by: Jakub Kicinski --- drivers/net/xen-netfront.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index fc52d5c4c69b70..5091e1fa4a0df6 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -985,20 +985,27 @@ static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata, act = bpf_prog_run_xdp(prog, xdp); switch (act) { case XDP_TX: - get_page(pdata); xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) { + trace_xdp_exception(queue->info->netdev, prog, act); + break; + } + get_page(pdata); err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0); - if (unlikely(!err)) + if (unlikely(err <= 0)) { + if (err < 0) + trace_xdp_exception(queue->info->netdev, prog, act); xdp_return_frame_rx_napi(xdpf); - else if (unlikely(err < 0)) - trace_xdp_exception(queue->info->netdev, prog, act); + } break; case XDP_REDIRECT: get_page(pdata); err = xdp_do_redirect(queue->info->netdev, xdp, prog); *need_xdp_flush = true; - if (unlikely(err)) + if (unlikely(err)) { trace_xdp_exception(queue->info->netdev, prog, act); + xdp_return_buff(xdp); + } break; case XDP_PASS: case XDP_DROP: From 2768b2e2f7d25ae8984ebdcde8ec1014b6fdcd89 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 17 Apr 2025 15:00:03 +0300 Subject: [PATCH 0967/2924] net: enetc: register XDP RX queues with frag_size At the time when bpf_xdp_adjust_tail() gained support for non-linear buffers, ENETC was already generating this kind of geometry on RX, due to its use of 2K half page buffers. Frames larger than 1472 bytes (without FCS) are stored as multi-buffer, presenting a need for multi buffer support to work properly even in standard MTU circumstances. Allow bpf_xdp_frags_increase_tail() to know the allocation size of paged data, so it can safely permit growing the tailroom of the buffer from XDP programs. Fixes: bf25146a5595 ("bpf: add frags support to the bpf_xdp_adjust_tail() API") Signed-off-by: Vladimir Oltean Reviewed-by: Wei Fang Link: https://patch.msgid.link/20250417120005.3288549-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 2106861463e40f..9b333254c73ecc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -3362,7 +3362,8 @@ static int enetc_int_vector_init(struct enetc_ndev_priv *priv, int i, bdr->buffer_offset = ENETC_RXB_PAD; priv->rx_ring[i] = bdr; - err = xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0); + err = __xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0, + ENETC_RXB_DMA_SIZE_XDP); if (err) goto free_vector; From 1d587faa5be7e9785b682cc5f58ba8f4100c13ea Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 17 Apr 2025 15:00:04 +0300 Subject: [PATCH 0968/2924] net: enetc: refactor bulk flipping of RX buffers to separate function This small snippet of code ensures that we do something with the array of RX software buffer descriptor elements after passing the skb to the stack. In this case, we see if the other half of the page is reusable, and if so, we "turn around" the buffers, making them directly usable by enetc_refill_rx_ring() without going to enetc_new_page(). We will need to perform this kind of buffer flipping from a new code path, i.e. from XDP_PASS. Currently, enetc_build_skb() does it there buffer by buffer, but in a subsequent change we will stop using enetc_build_skb() for XDP_PASS. Signed-off-by: Vladimir Oltean Reviewed-by: Wei Fang Link: https://patch.msgid.link/20250417120005.3288549-3-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 9b333254c73ecc..74721995cb1f99 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1850,6 +1850,16 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first, } } +static void enetc_bulk_flip_buff(struct enetc_bdr *rx_ring, int rx_ring_first, + int rx_ring_last) +{ + while (rx_ring_first != rx_ring_last) { + enetc_flip_rx_buff(rx_ring, + &rx_ring->rx_swbd[rx_ring_first]); + enetc_bdr_idx_inc(rx_ring, &rx_ring_first); + } +} + static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, struct napi_struct *napi, int work_limit, struct bpf_prog *prog) @@ -1965,11 +1975,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, enetc_xdp_drop(rx_ring, orig_i, i); rx_ring->stats.xdp_redirect_failures++; } else { - while (orig_i != i) { - enetc_flip_rx_buff(rx_ring, - &rx_ring->rx_swbd[orig_i]); - enetc_bdr_idx_inc(rx_ring, &orig_i); - } + enetc_bulk_flip_buff(rx_ring, orig_i, i); xdp_redirect_frm_cnt++; rx_ring->stats.xdp_redirect++; } From 020f0c8b3d396ec8190948f86063e1c45133f839 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 17 Apr 2025 15:00:05 +0300 Subject: [PATCH 0969/2924] net: enetc: fix frame corruption on bpf_xdp_adjust_head/tail() and XDP_PASS Vlatko Markovikj reported that XDP programs attached to ENETC do not work well if they use bpf_xdp_adjust_head() or bpf_xdp_adjust_tail(), combined with the XDP_PASS verdict. A typical use case is to add or remove a VLAN tag. The resulting sk_buff passed to the stack is corrupted, because the algorithm used by the driver for XDP_PASS is to unwind the current buffer pointer in the RX ring and to re-process the current frame with enetc_build_skb() as if XDP hadn't run. That is incorrect because XDP may have modified the geometry of the buffer, which we then are completely unaware of. We are looking at a modified buffer with the original geometry. The initial reaction, both from me and from Vlatko, was to shop around the kernel for code to steal that would calculate a delta between the old and the new XDP buffer geometry, and apply that to the sk_buff too. We noticed that veth and generic xdp have such code. The headroom adjustment is pretty uncontroversial, but what turned out severely problematic is the tailroom. veth has this snippet: __skb_put(skb, off); /* positive on grow, negative on shrink */ which on first sight looks decent enough, except __skb_put() takes an "unsigned int" for the second argument, and the arithmetic seems to only work correctly by coincidence. Second issue, __skb_put() contains a SKB_LINEAR_ASSERT(). It's not a great pattern to make more widespread. The skb may still be nonlinear at that point - it only becomes linear later when resetting skb->data_len to zero. To avoid the above, bpf_prog_run_generic_xdp() does this instead: skb_set_tail_pointer(skb, xdp->data_end - xdp->data); skb->len += off; /* positive on grow, negative on shrink */ which is more open-coded, uses lower-level functions and is in general a bit too much to spread around in driver code. Then there is the snippet: if (xdp_buff_has_frags(xdp)) skb->data_len = skb_shinfo(skb)->xdp_frags_size; else skb->data_len = 0; One would have expected __pskb_trim() to be the function of choice for this task. But it's not used in veth/xdpgeneric because the extraneous fragments were _already_ freed by bpf_xdp_adjust_tail() -> bpf_xdp_frags_shrink_tail() -> ... -> __xdp_return() - the backing memory for the skb frags and the xdp frags is the same, but they don't keep individual references. In fact, that is the biggest reason why this snippet cannot be reused as-is, because ENETC temporarily constructs an skb with the original len and the original number of frags. Because the extraneous frags are already freed by bpf_xdp_adjust_tail() and returned to the page allocator, it means the entire approach of using enetc_build_skb() is questionable for XDP_PASS. To avoid that, one would need to elevate the page refcount of all frags before calling bpf_prog_run_xdp() and drop it after XDP_PASS. There are other things that are missing in ENETC's handling of XDP_PASS, like for example updating skb_shinfo(skb)->meta_len. These are all handled correctly and cleanly in commit 539c1fba1ac7 ("xdp: add generic xdp_build_skb_from_buff()"), added to net-next in Dec 2024, and in addition might even be quicker that way. I have a very strong preference towards backporting that commit for "stable", and that is what is used to fix the handling bugs. It is way too messy to go this deep into the guts of an sk_buff from the code of a device driver. Fixes: d1b15102dd16 ("net: enetc: add support for XDP_DROP and XDP_PASS") Reported-by: Vlatko Markovikj Signed-off-by: Vladimir Oltean Reviewed-by: Wei Fang Link: https://patch.msgid.link/20250417120005.3288549-4-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 26 +++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 74721995cb1f99..3ee52f4b11660a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1878,11 +1878,10 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, while (likely(rx_frm_cnt < work_limit)) { union enetc_rx_bd *rxbd, *orig_rxbd; - int orig_i, orig_cleaned_cnt; struct xdp_buff xdp_buff; struct sk_buff *skb; + int orig_i, err; u32 bd_status; - int err; rxbd = enetc_rxbd(rx_ring, i); bd_status = le32_to_cpu(rxbd->r.lstatus); @@ -1897,7 +1896,6 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, break; orig_rxbd = rxbd; - orig_cleaned_cnt = cleaned_cnt; orig_i = i; enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i, @@ -1925,15 +1923,21 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, rx_ring->stats.xdp_drops++; break; case XDP_PASS: - rxbd = orig_rxbd; - cleaned_cnt = orig_cleaned_cnt; - i = orig_i; - - skb = enetc_build_skb(rx_ring, bd_status, &rxbd, - &i, &cleaned_cnt, - ENETC_RXB_DMA_SIZE_XDP); - if (unlikely(!skb)) + skb = xdp_build_skb_from_buff(&xdp_buff); + /* Probably under memory pressure, stop NAPI */ + if (unlikely(!skb)) { + enetc_xdp_drop(rx_ring, orig_i, i); + rx_ring->stats.xdp_drops++; goto out; + } + + enetc_get_offloads(rx_ring, orig_rxbd, skb); + + /* These buffers are about to be owned by the stack. + * Update our buffer cache (the rx_swbd array elements) + * with their other page halves. + */ + enetc_bulk_flip_buff(rx_ring, orig_i, i); napi_gro_receive(napi, skb); break; From 4b98bf3bff7353d94824c4d874ff2d7f38acc49a Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 11 Mar 2025 20:41:12 +0100 Subject: [PATCH 0970/2924] arm64: dts: imx8mp: configure GPU and NPU clocks in nominal DTSI Commit 255fbd9eabe7 ("arm64: dts: imx8mp: Add optional nominal drive mode DTSI") added imx8mp-nominal.dtsi, which overrides all overdrive clock rates in imx8mp.dtsi to the nominal rates. At the same time, commit 9f7595b3e5ae ("arm64: dts: imx8mp: configure GPU and NPU clocks to overdrive rate") went in, which changed some clock rates away from the nominal values. Resolve the discrepancy by effectively reverting the changes in the latter commit inside imx8mp-nominal.dtsi. This is required for proper operation of the imx8mp-skov boards, which are currently imx8mp-nominal.dtsi's only users and lets all other boards that don't include it benefit from the new higher frequencies. Signed-off-by: Ahmad Fatoum Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mp-nominal.dtsi | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi index a1b75c9068b288..dc0ccd723c6d92 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi @@ -24,6 +24,20 @@ fsl,operating-mode = "nominal"; }; +&gpu2d { + assigned-clocks = <&clk IMX8MP_CLK_GPU2D_CORE>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>; +}; + +&gpu3d { + assigned-clocks = <&clk IMX8MP_CLK_GPU3D_CORE>, + <&clk IMX8MP_CLK_GPU3D_SHADER_CORE>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>, <800000000>; +}; + &pgc_hdmimix { assigned-clocks = <&clk IMX8MP_CLK_HDMI_AXI>, <&clk IMX8MP_CLK_HDMI_APB>; @@ -46,6 +60,18 @@ assigned-clock-rates = <600000000>, <300000000>; }; +&pgc_mlmix { + assigned-clocks = <&clk IMX8MP_CLK_ML_CORE>, + <&clk IMX8MP_CLK_ML_AXI>, + <&clk IMX8MP_CLK_ML_AHB>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>, + <800000000>, + <300000000>; +}; + &media_blk_ctrl { assigned-clocks = <&clk IMX8MP_CLK_MEDIA_AXI>, <&clk IMX8MP_CLK_MEDIA_APB>, From 02e4232998db357bb8199778722d81ffcff0cb98 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Fri, 14 Mar 2025 14:01:04 +0800 Subject: [PATCH 0971/2924] arm64: dts: imx95: Correct the range of PCIe app-reg region Correct the range of PCIe app-reg region from 0x2000 to 0x4000 refer to SerDes_SS memory map of i.MX95 Rerference Manual. Fixes: 3b1d5deb29ff ("arm64: dts: imx95: add pcie[0,1] and pcie-ep[0,1] support") Signed-off-by: Richard Zhu Reviewed-by: Frank Li Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx95.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 9bb26b466a061a..59f057ba6fa7ff 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1626,7 +1626,7 @@ reg = <0 0x4c300000 0 0x10000>, <0 0x60100000 0 0xfe00000>, <0 0x4c360000 0 0x10000>, - <0 0x4c340000 0 0x2000>; + <0 0x4c340000 0 0x4000>; reg-names = "dbi", "config", "atu", "app"; ranges = <0x81000000 0x0 0x00000000 0x0 0x6ff00000 0 0x00100000>, <0x82000000 0x0 0x10000000 0x9 0x10000000 0 0x10000000>; @@ -1673,7 +1673,7 @@ reg = <0 0x4c300000 0 0x10000>, <0 0x4c360000 0 0x1000>, <0 0x4c320000 0 0x1000>, - <0 0x4c340000 0 0x2000>, + <0 0x4c340000 0 0x4000>, <0 0x4c370000 0 0x10000>, <0x9 0 1 0>; reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space"; @@ -1700,7 +1700,7 @@ reg = <0 0x4c380000 0 0x10000>, <8 0x80100000 0 0xfe00000>, <0 0x4c3e0000 0 0x10000>, - <0 0x4c3c0000 0 0x2000>; + <0 0x4c3c0000 0 0x4000>; reg-names = "dbi", "config", "atu", "app"; ranges = <0x81000000 0 0x00000000 0x8 0x8ff00000 0 0x00100000>, <0x82000000 0 0x10000000 0xa 0x10000000 0 0x10000000>; @@ -1749,7 +1749,7 @@ reg = <0 0x4c380000 0 0x10000>, <0 0x4c3e0000 0 0x1000>, <0 0x4c3a0000 0 0x1000>, - <0 0x4c3c0000 0 0x2000>, + <0 0x4c3c0000 0 0x4000>, <0 0x4c3f0000 0 0x10000>, <0xa 0 1 0>; reg-names = "dbi", "atu", "dbi2", "app", "dma", "addr_space"; From 6e1a7bc8382b0d4208258f7d2a4474fae788dd90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?= Date: Fri, 14 Mar 2025 17:20:38 +0100 Subject: [PATCH 0972/2924] ARM: dts: opos6ul: add ksz8081 phy properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") removed a PHY fixup that setted the clock mode and the LED mode. Make the Ethernet interface work again by doing as advised in the commit's log, set clock mode and the LED mode in the device tree. Fixes: c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") Signed-off-by: Sébastien Szymanski Reviewed-by: Oleksij Rempel Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi index f2386dcb9ff2c0..dda4fa91b2f2cc 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi @@ -40,6 +40,9 @@ reg = <1>; interrupt-parent = <&gpio4>; interrupts = <16 IRQ_TYPE_LEVEL_LOW>; + micrel,led-mode = <1>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; status = "okay"; }; }; From 3409f843c04df9434beb637602382eeee90e0bee Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 20 Apr 2025 18:47:58 +0200 Subject: [PATCH 0973/2924] ARM: dts: amlogic: meson8: fix reference to unknown/untested PWM clock Device-tree expects absent clocks to be specified as <0> (instead of using <>). This fixes using the FCLK4/FCLK3 clocks as they are now seen at their correct index (while before they were recognized, but at the correct index - resulting in the hardware using a different clock than what the kernel sees). Fixes: 802cff460aab ("ARM: dts: amlogic: meson8: switch to the new PWM controller binding") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250420164801.330505-2-martin.blumenstingl@googlemail.com Signed-off-by: Neil Armstrong --- arch/arm/boot/dts/amlogic/meson8.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/amlogic/meson8.dtsi b/arch/arm/boot/dts/amlogic/meson8.dtsi index 847f7b1f1e9617..f785e0de0847b5 100644 --- a/arch/arm/boot/dts/amlogic/meson8.dtsi +++ b/arch/arm/boot/dts/amlogic/meson8.dtsi @@ -451,7 +451,7 @@ pwm_ef: pwm@86c0 { compatible = "amlogic,meson8-pwm-v2"; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; reg = <0x86c0 0x10>; @@ -705,7 +705,7 @@ &pwm_ab { compatible = "amlogic,meson8-pwm-v2"; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; @@ -713,7 +713,7 @@ &pwm_cd { compatible = "amlogic,meson8-pwm-v2"; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; From a994b58f9d1163c4f559bd169721f0fc15866919 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 20 Apr 2025 18:47:59 +0200 Subject: [PATCH 0974/2924] ARM: dts: amlogic: meson8b: fix reference to unknown/untested PWM clock Device-tree expects absent clocks to be specified as <0> (instead of using <>). This fixes using the FCLK4/FCLK3 clocks as they are now seen at their correct index (while before they were recognized, but at the correct index - resulting in the hardware using a different clock than what the kernel sees). Fixes: dbf921861985 ("ARM: dts: amlogic: meson8b: switch to the new PWM controller binding") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250420164801.330505-3-martin.blumenstingl@googlemail.com Signed-off-by: Neil Armstrong --- arch/arm/boot/dts/amlogic/meson8b.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/amlogic/meson8b.dtsi b/arch/arm/boot/dts/amlogic/meson8b.dtsi index 0876611ce26a8c..fdb0abe23a0c8b 100644 --- a/arch/arm/boot/dts/amlogic/meson8b.dtsi +++ b/arch/arm/boot/dts/amlogic/meson8b.dtsi @@ -406,7 +406,7 @@ compatible = "amlogic,meson8b-pwm-v2", "amlogic,meson8-pwm-v2"; reg = <0x86c0 0x10>; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; @@ -680,7 +680,7 @@ &pwm_ab { compatible = "amlogic,meson8b-pwm-v2", "amlogic,meson8-pwm-v2"; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; @@ -688,7 +688,7 @@ &pwm_cd { compatible = "amlogic,meson8b-pwm-v2", "amlogic,meson8-pwm-v2"; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; From 511d388a4007ba580feeb2fd2e9ba35a614c093f Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 20 Apr 2025 18:48:00 +0200 Subject: [PATCH 0975/2924] arm64: dts: amlogic: gx: fix reference to unknown/untested PWM clock Device-tree expects absent clocks to be specified as <0> (instead of using <>). This fixes using the FCLK4/FCLK3 clocks as they are now seen at their correct index (while before they were recognized, but at the correct index - resulting in the hardware using a different clock than what the kernel sees). Fixes: a526eeef9a81 ("arm64: dts: amlogic: gx: switch to the new PWM controller binding") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250420164801.330505-4-martin.blumenstingl@googlemail.com Signed-off-by: Neil Armstrong --- arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 6 +++--- arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 8ebce7114a60b7..6c134592c7bb81 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -741,7 +741,7 @@ &pwm_ab { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; @@ -752,14 +752,14 @@ &pwm_cd { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; &pwm_ef { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 2dc2fdaecf9ff5..19b8a39de6a033 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -811,7 +811,7 @@ &pwm_ab { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; @@ -822,14 +822,14 @@ &pwm_cd { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; &pwm_ef { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; From a08b28c1ed454502abeb90ffa4a55445dae1d22a Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 20 Apr 2025 18:48:01 +0200 Subject: [PATCH 0976/2924] arm64: dts: amlogic: g12: fix reference to unknown/untested PWM clock Device-tree expects absent clocks to be specified as <0> (instead of using <>). This fixes using the FCLK4/FCLK3 clocks as they are now seen at their correct index (while before they were recognized, but at the correct index - resulting in the hardware using a different clock than what the kernel sees). Fixes: e6884f2e4129 ("arm64: dts: amlogic: g12: switch to the new PWM controller binding") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250420164801.330505-5-martin.blumenstingl@googlemail.com Signed-off-by: Neil Armstrong --- arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index ab2b3f15ef1946..69834b49673d40 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -2313,7 +2313,7 @@ "amlogic,meson8-pwm-v2"; reg = <0x0 0x19000 0x0 0x20>; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; @@ -2325,7 +2325,7 @@ "amlogic,meson8-pwm-v2"; reg = <0x0 0x1a000 0x0 0x20>; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; @@ -2337,7 +2337,7 @@ "amlogic,meson8-pwm-v2"; reg = <0x0 0x1b000 0x0 0x20>; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; From db91586b1e8f36122a9e5b8fbced11741488dd22 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 18 Apr 2025 15:40:14 +0900 Subject: [PATCH 0977/2924] ata: libata-scsi: Fix ata_mselect_control_ata_feature() return type The function ata_mselect_control_ata_feature() has a return type defined as unsigned int but this function may return negative error codes, which are correctly propagated up the call chain as integers. Fix ata_mselect_control_ata_feature() to have the correct int return type. While at it, also fix a typo in this function description comment. Fixes: df60f9c64576 ("scsi: ata: libata: Add ATA feature control sub-page translation") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Igor Pylypiv --- drivers/ata/libata-scsi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 2796c0da82578a..24e662c837e3f1 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3886,12 +3886,11 @@ static int ata_mselect_control_spg0(struct ata_queued_cmd *qc, } /* - * Translate MODE SELECT control mode page, sub-pages f2h (ATA feature mode + * Translate MODE SELECT control mode page, sub-page f2h (ATA feature mode * page) into a SET FEATURES command. */ -static unsigned int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, - const u8 *buf, int len, - u16 *fp) +static int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, + const u8 *buf, int len, u16 *fp) { struct ata_device *dev = qc->dev; struct ata_taskfile *tf = &qc->tf; From 88474ad734fb2000805c63e01cc53ea930adf2c7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Apr 2025 14:45:30 +0900 Subject: [PATCH 0978/2924] ata: libata-scsi: Fix ata_msense_control_ata_feature() For the ATA features subpage of the control mode page, the T10 SAT-6 specifications state that: For a MODE SENSE command, the SATL shall return the CDL_CTRL field value that was last set by an application client. However, the function ata_msense_control_ata_feature() always sets the CDL_CTRL field to the 0x02 value to indicate support for the CDL T2A and T2B pages. This is thus incorrect and the value 0x02 must be reported only after the user enables the CDL feature, which is indicated with the ATA_DFLAG_CDL_ENABLED device flag. When this flag is not set, the CDL_CTRL field of the ATA feature subpage of the control mode page must report a value of 0x00. Fix ata_msense_control_ata_feature() to report the correct values for the CDL_CTRL field, according to the enable/disable state of the device CDL feature. Fixes: df60f9c64576 ("scsi: ata: libata: Add ATA feature control sub-page translation") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Igor Pylypiv --- drivers/ata/libata-scsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 24e662c837e3f1..a41046cb062cbd 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2453,8 +2453,8 @@ static unsigned int ata_msense_control_ata_feature(struct ata_device *dev, */ put_unaligned_be16(ATA_FEATURE_SUB_MPAGE_LEN - 4, &buf[2]); - if (dev->flags & ATA_DFLAG_CDL) - buf[4] = 0x02; /* Support T2A and T2B pages */ + if (dev->flags & ATA_DFLAG_CDL_ENABLED) + buf[4] = 0x02; /* T2A and T2B pages enabled */ else buf[4] = 0; From 17e897a456752ec9c2d7afb3d9baf268b442451b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 14 Apr 2025 10:25:05 +0900 Subject: [PATCH 0979/2924] ata: libata-scsi: Improve CDL control With ATA devices supporting the CDL feature, using CDL requires that the feature be enabled with a SET FEATURES command. This command is issued as the translated command for the MODE SELECT command issued by scsi_cdl_enable() when the user enables CDL through the device cdl_enable sysfs attribute. Currently, ata_mselect_control_ata_feature() always translates a MODE SELECT command for the ATA features subpage of the control mode page to a SET FEATURES command to enable or disable CDL based on the cdl_ctrl field. However, there is no need to issue the SET FEATURES command if: 1) The MODE SELECT command requests disabling CDL and CDL is already disabled. 2) The MODE SELECT command requests enabling CDL and CDL is already enabled. Fix ata_mselect_control_ata_feature() to issue the SET FEATURES command only when necessary. Since enabling CDL also implies a reset of the CDL statistics log page, avoiding useless CDL enable operations also avoids clearing the CDL statistics log. Also add debug messages to clearly signal when CDL is being enabled or disabled using a SET FEATURES command. Fixes: df60f9c64576 ("scsi: ata: libata: Add ATA feature control sub-page translation") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Igor Pylypiv --- drivers/ata/libata-scsi.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a41046cb062cbd..c0eb8c67a9ff69 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3908,17 +3908,27 @@ static int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, /* Check cdl_ctrl */ switch (buf[0] & 0x03) { case 0: - /* Disable CDL */ + /* Disable CDL if it is enabled */ + if (!(dev->flags & ATA_DFLAG_CDL_ENABLED)) + return 0; + ata_dev_dbg(dev, "Disabling CDL\n"); cdl_action = 0; dev->flags &= ~ATA_DFLAG_CDL_ENABLED; break; case 0x02: - /* Enable CDL T2A/T2B: NCQ priority must be disabled */ + /* + * Enable CDL if not already enabled. Since this is mutually + * exclusive with NCQ priority, allow this only if NCQ priority + * is disabled. + */ + if (dev->flags & ATA_DFLAG_CDL_ENABLED) + return 0; if (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLED) { ata_dev_err(dev, "NCQ priority must be disabled to enable CDL\n"); return -EINVAL; } + ata_dev_dbg(dev, "Enabling CDL\n"); cdl_action = 1; dev->flags |= ATA_DFLAG_CDL_ENABLED; break; From 14a3cc755825ef7b34c986aa2786ea815023e9c5 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Apr 2025 11:24:47 +0900 Subject: [PATCH 0980/2924] scsi: Improve CDL control With ATA devices supporting the CDL feature, using CDL requires that the feature be enabled with a SET FEATURES command. This command is issued as the translated command for the MODE SELECT command issued by scsi_cdl_enable() when the user enables CDL through the device cdl_enable sysfs attribute. However, the implementation of scsi_cdl_enable() always issues a MODE SELECT command for ATA devices when the enable argument is true, even if CDL is already enabled on the device. While this does not cause any issue with using CDL descriptors with read/write commands (the CDL feature will be enabled on the drive), issuing the MODE SELECT command even when the device CDL feature is already enabled will cause a reset of the ATA device CDL statistics log page (as defined in ACS, any CDL enable action must reset the device statistics). Avoid this needless actions (and the implied statistics log page reset) by modifying scsi_cdl_enable() to issue the MODE SELECT command to enable CDL if and only if CDL is not reported as already enabled on the device. And while at it, simplify the initialization of the is_ata boolean variable and move the declaration of the scsi mode data and sense header variables to within the scope of ATA device handling. Fixes: 1b22cfb14142 ("scsi: core: Allow enabling and disabling command duration limits") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Igor Pylypiv Reviewed-by: Martin K. Petersen --- drivers/scsi/scsi.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 53daf923ad8ef3..518a252eb6aa05 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -707,26 +707,23 @@ void scsi_cdl_check(struct scsi_device *sdev) */ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) { - struct scsi_mode_data data; - struct scsi_sense_hdr sshdr; - struct scsi_vpd *vpd; - bool is_ata = false; char buf[64]; + bool is_ata; int ret; if (!sdev->cdl_supported) return -EOPNOTSUPP; rcu_read_lock(); - vpd = rcu_dereference(sdev->vpd_pg89); - if (vpd) - is_ata = true; + is_ata = rcu_dereference(sdev->vpd_pg89); rcu_read_unlock(); /* * For ATA devices, CDL needs to be enabled with a SET FEATURES command. */ if (is_ata) { + struct scsi_mode_data data; + struct scsi_sense_hdr sshdr; char *buf_data; int len; @@ -735,16 +732,30 @@ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) if (ret) return -EINVAL; - /* Enable CDL using the ATA feature page */ + /* Enable or disable CDL using the ATA feature page */ len = min_t(size_t, sizeof(buf), data.length - data.header_length - data.block_descriptor_length); buf_data = buf + data.header_length + data.block_descriptor_length; - if (enable) - buf_data[4] = 0x02; - else - buf_data[4] = 0; + + /* + * If we want to enable CDL and CDL is already enabled on the + * device, do nothing. This avoids needlessly resetting the CDL + * statistics on the device as that is implied by the CDL enable + * action. Similar to this, there is no need to do anything if + * we want to disable CDL and CDL is already disabled. + */ + if (enable) { + if ((buf_data[4] & 0x03) == 0x02) + goto out; + buf_data[4] &= ~0x03; + buf_data[4] |= 0x02; + } else { + if ((buf_data[4] & 0x03) == 0x00) + goto out; + buf_data[4] &= ~0x03; + } ret = scsi_mode_select(sdev, 1, 0, buf_data, len, 5 * HZ, 3, &data, &sshdr); @@ -756,6 +767,7 @@ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) } } +out: sdev->cdl_enable = enable; return 0; From f37bb5486ea536c1d61df89feeaeff3f84f0b560 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Mon, 21 Apr 2025 22:12:59 +0200 Subject: [PATCH 0981/2924] Revert "drm/meson: vclk: fix calculation of 59.94 fractional rates" This reverts commit bfbc68e. The patch does permit the offending YUV420 @ 59.94 phy_freq and vclk_freq mode to match in calculations. It also results in all fractional rates being unavailable for use. This was unintended and requires the patch to be reverted. Fixes: bfbc68e4d869 ("drm/meson: vclk: fix calculation of 59.94 fractional rates") Cc: stable@vger.kernel.org Signed-off-by: Christian Hewitt Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250421201300.778955-2-martin.blumenstingl@googlemail.com Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250421201300.778955-2-martin.blumenstingl@googlemail.com --- drivers/gpu/drm/meson/meson_vclk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 2a942dc6a6dc23..2a82119eb58ed8 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -790,13 +790,13 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, FREQ_1000_1001(params[i].pixel_freq)); DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", i, params[i].phy_freq, - FREQ_1000_1001(params[i].phy_freq/1000)*1000); + FREQ_1000_1001(params[i].phy_freq/10)*10); /* Match strict frequency */ if (phy_freq == params[i].phy_freq && vclk_freq == params[i].vclk_freq) return MODE_OK; /* Match 1000/1001 variant */ - if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) && + if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/10)*10) && vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) return MODE_OK; } @@ -1070,7 +1070,7 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, for (freq = 0 ; params[freq].pixel_freq ; ++freq) { if ((phy_freq == params[freq].phy_freq || - phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) && + phy_freq == FREQ_1000_1001(params[freq].phy_freq/10)*10) && (vclk_freq == params[freq].vclk_freq || vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { if (vclk_freq != params[freq].vclk_freq) From 1017560164b6bbcbc93579266926e6e96675262a Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 21 Apr 2025 22:13:00 +0200 Subject: [PATCH 0982/2924] drm/meson: use unsigned long long / Hz for frequency types Christian reports that 4K output using YUV420 encoding fails with the following error: Fatal Error, invalid HDMI vclk freq 593406 Modetest shows the following: 3840x2160 59.94 3840 4016 4104 4400 2160 2168 2178 2250 593407 flags: xxxx, xxxx, drm calculated value -------------------------------------^ This indicates that there's a (1kHz) mismatch between the clock calculated by the drm framework and the meson driver. Relevant function call stack: (drm framework) -> meson_encoder_hdmi_atomic_enable() -> meson_encoder_hdmi_set_vclk() -> meson_vclk_setup() The video clock requested by the drm framework is 593407kHz. This is passed by meson_encoder_hdmi_atomic_enable() to meson_encoder_hdmi_set_vclk() and the following formula is applied: - the frequency is halved (which would be 296703.5kHz) and rounded down to the next full integer, which is 296703kHz - TMDS clock is calculated (296703kHz * 10) - video encoder clock is calculated - this needs to match a table from meson_vclk.c and so it doubles the previously halved value again (resulting in 593406kHz) - meson_vclk_setup() can't find (either directly, or by deriving it from 594000kHz * 1000 / 1001 and rounding to the closest integer value - which is 593407kHz as originally requested by the drm framework) a matching clock in it's internal table and errors out with "invalid HDMI vclk freq" Fix the division precision by switching the whole meson driver to use unsigned long long (64-bit) Hz values for clock frequencies instead of unsigned int (32-bit) kHz to fix the rouding error. Fixes: e5fab2ec9ca4 ("drm/meson: vclk: add support for YUV420 setup") Reported-by: Christian Hewitt Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250421201300.778955-3-martin.blumenstingl@googlemail.com Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250421201300.778955-3-martin.blumenstingl@googlemail.com --- drivers/gpu/drm/meson/meson_drv.c | 2 +- drivers/gpu/drm/meson/meson_drv.h | 2 +- drivers/gpu/drm/meson/meson_encoder_hdmi.c | 29 +-- drivers/gpu/drm/meson/meson_vclk.c | 195 +++++++++++---------- drivers/gpu/drm/meson/meson_vclk.h | 13 +- 5 files changed, 126 insertions(+), 115 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 81d2ee37e7732d..49ff9f1f16d32a 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -169,7 +169,7 @@ static const struct meson_drm_soc_attr meson_drm_soc_attrs[] = { /* S805X/S805Y HDMI PLL won't lock for HDMI PHY freq > 1,65GHz */ { .limits = { - .max_hdmi_phy_freq = 1650000, + .max_hdmi_phy_freq = 1650000000, }, .attrs = (const struct soc_device_attribute []) { { .soc_id = "GXL (S805*)", }, diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h index 3f9345c14f31c1..be4b0e4df6e13e 100644 --- a/drivers/gpu/drm/meson/meson_drv.h +++ b/drivers/gpu/drm/meson/meson_drv.h @@ -37,7 +37,7 @@ struct meson_drm_match_data { }; struct meson_drm_soc_limits { - unsigned int max_hdmi_phy_freq; + unsigned long long max_hdmi_phy_freq; }; struct meson_drm { diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 6d1c9262a2cfb7..7752d8ac85f0e5 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -70,12 +70,12 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, { struct meson_drm *priv = encoder_hdmi->priv; int vic = drm_match_cea_mode(mode); - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; - vclk_freq = mode->clock; + vclk_freq = mode->clock * 1000; /* For 420, pixel clock is half unlike venc clock */ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) @@ -107,7 +107,8 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "vclk:%d phy=%d venc=%d hdmi=%d enci=%d\n", + dev_dbg(priv->dev, + "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", phy_freq, vclk_freq, venc_freq, hdmi_freq, priv->venc.hdmi_use_enci); @@ -122,10 +123,11 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge); struct meson_drm *priv = encoder_hdmi->priv; bool is_hdmi2_sink = display_info->hdmi.scdc.supported; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long clock = mode->clock * 1000; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; int vic = drm_match_cea_mode(mode); enum drm_mode_status status; @@ -144,12 +146,12 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (status != MODE_OK) return status; - return meson_vclk_dmt_supported_freq(priv, mode->clock); + return meson_vclk_dmt_supported_freq(priv, clock); /* Check against supported VIC modes */ } else if (!meson_venc_hdmi_supported_vic(vic)) return MODE_BAD; - vclk_freq = mode->clock; + vclk_freq = clock; /* For 420, pixel clock is half unlike venc clock */ if (drm_mode_is_420_only(display_info, mode) || @@ -179,7 +181,8 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n", + dev_dbg(priv->dev, + "%s: vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz\n", __func__, phy_freq, vclk_freq, venc_freq, hdmi_freq); return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq); diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 2a82119eb58ed8..3325580d885d0a 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -110,7 +110,10 @@ #define HDMI_PLL_LOCK BIT(31) #define HDMI_PLL_LOCK_G12A (3 << 30) -#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST(_freq * 1000, 1001) +#define PIXEL_FREQ_1000_1001(_freq) \ + DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) +#define PHY_FREQ_1000_1001(_freq) \ + (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10) /* VID PLL Dividers */ enum { @@ -360,11 +363,11 @@ enum { }; struct meson_vclk_params { - unsigned int pll_freq; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int pixel_freq; + unsigned long long pll_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long pixel_freq; unsigned int pll_od1; unsigned int pll_od2; unsigned int pll_od3; @@ -372,11 +375,11 @@ struct meson_vclk_params { unsigned int vclk_div; } params[] = { [MESON_VCLK_HDMI_ENCI_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - .pixel_freq = 54000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 54000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -384,11 +387,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - .pixel_freq = 27000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 27000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -396,11 +399,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_148500] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 74250000, .pll_od1 = 4, .pll_od2 = 1, .pll_od3 = 1, @@ -408,11 +411,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_74250] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 74250, - .venc_freq = 74250, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 74250000, + .venc_freq = 74250000, + .pixel_freq = 74250000, .pll_od1 = 2, .pll_od2 = 2, .pll_od3 = 2, @@ -420,11 +423,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_148500] = { - .pll_freq = 2970000, - .phy_freq = 1485000, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 148500, + .pll_freq = 2970000000, + .phy_freq = 1485000000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 148500000, .pll_od1 = 1, .pll_od2 = 2, .pll_od3 = 2, @@ -432,11 +435,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_297000] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 297000, - .vclk_freq = 297000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 297000000, + .vclk_freq = 297000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -444,11 +447,11 @@ struct meson_vclk_params { .vclk_div = 2, }, [MESON_VCLK_HDMI_594000] = { - .pll_freq = 5940000, - .phy_freq = 5940000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 594000, + .pll_freq = 5940000000, + .phy_freq = 5940000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 594000000, .pll_od1 = 1, .pll_od2 = 1, .pll_od3 = 2, @@ -456,11 +459,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_594000_YUV420] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -617,16 +620,16 @@ static void meson_hdmi_pll_set_params(struct meson_drm *priv, unsigned int m, 3 << 20, pll_od_to_reg(od3) << 20); } -#define XTAL_FREQ 24000 +#define XTAL_FREQ (24 * 1000 * 1000) static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long pll_freq) { /* The GXBB PLL has a /2 pre-multiplier */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) - pll_freq /= 2; + pll_freq = DIV_ROUND_DOWN_ULL(pll_freq, 2); - return pll_freq / XTAL_FREQ; + return DIV_ROUND_DOWN_ULL(pll_freq, XTAL_FREQ); } #define HDMI_FRAC_MAX_GXBB 4096 @@ -635,12 +638,13 @@ static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, unsigned int m, - unsigned int pll_freq) + unsigned long long pll_freq) { - unsigned int parent_freq = XTAL_FREQ; + unsigned long long parent_freq = XTAL_FREQ; unsigned int frac_max = HDMI_FRAC_MAX_GXL; unsigned int frac_m; unsigned int frac; + u32 remainder; /* The GXBB PLL has a /2 pre-multiplier and a larger FRAC width */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -652,11 +656,11 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, frac_max = HDMI_FRAC_MAX_G12A; /* We can have a perfect match !*/ - if (pll_freq / m == parent_freq && - pll_freq % m == 0) + if (div_u64_rem(pll_freq, m, &remainder) == parent_freq && + remainder == 0) return 0; - frac = div_u64((u64)pll_freq * (u64)frac_max, parent_freq); + frac = mul_u64_u64_div_u64(pll_freq, frac_max, parent_freq); frac_m = m * frac_max; if (frac_m > frac) return frac_max; @@ -666,7 +670,7 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, } static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, - unsigned int m, + unsigned long long m, unsigned int frac) { if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -694,7 +698,7 @@ static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, } static bool meson_hdmi_pll_find_params(struct meson_drm *priv, - unsigned int freq, + unsigned long long freq, unsigned int *m, unsigned int *frac, unsigned int *od) @@ -706,7 +710,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, continue; *frac = meson_hdmi_pll_get_frac(priv, *m, freq * *od); - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d\n", freq, *m, *frac, *od); if (meson_hdmi_pll_validate_params(priv, *m, *frac)) @@ -718,7 +722,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, /* pll_freq is the frequency after the OD dividers */ enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq) +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq) { unsigned int od, m, frac; @@ -741,7 +745,7 @@ EXPORT_SYMBOL_GPL(meson_vclk_dmt_supported_freq); /* pll_freq is the frequency after the OD dividers */ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long pll_freq) { unsigned int od, m, frac, od1, od2, od3; @@ -756,7 +760,7 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, od1 = od / od2; } - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d/%d/%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d/%d/%d\n", pll_freq, m, frac, od1, od2, od3); meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); @@ -764,17 +768,18 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, return; } - DRM_ERROR("Fatal, unable to find parameters for PLL freq %d\n", + DRM_ERROR("Fatal, unable to find parameters for PLL freq %lluHz\n", pll_freq); } enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq) +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq) { int i; - DRM_DEBUG_DRIVER("phy_freq = %d vclk_freq = %d\n", + DRM_DEBUG_DRIVER("phy_freq = %lluHz vclk_freq = %lluHz\n", phy_freq, vclk_freq); /* Check against soc revision/package limits */ @@ -785,19 +790,19 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d pixel_freq = %d alt = %d\n", + DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n", i, params[i].pixel_freq, - FREQ_1000_1001(params[i].pixel_freq)); - DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", + PIXEL_FREQ_1000_1001(params[i].pixel_freq)); + DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", i, params[i].phy_freq, - FREQ_1000_1001(params[i].phy_freq/10)*10); + PHY_FREQ_1000_1001(params[i].phy_freq)); /* Match strict frequency */ if (phy_freq == params[i].phy_freq && vclk_freq == params[i].vclk_freq) return MODE_OK; /* Match 1000/1001 variant */ - if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/10)*10) && - vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) + if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) && + vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq)) return MODE_OK; } @@ -805,8 +810,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } EXPORT_SYMBOL_GPL(meson_vclk_vic_supported_freq); -static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, - unsigned int od1, unsigned int od2, unsigned int od3, +static void meson_vclk_set(struct meson_drm *priv, + unsigned long long pll_base_freq, unsigned int od1, + unsigned int od2, unsigned int od3, unsigned int vid_pll_div, unsigned int vclk_div, unsigned int hdmi_tx_div, unsigned int venc_div, bool hdmi_use_enci, bool vic_alternate_clock) @@ -826,15 +832,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_generic_set(priv, pll_base_freq); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x3d; frac = vic_alternate_clock ? 0xd02 : 0xe00; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0x59 : 0x5a; frac = vic_alternate_clock ? 0xe8f : 0; break; - case 5940000: + case 5940000000: m = 0x7b; frac = vic_alternate_clock ? 0xa05 : 0xc00; break; @@ -844,15 +850,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) || meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x281 : 0x300; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x347 : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 0x102 : 0x200; break; @@ -861,15 +867,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x140b4 : 0x18000; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x1a3ee : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 0x8148 : 0x10000; break; @@ -1025,14 +1031,14 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci) { bool vic_alternate_clock = false; - unsigned int freq; - unsigned int hdmi_tx_div; - unsigned int venc_div; + unsigned long long freq; + unsigned long long hdmi_tx_div; + unsigned long long venc_div; if (target == MESON_VCLK_TARGET_CVBS) { meson_venci_cvbs_clock_config(priv); @@ -1052,27 +1058,27 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, return; } - hdmi_tx_div = vclk_freq / dac_freq; + hdmi_tx_div = DIV_ROUND_DOWN_ULL(vclk_freq, dac_freq); if (hdmi_tx_div == 0) { - pr_err("Fatal Error, invalid HDMI-TX freq %d\n", + pr_err("Fatal Error, invalid HDMI-TX freq %lluHz\n", dac_freq); return; } - venc_div = vclk_freq / venc_freq; + venc_div = DIV_ROUND_DOWN_ULL(vclk_freq, venc_freq); if (venc_div == 0) { - pr_err("Fatal Error, invalid HDMI venc freq %d\n", + pr_err("Fatal Error, invalid HDMI venc freq %lluHz\n", venc_freq); return; } for (freq = 0 ; params[freq].pixel_freq ; ++freq) { if ((phy_freq == params[freq].phy_freq || - phy_freq == FREQ_1000_1001(params[freq].phy_freq/10)*10) && + phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) && (vclk_freq == params[freq].vclk_freq || - vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { + vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) { if (vclk_freq != params[freq].vclk_freq) vic_alternate_clock = true; else @@ -1098,7 +1104,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, } if (!params[freq].pixel_freq) { - pr_err("Fatal Error, invalid HDMI vclk freq %d\n", vclk_freq); + pr_err("Fatal Error, invalid HDMI vclk freq %lluHz\n", + vclk_freq); return; } diff --git a/drivers/gpu/drm/meson/meson_vclk.h b/drivers/gpu/drm/meson/meson_vclk.h index 60617aaf18dd1c..7ac55744e57494 100644 --- a/drivers/gpu/drm/meson/meson_vclk.h +++ b/drivers/gpu/drm/meson/meson_vclk.h @@ -20,17 +20,18 @@ enum { }; /* 27MHz is the CVBS Pixel Clock */ -#define MESON_VCLK_CVBS 27000 +#define MESON_VCLK_CVBS (27 * 1000 * 1000) enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq); +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq); enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq); +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq); void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci); #endif /* __MESON_VCLK_H */ From 095c8e61f4c71cd4630ee11a82e82cc341b38464 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Thu, 17 Apr 2025 15:55:06 -0400 Subject: [PATCH 0983/2924] drm: panel: jd9365da: fix reset signal polarity in unprepare commit a8972d5a49b4 ("drm: panel: jd9365da-h3: fix reset signal polarity") fixed reset signal polarity in jadard_dsi_probe() and jadard_prepare(). It was not done in jadard_unprepare() because of an incorrect assumption about reset line handling in power off mode. After looking into the datasheet, it now appears that before disabling regulators, the reset line is deasserted first, and if reset_before_power_off_vcioo is true, then the reset line is asserted. Fix reset polarity by inverting gpiod_set_value() second argument in in jadard_unprepare(). Fixes: 6b818c533dd8 ("drm: panel: Add Jadard JD9365DA-H3 DSI panel") Fixes: 2b976ad760dc ("drm/panel: jd9365da: Support for kd101ne3-40ti MIPI-DSI panel") Fixes: a8972d5a49b4 ("drm: panel: jd9365da-h3: fix reset signal polarity") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250417195507.778731-1-hugo@hugovil.com Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250417195507.778731-1-hugo@hugovil.com --- drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c index 7d68a8acfe2ea4..eb0f8373258c34 100644 --- a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c +++ b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c @@ -129,11 +129,11 @@ static int jadard_unprepare(struct drm_panel *panel) { struct jadard *jadard = panel_to_jadard(panel); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(120); if (jadard->desc->reset_before_power_off_vcioo) { - gpiod_set_value(jadard->reset, 0); + gpiod_set_value(jadard->reset, 1); usleep_range(1000, 2000); } From 3d7aa0c7b4e96cd460826d932e44710cdeb3378b Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 18 Apr 2025 10:02:50 +0200 Subject: [PATCH 0984/2924] nvmet: fix out-of-bounds access in nvmet_enable_port When trying to enable a port that has no transport configured yet, nvmet_enable_port() uses NVMF_TRTYPE_MAX (255) to query the transports array, causing an out-of-bounds access: [ 106.058694] BUG: KASAN: global-out-of-bounds in nvmet_enable_port+0x42/0x1da [ 106.058719] Read of size 8 at addr ffffffff89dafa58 by task ln/632 [...] [ 106.076026] nvmet: transport type 255 not supported Since commit 200adac75888, NVMF_TRTYPE_MAX is the default state as configured by nvmet_ports_make(). Avoid this by checking for NVMF_TRTYPE_MAX before proceeding. Fixes: 200adac75888 ("nvme: Add PCI transport type") Signed-off-by: Richard Weinberger Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal --- drivers/nvme/target/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 71f8d06998d602..245475c43127fb 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -324,6 +324,9 @@ int nvmet_enable_port(struct nvmet_port *port) lockdep_assert_held(&nvmet_config_sem); + if (port->disc_addr.trtype == NVMF_TRTYPE_MAX) + return -EINVAL; + ops = nvmet_transports[port->disc_addr.trtype]; if (!ops) { up_write(&nvmet_config_sem); From 7e21ea8149a0e41c3666ee52cc063a6f797a7a2a Mon Sep 17 00:00:00 2001 From: Chen Linxuan Date: Tue, 15 Apr 2025 12:06:16 +0300 Subject: [PATCH 0985/2924] drm/i915/pxp: fix undefined reference to `intel_pxp_gsccs_is_ready_for_sessions' On x86_64 with gcc version 13.3.0, I compile kernel with: make defconfig ./scripts/kconfig/merge_config.sh .config <( echo CONFIG_COMPILE_TEST=y ) make KCFLAGS="-fno-inline-functions -fno-inline-small-functions -fno-inline-functions-called-once" Then I get a linker error: ld: vmlinux.o: in function `pxp_fw_dependencies_completed': kintel_pxp.c:(.text+0x95728f): undefined reference to `intel_pxp_gsccs_is_ready_for_sessions' This is caused by not having a intel_pxp_gsccs_is_ready_for_sessions() header stub for CONFIG_DRM_I915_PXP=n. Add it. Signed-off-by: Chen Linxuan Fixes: 99afb7cc8c44 ("drm/i915/pxp: Add ARB session creation and cleanup") Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20250415090616.2649889-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit b484c1e225a6a582fc78c4d7af7b286408bb7d41) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 9aae779c4da318..4969d3de2bac3d 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -23,6 +23,7 @@ int intel_pxp_gsccs_init(struct intel_pxp *pxp); int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, int arb_session_id); void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); #else static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) @@ -34,8 +35,11 @@ static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) return 0; } -#endif +static inline bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) +{ + return false; +} -bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); +#endif #endif /*__INTEL_PXP_GSCCS_H__ */ From 30a41ed32d3088cd0d682a13d7f30b23baed7e93 Mon Sep 17 00:00:00 2001 From: Fiona Klute Date: Wed, 16 Apr 2025 12:24:13 +0200 Subject: [PATCH 0986/2924] net: phy: microchip: force IRQ polling mode for lan88xx With lan88xx based devices the lan78xx driver can get stuck in an interrupt loop while bringing the device up, flooding the kernel log with messages like the following: lan78xx 2-3:1.0 enp1s0u3: kevent 4 may have been dropped Removing interrupt support from the lan88xx PHY driver forces the driver to use polling instead, which avoids the problem. The issue has been observed with Raspberry Pi devices at least since 4.14 (see [1], bug report for their downstream kernel), as well as with Nvidia devices [2] in 2020, where disabling interrupts was the vendor-suggested workaround (together with the claim that phylib changes in 4.9 made the interrupt handling in lan78xx incompatible). Iperf reports well over 900Mbits/sec per direction with client in --dualtest mode, so there does not seem to be a significant impact on throughput (lan88xx device connected via switch to the peer). [1] https://github.com/raspberrypi/linux/issues/2447 [2] https://forums.developer.nvidia.com/t/jetson-xavier-and-lan7800-problem/142134/11 Link: https://lore.kernel.org/0901d90d-3f20-4a10-b680-9c978e04ddda@lunn.ch Fixes: 792aec47d59d ("add microchip LAN88xx phy driver") Signed-off-by: Fiona Klute Cc: kernel-list@raspberrypi.com Cc: stable@vger.kernel.org Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250416102413.30654-1-fiona.klute@gmx.de Signed-off-by: Paolo Abeni --- drivers/net/phy/microchip.c | 46 +++---------------------------------- 1 file changed, 3 insertions(+), 43 deletions(-) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 0e17cc458efdc7..93de88c1c8fd58 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -37,47 +37,6 @@ static int lan88xx_write_page(struct phy_device *phydev, int page) return __phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS, page); } -static int lan88xx_phy_config_intr(struct phy_device *phydev) -{ - int rc; - - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { - /* unmask all source and clear them before enable */ - rc = phy_write(phydev, LAN88XX_INT_MASK, 0x7FFF); - rc = phy_read(phydev, LAN88XX_INT_STS); - rc = phy_write(phydev, LAN88XX_INT_MASK, - LAN88XX_INT_MASK_MDINTPIN_EN_ | - LAN88XX_INT_MASK_LINK_CHANGE_); - } else { - rc = phy_write(phydev, LAN88XX_INT_MASK, 0); - if (rc) - return rc; - - /* Ack interrupts after they have been disabled */ - rc = phy_read(phydev, LAN88XX_INT_STS); - } - - return rc < 0 ? rc : 0; -} - -static irqreturn_t lan88xx_handle_interrupt(struct phy_device *phydev) -{ - int irq_status; - - irq_status = phy_read(phydev, LAN88XX_INT_STS); - if (irq_status < 0) { - phy_error(phydev); - return IRQ_NONE; - } - - if (!(irq_status & LAN88XX_INT_STS_LINK_CHANGE_)) - return IRQ_NONE; - - phy_trigger_machine(phydev); - - return IRQ_HANDLED; -} - static int lan88xx_suspend(struct phy_device *phydev) { struct lan88xx_priv *priv = phydev->priv; @@ -528,8 +487,9 @@ static struct phy_driver microchip_phy_driver[] = { .config_aneg = lan88xx_config_aneg, .link_change_notify = lan88xx_link_change_notify, - .config_intr = lan88xx_phy_config_intr, - .handle_interrupt = lan88xx_handle_interrupt, + /* Interrupt handling is broken, do not define related + * functions to force polling. + */ .suspend = lan88xx_suspend, .resume = genphy_resume, From cae5572ec9261f752af834cdaaf5a0ba0afcf256 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 22 Apr 2025 21:40:34 +1000 Subject: [PATCH 0987/2924] dma-mapping: Fix warning reported for missing prototype lkp reported a warning about missing prototype for a recent patch. The kernel-doc style comments are out of sync, move them to the right function. Cc: Marek Szyprowski Cc: Christoph Hellwig Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504190615.g9fANxHw-lkp@intel.com/ Signed-off-by: Balbir Singh [mszyprow: reformatted subject] Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250422114034.3535515-1-balbirs@nvidia.com --- kernel/dma/mapping.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 67da08fa672370..051a32988040ff 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -910,14 +910,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); -/** - * dma_addressing_limited - return if the device is addressing limited - * @dev: device to check - * - * Return %true if the devices DMA mask is too small to address all memory in - * the system, else %false. Lack of addressing bits is the prime reason for - * bounce buffering, but might not be the only one. - */ static bool __dma_addressing_limited(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -931,6 +923,14 @@ static bool __dma_addressing_limited(struct device *dev) return !dma_direct_all_ram_mapped(dev); } +/** + * dma_addressing_limited - return if the device is addressing limited + * @dev: device to check + * + * Return %true if the devices DMA mask is too small to address all memory in + * the system, else %false. Lack of addressing bits is the prime reason for + * bounce buffering, but might not be the only one. + */ bool dma_addressing_limited(struct device *dev) { if (!__dma_addressing_limited(dev)) From 9e8d1013b0c38910cbc9e60de74dbe883878469d Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 16 Apr 2025 18:01:25 +0200 Subject: [PATCH 0988/2924] net: selftests: initialize TCP header and skb payload with zero Zero-initialize TCP header via memset() to avoid garbage values that may affect checksum or behavior during test transmission. Also zero-fill allocated payload and padding regions using memset() after skb_put(), ensuring deterministic content for all outgoing test packets. Fixes: 3e1e58d64c3d ("net: add generic selftest support") Signed-off-by: Oleksij Rempel Cc: stable@vger.kernel.org Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250416160125.2914724-1-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni --- net/core/selftests.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/core/selftests.c b/net/core/selftests.c index e99ae983fca9bb..35f807ea995235 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -100,10 +100,10 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, ehdr->h_proto = htons(ETH_P_IP); if (attr->tcp) { + memset(thdr, 0, sizeof(*thdr)); thdr->source = htons(attr->sport); thdr->dest = htons(attr->dport); thdr->doff = sizeof(struct tcphdr) / 4; - thdr->check = 0; } else { uhdr->source = htons(attr->sport); uhdr->dest = htons(attr->dport); @@ -144,10 +144,18 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, attr->id = net_test_next_id; shdr->id = net_test_next_id++; - if (attr->size) - skb_put(skb, attr->size); - if (attr->max_size && attr->max_size > skb->len) - skb_put(skb, attr->max_size - skb->len); + if (attr->size) { + void *payload = skb_put(skb, attr->size); + + memset(payload, 0, attr->size); + } + + if (attr->max_size && attr->max_size > skb->len) { + size_t pad_len = attr->max_size - skb->len; + void *pad = skb_put(skb, pad_len); + + memset(pad, 0, pad_len); + } skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; From c03a49f3093a4903c8a93c8b5c9a297b5343b169 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Wed, 16 Apr 2025 18:07:16 +0200 Subject: [PATCH 0989/2924] net: lwtunnel: disable BHs when required In lwtunnel_{output|xmit}(), dev_xmit_recursion() may be called in preemptible scope for PREEMPT kernels. This patch disables BHs before calling dev_xmit_recursion(). BHs are re-enabled only at the end, since we must ensure the same CPU is used for both dev_xmit_recursion_inc() and dev_xmit_recursion_dec() (and any other recursion levels in some cases) in order to maintain valid per-cpu counters. Reported-by: Alexei Starovoitov Closes: https://lore.kernel.org/netdev/CAADnVQJFWn3dBFJtY+ci6oN1pDFL=TzCmNbRgey7MdYxt_AP2g@mail.gmail.com/ Reported-by: Eduard Zingerman Closes: https://lore.kernel.org/netdev/m2h62qwf34.fsf@gmail.com/ Fixes: 986ffb3a57c5 ("net: lwtunnel: fix recursion loops") Signed-off-by: Justin Iurman Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250416160716.8823-1-justin.iurman@uliege.be Signed-off-by: Paolo Abeni --- net/core/lwtunnel.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index e39a459540ec02..60f27cb4e54f18 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -333,6 +333,8 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; int ret; + local_bh_disable(); + if (dev_xmit_recursion()) { net_crit_ratelimited("%s(): recursion limit reached on datapath\n", __func__); @@ -348,8 +350,10 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || - lwtstate->type > LWTUNNEL_ENCAP_MAX) - return 0; + lwtstate->type > LWTUNNEL_ENCAP_MAX) { + ret = 0; + goto out; + } ret = -EOPNOTSUPP; rcu_read_lock(); @@ -364,11 +368,13 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (ret == -EOPNOTSUPP) goto drop; - return ret; + goto out; drop: kfree_skb(skb); +out: + local_bh_enable(); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_output); @@ -380,6 +386,8 @@ int lwtunnel_xmit(struct sk_buff *skb) struct dst_entry *dst; int ret; + local_bh_disable(); + if (dev_xmit_recursion()) { net_crit_ratelimited("%s(): recursion limit reached on datapath\n", __func__); @@ -396,8 +404,10 @@ int lwtunnel_xmit(struct sk_buff *skb) lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || - lwtstate->type > LWTUNNEL_ENCAP_MAX) - return 0; + lwtstate->type > LWTUNNEL_ENCAP_MAX) { + ret = 0; + goto out; + } ret = -EOPNOTSUPP; rcu_read_lock(); @@ -412,11 +422,13 @@ int lwtunnel_xmit(struct sk_buff *skb) if (ret == -EOPNOTSUPP) goto drop; - return ret; + goto out; drop: kfree_skb(skb); +out: + local_bh_enable(); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_xmit); @@ -428,6 +440,8 @@ int lwtunnel_input(struct sk_buff *skb) struct dst_entry *dst; int ret; + DEBUG_NET_WARN_ON_ONCE(!in_softirq()); + if (dev_xmit_recursion()) { net_crit_ratelimited("%s(): recursion limit reached on datapath\n", __func__); From 460b14b0929fa9f658a7e159ef646ce456962ab0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 18 Apr 2025 13:27:53 +0200 Subject: [PATCH 0990/2924] spi: stm32-ospi: Fix an error handling path in stm32_ospi_probe() If an error occurs after a successful stm32_ospi_dma_setup() call, some dma_release_channel() calls are needed to release some resources, as already done in the remove function. Fixes: 79b8a705e26c ("spi: stm32: Add OSPI driver") Signed-off-by: Christophe JAILLET Reviewed-by: Patrice Chotard Link: https://patch.msgid.link/2674c8df1d05ab312826b69bfe9559f81d125a0b.1744975624.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-ospi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index 668022098b1eac..9ec9823409cc3d 100644 --- a/drivers/spi/spi-stm32-ospi.c +++ b/drivers/spi/spi-stm32-ospi.c @@ -960,6 +960,10 @@ static int stm32_ospi_probe(struct platform_device *pdev) err_pm_enable: pm_runtime_force_suspend(ospi->dev); mutex_destroy(&ospi->lock); + if (ospi->dma_chtx) + dma_release_channel(ospi->dma_chtx); + if (ospi->dma_chrx) + dma_release_channel(ospi->dma_chrx); return ret; } From bd7c19331913b955a7823e6315ca16bbcc65aeff Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Tue, 22 Apr 2025 14:54:54 +0200 Subject: [PATCH 0991/2924] XFS: fix zoned gc threshold math for 32-bit arches xfs_zoned_need_gc makes use of mult_frac() to calculate the threshold for triggering the zoned garbage collector, but, turns out mult_frac() doesn't properly work with 64-bit data types and this caused build failures on some 32-bit architectures. Fix this by essentially open coding mult_frac() in a 64-bit friendly way. Notice we don't need to bother with counters underflow here because xfs_estimate_freecounter() will always return a positive value, as it leverages percpu_counter_read_positive to read such counters. Fixes: 845abeb1f06a ("xfs: add tunable threshold parameter for triggering zone GC") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504181233.F7D9Atra-lkp@intel.com/ Signed-off-by: Carlos Maiolino Tested-by: Guenter Roeck Reviewed-by: Christoph Hellwig Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 8c541ca718722d..81c94dd1d5966b 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -170,7 +170,8 @@ bool xfs_zoned_need_gc( struct xfs_mount *mp) { - s64 available, free; + s64 available, free, threshold; + s32 remainder; if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE)) return false; @@ -183,7 +184,12 @@ xfs_zoned_need_gc( return true; free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); - if (available < mult_frac(free, mp->m_zonegc_low_space, 100)) + + threshold = div_s64_rem(free, 100, &remainder); + threshold = threshold * mp->m_zonegc_low_space + + remainder * div_s64(mp->m_zonegc_low_space, 100); + + if (available < threshold) return true; return false; From f0447f80aec83f1699d599c94618bb5c323963e6 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 22 Apr 2025 11:50:07 +0000 Subject: [PATCH 0992/2924] xfs: remove duplicate Zoned Filesystems sections in admin-guide Remove the duplicated section and while at it, turn spaces into tabs. Signed-off-by: Hans Holmberg Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Fixes: c7b67ddc3c99 ("xfs: document zoned rt specifics in admin-guide") Signed-off-by: Carlos Maiolino --- Documentation/admin-guide/xfs.rst | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst index 3e76276bd488b2..5becb441c3cba0 100644 --- a/Documentation/admin-guide/xfs.rst +++ b/Documentation/admin-guide/xfs.rst @@ -562,7 +562,7 @@ The interesting knobs for XFS workqueues are as follows: Zoned Filesystems ================= -For zoned file systems, the following attribute is exposed in: +For zoned file systems, the following attributes are exposed in: /sys/fs/xfs//zoned/ @@ -572,23 +572,10 @@ For zoned file systems, the following attribute is exposed in: is limited by the capabilities of the backing zoned device, file system size and the max_open_zones mount option. -Zoned Filesystems -================= - -For zoned file systems, the following attributes are exposed in: - - /sys/fs/xfs//zoned/ - - max_open_zones (Min: 1 Default: Varies Max: UINTMAX) - This read-only attribute exposes the maximum number of open zones - available for data placement. The value is determined at mount time and - is limited by the capabilities of the backing zoned device, file system - size and the max_open_zones mount option. - - zonegc_low_space (Min: 0 Default: 0 Max: 100) - Define a percentage for how much of the unused space that GC should keep - available for writing. A high value will reclaim more of the space - occupied by unused blocks, creating a larger buffer against write - bursts at the cost of increased write amplification. Regardless - of this value, garbage collection will always aim to free a minimum - amount of blocks to keep max_open_zones open for data placement purposes. + zonegc_low_space (Min: 0 Default: 0 Max: 100) + Define a percentage for how much of the unused space that GC should keep + available for writing. A high value will reclaim more of the space + occupied by unused blocks, creating a larger buffer against write + bursts at the cost of increased write amplification. Regardless + of this value, garbage collection will always aim to free a minimum + amount of blocks to keep max_open_zones open for data placement purposes. From 30d68cb0c37ebe2dc63aa1d46a28b9163e61caa2 Mon Sep 17 00:00:00 2001 From: Frederick Lawler Date: Thu, 27 Mar 2025 11:09:11 -0500 Subject: [PATCH 0993/2924] ima: process_measurement() needlessly takes inode_lock() on MAY_READ On IMA policy update, if a measure rule exists in the policy, IMA_MEASURE is set for ima_policy_flags which makes the violation_check variable always true. Coupled with a no-action on MAY_READ for a FILE_CHECK call, we're always taking the inode_lock(). This becomes a performance problem for extremely heavy read-only workloads. Therefore, prevent this only in the case there's no action to be taken. Signed-off-by: Frederick Lawler Acked-by: Roberto Sassu Signed-off-by: Mimi Zohar --- security/integrity/ima/ima_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index f3e7ac513db3f5..f99ab1a3b0f092 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -245,7 +245,9 @@ static int process_measurement(struct file *file, const struct cred *cred, &allowed_algos); violation_check = ((func == FILE_CHECK || func == MMAP_CHECK || func == MMAP_CHECK_REQPROT) && - (ima_policy_flag & IMA_MEASURE)); + (ima_policy_flag & IMA_MEASURE) && + ((action & IMA_MEASURE) || + (file->f_mode & FMODE_WRITE))); if (!action && !violation_check) return 0; From 89461db349cc00816c01d55507d511466b3b7151 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 21 Apr 2025 16:39:29 +0800 Subject: [PATCH 0994/2924] dma-coherent: Warn if OF reserved memory is beyond current coherent DMA mask When a reserved memory region described in the device tree is attached to a device, it is expected that the device's limitations are correctly included in that description. However, if the device driver failed to implement DMA address masking or addressing beyond the default 32 bits (on arm64), then bad things could happen because the DMA address was truncated, such as playing back audio with no actual audio coming out, or DMA overwriting random blocks of kernel memory. Check against the coherent DMA mask when the memory regions are attached to the device. Give a warning when the memory region can not be covered by the mask. A warning instead of a hard error was chosen, because it is possible that existing drivers could be working fine even if they forgot to extend the coherent DMA mask. Signed-off-by: Chen-Yu Tsai Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250421083930.374173-1-wenst@chromium.org --- kernel/dma/coherent.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 3b2bdca9f1d4b0..77c8d9487a9ab1 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -336,16 +336,22 @@ static phys_addr_t dma_reserved_default_memory_size __initdata; static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) { - if (!rmem->priv) { - struct dma_coherent_mem *mem; + struct dma_coherent_mem *mem = rmem->priv; + if (!mem) { mem = dma_init_coherent_memory(rmem->base, rmem->base, rmem->size, true); if (IS_ERR(mem)) return PTR_ERR(mem); rmem->priv = mem; } - dma_assign_coherent_memory(dev, rmem->priv); + + /* Warn if the device potentially can't use the reserved memory */ + if (mem->device_base + rmem->size - 1 > + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit)) + dev_warn(dev, "reserved memory is beyond device's set DMA address range\n"); + + dma_assign_coherent_memory(dev, mem); return 0; } From 4ea404fdbc39971814cd3eb36b43c11fb6f32e17 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 22 Apr 2025 16:43:29 +0100 Subject: [PATCH 0995/2924] lib: Ensure prime numbers tests are included in KUnit test runs When the select of PRIME_MUMBERS was removed from it's KUnit test Kconfig nothing was added to the KUnit configs, meaning that when run via the KUnit runner the tests are neither built nor run. Add PRIME_NUMBERS to all_tests.config so they are enabled when the KUnit runner builds the kernel. Fixes: 3f2925174f8b ("lib/prime_numbers: KUnit test should not select PRIME_NUMBERS") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20250422-lib-fix-prime-numbers-kunit-v1-1-4278c1d4a4ae@kernel.org Signed-off-by: Kees Cook --- tools/testing/kunit/configs/all_tests.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index cdd9782f9646ae..7bb885b0c32d40 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -43,6 +43,8 @@ CONFIG_REGMAP_BUILD=y CONFIG_AUDIT=y +CONFIG_PRIME_NUMBERS=y + CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y CONFIG_SECURITY_LANDLOCK=y From 7ffe3de53a885dbb5836541c2178bd07d1bad7df Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 17 Apr 2025 18:59:15 -0700 Subject: [PATCH 0996/2924] fs/buffer: split locking for pagecache lookups Callers of __find_get_block() may or may not allow for blocking semantics, and is currently assumed that it will not. Layout two paths based on this. The the private_lock scheme will continued to be used for atomic contexts. Otherwise take the folio lock instead, which protects the buffers, such as vs migration and try_to_free_buffers(). Per the "hack idea", the latter can alleviate contention on the private_lock for bdev mappings. For reasons of determinism and avoid making bugs hard to reproduce, the trylocking is not attempted. No change in semantics. All lookup users still take the spinlock. Reviewed-by: Jan Kara Signed-off-by: Davidlohr Bueso Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-2-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev Reviewed-by: Luis Chamberlain Signed-off-by: Christian Brauner --- fs/buffer.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index c7abb4a029dc84..f8fcffdbe5d905 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -176,18 +176,8 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) } EXPORT_SYMBOL(end_buffer_write_sync); -/* - * Various filesystems appear to want __find_get_block to be non-blocking. - * But it's the page lock which protects the buffers. To get around this, - * we get exclusion from try_to_free_buffers with the blockdev mapping's - * i_private_lock. - * - * Hack idea: for the blockdev mapping, i_private_lock contention - * may be quite high. This code could TryLock the page, and if that - * succeeds, there is no need to take i_private_lock. - */ static struct buffer_head * -__find_get_block_slow(struct block_device *bdev, sector_t block) +__find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic) { struct address_space *bd_mapping = bdev->bd_mapping; const int blkbits = bd_mapping->host->i_blkbits; @@ -204,7 +194,16 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) if (IS_ERR(folio)) goto out; - spin_lock(&bd_mapping->i_private_lock); + /* + * Folio lock protects the buffers. Callers that cannot block + * will fallback to serializing vs try_to_free_buffers() via + * the i_private_lock. + */ + if (atomic) + spin_lock(&bd_mapping->i_private_lock); + else + folio_lock(folio); + head = folio_buffers(folio); if (!head) goto out_unlock; @@ -236,7 +235,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) 1 << blkbits); } out_unlock: - spin_unlock(&bd_mapping->i_private_lock); + if (atomic) + spin_unlock(&bd_mapping->i_private_lock); + else + folio_unlock(folio); folio_put(folio); out: return ret; @@ -1388,14 +1390,15 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) * it in the LRU and mark it as accessed. If it is not present then return * NULL */ -struct buffer_head * -__find_get_block(struct block_device *bdev, sector_t block, unsigned size) +static struct buffer_head * +find_get_block_common(struct block_device *bdev, sector_t block, + unsigned size, bool atomic) { struct buffer_head *bh = lookup_bh_lru(bdev, block, size); if (bh == NULL) { /* __find_get_block_slow will mark the page accessed */ - bh = __find_get_block_slow(bdev, block); + bh = __find_get_block_slow(bdev, block, atomic); if (bh) bh_lru_install(bh); } else @@ -1403,6 +1406,12 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) return bh; } + +struct buffer_head * +__find_get_block(struct block_device *bdev, sector_t block, unsigned size) +{ + return find_get_block_common(bdev, block, size, true); +} EXPORT_SYMBOL(__find_get_block); /** From 559a0d7bf1a6e5a5d0ad4ab4b0089145042e3109 Mon Sep 17 00:00:00 2001 From: Viacheslav Dubeyko Date: Thu, 17 Apr 2025 15:35:07 -0700 Subject: [PATCH 0997/2924] MAINTAINERS: add HFS/HFS+ maintainers Both the hfs and hfsplus filesystem have been orphaned since at least 2014, i.e., over 10 years. However, HFS/HFS+ driver needs to stay for Debian Ports as otherwise we won't be able to boot PowerMacs using GRUB because GRUB won't be usable anymore on PowerMacs with HFS/HFS+ being removed from the kernel. This patch proposes to add Viacheslav Dubeyko and John Paul Adrian Glaubitz as maintainers of HFS/HFS+ driver. Signed-off-by: Viacheslav Dubeyko Link: https://lore.kernel.org/20250417223507.1097186-1-slava@dubeyko.com Signed-off-by: Christian Brauner --- MAINTAINERS | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index fa1e04e87d1d63..b8d1e41c27f675 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10457,14 +10457,18 @@ S: Supported F: drivers/infiniband/hw/hfi1 HFS FILESYSTEM +M: Viacheslav Dubeyko +M: John Paul Adrian Glaubitz L: linux-fsdevel@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/filesystems/hfs.rst F: fs/hfs/ HFSPLUS FILESYSTEM +M: Viacheslav Dubeyko +M: John Paul Adrian Glaubitz L: linux-fsdevel@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/filesystems/hfsplus.rst F: fs/hfsplus/ From 2814a7d3d2ff5d2cdd22936f641f758fdb971fa0 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 17 Apr 2025 18:59:16 -0700 Subject: [PATCH 0998/2924] fs/buffer: introduce sleeping flavors for pagecache lookups Add __find_get_block_nonatomic() and sb_find_get_block_nonatomic() calls for which users will be converted where safe. These versions will take the folio lock instead of the mapping's private_lock. Reviewed-by: Jan Kara Signed-off-by: Davidlohr Bueso Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-3-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev Reviewed-by: Luis Chamberlain Signed-off-by: Christian Brauner --- fs/buffer.c | 9 +++++++++ include/linux/buffer_head.h | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/fs/buffer.c b/fs/buffer.c index f8fcffdbe5d905..5b1d74c818e975 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1414,6 +1414,15 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) } EXPORT_SYMBOL(__find_get_block); +/* same as __find_get_block() but allows sleeping contexts */ +struct buffer_head * +__find_get_block_nonatomic(struct block_device *bdev, sector_t block, + unsigned size) +{ + return find_get_block_common(bdev, block, size, false); +} +EXPORT_SYMBOL(__find_get_block_nonatomic); + /** * bdev_getblk - Get a buffer_head in a block device's buffer cache. * @bdev: The block device. diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f0a4ad7839b62f..c791aa9a08daa0 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -222,6 +222,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); +struct buffer_head *__find_get_block_nonatomic(struct block_device *bdev, + sector_t block, unsigned size); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); @@ -397,6 +399,12 @@ sb_find_get_block(struct super_block *sb, sector_t block) return __find_get_block(sb->s_bdev, block, sb->s_blocksize); } +static inline struct buffer_head * +sb_find_get_block_nonatomic(struct super_block *sb, sector_t block) +{ + return __find_get_block_nonatomic(sb->s_bdev, block, sb->s_blocksize); +} + static inline void map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) { From 5b67d43976828dea2394eae2556b369bb7a61f64 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 17 Apr 2025 18:59:17 -0700 Subject: [PATCH 0999/2924] fs/buffer: use sleeping version of __find_get_block() Convert to the new nonatomic flavor to benefit from potential performance benefits and adapt in the future vs migration such that semantics are kept. Convert write_boundary_block() which already takes the buffer lock as well as bdev_getblk() depending on the respective gpf flags. There are no changes in semantics. Suggested-by: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Davidlohr Bueso Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-4-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev # [0] [1] Reviewed-by: Luis Chamberlain Signed-off-by: Christian Brauner --- fs/buffer.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 5b1d74c818e975..f8c9e5eb468535 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -658,7 +658,9 @@ EXPORT_SYMBOL(generic_buffers_fsync); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize) { - struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); + struct buffer_head *bh; + + bh = __find_get_block_nonatomic(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) write_dirty_buffer(bh, 0); @@ -1440,7 +1442,12 @@ EXPORT_SYMBOL(__find_get_block_nonatomic); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { - struct buffer_head *bh = __find_get_block(bdev, block, size); + struct buffer_head *bh; + + if (gfpflags_allow_blocking(gfp)) + bh = __find_get_block_nonatomic(bdev, block, size); + else + bh = __find_get_block(bdev, block, size); might_alloc(gfp); if (bh) From a0b5ff07491010789fcb012bc8f9dad9d26f9a8b Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 17 Apr 2025 18:59:18 -0700 Subject: [PATCH 1000/2924] fs/ocfs2: use sleeping version of __find_get_block() This is a path that allows for blocking as it does IO. Convert to the new nonatomic flavor to benefit from potential performance benefits and adapt in the future vs migration such that semantics are kept. Suggested-by: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Davidlohr Bueso Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-5-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev Reviewed-by: Luis Chamberlain Signed-off-by: Christian Brauner --- fs/ocfs2/journal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f1b4b3e611cb9b..c7a9729dc9d087 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1249,7 +1249,7 @@ static int ocfs2_force_read_journal(struct inode *inode) } for (i = 0; i < p_blocks; i++, p_blkno++) { - bh = __find_get_block(osb->sb->s_bdev, p_blkno, + bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno, osb->sb->s_blocksize); /* block not cached. */ if (!bh) From f76d4c28a46a9260d85e00dafc8f46d369365d33 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 17 Apr 2025 18:59:19 -0700 Subject: [PATCH 1001/2924] fs/jbd2: use sleeping version of __find_get_block() Convert to the new nonatomic flavor to benefit from potential performance benefits and adapt in the future vs migration such that semantics are kept. - jbd2_journal_revoke(): can sleep (has might_sleep() in the beginning) - jbd2_journal_cancel_revoke(): only used from do_get_write_access() and do_get_create_access() which do sleep. So can sleep. - jbd2_clear_buffer_revoked_flags() - only called from journal commit code which sleeps. So can sleep. Suggested-by: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Davidlohr Bueso Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-6-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev Reviewed-by: Luis Chamberlain Signed-off-by: Christian Brauner --- fs/jbd2/revoke.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 0cf0fddbee81c8..1467f6790747d8 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -345,7 +345,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, bh = bh_in; if (!bh) { - bh = __find_get_block(bdev, blocknr, journal->j_blocksize); + bh = __find_get_block_nonatomic(bdev, blocknr, + journal->j_blocksize); if (bh) BUFFER_TRACE(bh, "found on hash"); } @@ -355,7 +356,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, /* If there is a different buffer_head lying around in * memory anywhere... */ - bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); + bh2 = __find_get_block_nonatomic(bdev, blocknr, + journal->j_blocksize); if (bh2) { /* ... and it has RevokeValid status... */ if (bh2 != bh && buffer_revokevalid(bh2)) @@ -464,7 +466,8 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) * state machine will get very upset later on. */ if (need_cancel) { struct buffer_head *bh2; - bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); + bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr, + bh->b_size); if (bh2) { if (bh2 != bh) clear_buffer_revoked(bh2); @@ -492,9 +495,9 @@ void jbd2_clear_buffer_revoked_flags(journal_t *journal) struct jbd2_revoke_record_s *record; struct buffer_head *bh; record = (struct jbd2_revoke_record_s *)list_entry; - bh = __find_get_block(journal->j_fs_dev, - record->blocknr, - journal->j_blocksize); + bh = __find_get_block_nonatomic(journal->j_fs_dev, + record->blocknr, + journal->j_blocksize); if (bh) { clear_buffer_revoked(bh); __brelse(bh); From 6e8f57fd09c9fb569d10b2ccc3878155b702591a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 17 Apr 2025 18:59:20 -0700 Subject: [PATCH 1002/2924] fs/ext4: use sleeping version of sb_find_get_block() Enable ext4_free_blocks() to use it, which has a cond_resched to begin with. Convert to the new nonatomic flavor to benefit from potential performance benefits and adapt in the future vs migration such that semantics are kept. Suggested-by: Jan Kara Reviewed-by: Jan Kara Signed-off-by: Davidlohr Bueso Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-7-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev Reviewed-by: Luis Chamberlain Signed-off-by: Christian Brauner --- fs/ext4/mballoc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f88424c281943f..1e98c5be4e0ad5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6642,7 +6642,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, for (i = 0; i < count; i++) { cond_resched(); if (is_metadata) - bh = sb_find_get_block(inode->i_sb, block + i); + bh = sb_find_get_block_nonatomic(inode->i_sb, + block + i); ext4_forget(handle, is_metadata, inode, bh, block + i); } } From 2d900efff915fe24c3948d28eef9078953d87fec Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 17 Apr 2025 18:59:21 -0700 Subject: [PATCH 1003/2924] mm/migrate: fix sleep in atomic for large folios and buffer heads The large folio + buffer head noref migration scenarios are being naughty and blocking while holding a spinlock. As a consequence of the pagecache lookup path taking the folio lock this serializes against migration paths, so they can wait for each other. For the private_lock atomic case, a new BH_Migrate flag is introduced which enables the lookup to bail. This allows the critical region of the private_lock on the migration path to be reduced to the way it was before ebdf4de5642fb6 ("mm: migrate: fix reference check race between __find_get_block() and migration"), that is covering the count checks. The scope is always noref migration. Reported-by: kernel test robot Reported-by: syzbot+f3c6fda1297c748a7076@syzkaller.appspotmail.com Closes: https://lore.kernel.org/oe-lkp/202503101536.27099c77-lkp@intel.com Fixes: 3c20917120ce61 ("block/bdev: enable large folio support for large logical block sizes") Reviewed-by: Jan Kara Co-developed-by: Luis Chamberlain Signed-off-by: Davidlohr Bueso Link: https://kdevops.org/ext4/v6.15-rc2.html # [0] Link: https://lore.kernel.org/all/aAAEvcrmREWa1SKF@bombadil.infradead.org/ # [1] Link: https://lore.kernel.org/20250418015921.132400-8-dave@stgolabs.net Tested-by: kdevops@lists.linux.dev # [0] [1] Reviewed-by: Luis Chamberlain Signed-off-by: Christian Brauner --- fs/buffer.c | 12 +++++++++++- fs/ext4/ialloc.c | 3 ++- include/linux/buffer_head.h | 1 + mm/migrate.c | 8 +++++--- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index f8c9e5eb468535..7be23ff20b2733 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -207,6 +207,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic) head = folio_buffers(folio); if (!head) goto out_unlock; + /* + * Upon a noref migration, the folio lock serializes here; + * otherwise bail. + */ + if (test_bit_acquire(BH_Migrate, &head->b_state)) { + WARN_ON(!atomic); + goto out_unlock; + } + bh = head; do { if (!buffer_mapped(bh)) @@ -1390,7 +1399,8 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) /* * Perform a pagecache lookup for the matching buffer. If it's there, refresh * it in the LRU and mark it as accessed. If it is not present then return - * NULL + * NULL. Atomic context callers may also return NULL if the buffer is being + * migrated; similarly the page is not marked accessed either. */ static struct buffer_head * find_get_block_common(struct block_device *bdev, sector_t block, diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 38bc8d74f4cc23..e7ecc7c8a72969 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -691,7 +691,8 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) if (!bh || !buffer_uptodate(bh)) /* * If the block is not in the buffer cache, then it - * must have been written out. + * must have been written out, or, most unlikely, is + * being migrated - false failure should be OK here. */ goto out; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c791aa9a08daa0..0029ff880e2748 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -34,6 +34,7 @@ enum bh_state_bits { BH_Meta, /* Buffer contains metadata */ BH_Prio, /* Buffer should be submitted with REQ_PRIO */ BH_Defer_Completion, /* Defer AIO completion to workqueue */ + BH_Migrate, /* Buffer is being migrated (norefs) */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities diff --git a/mm/migrate.c b/mm/migrate.c index f3ee6d8d5e2eaa..676d9cfc7059ca 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -845,9 +845,11 @@ static int __buffer_migrate_folio(struct address_space *mapping, return -EAGAIN; if (check_refs) { - bool busy; + bool busy, migrating; bool invalidated = false; + migrating = test_and_set_bit_lock(BH_Migrate, &head->b_state); + VM_WARN_ON_ONCE(migrating); recheck_buffers: busy = false; spin_lock(&mapping->i_private_lock); @@ -859,12 +861,12 @@ static int __buffer_migrate_folio(struct address_space *mapping, } bh = bh->b_this_page; } while (bh != head); + spin_unlock(&mapping->i_private_lock); if (busy) { if (invalidated) { rc = -EAGAIN; goto unlock_buffers; } - spin_unlock(&mapping->i_private_lock); invalidate_bh_lrus(); invalidated = true; goto recheck_buffers; @@ -883,7 +885,7 @@ static int __buffer_migrate_folio(struct address_space *mapping, unlock_buffers: if (check_refs) - spin_unlock(&mapping->i_private_lock); + clear_bit_unlock(BH_Migrate, &head->b_state); bh = head; do { unlock_buffer(bh); From d1f7256a5a525a44ac6a81d0a8ff931317b2acbf Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Fri, 18 Apr 2025 14:57:56 +0200 Subject: [PATCH 1004/2924] fs: fall back to file_ref_put() for non-last reference This reduces the slowdown in face of multiple callers issuing close on what turns out to not be the last reference. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/20250418125756.59677-1-mjguzik@gmail.com Reviewed-by: Jan Kara Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202504171513.6d6f8a16-lkp@intel.com Signed-off-by: Christian Brauner --- fs/file.c | 2 +- include/linux/file_ref.h | 19 ++++++------------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/fs/file.c b/fs/file.c index dc3f7e120e3e5d..3a3146664cf371 100644 --- a/fs/file.c +++ b/fs/file.c @@ -26,7 +26,7 @@ #include "internal.h" -bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt) +static noinline bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt) { /* * If the reference count was already in the dead zone, then this diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h index 7db62fbc0500b0..31551e4cb8f34c 100644 --- a/include/linux/file_ref.h +++ b/include/linux/file_ref.h @@ -61,7 +61,6 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt) atomic_long_set(&ref->refcnt, cnt - 1); } -bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt); bool __file_ref_put(file_ref_t *ref, unsigned long cnt); /** @@ -178,20 +177,14 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref) */ static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref) { - long old, new; + long old; old = atomic_long_read(&ref->refcnt); - do { - if (unlikely(old < 0)) - return __file_ref_put_badval(ref, old); - - if (old == FILE_REF_ONEREF) - new = FILE_REF_DEAD; - else - new = old - 1; - } while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new)); - - return new == FILE_REF_DEAD; + if (likely(old == FILE_REF_ONEREF)) { + if (likely(atomic_long_try_cmpxchg(&ref->refcnt, &old, FILE_REF_DEAD))) + return true; + } + return file_ref_put(ref); } /** From 18853ba782bef65fc81ef2b3370382e5b479c5eb Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 22 Apr 2025 10:26:32 +0200 Subject: [PATCH 1005/2924] sched_ext: Track currently locked rq Some kfuncs provided by sched_ext may need to operate on a struct rq, but they can be invoked from various contexts, specifically, different scx callbacks. While some of these callbacks are invoked with a particular rq already locked, others are not. This makes it impossible for a kfunc to reliably determine whether it's safe to access a given rq, triggering potential bugs or unsafe behaviors, see for example [1]. To address this, track the currently locked rq whenever a sched_ext callback is invoked via SCX_CALL_OP*(). This allows kfuncs that need to operate on an arbitrary rq to retrieve the currently locked one and apply the appropriate action as needed. [1] https://lore.kernel.org/lkml/20250325140021.73570-1-arighi@nvidia.com/ Suggested-by: Tejun Heo Signed-off-by: Andrea Righi Acked-by: Changwoo Min Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 152 +++++++++++++++++++++++++--------------- kernel/sched/ext_idle.c | 2 +- 2 files changed, 95 insertions(+), 59 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index fdbf249d1c68ec..585bf6d8238b66 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -1118,8 +1118,38 @@ static void scx_kf_disallow(u32 mask) current->scx.kf_mask &= ~mask; } -#define SCX_CALL_OP(mask, op, args...) \ +/* + * Track the rq currently locked. + * + * This allows kfuncs to safely operate on rq from any scx ops callback, + * knowing which rq is already locked. + */ +static DEFINE_PER_CPU(struct rq *, locked_rq); + +static inline void update_locked_rq(struct rq *rq) +{ + /* + * Check whether @rq is actually locked. This can help expose bugs + * or incorrect assumptions about the context in which a kfunc or + * callback is executed. + */ + if (rq) + lockdep_assert_rq_held(rq); + __this_cpu_write(locked_rq, rq); +} + +/* + * Return the rq currently locked from an scx callback, or NULL if no rq is + * locked. + */ +static inline struct rq *scx_locked_rq(void) +{ + return __this_cpu_read(locked_rq); +} + +#define SCX_CALL_OP(mask, op, rq, args...) \ do { \ + update_locked_rq(rq); \ if (mask) { \ scx_kf_allow(mask); \ scx_ops.op(args); \ @@ -1127,11 +1157,14 @@ do { \ } else { \ scx_ops.op(args); \ } \ + update_locked_rq(NULL); \ } while (0) -#define SCX_CALL_OP_RET(mask, op, args...) \ +#define SCX_CALL_OP_RET(mask, op, rq, args...) \ ({ \ __typeof__(scx_ops.op(args)) __ret; \ + \ + update_locked_rq(rq); \ if (mask) { \ scx_kf_allow(mask); \ __ret = scx_ops.op(args); \ @@ -1139,6 +1172,7 @@ do { \ } else { \ __ret = scx_ops.op(args); \ } \ + update_locked_rq(NULL); \ __ret; \ }) @@ -1153,31 +1187,31 @@ do { \ * scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on * the specific task. */ -#define SCX_CALL_OP_TASK(mask, op, task, args...) \ +#define SCX_CALL_OP_TASK(mask, op, rq, task, args...) \ do { \ BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ current->scx.kf_tasks[0] = task; \ - SCX_CALL_OP(mask, op, task, ##args); \ + SCX_CALL_OP(mask, op, rq, task, ##args); \ current->scx.kf_tasks[0] = NULL; \ } while (0) -#define SCX_CALL_OP_TASK_RET(mask, op, task, args...) \ +#define SCX_CALL_OP_TASK_RET(mask, op, rq, task, args...) \ ({ \ __typeof__(scx_ops.op(task, ##args)) __ret; \ BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ current->scx.kf_tasks[0] = task; \ - __ret = SCX_CALL_OP_RET(mask, op, task, ##args); \ + __ret = SCX_CALL_OP_RET(mask, op, rq, task, ##args); \ current->scx.kf_tasks[0] = NULL; \ __ret; \ }) -#define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...) \ +#define SCX_CALL_OP_2TASKS_RET(mask, op, rq, task0, task1, args...) \ ({ \ __typeof__(scx_ops.op(task0, task1, ##args)) __ret; \ BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ current->scx.kf_tasks[0] = task0; \ current->scx.kf_tasks[1] = task1; \ - __ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args); \ + __ret = SCX_CALL_OP_RET(mask, op, rq, task0, task1, ##args); \ current->scx.kf_tasks[0] = NULL; \ current->scx.kf_tasks[1] = NULL; \ __ret; \ @@ -2172,7 +2206,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, WARN_ON_ONCE(*ddsp_taskp); *ddsp_taskp = p; - SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags); + SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, rq, p, enq_flags); *ddsp_taskp = NULL; if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID) @@ -2269,7 +2303,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags add_nr_running(rq, 1); if (SCX_HAS_OP(runnable) && !task_on_rq_migrating(p)) - SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags); + SCX_CALL_OP_TASK(SCX_KF_REST, runnable, rq, p, enq_flags); if (enq_flags & SCX_ENQ_WAKEUP) touch_core_sched(rq, p); @@ -2283,7 +2317,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags __scx_add_event(SCX_EV_SELECT_CPU_FALLBACK, 1); } -static void ops_dequeue(struct task_struct *p, u64 deq_flags) +static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags) { unsigned long opss; @@ -2304,7 +2338,7 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags) BUG(); case SCX_OPSS_QUEUED: if (SCX_HAS_OP(dequeue)) - SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, deq_flags); + SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, rq, p, deq_flags); if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss, SCX_OPSS_NONE)) @@ -2337,7 +2371,7 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags return true; } - ops_dequeue(p, deq_flags); + ops_dequeue(rq, p, deq_flags); /* * A currently running task which is going off @rq first gets dequeued @@ -2353,11 +2387,11 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags */ if (SCX_HAS_OP(stopping) && task_current(rq, p)) { update_curr_scx(rq); - SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false); + SCX_CALL_OP_TASK(SCX_KF_REST, stopping, rq, p, false); } if (SCX_HAS_OP(quiescent) && !task_on_rq_migrating(p)) - SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags); + SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, rq, p, deq_flags); if (deq_flags & SCX_DEQ_SLEEP) p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP; @@ -2377,7 +2411,7 @@ static void yield_task_scx(struct rq *rq) struct task_struct *p = rq->curr; if (SCX_HAS_OP(yield)) - SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL); + SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, rq, p, NULL); else p->scx.slice = 0; } @@ -2387,7 +2421,7 @@ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to) struct task_struct *from = rq->curr; if (SCX_HAS_OP(yield)) - return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to); + return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, rq, from, to); else return false; } @@ -2945,7 +2979,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev) * emitted in switch_class(). */ if (SCX_HAS_OP(cpu_acquire)) - SCX_CALL_OP(SCX_KF_REST, cpu_acquire, cpu_of(rq), NULL); + SCX_CALL_OP(SCX_KF_REST, cpu_acquire, rq, cpu_of(rq), NULL); rq->scx.cpu_released = false; } @@ -2990,7 +3024,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev) do { dspc->nr_tasks = 0; - SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, cpu_of(rq), + SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, rq, cpu_of(rq), prev_on_scx ? prev : NULL); flush_dispatch_buf(rq); @@ -3104,7 +3138,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first) * Core-sched might decide to execute @p before it is * dispatched. Call ops_dequeue() to notify the BPF scheduler. */ - ops_dequeue(p, SCX_DEQ_CORE_SCHED_EXEC); + ops_dequeue(rq, p, SCX_DEQ_CORE_SCHED_EXEC); dispatch_dequeue(rq, p); } @@ -3112,7 +3146,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first) /* see dequeue_task_scx() on why we skip when !QUEUED */ if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED)) - SCX_CALL_OP_TASK(SCX_KF_REST, running, p); + SCX_CALL_OP_TASK(SCX_KF_REST, running, rq, p); clr_task_runnable(p, true); @@ -3193,8 +3227,7 @@ static void switch_class(struct rq *rq, struct task_struct *next) .task = next, }; - SCX_CALL_OP(SCX_KF_CPU_RELEASE, - cpu_release, cpu_of(rq), &args); + SCX_CALL_OP(SCX_KF_CPU_RELEASE, cpu_release, rq, cpu_of(rq), &args); } rq->scx.cpu_released = true; } @@ -3207,7 +3240,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p, /* see dequeue_task_scx() on why we skip when !QUEUED */ if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED)) - SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true); + SCX_CALL_OP_TASK(SCX_KF_REST, stopping, rq, p, true); if (p->scx.flags & SCX_TASK_QUEUED) { set_task_runnable(rq, p); @@ -3348,7 +3381,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b, * verifier. */ if (SCX_HAS_OP(core_sched_before) && !scx_rq_bypassing(task_rq(a))) - return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before, + return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before, NULL, (struct task_struct *)a, (struct task_struct *)b); else @@ -3385,7 +3418,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag *ddsp_taskp = p; cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU, - select_cpu, p, prev_cpu, wake_flags); + select_cpu, NULL, p, prev_cpu, wake_flags); p->scx.selected_cpu = cpu; *ddsp_taskp = NULL; if (ops_cpu_valid(cpu, "from ops.select_cpu()")) @@ -3430,8 +3463,8 @@ static void set_cpus_allowed_scx(struct task_struct *p, * designation pointless. Cast it away when calling the operation. */ if (SCX_HAS_OP(set_cpumask)) - SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p, - (struct cpumask *)p->cpus_ptr); + SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, NULL, + p, (struct cpumask *)p->cpus_ptr); } static void handle_hotplug(struct rq *rq, bool online) @@ -3444,9 +3477,9 @@ static void handle_hotplug(struct rq *rq, bool online) scx_idle_update_selcpu_topology(&scx_ops); if (online && SCX_HAS_OP(cpu_online)) - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, cpu); + SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, rq, cpu); else if (!online && SCX_HAS_OP(cpu_offline)) - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, cpu); + SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, rq, cpu); else scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG, "cpu %d going %s, exiting scheduler", cpu, @@ -3550,7 +3583,7 @@ static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued) curr->scx.slice = 0; touch_core_sched(rq, curr); } else if (SCX_HAS_OP(tick)) { - SCX_CALL_OP_TASK(SCX_KF_REST, tick, curr); + SCX_CALL_OP_TASK(SCX_KF_REST, tick, rq, curr); } if (!curr->scx.slice) @@ -3627,7 +3660,7 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool .fork = fork, }; - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, p, &args); + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, NULL, p, &args); if (unlikely(ret)) { ret = ops_sanitize_err("init_task", ret); return ret; @@ -3668,9 +3701,10 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool static void scx_ops_enable_task(struct task_struct *p) { + struct rq *rq = task_rq(p); u32 weight; - lockdep_assert_rq_held(task_rq(p)); + lockdep_assert_rq_held(rq); /* * Set the weight before calling ops.enable() so that the scheduler @@ -3684,20 +3718,22 @@ static void scx_ops_enable_task(struct task_struct *p) p->scx.weight = sched_weight_to_cgroup(weight); if (SCX_HAS_OP(enable)) - SCX_CALL_OP_TASK(SCX_KF_REST, enable, p); + SCX_CALL_OP_TASK(SCX_KF_REST, enable, rq, p); scx_set_task_state(p, SCX_TASK_ENABLED); if (SCX_HAS_OP(set_weight)) - SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight); + SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, rq, p, p->scx.weight); } static void scx_ops_disable_task(struct task_struct *p) { - lockdep_assert_rq_held(task_rq(p)); + struct rq *rq = task_rq(p); + + lockdep_assert_rq_held(rq); WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED); if (SCX_HAS_OP(disable)) - SCX_CALL_OP_TASK(SCX_KF_REST, disable, p); + SCX_CALL_OP_TASK(SCX_KF_REST, disable, rq, p); scx_set_task_state(p, SCX_TASK_READY); } @@ -3726,7 +3762,7 @@ static void scx_ops_exit_task(struct task_struct *p) } if (SCX_HAS_OP(exit_task)) - SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, p, &args); + SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, task_rq(p), p, &args); scx_set_task_state(p, SCX_TASK_NONE); } @@ -3835,7 +3871,7 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p, p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight)); if (SCX_HAS_OP(set_weight)) - SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight); + SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, rq, p, p->scx.weight); } static void prio_changed_scx(struct rq *rq, struct task_struct *p, int oldprio) @@ -3851,8 +3887,8 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p) * different scheduler class. Keep the BPF scheduler up-to-date. */ if (SCX_HAS_OP(set_cpumask)) - SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p, - (struct cpumask *)p->cpus_ptr); + SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, rq, + p, (struct cpumask *)p->cpus_ptr); } static void switched_from_scx(struct rq *rq, struct task_struct *p) @@ -3913,7 +3949,7 @@ int scx_tg_online(struct task_group *tg) struct scx_cgroup_init_args args = { .weight = tg->scx_weight }; - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, NULL, tg->css.cgroup, &args); if (ret) ret = ops_sanitize_err("cgroup_init", ret); @@ -3935,7 +3971,7 @@ void scx_tg_offline(struct task_group *tg) percpu_down_read(&scx_cgroup_rwsem); if (SCX_HAS_OP(cgroup_exit) && (tg->scx_flags & SCX_TG_INITED)) - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, tg->css.cgroup); + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, NULL, tg->css.cgroup); tg->scx_flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED); percpu_up_read(&scx_cgroup_rwsem); @@ -3968,7 +4004,7 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset) continue; if (SCX_HAS_OP(cgroup_prep_move)) { - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move, + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move, NULL, p, from, css->cgroup); if (ret) goto err; @@ -3982,8 +4018,8 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset) err: cgroup_taskset_for_each(p, css, tset) { if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from) - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p, - p->scx.cgrp_moving_from, css->cgroup); + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, NULL, + p, p->scx.cgrp_moving_from, css->cgroup); p->scx.cgrp_moving_from = NULL; } @@ -4001,8 +4037,8 @@ void scx_cgroup_move_task(struct task_struct *p) * cgrp_moving_from set. */ if (SCX_HAS_OP(cgroup_move) && !WARN_ON_ONCE(!p->scx.cgrp_moving_from)) - SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, p, - p->scx.cgrp_moving_from, tg_cgrp(task_group(p))); + SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, NULL, + p, p->scx.cgrp_moving_from, tg_cgrp(task_group(p))); p->scx.cgrp_moving_from = NULL; } @@ -4021,8 +4057,8 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) cgroup_taskset_for_each(p, css, tset) { if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from) - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p, - p->scx.cgrp_moving_from, css->cgroup); + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, NULL, + p, p->scx.cgrp_moving_from, css->cgroup); p->scx.cgrp_moving_from = NULL; } out_unlock: @@ -4035,7 +4071,7 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) if (scx_cgroup_enabled && tg->scx_weight != weight) { if (SCX_HAS_OP(cgroup_set_weight)) - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight, + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight, NULL, tg_cgrp(tg), weight); tg->scx_weight = weight; } @@ -4224,7 +4260,7 @@ static void scx_cgroup_exit(void) continue; rcu_read_unlock(); - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, css->cgroup); + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, NULL, css->cgroup); rcu_read_lock(); css_put(css); @@ -4261,7 +4297,7 @@ static int scx_cgroup_init(void) continue; rcu_read_unlock(); - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, NULL, css->cgroup, &args); if (ret) { css_put(css); @@ -4758,7 +4794,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work) } if (scx_ops.exit) - SCX_CALL_OP(SCX_KF_UNLOCKED, exit, ei); + SCX_CALL_OP(SCX_KF_UNLOCKED, exit, NULL, ei); cancel_delayed_work_sync(&scx_watchdog_work); @@ -4965,7 +5001,7 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx, if (SCX_HAS_OP(dump_task)) { ops_dump_init(s, " "); - SCX_CALL_OP(SCX_KF_REST, dump_task, dctx, p); + SCX_CALL_OP(SCX_KF_REST, dump_task, NULL, dctx, p); ops_dump_exit(); } @@ -5012,7 +5048,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) if (SCX_HAS_OP(dump)) { ops_dump_init(&s, ""); - SCX_CALL_OP(SCX_KF_UNLOCKED, dump, &dctx); + SCX_CALL_OP(SCX_KF_UNLOCKED, dump, NULL, &dctx); ops_dump_exit(); } @@ -5069,7 +5105,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) used = seq_buf_used(&ns); if (SCX_HAS_OP(dump_cpu)) { ops_dump_init(&ns, " "); - SCX_CALL_OP(SCX_KF_REST, dump_cpu, &dctx, cpu, idle); + SCX_CALL_OP(SCX_KF_REST, dump_cpu, NULL, &dctx, cpu, idle); ops_dump_exit(); } @@ -5328,7 +5364,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) scx_idle_enable(ops); if (scx_ops.init) { - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init); + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init, NULL); if (ret) { ret = ops_sanitize_err("init", ret); cpus_read_unlock(); diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index cb343ca889e025..e67a19a071c114 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -674,7 +674,7 @@ void __scx_update_idle(struct rq *rq, bool idle, bool do_notify) * managed by put_prev_task_idle()/set_next_task_idle(). */ if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq)) - SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle); + SCX_CALL_OP(SCX_KF_REST, update_idle, rq, cpu_of(rq), idle); /* * Update the idle masks: From a11d6784d7316a6c77ca9f14fb1a698ebbb3c1fb Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 22 Apr 2025 10:26:33 +0200 Subject: [PATCH 1006/2924] sched_ext: Fix missing rq lock in scx_bpf_cpuperf_set() scx_bpf_cpuperf_set() can be used to set a performance target level on any CPU. However, it doesn't correctly acquire the corresponding rq lock, which may lead to unsafe behavior and trigger the following warning, due to the lockdep_assert_rq_held() check: [ 51.713737] WARNING: CPU: 3 PID: 3899 at kernel/sched/sched.h:1512 scx_bpf_cpuperf_set+0x1a0/0x1e0 ... [ 51.713836] Call trace: [ 51.713837] scx_bpf_cpuperf_set+0x1a0/0x1e0 (P) [ 51.713839] bpf_prog_62d35beb9301601f_bpfland_init+0x168/0x440 [ 51.713841] bpf__sched_ext_ops_init+0x54/0x8c [ 51.713843] scx_ops_enable.constprop.0+0x2c0/0x10f0 [ 51.713845] bpf_scx_reg+0x18/0x30 [ 51.713847] bpf_struct_ops_link_create+0x154/0x1b0 [ 51.713849] __sys_bpf+0x1934/0x22a0 Fix by properly acquiring the rq lock when possible or raising an error if we try to operate on a CPU that is not the one currently locked. Fixes: d86adb4fc0655 ("sched_ext: Add cpuperf support") Signed-off-by: Andrea Righi Acked-by: Changwoo Min Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 585bf6d8238b66..ac79067dc87e65 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -7113,13 +7113,32 @@ __bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf) } if (ops_cpu_valid(cpu, NULL)) { - struct rq *rq = cpu_rq(cpu); + struct rq *rq = cpu_rq(cpu), *locked_rq = scx_locked_rq(); + struct rq_flags rf; + + /* + * When called with an rq lock held, restrict the operation + * to the corresponding CPU to prevent ABBA deadlocks. + */ + if (locked_rq && rq != locked_rq) { + scx_ops_error("Invalid target CPU %d", cpu); + return; + } + + /* + * If no rq lock is held, allow to operate on any CPU by + * acquiring the corresponding rq lock. + */ + if (!locked_rq) { + rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); + } rq->scx.cpuperf_target = perf; + cpufreq_update_util(rq, 0); - rcu_read_lock_sched_notrace(); - cpufreq_update_util(cpu_rq(cpu), 0); - rcu_read_unlock_sched_notrace(); + if (!locked_rq) + rq_unlock_irqrestore(rq, &rf); } } From 5cf3c602df88b471178a5717b17e529d09acad84 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 17 Apr 2025 10:23:15 -0400 Subject: [PATCH 1007/2924] drm/amdgpu: Use allowed_domains for pinning dmabufs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When determining the domains for pinning DMABufs, filter allowed_domains and fail with a warning if VRAM is forbidden and GTT is not an allowed domain. Fixes: f5e7fabd1f5c ("drm/amdgpu: allow pinning DMA-bufs into VRAM if all importers can do P2P") Suggested-by: Christian König Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 3940796a6eefa555fec688a4adee5659ef9fa431) --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 667080cc9ae1c1..0446586bd5a706 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -77,7 +77,7 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct dma_buf *dmabuf = attach->dmabuf; struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv); - u32 domains = bo->preferred_domains; + u32 domains = bo->allowed_domains; dma_resv_assert_held(dmabuf->resv); @@ -93,6 +93,9 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) if (domains & AMDGPU_GEM_DOMAIN_VRAM) bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (WARN_ON(!domains)) + return -EINVAL; + return amdgpu_bo_pin(bo, domains); } From 5e56935b519b2fbbca1cafa0cef3c7c3d062f62d Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 15 Apr 2025 23:58:28 -0400 Subject: [PATCH 1008/2924] drm/amdgpu: Don't pin VRAM without DMABUF_MOVE_NOTIFY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pinning of VRAM is for peer devices that don't support dynamic attachment and move notifiers. But it requires that all such peer devices are able to access VRAM via PCIe P2P. Any device without P2P access requires migration to GTT, which fails if the memory is already pinned for another peer device. Sharing between GPUs should not require pinning in VRAM. However, if DMABUF_MOVE_NOTIFY is disabled in the kernel build, even DMABufs shared between GPUs must be pinned, which can lead to failures and functional regressions on systems where some peer GPUs are not P2P accessible. Disable VRAM pinning if move notifiers are disabled in the kernel build to fix regressions when sharing BOs between GPUs. Signed-off-by: Felix Kuehling Tested-by: Hao (Claire) Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 05185812ae3695fe049c14847ce3cbeccff1bf2e) --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 0446586bd5a706..5740e8d1a5226e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -81,14 +81,21 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) dma_resv_assert_held(dmabuf->resv); - /* - * Try pinning into VRAM to allow P2P with RDMA NICs without ODP + /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP * support if all attachments can do P2P. If any attachment can't do * P2P just pin into GTT instead. + * + * To avoid with conflicting pinnings between GPUs and RDMA when move + * notifiers are disabled, only allow pinning in VRAM when move + * notiers are enabled. */ - list_for_each_entry(attach, &dmabuf->attachments, node) - if (!attach->peer2peer) - domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } else { + list_for_each_entry(attach, &dmabuf->attachments, node) + if (!attach->peer2peer) + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } if (domains & AMDGPU_GEM_DOMAIN_VRAM) bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; From 7eb287beeb60be1e4437be2b4e4e9f0da89aab97 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 1 Apr 2025 17:05:10 -0400 Subject: [PATCH 1009/2924] drm/amd/display: Fix gpu reset in multidisplay config [Why] The indexing of stream_status in dm_gpureset_commit_state() is incorrect. That leads to asserts in multi-display configuration after gpu reset. [How] Adjust the indexing logic to align stream_status with surface_updates. Fixes: cdaae8371aa9 ("drm/amd/display: Handle GPU reset for DC block") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3808 Reviewed-by: Aurabindo Pillai Reviewed-by: Mario Limonciello Signed-off-by: Roman Li Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher (cherry picked from commit d91bc901398741d317d9b55c59ca949d4bc7394b) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9fed4471405f89..8f3a778df64651 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3355,16 +3355,16 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state, for (k = 0; k < dc_state->stream_count; k++) { bundle->stream_update.stream = dc_state->streams[k]; - for (m = 0; m < dc_state->stream_status->plane_count; m++) { + for (m = 0; m < dc_state->stream_status[k].plane_count; m++) { bundle->surface_updates[m].surface = - dc_state->stream_status->plane_states[m]; + dc_state->stream_status[k].plane_states[m]; bundle->surface_updates[m].surface->force_full_update = true; } update_planes_and_stream_adapter(dm->dc, UPDATE_TYPE_FULL, - dc_state->stream_status->plane_count, + dc_state->stream_status[k].plane_count, dc_state->streams[k], &bundle->stream_update, bundle->surface_updates); From 67fe574651c73fe5cc176e35f28f2ec1ba498d14 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Wed, 26 Mar 2025 10:33:51 -0400 Subject: [PATCH 1010/2924] drm/amd/display: Force full update in gpu reset [Why] While system undergoing gpu reset always do full update to sync the dc state before and after reset. [How] Return true in should_reset_plane() if gpu reset detected Reviewed-by: Aurabindo Pillai Reviewed-by: Mario Limonciello Signed-off-by: Roman Li Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher (cherry picked from commit 2ba8619b9a378ad218ad6c2e2ccaee8f531e08de) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8f3a778df64651..61ee530d78eafe 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11043,6 +11043,9 @@ static bool should_reset_plane(struct drm_atomic_state *state, state->allow_modeset) return true; + if (amdgpu_in_reset(adev) && state->allow_modeset) + return true; + /* Exit early if we know that we're adding or removing the plane. */ if (old_plane_state->crtc != new_plane_state->crtc) return true; From 756c85e4d0ddc497b4ad5b1f41ad54e838e06188 Mon Sep 17 00:00:00 2001 From: Nicholas Susanto Date: Wed, 2 Apr 2025 15:04:08 -0400 Subject: [PATCH 1011/2924] drm/amd/display: Enable urgent latency adjustment on DCN35 [Why] Urgent latency adjustment was disabled on DCN35 due to issues with P0 enablement on some platforms. Without urgent latency, underflows occur when doing certain high timing configurations. After testing, we found that reenabling urgent latency didn't reintroduce p0 support on multiple platforms. [How] renable urgent latency on DCN35 and setting it to 3000 Mhz. This reverts commit 3412860cc4c0c484f53f91b371483e6e4440c3e5. Reviewed-by: Charlene Liu Signed-off-by: Nicholas Susanto Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher (cherry picked from commit cd74ce1f0cddffb3f36d0995d0f61e89f0010738) --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 92f0a099d089ac..d9159ca5541249 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dcn_downspread_percent = 0.5, .gpuvm_min_page_size_bytes = 4096, .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = 0, + .do_urgent_latency_adjustment = 1, .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) From a92741e72f91b904c1d8c3d409ed8dbe9c1f2b26 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 16 Apr 2025 00:19:13 -0400 Subject: [PATCH 1012/2924] drm/amdgpu: Allow P2P access through XGMI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If peer memory is accessible through XGMI, allow leaving it in VRAM rather than forcing its migration to GTT on DMABuf attachment. Signed-off-by: Felix Kuehling Tested-by: Hao (Claire) Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 372c8d72c3680fdea3fbb2d6b089f76b4a6d596a) --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 30 ++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 5740e8d1a5226e..e6913fcf2c7be2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -43,6 +43,29 @@ #include #include +static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops; + +/** + * dma_buf_attach_adev - Helper to get adev of an attachment + * + * @attach: attachment + * + * Returns: + * A struct amdgpu_device * if the attaching device is an amdgpu device or + * partition, NULL otherwise. + */ +static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach) +{ + if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) { + struct drm_gem_object *obj = attach->importer_priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return amdgpu_ttm_adev(bo->tbo.bdev); + } + + return NULL; +} + /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -54,11 +77,13 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { + struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach); struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && + pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; amdgpu_vm_bo_update_shared(bo); @@ -480,6 +505,9 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct drm_gem_object *obj = &bo->tbo.base; struct drm_gem_object *gobj; + if (!adev) + return false; + if (obj->import_attach) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; From 870bea21fdf88f45c94c0a3dbb0e3cc1b219680f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 4 Apr 2025 09:34:52 -0500 Subject: [PATCH 1013/2924] drm/amd/display: Fix ACPI edid parsing on some Lenovo systems [Why] The ACPI EDID in the BIOS of a Lenovo laptop includes 3 blocks, but dm_helpers_probe_acpi_edid() has a start that is 'char'. The 3rd block index starts after 255, so it can't be indexed properly. This leads to problems with the display when the EDID is parsed. [How] Change the variable type to 'short' so that larger values can be indexed. Cc: Renjith Pananchikkal Reported-by: Mark Pearson Suggested-by: David Ober Fixes: c6a837088bed ("drm/amd/display: Fetch the EDID from _DDC if available for eDP") Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher (cherry picked from commit a918bb4a90d423ced2976a794f2724c362c1f063) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 2cd35392e2da7e..1395a748d726c4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -918,7 +918,7 @@ dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len) { struct drm_connector *connector = data; struct acpi_device *acpidev = ACPI_COMPANION(connector->dev->dev); - unsigned char start = block * EDID_LENGTH; + unsigned short start = block * EDID_LENGTH; struct edid *edid; int r; From d59bddce49bfd323f1218bb6c3ad314e5c4e8f9d Mon Sep 17 00:00:00 2001 From: George Shen Date: Mon, 7 Apr 2025 12:35:57 -0400 Subject: [PATCH 1014/2924] drm/amd/display: Use 16ms AUX read interval for LTTPR with old sinks [Why/How] LTTPR are required to program DPCD 0000Eh to 0x4 (16ms) upon AUX read reply to this register. Since old Sinks witih DPCD rev 1.1 and earlier may not support this register, assume the mandatory value is programmed by the LTTPR to avoid AUX timeout issues. Reviewed-by: Wenjing Liu Signed-off-by: George Shen Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher (cherry picked from commit 1594b60d74959c0680ddf777a74963c98afcdd7e) --- .../link/protocols/link_dp_training_8b_10b.c | 54 ++++++++++++------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 34d2e097ca2e6b..5a5d48fadbf27b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -35,6 +35,17 @@ #define DC_LOGGER \ link->ctx->logger +static void get_default_8b_10b_lttpr_aux_rd_interval( + union training_aux_rd_interval *training_rd_interval) +{ + /* LTTPR are required to program DPCD 0000Eh to 0x4 (16ms) upon AUX + * read reply to this register. Since old sinks with DPCD rev 1.1 + * and earlier may not support this register, assume the mandatory + * value is programmed by the LTTPR to avoid AUX timeout issues. + */ + training_rd_interval->raw = 0x4; +} + static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, const struct dc_link_settings *link_settings, enum lttpr_mode lttpr_mode) @@ -43,17 +54,22 @@ static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, uint32_t wait_in_micro_secs = 100; memset(&training_rd_interval, 0, sizeof(training_rd_interval)); - if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && - link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { - core_link_read_dpcd( - link, - DP_TRAINING_AUX_RD_INTERVAL, - (uint8_t *)&training_rd_interval, - sizeof(training_rd_interval)); - if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) - wait_in_micro_secs = 400; - if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) - wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t *)&training_rd_interval, + sizeof(training_rd_interval)); + else if (dp_is_lttpr_present(link)) + get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval); + + if (training_rd_interval.raw != 0) { + if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) + wait_in_micro_secs = 400; + if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) + wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + } } return wait_in_micro_secs; } @@ -71,13 +87,15 @@ static uint32_t get_eq_training_aux_rd_interval( DP_128B132B_TRAINING_AUX_RD_INTERVAL, (uint8_t *)&training_rd_interval, sizeof(training_rd_interval)); - } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && - link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { - core_link_read_dpcd( - link, - DP_TRAINING_AUX_RD_INTERVAL, - (uint8_t *)&training_rd_interval, - sizeof(training_rd_interval)); + } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t *)&training_rd_interval, + sizeof(training_rd_interval)); + else if (dp_is_lttpr_present(link)) + get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval); } switch (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) { From 6ed0dc3fd39558f48119daf8f99f835deb7d68da Mon Sep 17 00:00:00 2001 From: Leo Li Date: Tue, 18 Mar 2025 18:05:05 -0400 Subject: [PATCH 1015/2924] drm/amd/display: Default IPS to RCG_IN_ACTIVE_IPS2_IN_OFF [Why] Recent findings show negligible power savings between IPS2 and RCG during static desktop. In fact, DCN related clocks are higher when IPS2 is enabled vs RCG. RCG_IN_ACTIVE is also the default policy for another OS supported by DC, and it has faster entry/exit. [How] Remove previous logic that checked for IPS2 support, and just default to `DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF`. Fixes: 199888aa25b3 ("drm/amd/display: Update IPS default mode for DCN35/DCN351") Reviewed-by: Aurabindo Pillai Signed-off-by: Leo Li Signed-off-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher (cherry picked from commit 8f772d79ef39b463ead00ef6f009bebada3a9d49) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 61ee530d78eafe..5fe0b492156816 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1920,26 +1920,6 @@ static enum dmub_ips_disable_type dm_get_default_ips_mode( switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { case IP_VERSION(3, 5, 0): case IP_VERSION(3, 6, 0): - /* - * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to - * cause a hard hang. A fix exists for newer PMFW. - * - * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest - * IPS state in all cases, except for s0ix and all displays off (DPMS), - * where IPS2 is allowed. - * - * When checking pmfw version, use the major and minor only. - */ - if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0)) - /* - * Other ASICs with DCN35 that have residency issues with - * IPS2 in idle. - * We want them to use IPS2 only in display off cases. - */ - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - break; case IP_VERSION(3, 5, 1): ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; break; From b316727a27d0dac1e6b7ae51204df4d0f241fcc2 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Wed, 2 Apr 2025 19:03:31 +0200 Subject: [PATCH 1016/2924] drm/amd/display: do not copy invalid CRTC timing info Since b255ce4388e0, it is possible that the CRTC timing information for the preferred mode has not yet been calculated while amdgpu_dm_connector_mode_valid() is running. In this case use the CRTC timing information of the actual mode. Fixes: b255ce4388e0 ("drm/amdgpu: don't change mode in amdgpu_dm_connector_mode_valid()") Closes: https://lore.kernel.org/all/ed09edb167e74167a694f4854102a3de6d2f1433.camel@irl.hu/ Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4085 Signed-off-by: Gergo Koteles Reviewed-by: Zaeem Mohamed Tested-by: Mark Broadworth Signed-off-by: Alex Deucher (cherry picked from commit 20232192a5044d1f66dcbef0a24de1bb8157738d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5fe0b492156816..536f73131c2d15 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6501,12 +6501,12 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, const struct drm_display_mode *native_mode, bool scale_enabled) { - if (scale_enabled) { - copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); - } else if (native_mode->clock == drm_mode->clock && - native_mode->htotal == drm_mode->htotal && - native_mode->vtotal == drm_mode->vtotal) { - copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); + if (scale_enabled || ( + native_mode->clock == drm_mode->clock && + native_mode->htotal == drm_mode->htotal && + native_mode->vtotal == drm_mode->vtotal)) { + if (native_mode->crtc_clock) + copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); } else { /* no scaling nor amdgpu inserted, no need to patch */ } From 4c8925cb9db158c812e1e11f3e74b945df7c9801 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 16 Apr 2025 17:16:01 +0100 Subject: [PATCH 1017/2924] net: phylink: fix suspend/resume with WoL enabled and link down When WoL is enabled, we update the software state in phylink to indicate that the link is down, and disable the resolver from bringing the link back up. On resume, we attempt to bring the overall state into consistency by calling the .mac_link_down() method, but this is wrong if the link was already down, as phylink strictly orders the .mac_link_up() and .mac_link_down() methods - and this would break that ordering. Fixes: f97493657c63 ("net: phylink: add suspend/resume support") Signed-off-by: Russell King (Oracle) Tested-by: Russell King (Oracle) Link: https://patch.msgid.link/E1u55Qf-0016RN-PA@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index b68369e2342b27..1bdd5d8bb5b021 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -81,6 +81,7 @@ struct phylink { unsigned int pcs_state; bool link_failed; + bool suspend_link_up; bool major_config_failed; bool mac_supports_eee_ops; bool mac_supports_eee; @@ -2545,14 +2546,16 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) /* Stop the resolver bringing the link up */ __set_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state); - /* Disable the carrier, to prevent transmit timeouts, - * but one would hope all packets have been sent. This - * also means phylink_resolve() will do nothing. - */ - if (pl->netdev) - netif_carrier_off(pl->netdev); - else + pl->suspend_link_up = phylink_link_is_up(pl); + if (pl->suspend_link_up) { + /* Disable the carrier, to prevent transmit timeouts, + * but one would hope all packets have been sent. This + * also means phylink_resolve() will do nothing. + */ + if (pl->netdev) + netif_carrier_off(pl->netdev); pl->old_link_state = false; + } /* We do not call mac_link_down() here as we want the * link to remain up to receive the WoL packets. @@ -2603,15 +2606,18 @@ void phylink_resume(struct phylink *pl) if (test_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state)) { /* Wake-on-Lan enabled, MAC handling */ - /* Call mac_link_down() so we keep the overall state balanced. - * Do this under the state_mutex lock for consistency. This - * will cause a "Link Down" message to be printed during - * resume, which is harmless - the true link state will be - * printed when we run a resolve. - */ - mutex_lock(&pl->state_mutex); - phylink_link_down(pl); - mutex_unlock(&pl->state_mutex); + if (pl->suspend_link_up) { + /* Call mac_link_down() so we keep the overall state + * balanced. Do this under the state_mutex lock for + * consistency. This will cause a "Link Down" message + * to be printed during resume, which is harmless - + * the true link state will be printed when we run a + * resolve. + */ + mutex_lock(&pl->state_mutex); + phylink_link_down(pl); + mutex_unlock(&pl->state_mutex); + } /* Re-apply the link parameters so that all the settings get * restored to the MAC. From ce6815585d460c610e9881a5d347c0a34da287e4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 16 Apr 2025 22:53:19 +0100 Subject: [PATCH 1018/2924] net: phylink: mac_link_(up|down)() clarifications As a result of an email from the fbnic author, I reviewed the phylink documentation, and I have decided to clarify the wording in the mac_link_(up|down)() kernel documentation as this was written from the point of view of mvneta/mvpp2 and is misleading. The documentation talks about forcing the link - indeed, this is what is done in the mvneta and mvpp2 drivers but not at the physical layer but the MACs idea, which has the effect of only allowing or stopping packet flow at the MAC. This "link" needs to be controlled when using a PHY or fixed link to start or stop packet flow at the MAC. However, as the MAC and PCS are tightly integrated, if the MACs idea of the link is forced down, it has the side effect that there is no way to determine that the media link has come up - in this mode, the MAC must be allowed to follow its built-in PCS so we can read the link state. Frame the documentation in more generic terms, to avoid the thought that the physical media link to the partner needs in some way to be forced up or down with these calls; it does not. If that were to be done, it would be a self-fulfilling prophecy - e.g. if the media link goes down, then mac_link_down() will be called, and if the media link is then placed into a forced down state, there is no possibility that the media link will ever come up again - clearly this is a wrong interpretation. These methods are notifications to the MAC about what has happened to the media link state - either from the PHY, or a PCS, or whatever mechanism fixed-link is using. Thus, reword them to get away from talking about changing link state to avoid confusion with media link state. This is not a change of any requirements of these methods. Also, remove the obsolete references to EEE for these methods, we now have the LPI functions for configuring the EEE parameters which renders this redundant, and also makes the passing of "phy" to the mac_link_up() function obsolete. Signed-off-by: Russell King (Oracle) Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/E1u5Ah5-001GO1-7E@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phylink.h | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 1f5773ab566010..30659b615fca8c 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -361,23 +361,29 @@ int mac_finish(struct phylink_config *config, unsigned int mode, phy_interface_t iface); /** - * mac_link_down() - take the link down + * mac_link_down() - notification that the link has gone down * @config: a pointer to a &struct phylink_config. * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * - * If @mode is not an in-band negotiation mode (as defined by - * phylink_autoneg_inband()), force the link down and disable any - * Energy Efficient Ethernet MAC configuration. Interface type - * selection must be done in mac_config(). + * Notifies the MAC that the link has gone down. This will not be called + * unless mac_link_up() has been previously called. + * + * The MAC should stop processing packets for transmission and reception. + * phylink will have called netif_carrier_off() to notify the networking + * stack that the link has gone down, so MAC drivers should not make this + * call. + * + * If @mode is %MLO_AN_INBAND, then this function must not prevent the + * link coming up. */ void mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface); /** - * mac_link_up() - allow the link to come up + * mac_link_up() - notification that the link has come up * @config: a pointer to a &struct phylink_config. - * @phy: any attached phy + * @phy: any attached phy (deprecated - please use LPI interfaces) * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * @speed: link speed @@ -385,7 +391,10 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * @tx_pause: link transmit pause enablement status * @rx_pause: link receive pause enablement status * - * Configure the MAC for an established link. + * Notifies the MAC that the link has come up, and the parameters of the + * link as seen from the MACs point of view. If mac_link_up() has been + * called previously, there will be an intervening call to mac_link_down() + * before this method will be subsequently called. * * @speed, @duplex, @tx_pause and @rx_pause indicate the finalised link * settings, and should be used to configure the MAC block appropriately @@ -397,9 +406,9 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * that the user wishes to override the pause settings, and this should * be allowed when considering the implementation of this method. * - * If in-band negotiation mode is disabled, allow the link to come up. If - * @phy is non-%NULL, configure Energy Efficient Ethernet by calling - * phy_init_eee() and perform appropriate MAC configuration for EEE. + * Once configured, the MAC may begin to process packets for transmission + * and reception. + * * Interface type selection must be done in mac_config(). */ void mac_link_up(struct phylink_config *config, struct phy_device *phy, From b7f0ee992adf601aa00c252418266177eb7ac2bc Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Thu, 17 Apr 2025 11:25:56 +0800 Subject: [PATCH 1019/2924] net: phy: leds: fix memory leak A network restart test on a router led to an out-of-memory condition, which was traced to a memory leak in the PHY LED trigger code. The root cause is misuse of the devm API. The registration function (phy_led_triggers_register) is called from phy_attach_direct, not phy_probe, and the unregister function (phy_led_triggers_unregister) is called from phy_detach, not phy_remove. This means the register and unregister functions can be called multiple times for the same PHY device, but devm-allocated memory is not freed until the driver is unbound. This also prevents kmemleak from detecting the leak, as the devm API internally stores the allocated pointer. Fix this by replacing devm_kzalloc/devm_kcalloc with standard kzalloc/kcalloc, and add the corresponding kfree calls in the unregister path. Fixes: 3928ee6485a3 ("net: phy: leds: Add support for "link" trigger") Fixes: 2e0bc452f472 ("net: phy: leds: add support for led triggers on phy link state change") Signed-off-by: Hao Guan Signed-off-by: Qingfang Deng Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250417032557.2929427-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_led_triggers.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index bd3c9554f6acb5..60893691d4c357 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -93,9 +93,8 @@ int phy_led_triggers_register(struct phy_device *phy) if (!phy->phy_num_led_triggers) return 0; - phy->led_link_trigger = devm_kzalloc(&phy->mdio.dev, - sizeof(*phy->led_link_trigger), - GFP_KERNEL); + phy->led_link_trigger = kzalloc(sizeof(*phy->led_link_trigger), + GFP_KERNEL); if (!phy->led_link_trigger) { err = -ENOMEM; goto out_clear; @@ -105,10 +104,9 @@ int phy_led_triggers_register(struct phy_device *phy) if (err) goto out_free_link; - phy->phy_led_triggers = devm_kcalloc(&phy->mdio.dev, - phy->phy_num_led_triggers, - sizeof(struct phy_led_trigger), - GFP_KERNEL); + phy->phy_led_triggers = kcalloc(phy->phy_num_led_triggers, + sizeof(struct phy_led_trigger), + GFP_KERNEL); if (!phy->phy_led_triggers) { err = -ENOMEM; goto out_unreg_link; @@ -129,11 +127,11 @@ int phy_led_triggers_register(struct phy_device *phy) out_unreg: while (i--) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); - devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); + kfree(phy->phy_led_triggers); out_unreg_link: phy_led_trigger_unregister(phy->led_link_trigger); out_free_link: - devm_kfree(&phy->mdio.dev, phy->led_link_trigger); + kfree(phy->led_link_trigger); phy->led_link_trigger = NULL; out_clear: phy->phy_num_led_triggers = 0; @@ -147,8 +145,13 @@ void phy_led_triggers_unregister(struct phy_device *phy) for (i = 0; i < phy->phy_num_led_triggers; i++) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); + kfree(phy->phy_led_triggers); + phy->phy_led_triggers = NULL; - if (phy->led_link_trigger) + if (phy->led_link_trigger) { phy_led_trigger_unregister(phy->led_link_trigger); + kfree(phy->led_link_trigger); + phy->led_link_trigger = NULL; + } } EXPORT_SYMBOL_GPL(phy_led_triggers_unregister); From 4bc12818b363bd30f0f7348dd9ab077290a637ae Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 17 Apr 2025 14:28:03 +0700 Subject: [PATCH 1020/2924] virtio-net: disable delayed refill when pausing rx When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call napi_disable() on the receive queue's napi. In delayed refill_work, it also calls napi_disable() on the receive queue's napi. When napi_disable() is called on an already disabled napi, it will sleep in napi_disable_locked while still holding the netdev_lock. As a result, later napi_enable gets stuck too as it cannot acquire the netdev_lock. This leads to refill_work and the pause-then-resume tx are stuck altogether. This scenario can be reproducible by binding a XDP socket to virtio-net interface without setting up the fill ring. As a result, try_fill_recv will fail until the fill ring is set up and refill_work is scheduled. This commit adds virtnet_rx_(pause/resume)_all helpers and fixes up the virtnet_rx_resume to disable future and cancel all inflights delayed refill_work before calling napi_disable() to pause the rx. Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()") Acked-by: Michael S. Tsirkin Signed-off-by: Bui Quang Minh Acked-by: Jason Wang Link: https://patch.msgid.link/20250417072806.18660-2-minhquangbui99@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 69 +++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 12 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7e4617216a4bd5..848fab51dfa1b7 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3342,7 +3342,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) +static void __virtnet_rx_pause(struct virtnet_info *vi, + struct receive_queue *rq) { bool running = netif_running(vi->dev); @@ -3352,17 +3353,63 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) } } -static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) +static void virtnet_rx_pause_all(struct virtnet_info *vi) +{ + int i; + + /* + * Make sure refill_work does not run concurrently to + * avoid napi_disable race which leads to deadlock. + */ + disable_delayed_refill(vi); + cancel_delayed_work_sync(&vi->refill); + for (i = 0; i < vi->max_queue_pairs; i++) + __virtnet_rx_pause(vi, &vi->rq[i]); +} + +static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) +{ + /* + * Make sure refill_work does not run concurrently to + * avoid napi_disable race which leads to deadlock. + */ + disable_delayed_refill(vi); + cancel_delayed_work_sync(&vi->refill); + __virtnet_rx_pause(vi, rq); +} + +static void __virtnet_rx_resume(struct virtnet_info *vi, + struct receive_queue *rq, + bool refill) { bool running = netif_running(vi->dev); - if (!try_fill_recv(vi, rq, GFP_KERNEL)) + if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) schedule_delayed_work(&vi->refill, 0); if (running) virtnet_napi_enable(rq); } +static void virtnet_rx_resume_all(struct virtnet_info *vi) +{ + int i; + + enable_delayed_refill(vi); + for (i = 0; i < vi->max_queue_pairs; i++) { + if (i < vi->curr_queue_pairs) + __virtnet_rx_resume(vi, &vi->rq[i], true); + else + __virtnet_rx_resume(vi, &vi->rq[i], false); + } +} + +static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) +{ + enable_delayed_refill(vi); + __virtnet_rx_resume(vi, rq, true); +} + static int virtnet_rx_resize(struct virtnet_info *vi, struct receive_queue *rq, u32 ring_num) { @@ -5959,12 +6006,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (prog) bpf_prog_add(prog, vi->max_queue_pairs - 1); + virtnet_rx_pause_all(vi); + /* Make sure NAPI is not using any XDP TX queues for RX. */ if (netif_running(dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_disable(&vi->rq[i]); + for (i = 0; i < vi->max_queue_pairs; i++) virtnet_napi_tx_disable(&vi->sq[i]); - } } if (!prog) { @@ -5996,13 +6043,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, vi->xdp_enabled = false; } + virtnet_rx_resume_all(vi); for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev)) { - virtnet_napi_enable(&vi->rq[i]); + if (netif_running(dev)) virtnet_napi_tx_enable(&vi->sq[i]); - } } return 0; @@ -6014,11 +6060,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); } + virtnet_rx_resume_all(vi); if (netif_running(dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_enable(&vi->rq[i]); + for (i = 0; i < vi->max_queue_pairs; i++) virtnet_napi_tx_enable(&vi->sq[i]); - } } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); From 002ba346e3d76bb2b09448beed06c5ea1b0e06b8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 21 Apr 2025 11:31:31 +0800 Subject: [PATCH 1021/2924] crypto: scomp - Fix off-by-one bug when calculating last page Fix off-by-one bug in the last page calculation for src and dst. Reported-by: Nhat Pham Fixes: 2d3553ecb4e3 ("crypto: scomp - Remove support for some non-trivial SG lists") Signed-off-by: Herbert Xu --- crypto/scompress.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crypto/scompress.c b/crypto/scompress.c index 5762fcc63b5158..36934c78d1277d 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -215,8 +215,8 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) spage = nth_page(spage, soff / PAGE_SIZE); soff = offset_in_page(soff); - n = slen / PAGE_SIZE; - n += (offset_in_page(slen) + soff - 1) / PAGE_SIZE; + n = (slen - 1) / PAGE_SIZE; + n += (offset_in_page(slen - 1) + soff) / PAGE_SIZE; if (PageHighMem(nth_page(spage, n)) && size_add(soff, slen) > PAGE_SIZE) break; @@ -243,9 +243,9 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) dpage = nth_page(dpage, doff / PAGE_SIZE); doff = offset_in_page(doff); - n = dlen / PAGE_SIZE; - n += (offset_in_page(dlen) + doff - 1) / PAGE_SIZE; - if (PageHighMem(dpage + n) && + n = (dlen - 1) / PAGE_SIZE; + n += (offset_in_page(dlen - 1) + doff) / PAGE_SIZE; + if (PageHighMem(nth_page(dpage, n)) && size_add(doff, dlen) > PAGE_SIZE) break; dst = kmap_local_page(dpage) + doff; From 8006aff15516a170640239c5a8e6696c0ba18d8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 22 Apr 2025 11:57:18 +0200 Subject: [PATCH 1022/2924] crypto: atmel-sha204a - Set hwrng quality to lowest possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the review by Bill Cox [1], the Atmel SHA204A random number generator produces random numbers with very low entropy. Set the lowest possible entropy for this chip just to be safe. [1] https://www.metzdowd.com/pipermail/cryptography/2014-December/023858.html Fixes: da001fb651b00e1d ("crypto: atmel-i2c - add support for SHA204A random number generator") Cc: Signed-off-by: Marek Behún Acked-by: Ard Biesheuvel Reviewed-by: Linus Walleij Signed-off-by: Herbert Xu --- drivers/crypto/atmel-sha204a.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 75bebec2c757bf..0fcf4a39de279d 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -163,6 +163,12 @@ static int atmel_sha204a_probe(struct i2c_client *client) i2c_priv->hwrng.name = dev_name(&client->dev); i2c_priv->hwrng.read = atmel_sha204a_rng_read; + /* + * According to review by Bill Cox [1], this HWRNG has very low entropy. + * [1] https://www.metzdowd.com/pipermail/cryptography/2014-December/023858.html + */ + i2c_priv->hwrng.quality = 1; + ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng); if (ret) dev_warn(&client->dev, "failed to register RNG (%d)\n", ret); From d63527e109e811ef11abb1c2985048fdb528b4cb Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Thu, 17 Apr 2025 14:47:15 +0700 Subject: [PATCH 1023/2924] tipc: fix NULL pointer dereference in tipc_mon_reinit_self() syzbot reported: tipc: Node number set to 1055423674 Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 3 UID: 0 PID: 6017 Comm: kworker/3:5 Not tainted 6.15.0-rc1-syzkaller-00246-g900241a5cc15 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Workqueue: events tipc_net_finalize_work RIP: 0010:tipc_mon_reinit_self+0x11c/0x210 net/tipc/monitor.c:719 ... RSP: 0018:ffffc9000356fb68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003ee87cba RDX: 0000000000000000 RSI: ffffffff8dbc56a7 RDI: ffff88804c2cc010 RBP: dffffc0000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000007 R13: fffffbfff2111097 R14: ffff88804ead8000 R15: ffff88804ead9010 FS: 0000000000000000(0000) GS:ffff888097ab9000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000f720eb00 CR3: 000000000e182000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tipc_net_finalize+0x10b/0x180 net/tipc/net.c:140 process_one_work+0x9cc/0x1b70 kernel/workqueue.c:3238 process_scheduled_works kernel/workqueue.c:3319 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3400 kthread+0x3c2/0x780 kernel/kthread.c:464 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:153 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 ... RIP: 0010:tipc_mon_reinit_self+0x11c/0x210 net/tipc/monitor.c:719 ... RSP: 0018:ffffc9000356fb68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003ee87cba RDX: 0000000000000000 RSI: ffffffff8dbc56a7 RDI: ffff88804c2cc010 RBP: dffffc0000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000007 R13: fffffbfff2111097 R14: ffff88804ead8000 R15: ffff88804ead9010 FS: 0000000000000000(0000) GS:ffff888097ab9000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000f720eb00 CR3: 000000000e182000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 There is a racing condition between workqueue created when enabling bearer and another thread created when disabling bearer right after that as follow: enabling_bearer | disabling_bearer --------------- | ---------------- tipc_disc_timeout() | { | bearer_disable() ... | { schedule_work(&tn->work); | tipc_mon_delete() ... | { } | ... | write_lock_bh(&mon->lock); | mon->self = NULL; | write_unlock_bh(&mon->lock); | ... | } tipc_net_finalize_work() | } { | ... | tipc_net_finalize() | { | ... | tipc_mon_reinit_self() | { | ... | write_lock_bh(&mon->lock); | mon->self->addr = tipc_own_addr(net); | write_unlock_bh(&mon->lock); | ... | } | ... | } | ... | } | 'mon->self' is set to NULL in disabling_bearer thread and dereferenced later in enabling_bearer thread. This commit fixes this issue by validating 'mon->self' before assigning node address to it. Reported-by: syzbot+ed60da8d686dc709164c@syzkaller.appspotmail.com Fixes: 46cb01eeeb86 ("tipc: update mon's self addr when node addr generated") Signed-off-by: Tung Nguyen Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250417074826.578115-1-tung.quang.nguyen@est.tech Signed-off-by: Jakub Kicinski --- net/tipc/monitor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index e2f19627e43d52..b45c5b91bc7afb 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -716,7 +716,8 @@ void tipc_mon_reinit_self(struct net *net) if (!mon) continue; write_lock_bh(&mon->lock); - mon->self->addr = tipc_own_addr(net); + if (mon->self) + mon->self->addr = tipc_own_addr(net); write_unlock_bh(&mon->lock); } } From af5226abb40cae959f424f7ca614787a1c87ce48 Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Wed, 16 Apr 2025 20:26:25 +0100 Subject: [PATCH 1024/2924] smb: server: smb2pdu: check return value of xa_store() xa_store() may fail so check its return value and return error code if error occurred. Signed-off-by: Salah Triki Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 57839f9708bb6c..372021b3f2632c 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1445,7 +1445,7 @@ static int ntlm_authenticate(struct ksmbd_work *work, { struct ksmbd_conn *conn = work->conn; struct ksmbd_session *sess = work->sess; - struct channel *chann = NULL; + struct channel *chann = NULL, *old; struct ksmbd_user *user; u64 prev_id; int sz, rc; @@ -1557,7 +1557,12 @@ static int ntlm_authenticate(struct ksmbd_work *work, return -ENOMEM; chann->conn = conn; - xa_store(&sess->ksmbd_chann_list, (long)conn, chann, KSMBD_DEFAULT_GFP); + old = xa_store(&sess->ksmbd_chann_list, (long)conn, chann, + KSMBD_DEFAULT_GFP); + if (xa_is_err(old)) { + kfree(chann); + return xa_err(old); + } } } From a1f46c99d9ea411f9bf30025b912d881d36fc709 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 17 Apr 2025 10:10:15 +0900 Subject: [PATCH 1025/2924] ksmbd: fix use-after-free in ksmbd_session_rpc_open A UAF issue can occur due to a race condition between ksmbd_session_rpc_open() and __session_rpc_close(). Add rpc_lock to the session to protect it. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/mgmt/user_session.c | 20 ++++++++++++++------ fs/smb/server/mgmt/user_session.h | 1 + 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 3f45f28f6f0f8e..9dec4c2940bc04 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -59,10 +59,12 @@ static void ksmbd_session_rpc_clear_list(struct ksmbd_session *sess) struct ksmbd_session_rpc *entry; long index; + down_write(&sess->rpc_lock); xa_for_each(&sess->rpc_handle_list, index, entry) { xa_erase(&sess->rpc_handle_list, index); __session_rpc_close(sess, entry); } + up_write(&sess->rpc_lock); xa_destroy(&sess->rpc_handle_list); } @@ -92,7 +94,7 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) { struct ksmbd_session_rpc *entry, *old; struct ksmbd_rpc_command *resp; - int method; + int method, id; method = __rpc_method(rpc_name); if (!method) @@ -102,26 +104,29 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) if (!entry) return -ENOMEM; + down_read(&sess->rpc_lock); entry->method = method; - entry->id = ksmbd_ipc_id_alloc(); - if (entry->id < 0) + entry->id = id = ksmbd_ipc_id_alloc(); + if (id < 0) goto free_entry; - old = xa_store(&sess->rpc_handle_list, entry->id, entry, KSMBD_DEFAULT_GFP); + old = xa_store(&sess->rpc_handle_list, id, entry, KSMBD_DEFAULT_GFP); if (xa_is_err(old)) goto free_id; - resp = ksmbd_rpc_open(sess, entry->id); + resp = ksmbd_rpc_open(sess, id); if (!resp) goto erase_xa; + up_read(&sess->rpc_lock); kvfree(resp); - return entry->id; + return id; erase_xa: xa_erase(&sess->rpc_handle_list, entry->id); free_id: ksmbd_rpc_id_free(entry->id); free_entry: kfree(entry); + up_read(&sess->rpc_lock); return -EINVAL; } @@ -129,9 +134,11 @@ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id) { struct ksmbd_session_rpc *entry; + down_write(&sess->rpc_lock); entry = xa_erase(&sess->rpc_handle_list, id); if (entry) __session_rpc_close(sess, entry); + up_write(&sess->rpc_lock); } int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id) @@ -439,6 +446,7 @@ static struct ksmbd_session *__session_create(int protocol) sess->sequence_number = 1; rwlock_init(&sess->tree_conns_lock); atomic_set(&sess->refcnt, 2); + init_rwsem(&sess->rpc_lock); ret = __init_smb2_session(sess); if (ret) diff --git a/fs/smb/server/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h index f21348381d5984..c5749d6ec7151c 100644 --- a/fs/smb/server/mgmt/user_session.h +++ b/fs/smb/server/mgmt/user_session.h @@ -63,6 +63,7 @@ struct ksmbd_session { rwlock_t tree_conns_lock; atomic_t refcnt; + struct rw_semaphore rpc_lock; }; static inline int test_session_flag(struct ksmbd_session *sess, int bit) From 491ef1117c56476f199b481f8c68820fe4c3a7c2 Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Thu, 17 Apr 2025 17:41:07 +0100 Subject: [PATCH 1026/2924] net: ethernet: mtk_eth_soc: net: revise NETSYSv3 hardware configuration Change hardware configuration for the NETSYSv3. - Enable PSE dummy page mechanism for the GDM1/2/3 - Enable PSE drop mechanism when the WDMA Rx ring full - Enable PSE no-drop mechanism for packets from the WDMA Tx - Correct PSE free drop threshold - Correct PSE CDMA high threshold Fixes: 1953f134a1a8b ("net: ethernet: mtk_eth_soc: add NETSYS_V3 version support") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Reviewed-by: Simon Horman Link: https://patch.msgid.link/b71f8fd9d4bb69c646c4d558f9331dd965068606.1744907886.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 24 +++++++++++++++++---- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 10 ++++++++- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index bdb98c9d8b1c19..47807b20231042 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4043,11 +4043,27 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP); if (mtk_is_netsys_v3_or_greater(eth)) { - /* PSE should not drop port1, port8 and port9 packets */ - mtk_w32(eth, 0x00000302, PSE_DROP_CFG); + /* PSE dummy page mechanism */ + mtk_w32(eth, PSE_DUMMY_WORK_GDM(1) | PSE_DUMMY_WORK_GDM(2) | + PSE_DUMMY_WORK_GDM(3) | DUMMY_PAGE_THR, PSE_DUMY_REQ); + + /* PSE free buffer drop threshold */ + mtk_w32(eth, 0x00600009, PSE_IQ_REV(8)); + + /* PSE should not drop port8, port9 and port13 packets from + * WDMA Tx + */ + mtk_w32(eth, 0x00002300, PSE_DROP_CFG); + + /* PSE should drop packets to port8, port9 and port13 on WDMA Rx + * ring full + */ + mtk_w32(eth, 0x00002300, PSE_PPE_DROP(0)); + mtk_w32(eth, 0x00002300, PSE_PPE_DROP(1)); + mtk_w32(eth, 0x00002300, PSE_PPE_DROP(2)); /* GDM and CDM Threshold */ - mtk_w32(eth, 0x00000707, MTK_CDMW0_THRES); + mtk_w32(eth, 0x08000707, MTK_CDMW0_THRES); mtk_w32(eth, 0x00000077, MTK_CDMW1_THRES); /* Disable GDM1 RX CRC stripping */ @@ -4064,7 +4080,7 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) mtk_w32(eth, 0x00000300, PSE_DROP_CFG); /* PSE should drop packets to port 8/9 on WDMA Rx ring full */ - mtk_w32(eth, 0x00000300, PSE_PPE0_DROP); + mtk_w32(eth, 0x00000300, PSE_PPE_DROP(0)); /* PSE Free Queue Flow Control */ mtk_w32(eth, 0x01fa01f4, PSE_FQFC_CFG2); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 39709649ea8d14..88ef2e9c50fc19 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -151,7 +151,15 @@ #define PSE_FQFC_CFG1 0x100 #define PSE_FQFC_CFG2 0x104 #define PSE_DROP_CFG 0x108 -#define PSE_PPE0_DROP 0x110 +#define PSE_PPE_DROP(x) (0x110 + ((x) * 0x4)) + +/* PSE Last FreeQ Page Request Control */ +#define PSE_DUMY_REQ 0x10C +/* PSE_DUMY_REQ is not a typo but actually called like that also in + * MediaTek's datasheet + */ +#define PSE_DUMMY_WORK_GDM(x) BIT(16 + (x)) +#define DUMMY_PAGE_THR 0x1 /* PSE Input Queue Reservation Register*/ #define PSE_IQ_REV(x) (0x140 + (((x) - 1) << 2)) From 0d039eac6e5950f9d1ecc9e410c2fd1feaeab3b6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Apr 2025 02:30:34 +0100 Subject: [PATCH 1027/2924] fix a couple of races in MNT_TREE_BENEATH handling by do_move_mount() Normally do_lock_mount(path, _) is locking a mountpoint pinned by *path and at the time when matching unlock_mount() unlocks that location it is still pinned by the same thing. Unfortunately, for 'beneath' case it's no longer that simple - the object being locked is not the one *path points to. It's the mountpoint of path->mnt. The thing is, without sufficient locking ->mnt_parent may change under us and none of the locks are held at that point. The rules are * mount_lock stabilizes m->mnt_parent for any mount m. * namespace_sem stabilizes m->mnt_parent, provided that m is mounted. * if either of the above holds and refcount of m is positive, we are guaranteed the same for refcount of m->mnt_parent. namespace_sem nests inside inode_lock(), so do_lock_mount() has to take inode_lock() before grabbing namespace_sem. It does recheck that path->mnt is still mounted in the same place after getting namespace_sem, and it does take care to pin the dentry. It is needed, since otherwise we might end up with racing mount --move (or umount) happening while we were getting locks; in that case dentry would no longer be a mountpoint and could've been evicted on memory pressure along with its inode - not something you want when grabbing lock on that inode. However, pinning a dentry is not enough - the matching mount is also pinned only by the fact that path->mnt is mounted on top it and at that point we are not holding any locks whatsoever, so the same kind of races could end up with all references to that mount gone just as we are about to enter inode_lock(). If that happens, we are left with filesystem being shut down while we are holding a dentry reference on it; results are not pretty. What we need to do is grab both dentry and mount at the same time; that makes inode_lock() safe *and* avoids the problem with fs getting shut down under us. After taking namespace_sem we verify that path->mnt is still mounted (which stabilizes its ->mnt_parent) and check that it's still mounted at the same place. From that point on to the matching namespace_unlock() we are guaranteed that mount/dentry pair we'd grabbed are also pinned by being the mountpoint of path->mnt, so we can quietly drop both the dentry reference (as the current code does) and mnt one - it's OK to do under namespace_sem, since we are not dropping the final refs. That solves the problem on do_lock_mount() side; unlock_mount() also has one, since dentry is guaranteed to stay pinned only until the namespace_unlock(). That's easy to fix - just have inode_unlock() done earlier, while it's still pinned by mp->m_dentry. Fixes: 6ac392815628 "fs: allow to mount beneath top mount" # v6.5+ Signed-off-by: Al Viro Signed-off-by: Christian Brauner --- fs/namespace.c | 69 ++++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index d9ca80dcc54450..98a5cd756e9ae4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2826,56 +2826,62 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath) struct vfsmount *mnt = path->mnt; struct dentry *dentry; struct mountpoint *mp = ERR_PTR(-ENOENT); + struct path under = {}; for (;;) { - struct mount *m; + struct mount *m = real_mount(mnt); if (beneath) { - m = real_mount(mnt); + path_put(&under); read_seqlock_excl(&mount_lock); - dentry = dget(m->mnt_mountpoint); + under.mnt = mntget(&m->mnt_parent->mnt); + under.dentry = dget(m->mnt_mountpoint); read_sequnlock_excl(&mount_lock); + dentry = under.dentry; } else { dentry = path->dentry; } inode_lock(dentry->d_inode); - if (unlikely(cant_mount(dentry))) { - inode_unlock(dentry->d_inode); - goto out; - } - namespace_lock(); - if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) { + if (unlikely(cant_mount(dentry) || !is_mounted(mnt))) + break; // not to be mounted on + + if (beneath && unlikely(m->mnt_mountpoint != dentry || + &m->mnt_parent->mnt != under.mnt)) { namespace_unlock(); inode_unlock(dentry->d_inode); - goto out; + continue; // got moved } mnt = lookup_mnt(path); - if (likely(!mnt)) + if (unlikely(mnt)) { + namespace_unlock(); + inode_unlock(dentry->d_inode); + path_put(path); + path->mnt = mnt; + path->dentry = dget(mnt->mnt_root); + continue; // got overmounted + } + mp = get_mountpoint(dentry); + if (IS_ERR(mp)) break; - - namespace_unlock(); - inode_unlock(dentry->d_inode); - if (beneath) - dput(dentry); - path_put(path); - path->mnt = mnt; - path->dentry = dget(mnt->mnt_root); - } - - mp = get_mountpoint(dentry); - if (IS_ERR(mp)) { - namespace_unlock(); - inode_unlock(dentry->d_inode); + if (beneath) { + /* + * @under duplicates the references that will stay + * at least until namespace_unlock(), so the path_put() + * below is safe (and OK to do under namespace_lock - + * we are not dropping the final references here). + */ + path_put(&under); + } + return mp; } - -out: + namespace_unlock(); + inode_unlock(dentry->d_inode); if (beneath) - dput(dentry); - + path_put(&under); return mp; } @@ -2886,14 +2892,11 @@ static inline struct mountpoint *lock_mount(struct path *path) static void unlock_mount(struct mountpoint *where) { - struct dentry *dentry = where->m_dentry; - + inode_unlock(where->m_dentry->d_inode); read_seqlock_excl(&mount_lock); put_mountpoint(where); read_sequnlock_excl(&mount_lock); - namespace_unlock(); - inode_unlock(dentry->d_inode); } static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) From be3f1938d3e6ea8186f0de3dd95245dda4f22c1e Mon Sep 17 00:00:00 2001 From: Dave Chen Date: Tue, 15 Apr 2025 14:33:42 +0800 Subject: [PATCH 1028/2924] btrfs: fix COW handling in run_delalloc_nocow() In run_delalloc_nocow(), when the found btrfs_key's offset > cur_offset, it indicates a gap between the current processing region and the next file extent. The original code would directly jump to the "must_cow" label, which increments the slot and forces a fallback to COW. This behavior might skip an extent item and result in an overestimated COW fallback range. This patch modifies the logic so that when a gap is detected: - If no COW range is already being recorded (cow_start is unset), cow_start is set to cur_offset. - cur_offset is then advanced to the beginning of the next extent. - Instead of jumping to "must_cow", control flows directly to "next_slot" so that the same extent item can be reexamined properly. The change ensures that we accurately account for the extent gap and avoid accidentally extending the range that needs to fallback to COW. CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Filipe Manana Signed-off-by: Dave Chen Signed-off-by: David Sterba --- fs/btrfs/inode.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6eedfbfce1cba3..312fa996a9871c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2129,12 +2129,13 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, /* * If the found extent starts after requested offset, then - * adjust extent_end to be right before this extent begins + * adjust cur_offset to be right before this extent begins. */ if (found_key.offset > cur_offset) { - extent_end = found_key.offset; - extent_type = 0; - goto must_cow; + if (cow_start == (u64)-1) + cow_start = cur_offset; + cur_offset = found_key.offset; + goto next_slot; } /* From 48c1d1bb525b1c44b8bdc8e7ec5629cb6c2b9fc4 Mon Sep 17 00:00:00 2001 From: Penglei Jiang Date: Mon, 21 Apr 2025 08:40:29 -0700 Subject: [PATCH 1029/2924] btrfs: fix the inode leak in btrfs_iget() [BUG] There is a bug report that a syzbot reproducer can lead to the following busy inode at unmount time: BTRFS info (device loop1): last unmount of filesystem 1680000e-3c1e-4c46-84b6-56bd3909af50 VFS: Busy inodes after unmount of loop1 (btrfs) ------------[ cut here ]------------ kernel BUG at fs/super.c:650! Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI CPU: 0 UID: 0 PID: 48168 Comm: syz-executor Not tainted 6.15.0-rc2-00471-g119009db2674 #2 PREEMPT(full) Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:generic_shutdown_super+0x2e9/0x390 fs/super.c:650 Call Trace: kill_anon_super+0x3a/0x60 fs/super.c:1237 btrfs_kill_super+0x3b/0x50 fs/btrfs/super.c:2099 deactivate_locked_super+0xbe/0x1a0 fs/super.c:473 deactivate_super fs/super.c:506 [inline] deactivate_super+0xe2/0x100 fs/super.c:502 cleanup_mnt+0x21f/0x440 fs/namespace.c:1435 task_work_run+0x14d/0x240 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop kernel/entry/common.c:114 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0x269/0x290 kernel/entry/common.c:218 do_syscall_64+0xd4/0x250 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f [CAUSE] When btrfs_alloc_path() failed, btrfs_iget() directly returned without releasing the inode already allocated by btrfs_iget_locked(). This results the above busy inode and trigger the kernel BUG. [FIX] Fix it by calling iget_failed() if btrfs_alloc_path() failed. If we hit error inside btrfs_read_locked_inode(), it will properly call iget_failed(), so nothing to worry about. Although the iget_failed() cleanup inside btrfs_read_locked_inode() is a break of the normal error handling scheme, let's fix the obvious bug and backport first, then rework the error handling later. Reported-by: Penglei Jiang Link: https://lore.kernel.org/linux-btrfs/20250421102425.44431-1-superman.xpt@gmail.com/ Fixes: 7c855e16ab72 ("btrfs: remove conditional path allocation in btrfs_read_locked_inode()") CC: stable@vger.kernel.org # 6.13+ Reviewed-by: Qu Wenruo Signed-off-by: Penglei Jiang Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 312fa996a9871c..d295a37fa0491f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5682,8 +5682,10 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root) return inode; path = btrfs_alloc_path(); - if (!path) + if (!path) { + iget_failed(&inode->vfs_inode); return ERR_PTR(-ENOMEM); + } ret = btrfs_read_locked_inode(inode, path); btrfs_free_path(path); From e08e49d986f82c30f42ad0ed43ebbede1e1e3739 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 14 Apr 2025 14:51:58 -0400 Subject: [PATCH 1030/2924] btrfs: adjust subpage bit start based on sectorsize When running machines with 64k page size and a 16k nodesize we started seeing tree log corruption in production. This turned out to be because we were not writing out dirty blocks sometimes, so this in fact affects all metadata writes. When writing out a subpage EB we scan the subpage bitmap for a dirty range. If the range isn't dirty we do bit_start++; to move onto the next bit. The problem is the bitmap is based on the number of sectors that an EB has. So in this case, we have a 64k pagesize, 16k nodesize, but a 4k sectorsize. This means our bitmap is 4 bits for every node. With a 64k page size we end up with 4 nodes per page. To make this easier this is how everything looks [0 16k 32k 48k ] logical address [0 4 8 12 ] radix tree offset [ 64k page ] folio [ 16k eb ][ 16k eb ][ 16k eb ][ 16k eb ] extent buffers [ | | | | | | | | | | | | | | | | ] bitmap Now we use all of our addressing based on fs_info->sectorsize_bits, so as you can see the above our 16k eb->start turns into radix entry 4. When we find a dirty range for our eb, we correctly do bit_start += sectors_per_node, because if we start at bit 0, the next bit for the next eb is 4, to correspond to eb->start 16k. However if our range is clean, we will do bit_start++, which will now put us offset from our radix tree entries. In our case, assume that the first time we check the bitmap the block is not dirty, we increment bit_start so now it == 1, and then we loop around and check again. This time it is dirty, and we go to find that start using the following equation start = folio_start + bit_start * fs_info->sectorsize; so in the case above, eb->start 0 is now dirty, and we calculate start as 0 + 1 * fs_info->sectorsize = 4096 4096 >> 12 = 1 Now we're looking up the radix tree for 1, and we won't find an eb. What's worse is now we're using bit_start == 1, so we do bit_start += sectors_per_node, which is now 5. If that eb is dirty we will run into the same thing, we will look at an offset that is not populated in the radix tree, and now we're skipping the writeout of dirty extent buffers. The best fix for this is to not use sectorsize_bits to address nodes, but that's a larger change. Since this is a fs corruption problem fix it simply by always using sectors_per_node to increment the start bit. Fixes: c4aec299fa8f ("btrfs: introduce submit_eb_subpage() to submit a subpage metadata page") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 197f5e51c47446..8515c31f563b8f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2047,7 +2047,7 @@ static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc) subpage->bitmaps)) { spin_unlock_irqrestore(&subpage->lock, flags); spin_unlock(&folio->mapping->i_private_lock); - bit_start++; + bit_start += sectors_per_node; continue; } From 0db61388b389f43c1ba2f1cee3613feb4fd12150 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 22 Apr 2025 15:33:18 -0700 Subject: [PATCH 1031/2924] perf/core: Change to POLLERR for pinned events with error Commit: f4b07fd62d4d11d5 ("perf/core: Use POLLHUP for pinned events in error") started to emit POLLHUP for pinned events in an error state. But the POLLHUP is also used to signal events that the attached task is terminated. To distinguish pinned per-task events in the error state it would need to check if the task is live. Change it to POLLERR to make it clear. Suggested-by: Gabriel Marin Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250422223318.180343-1-namhyung@kernel.org --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index e93c195659140d..95e703891b24f8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3943,7 +3943,7 @@ static int merge_sched_in(struct perf_event *event, void *data) perf_event_set_state(event, PERF_EVENT_STATE_ERROR); if (*perf_event_fasync(event)) - event->pending_kill = POLL_HUP; + event->pending_kill = POLL_ERR; perf_event_wakeup(event); } else { @@ -6075,7 +6075,7 @@ static __poll_t perf_poll(struct file *file, poll_table *wait) if (unlikely(READ_ONCE(event->state) == PERF_EVENT_STATE_ERROR && event->attr.pinned)) - return events; + return EPOLLERR; /* * Pin the event->rb by taking event->mmap_mutex; otherwise From 34024cf69c51b11c2b608f5d578626b9b1c484f5 Mon Sep 17 00:00:00 2001 From: Hao Chang Date: Tue, 22 Apr 2025 15:52:05 +0800 Subject: [PATCH 1032/2924] pinctrl: mediatek: Fix new design debounce issue Calculate the true offset of eint according to index. Fixes: 3ef9f710efcb ("pinctrl: mediatek: Add EINT support for multiple addresses") Signed-off-by: Hao Chang Signed-off-by: Qingliang Li Link: https://lore.kernel.org/20250422075216.14073-1-ot_chhao.chang@mediatek.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/mtk-eint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index ced4ee509b5b6d..b4eb2beab691cf 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -449,7 +449,7 @@ int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_num, return -EOPNOTSUPP; virq = irq_find_mapping(eint->domain, eint_num); - eint_offset = (eint_num % 4) * 8; + eint_offset = (idx % 4) * 8; d = irq_get_irq_data(virq); set_offset = (idx / 4) * 4 + eint->regs->dbnc_set; From 12df9ec3e1955aed6a0c839f2375cd8e5d5150cf Mon Sep 17 00:00:00 2001 From: Saranya Gopal Date: Mon, 21 Apr 2025 09:43:32 +0530 Subject: [PATCH 1033/2924] platform/x86/intel: hid: Add Pantherlake support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Pantherlake ACPI device ID to the Intel HID driver. While there, clean up the device ID table to remove the ", 0" parts. Suggested-by: Andy Shevchenko Signed-off-by: Saranya Gopal Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250421041332.830136-1-saranya.gopal@intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/hid.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 88a1a9ff2f3443..0b5e43444ed603 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -44,16 +44,17 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); static const struct acpi_device_id intel_hid_ids[] = { - {"INT33D5", 0}, - {"INTC1051", 0}, - {"INTC1054", 0}, - {"INTC1070", 0}, - {"INTC1076", 0}, - {"INTC1077", 0}, - {"INTC1078", 0}, - {"INTC107B", 0}, - {"INTC10CB", 0}, - {"", 0}, + { "INT33D5" }, + { "INTC1051" }, + { "INTC1054" }, + { "INTC1070" }, + { "INTC1076" }, + { "INTC1077" }, + { "INTC1078" }, + { "INTC107B" }, + { "INTC10CB" }, + { "INTC10CC" }, + { } }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); From 246f9bb62016c423972ea7f2335a8e0ed3521cde Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Sat, 19 Apr 2025 12:45:29 -0300 Subject: [PATCH 1034/2924] platform/x86: alienware-wmi-wmax: Add support for Alienware m15 R7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend thermal control support to Alienware m15 R7. Cc: stable@vger.kernel.org Tested-by: Romain THERY Signed-off-by: Kurt Borja Link: https://lore.kernel.org/r/20250419-m15-r7-v1-1-18c6eaa27e25@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/alienware-wmi-wmax.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 3f9e1e986ecf0a..08b82c151e0710 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -69,6 +69,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &generic_quirks, }, + { + .ident = "Alienware m15 R7", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m15 R7"), + }, + .driver_data = &generic_quirks, + }, { .ident = "Alienware m16 R1", .matches = { From 77bdac73754e4c0c564c1ca80fe3d9c93b0e715a Mon Sep 17 00:00:00 2001 From: Pavel Nikulin Date: Fri, 18 Apr 2025 20:06:08 +0600 Subject: [PATCH 1035/2924] platform/x86: asus-wmi: Disable OOBE state after resume from hibernation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASUS firmware resets OOBE state during S4 suspend, so the keyboard blinks during resume from hibernation. This patch disables OOBE state after resume from hibernation. Signed-off-by: Pavel Nikulin Link: https://lore.kernel.org/r/20250418140706.1691-1-pavel@noa-labs.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-wmi.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 38ef778e8c19b9..0c697b46f436a3 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -304,6 +304,7 @@ struct asus_wmi { u32 kbd_rgb_dev; bool kbd_rgb_state_available; + bool oobe_state_available; u8 throttle_thermal_policy_mode; u32 throttle_thermal_policy_dev; @@ -1826,7 +1827,7 @@ static int asus_wmi_led_init(struct asus_wmi *asus) goto error; } - if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_OOBE)) { + if (asus->oobe_state_available) { /* * Disable OOBE state, so that e.g. the keyboard backlight * works. @@ -4723,6 +4724,7 @@ static int asus_wmi_add(struct platform_device *pdev) asus->egpu_enable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU); asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); + asus->oobe_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_OOBE); asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) && dmi_check_system(asus_ally_mcu_quirk); @@ -4970,6 +4972,13 @@ static int asus_hotk_restore(struct device *device) } if (!IS_ERR_OR_NULL(asus->kbd_led.dev)) kbd_led_update(asus); + if (asus->oobe_state_available) { + /* + * Disable OOBE state, so that e.g. the keyboard backlight + * works. + */ + asus_wmi_set_devstate(ASUS_WMI_DEVID_OOBE, 1, NULL); + } if (asus_wmi_has_fnlock_key(asus)) asus_wmi_fnlock_update(asus); From 02c6e43397c39edd0c172859bf8c851b46be09a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Nemgar?= Date: Fri, 18 Apr 2025 09:07:38 +0200 Subject: [PATCH 1036/2924] platform/x86: ideapad-laptop: add support for some new buttons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add entries to unsupported WMI codes in ideapad_keymap[] and one check for WMI code 0x13d to trigger platform_profile_cycle(). Signed-off-by: Gašper Nemgar Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250418070738.7171-1-gasper.nemgar@gmail.com [ij: joined nested if ()s & major tweaks to changelog] Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/ideapad-laptop.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 17a09b7784edb3..ede483573fe0dd 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1294,6 +1294,16 @@ static const struct key_entry ideapad_keymap[] = { /* Specific to some newer models */ { KE_KEY, 0x3e | IDEAPAD_WMI_KEY, { KEY_MICMUTE } }, { KE_KEY, 0x3f | IDEAPAD_WMI_KEY, { KEY_RFKILL } }, + /* Star- (User Assignable Key) */ + { KE_KEY, 0x44 | IDEAPAD_WMI_KEY, { KEY_PROG1 } }, + /* Eye */ + { KE_KEY, 0x45 | IDEAPAD_WMI_KEY, { KEY_PROG3 } }, + /* Performance toggle also Fn+Q, handled inside ideapad_wmi_notify() */ + { KE_KEY, 0x3d | IDEAPAD_WMI_KEY, { KEY_PROG4 } }, + /* shift + prtsc */ + { KE_KEY, 0x2d | IDEAPAD_WMI_KEY, { KEY_CUT } }, + { KE_KEY, 0x29 | IDEAPAD_WMI_KEY, { KEY_TOUCHPAD_TOGGLE } }, + { KE_KEY, 0x2a | IDEAPAD_WMI_KEY, { KEY_ROOT_MENU } }, { KE_END }, }; @@ -2080,6 +2090,12 @@ static void ideapad_wmi_notify(struct wmi_device *wdev, union acpi_object *data) dev_dbg(&wdev->dev, "WMI fn-key event: 0x%llx\n", data->integer.value); + /* performance button triggered by 0x3d */ + if (data->integer.value == 0x3d && priv->dytc) { + platform_profile_cycle(); + break; + } + /* 0x02 FnLock, 0x03 Esc */ if (data->integer.value == 0x02 || data->integer.value == 0x03) ideapad_fn_lock_led_notify(priv, data->integer.value == 0x02); From 446d28584723e5d4bcdb18cf6ef87cb2bb597dd8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Apr 2025 11:23:15 +0300 Subject: [PATCH 1037/2924] pinctrl: mediatek: common-v1: Fix error checking in mtk_eint_init() The devm_kzalloc() function doesn't return error pointers, it returns NULL on error. Then on the next line it checks the same pointer again by mistake, "->base" instead of "->base[0]". Fixes: fe412e3a6c97 ("pinctrl: mediatek: common-v1: Fix EINT breakage on older controllers") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/aAijc10fHka1WAMX@stanley.mountain Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 7585de11854bec..8596f3541265e9 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1018,12 +1018,12 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) pctl->eint->nbase = 1; /* mtk-eint expects an array */ pctl->eint->base = devm_kzalloc(pctl->dev, sizeof(pctl->eint->base), GFP_KERNEL); - if (IS_ERR(pctl->eint->base)) + if (!pctl->eint->base) return -ENOMEM; pctl->eint->base[0] = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(pctl->eint->base)) - return PTR_ERR(pctl->eint->base); + if (IS_ERR(pctl->eint->base[0])) + return PTR_ERR(pctl->eint->base[0]); pctl->eint->irq = irq_of_parse_and_map(np, 0); if (!pctl->eint->irq) From a3d8f0a7f5e8b193db509c7191fefeed3533fc44 Mon Sep 17 00:00:00 2001 From: LongPing Wei Date: Thu, 17 Apr 2025 11:07:38 +0800 Subject: [PATCH 1038/2924] dm-bufio: don't schedule in atomic context A BUG was reported as below when CONFIG_DEBUG_ATOMIC_SLEEP and try_verify_in_tasklet are enabled. [ 129.444685][ T934] BUG: sleeping function called from invalid context at drivers/md/dm-bufio.c:2421 [ 129.444723][ T934] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 934, name: kworker/1:4 [ 129.444740][ T934] preempt_count: 201, expected: 0 [ 129.444756][ T934] RCU nest depth: 0, expected: 0 [ 129.444781][ T934] Preemption disabled at: [ 129.444789][ T934] [] shrink_work+0x21c/0x248 [ 129.445167][ T934] kernel BUG at kernel/sched/walt/walt_debug.c:16! [ 129.445183][ T934] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP [ 129.445204][ T934] Skip md ftrace buffer dump for: 0x1609e0 [ 129.447348][ T934] CPU: 1 PID: 934 Comm: kworker/1:4 Tainted: G W OE 6.6.56-android15-8-o-g6f82312b30b9-debug #1 1400000003000000474e5500b3187743670464e8 [ 129.447362][ T934] Hardware name: Qualcomm Technologies, Inc. Parrot QRD, Alpha-M (DT) [ 129.447373][ T934] Workqueue: dm_bufio_cache shrink_work [ 129.447394][ T934] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 129.447406][ T934] pc : android_rvh_schedule_bug+0x0/0x8 [sched_walt_debug] [ 129.447435][ T934] lr : __traceiter_android_rvh_schedule_bug+0x44/0x6c [ 129.447451][ T934] sp : ffffffc0843dbc90 [ 129.447459][ T934] x29: ffffffc0843dbc90 x28: ffffffffffffffff x27: 0000000000000c8b [ 129.447479][ T934] x26: 0000000000000040 x25: ffffff804b3d6260 x24: ffffffd816232b68 [ 129.447497][ T934] x23: ffffff805171c5b4 x22: 0000000000000000 x21: ffffffd816231900 [ 129.447517][ T934] x20: ffffff80306ba898 x19: 0000000000000000 x18: ffffffc084159030 [ 129.447535][ T934] x17: 00000000d2b5dd1f x16: 00000000d2b5dd1f x15: ffffffd816720358 [ 129.447554][ T934] x14: 0000000000000004 x13: ffffff89ef978000 x12: 0000000000000003 [ 129.447572][ T934] x11: ffffffd817a823c4 x10: 0000000000000202 x9 : 7e779c5735de9400 [ 129.447591][ T934] x8 : ffffffd81560d004 x7 : 205b5d3938373434 x6 : ffffffd8167397c8 [ 129.447610][ T934] x5 : 0000000000000000 x4 : 0000000000000001 x3 : ffffffc0843db9e0 [ 129.447629][ T934] x2 : 0000000000002f15 x1 : 0000000000000000 x0 : 0000000000000000 [ 129.447647][ T934] Call trace: [ 129.447655][ T934] android_rvh_schedule_bug+0x0/0x8 [sched_walt_debug 1400000003000000474e550080cce8a8a78606b6] [ 129.447681][ T934] __might_resched+0x190/0x1a8 [ 129.447694][ T934] shrink_work+0x180/0x248 [ 129.447706][ T934] process_one_work+0x260/0x624 [ 129.447718][ T934] worker_thread+0x28c/0x454 [ 129.447729][ T934] kthread+0x118/0x158 [ 129.447742][ T934] ret_from_fork+0x10/0x20 [ 129.447761][ T934] Code: ???????? ???????? ???????? d2b5dd1f (d4210000) [ 129.447772][ T934] ---[ end trace 0000000000000000 ]--- dm_bufio_lock will call spin_lock_bh when try_verify_in_tasklet is enabled, and __scan will be called in atomic context. Fixes: 7cd326747f46 ("dm bufio: remove dm_bufio_cond_resched()") Signed-off-by: LongPing Wei Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka --- drivers/md/dm-bufio.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c8ed65cd87e63..f0b5a6931161a0 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -68,6 +68,8 @@ #define LIST_DIRTY 1 #define LIST_SIZE 2 +#define SCAN_RESCHED_CYCLE 16 + /*--------------------------------------------------------------*/ /* @@ -2424,7 +2426,12 @@ static void __scan(struct dm_bufio_client *c) atomic_long_dec(&c->need_shrink); freed++; - cond_resched(); + + if (unlikely(freed % SCAN_RESCHED_CYCLE == 0)) { + dm_bufio_unlock(c); + cond_resched(); + dm_bufio_lock(c); + } } } } From 0a533c3e4246c29d502a7e0fba0e86d80a906b04 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 22 Apr 2025 21:18:33 +0200 Subject: [PATCH 1039/2924] dm-integrity: fix a warning on invalid table line If we use the 'B' mode and we have an invalit table line, cancel_delayed_work_sync would trigger a warning. This commit avoids the warning. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 2a283feb3319ca..cc3d3897ef428f 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -5164,7 +5164,7 @@ static void dm_integrity_dtr(struct dm_target *ti) BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); BUG_ON(!list_empty(&ic->wait_list)); - if (ic->mode == 'B') + if (ic->mode == 'B' && ic->bitmap_flush_work.work.func) cancel_delayed_work_sync(&ic->bitmap_flush_work); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); From 70ad2e6bd180f94be030aef56e59693e36d945f3 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 23 Apr 2025 10:09:44 +0100 Subject: [PATCH 1040/2924] ASoC: cs42l43: Disable headphone clamps during type detection The headphone clamps cause fairly loud pops during type detect because they sink current from the detection process itself. Disable the clamps whilst the type detect runs, to improve the detection pop performance. Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250423090944.1504538-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index 20e6ab6f0d4ad7..6165ac16c3a950 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -654,6 +654,10 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv) reinit_completion(&priv->type_detect); + regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CLAMP_CTRL, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK); + cs42l43_start_hs_bias(priv, true); regmap_update_bits(cs42l43->regmap, CS42L43_HS2, CS42L43_HSDET_MODE_MASK, 0x3 << CS42L43_HSDET_MODE_SHIFT); @@ -665,6 +669,9 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv) CS42L43_HSDET_MODE_MASK, 0x2 << CS42L43_HSDET_MODE_SHIFT); cs42l43_stop_hs_bias(priv); + regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CLAMP_CTRL, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK, 0); + if (!time_left) return -ETIMEDOUT; From 8dfa57aabff625bf445548257f7711ef294cd30e Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 21 Apr 2025 10:03:37 -0700 Subject: [PATCH 1041/2924] dmaengine: idxd: Fix allowing write() from different address spaces Check if the process submitting the descriptor belongs to the same address space as the one that opened the file, reject otherwise. Fixes: 6827738dc684 ("dmaengine: idxd: add a write() method for applications to submit work") Signed-off-by: Vinicius Costa Gomes Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/20250421170337.3008875-1-dave.jiang@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 7bd031a608943c..a2fb2b84775263 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -407,6 +407,9 @@ static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma) if (!idxd->user_submission_safe && !capable(CAP_SYS_RAWIO)) return -EPERM; + if (current->mm != ctx->mm) + return -EPERM; + rc = check_vma(wq, vma, __func__); if (rc < 0) return rc; @@ -473,6 +476,9 @@ static ssize_t idxd_cdev_write(struct file *filp, const char __user *buf, size_t ssize_t written = 0; int i; + if (current->mm != ctx->mm) + return -EPERM; + for (i = 0; i < len/sizeof(struct dsa_hw_desc); i++) { int rc = idxd_submit_user_descriptor(ctx, udesc + i); @@ -493,6 +499,9 @@ static __poll_t idxd_cdev_poll(struct file *filp, struct idxd_device *idxd = wq->idxd; __poll_t out = 0; + if (current->mm != ctx->mm) + return -EPERM; + poll_wait(filp, &wq->err_queue, wait); spin_lock(&idxd->dev_lock); if (idxd->sw_err.valid) From 305245a2e1d633e5f821178c98c6d6132cea2bdb Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Mon, 21 Apr 2025 17:12:15 +0530 Subject: [PATCH 1042/2924] dmaengine: ptdma: Move variable condition check to the first place and remove redundancy The variable is dereferenced without first checking if it's null, leading to the following warning: 'Variable dereferenced before check: desc.' drivers/dma/amd/ptdma/ptdma-dmaengine.c: pt_cmd_callback_work() warn: variable dereferenced before check 'desc' Therefore, move the condition check for the 'desc' variable to the first place and remove the redundant extra condition check. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/bfa0a979-ce9f-422d-92c3-34921155d048@stanley.mountain/ Fixes: 656543989457 ("dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue functionality") Signed-off-by: Basavaraj Natikar Link: https://lore.kernel.org/r/20250421114215.1687073-1-Basavaraj.Natikar@amd.com Signed-off-by: Vinod Koul --- drivers/dma/amd/ptdma/ptdma-dmaengine.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c index 715ac3ae067b85..81339664036f33 100644 --- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c @@ -342,6 +342,9 @@ static void pt_cmd_callback_work(void *data, int err) struct pt_dma_chan *chan; unsigned long flags; + if (!desc) + return; + dma_chan = desc->vd.tx.chan; chan = to_pt_chan(dma_chan); @@ -355,16 +358,14 @@ static void pt_cmd_callback_work(void *data, int err) desc->status = DMA_ERROR; spin_lock_irqsave(&chan->vc.lock, flags); - if (desc) { - if (desc->status != DMA_COMPLETE) { - if (desc->status != DMA_ERROR) - desc->status = DMA_COMPLETE; + if (desc->status != DMA_COMPLETE) { + if (desc->status != DMA_ERROR) + desc->status = DMA_COMPLETE; - dma_cookie_complete(tx_desc); - dma_descriptor_unmap(tx_desc); - } else { - tx_desc = NULL; - } + dma_cookie_complete(tx_desc); + dma_descriptor_unmap(tx_desc); + } else { + tx_desc = NULL; } spin_unlock_irqrestore(&chan->vc.lock, flags); From abf078c0a322159f5ebe2adaa0cd69dc45b1e710 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 Apr 2025 21:32:51 +0200 Subject: [PATCH 1043/2924] wifi: mac80211: restore monitor for outgoing frames This code was accidentally dropped during the cooked monitor removal, but really should've been simplified instead. Add the simple version back. Fixes: 286e69677065 ("wifi: mac80211: Drop cooked monitor support") Link: https://patch.msgid.link/20250422213251.b3d65fd0f323.Id2a6901583f7af86bbe94deb355968b238f350c6@changeid Signed-off-by: Johannes Berg --- net/mac80211/status.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b17b3cc7fb903d..a362254b310cd5 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -1085,7 +1085,13 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, ieee80211_report_used_skb(local, skb, false, status->ack_hwtstamp); - if (status->free_list) + /* + * This is a bit racy but we can avoid a lot of work + * with this test... + */ + if (local->tx_mntrs) + ieee80211_tx_monitor(local, skb, retry_count, status); + else if (status->free_list) list_add_tail(&skb->list, status->free_list); else dev_kfree_skb(skb); From 72bb272541808188c54d0df30b7edce14d979538 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 20 Apr 2025 12:01:49 +0300 Subject: [PATCH 1044/2924] Revert "wifi: iwlwifi: add support for BE213" This reverts commit 16a8d9a739430bec9c11eda69226c5a39f3478aa. This device needs commit 75a3313f52b7 ("wifi: iwlwifi: make no_160 more generic"), which has a bug and is being reverted until it is fixed. Since this device wasn't shipped yet it is ok to not support it. Reported-by: Todd Brandt Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220029 Fixes: 16a8d9a73943 ("wifi: iwlwifi: add support for BE213") Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420115541.581160ae3e4b.Icecc46baee8a797c00ad04fab92d7d1114b84829@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/cfg/sc.c | 2 -- .../net/wireless/intel/iwlwifi/iwl-config.h | 1 - .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 12 ++++------- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 21 +++---------------- 4 files changed, 7 insertions(+), 29 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c index 670031fd60dc9f..59af36960f9c54 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c @@ -142,8 +142,6 @@ const struct iwl_cfg_trans_params iwl_sc_trans_cfg = { .ltr_delay = IWL_CFG_TRANS_LTR_DELAY_2500US, }; -const char iwl_sp_name[] = "Intel(R) Wi-Fi 7 BE213 160MHz"; - const struct iwl_cfg iwl_cfg_sc = { .fw_name_mac = "sc", IWL_DEVICE_SC, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index b9bd89bfdd7480..7e4864c00780b2 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -550,7 +550,6 @@ extern const char iwl_ax231_name[]; extern const char iwl_ax411_name[]; extern const char iwl_fm_name[]; extern const char iwl_wh_name[]; -extern const char iwl_sp_name[]; extern const char iwl_gl_name[]; extern const char iwl_mtp_name[]; extern const char iwl_dr_name[]; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index cd1b0048bb6daf..08269168b2fab3 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -944,8 +944,7 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans, IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK); break; case NL80211_BAND_6GHZ: - if (!trans->reduced_cap_sku && - trans->bw_limit >= 320) { + if (!trans->reduced_cap_sku) { iftype_data->eht_cap.eht_cap_elem.phy_cap_info[0] |= IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ; iftype_data->eht_cap.eht_cap_elem.phy_cap_info[1] |= @@ -1099,18 +1098,15 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans, iftype_data->he_cap.he_cap_elem.phy_cap_info[0] &= ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; - if (trans->bw_limit < 320 || trans->reduced_cap_sku) { + if (trans->reduced_cap_sku) { memset(&iftype_data->eht_cap.eht_mcs_nss_supp.bw._320, 0, sizeof(iftype_data->eht_cap.eht_mcs_nss_supp.bw._320)); - iftype_data->eht_cap.eht_cap_elem.phy_cap_info[2] &= - ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK; - } - - if (trans->reduced_cap_sku) { iftype_data->eht_cap.eht_mcs_nss_supp.bw._80.rx_tx_mcs13_max_nss = 0; iftype_data->eht_cap.eht_mcs_nss_supp.bw._160.rx_tx_mcs13_max_nss = 0; iftype_data->eht_cap.eht_cap_elem.phy_cap_info[8] &= ~IEEE80211_EHT_PHY_CAP8_RX_4096QAM_WIDER_BW_DL_OFDMA; + iftype_data->eht_cap.eht_cap_elem.phy_cap_info[2] &= + ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK; } } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 93446c37400814..03f7eb46bbc78c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1187,13 +1187,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_wh_name), - _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, - IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_ANY, - iwl_cfg_sc, iwl_sp_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, @@ -1207,13 +1202,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_wh_name), - _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, - IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_ANY, - iwl_cfg_sc2, iwl_sp_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, @@ -1227,13 +1217,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_wh_name), - _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, - IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_ANY, - iwl_cfg_sc2f, iwl_sp_name), /* Dr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, From 64dc5d5e341d6145cce65e35bdfa0d6ab9fc0a75 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 20 Apr 2025 12:01:50 +0300 Subject: [PATCH 1045/2924] Revert "wifi: iwlwifi: make no_160 more generic" This reverts commit 75a3313f52b7e08e7e73746f69a68c2b7c28bb2b. The indication of the BW limitation in the sub-device ID is not applicable for Killer devices. For those devices, bw_limit will hold a random value, so a matching dev_info might not be found, which leads to a probe failure. Until it is properly fixed, revert this. Reported-by: Todd Brandt Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220029 Fixes: 75a3313f52b7 ("wifi: iwlwifi: make no_160 more generic") Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420115541.36dd3007151e.I66b6b78db09bfea12ae84dd85603cf1583271474@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/iwl-config.h | 15 +- .../wireless/intel/iwlwifi/iwl-nvm-parse.c | 4 +- .../net/wireless/intel/iwlwifi/iwl-trans.h | 7 +- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 206 +++++++++--------- .../wireless/intel/iwlwifi/tests/devinfo.c | 15 +- 5 files changed, 118 insertions(+), 129 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 7e4864c00780b2..acafee538b8ab0 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -2,7 +2,7 @@ /* * Copyright (C) 2005-2014, 2018-2021 Intel Corporation * Copyright (C) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #ifndef __IWL_CONFIG_H__ #define __IWL_CONFIG_H__ @@ -451,8 +451,11 @@ struct iwl_cfg { #define IWL_CFG_RF_ID_HR 0x7 #define IWL_CFG_RF_ID_HR1 0x4 -#define IWL_CFG_BW_NO_LIM (U16_MAX - 1) -#define IWL_CFG_BW_ANY U16_MAX +#define IWL_CFG_NO_160 0x1 +#define IWL_CFG_160 0x0 + +#define IWL_CFG_NO_320 0x1 +#define IWL_CFG_320 0x0 #define IWL_CFG_CORES_BT 0x0 #define IWL_CFG_CORES_BT_GNSS 0x5 @@ -464,7 +467,7 @@ struct iwl_cfg { #define IWL_CFG_IS_JACKET 0x1 #define IWL_SUBDEVICE_RF_ID(subdevice) ((u16)((subdevice) & 0x00F0) >> 4) -#define IWL_SUBDEVICE_BW_LIM(subdevice) ((u16)((subdevice) & 0x0200) >> 9) +#define IWL_SUBDEVICE_NO_160(subdevice) ((u16)((subdevice) & 0x0200) >> 9) #define IWL_SUBDEVICE_CORES(subdevice) ((u16)((subdevice) & 0x1C00) >> 10) struct iwl_dev_info { @@ -472,10 +475,10 @@ struct iwl_dev_info { u16 subdevice; u16 mac_type; u16 rf_type; - u16 bw_limit; u8 mac_step; u8 rf_step; u8 rf_id; + u8 no_160; u8 cores; u8 cdb; u8 jacket; @@ -489,7 +492,7 @@ extern const unsigned int iwl_dev_info_table_size; const struct iwl_dev_info * iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 mac_type, u8 mac_step, u16 rf_type, u8 cdb, - u8 jacket, u8 rf_id, u8 bw_limit, u8 cores, u8 rf_step); + u8 jacket, u8 rf_id, u8 no_160, u8 cores, u8 rf_step); extern const struct pci_device_id iwl_hw_card_ids[]; #endif diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 08269168b2fab3..c381511e9ec65b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2005-2014, 2018-2023, 2025 Intel Corporation + * Copyright (C) 2005-2014, 2018-2023 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -1094,7 +1094,7 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans, iftype_data->eht_cap.eht_mcs_nss_supp.bw._320.rx_tx_mcs13_max_nss = 0; } - if (trans->bw_limit < 160) + if (trans->no_160) iftype_data->he_cap.he_cap_elem.phy_cap_info[0] &= ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 25fb4c50e38b1f..18698f0bd2e4d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2005-2014, 2018-2023, 2025 Intel Corporation + * Copyright (C) 2005-2014, 2018-2023 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -876,7 +876,7 @@ struct iwl_txq { * only valid for discrete (not integrated) NICs * @invalid_tx_cmd: invalid TX command buffer * @reduced_cap_sku: reduced capability supported SKU - * @bw_limit: the max bandwidth + * @no_160: device not supporting 160 MHz * @step_urm: STEP is in URM, no support for MCS>9 in 320 MHz * @restart: restart worker data * @restart.wk: restart worker @@ -911,8 +911,7 @@ struct iwl_trans { char hw_id_str[52]; u32 sku_id[3]; bool reduced_cap_sku; - u16 bw_limit; - bool step_urm; + u8 no_160:1, step_urm:1; u8 dsbr_urm_fw_dependent:1, dsbr_urm_permanent:1; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 03f7eb46bbc78c..6c22c95f28d5ed 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2005-2014, 2018-2025 Intel Corporation + * Copyright (C) 2005-2014, 2018-2024 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -552,17 +552,16 @@ MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids); EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(iwl_hw_card_ids); #define _IWL_DEV_INFO(_device, _subdevice, _mac_type, _mac_step, _rf_type, \ - _rf_id, _rf_step, _bw_limit, _cores, _cdb, _cfg, _name) \ + _rf_id, _rf_step, _no_160, _cores, _cdb, _cfg, _name) \ { .device = (_device), .subdevice = (_subdevice), .cfg = &(_cfg), \ .name = _name, .mac_type = _mac_type, .rf_type = _rf_type, .rf_step = _rf_step, \ - .bw_limit = _bw_limit, .cores = _cores, .rf_id = _rf_id, \ + .no_160 = _no_160, .cores = _cores, .rf_id = _rf_id, \ .mac_step = _mac_step, .cdb = _cdb, .jacket = IWL_CFG_ANY } #define IWL_DEV_INFO(_device, _subdevice, _cfg, _name) \ _IWL_DEV_INFO(_device, _subdevice, IWL_CFG_ANY, IWL_CFG_ANY, \ - IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, \ - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, \ - _cfg, _name) + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, \ + IWL_CFG_ANY, _cfg, _name) VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { #if IS_ENABLED(CONFIG_IWLMVM) @@ -725,66 +724,66 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9560_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9270_160_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9270_name), _IWL_DEV_INFO(0x271B, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9162_160_name), _IWL_DEV_INFO(0x271B, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH1, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9162_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9260_160_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9260_name), /* Qu with Jf */ @@ -792,132 +791,132 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1551, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_killer_1550s_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1552, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_killer_1550i_name), /* Qu C step */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1551, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_killer_1550s_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1552, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_killer_1550i_name), /* QuZ */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1551, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_killer_1550s_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1552, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_killer_1550i_name), /* Qu with Hr */ @@ -925,189 +924,189 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_b0_hr1_b0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_b0_hr_b0, iwl_ax203_name), /* Qu C step */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_c0_hr1_b0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_c0_hr_b0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_c0_hr_b0, iwl_ax201_name), /* QuZ */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_quz_a0_hr1_b0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_quz_a0_hr_b0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_quz_a0_hr_b0, iwl_ax201_name), /* Ma */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_ma, iwl_ax201_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_ma, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_ma, iwl_ax231_name), /* So with Hr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax201_name), /* So-F with Hr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax201_name), /* So-F with Gf */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_CDB, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_name), /* SoF with JF2 */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_name), /* SoF with JF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_name), /* So with GF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_CDB, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_name), /* So with JF2 */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_name), /* So with JF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_name), #endif /* CONFIG_IWLMVM */ @@ -1116,13 +1115,13 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax201_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, @@ -1134,104 +1133,104 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_wh_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax201_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_wh_name), /* Ga (Gl) */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_320, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_gl, iwl_gl_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_320, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_gl, iwl_mtp_name), /* Sc */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_wh_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_wh_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_wh_name), /* Dr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_DR, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_dr, iwl_dr_name), /* Br */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BR, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_br, iwl_br_name), #endif /* CONFIG_IWLMLD */ }; @@ -1383,7 +1382,7 @@ static int map_crf_id(struct iwl_trans *iwl_trans) VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info * iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 mac_type, u8 mac_step, u16 rf_type, u8 cdb, - u8 jacket, u8 rf_id, u8 bw_limit, u8 cores, u8 rf_step) + u8 jacket, u8 rf_id, u8 no_160, u8 cores, u8 rf_step) { int num_devices = ARRAY_SIZE(iwl_dev_info_table); int i; @@ -1426,15 +1425,8 @@ iwl_pci_find_dev_info(u16 device, u16 subsystem_device, dev_info->rf_id != rf_id) continue; - /* - * Check that bw_limit have the same "boolean" value since - * IWL_SUBDEVICE_BW_LIM can only return a boolean value and - * dev_info->bw_limit encodes a non-boolean value. - * dev_info->bw_limit == IWL_CFG_BW_NO_LIM must be equal to - * !bw_limit to have a match. - */ - if (dev_info->bw_limit != IWL_CFG_BW_ANY && - (dev_info->bw_limit == IWL_CFG_BW_NO_LIM) == !!bw_limit) + if (dev_info->no_160 != (u8)IWL_CFG_ANY && + dev_info->no_160 != no_160) continue; if (dev_info->cores != (u8)IWL_CFG_ANY && @@ -1572,13 +1564,13 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) CSR_HW_RFID_IS_CDB(iwl_trans->hw_rf_id), CSR_HW_RFID_IS_JACKET(iwl_trans->hw_rf_id), IWL_SUBDEVICE_RF_ID(pdev->subsystem_device), - IWL_SUBDEVICE_BW_LIM(pdev->subsystem_device), + IWL_SUBDEVICE_NO_160(pdev->subsystem_device), IWL_SUBDEVICE_CORES(pdev->subsystem_device), CSR_HW_RFID_STEP(iwl_trans->hw_rf_id)); if (dev_info) { iwl_trans->cfg = dev_info->cfg; iwl_trans->name = dev_info->name; - iwl_trans->bw_limit = dev_info->bw_limit; + iwl_trans->no_160 = dev_info->no_160 == IWL_CFG_NO_160; } #if IS_ENABLED(CONFIG_IWLMVM) diff --git a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c index 7ef5e89c6af27c..d0bda23c628aa6 100644 --- a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c +++ b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c @@ -2,7 +2,7 @@ /* * KUnit tests for the iwlwifi device info table * - * Copyright (C) 2023-2025 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation */ #include #include @@ -13,9 +13,9 @@ MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); static void iwl_pci_print_dev_info(const char *pfx, const struct iwl_dev_info *di) { - printk(KERN_DEBUG "%sdev=%.4x,subdev=%.4x,mac_type=%.4x,mac_step=%.4x,rf_type=%.4x,cdb=%d,jacket=%d,rf_id=%.2x,bw_limit=%d,cores=%.2x\n", + printk(KERN_DEBUG "%sdev=%.4x,subdev=%.4x,mac_type=%.4x,mac_step=%.4x,rf_type=%.4x,cdb=%d,jacket=%d,rf_id=%.2x,no_160=%d,cores=%.2x\n", pfx, di->device, di->subdevice, di->mac_type, di->mac_step, - di->rf_type, di->cdb, di->jacket, di->rf_id, di->bw_limit, + di->rf_type, di->cdb, di->jacket, di->rf_id, di->no_160, di->cores); } @@ -31,13 +31,8 @@ static void devinfo_table_order(struct kunit *test) di->mac_type, di->mac_step, di->rf_type, di->cdb, di->jacket, di->rf_id, - di->bw_limit != IWL_CFG_BW_NO_LIM, - di->cores, di->rf_step); - if (!ret) { - iwl_pci_print_dev_info("No entry found for: ", di); - KUNIT_FAIL(test, - "No entry found for entry at index %d\n", idx); - } else if (ret != di) { + di->no_160, di->cores, di->rf_step); + if (ret != di) { iwl_pci_print_dev_info("searched: ", di); iwl_pci_print_dev_info("found: ", ret); KUNIT_FAIL(test, From 4f7a07791944e57ea5f12ce03939e3ad0fd50504 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 20 Apr 2025 09:59:55 +0300 Subject: [PATCH 1046/2924] wifi: iwlwifi: mld: properly handle async notification in op mode start From the moment that we have ALIVE, we can receive notification that are handled asynchronously. Some notifications (for example iwl_rfi_support_notif) requires an operational FW. So we need to make sure that they were handled in iwl_op_mode_mld_start before we stop the FW. Flush the async_handlers_wk there to achieve that. Also, if loading the FW in op mode start failed, we need to cancel these notifications, as they are from a dead FW. More than that, not doing so can cause us to access freed memory if async_handlers_wk is executed after ieee80211_free_hw is called. Fix this by canceling all async notifications if a failure occurred in init (after ALIVE). Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.1a8579662437.Ifd77d9c1a29fdd278b0a7bfc2709dd5d5e5efdb1@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/fw.c | 13 ++++++++++--- drivers/net/wireless/intel/iwlwifi/mld/mld.c | 5 +++++ drivers/net/wireless/intel/iwlwifi/mld/mld.h | 5 ----- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/fw.c b/drivers/net/wireless/intel/iwlwifi/mld/fw.c index 62da137e102479..4b083d447ee2f7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/fw.c @@ -333,19 +333,22 @@ int iwl_mld_load_fw(struct iwl_mld *mld) ret = iwl_trans_start_hw(mld->trans); if (ret) - return ret; + goto err; ret = iwl_mld_run_fw_init_sequence(mld); if (ret) - return ret; + goto err; ret = iwl_mld_init_mcc(mld); if (ret) - return ret; + goto err; mld->fw_status.running = true; return 0; +err: + iwl_mld_stop_fw(mld); + return ret; } void iwl_mld_stop_fw(struct iwl_mld *mld) @@ -358,6 +361,10 @@ void iwl_mld_stop_fw(struct iwl_mld *mld) iwl_trans_stop_device(mld->trans); + wiphy_work_cancel(mld->wiphy, &mld->async_handlers_wk); + + iwl_mld_purge_async_handlers_list(mld); + mld->fw_status.running = false; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index d4a99ae64074f1..cfdf52b43c6848 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -417,6 +417,11 @@ iwl_op_mode_mld_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, goto free_hw; } + /* We are about to stop the FW. Notifications may require an + * operational FW, so handle them all here before we stop. + */ + wiphy_work_flush(mld->wiphy, &mld->async_handlers_wk); + iwl_mld_stop_fw(mld); wiphy_unlock(mld->wiphy); diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.h b/drivers/net/wireless/intel/iwlwifi/mld/mld.h index 5eceaaf7696db6..a4a16da6ebf3fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.h @@ -298,11 +298,6 @@ iwl_cleanup_mld(struct iwl_mld *mld) #endif iwl_mld_low_latency_restart_cleanup(mld); - - /* Empty the list of async notification handlers so we won't process - * notifications from the dead fw after the reconfig flow. - */ - iwl_mld_purge_async_handlers_list(mld); } enum iwl_power_scheme { From c155f7c3ad1e70a1e203047b20e3bca235ada207 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Sun, 20 Apr 2025 09:59:56 +0300 Subject: [PATCH 1047/2924] wifi: iwlwifi: mld: inform trans on init failure If starting the op mode failed, the opmode memory is being freed, so trans->op_mode needs to be NULLified. Otherwise, trans will access already freed memory. Call iwl_trans_op_mode_leave in that case. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.3331d1686556.Ifaf15bdd8ef8c59e04effbd2e7aa0034b30eeacb@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/mld.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index cfdf52b43c6848..4a0842a46a8d4e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -414,7 +414,7 @@ iwl_op_mode_mld_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, wiphy_unlock(mld->wiphy); rtnl_unlock(); iwl_fw_flush_dumps(&mld->fwrt); - goto free_hw; + goto err; } /* We are about to stop the FW. Notifications may require an @@ -460,7 +460,8 @@ iwl_op_mode_mld_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, iwl_mld_leds_exit(mld); free_nvm: kfree(mld->nvm_data); -free_hw: +err: + iwl_trans_op_mode_leave(mld->trans); ieee80211_free_hw(mld->hw); return ERR_PTR(ret); } From d1ee2c1922566257cd6cc4ac7c21974d708fea62 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 20 Apr 2025 09:59:57 +0300 Subject: [PATCH 1048/2924] wifi: iwlwifi: mld: only create debugfs symlink if it does not exist When mac80211 switches between non-MLO and MLO it will recreate the debugfs directories. This results in the add_if_debugfs handler being called multiple times. As the convenience symlink is created in the mld debugfs directory and not the vif, it will not be removed by mac80211 when this happens and still exists. Add a check and only create the convenience symlink if we have not yet done so. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.2490696f032a.I74319c7cf18f7e16a3d331cb96e38504b9fbab66@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/debugfs.c | 5 +++-- drivers/net/wireless/intel/iwlwifi/mld/mac80211.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c index 89d95e9b4f301a..93f9f78e4276bd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c @@ -949,8 +949,9 @@ void iwl_mld_add_vif_debugfs(struct ieee80211_hw *hw, snprintf(name, sizeof(name), "%pd", vif->debugfs_dir); snprintf(target, sizeof(target), "../../../%pd3/iwlmld", vif->debugfs_dir); - mld_vif->dbgfs_slink = - debugfs_create_symlink(name, mld->debugfs_dir, target); + if (!mld_vif->dbgfs_slink) + mld_vif->dbgfs_slink = + debugfs_create_symlink(name, mld->debugfs_dir, target); if (iwlmld_mod_params.power_scheme != IWL_POWER_SCHEME_CAM && vif->type == NL80211_IFTYPE_STATION) { diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 99e13cfd1e5feb..68d97d3b8f0260 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -651,6 +651,7 @@ void iwl_mld_mac80211_remove_interface(struct ieee80211_hw *hw, #ifdef CONFIG_IWLWIFI_DEBUGFS debugfs_remove(iwl_mld_vif_from_mac80211(vif)->dbgfs_slink); + iwl_mld_vif_from_mac80211(vif)->dbgfs_slink = NULL; #endif iwl_mld_rm_vif(mld, vif); From d49437a6afc707951e5767ef70c9726b6c05da08 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 20 Apr 2025 09:59:58 +0300 Subject: [PATCH 1049/2924] wifi: iwlwifi: back off on continuous errors When errors occur repeatedly, the driver shouldn't go into a tight loop trying to reset the device. Implement the backoff I had already defined IWL_TRANS_RESET_DELAY for, but clearly forgotten the implementation of. Fixes: 9a2f13c40c63 ("wifi: iwlwifi: implement reset escalation") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.8816e299efa2.I82cde34e2345a2b33b1f03dbb040f5ad3439a5aa@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/iwl-trans.c | 27 ++++++++++++++----- .../net/wireless/intel/iwlwifi/iwl-trans.h | 7 +++-- .../net/wireless/intel/iwlwifi/pcie/trans.c | 3 ++- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index c1607b6d0759e0..6125fe70ce7288 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -21,6 +21,7 @@ struct iwl_trans_dev_restart_data { struct list_head list; unsigned int restart_count; time64_t last_error; + bool backoff; char name[]; }; @@ -125,13 +126,20 @@ iwl_trans_determine_restart_mode(struct iwl_trans *trans) if (!data) return at_least; - if (ktime_get_boottime_seconds() - data->last_error >= + if (!data->backoff && + ktime_get_boottime_seconds() - data->last_error >= IWL_TRANS_RESET_OK_TIME) data->restart_count = 0; index = data->restart_count; - if (index >= ARRAY_SIZE(escalation_list)) + if (index >= ARRAY_SIZE(escalation_list)) { index = ARRAY_SIZE(escalation_list) - 1; + if (!data->backoff) { + data->backoff = true; + return IWL_RESET_MODE_BACKOFF; + } + data->backoff = false; + } return max(at_least, escalation_list[index]); } @@ -140,7 +148,8 @@ iwl_trans_determine_restart_mode(struct iwl_trans *trans) static void iwl_trans_restart_wk(struct work_struct *wk) { - struct iwl_trans *trans = container_of(wk, typeof(*trans), restart.wk); + struct iwl_trans *trans = container_of(wk, typeof(*trans), + restart.wk.work); struct iwl_trans_reprobe *reprobe; enum iwl_reset_mode mode; @@ -168,6 +177,12 @@ static void iwl_trans_restart_wk(struct work_struct *wk) return; mode = iwl_trans_determine_restart_mode(trans); + if (mode == IWL_RESET_MODE_BACKOFF) { + IWL_ERR(trans, "Too many device errors - delay next reset\n"); + queue_delayed_work(system_unbound_wq, &trans->restart.wk, + IWL_TRANS_RESET_DELAY); + return; + } iwl_trans_inc_restart_count(trans->dev); @@ -227,7 +242,7 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, trans->dev = dev; trans->num_rx_queues = 1; - INIT_WORK(&trans->restart.wk, iwl_trans_restart_wk); + INIT_DELAYED_WORK(&trans->restart.wk, iwl_trans_restart_wk); return trans; } @@ -271,7 +286,7 @@ int iwl_trans_init(struct iwl_trans *trans) void iwl_trans_free(struct iwl_trans *trans) { - cancel_work_sync(&trans->restart.wk); + cancel_delayed_work_sync(&trans->restart.wk); kmem_cache_destroy(trans->dev_cmd_pool); } @@ -403,7 +418,7 @@ void iwl_trans_op_mode_leave(struct iwl_trans *trans) iwl_trans_pcie_op_mode_leave(trans); - cancel_work_sync(&trans->restart.wk); + cancel_delayed_work_sync(&trans->restart.wk); trans->op_mode = NULL; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 18698f0bd2e4d9..ce4954b0d52462 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -961,7 +961,7 @@ struct iwl_trans { struct iwl_dma_ptr invalid_tx_cmd; struct { - struct work_struct wk; + struct delayed_work wk; struct iwl_fw_error_dump_mode mode; bool during_reset; } restart; @@ -1162,7 +1162,7 @@ static inline void iwl_trans_schedule_reset(struct iwl_trans *trans, */ trans->restart.during_reset = test_bit(STATUS_IN_SW_RESET, &trans->status); - queue_work(system_unbound_wq, &trans->restart.wk); + queue_delayed_work(system_unbound_wq, &trans->restart.wk, 0); } static inline void iwl_trans_fw_error(struct iwl_trans *trans, @@ -1261,6 +1261,9 @@ enum iwl_reset_mode { IWL_RESET_MODE_RESCAN, IWL_RESET_MODE_FUNC_RESET, IWL_RESET_MODE_PROD_RESET, + + /* keep last - special backoff value */ + IWL_RESET_MODE_BACKOFF, }; void iwl_trans_pcie_reset(struct iwl_trans *trans, enum iwl_reset_mode mode); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index c917ed4c19bcc3..b1ccace7377fbc 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2351,7 +2351,8 @@ void iwl_trans_pcie_reset(struct iwl_trans *trans, enum iwl_reset_mode mode) struct iwl_trans_pcie_removal *removal; char _msg = 0, *msg = &_msg; - if (WARN_ON(mode < IWL_RESET_MODE_REMOVE_ONLY)) + if (WARN_ON(mode < IWL_RESET_MODE_REMOVE_ONLY || + mode == IWL_RESET_MODE_BACKOFF)) return; if (test_bit(STATUS_TRANS_DEAD, &trans->status)) From 60d418e8540402f4732cce4e8df428e747d79e47 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 20 Apr 2025 09:59:59 +0300 Subject: [PATCH 1050/2924] wifi: iwlwifi: mld: fix BAID validity check Perhaps IWL_FW_CHECK() is a bit misnamed, but it just returns the value of the inner condition. Therefore, the current code skips the actual function when it has the BAID data and makes it crash later when it doesn't. Fix the logic. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Johannes Berg Reviewed-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.9c0b84c44c3b.Ied236258854b149960eb357ec61bf3a572503fbc@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/agg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/agg.c b/drivers/net/wireless/intel/iwlwifi/mld/agg.c index db9e0f04f4b77d..687a9450ac9840 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/agg.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/agg.c @@ -124,9 +124,9 @@ void iwl_mld_handle_bar_frame_release_notif(struct iwl_mld *mld, rcu_read_lock(); baid_data = rcu_dereference(mld->fw_id_to_ba[baid]); - if (!IWL_FW_CHECK(mld, !baid_data, - "Got valid BAID %d but not allocated, invalid BAR release!\n", - baid)) + if (IWL_FW_CHECK(mld, !baid_data, + "Got valid BAID %d but not allocated, invalid BAR release!\n", + baid)) goto out_unlock; if (IWL_FW_CHECK(mld, tid != baid_data->tid || From 15220a257319ffe3bf95796326dfe0aacdbeb1c4 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 20 Apr 2025 10:00:00 +0300 Subject: [PATCH 1051/2924] wifi: iwlwifi: don't warn if the NIC is gone in resume Some BIOSes decide to power gate the WLAN device during S3. Since iwlwifi doesn't expect this, it gets very noisy reporting that the device is no longer available. Wifi is still available because iwlwifi recovers, but it spews scary prints in the log. Fix that by failing gracefully. Fixes: e8bb19c1d590 ("wifi: iwlwifi: support fast resume") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219597 Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.d8d58146c829.I569ca15eaaa774d633038a749cc6ec7448419714@changeid Signed-off-by: Johannes Berg --- .../net/wireless/intel/iwlwifi/iwl-trans.c | 1 - drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 20 ++++++++++++++++--- .../wireless/intel/iwlwifi/pcie/internal.h | 9 +++++---- .../net/wireless/intel/iwlwifi/pcie/trans.c | 13 +++++++++--- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2 +- 5 files changed, 33 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 6125fe70ce7288..e7b2e08645efa7 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -555,7 +555,6 @@ void __releases(nic_access) iwl_trans_release_nic_access(struct iwl_trans *trans) { iwl_trans_pcie_release_nic_access(trans); - __release(nic_access); } IWL_EXPORT_SYMBOL(iwl_trans_release_nic_access); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 6c22c95f28d5ed..3a605e7c070e43 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1737,10 +1737,24 @@ static int _iwl_pci_resume(struct device *device, bool restore) * need to reset it completely. * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, * so assume that any bits there mean that the device is usable. + * For older devices, just try silently to grab the NIC. */ - if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ && - !iwl_read32(trans, CSR_FUNC_SCRATCH)) - device_was_powered_off = true; + if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { + if (!iwl_read32(trans, CSR_FUNC_SCRATCH)) + device_was_powered_off = true; + } else { + /* + * bh are re-enabled by iwl_trans_pcie_release_nic_access, + * so re-enable them if _iwl_trans_pcie_grab_nic_access fails. + */ + local_bh_disable(); + if (_iwl_trans_pcie_grab_nic_access(trans, true)) { + iwl_trans_pcie_release_nic_access(trans); + } else { + device_was_powered_off = true; + local_bh_enable(); + } + } if (restore || device_was_powered_off) { trans->state = IWL_TRANS_NO_FW; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 45460f93d24add..114a9195ad7f74 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -558,10 +558,10 @@ void iwl_trans_pcie_free(struct iwl_trans *trans); void iwl_trans_pcie_free_pnvm_dram_regions(struct iwl_dram_regions *dram_regions, struct device *dev); -bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans); -#define _iwl_trans_pcie_grab_nic_access(trans) \ +bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent); +#define _iwl_trans_pcie_grab_nic_access(trans, silent) \ __cond_lock(nic_access_nobh, \ - likely(__iwl_trans_pcie_grab_nic_access(trans))) + likely(__iwl_trans_pcie_grab_nic_access(trans, silent))) void iwl_trans_pcie_check_product_reset_status(struct pci_dev *pdev); void iwl_trans_pcie_check_product_reset_mode(struct pci_dev *pdev); @@ -1105,7 +1105,8 @@ void iwl_trans_pcie_set_bits_mask(struct iwl_trans *trans, u32 reg, int iwl_trans_pcie_read_config32(struct iwl_trans *trans, u32 ofs, u32 *val); bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans); -void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans); +void __releases(nic_access_nobh) +iwl_trans_pcie_release_nic_access(struct iwl_trans *trans); /* transport gen 1 exported functions */ void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index b1ccace7377fbc..102a6123bba0e4 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2406,7 +2406,7 @@ EXPORT_SYMBOL(iwl_trans_pcie_reset); * This version doesn't disable BHs but rather assumes they're * already disabled. */ -bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) +bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent) { int ret; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -2458,6 +2458,11 @@ bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) if (unlikely(ret < 0)) { u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL); + if (silent) { + spin_unlock(&trans_pcie->reg_lock); + return false; + } + WARN_ONCE(1, "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n", cntrl); @@ -2489,7 +2494,7 @@ bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) bool ret; local_bh_disable(); - ret = __iwl_trans_pcie_grab_nic_access(trans); + ret = __iwl_trans_pcie_grab_nic_access(trans, false); if (ret) { /* keep BHs disabled until iwl_trans_pcie_release_nic_access */ return ret; @@ -2498,7 +2503,8 @@ bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) return false; } -void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans) +void __releases(nic_access_nobh) +iwl_trans_pcie_release_nic_access(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -2525,6 +2531,7 @@ void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans) * scheduled on different CPUs (after we drop reg_lock). */ out: + __release(nic_access_nobh); spin_unlock_bh(&trans_pcie->reg_lock); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index bb90bcfc676399..9fc4e98693ebf8 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1021,7 +1021,7 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans, * returned. This needs to be done only on NICs that have * apmg_wake_up_wa set (see above.) */ - if (!_iwl_trans_pcie_grab_nic_access(trans)) + if (!_iwl_trans_pcie_grab_nic_access(trans, false)) return -EIO; /* From a17821321a9b42f26e77335cd525fee72dc1cd63 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 20 Apr 2025 10:00:01 +0300 Subject: [PATCH 1052/2924] wifi: iwlwifi: fix the check for the SCRATCH register upon resume We can't rely on the SCRATCH register being 0 on platform that power gate the NIC in S3. Even in those platforms, the SCRATCH register is still returning 0x1010000. Make sure that we understand that those platforms have powered off the device. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219597 Fixes: cb347bd29d0d ("wifi: iwlwifi: mvm: fix hibernation") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250420095642.a7e082ee785c.I9418d76f860f54261cfa89e1f7ac10300904ba40@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 1 + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index be9e464c9b7b08..3ff493e920d284 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -148,6 +148,7 @@ * during a error FW error. */ #define CSR_FUNC_SCRATCH_INIT_VALUE (0x01010101) +#define CSR_FUNC_SCRATCH_POWER_OFF_MASK 0xFFFF /* Bits for CSR_HW_IF_CONFIG_REG */ #define CSR_HW_IF_CONFIG_REG_MSK_MAC_STEP_DASH (0x0000000F) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 3a605e7c070e43..debeea2b3ae577 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1736,11 +1736,13 @@ static int _iwl_pci_resume(struct device *device, bool restore) * Scratch value was altered, this means the device was powered off, we * need to reset it completely. * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, - * so assume that any bits there mean that the device is usable. + * but not bits [15:8]. So if we have bits set in lower word, assume + * the device is alive. * For older devices, just try silently to grab the NIC. */ if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { - if (!iwl_read32(trans, CSR_FUNC_SCRATCH)) + if (!(iwl_read32(trans, CSR_FUNC_SCRATCH) & + CSR_FUNC_SCRATCH_POWER_OFF_MASK)) device_was_powered_off = true; } else { /* From 0fb15ae3b0a9221be01715dac0335647c79f3362 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Fri, 21 Mar 2025 21:52:25 +0300 Subject: [PATCH 1053/2924] wifi: plfxlc: Remove erroneous assert in plfxlc_mac_release plfxlc_mac_release() asserts that mac->lock is held. This assertion is incorrect, because even if it was possible, it would not be the valid behaviour. The function is used when probe fails or after the device is disconnected. In both cases mac->lock can not be held as the driver is not working with the device at the moment. All functions that use mac->lock unlock it just after it was held. There is also no need to hold mac->lock for plfxlc_mac_release() itself, as mac data is not affected, except for mac->flags, which is modified atomically. This bug leads to the following warning: ================================================================ WARNING: CPU: 0 PID: 127 at drivers/net/wireless/purelifi/plfxlc/mac.c:106 plfxlc_mac_release+0x7d/0xa0 Modules linked in: CPU: 0 PID: 127 Comm: kworker/0:2 Not tainted 6.1.124-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Workqueue: usb_hub_wq hub_event RIP: 0010:plfxlc_mac_release+0x7d/0xa0 drivers/net/wireless/purelifi/plfxlc/mac.c:106 Call Trace: probe+0x941/0xbd0 drivers/net/wireless/purelifi/plfxlc/usb.c:694 usb_probe_interface+0x5c0/0xaf0 drivers/usb/core/driver.c:396 really_probe+0x2ab/0xcb0 drivers/base/dd.c:639 __driver_probe_device+0x1a2/0x3d0 drivers/base/dd.c:785 driver_probe_device+0x50/0x420 drivers/base/dd.c:815 __device_attach_driver+0x2cf/0x510 drivers/base/dd.c:943 bus_for_each_drv+0x183/0x200 drivers/base/bus.c:429 __device_attach+0x359/0x570 drivers/base/dd.c:1015 bus_probe_device+0xba/0x1e0 drivers/base/bus.c:489 device_add+0xb48/0xfd0 drivers/base/core.c:3696 usb_set_configuration+0x19dd/0x2020 drivers/usb/core/message.c:2165 usb_generic_driver_probe+0x84/0x140 drivers/usb/core/generic.c:238 usb_probe_device+0x130/0x260 drivers/usb/core/driver.c:293 really_probe+0x2ab/0xcb0 drivers/base/dd.c:639 __driver_probe_device+0x1a2/0x3d0 drivers/base/dd.c:785 driver_probe_device+0x50/0x420 drivers/base/dd.c:815 __device_attach_driver+0x2cf/0x510 drivers/base/dd.c:943 bus_for_each_drv+0x183/0x200 drivers/base/bus.c:429 __device_attach+0x359/0x570 drivers/base/dd.c:1015 bus_probe_device+0xba/0x1e0 drivers/base/bus.c:489 device_add+0xb48/0xfd0 drivers/base/core.c:3696 usb_new_device+0xbdd/0x18f0 drivers/usb/core/hub.c:2620 hub_port_connect drivers/usb/core/hub.c:5477 [inline] hub_port_connect_change drivers/usb/core/hub.c:5617 [inline] port_event drivers/usb/core/hub.c:5773 [inline] hub_event+0x2efe/0x5730 drivers/usb/core/hub.c:5855 process_one_work+0x8a9/0x11d0 kernel/workqueue.c:2292 worker_thread+0xa47/0x1200 kernel/workqueue.c:2439 kthread+0x28d/0x320 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 ================================================================ Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 68d57a07bfe5 ("wireless: add plfxlc driver for pureLiFi X, XL, XC devices") Reported-by: syzbot+7d4f142f6c288de8abfe@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7d4f142f6c288de8abfe Signed-off-by: Murad Masimov Link: https://patch.msgid.link/20250321185226.71-2-m.masimov@mt-integration.ru Signed-off-by: Johannes Berg --- drivers/net/wireless/purelifi/plfxlc/mac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.c b/drivers/net/wireless/purelifi/plfxlc/mac.c index eae93efa615044..82d1bf7edba20d 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.c +++ b/drivers/net/wireless/purelifi/plfxlc/mac.c @@ -102,7 +102,6 @@ int plfxlc_mac_init_hw(struct ieee80211_hw *hw) void plfxlc_mac_release(struct plfxlc_mac *mac) { plfxlc_chip_release(&mac->chip); - lockdep_assert_held(&mac->lock); } int plfxlc_op_start(struct ieee80211_hw *hw) From 8e089e7b585d95122c8122d732d1d5ef8f879396 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Tue, 22 Apr 2025 12:22:02 +0800 Subject: [PATCH 1054/2924] wifi: brcm80211: fmac: Add error handling for brcmf_usb_dl_writeimage() The function brcmf_usb_dl_writeimage() calls the function brcmf_usb_dl_cmd() but dose not check its return value. The 'state.state' and the 'state.bytes' are uninitialized if the function brcmf_usb_dl_cmd() fails. It is dangerous to use uninitialized variables in the conditions. Add error handling for brcmf_usb_dl_cmd() to jump to error handling path if the brcmf_usb_dl_cmd() fails and the 'state.state' and the 'state.bytes' are uninitialized. Improve the error message to report more detailed error information. Fixes: 71bb244ba2fd ("brcm80211: fmac: add USB support for bcm43235/6/8 chipsets") Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Wentao Liang Acked-by: Arend van Spriel Link: https://patch.msgid.link/20250422042203.2259-1-vulab@iscas.ac.cn Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 2821c27f317ee0..d06c724f63d9c6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -896,14 +896,16 @@ brcmf_usb_dl_writeimage(struct brcmf_usbdev_info *devinfo, u8 *fw, int fwlen) } /* 1) Prepare USB boot loader for runtime image */ - brcmf_usb_dl_cmd(devinfo, DL_START, &state, sizeof(state)); + err = brcmf_usb_dl_cmd(devinfo, DL_START, &state, sizeof(state)); + if (err) + goto fail; rdlstate = le32_to_cpu(state.state); rdlbytes = le32_to_cpu(state.bytes); /* 2) Check we are in the Waiting state */ if (rdlstate != DL_WAITING) { - brcmf_err("Failed to DL_START\n"); + brcmf_err("Invalid DL state: %u\n", rdlstate); err = -EINVAL; goto fail; } From 175e69e33c66904dfe910c5f43edfe5c95b32f0c Mon Sep 17 00:00:00 2001 From: Itamar Shalev Date: Wed, 23 Apr 2025 12:25:02 +0300 Subject: [PATCH 1055/2924] wifi: iwlwifi: restore missing initialization of async_handlers_list The initialization of async_handlers_list was accidentally removed in a previous change. This patch restores the missing initialization to ensure proper handler registration. Fixes: 6895d74c11d8 ("wifi: iwlwifi: mld: initialize regulatory early") Signed-off-by: Itamar Shalev Acked-by: Miri Korenblit Link: https://patch.msgid.link/20250423092503.35206-1-itamar.shalev@intel.com Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/mld.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index 4a0842a46a8d4e..73d2166a4c2570 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -75,6 +75,7 @@ void iwl_construct_mld(struct iwl_mld *mld, struct iwl_trans *trans, /* Setup async RX handling */ spin_lock_init(&mld->async_handlers_lock); + INIT_LIST_HEAD(&mld->async_handlers_list); wiphy_work_init(&mld->async_handlers_wk, iwl_mld_async_handlers_wk); From cfa00a625f1c730e93f96b5b4ba7c1b4dc286c79 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 6 Dec 2024 19:45:31 +0800 Subject: [PATCH 1056/2924] drm/exynos: Remove unnecessary checking It is not needed since drm_atomic_helper_shutdown checks it. Signed-off-by: Guoqing Jiang Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index f313ae7bc3a323..6cc7bf77bcacc6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -355,8 +355,7 @@ static void exynos_drm_platform_shutdown(struct platform_device *pdev) { struct drm_device *drm = platform_get_drvdata(pdev); - if (drm) - drm_atomic_helper_shutdown(drm); + drm_atomic_helper_shutdown(drm); } static struct platform_driver exynos_drm_platform_driver = { From 0253dadc772e83aaa67aea8bf24a71e7ffe13cb0 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Thu, 6 Mar 2025 12:27:20 +0800 Subject: [PATCH 1057/2924] drm/exynos/vidi: Remove redundant error handling in vidi_get_modes() In the vidi_get_modes() function, if either drm_edid_dup() or drm_edid_alloc() fails, the function will immediately return 0, indicating that no display modes can be retrieved. However, in the event of failure in these two functions, it is still necessary to call the subsequent drm_edid_connector_update() function with a NULL drm_edid as an argument. This ensures that operations such as connector settings are performed in its callee function, _drm_edid_connector_property_update. To maintain the integrity of the operation, redundant error handling needs to be removed. Signed-off-by: Wentao Liang Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_vidi.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 08cf79a6202533..e644e2382d77f4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -312,9 +312,6 @@ static int vidi_get_modes(struct drm_connector *connector) else drm_edid = drm_edid_alloc(fake_edid_info, sizeof(fake_edid_info)); - if (!drm_edid) - return 0; - drm_edid_connector_update(connector, drm_edid); count = drm_edid_connector_add_modes(connector); From 30b66dd0523df5153319a2abaa2399c7c76945cb Mon Sep 17 00:00:00 2001 From: Anindya Sundar Gayen Date: Fri, 28 Feb 2025 19:32:57 +0530 Subject: [PATCH 1058/2924] drm/exynos: fixed a spelling error Corrected a spelling mistake in the exynos_drm_fimd driver to improve code readability. No functional changes were made. Signed-off-by: Anindya Sundar Gayen Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 1ad87584b1c2c8..c394cc702d7d42 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -731,7 +731,7 @@ static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, /* * Setting dma-burst to 16Word causes permanent tearing for very small * buffers, e.g. cursor buffer. Burst Mode switching which based on - * plane size is not recommended as plane size varies alot towards the + * plane size is not recommended as plane size varies a lot towards the * end of the screen and rapid movement causes unstable DMA, but it is * still better to change dma-burst than displaying garbage. */ From e8de68ba86f4f84d388f2d964eba96c034120a84 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 18 Mar 2025 09:07:38 +0100 Subject: [PATCH 1059/2924] drm/exynos: exynos7_drm_decon: Consstify struct decon_data static 'struct decon_data' is only read, so it can be const for code safety. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Alim Akhtar Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos7_drm_decon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 5170f72b08309d..f91daefa9d2bc5 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -43,13 +43,13 @@ struct decon_data { unsigned int wincon_burstlen_shift; }; -static struct decon_data exynos7_decon_data = { +static const struct decon_data exynos7_decon_data = { .vidw_buf_start_base = 0x80, .shadowcon_win_protect_shift = 10, .wincon_burstlen_shift = 11, }; -static struct decon_data exynos7870_decon_data = { +static const struct decon_data exynos7870_decon_data = { .vidw_buf_start_base = 0x880, .shadowcon_win_protect_shift = 8, .wincon_burstlen_shift = 10, From c171ad1e8166ff8b3ab9ac94bad2574167b41f66 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Apr 2025 14:07:00 +0100 Subject: [PATCH 1060/2924] drm/exynos: Fix spelling mistake "enqueu" -> "enqueue" There is a spelling mistake in a DRM_DEV_DEBUG_KMS message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index b150cfd92f6ee9..09e33a26caaff9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -908,7 +908,7 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, u32 buf_num; u32 cfg; - DRM_DEV_DEBUG_KMS(ctx->dev, "buf_id[%d]enqueu[%d]\n", buf_id, enqueue); + DRM_DEV_DEBUG_KMS(ctx->dev, "buf_id[%d]enqueue[%d]\n", buf_id, enqueue); spin_lock_irqsave(&ctx->lock, flags); From 4ce385f56434f3810ef103e1baea357ddcc6667e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 22 Apr 2025 15:17:17 +0200 Subject: [PATCH 1061/2924] x86/mm: Fix _pgd_alloc() for Xen PV mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently _pgd_alloc() was switched from using __get_free_pages() to pagetable_alloc_noprof(), which might return a compound page in case the allocation order is larger than 0. On x86 this will be the case if CONFIG_MITIGATION_PAGE_TABLE_ISOLATION is set, even if PTI has been disabled at runtime. When running as a Xen PV guest (this will always disable PTI), using a compound page for a PGD will result in VM_BUG_ON_PGFLAGS being triggered when the Xen code tries to pin the PGD. Fix the Xen issue together with the not needed 8k allocation for a PGD with PTI disabled by replacing PGD_ALLOCATION_ORDER with an inline helper returning the needed order for PGD allocations. Fixes: a9b3c355c2e6 ("asm-generic: pgalloc: provide generic __pgd_{alloc,free}") Reported-by: Petr Vaněk Signed-off-by: Juergen Gross Signed-off-by: Dave Hansen Tested-by: Petr Vaněk Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20250422131717.25724-1-jgross%40suse.com --- arch/x86/include/asm/pgalloc.h | 19 +++++++++++-------- arch/x86/kernel/machine_kexec_32.c | 4 ++-- arch/x86/mm/pgtable.c | 4 ++-- arch/x86/platform/efi/efi_64.c | 4 ++-- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index a33147520044b4..c88691b15f3c67 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -6,6 +6,8 @@ #include /* for struct page */ #include +#include + #define __HAVE_ARCH_PTE_ALLOC_ONE #define __HAVE_ARCH_PGD_FREE #include @@ -29,16 +31,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {} static inline void paravirt_release_p4d(unsigned long pfn) {} #endif -#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* - * Instead of one PGD, we acquire two PGDs. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. + * In case of Page Table Isolation active, we acquire two PGDs instead of one. + * Being order-1, it is both 8k in size and 8k-aligned. That lets us just + * flip bit 12 in a pointer to swap between the two 4k halves. */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif +static inline unsigned int pgd_allocation_order(void) +{ + if (cpu_feature_enabled(X86_FEATURE_PTI)) + return 1; + return 0; +} /* * Allocate and free page tables. diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 80265162aefff9..1f325304c4a842 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -42,7 +42,7 @@ static void load_segments(void) static void machine_kexec_free_page_tables(struct kimage *image) { - free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER); + free_pages((unsigned long)image->arch.pgd, pgd_allocation_order()); image->arch.pgd = NULL; #ifdef CONFIG_X86_PAE free_page((unsigned long)image->arch.pmd0); @@ -59,7 +59,7 @@ static void machine_kexec_free_page_tables(struct kimage *image) static int machine_kexec_alloc_page_tables(struct kimage *image) { image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - PGD_ALLOCATION_ORDER); + pgd_allocation_order()); #ifdef CONFIG_X86_PAE image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index a05fcddfc811af..f7ae44d3dd9e01 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -360,7 +360,7 @@ static inline pgd_t *_pgd_alloc(struct mm_struct *mm) * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) - return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); + return __pgd_alloc(mm, pgd_allocation_order()); /* * Now PAE kernel is not running as a Xen domain. We can allocate @@ -380,7 +380,7 @@ static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pgd_t *_pgd_alloc(struct mm_struct *mm) { - return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); + return __pgd_alloc(mm, pgd_allocation_order()); } static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac57259a432b8c..a4b4ebd41b8fab 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -73,7 +73,7 @@ int __init efi_alloc_page_tables(void) gfp_t gfp_mask; gfp_mask = GFP_KERNEL | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, pgd_allocation_order()); if (!efi_pgd) goto fail; @@ -96,7 +96,7 @@ int __init efi_alloc_page_tables(void) if (pgtable_l5_enabled()) free_page((unsigned long)pgd_page_vaddr(*pgd)); free_pgd: - free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); + free_pages((unsigned long)efi_pgd, pgd_allocation_order()); fail: return -ENOMEM; } From bbe5679f30d7690a9b6838a583b9690ea73fe0e9 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Tue, 15 Apr 2025 14:19:00 +0200 Subject: [PATCH 1062/2924] drm/nouveau: Fix WARN_ON in nouveau_fence_context_kill() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nouveau is mostly designed in a way that it's expected that fences only ever get signaled through nouveau_fence_signal(). However, in at least one other place, nouveau_fence_done(), can signal fences, too. If that happens (race) a signaled fence remains in the pending list for a while, until it gets removed by nouveau_fence_update(). Should nouveau_fence_context_kill() run in the meantime, this would be a bug because the function would attempt to set an error code on an already signaled fence. Have nouveau_fence_context_kill() check for a fence being signaled. Cc: stable@vger.kernel.org # v5.10+ Fixes: ea13e5abf807 ("drm/nouveau: signal pending fences when channel has been killed") Suggested-by: Christian König Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250415121900.55719-3-phasta@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 7cc84472cecec2..edddfc036c6d1e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -90,7 +90,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) while (!list_empty(&fctx->pending)) { fence = list_entry(fctx->pending.next, typeof(*fence), head); - if (error) + if (error && !dma_fence_is_signaled_locked(&fence->base)) dma_fence_set_error(&fence->base, error); if (nouveau_fence_signal(fence)) From da6d7db8b1620521d093a973a0110898f6585ff9 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 23 Apr 2025 13:57:22 +0800 Subject: [PATCH 1063/2924] ASoC: soc-acpi-intel-ptl-match: add empty item to ptl_cs42l43_l3[] An empty item is required to terminate the look up loop. Fixes: ac5b4a24f16f ("ASoC: Intel: soc-acpi-intel-ptl-match: Add cs42l43 support") Signed-off-by: Bard Liao Reviewed-by: Naveen Manohar Reviewed-by: Ranjani Sridharan Link: https://patch.msgid.link/20250423055722.6920-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-ptl-match.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 6603d8de501ca5..c599eb43eeb110 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -431,7 +431,8 @@ static const struct snd_soc_acpi_link_adr ptl_cs42l43_l3[] = { .mask = BIT(3), .num_adr = ARRAY_SIZE(cs42l43_3_adr), .adr_d = cs42l43_3_adr, - } + }, + {} }; static const struct snd_soc_acpi_link_adr ptl_rt722_only[] = { From bfb713ea53c746b07ae69fe97fa9b5388e4f34f9 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 17 Apr 2025 14:55:50 +0100 Subject: [PATCH 1064/2924] perf tools: Fix arm64 build by generating unistd_64.h Since pulling in the kernel changes in commit 22f72088ffe6 ("tools headers: Update the syscall table with the kernel sources"), arm64 is no longer using a generic syscall header and generates one from the syscall table. Therefore we must also generate the syscall header for arm64 before building Perf. Add it as a dependency to libperf which uses one syscall number. Perf uses more, but as libperf is a dependency of Perf it will be generated for both. Future platforms that need this will have to add their own syscall-y targets in libperf manually. Unfortunately the arch specific files that do this (e.g. arch/arm64/include/asm/Kbuild) can't easily be imported into the Perf build. But Perf only needs a subset of the generated files anyway, so redefining them is probably the correct thing to do. Fixes: 22f72088ffe6 ("tools headers: Update the syscall table with the kernel sources") Signed-off-by: James Clark Tested-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20250417-james-perf-fix-gen-syscall-v1-1-1d268c923901@linaro.org Signed-off-by: Namhyung Kim --- tools/lib/perf/Makefile | 12 +++++++++++- tools/perf/Makefile.config | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index ffcfd777c45181..1a19b5013f4549 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,6 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ +-I$(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -99,7 +100,16 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBPERF_IN): FORCE +uapi-asm := $(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi/asm +ifeq ($(SRCARCH),arm64) + syscall-y := $(uapi-asm)/unistd_64.h +endif +uapi-asm-generic: + $(if $(syscall-y),\ + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \ + generic=include/uapi/asm-generic $(syscall-y),) + +$(LIBPERF_IN): uapi-asm-generic FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index eea95c6c0c71f7..a52482654d4b77 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -29,6 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated +CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) From 82efd569a8909f2b13140c1b3de88535aea0b051 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 23 Apr 2025 10:21:29 +0200 Subject: [PATCH 1065/2924] locking/local_lock: fix _Generic() matching of local_trylock_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Michael Larabel reported [1] a nginx performance regression in v6.15-rc3 and bisected it to commit 51339d99c013 ("locking/local_lock, mm: replace localtry_ helpers with local_trylock_t type") The problem is the _Generic() usage with a default association that masks the fact that "local_trylock_t *" association is not being selected as expected. Replacing the default with the only other expected type "local_lock_t *" reveals the underlying problem: include/linux/local_lock_internal.h:174:26: error: ‘_Generic’ selector of type ‘__seg_gs local_lock_t *’ is not compatible with any association The local_locki's are part of __percpu structures and thus the __percpu attribute is needed to associate the type properly. Add the attribute and keep the default replaced to turn any further mismatches into compile errors. The failure to recognize local_try_lock_t in __local_lock_release() means that a local_trylock[_irqsave]() operation will set tl->acquired to 1 (there's no _Generic() part in the trylock code), but then local_unlock[_irqrestore]() will not set tl->acquired back to 0, so further trylock operations will always fail on the same cpu+lock, while non-trylock operations continue to work - a lockdep_assert() is also not being executed in the _Generic() part of local_lock() code. This means consume_stock() and refill_stock() operations will fail deterministically, resulting in taking the slow paths and worse performance. Fixes: 51339d99c013 ("locking/local_lock, mm: replace localtry_ helpers with local_trylock_t type") Reported-by: Michael Larabel Closes: https://www.phoronix.com/review/linux-615-nginx-regression/2 [1] Signed-off-by: Vlastimil Babka Acked-by: Alexei Starovoitov Signed-off-by: Linus Torvalds --- include/linux/local_lock_internal.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index bf2bf40d7b18d0..8d5ac16a9b1794 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -102,11 +102,11 @@ do { \ l = (local_lock_t *)this_cpu_ptr(lock); \ tl = (local_trylock_t *)l; \ _Generic((lock), \ - local_trylock_t *: ({ \ + __percpu local_trylock_t *: ({ \ lockdep_assert(tl->acquired == 0); \ WRITE_ONCE(tl->acquired, 1); \ }), \ - default:(void)0); \ + __percpu local_lock_t *: (void)0); \ local_lock_acquire(l); \ } while (0) @@ -171,11 +171,11 @@ do { \ tl = (local_trylock_t *)l; \ local_lock_release(l); \ _Generic((lock), \ - local_trylock_t *: ({ \ + __percpu local_trylock_t *: ({ \ lockdep_assert(tl->acquired == 1); \ WRITE_ONCE(tl->acquired, 0); \ }), \ - default:(void)0); \ + __percpu local_lock_t *: (void)0); \ } while (0) #define __local_unlock(lock) \ From a79be02bba5c31f967885c7f3bf3a756d77d11d9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Apr 2025 10:08:29 -0700 Subject: [PATCH 1066/2924] Fix mis-uses of 'cc-option' for warning disablement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was triggered by one of my mis-uses causing odd build warnings on sparc in linux-next, but while figuring out why the "obviously correct" use of cc-option caused such odd breakage, I found eight other cases of the same thing in the tree. The root cause is that 'cc-option' doesn't work for checking negative warning options (ie things like '-Wno-stringop-overflow') because gcc will silently accept options it doesn't recognize, and so 'cc-option' ends up thinking they are perfectly fine. And it all works, until you have a situation where _another_ warning is emitted. At that point the compiler will go "Hmm, maybe the user intended to disable this warning but used that wrong option that I didn't recognize", and generate a warning for the unrecognized negative option. Which explains why we have several cases of this in the tree: the 'cc-option' test really doesn't work for this situation, but most of the time it simply doesn't matter that ity doesn't work. The reason my recently added case caused problems on sparc was pointed out by Thomas Weißschuh: the sparc build had a previous explicit warning that then triggered the new one. I think the best fix for this would be to make 'cc-option' a bit smarter about this sitation, possibly by adding an intentional warning to the test case that then triggers the unrecognized option warning reliably. But the short-term fix is to replace 'cc-option' with an existing helper designed for this exact case: 'cc-disable-warning', which picks the negative warning but uses the positive form for testing the compiler support. Reported-by: Stephen Rothwell Link: https://lore.kernel.org/all/20250422204718.0b4e3f81@canb.auug.org.au/ Explained-by: Thomas Weißschuh Signed-off-by: Linus Torvalds --- Makefile | 4 ++-- arch/loongarch/kernel/Makefile | 8 ++++---- arch/loongarch/kvm/Makefile | 2 +- arch/riscv/kernel/Makefile | 4 ++-- scripts/Makefile.extrawarn | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index e94bbb2298c8d6..07f818186151a1 100644 --- a/Makefile +++ b/Makefile @@ -1053,11 +1053,11 @@ NOSTDINC_FLAGS += -nostdinc KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) #Currently, disable -Wstringop-overflow for GCC 11, globally. -KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-disable-warning, stringop-overflow) KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) #Currently, disable -Wunterminated-string-initialization as broken -KBUILD_CFLAGS += $(call cc-option, -Wno-unterminated-string-initialization) +KBUILD_CFLAGS += $(call cc-disable-warning, unterminated-string-initialization) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 4853e8b04c6fbe..f9dcaa60033d95 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -21,10 +21,10 @@ obj-$(CONFIG_CPU_HAS_LBT) += lbt.o obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o -CFLAGS_module.o += $(call cc-option,-Wno-override-init,) -CFLAGS_syscall.o += $(call cc-option,-Wno-override-init,) -CFLAGS_traps.o += $(call cc-option,-Wno-override-init,) -CFLAGS_perf_event.o += $(call cc-option,-Wno-override-init,) +CFLAGS_module.o += $(call cc-disable-warning, override-init) +CFLAGS_syscall.o += $(call cc-disable-warning, override-init) +CFLAGS_traps.o += $(call cc-disable-warning, override-init) +CFLAGS_perf_event.o += $(call cc-disable-warning, override-init) ifdef CONFIG_FUNCTION_TRACER ifndef CONFIG_DYNAMIC_FTRACE diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index f4c8e35c216a4b..cb41d9265662f4 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -21,4 +21,4 @@ kvm-y += intc/eiointc.o kvm-y += intc/pch_pic.o kvm-y += irqfd.o -CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) +CFLAGS_exit.o += $(call cc-disable-warning, override-init) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 8d186bfced451e..f7480c9c6f8d73 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -9,8 +9,8 @@ CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif -CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) -CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall_table.o += $(call cc-disable-warning, override-init) +CFLAGS_compat_syscall_table.o += $(call cc-disable-warning, override-init) ifdef CONFIG_KEXEC_CORE AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index d75897559d1840..2d6e59561c9d04 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -15,7 +15,7 @@ KBUILD_CFLAGS += -Werror=return-type KBUILD_CFLAGS += -Werror=strict-prototypes KBUILD_CFLAGS += -Wno-format-security KBUILD_CFLAGS += -Wno-trigraphs -KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) +KBUILD_CFLAGS += $(call cc-disable-warning, frame-address) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += -Wmissing-declarations KBUILD_CFLAGS += -Wmissing-prototypes From f2858f308131a09e33afb766cd70119b5b900569 Mon Sep 17 00:00:00 2001 From: Ihor Solodrai Date: Wed, 16 Apr 2025 10:02:46 -0700 Subject: [PATCH 1067/2924] selftests/bpf: Mitigate sockmap_ktls disconnect_after_delete failure "sockmap_ktls disconnect_after_delete" test has been failing on BPF CI after recent merges from netdev: * https://github.com/kernel-patches/bpf/actions/runs/14458537639 * https://github.com/kernel-patches/bpf/actions/runs/14457178732 It happens because disconnect has been disabled for TLS [1], and it renders the test case invalid. Removing all the test code creates a conflict between bpf and bpf-next, so for now only remove the offending assert [2]. The test will be removed later on bpf-next. [1] https://lore.kernel.org/netdev/20250404180334.3224206-1-kuba@kernel.org/ [2] https://lore.kernel.org/bpf/cfc371285323e1a3f3b006bfcf74e6cf7ad65258@linux.dev/ Signed-off-by: Ihor Solodrai Signed-off-by: Andrii Nakryiko Reviewed-by: Jiayuan Chen Link: https://lore.kernel.org/bpf/20250416170246.2438524-1-ihor.solodrai@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 2d0796314862ac..0a99fd404f6dc0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map) goto close_cli; err = disconnect(cli); - ASSERT_OK(err, "disconnect"); close_cli: close(cli); From c0e473a0d226479e8e925d5ba93f751d8df628e9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 23 Apr 2025 12:53:42 -0700 Subject: [PATCH 1068/2924] block: fix race between set_blocksize and read paths With the new large sector size support, it's now the case that set_blocksize can change i_blksize and the folio order in a manner that conflicts with a concurrent reader and causes a kernel crash. Specifically, let's say that udev-worker calls libblkid to detect the labels on a block device. The read call can create an order-0 folio to read the first 4096 bytes from the disk. But then udev is preempted. Next, someone tries to mount an 8k-sectorsize filesystem from the same block device. The filesystem calls set_blksize, which sets i_blksize to 8192 and the minimum folio order to 1. Now udev resumes, still holding the order-0 folio it allocated. It then tries to schedule a read bio and do_mpage_readahead tries to create bufferheads for the folio. Unfortunately, blocks_per_folio == 0 because the page size is 4096 but the blocksize is 8192 so no bufferheads are attached and the bh walk never sets bdev. We then submit the bio with a NULL block device and crash. Therefore, truncate the page cache after flushing but before updating i_blksize. However, that's not enough -- we also need to lock out file IO and page faults during the update. Take both the i_rwsem and the invalidate_lock in exclusive mode for invalidations, and in shared mode for read/write operations. I don't know if this is the correct fix, but xfs/259 found it. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Luis Chamberlain Tested-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/174543795699.4139148.2086129139322431423.stgit@frogsfrogsfrogs Signed-off-by: Jens Axboe --- block/bdev.c | 17 +++++++++++++++++ block/blk-zoned.c | 5 ++++- block/fops.c | 16 ++++++++++++++++ block/ioctl.c | 6 ++++++ 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/block/bdev.c b/block/bdev.c index 4844d1e27b6fbc..3c856e56d5df58 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -169,10 +169,27 @@ int set_blocksize(struct file *file, int size) /* Don't change the size if it is same as current */ if (inode->i_blkbits != blksize_bits(size)) { + /* + * Flush and truncate the pagecache before we reconfigure the + * mapping geometry because folio sizes are variable now. If a + * reader has already allocated a folio whose size is smaller + * than the new min_order but invokes readahead after the new + * min_order becomes visible, readahead will think there are + * "zero" blocks per folio and crash. Take the inode and + * invalidation locks to avoid racing with + * read/write/fallocate. + */ + inode_lock(inode); + filemap_invalidate_lock(inode->i_mapping); + sync_blockdev(bdev); + kill_bdev(bdev); + inode->i_blkbits = blksize_bits(size); mapping_set_folio_min_order(inode->i_mapping, get_order(size)); kill_bdev(bdev); + filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); } return 0; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 0c77244a35c92e..8f15d1aa6eb89a 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -343,6 +343,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, op = REQ_OP_ZONE_RESET; /* Invalidate the page cache, including dirty pages. */ + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); ret = blkdev_truncate_zone_range(bdev, mode, &zrange); if (ret) @@ -364,8 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors); fail: - if (cmd == BLKRESETZONE) + if (cmd == BLKRESETZONE) { filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); + } return ret; } diff --git a/block/fops.c b/block/fops.c index be9f1dbea9ce0a..e221fdcaa8aaf8 100644 --- a/block/fops.c +++ b/block/fops.c @@ -746,7 +746,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = direct_write_fallback(iocb, from, ret, blkdev_buffered_write(iocb, from)); } else { + /* + * Take i_rwsem and invalidate_lock to avoid racing with + * set_blocksize changing i_blkbits/folio order and punching + * out the pagecache. + */ + inode_lock_shared(bd_inode); ret = blkdev_buffered_write(iocb, from); + inode_unlock_shared(bd_inode); } if (ret > 0) @@ -757,6 +764,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct inode *bd_inode = bdev_file_inode(iocb->ki_filp); struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); loff_t size = bdev_nr_bytes(bdev); loff_t pos = iocb->ki_pos; @@ -793,7 +801,13 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) goto reexpand; } + /* + * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize + * changing i_blkbits/folio order and punching out the pagecache. + */ + inode_lock_shared(bd_inode); ret = filemap_read(iocb, to, ret); + inode_unlock_shared(bd_inode); reexpand: if (unlikely(shorted)) @@ -836,6 +850,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, if ((start | len) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL; + inode_lock(inode); filemap_invalidate_lock(inode->i_mapping); /* @@ -868,6 +883,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, fail: filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); return error; } diff --git a/block/ioctl.c b/block/ioctl.c index faa40f383e2736..e472cc1030c60c 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -142,6 +142,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, if (err) return err; + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (err) @@ -174,6 +175,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, blk_finish_plug(&plug); fail: filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } @@ -199,12 +201,14 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, end > bdev_nr_bytes(bdev)) return -EINVAL; + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end - 1); if (!err) err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, GFP_KERNEL); filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } @@ -236,6 +240,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, return -EINVAL; /* Invalidate the page cache, including dirty pages */ + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end); if (err) @@ -246,6 +251,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, fail: filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } From e03463d247ddac66e71143468373df3d74a3a6bd Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 23 Apr 2025 12:53:57 -0700 Subject: [PATCH 1069/2924] block: hoist block size validation code to a separate function Hoist the block size validation code to bdev_validate_blocksize so that we can call it from filesystems that don't care about the bdev pagecache manipulations of set_blocksize. Signed-off-by: Darrick J. Wong Reviewed-by: Luis Chamberlain Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/174543795720.4139148.840349813093799165.stgit@frogsfrogsfrogs Signed-off-by: Jens Axboe --- block/bdev.c | 33 +++++++++++++++++++++++++++------ include/linux/blkdev.h | 1 + 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/block/bdev.c b/block/bdev.c index 3c856e56d5df58..e29b3b6d1707b8 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -152,18 +152,39 @@ static void set_init_blocksize(struct block_device *bdev) get_order(bsize)); } -int set_blocksize(struct file *file, int size) +/** + * bdev_validate_blocksize - check that this block size is acceptable + * @bdev: blockdevice to check + * @block_size: block size to check + * + * For block device users that do not use buffer heads or the block device + * page cache, make sure that this block size can be used with the device. + * + * Return: On success zero is returned, negative error code on failure. + */ +int bdev_validate_blocksize(struct block_device *bdev, int block_size) { - struct inode *inode = file->f_mapping->host; - struct block_device *bdev = I_BDEV(inode); - - if (blk_validate_block_size(size)) + if (blk_validate_block_size(block_size)) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ - if (size < bdev_logical_block_size(bdev)) + if (block_size < bdev_logical_block_size(bdev)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(bdev_validate_blocksize); + +int set_blocksize(struct file *file, int size) +{ + struct inode *inode = file->f_mapping->host; + struct block_device *bdev = I_BDEV(inode); + int ret; + + ret = bdev_validate_blocksize(bdev, size); + if (ret) + return ret; + if (!file->private_data) return -EINVAL; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e39c45bc0a97ee..c6b478cfb32d68 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1614,6 +1614,7 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); } +int bdev_validate_blocksize(struct block_device *bdev, int block_size); int set_blocksize(struct file *file, int size); int lookup_bdev(const char *pathname, dev_t *dev); From 5533bc70aedc7c9872841ac8649344f8cbc6bc4c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 22 Apr 2025 07:59:41 +0800 Subject: [PATCH 1070/2924] selftests: ublk: fix recover test When adding recovery test: - 'break' is missed for handling '-g' argument - test name of test_generic_05.sh is wrong So fix the two. Fixes: 57e13a2e8cd2 ("selftests: ublk: support user recovery") Signed-off-by: Ming Lei Reviewed-by: Uday Shankar Link: https://lore.kernel.org/r/20250421235947.715272-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 1 + tools/testing/selftests/ublk/test_generic_05.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 759f0663714613..e57a1486bb48d8 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1354,6 +1354,7 @@ int main(int argc, char *argv[]) value = strtol(optarg, NULL, 10); if (value) ctx.flags |= UBLK_F_NEED_GET_DATA; + break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) ublk_dbg_mask = strtol(optarg, NULL, 16); diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh index 714630b4b3295f..3bb00a34740201 100755 --- a/tools/testing/selftests/ublk/test_generic_05.sh +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -3,7 +3,7 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh -TID="generic_04" +TID="generic_05" ERR_CODE=0 ublk_run_recover_test() From 8f503637898313c048bf21e386e09be90e30cc31 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 22 Apr 2025 07:59:42 +0800 Subject: [PATCH 1071/2924] selftests: ublk: remove useless 'delay_us' from 'struct dev_ctx' 'delay_us' shouldn't be added to 'struct dev_ctx' since now it is handled by per-target command line & 'struct fault_inject_ctx'. So remove it. Fixes: 81586652bb1f ("selftests: ublk: add generic_06 for covering fault inject") Signed-off-by: Ming Lei Reviewed-by: Uday Shankar Link: https://lore.kernel.org/r/20250421235947.715272-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 29571eb296f166..918db5cd633fc1 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -86,9 +86,6 @@ struct dev_ctx { unsigned int fg:1; unsigned int recovery:1; - /* fault_inject */ - long long delay_us; - int _evtfd; int _shmid; From 442cacac2d9935a0698332a568afcb5c6ab8be17 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 16 Apr 2025 16:28:26 +0200 Subject: [PATCH 1072/2924] misc: pci_endpoint_test: Defer IRQ allocation until ioctl(PCITEST_SET_IRQTYPE) Commit a402006d48a9 ("misc: pci_endpoint_test: Remove global 'irq_type' and 'no_msi'") changed so that the default IRQ vector requested by pci_endpoint_test_probe() was no longer the module param 'irq_type', but instead test->irq_type. test->irq_type is by default IRQ_TYPE_UNDEFINED (until someone calls ioctl(PCITEST_SET_IRQTYPE)). However, the commit also changed so that after initializing test->irq_type to IRQ_TYPE_UNDEFINED, it also overrides it with driver_data->irq_type, if the PCI device and vendor ID provides driver_data. This causes a regression for PCI device and vendor IDs that do not provide driver_data, and the host side pci_endpoint_test_driver driver failed to probe on such platforms: pci-endpoint-test 0001:01:00.0: Invalid IRQ type selected pci-endpoint-test 0001:01:00.0: probe with driver pci-endpoint-test failed with error -22 Considering that the pci endpoint selftests and the old pcitest.sh always call ioctl(PCITEST_SET_IRQTYPE) before performing any test that requires IRQs, fix the regression by removing the allocation of IRQs in pci_endpoint_test_probe(). The IRQ allocation will occur when ioctl(PCITEST_SET_IRQTYPE) is called. A positive side effect of this is that even if the endpoint controller has issues with IRQs, the user can do still do all the tests/ioctls() that do not require working IRQs, e.g. PCITEST_BAR and PCITEST_BARS. This also means that we can remove the now unused irq_type from driver_data. The irq_type will always be the one configured by the user using ioctl(PCITEST_SET_IRQTYPE). (A user that does not know, or care which irq_type that is used, can use PCITEST_IRQ_TYPE_AUTO. This has superseded the need for a default irq_type in driver_data.) [bhelgaas: add probe failure details] Fixes: a402006d48a9c ("misc: pci_endpoint_test: Remove global 'irq_type' and 'no_msi'") Signed-off-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Tested-by: Frank Li Reviewed-by: Manivannan Sadhasivam Reviewed-by: Frank Li Link: https://patch.msgid.link/20250416142825.336554-2-cassel@kernel.org --- drivers/misc/pci_endpoint_test.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index d294850a35a12c..c4e5e2c977be27 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -122,7 +122,6 @@ struct pci_endpoint_test { struct pci_endpoint_test_data { enum pci_barno test_reg_bar; size_t alignment; - int irq_type; }; static inline u32 pci_endpoint_test_readl(struct pci_endpoint_test *test, @@ -948,7 +947,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, test_reg_bar = data->test_reg_bar; test->test_reg_bar = test_reg_bar; test->alignment = data->alignment; - test->irq_type = data->irq_type; } init_completion(&test->irq_raised); @@ -970,10 +968,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, pci_set_master(pdev); - ret = pci_endpoint_test_alloc_irq_vectors(test, test->irq_type); - if (ret) - goto err_disable_irq; - for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { base = pci_ioremap_bar(pdev, bar); @@ -1009,10 +1003,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, goto err_ida_remove; } - ret = pci_endpoint_test_request_irq(test); - if (ret) - goto err_kfree_test_name; - pci_endpoint_test_get_capabilities(test); misc_device = &test->miscdev; @@ -1020,7 +1010,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, misc_device->name = kstrdup(name, GFP_KERNEL); if (!misc_device->name) { ret = -ENOMEM; - goto err_release_irq; + goto err_kfree_test_name; } misc_device->parent = &pdev->dev; misc_device->fops = &pci_endpoint_test_fops; @@ -1036,9 +1026,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, err_kfree_name: kfree(misc_device->name); -err_release_irq: - pci_endpoint_test_release_irq(test); - err_kfree_test_name: kfree(test->name); @@ -1051,8 +1038,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, pci_iounmap(pdev, test->bar[bar]); } -err_disable_irq: - pci_endpoint_test_free_irq_vectors(test); pci_release_regions(pdev); err_disable_pdev: @@ -1092,23 +1077,19 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) static const struct pci_endpoint_test_data default_data = { .test_reg_bar = BAR_0, .alignment = SZ_4K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data am654_data = { .test_reg_bar = BAR_2, .alignment = SZ_64K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data j721e_data = { .alignment = 256, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data rk3588_data = { .alignment = SZ_64K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; /* From 13b4ece33cf9def67966bb8716783c42cec20617 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Mon, 21 Apr 2025 19:07:13 +0200 Subject: [PATCH 1073/2924] mptcp: pm: Defer freeing of MPTCP userspace path manager entries When path manager entries are deleted from the local address list, they are first unlinked from the address list using list_del_rcu(). The entries must not be freed until after the RCU grace period, but the existing code immediately frees the entry. Use kfree_rcu_mightsleep() and adjust sk_omem_alloc in open code instead of using the sock_kfree_s() helper. This code path is only called in a netlink handler, so the "might sleep" function is preferable to adding a rarely-used rcu_head member to struct mptcp_pm_addr_entry. Fixes: 88d097316371 ("mptcp: drop free_list for deleting entries") Cc: stable@vger.kernel.org Signed-off-by: Mat Martineau Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250421-net-mptcp-pm-defer-freeing-v1-1-e731dc6e86b9@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 2cb62f026b1f44..a715dcbe0146ed 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -337,7 +337,11 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) release_sock(sk); - sock_kfree_s(sk, match, sizeof(*match)); + kfree_rcu_mightsleep(match); + /* Adjust sk_omem_alloc like sock_kfree_s() does, to match + * with allocation of this memory by sock_kmemdup() + */ + atomic_sub(sizeof(*match), &sk->sk_omem_alloc); err = 0; out: From ce72fea219c13c6485503928181c547d0e26756b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 21 Apr 2025 19:07:14 +0200 Subject: [PATCH 1074/2924] selftests: mptcp: diag: use mptcp_lib_get_info_value When running diag.sh in a loop, chk_dump_one will report the following "grep: write error": 13 ....chk 2 cestab [ OK ] grep: write error 14 ....chk dump_one [ OK ] 15 ....chk 2->0 msk in use after flush [ OK ] 16 ....chk 2->0 cestab after flush [ OK ] This error is caused by a broken pipe. When the output of 'ss' is processed by grep, 'head -n 1' will exit immediately after getting the first line, causing the subsequent pipe to close. At this time, if 'grep' is still trying to write data to the closed pipe, it will trigger a SIGPIPE signal, causing a write error. One solution is not to use this problematic "head -n 1" command, but to use mptcp_lib_get_info_value() helper defined in mptcp_lib.sh to get the value of 'token'. Fixes: ba2400166570 ("selftests: mptcp: add a test for mptcp_diag_dump_one") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Tested-by: Gang Yan Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250421-net-mptcp-pm-defer-freeing-v1-2-e731dc6e86b9@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 4f55477ffe0877..e7a75341f0f323 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -206,9 +206,8 @@ chk_dump_one() local token local msg - ss_token="$(ss -inmHMN $ns | grep 'token:' |\ - head -n 1 |\ - sed 's/.*token:\([0-9a-f]*\).*/\1/')" + ss_token="$(ss -inmHMN $ns | + mptcp_lib_get_info_value "token" "token")" token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\ awk -F':[ \t]+' '/^token/ {print $2}')" From 3df275ef0a6ae181e8428a6589ef5d5231e58b5c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 17 Apr 2025 11:47:30 -0700 Subject: [PATCH 1075/2924] net_sched: hfsc: Fix a UAF vulnerability in class handling This patch fixes a Use-After-Free vulnerability in the HFSC qdisc class handling. The issue occurs due to a time-of-check/time-of-use condition in hfsc_change_class() when working with certain child qdiscs like netem or codel. The vulnerability works as follows: 1. hfsc_change_class() checks if a class has packets (q.qlen != 0) 2. It then calls qdisc_peek_len(), which for certain qdiscs (e.g., codel, netem) might drop packets and empty the queue 3. The code continues assuming the queue is still non-empty, adding the class to vttree 4. This breaks HFSC scheduler assumptions that only non-empty classes are in vttree 5. Later, when the class is destroyed, this can lead to a Use-After-Free The fix adds a second queue length check after qdisc_peek_len() to verify the queue wasn't emptied. Fixes: 21f4d5cc25ec ("net_sched/hfsc: fix curve activation in hfsc_change_class()") Reported-by: Gerrard Tai Reviewed-by: Konstantin Khlebnikov Signed-off-by: Cong Wang Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20250417184732.943057-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_hfsc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index ce5045eea06568..b368ac0595d5c0 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -961,6 +961,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl != NULL) { int old_flags; + int len = 0; if (parentid) { if (cl->cl_parent && @@ -991,9 +992,13 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (usc != NULL) hfsc_change_usc(cl, usc, cur_time); + if (cl->qdisc->q.qlen != 0) + len = qdisc_peek_len(cl->qdisc); + /* Check queue length again since some qdisc implementations + * (e.g., netem/codel) might empty the queue during the peek + * operation. + */ if (cl->qdisc->q.qlen != 0) { - int len = qdisc_peek_len(cl->qdisc); - if (cl->cl_flags & HFSC_RSC) { if (old_flags & HFSC_RSC) update_ed(cl, len); From 6ccbda44e2cc3d26fd22af54c650d6d5d801addf Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 17 Apr 2025 11:47:31 -0700 Subject: [PATCH 1076/2924] net_sched: hfsc: Fix a potential UAF in hfsc_dequeue() too Similarly to the previous patch, we need to safe guard hfsc_dequeue() too. But for this one, we don't have a reliable reproducer. Fixes: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 ("Linux-2.6.12-rc2") Reported-by: Gerrard Tai Signed-off-by: Cong Wang Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20250417184732.943057-3-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_hfsc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b368ac0595d5c0..6c8ef826cec0b6 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1641,10 +1641,16 @@ hfsc_dequeue(struct Qdisc *sch) if (cl->qdisc->q.qlen != 0) { /* update ed */ next_len = qdisc_peek_len(cl->qdisc); - if (realtime) - update_ed(cl, next_len); - else - update_d(cl, next_len); + /* Check queue length again since some qdisc implementations + * (e.g., netem/codel) might empty the queue during the peek + * operation. + */ + if (cl->qdisc->q.qlen != 0) { + if (realtime) + update_ed(cl, next_len); + else + update_d(cl, next_len); + } } else { /* the class becomes passive */ eltree_remove(cl); From 7629d1a04ad2e76709401b655263040486972c2c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 17 Apr 2025 11:47:32 -0700 Subject: [PATCH 1077/2924] selftests/tc-testing: Add test for HFSC queue emptying during peek operation Add a selftest to exercise the condition where qdisc implementations like netem or codel might empty the queue during a peek operation. This tests the defensive code path in HFSC that checks the queue length again after peeking to handle this case. Based on the reproducer from Gerrard, improved by Jamal. Reported-by: Gerrard Tai Signed-off-by: Cong Wang Tested-by: Victor Nogueira Reviewed-by: Jamal Hadi Salim Link: https://patch.msgid.link/20250417184732.943057-4-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index d4ea9cd845a33a..e26bbc1697832f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -313,5 +313,44 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4c3", + "name": "Test HFSC with netem/blackhole - queue emptying during peek operation", + "category": [ + "qdisc", + "hfsc", + "netem", + "blackhole" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 plug limit 1024", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 hfsc default 1", + "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4:0 netem delay 1ms", + "$TC qdisc add dev $DUMMY parent 4:1 handle 5:0 blackhole", + "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true", + "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", + "$TC class del dev $DUMMY parent 3:0 classid 3:1", + "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit", + "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true" + ], + "cmdUnderTest": "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc hfsc 3:.*parent 1:2.*default 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] From 497041d763016c2e8314d2f6a329a9b77c3797ca Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 22 Apr 2025 04:10:20 +0100 Subject: [PATCH 1078/2924] net: dsa: mt7530: sync driver-specific behavior of MT7531 variants MT7531 standalone and MMIO variants found in MT7988 and EN7581 share most basic properties. Despite that, assisted_learning_on_cpu_port and mtu_enforcement_ingress were only applied for MT7531 but not for MT7988 or EN7581, causing the expected issues on MMIO devices. Apply both settings equally also for MT7988 and EN7581 by moving both assignments form mt7531_setup() to mt7531_setup_common(). This fixes unwanted flooding of packets due to unknown unicast during DA lookup, as well as issues with heterogenous MTU settings. Fixes: 7f54cc9772ce ("net: dsa: mt7530: split-off common parts from mt7531_setup") Signed-off-by: Daniel Golle Reviewed-by: Chester A. Unal Link: https://patch.msgid.link/89ed7ec6d4fa0395ac53ad2809742bb1ce61ed12.1745290867.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d70399bce5b9ee..c5d6628d798075 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2419,6 +2419,9 @@ mt7531_setup_common(struct dsa_switch *ds) struct mt7530_priv *priv = ds->priv; int ret, i; + ds->assisted_learning_on_cpu_port = true; + ds->mtu_enforcement_ingress = true; + mt753x_trap_frames(priv); /* Enable and reset MIB counters */ @@ -2571,9 +2574,6 @@ mt7531_setup(struct dsa_switch *ds) if (ret) return ret; - ds->assisted_learning_on_cpu_port = true; - ds->mtu_enforcement_ingress = true; - return 0; } From d9e2f070d8af60f2c8c02b2ddf0a9e90b4e9220c Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 21 Apr 2025 10:46:03 -0700 Subject: [PATCH 1079/2924] pds_core: Prevent possible adminq overflow/stuck condition The pds_core's adminq is protected by the adminq_lock, which prevents more than 1 command to be posted onto it at any one time. This makes it so the client drivers cannot simultaneously post adminq commands. However, the completions happen in a different context, which means multiple adminq commands can be posted sequentially and all waiting on completion. On the FW side, the backing adminq request queue is only 16 entries long and the retry mechanism and/or overflow/stuck prevention is lacking. This can cause the adminq to get stuck, so commands are no longer processed and completions are no longer sent by the FW. As an initial fix, prevent more than 16 outstanding adminq commands so there's no way to cause the adminq from getting stuck. This works because the backing adminq request queue will never have more than 16 pending adminq commands, so it will never overflow. This is done by reducing the adminq depth to 16. Fixes: 45d76f492938 ("pds_core: set up device and adminq") Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250421174606.3892-2-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.c | 5 +---- drivers/net/ethernet/amd/pds_core/core.h | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 1eb0d92786f715..55163457f12bec 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -325,10 +325,7 @@ static int pdsc_core_init(struct pdsc *pdsc) size_t sz; int err; - /* Scale the descriptor ring length based on number of CPUs and VFs */ - numdescs = max_t(int, PDSC_ADMINQ_MIN_LENGTH, num_online_cpus()); - numdescs += 2 * pci_sriov_get_totalvfs(pdsc->pdev); - numdescs = roundup_pow_of_two(numdescs); + numdescs = PDSC_ADMINQ_MAX_LENGTH; err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_ADMINQ, 0, "adminq", PDS_CORE_QCQ_F_CORE | PDS_CORE_QCQ_F_INTR, numdescs, diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index 0bf320c4308369..199473112c295c 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -16,7 +16,7 @@ #define PDSC_WATCHDOG_SECS 5 #define PDSC_QUEUE_NAME_MAX_SZ 16 -#define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */ +#define PDSC_ADMINQ_MAX_LENGTH 16 /* must be a power of two */ #define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */ #define PDSC_TEARDOWN_RECOVERY false #define PDSC_TEARDOWN_REMOVING true From 2567daad69cd1107fc0ec29b1615f110d7cf7385 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 21 Apr 2025 10:46:04 -0700 Subject: [PATCH 1080/2924] pds_core: handle unsupported PDS_CORE_CMD_FW_CONTROL result If the FW doesn't support the PDS_CORE_CMD_FW_CONTROL command the driver might at the least print garbage and at the worst crash when the user runs the "devlink dev info" devlink command. This happens because the stack variable fw_list is not 0 initialized which results in fw_list.num_fw_slots being a garbage value from the stack. Then the driver tries to access fw_list.fw_names[i] with i >= ARRAY_SIZE and runs off the end of the array. Fix this by initializing the fw_list and by not failing completely if the devcmd fails because other useful information is printed via devlink dev info even if the devcmd fails. Fixes: 45d76f492938 ("pds_core: set up device and adminq") Signed-off-by: Brett Creeley Reviewed-by: Simon Horman Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250421174606.3892-3-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/devlink.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index c5c787df61a4ec..d8dc39da4161fb 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -105,7 +105,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, .fw_control.opcode = PDS_CORE_CMD_FW_CONTROL, .fw_control.oper = PDS_CORE_FW_GET_LIST, }; - struct pds_core_fw_list_info fw_list; + struct pds_core_fw_list_info fw_list = {}; struct pdsc *pdsc = devlink_priv(dl); union pds_core_dev_comp comp; char buf[32]; @@ -118,8 +118,6 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, if (!err) memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); mutex_unlock(&pdsc->devcmd_lock); - if (err && err != -EIO) - return err; listlen = min(fw_list.num_fw_slots, ARRAY_SIZE(fw_list.fw_names)); for (i = 0; i < listlen; i++) { From f9559d818205a4a0b9cd87181ef46e101ea11157 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 21 Apr 2025 10:46:05 -0700 Subject: [PATCH 1081/2924] pds_core: Remove unnecessary check in pds_client_adminq_cmd() When the pds_core driver was first created there were some race conditions around using the adminq, especially for client drivers. To reduce the possibility of a race condition there's a check against pf->state in pds_client_adminq_cmd(). This is problematic for a couple of reasons: 1. The PDSC_S_INITING_DRIVER bit is set during probe, but not cleared until after everything in probe is complete, which includes creating the auxiliary devices. For pds_fwctl this means it can't make any adminq commands until after pds_core's probe is complete even though the adminq is fully up by the time pds_fwctl's auxiliary device is created. 2. The race conditions around using the adminq have been fixed and this path is already protected against client drivers calling pds_client_adminq_cmd() if the adminq isn't ready, i.e. see pdsc_adminq_post() -> pdsc_adminq_inc_if_up(). Fix this by removing the pf->state check in pds_client_adminq_cmd() because invalid accesses to pds_core's adminq is already handled by pdsc_adminq_post()->pdsc_adminq_inc_if_up(). Fixes: 10659034c622 ("pds_core: add the aux client API") Reviewed-by: Simon Horman Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250421174606.3892-4-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/auxbus.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index eeb72b1809eabd..c9aac27883a3cb 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -107,9 +107,6 @@ int pds_client_adminq_cmd(struct pds_auxiliary_dev *padev, dev_dbg(pf->dev, "%s: %s opcode %d\n", __func__, dev_name(&padev->aux_dev.dev), req->opcode); - if (pf->state) - return -ENXIO; - /* Wrap the client's request */ cmd.client_request.opcode = PDS_AQ_CMD_CLIENT_CMD; cmd.client_request.client_id = cpu_to_le16(padev->client_id); From 3f77c3dfffc7063428b100c4945ca2a7a8680380 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 21 Apr 2025 10:46:06 -0700 Subject: [PATCH 1082/2924] pds_core: make wait_context part of q_info Make the wait_context a full part of the q_info struct rather than a stack variable that goes away after pdsc_adminq_post() is done so that the context is still available after the wait loop has given up. There was a case where a slow development firmware caused the adminq request to time out, but then later the FW finally finished the request and sent the interrupt. The handler tried to complete_all() the completion context that had been created on the stack in pdsc_adminq_post() but no longer existed. This caused bad pointer usage, kernel crashes, and much wailing and gnashing of teeth. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Reviewed-by: Simon Horman Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250421174606.3892-5-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 36 +++++++++------------- drivers/net/ethernet/amd/pds_core/core.c | 4 ++- drivers/net/ethernet/amd/pds_core/core.h | 2 +- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index c83a0a80d5334e..506f682d15c10a 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -5,11 +5,6 @@ #include "core.h" -struct pdsc_wait_context { - struct pdsc_qcq *qcq; - struct completion wait_completion; -}; - static int pdsc_process_notifyq(struct pdsc_qcq *qcq) { union pds_core_notifyq_comp *comp; @@ -109,10 +104,10 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq) q_info = &q->info[q->tail_idx]; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); - /* Copy out the completion data */ - memcpy(q_info->dest, comp, sizeof(*comp)); - - complete_all(&q_info->wc->wait_completion); + if (!completion_done(&q_info->completion)) { + memcpy(q_info->dest, comp, sizeof(*comp)); + complete(&q_info->completion); + } if (cq->tail_idx == cq->num_descs - 1) cq->done_color = !cq->done_color; @@ -162,8 +157,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) static int __pdsc_adminq_post(struct pdsc *pdsc, struct pdsc_qcq *qcq, union pds_core_adminq_cmd *cmd, - union pds_core_adminq_comp *comp, - struct pdsc_wait_context *wc) + union pds_core_adminq_comp *comp) { struct pdsc_queue *q = &qcq->q; struct pdsc_q_info *q_info; @@ -205,9 +199,9 @@ static int __pdsc_adminq_post(struct pdsc *pdsc, /* Post the request */ index = q->head_idx; q_info = &q->info[index]; - q_info->wc = wc; q_info->dest = comp; memcpy(q_info->desc, cmd, sizeof(*cmd)); + reinit_completion(&q_info->completion); dev_dbg(pdsc->dev, "head_idx %d tail_idx %d\n", q->head_idx, q->tail_idx); @@ -231,16 +225,13 @@ int pdsc_adminq_post(struct pdsc *pdsc, union pds_core_adminq_comp *comp, bool fast_poll) { - struct pdsc_wait_context wc = { - .wait_completion = - COMPLETION_INITIALIZER_ONSTACK(wc.wait_completion), - }; unsigned long poll_interval = 1; unsigned long poll_jiffies; unsigned long time_limit; unsigned long time_start; unsigned long time_done; unsigned long remaining; + struct completion *wc; int err = 0; int index; @@ -250,20 +241,19 @@ int pdsc_adminq_post(struct pdsc *pdsc, return -ENXIO; } - wc.qcq = &pdsc->adminqcq; - index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc); + index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp); if (index < 0) { err = index; goto err_out; } + wc = &pdsc->adminqcq.q.info[index].completion; time_start = jiffies; time_limit = time_start + HZ * pdsc->devcmd_timeout; do { /* Timeslice the actual wait to catch IO errors etc early */ poll_jiffies = msecs_to_jiffies(poll_interval); - remaining = wait_for_completion_timeout(&wc.wait_completion, - poll_jiffies); + remaining = wait_for_completion_timeout(wc, poll_jiffies); if (remaining) break; @@ -292,9 +282,11 @@ int pdsc_adminq_post(struct pdsc *pdsc, dev_dbg(pdsc->dev, "%s: elapsed %d msecs\n", __func__, jiffies_to_msecs(time_done - time_start)); - /* Check the results */ - if (time_after_eq(time_done, time_limit)) + /* Check the results and clear an un-completed timeout */ + if (time_after_eq(time_done, time_limit) && !completion_done(wc)) { err = -ETIMEDOUT; + complete(wc); + } dev_dbg(pdsc->dev, "read admin queue completion idx %d:\n", index); dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1, diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 55163457f12bec..9512aa4083f054 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -167,8 +167,10 @@ static void pdsc_q_map(struct pdsc_queue *q, void *base, dma_addr_t base_pa) q->base = base; q->base_pa = base_pa; - for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) + for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) { cur->desc = base + (i * q->desc_size); + init_completion(&cur->completion); + } } static void pdsc_cq_map(struct pdsc_cq *cq, void *base, dma_addr_t base_pa) diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index 199473112c295c..0b53a1fab46d02 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -96,7 +96,7 @@ struct pdsc_q_info { unsigned int bytes; unsigned int nbufs; struct pdsc_buf_info bufs[PDS_CORE_MAX_FRAGS]; - struct pdsc_wait_context *wc; + struct completion completion; void *dest; }; From e3f506b78d921e48a00d005bea5c45ec36a99240 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Wed, 23 Apr 2025 13:51:54 +0530 Subject: [PATCH 1083/2924] powerpc/boot: Fix dash warning 'commit b2accfe7ca5b ("powerpc/boot: Check for ld-option support")' suppressed linker warnings, but the expressed used did not go well with POSIX shell (dash) resulting with this warning arch/powerpc/boot/wrapper: 237: [: 0: unexpected operator ld: warning: arch/powerpc/boot/zImage.epapr has a LOAD segment with RWX permissions Fix the check to handle the reported warning. Patch also fixes couple of shellcheck reported errors for the same line. In arch/powerpc/boot/wrapper line 237: if [ $(${CROSS}ld -v --no-warn-rwx-segments &>/dev/null; echo $?) -eq 0 ]; then ^-- SC2046 (warning): Quote this to prevent word splitting. ^------^ SC2086 (info): Double quote to prevent globbing and word splitting. ^---------^ SC3020 (warning): In POSIX sh, &> is undefined. Fixes: b2accfe7ca5b ("powerpc/boot: Check for ld-option support") Reported-by: Stephen Rothwell Suggested-by: Stephen Rothwell Tested-by: Venkat Rao Bagalkote Reviewed-by: Stephen Rothwell Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250423082154.30625-1-maddy@linux.ibm.com --- arch/powerpc/boot/wrapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 267ca6d4d9b387..3d8dc822282ac8 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -234,7 +234,7 @@ fi # suppress some warnings in recent ld versions nowarn="-z noexecstack" -if [ $(${CROSS}ld -v --no-warn-rwx-segments &>/dev/null; echo $?) -eq 0 ]; then +if "${CROSS}ld" -v --no-warn-rwx-segments >/dev/null 2>&1; then nowarn="$nowarn --no-warn-rwx-segments" fi From c73c67026fe65d6677260dfd15dd968b709dc237 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 18 Apr 2025 21:39:02 +0200 Subject: [PATCH 1084/2924] fanotify: fix flush of mntns marks fanotify_mark(fd, FAN_MARK_FLUSH | FAN_MARK_MNTNS, ...) incorrectly ends up causing removal inode marks. Fixes: 0f46d81f2bce ("fanotify: notify on mount attach and detach") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250418193903.2607617-2-amir73il@gmail.com --- fs/notify/fanotify/fanotify_user.c | 7 +------ include/linux/fsnotify_backend.h | 15 --------------- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f2d840ae4ded88..87f861e9004f2d 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1961,12 +1961,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; if (mark_cmd == FAN_MARK_FLUSH) { - if (mark_type == FAN_MARK_MOUNT) - fsnotify_clear_vfsmount_marks_by_group(group); - else if (mark_type == FAN_MARK_FILESYSTEM) - fsnotify_clear_sb_marks_by_group(group); - else - fsnotify_clear_inode_marks_by_group(group); + fsnotify_clear_marks_by_group(group, obj_type); return 0; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6cd8d1d28b8be4..fc27b53c58c2e3 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -907,21 +907,6 @@ extern void fsnotify_wait_marks_destroyed(void); /* Clear all of the marks of a group attached to a given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); -/* run all the marks in a group, and clear all of the vfsmount marks */ -static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); -} -/* run all the marks in a group, and clear all of the inode marks */ -static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); -} -/* run all the marks in a group, and clear all of the sn marks */ -static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); -} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); From cd188e9ef80fd005fd8c8de34ed649bd653d00e5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 18 Apr 2025 21:39:03 +0200 Subject: [PATCH 1085/2924] selftests/fs/mount-notify: test also remove/flush of mntns marks Regression test for FAN_MARK_MNTFS | FAN_MARK_FLUSH bug. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/20250418193903.2607617-3-amir73il@gmail.com --- .../mount-notify/mount-notify_test.c | 57 +++++++++++++++---- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 4a2d5c454fd15c..59a71f22fb1180 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -48,8 +48,16 @@ static uint64_t get_mnt_id(struct __test_metadata *const _metadata, static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; +static const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; + +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) + FIXTURE(fanotify) { - int fan_fd; + int fan_fd[NUM_FAN_FDS]; char buf[256]; unsigned int rem; void *next; @@ -61,7 +69,7 @@ FIXTURE(fanotify) { FIXTURE_SETUP(fanotify) { - int ret; + int i, ret; ASSERT_EQ(unshare(CLONE_NEWNS), 0); @@ -89,20 +97,34 @@ FIXTURE_SETUP(fanotify) self->root_id = get_mnt_id(_metadata, "/"); ASSERT_NE(self->root_id, 0); - self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0); - ASSERT_GE(self->fan_fd, 0); - - ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS, - FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL); - ASSERT_EQ(ret, 0); + for (i = 0; i < NUM_FAN_FDS; i++) { + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. + ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } self->rem = 0; } FIXTURE_TEARDOWN(fanotify) { + int i; + ASSERT_EQ(self->rem, 0); - close(self->fan_fd); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); ASSERT_EQ(fchdir(self->orig_root), 0); @@ -123,8 +145,21 @@ static uint64_t expect_notify(struct __test_metadata *const _metadata, unsigned int thislen; if (!self->rem) { - ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf)); - ASSERT_GT(len, 0); + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } self->rem = len; self->next = (void *) self->buf; From 607b310ada5ef4c738f9dffc758a62a9d309b084 Mon Sep 17 00:00:00 2001 From: Johannes Schneider Date: Wed, 23 Apr 2025 06:47:24 +0200 Subject: [PATCH 1086/2924] net: dp83822: Fix OF_MDIO config check When CONFIG_OF_MDIO is set to be a module the code block is not compiled. Use the IS_ENABLED macro that checks for both built in as well as module. Fixes: 5dc39fd5ef35 ("net: phy: DP83822: Add ability to advertise Fiber connection") Signed-off-by: Johannes Schneider Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20250423044724.1284492-1-johannes.schneider@leica-geosystems.com Signed-off-by: Paolo Abeni --- drivers/net/phy/dp83822.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 14f36154963841..e32013eb0186ff 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -730,7 +730,7 @@ static int dp83822_phy_reset(struct phy_device *phydev) return phydev->drv->config_init(phydev); } -#ifdef CONFIG_OF_MDIO +#if IS_ENABLED(CONFIG_OF_MDIO) static const u32 tx_amplitude_100base_tx_gain[] = { 80, 82, 83, 85, 87, 88, 90, 92, 93, 95, 97, 98, 100, 102, 103, 105, From 55cd617566ef14ed607b0b50eff4c0dbd304661b Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Mon, 24 Mar 2025 15:36:25 +1000 Subject: [PATCH 1087/2924] HID: bpf: fix BTN_STYLUS for the XP Pen ACK05 remote Usage_Dig_BarrelSwitch was applied in the UsagePage_Button which incorrectly mapped to BTN_TOOL_PENCIL Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/193 Fixes: 834da375 ("bpf: add a v6.11+ compatible BPF fixup for the XPPen ACK05 remote") Link: https://patchwork.kernel.org/project/linux-input/patch/20250207-bpf-import-2025-02-07-v1-7-6048fdd5a206@kernel.org/ Signed-off-by: Peter Hutterer Signed-off-by: Jiri Kosina --- drivers/hid/bpf/progs/XPPen__ACK05.bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c b/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c index 1a0aeea6a081cd..a754710fc90b8d 100644 --- a/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c +++ b/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c @@ -157,6 +157,7 @@ static const __u8 fixed_rdesc_vendor[] = { ReportCount(5) // padding Input(Const) // Byte 4 in report - just exists so we get to be a tablet pad + UsagePage_Digitizers Usage_Dig_BarrelSwitch // BTN_STYLUS ReportCount(1) ReportSize(1) From c14e02e68b43f208417891c5e21308723f03e9e6 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Wed, 9 Apr 2025 18:42:51 +0530 Subject: [PATCH 1088/2924] HID: hid-appletb-kbd: Fix wrong date and kernel version in sysfs interface docs The driver hid-appletb-kbd was upstreamed in kernel 6.15. But, due to an oversight on my part, I didn't change the kernel version and expected date while upstreaming the driver, thus it remained as 6.5, the original kernel version when the driver was developed for downstream. This commit should fix this. Signed-off-by: Aditya Garg Signed-off-by: Jiri Kosina --- Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd index 2a19584d091e48..8c9718d83e9d71 100644 --- a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd +++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd @@ -1,6 +1,6 @@ What: /sys/bus/hid/drivers/hid-appletb-kbd//mode -Date: September, 2023 -KernelVersion: 6.5 +Date: March, 2025 +KernelVersion: 6.15 Contact: linux-input@vger.kernel.org Description: The set of keys displayed on the Touch Bar. From 09d546303b370113323bfff456c4e8cff8756005 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Thu, 27 Mar 2025 23:11:46 +0000 Subject: [PATCH 1089/2924] HID: thrustmaster: fix memory leak in thrustmaster_interrupts() In thrustmaster_interrupts(), the allocated send_buf is not freed if the usb_check_int_endpoints() check fails, leading to a memory leak. Fix this by ensuring send_buf is freed before returning in the error path. Fixes: 50420d7c79c3 ("HID: hid-thrustmaster: Fix warning in thrustmaster_probe by adding endpoint check") Signed-off-by: Qasim Ijaz Signed-off-by: Jiri Kosina --- drivers/hid/hid-thrustmaster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index 3b81468a1df297..0bf70664c35ee1 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -174,6 +174,7 @@ static void thrustmaster_interrupts(struct hid_device *hdev) u8 ep_addr[2] = {b_ep, 0}; if (!usb_check_int_endpoints(usbif, ep_addr)) { + kfree(send_buf); hid_err(hdev, "Unexpected non-int endpoint\n"); return; } From 73fa4597bdc035437fbcd84d6be32bd39f1f2149 Mon Sep 17 00:00:00 2001 From: Alexis Lothore Date: Wed, 23 Apr 2025 09:12:09 +0200 Subject: [PATCH 1090/2924] net: stmmac: fix dwmac1000 ptp timestamp status offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a PTP interrupt occurs, the driver accesses the wrong offset to learn about the number of available snapshots in the FIFO for dwmac1000: it should be accessing bits 29..25, while it is currently reading bits 19..16 (those are bits about the auxiliary triggers which have generated the timestamps). As a consequence, it does not compute correctly the number of available snapshots, and so possibly do not generate the corresponding clock events if the bogus value ends up being 0. Fix clock events generation by reading the correct bits in the timestamp register for dwmac1000. Fixes: 477c3e1f6363 ("net: stmmac: Introduce dwmac1000 timestamping operations") Signed-off-by: Alexis Lothoré Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20250423-stmmac_ts-v2-1-e2cf2bbd61b1@bootlin.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 967a16212faf00..0c011a47d5a3e9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -320,8 +320,8 @@ enum rtc_control { /* PTP and timestamping registers */ -#define GMAC3_X_ATSNS GENMASK(19, 16) -#define GMAC3_X_ATSNS_SHIFT 16 +#define GMAC3_X_ATSNS GENMASK(29, 25) +#define GMAC3_X_ATSNS_SHIFT 25 #define GMAC_PTP_TCR_ATSFC BIT(24) #define GMAC_PTP_TCR_ATSEN0 BIT(25) From 7b7491372f8ec2d8c08da18e5d629e55f41dda89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Wed, 23 Apr 2025 09:12:10 +0200 Subject: [PATCH 1091/2924] net: stmmac: fix multiplication overflow when reading timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current way of reading a timestamp snapshot in stmmac can lead to integer overflow, as the computation is done on 32 bits. The issue has been observed on a dwmac-socfpga platform returning chaotic timestamp values due to this overflow. The corresponding multiplication is done with a MUL instruction, which returns 32 bit values. Explicitly casting the value to 64 bits replaced the MUL with a UMLAL, which computes and returns the result on 64 bits, and so returns correctly the timestamps. Prevent this overflow by explicitly casting the intermediate value to u64 to make sure that the whole computation is made on u64. While at it, apply the same cast on the other dwmac variant (GMAC4) method for snapshot retrieval. Fixes: 477c3e1f6363 ("net: stmmac: Introduce dwmac1000 timestamping operations") Signed-off-by: Alexis Lothoré Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20250423-stmmac_ts-v2-2-e2cf2bbd61b1@bootlin.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index a8b901cdf5cbb3..56b76aaa58f04a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -553,7 +553,7 @@ void dwmac1000_get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) u64 ns; ns = readl(ptpaddr + GMAC_PTP_ATNR); - ns += readl(ptpaddr + GMAC_PTP_ATSR) * NSEC_PER_SEC; + ns += (u64)readl(ptpaddr + GMAC_PTP_ATSR) * NSEC_PER_SEC; *ptp_time = ns; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 0f59aa98260404..e2840fa241f291 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -222,7 +222,7 @@ static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) u64 ns; ns = readl(ptpaddr + PTP_ATNR); - ns += readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC; + ns += (u64)readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC; *ptp_time = ns; } From e1ca5f39c2e37a3a8cdae005b94c3fc385be4240 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Sat, 29 Mar 2025 00:20:03 +0000 Subject: [PATCH 1092/2924] HID: wacom: handle kzalloc() allocation failure in wacom_wac_queue_flush() During wacom_wac_queue_flush() the code calls kzalloc() to allocate a zero initialised buffer which it uses as a storage buffer to get data from the fifo via kfifo_out(). However it does not check kzalloc() for allocation failure which returns NULL and could potentially lead to a NULL deref. Fix this by checking for kzalloc() failure and skipping the current entry if allocation failure occurs. Fixes: 5e013ad20689 ("HID: wacom: Remove static WACOM_PKGLEN_MAX limit") Signed-off-by: Qasim Ijaz Reviewed-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 97393a3083caef..43d892810c9eeb 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -70,10 +70,16 @@ static void wacom_wac_queue_flush(struct hid_device *hdev, { while (!kfifo_is_empty(fifo)) { int size = kfifo_peek_len(fifo); - u8 *buf = kzalloc(size, GFP_KERNEL); + u8 *buf; unsigned int count; int err; + buf = kzalloc(size, GFP_KERNEL); + if (!buf) { + kfifo_skip(fifo); + continue; + } + count = kfifo_out(fifo, buf, size); if (count != size) { // Hard to say what is the "right" action in this From fd34bf79a617f6298b13b274dc255f192a987e2a Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Mon, 14 Apr 2025 19:33:09 +0100 Subject: [PATCH 1093/2924] HID: wacom: fix memory leak on size mismatch in wacom_wac_queue_flush() In wacom_wac_queue_flush() the code allocates zero initialised buffer which it uses as a storage buffer for copying data from a fifo via kfifo_out(). The kfifo_out() function returns the number of elements it has copied. The code checks if the number of copied elements does not equal the size of the fifo record, if it does not it simply skips the entry and continues to the next iteration. However it does not release the storage buffer leading to a memory leak. Fix the memory leak by freeing the buffer on size mismatch. Fixes: 5e013ad20689 ("HID: wacom: Remove static WACOM_PKGLEN_MAX limit") Reviewed-by: Jason Gerecke Signed-off-by: Qasim Ijaz Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 43d892810c9eeb..95a796b3e9f2a3 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -87,6 +87,7 @@ static void wacom_wac_queue_flush(struct hid_device *hdev, // to flush seems reasonable enough, however. hid_warn(hdev, "%s: removed fifo entry with unexpected size\n", __func__); + kfree(buf); continue; } err = hid_report_raw_event(hdev, HID_INPUT_REPORT, buf, size, false); From bd07f751208ba190f9b0db5e5b7f35d5bb4a8a1e Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Tue, 1 Apr 2025 17:48:53 +0800 Subject: [PATCH 1094/2924] HID: uclogic: Add NULL check in uclogic_input_configured() devm_kasprintf() returns NULL when memory allocation fails. Currently, uclogic_input_configured() does not check for this case, which results in a NULL pointer dereference. Add NULL check after devm_kasprintf() to prevent this issue. Fixes: dd613a4e45f8 ("HID: uclogic: Correct devm device reference for hidinput input_dev name") Signed-off-by: Henry Martin Signed-off-by: Jiri Kosina --- drivers/hid/hid-uclogic-core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index d8008933c052f5..321c43fb06ae06 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -142,11 +142,12 @@ static int uclogic_input_configured(struct hid_device *hdev, suffix = "System Control"; break; } - } - - if (suffix) + } else { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); + if (!hi->input->name) + return -ENOMEM; + } return 0; } From 6bf8ab7774a20e1e60030e20f42ac8cc804fa457 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Mon, 14 Apr 2025 19:33:17 +0100 Subject: [PATCH 1095/2924] HID: wacom: fix shift OOB in kfifo allocation for zero pktlen During wacom_parse_and_register() the code calls wacom_devm_kfifo_alloc to allocate a fifo. During this operation it passes kfifo_alloc a fifo_size of 0. Kfifo attempts to round the size passed to it to the next power of 2 via roundup_pow_of_two (queue-type data structures do this to maintain efficiency of operations). However during this phase a problem arises when the roundup_pow_of_two() function utilises a shift exponent of fls_long(n-1), where n is the fifo_size. Since n is 0 in this case and n is also an unsigned long, doing n-1 causes unsigned integer wrap-around to occur making the fifo_size 4294967295. So the code effectively does fls_long(4294967295) which results in 64. Returning back to roundup_pow_of_two(), the code utilises a shift exponent of 64. When a shift exponent of 64 is used on a 64-bit type such as 1UL it results in a shift-out-of-bounds. The root cause of the issue seems to stem from insufficient validation of wacom_compute_pktlen(), since in this case the fifo_size comes from wacom_wac->features.pktlen. During wacom_parse_and_register() the wacom_compute_pktlen() function sets the pktlen as 0. To fix this, we should handle cases where wacom_compute_pktlen() results in 0. Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=d5204cbbdd921f1f7cad Fixes: 5e013ad20689 ("HID: wacom: Remove static WACOM_PKGLEN_MAX limit") Tested-by: Qasim Ijaz Reviewed-by: Jason Gerecke Cc: stable@vger.kernel.org Signed-off-by: Qasim Ijaz Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 95a796b3e9f2a3..2d052653ba634f 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2368,6 +2368,8 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) unsigned int connect_mask = HID_CONNECT_HIDRAW; features->pktlen = wacom_compute_pktlen(hdev); + if (!features->pktlen) + return -ENODEV; if (!devres_open_group(&hdev->dev, wacom, GFP_KERNEL)) return -ENOMEM; From 0cc2effbc8f522af6b9d871cd27678e6aed9d56c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 21 Apr 2025 16:32:09 -0500 Subject: [PATCH 1096/2924] HID: amd_sfh: Fix SRA sensor when it's the only sensor On systems that only have an SRA sensor connected to SFH the sensor doesn't get enabled due to a bad optimization condition of breaking the sensor walk loop. This optimization is unnecessary in the first place because if there is only one device then the loop only runs once. Drop the condition and explicitly mark sensor as enabled. Reported-by: Yijun Shen Tested-By: Yijun Shen Fixes: d1c444b47100d ("HID: amd_sfh: Add support to export device operating states") Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Acked-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c index 25f0ebfcbd5f56..c1bdf1e0d44af9 100644 --- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c +++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c @@ -134,9 +134,6 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata) for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->sensor_sts[i] = SENSOR_DISABLED; - if (cl_data->num_hid_devices == 1 && cl_data->sensor_idx[0] == SRA_IDX) - break; - if (cl_data->sensor_idx[i] == SRA_IDX) { info.sensor_idx = cl_data->sensor_idx[i]; writel(0, privdata->mmio + amd_get_p2c_val(privdata, 0)); @@ -145,8 +142,10 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata) (privdata, cl_data->sensor_idx[i], ENABLE_SENSOR); cl_data->sensor_sts[i] = (status == 0) ? SENSOR_ENABLED : SENSOR_DISABLED; - if (cl_data->sensor_sts[i] == SENSOR_ENABLED) + if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { + cl_data->is_any_sensor_enabled = true; privdata->dev_en.is_sra_present = true; + } continue; } From f32e8c8095490152b5bc5f467d5034387a4bbd1b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 21 Apr 2025 16:32:10 -0500 Subject: [PATCH 1097/2924] HID: amd_sfh: Avoid clearing reports for SRA sensor SRA sensor doesn't allocate any memory for reports. Skip trying to clear memory for that sensor in cleanup path. Suggested-by: Basavaraj Natikar Signed-off-by: Mario Limonciello Acked-by: Basavaraj Natikar Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c index c1bdf1e0d44af9..0a9b44ce4904e4 100644 --- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c +++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c @@ -83,6 +83,9 @@ static int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata) case ALS_IDX: privdata->dev_en.is_als_present = false; break; + case SRA_IDX: + privdata->dev_en.is_sra_present = false; + break; } if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { @@ -237,6 +240,8 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata) cleanup: amd_sfh_hid_client_deinit(privdata); for (i = 0; i < cl_data->num_hid_devices; i++) { + if (cl_data->sensor_idx[i] == SRA_IDX) + continue; devm_kfree(dev, cl_data->feature_report[i]); devm_kfree(dev, in_data->input_report[i]); devm_kfree(dev, cl_data->report_descr[i]); From acae9d5b51cf8d4da87ed13140e3de4970669213 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 16 Apr 2025 10:58:03 +0800 Subject: [PATCH 1098/2924] HID: hid-steam: Remove the unused variable connected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variable connected is not effectively used, so delete it. drivers/hid/hid-steam.c:1153:7: warning: variable ‘connected’ set but not used. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=20462 Signed-off-by: Jiapeng Chong Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index dfd9d22ed559c8..949d307c66a806 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1150,11 +1150,9 @@ static void steam_client_ll_close(struct hid_device *hdev) struct steam_device *steam = hdev->driver_data; unsigned long flags; - bool connected; spin_lock_irqsave(&steam->lock, flags); steam->client_opened--; - connected = steam->connected && !steam->client_opened; spin_unlock_irqrestore(&steam->lock, flags); schedule_work(&steam->unregister_work); From fa9fdeea1b7d6440c22efa6d59a769eae8bc89f1 Mon Sep 17 00:00:00 2001 From: Milton Barrera Date: Wed, 9 Apr 2025 00:04:28 -0600 Subject: [PATCH 1099/2924] HID: quirks: Add ADATA XPG alpha wireless mouse support This patch adds HID_QUIRK_ALWAYS_POLL for the ADATA XPG wireless gaming mouse (USB ID 125f:7505) and its USB dongle (USB ID 125f:7506). Without this quirk, the device does not generate input events properly. Signed-off-by: Milton Barrera Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 4 ++++ drivers/hid/hid-quirks.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 288a2b864cc41d..1062731315a2a5 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -41,6 +41,10 @@ #define USB_VENDOR_ID_ACTIONSTAR 0x2101 #define USB_DEVICE_ID_ACTIONSTAR_1011 0x1011 +#define USB_VENDOR_ID_ADATA_XPG 0x125f +#define USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE 0x7505 +#define USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE_DONGLE 0x7506 + #define USB_VENDOR_ID_ADS_TECH 0x06e1 #define USB_DEVICE_ID_ADS_TECH_RADIO_SI470X 0xa155 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 646171598e4132..0731473cc9b1ad 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -27,6 +27,8 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_PREDATOR), HID_QUIRK_BADPAD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE_DONGLE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016), HID_QUIRK_FULLSPEED_INTERVAL }, { HID_USB_DEVICE(USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX), HID_QUIRK_NO_INIT_REPORTS }, From c1b0f5183a4488b6b7790f834ce3a786725b3583 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 10 Apr 2025 17:15:25 +0300 Subject: [PATCH 1100/2924] ASoC: renesas: rz-ssi: Use NOIRQ_SYSTEM_SLEEP_PM_OPS() In the latest kernel versions system crashes were noticed occasionally during suspend/resume. This occurs because the RZ SSI suspend trigger (called from snd_soc_suspend()) is executed after rz_ssi_pm_ops->suspend() and it accesses IP registers. After the rz_ssi_pm_ops->suspend() is executed the IP clocks are disabled and its reset line is asserted. Since snd_soc_suspend() is invoked through snd_soc_pm_ops->suspend(), snd_soc_pm_ops is associated with soc_driver (defined in sound/soc/soc-core.c), and there is no parent-child relationship between soc_driver and rz_ssi_driver the power management subsystem does not enforce a specific suspend/resume order between the RZ SSI platform driver and soc_driver. To ensure that the suspend/resume function of rz-ssi is executed after snd_soc_suspend(), use NOIRQ_SYSTEM_SLEEP_PM_OPS(). Fixes: 1fc778f7c833 ("ASoC: renesas: rz-ssi: Add suspend to RAM support") Cc: stable@vger.kernel.org Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20250410141525.4126502-1-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Mark Brown --- sound/soc/renesas/rz-ssi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/renesas/rz-ssi.c b/sound/soc/renesas/rz-ssi.c index 3a0af4ca7ab6c7..0f7458a4390198 100644 --- a/sound/soc/renesas/rz-ssi.c +++ b/sound/soc/renesas/rz-ssi.c @@ -1244,7 +1244,7 @@ static int rz_ssi_runtime_resume(struct device *dev) static const struct dev_pm_ops rz_ssi_pm_ops = { RUNTIME_PM_OPS(rz_ssi_runtime_suspend, rz_ssi_runtime_resume, NULL) - SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; static struct platform_driver rz_ssi_driver = { From 722a6ad4867ce8c4cb131a3371d0b5389a75dee0 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 23 Apr 2025 21:31:57 +0200 Subject: [PATCH 1101/2924] spi: spi-qpic-snand: propagate errors from qcom_spi_block_erase() The qcom_spi_block_erase() function returns with error in case of failure. Change the qcom_spi_send_cmdaddr() function to propagate these errors to the callers instead of returning with success. Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Reviewed-by: Abel Vesa Link: https://patch.msgid.link/20250423-qpic-snand-propagate-error-v1-1-4b26ed45fdb5@gmail.com Reviewed-by: Md Sadre Alam Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 17eb67e1913261..ae32c452d0bcf8 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -1307,8 +1307,7 @@ static int qcom_spi_send_cmdaddr(struct qcom_nand_controller *snandc, snandc->qspi->addr1 = cpu_to_le32(s_op.addr1_reg << 16); snandc->qspi->addr2 = cpu_to_le32(s_op.addr2_reg); snandc->qspi->cmd = cpu_to_le32(cmd); - qcom_spi_block_erase(snandc); - return 0; + return qcom_spi_block_erase(snandc); default: break; } From 5e16f1a68d28965c12b6fa227a306fef8a680f84 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 24 Apr 2025 12:28:39 +0100 Subject: [PATCH 1102/2924] io_uring: don't duplicate flushing in io_req_post_cqe io_req_post_cqe() sets submit_state.cq_flush so that *flush_completions() can take care of batch commiting CQEs. Don't commit it twice by using __io_cq_unlock_post(). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/41c416660c509cee676b6cad96081274bcb459f3.1745493861.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index c6209fe44cb175..eedb47c8c79e0a 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -872,10 +872,15 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags) lockdep_assert(!io_wq_current_is_worker()); lockdep_assert_held(&ctx->uring_lock); - __io_cq_lock(ctx); - posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + if (!ctx->lockless_cq) { + spin_lock(&ctx->completion_lock); + posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + spin_unlock(&ctx->completion_lock); + } else { + posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + } + ctx->submit_state.cq_flush = true; - __io_cq_unlock_post(ctx); return posted; } From 1d019736b6f812bebf3ef89d6e887d06e2a822fc Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Wed, 23 Apr 2025 15:29:03 -0600 Subject: [PATCH 1103/2924] selftests: ublk: common: fix _get_disk_dev_t for pre-9.0 coreutils Some distributions, such as centos stream 9, still have a version of coreutils which does not yet support the %Hr and %Lr formats for stat(1) [1, 2]. Running ublk selftests on these distributions results in the following error in tests that use the _get_disk_dev_t helper: line 23: ?r: syntax error: operand expected (error token is "?r") To better accommodate older distributions, rewrite _get_disk_dev_t to use the much older %t and %T formats for stat instead. [1] https://github.com/coreutils/coreutils/blob/v9.0/NEWS#L114 [2] https://pkgs.org/download/coreutils Signed-off-by: Uday Shankar Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250423-ublk_selftests-v1-2-7d060e260e76@purestorage.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/test_common.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 9fc111f64576f9..a81210ca3e99d2 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -17,8 +17,8 @@ _get_disk_dev_t() { local minor dev=/dev/ublkb"${dev_id}" - major=$(stat -c '%Hr' "$dev") - minor=$(stat -c '%Lr' "$dev") + major="0x"$(stat -c '%t' "$dev") + minor="0x"$(stat -c '%T' "$dev") echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } From 7b720c720253e2070459420b2628a7b9ee6733b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Apr 2025 10:25:21 +0200 Subject: [PATCH 1104/2924] block: never reduce ra_pages in blk_apply_bdi_limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the user increased the read-ahead size through sysfs this value currently get lost if the device is reprobe, including on a resume from suspend. As there is no hardware limitation for the read-ahead size there is no real need to reset it or track a separate hardware limitation like for max_sectors. This restores the pre-atomic queue limit behavior in the sd driver as sd did not use blk_queue_io_opt and thus never updated the read ahead size to the value based of the optimal I/O, but changes behavior for all other drivers. As the new behavior seems useful and sd is the driver for which the readahead size tweaks are most useful that seems like a worthwhile trade off. Fixes: 804e498e0496 ("sd: convert to the atomic queue limits API") Reported-by: Holger Hoffstätte Signed-off-by: Christoph Hellwig Tested-by: Holger Hoffstätte Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20250424082521.1967286-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 6b2dbe645d23aa..4817e7ca03f83c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi, /* * For read-ahead of large files to be effective, we need to read ahead * at least twice the optimal I/O size. + * + * There is no hardware limitation for the read-ahead size and the user + * might have increased the read-ahead size through sysfs, so don't ever + * decrease it. */ - bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); + bdi->ra_pages = max3(bdi->ra_pages, + lim->io_opt * 2 / PAGE_SIZE, + VM_READAHEAD_PAGES); bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT; } From c63202140d4b411d27380805c4d68eb11407b7f2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Apr 2025 07:37:39 +0200 Subject: [PATCH 1105/2924] block: move blkdev_{get,put} _no_open prototypes out of blkdev.h These are only to be used by block internal code. Remove the comment as we grew more users due to reworking block device node opening. Signed-off-by: Christoph Hellwig Reviewed-by: Christian Brauner Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250423053810.1683309-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk.h | 3 +++ include/linux/blkdev.h | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/block/blk.h b/block/blk.h index 006e3be433d287..939e5e1aec9ca2 100644 --- a/block/blk.h +++ b/block/blk.h @@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done) wait_for_completion_io(done); } +struct block_device *blkdev_get_no_open(dev_t dev); +void blkdev_put_no_open(struct block_device *bdev); + #define BIO_INLINE_VECS 4 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, gfp_t gfp_mask); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c6b478cfb32d68..14abaeedaa88ad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1671,10 +1671,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -/* just for blk-cgroup, don't use elsewhere */ -struct block_device *blkdev_get_no_open(dev_t dev); -void blkdev_put_no_open(struct block_device *bdev); - struct block_device *I_BDEV(struct inode *inode); struct block_device *file_bdev(struct file *bdev_file); bool disk_live(struct gendisk *disk); From d13b7090b2510abaa83a25717466decca23e8226 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Apr 2025 07:37:40 +0200 Subject: [PATCH 1106/2924] block: remove the backing_inode variable in bdev_statx backing_inode is only used once, so remove it and update the comment describing the bdev lookup to be a bit more clear. Signed-off-by: Christoph Hellwig Reviewed-by: Christian Brauner Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250423053810.1683309-3-hch@lst.de Signed-off-by: Jens Axboe --- block/bdev.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/block/bdev.c b/block/bdev.c index e29b3b6d1707b8..b57c2c0853fc89 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -1313,18 +1313,15 @@ void sync_bdevs(bool wait) void bdev_statx(struct path *path, struct kstat *stat, u32 request_mask) { - struct inode *backing_inode; struct block_device *bdev; - backing_inode = d_backing_inode(path->dentry); - /* - * Note that backing_inode is the inode of a block device node file, - * not the block device's internal inode. Therefore it is *not* valid - * to use I_BDEV() here; the block device has to be looked up by i_rdev + * Note that d_backing_inode() returns the block device node inode, not + * the block device's internal inode. Therefore it is *not* valid to + * use I_BDEV() here; the block device has to be looked up by i_rdev * instead. */ - bdev = blkdev_get_no_open(backing_inode->i_rdev); + bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev); if (!bdev) return; From 5f33b5226c9d92359e58e91ad0bf0c1791da36a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Apr 2025 07:37:41 +0200 Subject: [PATCH 1107/2924] block: don't autoload drivers on stat blkdev_get_no_open can trigger the legacy autoload of block drivers. A simple stat of a block device has not historically done that, so disable this behavior again. Fixes: 9abcfbd235f5 ("block: Add atomic write support for statx") Signed-off-by: Christoph Hellwig Reviewed-by: Christian Brauner Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250423053810.1683309-4-hch@lst.de Signed-off-by: Jens Axboe --- block/bdev.c | 8 ++++---- block/blk-cgroup.c | 2 +- block/blk.h | 2 +- block/fops.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/bdev.c b/block/bdev.c index b57c2c0853fc89..520515e4e64ef0 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -815,13 +815,13 @@ static void blkdev_put_part(struct block_device *part) blkdev_put_whole(whole); } -struct block_device *blkdev_get_no_open(dev_t dev) +struct block_device *blkdev_get_no_open(dev_t dev, bool autoload) { struct block_device *bdev; struct inode *inode; inode = ilookup(blockdev_superblock, dev); - if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { + if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { blk_request_module(dev); inode = ilookup(blockdev_superblock, dev); if (inode) @@ -1043,7 +1043,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, if (ret) return ERR_PTR(ret); - bdev = blkdev_get_no_open(dev); + bdev = blkdev_get_no_open(dev, true); if (!bdev) return ERR_PTR(-ENXIO); @@ -1321,7 +1321,7 @@ void bdev_statx(struct path *path, struct kstat *stat, * use I_BDEV() here; the block device has to be looked up by i_rdev * instead. */ - bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev); + bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false); if (!bdev) return; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5905f277057bc5..e172aeda41837e 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -797,7 +797,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) return -EINVAL; input = skip_spaces(input); - bdev = blkdev_get_no_open(MKDEV(major, minor)); + bdev = blkdev_get_no_open(MKDEV(major, minor), true); if (!bdev) return -ENODEV; if (bdev_is_partition(bdev)) { diff --git a/block/blk.h b/block/blk.h index 939e5e1aec9ca2..328075787814a9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -94,7 +94,7 @@ static inline void blk_wait_io(struct completion *done) wait_for_completion_io(done); } -struct block_device *blkdev_get_no_open(dev_t dev); +struct block_device *blkdev_get_no_open(dev_t dev, bool autoload); void blkdev_put_no_open(struct block_device *bdev); #define BIO_INLINE_VECS 4 diff --git a/block/fops.c b/block/fops.c index e221fdcaa8aaf8..82b672d15ea4f8 100644 --- a/block/fops.c +++ b/block/fops.c @@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) if (ret) return ret; - bdev = blkdev_get_no_open(inode->i_rdev); + bdev = blkdev_get_no_open(inode->i_rdev, true); if (!bdev) return -ENXIO; From c4d2519c6ad854dc2114e77d693b3cf1baf55330 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Apr 2025 07:37:42 +0200 Subject: [PATCH 1108/2924] block: don't autoload drivers on blk-cgroup configuration Loading a driver just to configure blk-cgroup doesn't make sense, as that assumes and already existing device. Signed-off-by: Christoph Hellwig Reviewed-by: Christian Brauner Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250423053810.1683309-5-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e172aeda41837e..ce93706555c5b9 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -797,7 +797,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) return -EINVAL; input = skip_spaces(input); - bdev = blkdev_get_no_open(MKDEV(major, minor), true); + bdev = blkdev_get_no_open(MKDEV(major, minor), false); if (!bdev) return -ENODEV; if (bdev_is_partition(bdev)) { From 5f9e1698141a724ef63f75ee22fa9007d97de5bb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 18 Apr 2025 13:03:08 -0400 Subject: [PATCH 1109/2924] KVM: arm64, x86: make kvm_arch_has_irq_bypass() inline kvm_arch_has_irq_bypass() is a small function and even though it does not appear in any *really* hot paths, it's also not entirely rare. Make it inline---it also works out nicely in preparation for using it in kvm-intel.ko and kvm-amd.ko, since the function is not currently exported. Suggested-by: Linus Torvalds Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_host.h | 5 +++++ arch/arm64/kvm/arm.c | 5 ----- arch/x86/include/asm/kvm_host.h | 6 ++++++ arch/x86/kvm/x86.c | 5 ----- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e98cfe7855a624..08ba91e6fb035a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1588,4 +1588,9 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); #define kvm_has_s1poe(k) \ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 68fec8c95feef9..19ca57def6292b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2743,11 +2743,6 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) return irqchip_in_kernel(kvm); } -bool kvm_arch_has_irq_bypass(void) -{ - return true; -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3bdae454a95976..7bc174a1f1cb8c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -2423,4 +2424,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); */ #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); +} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3712dde0bf9d1f..c1fdd527044c97 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13556,11 +13556,6 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); -bool kvm_arch_has_irq_bypass(void) -{ - return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { From 6560aff981ada9aec8163509a59d8f48283263d6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Apr 2025 09:18:03 -0700 Subject: [PATCH 1110/2924] KVM: SVM: Don't update IRTEs if APICv/AVIC is disabled Skip IRTE updates if AVIC is disabled/unsupported, as forcing the IRTE into remapped mode (kvm_vcpu_apicv_active() will never be true) is unnecessary and wasteful. The IOMMU driver is responsible for putting IRTEs into remapped mode when an IRQ is allocated by a device, long before that device is assigned to a VM. I.e. the kernel as a whole has major issues if the IRTE isn't already in remapped mode. Opportunsitically kvm_arch_has_irq_bypass() to query for APICv/AVIC, so so that all checks in KVM x86 incorporate the same information. Cc: Yosry Ahmed Cc: Jim Mattson Signed-off-by: Sean Christopherson Message-ID: <20250401161804.842968-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 65fd245a9953ce..901d8d2dc1691f 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -898,8 +898,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, struct kvm_irq_routing_table *irq_rt; int idx, ret = 0; - if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass()) return 0; pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n", From 7537deda36521fa8fff9133b39c46e31893606f2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 4 Apr 2025 12:38:16 -0700 Subject: [PATCH 1111/2924] KVM: SVM: Allocate IR data using atomic allocation Allocate SVM's interrupt remapping metadata using GFP_ATOMIC as svm_ir_list_add() is called with IRQs are disabled and irqfs.lock held when kvm_irq_routing_update() reacts to GSI routing changes. Fixes: 411b44ba80ab ("svm: Implements update_pi_irte hook to setup posted interrupt") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20250404193923.1413163-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 901d8d2dc1691f..a961e6e670506c 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -820,7 +820,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * Allocating new amd_iommu_pi_data, which will get * add to the per-vcpu ir_list. */ - ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); + ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT); if (!ir) { ret = -ENOMEM; goto out; From 9bcac97dc42d2f4da8229d18feb0fe2b1ce523a2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 4 Apr 2025 12:38:17 -0700 Subject: [PATCH 1112/2924] KVM: x86: Reset IRTE to host control if *new* route isn't postable Restore an IRTE back to host control (remapped or posted MSI mode) if the *new* GSI route prevents posting the IRQ directly to a vCPU, regardless of the GSI routing type. Updating the IRTE if and only if the new GSI is an MSI results in KVM leaving an IRTE posting to a vCPU. The dangling IRTE can result in interrupts being incorrectly delivered to the guest, and in the worst case scenario can result in use-after-free, e.g. if the VM is torn down, but the underlying host IRQ isn't freed. Fixes: efc644048ecd ("KVM: x86: Update IRTE for posted-interrupts") Fixes: 411b44ba80ab ("svm: Implements update_pi_irte hook to setup posted interrupt") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20250404193923.1413163-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 58 ++++++++++++++++++---------------- arch/x86/kvm/vmx/posted_intr.c | 28 ++++++---------- 2 files changed, 41 insertions(+), 45 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index a961e6e670506c..8e09f6ae98fd99 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -896,6 +896,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; int idx, ret = 0; if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass()) @@ -932,6 +933,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_vcpu_apicv_active(&svm->vcpu)) { struct amd_iommu_pi_data pi; + enable_remapped_mode = false; + /* Try to enable guest_mode in IRTE */ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); @@ -950,33 +953,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, */ if (!ret && pi.is_guest_mode) svm_ir_list_add(svm, &pi); - } else { - /* Use legacy mode in IRTE */ - struct amd_iommu_pi_data pi; - - /** - * Here, pi is used to: - * - Tell IOMMU to use legacy mode for this interrupt. - * - Retrieve ga_tag of prior interrupt remapping data. - */ - pi.prev_ga_tag = 0; - pi.is_guest_mode = false; - ret = irq_set_vcpu_affinity(host_irq, &pi); - - /** - * Check if the posted interrupt was previously - * setup with the guest_mode by checking if the ga_tag - * was cached. If so, we need to clean up the per-vcpu - * ir_list. - */ - if (!ret && pi.prev_ga_tag) { - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu_by_id(kvm, id); - if (vcpu) - svm_ir_list_del(to_svm(vcpu), &pi); - } } if (!ret && svm) { @@ -992,6 +968,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } ret = 0; + if (enable_remapped_mode) { + /* Use legacy mode in IRTE */ + struct amd_iommu_pi_data pi; + + /** + * Here, pi is used to: + * - Tell IOMMU to use legacy mode for this interrupt. + * - Retrieve ga_tag of prior interrupt remapping data. + */ + pi.prev_ga_tag = 0; + pi.is_guest_mode = false; + ret = irq_set_vcpu_affinity(host_irq, &pi); + + /** + * Check if the posted interrupt was previously + * setup with the guest_mode by checking if the ga_tag + * was cached. If so, we need to clean up the per-vcpu + * ir_list. + */ + if (!ret && pi.prev_ga_tag) { + int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, id); + if (vcpu) + svm_ir_list_del(to_svm(vcpu), &pi); + } + } out: srcu_read_unlock(&kvm->irq_srcu, idx); return ret; diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 51116fe69a5001..d70e5b90087d86 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -297,6 +297,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; struct kvm_lapic_irq irq; struct kvm_vcpu *vcpu; struct vcpu_data vcpu_info; @@ -335,21 +336,8 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_set_msi_irq(kvm, e, &irq); if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || - !kvm_irq_is_postable(&irq)) { - /* - * Make sure the IRTE is in remapped mode if - * we don't handle it in posted mode. - */ - ret = irq_set_vcpu_affinity(host_irq, NULL); - if (ret < 0) { - printk(KERN_INFO - "failed to back to remapped mode, irq: %u\n", - host_irq); - goto out; - } - + !kvm_irq_is_postable(&irq)) continue; - } vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector; @@ -357,11 +345,12 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); - if (set) - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else - ret = irq_set_vcpu_affinity(host_irq, NULL); + if (!set) + continue; + enable_remapped_mode = false; + + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); if (ret < 0) { printk(KERN_INFO "%s: failed to update PI IRTE\n", __func__); @@ -369,6 +358,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } } + if (enable_remapped_mode) + ret = irq_set_vcpu_affinity(host_irq, NULL); + ret = 0; out: srcu_read_unlock(&kvm->irq_srcu, idx); From bcda70c56f3e718465cab2aad260cf34183ce1ce Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 4 Apr 2025 12:38:18 -0700 Subject: [PATCH 1113/2924] KVM: x86: Explicitly treat routing entry type changes as changes Explicitly treat type differences as GSI routing changes, as comparing MSI data between two entries could get a false negative, e.g. if userspace changed the type but left the type-specific data as-is. Fixes: 515a0c79e796 ("kvm: irqfd: avoid update unmodified entries of the routing") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20250404193923.1413163-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c1fdd527044c97..9c98b77b7dc1bb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13607,7 +13607,8 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new) { - if (new->type != KVM_IRQ_ROUTING_MSI) + if (old->type != KVM_IRQ_ROUTING_MSI || + new->type != KVM_IRQ_ROUTING_MSI) return true; return !!memcmp(&old->msi, &new->msi, sizeof(new->msi)); From f1fb088d9cecde5c3066d8ff8846789667519b7d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 4 Apr 2025 12:38:19 -0700 Subject: [PATCH 1114/2924] KVM: x86: Take irqfds.lock when adding/deleting IRQ bypass producer Take irqfds.lock when adding/deleting an IRQ bypass producer to ensure irqfd->producer isn't modified while kvm_irq_routing_update() is running. The only lock held when a producer is added/removed is irqbypass's mutex. Fixes: 872768800652 ("KVM: x86: select IRQ_BYPASS_MANAGER") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-ID: <20250404193923.1413163-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9c98b77b7dc1bb..a6829a370e6a97 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13561,15 +13561,22 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, { struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; int ret; - irqfd->producer = prod; kvm_arch_start_assignment(irqfd->kvm); + + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = prod; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 1); if (ret) kvm_arch_end_assignment(irqfd->kvm); + spin_unlock_irq(&kvm->irqfds.lock); + + return ret; } @@ -13579,9 +13586,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, int ret; struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; WARN_ON(irqfd->producer != prod); - irqfd->producer = NULL; /* * When producer of consumer is unregistered, we change back to @@ -13589,12 +13596,18 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, * when the irq is masked/disabled or the consumer side (KVM * int this case doesn't want to receive the interrupts. */ + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = NULL; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0); if (ret) printk(KERN_INFO "irq bypass consumer (token %p) unregistration" " fails: %d\n", irqfd->consumer.token, ret); + spin_unlock_irq(&kvm->irqfds.lock); + + kvm_arch_end_assignment(irqfd->kvm); } From 07172206a26dcf3f0bf7c3ecaadd4242b008ea54 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 4 Apr 2025 12:38:20 -0700 Subject: [PATCH 1115/2924] iommu/amd: Return an error if vCPU affinity is set for non-vCPU IRTE Return -EINVAL instead of success if amd_ir_set_vcpu_affinity() is invoked without use_vapic; lying to KVM about whether or not the IRTE was configured to post IRQs is all kinds of bad. Fixes: d98de49a53e4 ("iommu/amd: Enable vAPIC interrupt remapping mode by default") Signed-off-by: Sean Christopherson Message-ID: <20250404193923.1413163-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- drivers/iommu/amd/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index be8761bbef0ffb..00965518b8020b 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3879,7 +3879,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) * we should not modify the IRTE */ if (!dev_data || !dev_data->use_vapic) - return 0; + return -EINVAL; ir_data->cfg = irqd_cfg(data); pi_data->ir_data = ir_data; From aae251a380fe4741594368e0d7836a082b17ae3e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 4 Apr 2025 12:38:21 -0700 Subject: [PATCH 1116/2924] iommu/amd: WARN if KVM attempts to set vCPU affinity without posted intrrupts WARN if KVM attempts to set vCPU affinity when posted interrupts aren't enabled, as KVM shouldn't try to enable posting when they're unsupported, and the IOMMU driver darn well should only advertise posting support when AMD_IOMMU_GUEST_IR_VAPIC() is true. Note, KVM consumes is_guest_mode only on success. Signed-off-by: Sean Christopherson Message-ID: <20250404193923.1413163-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- drivers/iommu/amd/iommu.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 00965518b8020b..f34209b08b4c54 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3869,6 +3869,9 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct iommu_dev_data *dev_data; + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + if (ir_data->iommu == NULL) return -EINVAL; @@ -3884,16 +3887,6 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) ir_data->cfg = irqd_cfg(data); pi_data->ir_data = ir_data; - /* Note: - * SVM tries to set up for VAPIC mode, but we are in - * legacy mode. So, we force legacy mode instead. - */ - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { - pr_debug("%s: Fall back to using intr legacy remap\n", - __func__); - pi_data->is_guest_mode = false; - } - pi_data->prev_ga_tag = ir_data->cached_ga_tag; if (pi_data->is_guest_mode) { ir_data->ga_root_ptr = (pi_data->base >> 12); From 268cbfe65bb9096f78f98d1e092b1939d3caa382 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 4 Apr 2025 12:38:22 -0700 Subject: [PATCH 1117/2924] KVM: SVM: WARN if an invalid posted interrupt IRTE entry is added Now that the AMD IOMMU doesn't signal success incorrectly, WARN if KVM attempts to track an AMD IRTE entry without metadata. Signed-off-by: Sean Christopherson Message-ID: <20250404193923.1413163-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 8e09f6ae98fd99..7338879d1c0c48 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -796,12 +796,15 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) struct amd_svm_iommu_ir *ir; u64 entry; + if (WARN_ON_ONCE(!pi->ir_data)) + return -EINVAL; + /** * In some cases, the existing irte is updated and re-set, * so we need to check here if it's already been * added * to the ir_list. */ - if (pi->ir_data && (pi->prev_ga_tag != 0)) { + if (pi->prev_ga_tag) { struct kvm *kvm = svm->vcpu.kvm; u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag); struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); From ca4f113b0b4c2de6ffb438d5d0ebb7337877c911 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 15 Apr 2025 13:48:20 +0300 Subject: [PATCH 1118/2924] KVM: x86: Do not use kvm_rip_read() unconditionally in KVM tracepoints Not all VMs allow access to RIP. Check guest_state_protected before calling kvm_rip_read(). This avoids, for example, hitting WARN_ON_ONCE in vt_cache_reg() for TDX VMs. Fixes: 81bf912b2c15 ("KVM: TDX: Implement TDX vcpu enter/exit path") Signed-off-by: Adrian Hunter Message-ID: <20250415104821.247234-2-adrian.hunter@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/trace.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ccda95e53f626d..ba736cbb0587cd 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -11,6 +11,13 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm +#ifdef CREATE_TRACE_POINTS +#define tracing_kvm_rip_read(vcpu) ({ \ + typeof(vcpu) __vcpu = vcpu; \ + __vcpu->arch.guest_state_protected ? 0 : kvm_rip_read(__vcpu); \ + }) +#endif + /* * Tracepoint for guest mode entry. */ @@ -28,7 +35,7 @@ TRACE_EVENT(kvm_entry, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->rip = kvm_rip_read(vcpu); + __entry->rip = tracing_kvm_rip_read(vcpu); __entry->immediate_exit = force_immediate_exit; kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info, @@ -319,7 +326,7 @@ TRACE_EVENT(name, \ ), \ \ TP_fast_assign( \ - __entry->guest_rip = kvm_rip_read(vcpu); \ + __entry->guest_rip = tracing_kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ __entry->requests = READ_ONCE(vcpu->requests); \ @@ -423,7 +430,7 @@ TRACE_EVENT(kvm_page_fault, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->guest_rip = kvm_rip_read(vcpu); + __entry->guest_rip = tracing_kvm_rip_read(vcpu); __entry->fault_address = fault_address; __entry->error_code = error_code; ), From 38e93267ca6807fc34288ce1a9c610bf219fc0e0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 15 Apr 2025 13:48:21 +0300 Subject: [PATCH 1119/2924] KVM: x86: Do not use kvm_rip_read() unconditionally for KVM_PROFILING Not all VMs allow access to RIP. Check guest_state_protected before calling kvm_rip_read(). This avoids, for example, hitting WARN_ON_ONCE in vt_cache_reg() for TDX VMs. Fixes: 81bf912b2c15 ("KVM: TDX: Implement TDX vcpu enter/exit path") Signed-off-by: Adrian Hunter Message-ID: <20250415104821.247234-3-adrian.hunter@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a6829a370e6a97..df5b99ea1f1812 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11098,7 +11098,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* * Profile KVM exit RIPs: */ - if (unlikely(prof_on == KVM_PROFILING)) { + if (unlikely(prof_on == KVM_PROFILING && + !vcpu->arch.guest_state_protected)) { unsigned long rip = kvm_rip_read(vcpu); profile_hit(KVM_PROFILING, (void *)rip); } From 032ce1ea9442e140a80e41078b5431d4c0fa2893 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 24 Apr 2025 12:19:18 +0200 Subject: [PATCH 1120/2924] x86/boot: Work around broken busybox 'truncate' tool The GNU coreutils version of truncate, which is the original, accepts a % prefix for the -s size argument which means the file in question should be padded to a multiple of the given size. This is currently used to pad the setup block of bzImage to a multiple of 4k before appending the decompressor. busybox reimplements truncate but does not support this idiom, and therefore fails the build since commit 9c54baab4401 ("x86/boot: Drop CRC-32 checksum and the build tool that generates it") Since very little build code within the kernel depends on the 'truncate' utility, work around this incompatibility by avoiding truncate altogether, and relying on dd to perform the padding. Fixes: 9c54baab4401 ("x86/boot: Drop CRC-32 checksum and the build tool that generates it") Reported-by: Tested-by: Philipp Stanner Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250424101917.1552527-2-ardb+git@google.com --- arch/x86/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 81f55da8196765..640fcac3af7459 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -59,7 +59,7 @@ KBUILD_CFLAGS += $(CONFIG_CC_IMPLICIT_FALLTHROUGH) $(obj)/bzImage: asflags-y := $(SVGA_MODE) quiet_cmd_image = BUILD $@ - cmd_image = cp $< $@; truncate -s %4K $@; cat $(obj)/vmlinux.bin >>$@ + cmd_image = (dd if=$< bs=4k conv=sync status=none; cat $(filter-out $<,$(real-prereqs))) >$@ $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin FORCE $(call if_changed,image) From edd43f4d6f50ec3de55a0c9e9df6348d1da51965 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 24 Apr 2025 10:28:14 -0600 Subject: [PATCH 1121/2924] io_uring: fix 'sync' handling of io_fallback_tw() A previous commit added a 'sync' parameter to io_fallback_tw(), which if true, means the caller wants to wait on the fallback thread handling it. But the logic is somewhat messed up, ensure that ctxs are swapped and flushed appropriately. Cc: stable@vger.kernel.org Fixes: dfbe5561ae93 ("io_uring: flush offloaded and delayed task_work on exit") Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index eedb47c8c79e0a..a2b256e96d5da3 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1083,21 +1083,22 @@ static __cold void __io_fallback_tw(struct llist_node *node, bool sync) while (node) { req = container_of(node, struct io_kiocb, io_task_work.node); node = node->next; - if (sync && last_ctx != req->ctx) { + if (last_ctx != req->ctx) { if (last_ctx) { - flush_delayed_work(&last_ctx->fallback_work); + if (sync) + flush_delayed_work(&last_ctx->fallback_work); percpu_ref_put(&last_ctx->refs); } last_ctx = req->ctx; percpu_ref_get(&last_ctx->refs); } - if (llist_add(&req->io_task_work.node, - &req->ctx->fallback_llist)) - schedule_delayed_work(&req->ctx->fallback_work, 1); + if (llist_add(&req->io_task_work.node, &last_ctx->fallback_llist)) + schedule_delayed_work(&last_ctx->fallback_work, 1); } if (last_ctx) { - flush_delayed_work(&last_ctx->fallback_work); + if (sync) + flush_delayed_work(&last_ctx->fallback_work); percpu_ref_put(&last_ctx->refs); } } From be8250786ca94952a19ce87f98ad9906448bc9ef Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Mon, 21 Apr 2025 15:52:32 +0800 Subject: [PATCH 1122/2924] mm, slab: clean up slab->obj_exts always When memory allocation profiling is disabled at runtime or due to an error, shutdown_mem_profiling() is called: slab->obj_exts which previously allocated remains. It won't be cleared by unaccount_slab() because of mem_alloc_profiling_enabled() not true. It's incorrect, slab->obj_exts should always be cleaned up in unaccount_slab() to avoid following error: [...]BUG: Bad page state in process... .. [...]page dumped because: page still charged to cgroup [andriy.shevchenko@linux.intel.com: fold need_slab_obj_ext() into its only user] Fixes: 21c690a349ba ("mm: introduce slabobj_ext to support slab object extensions") Cc: stable@vger.kernel.org Signed-off-by: Zhenhua Huang Acked-by: David Rientjes Acked-by: Harry Yoo Tested-by: Harry Yoo Acked-by: Suren Baghdasaryan Link: https://patch.msgid.link/20250421075232.2165527-1-quic_zhenhuah@quicinc.com Signed-off-by: Vlastimil Babka --- mm/slub.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index dc9e729e1d269b..be8b09e09d3043 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2028,8 +2028,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, return 0; } -/* Should be called only if mem_alloc_profiling_enabled() */ -static noinline void free_slab_obj_exts(struct slab *slab) +static inline void free_slab_obj_exts(struct slab *slab) { struct slabobj_ext *obj_exts; @@ -2049,18 +2048,6 @@ static noinline void free_slab_obj_exts(struct slab *slab) slab->obj_exts = 0; } -static inline bool need_slab_obj_ext(void) -{ - if (mem_alloc_profiling_enabled()) - return true; - - /* - * CONFIG_MEMCG creates vector of obj_cgroup objects conditionally - * inside memcg_slab_post_alloc_hook. No other users for now. - */ - return false; -} - #else /* CONFIG_SLAB_OBJ_EXT */ static inline void init_slab_obj_exts(struct slab *slab) @@ -2077,11 +2064,6 @@ static inline void free_slab_obj_exts(struct slab *slab) { } -static inline bool need_slab_obj_ext(void) -{ - return false; -} - #endif /* CONFIG_SLAB_OBJ_EXT */ #ifdef CONFIG_MEM_ALLOC_PROFILING @@ -2129,7 +2111,7 @@ __alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) static inline void alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) { - if (need_slab_obj_ext()) + if (mem_alloc_profiling_enabled()) __alloc_tagging_slab_alloc_hook(s, object, flags); } @@ -2601,8 +2583,12 @@ static __always_inline void account_slab(struct slab *slab, int order, static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { - if (memcg_kmem_online() || need_slab_obj_ext()) - free_slab_obj_exts(slab); + /* + * The slab object extensions should now be freed regardless of + * whether mem_alloc_profiling_enabled() or not because profiling + * might have been disabled after slab->obj_exts got allocated. + */ + free_slab_obj_exts(slab); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -(PAGE_SIZE << order)); From 087a9eb9e5978e3ba362e1163691e41097e8ca20 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 23 Apr 2025 17:51:31 +0300 Subject: [PATCH 1123/2924] vxlan: vnifilter: Fix unlocked deletion of default FDB entry When a VNI is deleted from a VXLAN device in 'vnifilter' mode, the FDB entry associated with the default remote (assuming one was configured) is deleted without holding the hash lock. This is wrong and will result in a warning [1] being generated by the lockdep annotation that was added by commit ebe642067455 ("vxlan: Create wrappers for FDB lookup"). Reproducer: # ip link add vx0 up type vxlan dstport 4789 external vnifilter local 192.0.2.1 # bridge vni add vni 10010 remote 198.51.100.1 dev vx0 # bridge vni del vni 10010 dev vx0 Fix by acquiring the hash lock before the deletion and releasing it afterwards. Blame the original commit that introduced the issue rather than the one that exposed it. [1] WARNING: CPU: 3 PID: 392 at drivers/net/vxlan/vxlan_core.c:417 vxlan_find_mac+0x17f/0x1a0 [...] RIP: 0010:vxlan_find_mac+0x17f/0x1a0 [...] Call Trace: __vxlan_fdb_delete+0xbe/0x560 vxlan_vni_delete_group+0x2ba/0x940 vxlan_vni_del.isra.0+0x15f/0x580 vxlan_process_vni_filter+0x38b/0x7b0 vxlan_vnifilter_process+0x3bb/0x510 rtnetlink_rcv_msg+0x2f7/0xb70 netlink_rcv_skb+0x131/0x360 netlink_unicast+0x426/0x710 netlink_sendmsg+0x75a/0xc20 __sock_sendmsg+0xc1/0x150 ____sys_sendmsg+0x5aa/0x7b0 ___sys_sendmsg+0xfc/0x180 __sys_sendmsg+0x121/0x1b0 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250423145131.513029-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_vnifilter.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index 6e6e9f05509ab0..06d19e90eadb59 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -627,7 +627,11 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, * default dst remote_ip previously added for this vni */ if (!vxlan_addr_any(&vninode->remote_ip) || - !vxlan_addr_any(&dst->remote_ip)) + !vxlan_addr_any(&dst->remote_ip)) { + u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, + vninode->vni); + + spin_lock_bh(&vxlan->hash_lock[hash_index]); __vxlan_fdb_delete(vxlan, all_zeros_mac, (vxlan_addr_any(&vninode->remote_ip) ? dst->remote_ip : vninode->remote_ip), @@ -635,6 +639,8 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, vninode->vni, vninode->vni, dst->remote_ifindex, true); + spin_unlock_bh(&vxlan->hash_lock[hash_index]); + } if (vxlan->dev->flags & IFF_UP) { if (vxlan_addr_multicast(&vninode->remote_ip) && From 1a97fea9db9e9b9c4839d4232dde9f505ff5b4cc Mon Sep 17 00:00:00 2001 From: Luo Gengkun Date: Wed, 23 Apr 2025 06:47:24 +0000 Subject: [PATCH 1124/2924] perf/x86: Fix non-sampling (counting) events on certain x86 platforms Perf doesn't work at perf stat for hardware events on certain x86 platforms: $perf stat -- sleep 1 Performance counter stats for 'sleep 1': 16.44 msec task-clock # 0.016 CPUs utilized 2 context-switches # 121.691 /sec 0 cpu-migrations # 0.000 /sec 54 page-faults # 3.286 K/sec cycles instructions branches branch-misses The reason is that the check in x86_pmu_hw_config() for sampling events is unexpectedly applied to counting events as well. It should only impact x86 platforms with limit_period used for non-PEBS events. For Intel platforms, it should only impact some older platforms, e.g., HSW, BDW and NHM. Fixes: 88ec7eedbbd2 ("perf/x86: Fix low freqency setting issue") Signed-off-by: Luo Gengkun Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: https://lore.kernel.org/r/20250423064724.3716211-1-luogengkun@huaweicloud.com --- arch/x86/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6866cc5acb0b57..3a4f031d2f4401 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -629,7 +629,7 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == event->pmu->type) event->hw.config |= x86_pmu_get_event_config(event); - if (!event->attr.freq && x86_pmu.limit_period) { + if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) From a2620f8932fa9fdabc3d78ed6efb004ca409019f Mon Sep 17 00:00:00 2001 From: Mikhail Lobanov Date: Mon, 14 Apr 2025 20:12:06 +0300 Subject: [PATCH 1125/2924] KVM: SVM: Forcibly leave SMM mode on SHUTDOWN interception Previously, commit ed129ec9057f ("KVM: x86: forcibly leave nested mode on vCPU reset") addressed an issue where a triple fault occurring in nested mode could lead to use-after-free scenarios. However, the commit did not handle the analogous situation for System Management Mode (SMM). This omission results in triggering a WARN when KVM forces a vCPU INIT after SHUTDOWN interception while the vCPU is in SMM. This situation was reprodused using Syzkaller by: 1) Creating a KVM VM and vCPU 2) Sending a KVM_SMI ioctl to explicitly enter SMM 3) Executing invalid instructions causing consecutive exceptions and eventually a triple fault The issue manifests as follows: WARNING: CPU: 0 PID: 25506 at arch/x86/kvm/x86.c:12112 kvm_vcpu_reset+0x1d2/0x1530 arch/x86/kvm/x86.c:12112 Modules linked in: CPU: 0 PID: 25506 Comm: syz-executor.0 Not tainted 6.1.130-syzkaller-00157-g164fe5dde9b6 #0 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 RIP: 0010:kvm_vcpu_reset+0x1d2/0x1530 arch/x86/kvm/x86.c:12112 Call Trace: shutdown_interception+0x66/0xb0 arch/x86/kvm/svm/svm.c:2136 svm_invoke_exit_handler+0x110/0x530 arch/x86/kvm/svm/svm.c:3395 svm_handle_exit+0x424/0x920 arch/x86/kvm/svm/svm.c:3457 vcpu_enter_guest arch/x86/kvm/x86.c:10959 [inline] vcpu_run+0x2c43/0x5a90 arch/x86/kvm/x86.c:11062 kvm_arch_vcpu_ioctl_run+0x50f/0x1cf0 arch/x86/kvm/x86.c:11283 kvm_vcpu_ioctl+0x570/0xf00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:4122 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x35/0x80 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Architecturally, INIT is blocked when the CPU is in SMM, hence KVM's WARN() in kvm_vcpu_reset() to guard against KVM bugs, e.g. to detect improper emulation of INIT. SHUTDOWN on SVM is a weird edge case where KVM needs to do _something_ sane with the VMCB, since it's technically undefined, and INIT is the least awful choice given KVM's ABI. So, double down on stuffing INIT on SHUTDOWN, and force the vCPU out of SMM to avoid any weirdness (and the WARN). Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: ed129ec9057f ("KVM: x86: forcibly leave nested mode on vCPU reset") Cc: stable@vger.kernel.org Suggested-by: Sean Christopherson Signed-off-by: Mikhail Lobanov Link: https://lore.kernel.org/r/20250414171207.155121-1-m.lobanov@rosa.ru [sean: massage changelog, make it clear this isn't architectural behavior] Signed-off-by: Sean Christopherson --- arch/x86/kvm/smm.c | 1 + arch/x86/kvm/svm/svm.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 699e551ec93bab..9864c057187d8a 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -131,6 +131,7 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) kvm_mmu_reset_context(vcpu); } +EXPORT_SYMBOL_GPL(kvm_smm_changed); void process_smi(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d5d0c5c3300bc1..c5470d842aed43 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2231,6 +2231,10 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) */ if (!sev_es_guest(vcpu->kvm)) { clear_page(svm->vmcb); +#ifdef CONFIG_KVM_SMM + if (is_smm(vcpu)) + kvm_smm_changed(vcpu, false); +#endif kvm_vcpu_reset(vcpu, true); } From a476cadf8ef1fbb9780581316f0199dfc62a81f2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 24 Mar 2025 13:51:28 +0300 Subject: [PATCH 1126/2924] KVM: x86: Check that the high 32bits are clear in kvm_arch_vcpu_ioctl_run() The "kvm_run->kvm_valid_regs" and "kvm_run->kvm_dirty_regs" variables are u64 type. We are only using the lowest 3 bits but we want to ensure that the users are not passing invalid bits so that we can use the remaining bits in the future. However "sync_valid_fields" and kvm_sync_valid_fields() are u32 type so the check only ensures that the lower 32 bits are clear. Fix this by changing the types to u64. Fixes: 74c1807f6c4f ("KVM: x86: block KVM_CAP_SYNC_REGS if guest state is protected") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/ec25aad1-113e-4c6e-8941-43d432251398@stanley.mountain Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index df5b99ea1f1812..9896fd574bfc9d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4597,7 +4597,7 @@ static bool kvm_is_vm_type_supported(unsigned long type) return type < 32 && (kvm_caps.supported_vm_types & BIT(type)); } -static inline u32 kvm_sync_valid_fields(struct kvm *kvm) +static inline u64 kvm_sync_valid_fields(struct kvm *kvm) { return kvm && kvm->arch.has_protected_state ? 0 : KVM_SYNC_X86_VALID_FIELDS; } @@ -11493,7 +11493,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { struct kvm_queued_exception *ex = &vcpu->arch.exception; struct kvm_run *kvm_run = vcpu->run; - u32 sync_valid_fields; + u64 sync_valid_fields; int r; r = kvm_mmu_post_init_vm(vcpu->kvm); From 85fd85bc025a525354acb2241beb3c5387c551ec Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 23 Apr 2025 09:58:15 +0300 Subject: [PATCH 1127/2924] x86/insn: Fix CTEST instruction decoding insn_decoder_test found a problem with decoding APX CTEST instructions: Found an x86 instruction decoder bug, please report this. ffffffff810021df 62 54 94 05 85 ff ctestneq objdump says 6 bytes, but insn_get_length() says 5 It happens because x86-opcode-map.txt doesn't specify arguments for the instruction and the decoder doesn't expect to see ModRM byte. Fixes: 690ca3a3067f ("x86/insn: Add support for APX EVEX instructions to the opcode map") Signed-off-by: Kirill A. Shutemov Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Adrian Hunter Cc: stable@vger.kernel.org # v6.10+ Link: https://lore.kernel.org/r/20250423065815.2003231-1-kirill.shutemov@linux.intel.com --- arch/x86/lib/x86-opcode-map.txt | 4 ++-- tools/arch/x86/lib/x86-opcode-map.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688fc..f5dd84eb55dcda 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688fc..f5dd84eb55dcda 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) From 121f34341d396b666d8a90b24768b40e08ca0d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Sat, 19 Apr 2025 13:13:59 +0200 Subject: [PATCH 1128/2924] riscv: Replace function-like macro by static inline function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flush_icache_range() function is implemented as a "function-like macro with unused parameters", which can result in "unused variables" warnings. Replace the macro with a static inline function, as advised by Documentation/process/coding-style.rst. Fixes: 08f051eda33b ("RISC-V: Flush I$ when making a dirty page executable") Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20250419111402.1660267-1-bjorn@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 8de73f91bfa371..b59ffeb668d6a5 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -34,11 +34,6 @@ static inline void flush_dcache_page(struct page *page) flush_dcache_folio(page_folio(page)); } -/* - * RISC-V doesn't have an instruction to flush parts of the instruction cache, - * so instead we just flush the whole thing. - */ -#define flush_icache_range(start, end) flush_icache_all() #define flush_icache_user_page(vma, pg, addr, len) \ do { \ if (vma->vm_flags & VM_EXEC) \ @@ -78,6 +73,16 @@ void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +/* + * RISC-V doesn't have an instruction to flush parts of the instruction cache, + * so instead we just flush the whole thing. + */ +#define flush_icache_range flush_icache_range +static inline void flush_icache_range(unsigned long start, unsigned long end) +{ + flush_icache_all(); +} + extern unsigned int riscv_cbom_block_size; extern unsigned int riscv_cboz_block_size; void riscv_init_cbo_blocksizes(void); From 7d1d19a11cfbfd8bae1d89cc010b2cc397cd0c48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Sat, 19 Apr 2025 13:14:00 +0200 Subject: [PATCH 1129/2924] riscv: uprobes: Add missing fence.i after building the XOL buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XOL (execute out-of-line) buffer is used to single-step the replaced instruction(s) for uprobes. The RISC-V port was missing a proper fence.i (i$ flushing) after constructing the XOL buffer, which can result in incorrect execution of stale/broken instructions. This was found running the BPF selftests "test_progs: uprobe_autoattach, attach_probe" on the Spacemit K1/X60, where the uprobes tests randomly blew up. Reviewed-by: Guo Ren Fixes: 74784081aac8 ("riscv: Add uprobes supported") Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20250419111402.1660267-2-bjorn@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/uprobes.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 4b3dc8beaf77d3..cc15f7ca6cc17b 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, /* Initialize the slot */ void *kaddr = kmap_atomic(page); void *dst = kaddr + (vaddr & ~PAGE_MASK); + unsigned long start = (unsigned long)dst; memcpy(dst, src, len); @@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, *(uprobe_opcode_t *)dst = __BUG_INSN_32; } + flush_icache_range(start, start + len); kunmap_atomic(kaddr); - - /* - * We probably need flush_icache_user_page() but it needs vma. - * This should work on most of architectures by default. If - * architecture needs to do something different it can define - * its own version of the function. - */ - flush_dcache_page(page); } From b9e1f873d2e152b1c82dfc50943f4d3ca322f800 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 20 Apr 2025 12:15:15 -0400 Subject: [PATCH 1130/2924] bcachefs: Casefold is now a regular opts.h option Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 1 + fs/bcachefs/fs.c | 33 ++++++++++++++++++++++----------- fs/bcachefs/inode.h | 8 ++++++++ fs/bcachefs/inode_format.h | 9 +++++---- fs/bcachefs/namei.c | 4 ---- fs/bcachefs/opts.h | 5 +++++ fs/bcachefs/str_hash.h | 5 ++--- 7 files changed, 43 insertions(+), 22 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 377f04d92a1443..d6e4a496f02b64 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -867,6 +867,7 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4); LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14); LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20); +LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index e220e3cba29c73..7c1943c83bf6e9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -53,16 +53,19 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, struct bch_subvolume *); /* Set VFS inode flags from bcachefs inode: */ -static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) +static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode) { static const __maybe_unused unsigned bch_flags_to_vfs[] = { [__BCH_INODE_sync] = S_SYNC, [__BCH_INODE_immutable] = S_IMMUTABLE, [__BCH_INODE_append] = S_APPEND, [__BCH_INODE_noatime] = S_NOATIME, - [__BCH_INODE_casefolded] = S_CASEFOLD, }; + set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); + + if (bch2_inode_casefold(c, &inode->ei_inode)) + inode->v.i_flags |= S_CASEFOLD; } void bch2_inode_update_after_write(struct btree_trans *trans, @@ -93,7 +96,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans, inode->ei_inode = *bi; - bch2_inode_flags_to_vfs(inode); + bch2_inode_flags_to_vfs(c, inode); } int __must_check bch2_write_inode(struct bch_fs *c, @@ -1470,7 +1473,6 @@ static const __maybe_unused unsigned bch_flags_to_uflags[] = { [__BCH_INODE_append] = FS_APPEND_FL, [__BCH_INODE_nodump] = FS_NODUMP_FL, [__BCH_INODE_noatime] = FS_NOATIME_FL, - [__BCH_INODE_casefolded] = FS_CASEFOLD_FL, }; /* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ @@ -1486,13 +1488,14 @@ static int bch2_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags)); if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; - if (inode->ei_inode.bi_flags & BCH_INODE_casefolded) + if (bch2_inode_casefold(c, &inode->ei_inode)) fa->flags |= FS_CASEFOLD_FL; fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ]; @@ -1504,6 +1507,8 @@ struct flags_set { unsigned flags; unsigned projid; bool set_project; + bool set_casefold; + bool casefold; }; static int fssetxattr_inode_update_fn(struct btree_trans *trans, @@ -1518,15 +1523,12 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans, * We're relying on btree locking here for exclusion with other ioctl * calls - use the flags in the btree (@bi), not inode->i_flags: */ - unsigned newflags = s->flags; - unsigned oldflags = bi->bi_flags & s->mask; - if (!S_ISREG(bi->bi_mode) && !S_ISDIR(bi->bi_mode) && - (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) + (s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags) return -EINVAL; - if ((newflags ^ oldflags) & BCH_INODE_casefolded) { + if (s->casefold != bch2_inode_casefold(c, bi)) { #ifdef CONFIG_UNICODE int ret = 0; /* Not supported on individual files. */ @@ -1546,6 +1548,10 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans, return ret; bch2_check_set_feature(c, BCH_FEATURE_casefolding); + + bi->bi_casefold = s->casefold + 1; + bi->bi_fields_set |= BIT(Inode_opt_casefold); + #else printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); return -EOPNOTSUPP; @@ -1558,7 +1564,7 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans, } bi->bi_flags &= ~s->mask; - bi->bi_flags |= newflags; + bi->bi_flags |= s->flags; bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); return 0; @@ -1598,6 +1604,11 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap, if (fa->flags_valid) { s.mask = map_defined(bch_flags_to_uflags); + + s.set_casefold = true; + s.casefold = (fa->flags & FS_CASEFOLD_FL) != 0; + fa->flags &= ~FS_CASEFOLD_FL; + s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags); if (fa->flags) return -EOPNOTSUPP; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index f82cfbf460d094..c74af15b14f281 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -243,6 +243,14 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k) } } +static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi) +{ + /* inode apts are stored with a +1 bias: 0 means "unset, use fs opt" */ + return bi->bi_casefold + ? bi->bi_casefold - 1 + : c->opts.casefold; +} + /* i_nlink: */ static inline unsigned nlink_bias(umode_t mode) diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h index 117110af1e3f2a..87e193e8ed2543 100644 --- a/fs/bcachefs/inode_format.h +++ b/fs/bcachefs/inode_format.h @@ -103,7 +103,8 @@ struct bch_inode_generation { x(bi_parent_subvol, 32) \ x(bi_nocow, 8) \ x(bi_depth, 32) \ - x(bi_inodes_32bit, 8) + x(bi_inodes_32bit, 8) \ + x(bi_casefold, 8) /* subset of BCH_INODE_FIELDS */ #define BCH_INODE_OPTS() \ @@ -117,7 +118,8 @@ struct bch_inode_generation { x(background_target, 16) \ x(erasure_code, 16) \ x(nocow, 8) \ - x(inodes_32bit, 8) + x(inodes_32bit, 8) \ + x(casefold, 8) enum inode_opt_id { #define x(name, ...) \ @@ -137,8 +139,7 @@ enum inode_opt_id { x(i_sectors_dirty, 6) \ x(unlinked, 7) \ x(backptr_untrusted, 8) \ - x(has_child_snapshot, 9) \ - x(casefolded, 10) + x(has_child_snapshot, 9) /* bits 20+ reserved for packed fields below: */ diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 0d65ea96f7a297..46f3c8b100a952 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -47,10 +47,6 @@ int bch2_create_trans(struct btree_trans *trans, if (ret) goto err; - /* Inherit casefold state from parent. */ - if (S_ISDIR(mode)) - new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded; - if (!(flags & BCH_CREATE_SNAPSHOT)) { /* Normal create path - allocate a new inode: */ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 4d06313076ff65..dfb14810124c7c 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -228,6 +228,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_ERASURE_CODE, false, \ NULL, "Enable erasure coding (DO NOT USE YET)") \ + x(casefold, u8, \ + OPT_FS|OPT_INODE|OPT_FORMAT, \ + OPT_BOOL(), \ + BCH_SB_CASEFOLD, false, \ + NULL, "Dirent lookups are casefolded") \ x(inodes_32bit, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 09a354a26c3bd0..0c1a00539bd14a 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -33,7 +33,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) struct bch_hash_info { u8 type; - struct unicode_map *cf_encoding; + struct unicode_map *cf_encoding; /* * For crc32 or crc64 string hashes the first key value of * the siphash_key (k0) is used as the key. @@ -44,11 +44,10 @@ struct bch_hash_info { static inline struct bch_hash_info bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) { - /* XXX ick */ struct bch_hash_info info = { .type = INODE_STR_HASH(bi), #ifdef CONFIG_UNICODE - .cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL, + .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL, #endif .siphash_key = { .k0 = bi->bi_hash_seed } }; From 9cdde3c7aa3d5bfc7c7b5ec849e5a3288a66af35 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 12:58:06 -0400 Subject: [PATCH 1131/2924] bcachefs: Fix casefold lookups Add casefolding to bch2_lookup_trans: During the delay between when casefolding was written and when it was merged, the main filesystem lookup path grew self healing - which meant it was no longer using bch2_dirent_lookup_trans(), where casefolding on lookups happens. Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 16 ++-------------- fs/bcachefs/dirent.h | 15 +++++++++++++++ fs/bcachefs/fs.c | 11 ++++++++--- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 8488a757811514..92ee59d9e00ece 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -13,8 +13,8 @@ #include -static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, - const struct qstr *str, struct qstr *out_cf) +int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) { *out_cf = (struct qstr) QSTR_INIT(NULL, 0); @@ -35,18 +35,6 @@ static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info * #endif } -static inline int bch2_maybe_casefold(struct btree_trans *trans, - const struct bch_hash_info *info, - const struct qstr *str, struct qstr *out_cf) -{ - if (likely(!info->cf_encoding)) { - *out_cf = *str; - return 0; - } else { - return bch2_casefold(trans, info, str, out_cf); - } -} - static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name)) diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 0880772b80a9b1..9838a7ba7ed1b5 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -23,6 +23,21 @@ struct bch_fs; struct bch_hash_info; struct bch_inode_info; +int bch2_casefold(struct btree_trans *, const struct bch_hash_info *, + const struct qstr *, struct qstr *); + +static inline int bch2_maybe_casefold(struct btree_trans *trans, + const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) +{ + if (likely(!info->cf_encoding)) { + *out_cf = *str; + return 0; + } else { + return bch2_casefold(trans, info, str, out_cf); + } +} + struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d); static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 7c1943c83bf6e9..c73e9705281634 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -648,13 +648,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter dirent_iter = {}; subvol_inum inum = {}; struct printbuf buf = PRINTBUF; + struct qstr lookup_name; + int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name); + if (ret) + return ERR_PTR(ret); + + struct btree_iter dirent_iter = {}; struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc, - dir_hash_info, dir, name, 0); - int ret = bkey_err(k); + dir_hash_info, dir, &lookup_name, 0); + ret = bkey_err(k); if (ret) return ERR_PTR(ret); From 7cb85324c4f6817d5147b9d298c71bf9b5e87c6b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 18:07:06 -0400 Subject: [PATCH 1132/2924] bcachefs: unlink: casefold d_invalidate casefolding results in additional aliases on lookup for the non-casefolded names - these need invalidating on unlink. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index c73e9705281634..5b716ffde500c7 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -847,6 +847,11 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, */ set_nlink(&inode->v, 0); } + + if (IS_CASEFOLDED(vdir)) { + d_invalidate(dentry); + d_prune_aliases(&inode->v); + } err: bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); From caab547686d77733387fe734942846916374ddf2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 21 Apr 2025 21:16:24 -0400 Subject: [PATCH 1133/2924] bcachefs: Print mount opts earlier If we aren't mounting with the correct degraded option, it's helpful to know that before we fail to mount degraded. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 76 ++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 4060af4692f928..e4ab0595c0aef6 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1004,6 +1004,40 @@ static void print_mount_opts(struct bch_fs *c) printbuf_exit(&p); } +static bool bch2_fs_may_start(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i, flags = 0; + + if (c->opts.very_degraded) + flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; + + if (c->opts.degraded) + flags |= BCH_FORCE_IF_DEGRADED; + + if (!c->opts.degraded && + !c->opts.very_degraded) { + mutex_lock(&c->sb_lock); + + for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { + if (!bch2_member_exists(c->disk_sb.sb, i)) + continue; + + ca = bch2_dev_locked(c, i); + + if (!bch2_dev_is_online(ca) && + (ca->mi.state == BCH_MEMBER_STATE_rw || + ca->mi.state == BCH_MEMBER_STATE_ro)) { + mutex_unlock(&c->sb_lock); + return false; + } + } + mutex_unlock(&c->sb_lock); + } + + return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true); +} + int bch2_fs_start(struct bch_fs *c) { time64_t now = ktime_get_real_seconds(); @@ -1011,6 +1045,9 @@ int bch2_fs_start(struct bch_fs *c) print_mount_opts(c); + if (!bch2_fs_may_start(c)) + return -BCH_ERR_insufficient_devices_to_start; + down_write(&c->state_lock); mutex_lock(&c->sb_lock); @@ -1537,40 +1574,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, } } -static bool bch2_fs_may_start(struct bch_fs *c) -{ - struct bch_dev *ca; - unsigned i, flags = 0; - - if (c->opts.very_degraded) - flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; - - if (c->opts.degraded) - flags |= BCH_FORCE_IF_DEGRADED; - - if (!c->opts.degraded && - !c->opts.very_degraded) { - mutex_lock(&c->sb_lock); - - for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { - if (!bch2_member_exists(c->disk_sb.sb, i)) - continue; - - ca = bch2_dev_locked(c, i); - - if (!bch2_dev_is_online(ca) && - (ca->mi.state == BCH_MEMBER_STATE_rw || - ca->mi.state == BCH_MEMBER_STATE_ro)) { - mutex_unlock(&c->sb_lock); - return false; - } - } - mutex_unlock(&c->sb_lock); - } - - return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true); -} - static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) { bch2_dev_io_ref_stop(ca, WRITE); @@ -2206,11 +2209,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } up_write(&c->state_lock); - if (!bch2_fs_may_start(c)) { - ret = -BCH_ERR_insufficient_devices_to_start; - goto err_print; - } - if (!c->opts.nostart) { ret = bch2_fs_start(c); if (ret) From 394ef278e1fdadc1f27acd8c0549a8831dfc1833 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 04:44:00 -0400 Subject: [PATCH 1134/2924] bcachefs: Unit test fixes The peek_end() tests expect an empty btree. Signed-off-by: Kent Overstreet --- fs/bcachefs/tests.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index c265b102267ab2..782a05fe7656b5 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -342,6 +342,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) */ static int test_peek_end(struct bch_fs *c, u64 nr) { + delete_test_keys(c); + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; @@ -362,6 +364,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr) static int test_peek_end_extents(struct bch_fs *c, u64 nr) { + delete_test_keys(c); + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; From c4f89a1d3590243ef1bbd55557a1f55a4a93927d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 04:45:25 -0400 Subject: [PATCH 1135/2924] bcachefs: Make btree_iter_peek_prev() assert more precise The issue this assert is guarding against is that in BTREE_ITER_filter_snapshots mode we only want to be iterating within a single inode number - if we iterate into another inode number with keys for a different snapshot tree, we'll loop arbitrarily long before finding a key we can return. This comes up in the unit tests, where we're using inode 0 for our test keys. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index e34e9598ef25ab..400ff650553b4d 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2602,7 +2602,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct bch2_trans_verify_not_unlocked_or_in_restart(trans); bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); + EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && iter->pos.inode != end.inode); int ret = trans_maybe_inject_restart(trans, _RET_IP_); if (unlikely(ret)) { From 353739f1d1675692d9de01483c75c15b314710ac Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 04:54:54 -0400 Subject: [PATCH 1136/2924] bcachefs: Fix btree_iter_peek_prev() at end of inode At the end of the inode, on an extents iterator, peek_slot() has to advance to the next position to avoid returning a 0 size extent, which is not allowed. Changing iter->pos confuses peek_prev(), but we don't need to call peek_slot() in this case. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 400ff650553b4d..59fa527ac68548 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2577,7 +2577,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct struct bpos end) { if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) && - !bkey_eq(iter->pos, POS_MAX)) { + !bkey_eq(iter->pos, POS_MAX) && + !((iter->flags & BTREE_ITER_is_extents) && + iter->pos.offset == U64_MAX)) { + /* * bkey_start_pos(), for extents, is not monotonically * increasing until after filtering for snapshots: From 28d2d19ccc8e36dacdd65303b051972926943394 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 15 Jan 2024 12:26:56 -0500 Subject: [PATCH 1137/2924] bcachefs: drop duplicate fiemap sync flag FIEMAP_FLAG_SYNC handling was deliberately moved into core code in commit 45dd052e67ad ("fs: handle FIEMAP_FLAG_SYNC in fiemap_prep"), released in kernel v5.8. Update bcachefs accordingly. Signed-off-by: Brian Foster --- fs/bcachefs/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5b716ffde500c7..59d919aeda74f4 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1330,7 +1330,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bool have_extent = false; int ret = 0; - ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); + ret = fiemap_prep(&ei->v, info, start, &len, 0); if (ret) return ret; From d020a9fb11bd85f4f16392e2a44c46ae9778b3ee Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 15 Jan 2024 14:21:15 -0500 Subject: [PATCH 1138/2924] bcachefs: track current fiemap offset in start variable Signed-off-by: Brian Foster --- fs/bcachefs/fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 59d919aeda74f4..dabec16f263910 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1387,6 +1387,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_bkey_buf_realloc(&prev, c, k.k->u64s); sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); + start = iter.pos.offset + sectors; bch2_cut_front(POS(k.k->p.inode, bkey_start_offset(k.k) + @@ -1407,8 +1408,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bkey_copy(prev.k, cur.k); have_extent = true; - bch2_btree_iter_set_pos(trans, &iter, - POS(iter.pos.inode, iter.pos.offset + sectors)); + bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, start)); } bch2_trans_iter_exit(trans, &iter); From 2d55a637095d0eaaad609b8a518589ead34487b3 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 15 Jan 2024 14:27:49 -0500 Subject: [PATCH 1139/2924] bcachefs: refactor fiemap processing into extent helper and struct The bulk of the loop in bch2_fiemap() involves processing the current extent key from the iter, including following indirections and trimming the extent size and such. This patch makes a few changes to reduce the size of the loop and facilitate future changes to support delalloc extents. Define a new bch_fiemap_extent structure to wrap the bkey buffer that holds the extent key to report to userspace along with associated fiemap flags. Update bch2_fill_extent() to take the bch_fiemap_extent as a param instead of the individual fields. Finally, lift the bulk of the extent processing into a bch2_fiemap_extent() helper that takes the current key and formats the bch_fiemap_extent appropriately for the fill function. No functional changes intended by this patch. Signed-off-by: Brian Foster --- fs/bcachefs/fs.c | 87 +++++++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index dabec16f263910..00698457dee5b3 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1262,10 +1262,18 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, return finish_open_simple(file, 0); } +struct bch_fiemap_extent { + struct bkey_buf kbuf; + unsigned flags; +}; + static int bch2_fill_extent(struct bch_fs *c, struct fiemap_extent_info *info, - struct bkey_s_c k, unsigned flags) + struct bch_fiemap_extent *fe) { + struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k); + unsigned flags = fe->flags; + if (bkey_extent_is_direct_data(k.k)) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1318,6 +1326,36 @@ static int bch2_fill_extent(struct bch_fs *c, } } +static int bch2_fiemap_extent(struct btree_trans *trans, + struct btree_iter *iter, struct bkey_s_c k, + struct bch_fiemap_extent *cur) +{ + s64 offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); + unsigned sectors = k.k->size - offset_into_extent; + + bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k); + + enum btree_id data_btree = BTREE_ID_extents; + int ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, + &cur->kbuf); + if (ret) + return ret; + + k = bkey_i_to_s_c(cur->kbuf.k); + sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); + + bch2_cut_front(POS(k.k->p.inode, + bkey_start_offset(k.k) + offset_into_extent), + cur->kbuf.k); + bch2_key_resize(&cur->kbuf.k->k, sectors); + cur->kbuf.k->k.p = iter->pos; + cur->kbuf.k->k.p.offset += cur->kbuf.k->k.size; + + cur->flags = 0; + + return 0; +} + static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, u64 start, u64 len) { @@ -1326,7 +1364,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; - struct bkey_buf cur, prev; + struct bch_fiemap_extent cur, prev; bool have_extent = false; int ret = 0; @@ -1340,15 +1378,14 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, start >>= 9; - bch2_bkey_buf_init(&cur); - bch2_bkey_buf_init(&prev); + bch2_bkey_buf_init(&cur.kbuf); + bch2_bkey_buf_init(&prev.kbuf); trans = bch2_trans_get(c); bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(ei->v.i_ino, start), 0); while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - enum btree_id data_btree = BTREE_ID_extents; bch2_trans_begin(trans); @@ -1373,39 +1410,21 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, continue; } - s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); - unsigned sectors = k.k->size - offset_into_extent; - - bch2_bkey_buf_reassemble(&cur, c, k); - - ret = bch2_read_indirect_extent(trans, &data_btree, - &offset_into_extent, &cur); + ret = bch2_fiemap_extent(trans, &iter, k, &cur); if (ret) - continue; - - k = bkey_i_to_s_c(cur.k); - bch2_bkey_buf_realloc(&prev, c, k.k->u64s); - - sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); - start = iter.pos.offset + sectors; - - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + - offset_into_extent), - cur.k); - bch2_key_resize(&cur.k->k, sectors); - cur.k->k.p = iter.pos; - cur.k->k.p.offset += cur.k->k.size; + break; + bch2_bkey_buf_realloc(&prev.kbuf, c, cur.kbuf.k->k.u64s); + start = cur.kbuf.k->k.p.offset; if (have_extent) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, - bkey_i_to_s_c(prev.k), 0); + ret = bch2_fill_extent(c, info, &prev); if (ret) break; } - bkey_copy(prev.k, cur.k); + bkey_copy(prev.kbuf.k, cur.kbuf.k); + prev.flags = cur.flags; have_extent = true; bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, start)); @@ -1414,13 +1433,13 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (!ret && have_extent) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), - FIEMAP_EXTENT_LAST); + prev.flags |= FIEMAP_EXTENT_LAST; + ret = bch2_fill_extent(c, info, &prev); } bch2_trans_put(trans); - bch2_bkey_buf_exit(&cur, c); - bch2_bkey_buf_exit(&prev, c); + bch2_bkey_buf_exit(&cur.kbuf, c); + bch2_bkey_buf_exit(&prev.kbuf, c); return ret < 0 ? ret : 0; } From b9b0494017b5f6d0664ecbcd2d8870800f045581 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 15 Jan 2024 14:46:00 -0500 Subject: [PATCH 1140/2924] bcachefs: add fiemap delalloc extent detection bcachefs currently populates fiemap data from the extents btree. This works correctly when the fiemap sync flag is provided, but if not, it skips all delalloc extents that have not yet been flushed. This is because delalloc extents from buffered writes are first stored as reservation in the pagecache, and only become resident in the extents btree after writeback completes. Update the fiemap implementation to process holes between extents by scanning pagecache for data, via seek data/hole. If a valid data range is found over a hole in the extent btree, fake up an extent key and flag the extent as delalloc for reporting to userspace. Note that this does not necessarily change behavior for the case where there is dirty pagecache over already written extents, where when in COW mode, writeback will allocate new blocks for the underlying ranges. The existing behavior is consistent with btrfs and it is recommended to use the sync flag for the most up to date extent state from fiemap. Signed-off-by: Brian Foster --- fs/bcachefs/fs.c | 119 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 00698457dee5b3..f5a9f9e8c457cf 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1356,6 +1356,87 @@ static int bch2_fiemap_extent(struct btree_trans *trans, return 0; } +/* + * Scan a range of an inode for data in pagecache. + * + * Intended to be retryable, so don't modify the output params until success is + * imminent. + */ +static int +bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end, + bool nonblock) +{ + loff_t dstart, dend; + + dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock); + if (dstart < 0) + return dstart; + if (dstart >= *end) + return -ENOENT; + + dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock); + if (dend < 0) + return dend; + + *start = dstart; + *end = dend; + return 0; +} + +/* + * Scan a range of pagecache that corresponds to a file mapping hole in the + * extent btree. If data is found, fake up an extent key so it looks like a + * delalloc extent to the rest of the fiemap processing code. + * + * Returns 0 if cached data was found, -ENOENT if not. + */ +static int +bch2_fiemap_hole(struct btree_trans *trans, struct inode *vinode, u64 start, + u64 end, struct bch_fiemap_extent *cur) +{ + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *ei = to_bch_ei(vinode); + struct bkey_i_extent *delextent; + struct bch_extent_ptr ptr = {}; + loff_t dstart = start, dend = end; + int ret; + + /* + * We hold btree locks here so we cannot block on folio locks without + * dropping trans locks first. Run a nonblocking scan for the common + * case of no folios over holes and fall back on failure. + * + * Note that dropping locks like this is technically racy against + * writeback inserting to the extent tree, but a non-sync fiemap scan is + * fundamentally racy with writeback anyways. Therefore, just report the + * range as delalloc regardless of whether we have to cycle trans locks. + */ + ret = bch2_fiemap_hole_pagecache(vinode, &dstart, &dend, true); + if (ret == -EAGAIN) { + /* open coded drop_locks_do() to relock even on error */ + bch2_trans_unlock(trans); + ret = bch2_fiemap_hole_pagecache(vinode, &dstart, &dend, false); + bch2_trans_relock(trans); + } + if (ret < 0) + return ret; + + /* + * Create a fake extent key in the buffer. We have to add a dummy extent + * pointer for the fill code to add an extent entry. It's explicitly + * zeroed to reflect delayed allocation (i.e. phys offset 0). + */ + bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64)); + delextent = bkey_extent_init(cur->kbuf.k); + delextent->k.p = POS(ei->v.i_ino, dstart >> 9); + bch2_key_resize(&delextent->k, (dend - dstart) >> 9); + bch2_bkey_append_ptr(&delextent->k_i, ptr); + + cur->flags = FIEMAP_EXTENT_DELALLOC; + + return 0; +} + static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, u64 start, u64 len) { @@ -1386,6 +1467,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, POS(ei->v.i_ino, start), 0); while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + bool have_delalloc = false; bch2_trans_begin(trans); @@ -1404,15 +1486,38 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, if (!k.k) break; - if (!bkey_extent_is_data(k.k) && - k.k->type != KEY_TYPE_reservation) { - bch2_btree_iter_advance(trans, &iter); - continue; + /* + * If a hole exists before the start of the extent key, scan the + * range for pagecache data that might be pending writeback and + * thus not yet exist in the extent tree. + */ + if (iter.pos.offset > start) { + ret = bch2_fiemap_hole(trans, vinode, start << 9, + iter.pos.offset << 9, &cur); + if (!ret) + have_delalloc = true; + else if (ret != -ENOENT) + break; } - ret = bch2_fiemap_extent(trans, &iter, k, &cur); - if (ret) - break; + /* process the current key if there's no delalloc to report */ + if (!have_delalloc) { + if (!bkey_extent_is_data(k.k) && + k.k->type != KEY_TYPE_reservation) { + start = bkey_start_offset(k.k) + k.k->size; + bch2_btree_iter_advance(trans, &iter); + continue; + } + + ret = bch2_fiemap_extent(trans, &iter, k, &cur); + if (ret) + break; + } + + /* + * Store the current extent in prev so we can flag the last + * extent on the way out. + */ bch2_bkey_buf_realloc(&prev.kbuf, c, cur.kbuf.k->k.u64s); start = cur.kbuf.k->k.p.offset; From d1b0f9aa73fe50ee5276708e33d77c4e7054e555 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 11:52:45 -0400 Subject: [PATCH 1141/2924] bcachefs: Rework fiemap transaction restart handling Restart handling in the previous patch was incorrect, so: move btree operations into a separate helper, and run it with a lockrestart_do(). Additionally, clarify whether pagecache or the btree takes precedence. Right now, the btree takes precedence: this is incorrect, but it's needed to pass fstests. Add a giant comment explaining why. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 225 +++++++++++++++++++++++------------------------ 1 file changed, 112 insertions(+), 113 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index f5a9f9e8c457cf..0f1d61aab90bd5 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1274,6 +1274,8 @@ static int bch2_fill_extent(struct bch_fs *c, struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k); unsigned flags = fe->flags; + BUG_ON(!k.k->size); + if (bkey_extent_is_direct_data(k.k)) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1326,36 +1328,6 @@ static int bch2_fill_extent(struct bch_fs *c, } } -static int bch2_fiemap_extent(struct btree_trans *trans, - struct btree_iter *iter, struct bkey_s_c k, - struct bch_fiemap_extent *cur) -{ - s64 offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); - unsigned sectors = k.k->size - offset_into_extent; - - bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k); - - enum btree_id data_btree = BTREE_ID_extents; - int ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, - &cur->kbuf); - if (ret) - return ret; - - k = bkey_i_to_s_c(cur->kbuf.k); - sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); - - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + offset_into_extent), - cur->kbuf.k); - bch2_key_resize(&cur->kbuf.k->k, sectors); - cur->kbuf.k->k.p = iter->pos; - cur->kbuf.k->k.p.offset += cur->kbuf.k->k.size; - - cur->flags = 0; - - return 0; -} - /* * Scan a range of an inode for data in pagecache. * @@ -1371,13 +1343,19 @@ bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end, dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock); if (dstart < 0) return dstart; - if (dstart >= *end) - return -ENOENT; + + if (dstart == *end) { + *start = dstart; + return 0; + } dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock); if (dend < 0) return dend; + /* race */ + BUG_ON(dstart == dend); + *start = dstart; *end = dend; return 0; @@ -1387,18 +1365,15 @@ bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end, * Scan a range of pagecache that corresponds to a file mapping hole in the * extent btree. If data is found, fake up an extent key so it looks like a * delalloc extent to the rest of the fiemap processing code. - * - * Returns 0 if cached data was found, -ENOENT if not. */ static int -bch2_fiemap_hole(struct btree_trans *trans, struct inode *vinode, u64 start, - u64 end, struct bch_fiemap_extent *cur) +bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode, + u64 start, u64 end, struct bch_fiemap_extent *cur) { - struct bch_fs *c = vinode->i_sb->s_fs_info; - struct bch_inode_info *ei = to_bch_ei(vinode); + struct bch_fs *c = trans->c; struct bkey_i_extent *delextent; struct bch_extent_ptr ptr = {}; - loff_t dstart = start, dend = end; + loff_t dstart = start << 9, dend = end << 9; int ret; /* @@ -1411,13 +1386,10 @@ bch2_fiemap_hole(struct btree_trans *trans, struct inode *vinode, u64 start, * fundamentally racy with writeback anyways. Therefore, just report the * range as delalloc regardless of whether we have to cycle trans locks. */ - ret = bch2_fiemap_hole_pagecache(vinode, &dstart, &dend, true); - if (ret == -EAGAIN) { - /* open coded drop_locks_do() to relock even on error */ - bch2_trans_unlock(trans); - ret = bch2_fiemap_hole_pagecache(vinode, &dstart, &dend, false); - bch2_trans_relock(trans); - } + ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true); + if (ret == -EAGAIN) + ret = drop_locks_do(trans, + bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false)); if (ret < 0) return ret; @@ -1428,8 +1400,8 @@ bch2_fiemap_hole(struct btree_trans *trans, struct inode *vinode, u64 start, */ bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64)); delextent = bkey_extent_init(cur->kbuf.k); - delextent->k.p = POS(ei->v.i_ino, dstart >> 9); - bch2_key_resize(&delextent->k, (dend - dstart) >> 9); + delextent->k.p = POS(inode->ei_inum.inum, dend >> 9); + delextent->k.size = (dend - dstart) >> 9; bch2_bkey_append_ptr(&delextent->k_i, ptr); cur->flags = FIEMAP_EXTENT_DELALLOC; @@ -1437,115 +1409,142 @@ bch2_fiemap_hole(struct btree_trans *trans, struct inode *vinode, u64 start, return 0; } +static int bch2_next_fiemap_extent(struct btree_trans *trans, + struct bch_inode_info *inode, + u64 start, u64 end, + struct bch_fiemap_extent *cur) +{ + u32 snapshot; + int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot); + if (ret) + return ret; + + struct btree_iter iter; + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, + SPOS(inode->ei_inum.inum, start, snapshot), 0); + + struct bkey_s_c k = + bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end)); + ret = bkey_err(k); + if (ret) + goto err; + + ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur); + if (ret) + goto err; + + struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k); + + /* + * Does the pagecache or the btree take precedence? + * + * It _should_ be the pagecache, so that we correctly report delalloc + * extents when dirty in the pagecache (we're COW, after all). + * + * But we'd have to add per-sector writeback tracking to + * bch_folio_state, otherwise we report delalloc extents for clean + * cached data in the pagecache. + * + * We should do this, but even then fiemap won't report stable mappings: + * on bcachefs data moves around in the background (copygc, rebalance) + * and we don't provide a way for userspace to lock that out. + */ + if (k.k && + bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)), + pagecache_start)) { + bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k); + bch2_cut_front(iter.pos, cur->kbuf.k); + bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k); + cur->flags = 0; + } else if (k.k) { + bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k); + } + + if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) { + unsigned sectors = cur->kbuf.k->k.size; + s64 offset_into_extent = 0; + enum btree_id data_btree = BTREE_ID_extents; + int ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, + &cur->kbuf); + if (ret) + goto err; + + struct bkey_i *k = cur->kbuf.k; + sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent); + + bch2_cut_front(POS(k->k.p.inode, + bkey_start_offset(&k->k) + offset_into_extent), + k); + bch2_key_resize(&k->k, sectors); + k->k.p = iter.pos; + k->k.p.offset += k->k.size; + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, u64 start, u64 len) { struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(vinode); struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; struct bch_fiemap_extent cur, prev; - bool have_extent = false; int ret = 0; ret = fiemap_prep(&ei->v, info, start, &len, 0); if (ret) return ret; - struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); if (start + len < start) return -EINVAL; start >>= 9; + u64 end = (start + len) >> 9; bch2_bkey_buf_init(&cur.kbuf); bch2_bkey_buf_init(&prev.kbuf); - trans = bch2_trans_get(c); - - bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - POS(ei->v.i_ino, start), 0); + bkey_init(&prev.kbuf.k->k); - while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - bool have_delalloc = false; - - bch2_trans_begin(trans); + trans = bch2_trans_get(c); - u32 snapshot; - ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot); + while (start < end) { + ret = lockrestart_do(trans, + bch2_next_fiemap_extent(trans, ei, start, end, &cur)); if (ret) - continue; - - bch2_btree_iter_set_snapshot(trans, &iter, snapshot); + goto err; - k = bch2_btree_iter_peek_max(trans, &iter, end); - ret = bkey_err(k); - if (ret) - continue; + BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start); + BUG_ON(cur.kbuf.k->k.p.offset > end); - if (!k.k) + if (bkey_start_offset(&cur.kbuf.k->k) == end) break; - /* - * If a hole exists before the start of the extent key, scan the - * range for pagecache data that might be pending writeback and - * thus not yet exist in the extent tree. - */ - if (iter.pos.offset > start) { - ret = bch2_fiemap_hole(trans, vinode, start << 9, - iter.pos.offset << 9, &cur); - if (!ret) - have_delalloc = true; - else if (ret != -ENOENT) - break; - } - - /* process the current key if there's no delalloc to report */ - if (!have_delalloc) { - if (!bkey_extent_is_data(k.k) && - k.k->type != KEY_TYPE_reservation) { - start = bkey_start_offset(k.k) + k.k->size; - bch2_btree_iter_advance(trans, &iter); - continue; - } - - ret = bch2_fiemap_extent(trans, &iter, k, &cur); - if (ret) - break; - } - - /* - * Store the current extent in prev so we can flag the last - * extent on the way out. - */ - bch2_bkey_buf_realloc(&prev.kbuf, c, cur.kbuf.k->k.u64s); start = cur.kbuf.k->k.p.offset; - if (have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); ret = bch2_fill_extent(c, info, &prev); if (ret) - break; + goto err; } - bkey_copy(prev.kbuf.k, cur.kbuf.k); + bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k); prev.flags = cur.flags; - have_extent = true; - - bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, start)); } - bch2_trans_iter_exit(trans, &iter); - if (!ret && have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); prev.flags |= FIEMAP_EXTENT_LAST; ret = bch2_fill_extent(c, info, &prev); } - +err: bch2_trans_put(trans); bch2_bkey_buf_exit(&cur.kbuf, c); bch2_bkey_buf_exit(&prev.kbuf, c); - return ret < 0 ? ret : 0; + + return bch2_err_class(ret < 0 ? ret : 0); } static const struct vm_operations_struct bch_vm_ops = { From a1356ac7749cafc4e27aa62c0c4604b5dca4983e Mon Sep 17 00:00:00 2001 From: "e.kubanski" Date: Wed, 16 Apr 2025 12:19:08 +0200 Subject: [PATCH 1142/2924] xsk: Fix race condition in AF_XDP generic RX path Move rx_lock from xsk_socket to xsk_buff_pool. Fix synchronization for shared umem mode in generic RX path where multiple sockets share single xsk_buff_pool. RX queue is exclusive to xsk_socket, while FILL queue can be shared between multiple sockets. This could result in race condition where two CPU cores access RX path of two different sockets sharing the same umem. Protect both queues by acquiring spinlock in shared xsk_buff_pool. Lock contention may be minimized in the future by some per-thread FQ buffering. It's safe and necessary to move spin_lock_bh(rx_lock) after xsk_rcv_check(): * xs->pool and spinlock_init is synchronized by xsk_bind() -> xsk_is_bound() memory barriers. * xsk_rcv_check() may return true at the moment of xsk_release() or xsk_unbind_dev(), however this will not cause any data races or race conditions. xsk_unbind_dev() removes xdp socket from all maps and waits for completion of all outstanding rx operations. Packets in RX path will either complete safely or drop. Signed-off-by: Eryk Kubanski Fixes: bf0bdd1343efb ("xdp: fix race on generic receive path") Acked-by: Magnus Karlsson Link: https://patch.msgid.link/20250416101908.10919-1-e.kubanski@partner.samsung.com Signed-off-by: Jakub Kicinski --- include/net/xdp_sock.h | 3 --- include/net/xsk_buff_pool.h | 2 ++ net/xdp/xsk.c | 6 +++--- net/xdp/xsk_buff_pool.c | 1 + 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index a58ae7589d1212..e8bd6ddb7b1275 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -71,9 +71,6 @@ struct xdp_sock { */ u32 tx_budget_spent; - /* Protects generic receive. */ - spinlock_t rx_lock; - /* Statistics */ u64 rx_dropped; u64 rx_queue_full; diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 1dcd4d71468a5f..3b243ea70e38a9 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -53,6 +53,8 @@ struct xsk_buff_pool { refcount_t users; struct xdp_umem *umem; struct work_struct work; + /* Protects generic receive in shared and non-shared umem mode. */ + spinlock_t rx_lock; struct list_head free_list; struct list_head xskb_list; u32 heads_cnt; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5696af45bcf711..4abc81f33d3ebe 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -338,13 +338,14 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) u32 len = xdp_get_buff_len(xdp); int err; - spin_lock_bh(&xs->rx_lock); err = xsk_rcv_check(xs, xdp, len); if (!err) { + spin_lock_bh(&xs->pool->rx_lock); err = __xsk_rcv(xs, xdp, len); xsk_flush(xs); + spin_unlock_bh(&xs->pool->rx_lock); } - spin_unlock_bh(&xs->rx_lock); + return err; } @@ -1734,7 +1735,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, xs = xdp_sk(sk); xs->state = XSK_READY; mutex_init(&xs->mutex); - spin_lock_init(&xs->rx_lock); INIT_LIST_HEAD(&xs->map_list); spin_lock_init(&xs->map_list_lock); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 25a76c5ce0f128..c5181a9044add5 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -89,6 +89,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->addrs = umem->addrs; pool->tx_metadata_len = umem->tx_metadata_len; pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM; + spin_lock_init(&pool->rx_lock); INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); From bf20af07909925ec0ae6cd4f3b7be0279dfa8768 Mon Sep 17 00:00:00 2001 From: "e.kubanski" Date: Wed, 16 Apr 2025 13:29:25 +0200 Subject: [PATCH 1143/2924] xsk: Fix offset calculation in unaligned mode Bring back previous offset calculation behaviour in AF_XDP unaligned umem mode. In unaligned mode, upper 16 bits should contain data offset, lower 48 bits should contain only specific chunk location without offset. Remove pool->headroom duplication into 48bit address. Signed-off-by: Eryk Kubanski Fixes: bea14124bacb ("xsk: Get rid of xdp_buff_xsk::orig_addr") Acked-by: Magnus Karlsson Link: https://patch.msgid.link/20250416112925.7501-1-e.kubanski@partner.samsung.com Signed-off-by: Jakub Kicinski --- include/net/xsk_buff_pool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 3b243ea70e38a9..cac56e6b0869b4 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -240,8 +240,8 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb, return orig_addr; offset = xskb->xdp.data - xskb->xdp.data_hard_start; - orig_addr -= offset; offset += pool->headroom; + orig_addr -= offset; return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } From eacc77a73275895eca0e3655dc6c671853500e2e Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Wed, 23 Apr 2025 11:36:07 +0300 Subject: [PATCH 1144/2924] net/mlx5e: Use custom tunnel header for vxlan gbp Symbolic (e.g. "vxlan") and custom (e.g. "tunnel_header_0") tunnels cannot be combined, but the match params interface does not have fields for matching on vxlan gbp. To match vxlan bgp, the tc_tun layer uses tunnel_header_0. Allow matching on both VNI and GBP by matching the VNI with a custom tunnel header instead of the symbolic field name. Matching solely on the VNI continues to use the symbolic field name. Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250423083611.324567-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en/tc_tun_vxlan.c | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index 5c762a71818db0..7a18a469961db8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -165,9 +165,6 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, struct flow_match_enc_keyid enc_keyid; void *misc_c, *misc_v; - misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) return 0; @@ -182,6 +179,30 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse_vxlan_gbp_option(priv, spec, f); if (err) return err; + + /* We can't mix custom tunnel headers with symbolic ones and we + * don't have a symbolic field name for GBP, so we use custom + * tunnel headers in this case. We need hardware support to + * match on custom tunnel headers, but we already know it's + * supported because the previous call successfully checked for + * that. + */ + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_5); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_5); + + /* Shift by 8 to account for the reserved bits in the vxlan + * header after the VNI. + */ + MLX5_SET(fte_match_set_misc5, misc_c, tunnel_header_1, + be32_to_cpu(enc_keyid.mask->keyid) << 8); + MLX5_SET(fte_match_set_misc5, misc_v, tunnel_header_1, + be32_to_cpu(enc_keyid.key->keyid) << 8); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + + return 0; } /* match on VNI is required */ @@ -195,6 +216,11 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, be32_to_cpu(enc_keyid.mask->keyid)); MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, From 5d1a04f347e6cbf5ffe74da409a5d71fbe8c5f19 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 23 Apr 2025 11:36:08 +0300 Subject: [PATCH 1145/2924] net/mlx5: E-Switch, Initialize MAC Address for Default GID Initialize the source MAC address when creating the default GID entry. Since this entry is used only for loopback traffic, it only needs to be a unicast address. A zeroed-out MAC address is sufficient for this purpose. Without this fix, random bits would be assigned as the source address. If these bits formed a multicast address, the firmware would return an error, preventing the user from switching to switchdev mode: Error: mlx5_core: Failed setting eswitch to offloads. kernel answers: Invalid argument Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Maor Gottlieb Signed-off-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250423083611.324567-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index a42f6cd99b7448..f585ef5a342439 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -118,8 +118,8 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid * static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) { + u8 mac[ETH_ALEN] = {}; union ib_gid gid; - u8 mac[ETH_ALEN]; mlx5_rdma_make_default_gid(dev, &gid); return mlx5_core_roce_gid_set(dev, 0, From 172c034264c894518c012387f2de2f9d6443505d Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 23 Apr 2025 11:36:09 +0300 Subject: [PATCH 1146/2924] net/mlx5e: TC, Continue the attr process even if encap entry is invalid Previously the offload of the rule with header rewrite and mirror to both internal and external destinations is skipped if the encap entry is not valid. But it shouldn't because driver will try to offload it again if neighbor is updated and encap entry is valid, to replace the old FTE added for slow path. But the extra split attr doesn't exist at that time as the process is skipped, driver then fails to offload it. To fix this issue, remove the checking and continue the attr process if encap entry is invalid. Fixes: b11bde56246e ("net/mlx5e: TC, Offload rewrite and mirror to both internal and external dests") Signed-off-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250423083611.324567-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9ba99609999f4f..f1d908f611349f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1750,9 +1750,6 @@ extra_split_attr_dests_needed(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr !list_is_first(&attr->list, &flow->attrs)) return 0; - if (flow_flag_test(flow, SLOW)) - return 0; - esw_attr = attr->esw_attr; if (!esw_attr->split_count || esw_attr->split_count == esw_attr->out_count - 1) @@ -1766,7 +1763,7 @@ extra_split_attr_dests_needed(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { /* external dest with encap is considered as internal by firmware */ if (esw_attr->dests[i].vport == MLX5_VPORT_UPLINK && - !(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) + !(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) ext_dest = true; else int_dest = true; From 1c2940ec0ddf51c689ee9ab85ead85c11b77809d Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Wed, 23 Apr 2025 11:36:10 +0300 Subject: [PATCH 1147/2924] net/mlx5e: Fix lock order in mlx5e_tx_reporter_ptpsq_unhealthy_recover RTNL needs to be acquired before state_lock. Fixes: fdce06bda7e5 ("net/mlx5e: Acquire RTNL lock before RQs/SQs activation/deactivation") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250423083611.324567-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 532c7fa94d172a..dbd9482359e1ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -176,6 +176,7 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) priv = ptpsq->txqsq.priv; + rtnl_lock(); mutex_lock(&priv->state_lock); chs = &priv->channels; netdev = priv->netdev; @@ -183,22 +184,19 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) carrier_ok = netif_carrier_ok(netdev); netif_carrier_off(netdev); - rtnl_lock(); mlx5e_deactivate_priv_channels(priv); - rtnl_unlock(); mlx5e_ptp_close(chs->ptp); err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); - rtnl_lock(); mlx5e_activate_priv_channels(priv); - rtnl_unlock(); /* return carrier back if needed */ if (carrier_ok) netif_carrier_on(netdev); mutex_unlock(&priv->state_lock); + rtnl_unlock(); return err; } From 90538d23278a981e344d364e923162fce752afeb Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 23 Apr 2025 11:36:11 +0300 Subject: [PATCH 1148/2924] net/mlx5: E-switch, Fix error handling for enabling roce The cited commit assumes enabling roce always succeeds. But it is not true. Add error handling for it. Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Reviewed-by: Maor Gottlieb Signed-off-by: Mark Bloch Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250423083611.324567-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 9 +++++---- drivers/net/ethernet/mellanox/mlx5/core/rdma.h | 4 ++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a6a8eea5980ca3..0e3a977d533298 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3533,7 +3533,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) int err; mutex_init(&esw->offloads.termtbl_mutex); - mlx5_rdma_enable_roce(esw->dev); + err = mlx5_rdma_enable_roce(esw->dev); + if (err) + goto err_roce; err = mlx5_esw_host_number_init(esw); if (err) @@ -3594,6 +3596,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) esw_offloads_metadata_uninit(esw); err_metadata: mlx5_rdma_disable_roce(esw->dev); +err_roce: mutex_destroy(&esw->offloads.termtbl_mutex); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index f585ef5a342439..5c552b71e371c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -140,17 +140,17 @@ void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) mlx5_nic_vport_disable_roce(dev); } -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { int err; if (!MLX5_CAP_GEN(dev, roce)) - return; + return 0; err = mlx5_nic_vport_enable_roce(dev); if (err) { mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); - return; + return err; } err = mlx5_rdma_add_roce_addr(dev); @@ -165,10 +165,11 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) goto del_roce_addr; } - return; + return err; del_roce_addr: mlx5_rdma_del_roce_addr(dev); disable_roce: mlx5_nic_vport_disable_roce(dev); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h index 750cff2a71a4bb..3d9e76c3d42fb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h @@ -8,12 +8,12 @@ #ifdef CONFIG_MLX5_ESWITCH -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {} +static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {} #endif /* CONFIG_MLX5_ESWITCH */ From 1526a735a7620db8b22ea3d24d3ba7ac262b2aaf Mon Sep 17 00:00:00 2001 From: Michael Riesch Date: Thu, 10 Apr 2025 21:41:30 +0200 Subject: [PATCH 1149/2924] MAINTAINERS: add exclude for dt-bindings to imx entry Since the IMX (as in i.MX, the NXP SoCs) MAINTAINERS entry claims everything that contains the name "imx", hanges to device tree bindings for any Sony IMX image sensor are likely to be sent to the maintainers listed therein. Add the missing exclude to fix that. Fixes: da8b7f0fb02b ("MAINTAINERS: add all files matching "imx" and "mxs" to the IMX entry") Suggested-by: Sebastian Reichel Signed-off-by: Michael Riesch Reviewed-by: Sakari Ailus Signed-off-by: Shawn Guo --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 96b82704950184..c87b26eada7b72 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2519,6 +2519,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git F: arch/arm/boot/dts/nxp/imx/ F: arch/arm/boot/dts/nxp/mxs/ F: arch/arm64/boot/dts/freescale/ +X: Documentation/devicetree/bindings/media/i2c/ X: arch/arm64/boot/dts/freescale/fsl-* X: arch/arm64/boot/dts/freescale/qoriq-* X: drivers/media/i2c/ From d6aa0c178bf81f30ae4a780b2bca653daa2eb633 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 25 Apr 2025 09:37:39 +0800 Subject: [PATCH 1150/2924] ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA We call io_uring_cmd_complete_in_task() to schedule task_work for handling UBLK_U_IO_NEED_GET_DATA. This way is really not necessary because the current context is exactly the ublk queue context, so call ublk_dispatch_req() directly for handling UBLK_U_IO_NEED_GET_DATA. Fixes: 216c8f5ef0f2 ("ublk: replace monitor with cancelable uring_cmd") Tested-by: Jared Holzman Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250425013742.1079549-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 2de7b2bd409ddf..c4d4be4f6fbdaa 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1886,15 +1886,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) } } -static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, - int tag) -{ - struct ublk_queue *ubq = ublk_get_queue(ub, q_id); - struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); - - ublk_queue_cmd(ubq, req); -} - static inline int ublk_check_cmd_op(u32 cmd_op) { u32 ioc_type = _IOC_TYPE(cmd_op); @@ -2103,8 +2094,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) goto out; ublk_fill_io_cmd(io, cmd, ub_cmd->addr); - ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag); - break; + req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); + ublk_dispatch_req(ubq, req, issue_flags); + return -EIOCBQUEUED; default: goto out; } From f40139fde5278d81af3227444fd6e76a76b9506d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 25 Apr 2025 09:37:40 +0800 Subject: [PATCH 1151/2924] ublk: fix race between io_uring_cmd_complete_in_task and ublk_cancel_cmd ublk_cancel_cmd() calls io_uring_cmd_done() to complete uring_cmd, but we may have scheduled task work via io_uring_cmd_complete_in_task() for dispatching request, then kernel crash can be triggered. Fix it by not trying to canceling the command if ublk block request is started. Fixes: 216c8f5ef0f2 ("ublk: replace monitor with cancelable uring_cmd") Reported-by: Jared Holzman Tested-by: Jared Holzman Closes: https://lore.kernel.org/linux-block/d2179120-171b-47ba-b664-23242981ef19@nvidia.com/ Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250425013742.1079549-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c4d4be4f6fbdaa..40f971a66d3eb2 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1683,14 +1683,31 @@ static void ublk_start_cancel(struct ublk_queue *ubq) ublk_put_disk(disk); } -static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, +static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, unsigned int issue_flags) { + struct ublk_io *io = &ubq->ios[tag]; + struct ublk_device *ub = ubq->dev; + struct request *req; bool done; if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) return; + /* + * Don't try to cancel this command if the request is started for + * avoiding race between io_uring_cmd_done() and + * io_uring_cmd_complete_in_task(). + * + * Either the started request will be aborted via __ublk_abort_rq(), + * then this uring_cmd is canceled next time, or it will be done in + * task work function ublk_dispatch_req() because io_uring guarantees + * that ublk_dispatch_req() is always called + */ + req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); + if (req && blk_mq_request_started(req)) + return; + spin_lock(&ubq->cancel_lock); done = !!(io->flags & UBLK_IO_FLAG_CANCELED); if (!done) @@ -1722,7 +1739,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; struct task_struct *task; - struct ublk_io *io; if (WARN_ON_ONCE(!ubq)) return; @@ -1737,9 +1753,8 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, if (!ubq->canceling) ublk_start_cancel(ubq); - io = &ubq->ios[pdu->tag]; - WARN_ON_ONCE(io->cmd != cmd); - ublk_cancel_cmd(ubq, io, issue_flags); + WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd); + ublk_cancel_cmd(ubq, pdu->tag, issue_flags); } static inline bool ublk_queue_ready(struct ublk_queue *ubq) @@ -1752,7 +1767,7 @@ static void ublk_cancel_queue(struct ublk_queue *ubq) int i; for (i = 0; i < ubq->q_depth; i++) - ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED); + ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED); } /* Cancel all pending commands, must be called after del_gendisk() returns */ From a32f1923c6d6e9e727d00558a15ec0af6639de19 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 24 Apr 2025 22:15:50 +0200 Subject: [PATCH 1152/2924] crypto: scompress - increment scomp_scratch_users when already allocated Commit ddd0a42671c0 only increments scomp_scratch_users when it was 0, causing a panic when using ipcomp: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 UID: 0 PID: 619 Comm: ping Tainted: G N 6.15.0-rc3-net-00032-ga79be02bba5c #41 PREEMPT(full) Tainted: [N]=TEST Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 RIP: 0010:inflate_fast+0x5a2/0x1b90 [...] Call Trace: zlib_inflate+0x2d60/0x6620 deflate_sdecompress+0x166/0x350 scomp_acomp_comp_decomp+0x45f/0xa10 scomp_acomp_decompress+0x21/0x120 acomp_do_req_chain+0x3e5/0x4e0 ipcomp_input+0x212/0x550 xfrm_input+0x2de2/0x72f0 [...] Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Instead, let's keep the old increment, and decrement back to 0 if the scratch allocation fails. Fixes: ddd0a42671c0 ("crypto: scompress - Fix scratch allocation failure handling") Signed-off-by: Sabrina Dubroca Signed-off-by: Herbert Xu --- crypto/scompress.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/scompress.c b/crypto/scompress.c index 36934c78d1277d..ffeedcf20b0f18 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -163,11 +163,10 @@ static int crypto_scomp_init_tfm(struct crypto_tfm *tfm) if (ret) goto unlock; } - if (!scomp_scratch_users) { + if (!scomp_scratch_users++) { ret = crypto_scomp_alloc_scratches(); if (ret) - goto unlock; - scomp_scratch_users++; + scomp_scratch_users--; } unlock: mutex_unlock(&scomp_lock); From 9bbb8a07fd65fca0f29a869ec3f2435761a6c676 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 2 Dec 2024 11:19:55 +0100 Subject: [PATCH 1153/2924] tools/hv: update route parsing in kvp daemon After recent changes in the VM network stack, the host fails to display the IP addresses of the VM. As a result the "IP Addresses" column in the "Networking" tab in the Windows Hyper-V Manager is empty. This is caused by a change in the expected output of the "ip route show" command. Previously the gateway address was shown in the third row. Now the gateway addresses might be split into several lines of output. As a result, the string "ra" instead of an IP address is sent to the host. To me more specific, a VM with the wellknown wicked network managing tool still shows the expected output in recent openSUSE Tumbleweed snapshots: ip a show dev uplink;ip -4 route show;ip -6 route show 2: uplink: mtu 1500 qdisc mq state ... link/ether 00:15:5d:d0:93:08 brd ff:ff:ff:ff:ff:ff inet 1.2.3.4/22 brd 1.2.3.255 scope global uplink valid_lft forever preferred_lft forever inet6 fe80::215:5dff:fed0:9308/64 scope link proto kernel_ll valid_lft forever preferred_lft forever default via 1.2.3.254 dev uplink proto dhcp 1.2.3.0/22 dev uplink proto kernel scope link src 1.2.3.4 fe80::/64 dev uplink proto kernel metric 256 pref medium default via fe80::26fc:4e00:3b:74 dev uplink proto ra metric 1024 exp... default via fe80::6a22:8e00:fb:14f8 dev uplink proto ra metric 1024 e... A similar VM, but with NetworkManager as network managing tool: ip a show dev eth0;ip -4 route show;ip -6 route show 2: eth0: mtu 1500 qdisc mq state UP... link/ether 00:15:5d:d0:93:0b brd ff:ff:ff:ff:ff:ff inet 1.2.3.8/22 brd 1.2.3.255 scope global dynamic noprefixroute ... valid_lft 1022sec preferred_lft 1022sec inet6 fe80::215:5dff:fed0:930b/64 scope link noprefixroute valid_lft forever preferred_lft forever default via 1.2.3.254 dev eth0 proto dhcp src 1.2.3.8 metric 100 1.2.3.0/22 dev eth0 proto kernel scope link src 1.2.3.8 metric 100 fe80::/64 dev eth0 proto kernel metric 1024 pref medium default proto ra metric 20100 pref medium nexthop via fe80::6a22:8e00:fb:14f8 dev eth0 weight 1 nexthop via fe80::26fc:4e00:3b:74 dev eth0 weight 1 Adjust the route parsing to use a single line for each line of output. Also use a single shell invocation to retrieve both IPv4 and IPv6 information. The actual IP addresses are expected after the "via" keyword. Signed-off-by: Olaf Hering Reviewed-by: Shradha Gupta Link: https://lore.kernel.org/r/20241202102235.9701-1-olaf@aepfle.de Signed-off-by: Wei Liu Message-ID: <20241202102235.9701-1-olaf@aepfle.de> --- tools/hv/hv_kvp_daemon.c | 108 ++++++++++++++++++++++++++++++--------- 1 file changed, 84 insertions(+), 24 deletions(-) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 04ba035d67e9de..b9ce3aab15fea7 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -677,6 +678,88 @@ static void kvp_process_ipconfig_file(char *cmd, pclose(file); } +static bool kvp_verify_ip_address(const void *address_string) +{ + char verify_buf[sizeof(struct in6_addr)]; + + if (inet_pton(AF_INET, address_string, verify_buf) == 1) + return true; + if (inet_pton(AF_INET6, address_string, verify_buf) == 1) + return true; + return false; +} + +static void kvp_extract_routes(const char *line, void **output, size_t *remaining) +{ + static const char needle[] = "via "; + const char *match, *haystack = line; + + while ((match = strstr(haystack, needle))) { + const char *address, *next_char; + + /* Address starts after needle. */ + address = match + strlen(needle); + + /* The char following address is a space or end of line. */ + next_char = strpbrk(address, " \t\\"); + if (!next_char) + next_char = address + strlen(address) + 1; + + /* Enough room for address and semicolon. */ + if (*remaining >= (next_char - address) + 1) { + memcpy(*output, address, next_char - address); + /* Terminate string for verification. */ + memcpy(*output + (next_char - address), "", 1); + if (kvp_verify_ip_address(*output)) { + /* Advance output buffer. */ + *output += next_char - address; + *remaining -= next_char - address; + + /* Each address needs a trailing semicolon. */ + memcpy(*output, ";", 1); + *output += 1; + *remaining -= 1; + } + } + haystack = next_char; + } +} + +static void kvp_get_gateway(void *buffer, size_t buffer_len) +{ + static const char needle[] = "default "; + FILE *f; + void *output = buffer; + char *line = NULL; + size_t alloc_size = 0, remaining = buffer_len - 1; + ssize_t num_chars; + + /* Show route information in a single line, for each address family */ + f = popen("ip --oneline -4 route show;ip --oneline -6 route show", "r"); + if (!f) { + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + return; + } + while ((num_chars = getline(&line, &alloc_size, f)) > 0) { + /* Skip short lines. */ + if (num_chars <= strlen(needle)) + continue; + /* Skip lines without default route. */ + if (memcmp(line, needle, strlen(needle))) + continue; + /* Remove trailing newline to simplify further parsing. */ + if (line[num_chars - 1] == '\n') + line[num_chars - 1] = '\0'; + /* Search routes after match. */ + kvp_extract_routes(line + strlen(needle), &output, &remaining); + } + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + free(line); + pclose(f); +} + static void kvp_get_ipconfig_info(char *if_name, struct hv_kvp_ipaddr_value *buffer) { @@ -685,30 +768,7 @@ static void kvp_get_ipconfig_info(char *if_name, char *p; FILE *file; - /* - * Get the address of default gateway (ipv4). - */ - sprintf(cmd, "%s %s", "ip route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info. - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0); - - /* - * Get the address of default gateway (ipv6). - */ - sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info (ipv6). - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1); - + kvp_get_gateway(buffer->gate_way, sizeof(buffer->gate_way)); /* * Gather the DNS state. From e53e004e346062e15df9511bd4b5a19e34701384 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Wed, 16 Apr 2025 12:26:16 +0200 Subject: [PATCH 1154/2924] accel/ivpu: Correct DCT interrupt handling Fix improper use of dct_active_percent field in DCT interrupt handler causing DCT to never get enabled. Set dct_active_percent internally before IPC to ensure correct driver value even if IPC fails. Set default DCT value to 30 accordingly to HW architecture specification. Fixes: a19bffb10c46 ("accel/ivpu: Implement DCT handling") Signed-off-by: Karol Wachowski Signed-off-by: Maciej Falkowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250416102616.384577-1-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_btrs.h | 2 +- drivers/accel/ivpu/ivpu_pm.c | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h index 300f749971d4d6..d2d82651976d15 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.h +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -14,7 +14,7 @@ #define PLL_PROFILING_FREQ_DEFAULT 38400000 #define PLL_PROFILING_FREQ_HIGH 400000000 -#define DCT_DEFAULT_ACTIVE_PERCENT 15u +#define DCT_DEFAULT_ACTIVE_PERCENT 30u #define DCT_PERIOD_US 35300u int ivpu_hw_btrs_info_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index b5891e91f7abaf..ac0e224545961d 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -428,16 +428,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) active_us = (DCT_PERIOD_US * active_percent) / 100; inactive_us = DCT_PERIOD_US - active_us; + vdev->pm->dct_active_percent = active_percent; + + ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, D0i2: %uus)\n", + active_percent, active_us, inactive_us); + ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); if (ret) { ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = active_percent; - - ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n", - active_percent, active_us, inactive_us); return 0; } @@ -445,15 +446,16 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev) { int ret; + vdev->pm->dct_active_percent = 0; + + ivpu_dbg(vdev, PM, "DCT requested to be disabled\n"); + ret = ivpu_jsm_dct_disable(vdev); if (ret) { ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = 0; - - ivpu_dbg(vdev, PM, "DCT disabled\n"); return 0; } @@ -466,7 +468,7 @@ void ivpu_pm_irq_dct_work_fn(struct work_struct *work) if (ivpu_hw_btrs_dct_get_request(vdev, &enable)) return; - if (vdev->pm->dct_active_percent) + if (enable) ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT); else ret = ivpu_pm_dct_disable(vdev); From 759ee400d1d95ac903d81a1a229a117be822d077 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Wed, 16 Apr 2025 12:26:29 +0200 Subject: [PATCH 1155/2924] accel/ivpu: Fix the D0i2 disable test mode Correct setup of D0i2 disable which was by mistake set up to value 1 and use BIT(1) instead. Fixes: 011529fe8112 ("accel/ivpu: Implement D0i2 disable test mode") Signed-off-by: Andrzej Kacprowski Signed-off-by: Maciej Falkowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250416102629.384626-1-maciej.falkowski@linux.intel.com --- drivers/accel/ivpu/ivpu_fw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 5e1d709c6a464d..ccaaf6c100c022 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -544,7 +544,7 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->d0i3_entry_vpu_ts); ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", boot_params->system_time_us); - ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = %u\n", + ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = 0x%x\n", boot_params->power_profile); } @@ -646,7 +646,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->d0i3_residency_time_us = 0; boot_params->d0i3_entry_vpu_ts = 0; if (IVPU_WA(disable_d0i2)) - boot_params->power_profile = 1; + boot_params->power_profile |= BIT(1); boot_params->system_time_us = ktime_to_us(ktime_get_real()); wmb(); /* Flush WC buffers after writing bootparams */ From e079d7c4db5cba1e8a315dc93030dfb6c7b49459 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Apr 2025 06:59:41 +0200 Subject: [PATCH 1156/2924] devtmpfs: don't use vfs_getattr_nosec to query i_mode The recent move of the bdev_statx call to the low-level vfs_getattr_nosec helper caused it being used by devtmpfs, which leads to deadlocks in md teardown due to the block device lookup and put interfering with the unusual lifetime rules in md. But as handle_remove only works on inodes created and owned by devtmpfs itself there is no need to use vfs_getattr_nosec vs simply reading the mode from the inode directly. Switch to that to avoid the bdev lookup or any other unintentional side effect. Reported-by: Shin'ichiro Kawasaki Reported-by: Xiao Ni Fixes: 777d0961ff95 ("fs: move the bdex_statx call to vfs_getattr_nosec") Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250423045941.1667425-1-hch@lst.de Tested-by: Shin'ichiro Kawasaki Tested-by: Xiao Ni Tested-by: Ayush Jain Tested-by: Heiko Carstens Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- drivers/base/devtmpfs.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 6dd1a8860f1c92..31bfb3194b4c29 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -296,7 +296,7 @@ static int delete_path(const char *nodepath) return err; } -static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat) +static int dev_mynode(struct device *dev, struct inode *inode) { /* did we create it */ if (inode->i_private != &thread) @@ -304,13 +304,13 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta /* does the dev_t match */ if (is_blockdev(dev)) { - if (!S_ISBLK(stat->mode)) + if (!S_ISBLK(inode->i_mode)) return 0; } else { - if (!S_ISCHR(stat->mode)) + if (!S_ISCHR(inode->i_mode)) return 0; } - if (stat->rdev != dev->devt) + if (inode->i_rdev != dev->devt) return 0; /* ours */ @@ -321,20 +321,16 @@ static int handle_remove(const char *nodename, struct device *dev) { struct path parent; struct dentry *dentry; - struct kstat stat; - struct path p; + struct inode *inode; int deleted = 0; - int err; + int err = 0; dentry = kern_path_locked(nodename, &parent); if (IS_ERR(dentry)) return PTR_ERR(dentry); - p.mnt = parent.mnt; - p.dentry = dentry; - err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, - AT_STATX_SYNC_AS_STAT); - if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { + inode = d_inode(dentry); + if (dev_mynode(dev, inode)) { struct iattr newattrs; /* * before unlinking this node, reset permissions @@ -342,7 +338,7 @@ static int handle_remove(const char *nodename, struct device *dev) */ newattrs.ia_uid = GLOBAL_ROOT_UID; newattrs.ia_gid = GLOBAL_ROOT_GID; - newattrs.ia_mode = stat.mode & ~0777; + newattrs.ia_mode = inode->i_mode & ~0777; newattrs.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE; inode_lock(d_inode(dentry)); From e6f141b332ddd9007756751b6afd24f799488fd8 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 23 Apr 2025 18:00:23 +0000 Subject: [PATCH 1157/2924] splice: remove duplicate noinline from pipe_clear_nowait pipe_clear_nowait has two noinline macros, but we only need one. I checked the whole tree, and this is the only occurrence: $ grep -r "noinline .* noinline" fs/splice.c:static noinline void noinline pipe_clear_nowait(struct file *file) $ Fixes: 0f99fc513ddd ("splice: clear FMODE_NOWAIT on file if splice/vmsplice is used") Signed-off-by: "T.J. Mercier" Link: https://lore.kernel.org/20250423180025.2627670-1-tjmercier@google.com Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- fs/splice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/splice.c b/fs/splice.c index 90d464241f151c..4d6df083e0c06a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -45,7 +45,7 @@ * here if set to avoid blocking other users of this pipe if splice is * being done on it. */ -static noinline void noinline pipe_clear_nowait(struct file *file) +static noinline void pipe_clear_nowait(struct file *file) { fmode_t fmode = READ_ONCE(file->f_mode); From 1d28f25d6a6c968ebee8ee6d5b65691d5bfcf95f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Wed, 23 Apr 2025 06:34:22 -0600 Subject: [PATCH 1158/2924] MAINTAINERS: hfs/hfsplus: add myself as maintainer I used to maintain Allwinner SoC cpufreq and thermal drivers and have some work experience in the F2FS file system. I volunteered to maintain the code together with Slava and Adrian. Signed-off-by: Yangtao Li Link: https://lore.kernel.org/20250423123423.2062619-1-frank.li@vivo.com Acked-by: John Paul Adrian Glaubitz Signed-off-by: Christian Brauner --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index b8d1e41c27f675..c3116274cec36f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10459,6 +10459,7 @@ F: drivers/infiniband/hw/hfi1 HFS FILESYSTEM M: Viacheslav Dubeyko M: John Paul Adrian Glaubitz +M: Yangtao Li L: linux-fsdevel@vger.kernel.org S: Maintained F: Documentation/filesystems/hfs.rst @@ -10467,6 +10468,7 @@ F: fs/hfs/ HFSPLUS FILESYSTEM M: Viacheslav Dubeyko M: John Paul Adrian Glaubitz +M: Yangtao Li L: linux-fsdevel@vger.kernel.org S: Maintained F: Documentation/filesystems/hfsplus.rst From f520bed25d17bb31c2d2d72b0a785b593a4e3179 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 Apr 2025 15:22:47 +0200 Subject: [PATCH 1159/2924] fs/xattr: Fix handling of AT_FDCWD in setxattrat(2) and getxattrat(2) Currently, setxattrat(2) and getxattrat(2) are wrongly handling the calls of the from setxattrat(AF_FDCWD, NULL, AT_EMPTY_PATH, ...) and fail with -EBADF error instead of operating on CWD. Fix it. Fixes: 6140be90ec70 ("fs/xattr: add *at family syscalls") Signed-off-by: Jan Kara Link: https://lore.kernel.org/20250424132246.16822-2-jack@suse.cz Signed-off-by: Christian Brauner --- fs/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index 02bee149ad9674..fabb2a04501ee7 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -703,7 +703,7 @@ static int path_setxattrat(int dfd, const char __user *pathname, return error; filename = getname_maybe_null(pathname, at_flags); - if (!filename) { + if (!filename && dfd >= 0) { CLASS(fd, f)(dfd); if (fd_empty(f)) error = -EBADF; @@ -847,7 +847,7 @@ static ssize_t path_getxattrat(int dfd, const char __user *pathname, return error; filename = getname_maybe_null(pathname, at_flags); - if (!filename) { + if (!filename && dfd >= 0) { CLASS(fd, f)(dfd); if (fd_empty(f)) return -EBADF; From 3dfc0445274252301dfcf3980d79acceea6409d1 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 7 Apr 2025 16:33:06 +0300 Subject: [PATCH 1160/2924] MAINTAINERS: Assign maintainer for the port controller drivers Especially the port manager (tcpm.c) is so major driver that it should have somebody watching over it who really understands it, and the port controller interface in general. Assigning Badhri as the designated reviewer and restoring the status to Maintained from Orphan. Signed-off-by: Heikki Krogerus Cc: Badhri Jagan Sridharan Acked-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20250407133306.387576-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 96b82704950184..71408373ce2249 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -25126,9 +25126,13 @@ S: Maintained F: drivers/usb/typec/mux/pi3usb30532.c USB TYPEC PORT CONTROLLER DRIVERS +M: Badhri Jagan Sridharan L: linux-usb@vger.kernel.org -S: Orphan -F: drivers/usb/typec/tcpm/ +S: Maintained +F: drivers/usb/typec/tcpm/tcpci.c +F: drivers/usb/typec/tcpm/tcpm.c +F: include/linux/usb/tcpci.h +F: include/linux/usb/tcpm.h USB TYPEC TUSB1046 MUX DRIVER M: Romain Gantois From 71cfb1f88f772fb92a68a4ab85b16ccd5cc8535d Mon Sep 17 00:00:00 2001 From: Zixian Zeng Date: Fri, 25 Apr 2025 10:28:12 +0800 Subject: [PATCH 1161/2924] spi: dt-bindings: snps,dw-apb-ssi: Merge duplicate compatible entry Microsemi Ocelot/Jaguar2, Renesas RZ/N1 and T-HEAD TH1520 SoC-specific compatibles, which eventually fallback to the generic DW ssi compatible, it's better to combine them in single entry Suggested-by: Rob Herring Signed-off-by: Zixian Zeng Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250425-sfg-spi-v6-1-2dbe7bb46013@gmail.com Signed-off-by: Mark Brown --- .../bindings/spi/snps,dw-apb-ssi.yaml | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index bccd00a1ddd0ad..a43d2fb9942d85 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -56,19 +56,17 @@ properties: enum: - snps,dw-apb-ssi - snps,dwc-ssi-1.01a - - description: Microsemi Ocelot/Jaguar2 SoC SPI Controller - items: - - enum: - - mscc,ocelot-spi - - mscc,jaguar2-spi - - const: snps,dw-apb-ssi - description: Microchip Sparx5 SoC SPI Controller const: microchip,sparx5-spi - description: Amazon Alpine SPI Controller const: amazon,alpine-dw-apb-ssi - - description: Renesas RZ/N1 SPI Controller + - description: Vendor controllers which use snps,dw-apb-ssi as fallback items: - - const: renesas,rzn1-spi + - enum: + - mscc,ocelot-spi + - mscc,jaguar2-spi + - renesas,rzn1-spi + - thead,th1520-spi - const: snps,dw-apb-ssi - description: Intel Keem Bay SPI Controller const: intel,keembay-ssi @@ -88,10 +86,6 @@ properties: - renesas,r9a06g032-spi # RZ/N1D - renesas,r9a06g033-spi # RZ/N1S - const: renesas,rzn1-spi # RZ/N1 - - description: T-HEAD TH1520 SoC SPI Controller - items: - - const: thead,th1520-spi - - const: snps,dw-apb-ssi reg: minItems: 1 From 0889c4d28ad79b55ee8cf3c818e9d86203ace8f0 Mon Sep 17 00:00:00 2001 From: Zixian Zeng Date: Fri, 25 Apr 2025 10:28:13 +0800 Subject: [PATCH 1162/2924] spi: dt-bindings: snps,dw-apb-ssi: Add compatible for SOPHGO SG2042 SoC Sophgo SG2042 ships an SPI controller [1] compatible with the Synopsys DW-SPI IP. Add SoC-specific compatible string and use the generic one as fallback. Link: https://github.com/sophgo/sophgo-doc/blob/main/SG2042/TRM/source/SPI.rst [1] Signed-off-by: Zixian Zeng Acked-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20250425-sfg-spi-v6-2-2dbe7bb46013@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index a43d2fb9942d85..53d00ca643b318 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -66,6 +66,7 @@ properties: - mscc,ocelot-spi - mscc,jaguar2-spi - renesas,rzn1-spi + - sophgo,sg2042-spi - thead,th1520-spi - const: snps,dw-apb-ssi - description: Intel Keem Bay SPI Controller From 8e4d3d8a5e51e07bd0d6cdd81b5e4af79f796927 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 24 Apr 2025 17:43:33 +0530 Subject: [PATCH 1163/2924] spi: spi-mem: Add fix to avoid divide error For some SPI flash memory operations, dummy bytes are not mandatory. For example, in Winbond SPINAND flash memory devices, the `write_cache` and `update_cache` operation variants have zero dummy bytes. Calculating the duration for SPI memory operations with zero dummy bytes causes a divide error when `ncycles` is calculated in the spi_mem_calc_op_duration(). Add changes to skip the 'ncylcles' calculation for zero dummy bytes. Following divide error is fixed by this change: Oops: divide error: 0000 [#1] PREEMPT SMP NOPTI ... ? do_trap+0xdb/0x100 ? do_error_trap+0x75/0xb0 ? spi_mem_calc_op_duration+0x56/0xb0 ? exc_divide_error+0x3b/0x70 ? spi_mem_calc_op_duration+0x56/0xb0 ? asm_exc_divide_error+0x1b/0x20 ? spi_mem_calc_op_duration+0x56/0xb0 ? spinand_select_op_variant+0xee/0x190 [spinand] spinand_match_and_init+0x13e/0x1a0 [spinand] spinand_manufacturer_match+0x6e/0xa0 [spinand] spinand_probe+0x357/0x7f0 [spinand] ? kernfs_activate+0x87/0xd0 spi_mem_probe+0x7a/0xb0 spi_probe+0x7d/0x130 Fixes: 226d6cb3cb79 ("spi: spi-mem: Estimate the time taken by operations") Suggested-by: Krishnamoorthi M Co-developed-by: Akshata MukundShetty Signed-off-by: Akshata MukundShetty Signed-off-by: Raju Rangoju Link: https://patch.msgid.link/20250424121333.417372-1-Raju.Rangoju@amd.com Reviewed-by: Miquel Raynal Signed-off-by: Mark Brown --- drivers/spi/spi-mem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index a31a1db07aa4d2..5db0639d3b0159 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -596,7 +596,11 @@ u64 spi_mem_calc_op_duration(struct spi_mem_op *op) ns_per_cycles = 1000000000 / op->max_freq; ncycles += ((op->cmd.nbytes * 8) / op->cmd.buswidth) / (op->cmd.dtr ? 2 : 1); ncycles += ((op->addr.nbytes * 8) / op->addr.buswidth) / (op->addr.dtr ? 2 : 1); - ncycles += ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + + /* Dummy bytes are optional for some SPI flash memory operations */ + if (op->dummy.nbytes) + ncycles += ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + ncycles += ((op->data.nbytes * 8) / op->data.buswidth) / (op->data.dtr ? 2 : 1); return ncycles * ns_per_cycles; From ba85883d160515129b58873f74376a89faf21c7c Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 25 Apr 2025 11:31:39 +0530 Subject: [PATCH 1164/2924] ASoC: amd: acp: Fix NULL pointer deref on acp resume path update chip data using dev_get_drvdata(dev->parent) instead of dev_get_platdata(dev). BUG: kernel NULL pointer dereference, address: 0000000000000010 Call Trace: ? __pfx_platform_pm_resume+0x10/0x10 platform_pm_resume+0x28/0x60 dpm_run_callback+0x51/0x1a0 device_resume+0x1a6/0x2b0 dpm_resume+0x168/0x230 Fixes: e3933683b25e ("ASoC: amd: acp: Remove redundant acp_dev_data structure") Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250425060144.1773265-1-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-rembrandt.c | 2 +- sound/soc/amd/acp/acp-renoir.c | 2 +- sound/soc/amd/acp/acp63.c | 2 +- sound/soc/amd/acp/acp70.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/amd/acp/acp-rembrandt.c b/sound/soc/amd/acp/acp-rembrandt.c index 746b6ed7202965..cccdd10c345e6d 100644 --- a/sound/soc/amd/acp/acp-rembrandt.c +++ b/sound/soc/amd/acp/acp-rembrandt.c @@ -199,7 +199,7 @@ static void rembrandt_audio_remove(struct platform_device *pdev) static int rmb_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp-renoir.c b/sound/soc/amd/acp/acp-renoir.c index ebf0106fc73742..04f6d70b6a92d8 100644 --- a/sound/soc/amd/acp/acp-renoir.c +++ b/sound/soc/amd/acp/acp-renoir.c @@ -146,7 +146,7 @@ static void renoir_audio_remove(struct platform_device *pdev) static int rn_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp63.c b/sound/soc/amd/acp/acp63.c index 52d895e624c723..1f15c96a9b9461 100644 --- a/sound/soc/amd/acp/acp63.c +++ b/sound/soc/amd/acp/acp63.c @@ -250,7 +250,7 @@ static void acp63_audio_remove(struct platform_device *pdev) static int acp63_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp70.c b/sound/soc/amd/acp/acp70.c index 6d5f5ade075c87..217b717e9beb75 100644 --- a/sound/soc/amd/acp/acp70.c +++ b/sound/soc/amd/acp/acp70.c @@ -182,7 +182,7 @@ static void acp_acp70_audio_remove(struct platform_device *pdev) static int acp70_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; From 6d9b64156d849e358cb49b6b899fb0b7d262bda8 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 25 Apr 2025 11:31:40 +0530 Subject: [PATCH 1165/2924] ASoC: amd: acp: Fix NULL pointer deref in acp_i2s_set_tdm_slot Update chip data using dev_get_drvdata(dev->parent) to fix NULL pointer deref in acp_i2s_set_tdm_slot. Fixes: cd60dec8994c ("ASoC: amd: acp: Refactor TDM slots selction based on acp revision id") Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250425060144.1773265-2-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index a38409dd1d3409..70fa54d568ef68 100644 --- a/sound/soc/amd/acp/acp-i2s.c +++ b/sound/soc/amd/acp/acp-i2s.c @@ -97,7 +97,7 @@ static int acp_i2s_set_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, u32 rx_mas struct acp_stream *stream; int slot_len, no_of_slots; - chip = dev_get_platdata(dev); + chip = dev_get_drvdata(dev->parent); switch (slot_width) { case SLOT_WIDTH_8: slot_len = 8; From 138e6da0392ed067d0db7b5b5b4582c3668cfcf9 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Fri, 25 Apr 2025 11:31:41 +0530 Subject: [PATCH 1166/2924] ASoC: amd: acp: Fix devm_snd_soc_register_card(acp-pdm-mach) failure Add condition check to fix devm_snd_soc_register_card(acp-pdm-mach) deferred probe failure, when pdm DSD entry is not available. [15.910456] acp_mach acp-pdm-mach: devm_snd_soc_register_card(acp-pdm-mach) failed: -517 [15.910536] platform acp-pdm-mach: deferred probe pending: (reason unknown) Fixes: 6e60db74b69c2 ("ASoC: amd: acp: Refactor acp machine select") Signed-off-by: Venkata Prasad Potturu Link: https://patch.msgid.link/20250425060144.1773265-3-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-legacy-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/amd/acp/acp-legacy-common.c b/sound/soc/amd/acp/acp-legacy-common.c index b4d68484e06df8..ba8db0851daac4 100644 --- a/sound/soc/amd/acp/acp-legacy-common.c +++ b/sound/soc/amd/acp/acp-legacy-common.c @@ -450,7 +450,7 @@ int acp_machine_select(struct acp_chip_info *chip) struct snd_soc_acpi_mach *mach; int size, platform; - if (chip->flag == FLAG_AMD_LEGACY_ONLY_DMIC) { + if (chip->flag == FLAG_AMD_LEGACY_ONLY_DMIC && chip->is_pdm_dev) { platform = chip->acp_rev; chip->mach_dev = platform_device_register_data(chip->dev, "acp-pdm-mach", PLATFORM_DEVID_NONE, &platform, From a549b927ea3f5e50b1394209b64e6e17e31d4db8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 20 Apr 2025 10:56:59 +0200 Subject: [PATCH 1167/2924] ASoC: Intel: bytcr_rt5640: Add DMI quirk for Acer Aspire SW3-013 Acer Aspire SW3-013 requires the very same quirk as other Acer Aspire model for making it working. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220011 Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20250420085716.12095-1-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 6446cda0f85727..0f3b8f44e70112 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -576,6 +576,19 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, + { /* Acer Aspire SW3-013 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW3-013"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_2000UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Acer"), From 75aea4b0656ead0facd13d2aae4cb77326e53d2f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 24 Apr 2025 06:47:14 -0700 Subject: [PATCH 1168/2924] perf/x86/intel: Only check the group flag for X86 leader A warning in intel_pmu_lbr_counters_reorder() may be triggered by below perf command. perf record -e "{cpu-clock,cycles/call-graph="lbr"/}" -- sleep 1 It's because the group is mistakenly treated as a branch counter group. The hw.flags of the leader are used to determine whether a group is a branch counters group. However, the hw.flags is only available for a hardware event. The field to store the flags is a union type. For a software event, it's a hrtimer. The corresponding bit may be set if the leader is a software event. For a branch counter group and other groups that have a group flag (e.g., topdown, PEBS counters snapshotting, and ACR), the leader must be a X86 event. Check the X86 event before checking the flag. The patch only fixes the issue for the branch counter group. The following patch will fix the other groups. There may be an alternative way to fix the issue by moving the hw.flags out of the union type. It should work for now. But it's still possible that the flags will be used by other types of events later. As long as that type of event is used as a leader, a similar issue will be triggered. So the alternative way is dropped. Fixes: 33744916196b ("perf/x86/intel: Support branch counters logging") Closes: https://lore.kernel.org/lkml/20250412091423.1839809-1-luogengkun@huaweicloud.com/ Reported-by: Luo Gengkun Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250424134718.311934-2-kan.liang@linux.intel.com --- arch/x86/events/core.c | 2 +- arch/x86/events/perf_event.h | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 3a4f031d2f4401..139ad80d1df34f 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -754,7 +754,7 @@ void x86_pmu_enable_all(int added) } } -static inline int is_x86_event(struct perf_event *event) +int is_x86_event(struct perf_event *event) { int i; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2c0ce0e9545e50..4237c379cdc53b 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -110,9 +110,16 @@ static inline bool is_topdown_event(struct perf_event *event) return is_metric_event(event) || is_slots_event(event); } +int is_x86_event(struct perf_event *event); + +static inline bool check_leader_group(struct perf_event *leader, int flags) +{ + return is_x86_event(leader) ? !!(leader->hw.flags & flags) : false; +} + static inline bool is_branch_counters_group(struct perf_event *event) { - return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS; + return check_leader_group(event->group_leader, PERF_X86_EVENT_BRANCH_COUNTERS); } static inline bool is_pebs_counter_event_group(struct perf_event *event) From e9988ad7b1744991118ac348a804f9395368a284 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 24 Apr 2025 06:47:15 -0700 Subject: [PATCH 1169/2924] perf/x86/intel: Check the X86 leader for pebs_counter_event_group The PEBS counters snapshotting group also requires a group flag in the leader. The leader must be a X86 event. Fixes: e02e9b0374c3 ("perf/x86/intel: Support PEBS counters snapshotting") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250424134718.311934-3-kan.liang@linux.intel.com --- arch/x86/events/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4237c379cdc53b..46d120597babe5 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -124,7 +124,7 @@ static inline bool is_branch_counters_group(struct perf_event *event) static inline bool is_pebs_counter_event_group(struct perf_event *event) { - return event->group_leader->hw.flags & PERF_X86_EVENT_PEBS_CNTR; + return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR); } struct amd_nb { From 7da9960b59fb7e590eb8538c9428db55a4ea2d23 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 24 Apr 2025 06:47:18 -0700 Subject: [PATCH 1170/2924] perf/x86/intel/ds: Fix counter backwards of non-precise events counters-snapshotting The counter backwards may be observed in the PMI handler when counters-snapshotting some non-precise events in the freq mode. For the non-precise events, it's possible the counters-snapshotting records a positive value for an overflowed PEBS event. Then the HW auto-reload mechanism reset the counter to 0 immediately. Because the pebs_event_reset is cleared in the freq mode, which doesn't set the PERF_X86_EVENT_AUTO_RELOAD. In the PMI handler, 0 will be read rather than the positive value recorded in the counters-snapshotting record. The counters-snapshotting case has to be specially handled. Since the event value has been updated when processing the counters-snapshotting record, only needs to set the new period for the counter via x86_pmu_set_period(). Fixes: e02e9b0374c3 ("perf/x86/intel: Support PEBS counters snapshotting") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250424134718.311934-6-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 18c3ab579b8b3e..9b20acc0e932e2 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2379,8 +2379,25 @@ __intel_pmu_pebs_last_event(struct perf_event *event, */ intel_pmu_save_and_restart_reload(event, count); } - } else - intel_pmu_save_and_restart(event); + } else { + /* + * For a non-precise event, it's possible the + * counters-snapshotting records a positive value for the + * overflowed event. Then the HW auto-reload mechanism + * reset the counter to 0 immediately, because the + * pebs_event_reset is cleared if the PERF_X86_EVENT_AUTO_RELOAD + * is not set. The counter backwards may be observed in a + * PMI handler. + * + * Since the event value has been updated when processing the + * counters-snapshotting record, only needs to set the new + * period for the counter. + */ + if (is_pebs_counter_event_group(event)) + static_call(x86_pmu_set_period)(event); + else + intel_pmu_save_and_restart(event); + } } static __always_inline void From 98698ca0e58734bc5c1c24e5bbc7429f981cd186 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Wed, 23 Apr 2025 11:47:15 +0100 Subject: [PATCH 1171/2924] staging: bcm2835-camera: Initialise dev in v4l2_dev Commit 42a2f6664e18 ("staging: vc04_services: Move global g_state to vchiq_state") changed mmal_init to pass dev->v4l2_dev.dev to vchiq_mmal_init, however nothing iniitialised dev->v4l2_dev, so we got a NULL pointer dereference. Set dev->v4l2_dev.dev during bcm2835_mmal_probe. The device pointer could be passed into v4l2_device_register to set it, however that also has other effects that would need additional changes. Fixes: 42a2f6664e18 ("staging: vc04_services: Move global g_state to vchiq_state") Cc: stable@vger.kernel.org Signed-off-by: Dave Stevenson Reviewed-by: Stefan Wahren Link: https://lore.kernel.org/r/20250423-staging-bcm2835-v4l2-fix-v2-1-3227f0ba4700@raspberrypi.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c index b839b50ac26a54..fa7ea4ca4c36f4 100644 --- a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c +++ b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c @@ -1900,6 +1900,7 @@ static int bcm2835_mmal_probe(struct vchiq_device *device) __func__, ret); goto free_dev; } + dev->v4l2_dev.dev = &device->dev; /* setup v4l controls */ ret = bcm2835_mmal_init_controls(dev, &dev->ctrl_handler); From 2ca34b508774aaa590fc3698a54204706ecca4ba Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Fri, 18 Apr 2025 21:29:37 -0400 Subject: [PATCH 1172/2924] staging: axis-fifo: Correct handling of tx_fifo_depth for size validation Remove erroneous subtraction of 4 from the total FIFO depth read from device tree. The stored depth is for checking against total capacity, not initial vacancy. This prevented writes near the FIFO's full size. The check performed just before data transfer, which uses live reads of the TDFV register to determine current vacancy, correctly handles the initial Depth - 4 hardware state and subsequent FIFO fullness. Fixes: 4a965c5f89de ("staging: add driver for Xilinx AXI-Stream FIFO v4.1 IP core") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Link: https://lore.kernel.org/r/20250419012937.674924-1-gshahrouzi@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/axis-fifo/axis-fifo.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/axis-fifo/axis-fifo.c b/drivers/staging/axis-fifo/axis-fifo.c index 7540c20090c78b..04b3dc3cfe7c84 100644 --- a/drivers/staging/axis-fifo/axis-fifo.c +++ b/drivers/staging/axis-fifo/axis-fifo.c @@ -775,9 +775,6 @@ static int axis_fifo_parse_dt(struct axis_fifo *fifo) goto end; } - /* IP sets TDFV to fifo depth - 4 so we will do the same */ - fifo->tx_fifo_depth -= 4; - ret = get_dts_property(fifo, "xlnx,use-rx-data", &fifo->has_rx_fifo); if (ret) { dev_err(fifo->dt_device, "missing xlnx,use-rx-data property\n"); From c6e8d85fafa7193613db37da29c0e8d6e2515b13 Mon Sep 17 00:00:00 2001 From: Gabriel Shahrouzi Date: Fri, 18 Apr 2025 20:43:06 -0400 Subject: [PATCH 1173/2924] staging: axis-fifo: Remove hardware resets for user errors The axis-fifo driver performs a full hardware reset (via reset_ip_core()) in several error paths within the read and write functions. This reset flushes both TX and RX FIFOs and resets the AXI-Stream links. Allow the user to handle the error without causing hardware disruption or data loss in other FIFO paths. Fixes: 4a965c5f89de ("staging: add driver for Xilinx AXI-Stream FIFO v4.1 IP core") Cc: stable@vger.kernel.org Signed-off-by: Gabriel Shahrouzi Link: https://lore.kernel.org/r/20250419004306.669605-1-gshahrouzi@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/axis-fifo/axis-fifo.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/staging/axis-fifo/axis-fifo.c b/drivers/staging/axis-fifo/axis-fifo.c index 04b3dc3cfe7c84..351f983ef9149b 100644 --- a/drivers/staging/axis-fifo/axis-fifo.c +++ b/drivers/staging/axis-fifo/axis-fifo.c @@ -393,16 +393,14 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, bytes_available = ioread32(fifo->base_addr + XLLF_RLR_OFFSET); if (!bytes_available) { - dev_err(fifo->dt_device, "received a packet of length 0 - fifo core will be reset\n"); - reset_ip_core(fifo); + dev_err(fifo->dt_device, "received a packet of length 0\n"); ret = -EIO; goto end_unlock; } if (bytes_available > len) { - dev_err(fifo->dt_device, "user read buffer too small (available bytes=%zu user buffer bytes=%zu) - fifo core will be reset\n", + dev_err(fifo->dt_device, "user read buffer too small (available bytes=%zu user buffer bytes=%zu)\n", bytes_available, len); - reset_ip_core(fifo); ret = -EINVAL; goto end_unlock; } @@ -411,8 +409,7 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, /* this probably can't happen unless IP * registers were previously mishandled */ - dev_err(fifo->dt_device, "received a packet that isn't word-aligned - fifo core will be reset\n"); - reset_ip_core(fifo); + dev_err(fifo->dt_device, "received a packet that isn't word-aligned\n"); ret = -EIO; goto end_unlock; } @@ -433,7 +430,6 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, if (copy_to_user(buf + copied * sizeof(u32), tmp_buf, copy * sizeof(u32))) { - reset_ip_core(fifo); ret = -EFAULT; goto end_unlock; } @@ -542,7 +538,6 @@ static ssize_t axis_fifo_write(struct file *f, const char __user *buf, if (copy_from_user(tmp_buf, buf + copied * sizeof(u32), copy * sizeof(u32))) { - reset_ip_core(fifo); ret = -EFAULT; goto end_unlock; } From 75673fda0c557ae26078177dd14d4857afbf128d Mon Sep 17 00:00:00 2001 From: Brandon Kammerdiener Date: Thu, 24 Apr 2025 11:32:51 -0400 Subject: [PATCH 1174/2924] bpf: fix possible endless loop in BPF map iteration The _safe variant used here gets the next element before running the callback, avoiding the endless loop condition. Signed-off-by: Brandon Kammerdiener Link: https://lore.kernel.org/r/20250424153246.141677-2-brandon.kammerdiener@intel.com Signed-off-by: Alexei Starovoitov Acked-by: Hou Tao --- kernel/bpf/hashtab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 5a5adc66b8e224..92b606d600207c 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -2189,7 +2189,7 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_ b = &htab->buckets[i]; rcu_read_lock(); head = &b->head; - hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { + hlist_nulls_for_each_entry_safe(elem, n, head, hash_node) { key = elem->key; if (is_percpu) { /* current cpu value for percpu map */ From 3d9c463f959f41cd6616ebf8a5d15e9d3ef04f16 Mon Sep 17 00:00:00 2001 From: Brandon Kammerdiener Date: Thu, 24 Apr 2025 11:32:55 -0400 Subject: [PATCH 1175/2924] selftests/bpf: add test for softlock when modifying hashmap while iterating Add test that modifies the map while it's being iterated in such a way that hangs the kernel thread unless the _safe fix is applied to bpf_for_each_hash_elem. Signed-off-by: Brandon Kammerdiener Link: https://lore.kernel.org/r/20250424153246.141677-3-brandon.kammerdiener@intel.com Signed-off-by: Alexei Starovoitov Acked-by: Hou Tao --- .../selftests/bpf/prog_tests/for_each.c | 37 +++++++++++++++++++ .../bpf/progs/for_each_hash_modify.c | 30 +++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/for_each_hash_modify.c diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 09f6487f58b9cb..5fea3209566ed5 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -6,6 +6,7 @@ #include "for_each_array_map_elem.skel.h" #include "for_each_map_elem_write_key.skel.h" #include "for_each_multi_maps.skel.h" +#include "for_each_hash_modify.skel.h" static unsigned int duration; @@ -203,6 +204,40 @@ static void test_multi_maps(void) for_each_multi_maps__destroy(skel); } +static void test_hash_modify(void) +{ + struct for_each_hash_modify *skel; + int max_entries, i, err; + __u64 key, val; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1 + ); + + skel = for_each_hash_modify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load")) + return; + + max_entries = bpf_map__max_entries(skel->maps.hashmap); + for (i = 0; i < max_entries; i++) { + key = i; + val = i; + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); + if (!ASSERT_OK(err, "map_update")) + goto out; + } + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_OK(topts.retval, "retval"); + +out: + for_each_hash_modify__destroy(skel); +} + void test_for_each(void) { if (test__start_subtest("hash_map")) @@ -213,4 +248,6 @@ void test_for_each(void) test_write_map_key(); if (test__start_subtest("multi_maps")) test_multi_maps(); + if (test__start_subtest("hash_modify")) + test_hash_modify(); } diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c new file mode 100644 index 00000000000000..82307166f78925 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Intel Corporation */ +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 128); + __type(key, __u64); + __type(value, __u64); +} hashmap SEC(".maps"); + +static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg) +{ + bpf_map_delete_elem(map, key); + bpf_map_update_elem(map, key, val, 0); + return 0; +} + +SEC("tc") +int test_pkt_access(struct __sk_buff *skb) +{ + (void)skb; + + bpf_for_each_map_elem(&hashmap, cb, NULL, 0); + + return 0; +} From f88886de0927a2adf4c1b4c5c1f1d31d2023ef74 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 24 Apr 2025 18:45:42 -0700 Subject: [PATCH 1176/2924] bpf: Add namespace to BPF internal symbols Add namespace to BPF internal symbols used by light skeleton to prevent abuse and document with the code their allowed usage. Fixes: b1d18a7574d0 ("bpf: Extend sys_bpf commands for bpf_syscall programs.") Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20250425014542.62385-1-alexei.starovoitov@gmail.com --- Documentation/bpf/bpf_devel_QA.rst | 8 ++++++++ kernel/bpf/preload/bpf_preload_kern.c | 1 + kernel/bpf/syscall.c | 6 +++--- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index de27e1620821c4..0acb4c9b8d90f3 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -382,6 +382,14 @@ In case of new BPF instructions, once the changes have been accepted into the Linux kernel, please implement support into LLVM's BPF back end. See LLVM_ section below for further information. +Q: What "BPF_INTERNAL" symbol namespace is for? +----------------------------------------------- +A: Symbols exported as BPF_INTERNAL can only be used by BPF infrastructure +like preload kernel modules with light skeleton. Most symbols outside +of BPF_INTERNAL are not expected to be used by code outside of BPF either. +Symbols may lack the designation because they predate the namespaces, +or due to an oversight. + Stable submission ================= diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c index 2fdf3c978db1bd..774e5a5388112e 100644 --- a/kernel/bpf/preload/bpf_preload_kern.c +++ b/kernel/bpf/preload/bpf_preload_kern.c @@ -89,5 +89,6 @@ static void __exit fini(void) } late_initcall(load); module_exit(fini); +MODULE_IMPORT_NS("BPF_INTERNAL"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Embedded BPF programs for introspection in bpffs"); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9794446bc8c6ca..64c3393e827000 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1583,7 +1583,7 @@ struct bpf_map *bpf_map_get(u32 ufd) return map; } -EXPORT_SYMBOL(bpf_map_get); +EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL"); struct bpf_map *bpf_map_get_with_uref(u32 ufd) { @@ -3364,7 +3364,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) bpf_link_inc(link); return link; } -EXPORT_SYMBOL(bpf_link_get_from_fd); +EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); static void bpf_tracing_link_release(struct bpf_link *link) { @@ -6020,7 +6020,7 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size) return ____bpf_sys_bpf(cmd, attr, size); } } -EXPORT_SYMBOL(kern_sys_bpf); +EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL"); static const struct bpf_func_proto bpf_sys_bpf_proto = { .func = bpf_sys_bpf, From 3d59224947b024c9b2aa6e149a1537d449adb828 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 24 Apr 2025 21:50:14 +0530 Subject: [PATCH 1177/2924] cpufreq: ACPI: Re-sync CPU boost state on system resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During CPU hotunplug events (such as those occurring during suspend/resume cycles), platform firmware may modify the CPU boost state. If boost was disabled prior to CPU removal, it correctly remains disabled upon re-plug. However, if firmware re-enables boost while the CPU is offline, the CPU may return with boost enabled—even if it was originally disabled—once it is hotplugged back in. This leads to inconsistent behavior and violates user or kernel policy expectations. To maintain consistency, ensure the boost state is re-synchronized with the kernel policy when a CPU is hotplugged back in. Note: This re-synchronization is not necessary during the initial call to ->init() for a CPU, as the cpufreq core handles it via cpufreq_online(). At that point, acpi_cpufreq_driver.boost_enabled is initialized to the value returned by boost_state(0). Fixes: 2b16c631832d ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()") Reported-by: Nicholas Chin Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220013 Tested-by: Nicholas Chin Reviewed-by: Lifeng Zheng Signed-off-by: Viresh Kumar Link: https://patch.msgid.link/9c7de55fb06015c1b77e7dafd564b659838864e0.1745511526.git.viresh.kumar@linaro.org Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 924314cdeebcec..d26b610e4f24f9 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -909,8 +909,19 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) - policy->boost_supported = true; + if (acpi_cpufreq_driver.set_boost) { + if (policy->boost_supported) { + /* + * The firmware may have altered boost state while the + * CPU was offline (for example during a suspend-resume + * cycle). + */ + if (policy->boost_enabled != boost_state(cpu)) + set_boost(policy, policy->boost_enabled); + } else { + policy->boost_supported = true; + } + } return result; From 548762f05d19c5542db7590bcdfb9be1fb928376 Mon Sep 17 00:00:00 2001 From: Haoran Jiang Date: Fri, 25 Apr 2025 17:50:42 +0800 Subject: [PATCH 1178/2924] samples/bpf: Fix compilation failure for samples/bpf on LoongArch Fedora When building the latest samples/bpf on LoongArch Fedora make M=samples/bpf There are compilation errors as follows: In file included from ./linux/samples/bpf/sockex2_kern.c:2: In file included from ./include/uapi/linux/in.h:25: In file included from ./include/linux/socket.h:8: In file included from ./include/linux/uio.h:9: In file included from ./include/linux/thread_info.h:60: In file included from ./arch/loongarch/include/asm/thread_info.h:15: In file included from ./arch/loongarch/include/asm/processor.h:13: In file included from ./arch/loongarch/include/asm/cpu-info.h:11: ./arch/loongarch/include/asm/loongarch.h:13:10: fatal error: 'larchintrin.h' file not found ^~~~~~~~~~~~~~~ 1 error generated. larchintrin.h is included in /usr/lib64/clang/14.0.6/include, and the header file location is specified at compile time. Test on LoongArch Fedora: https://github.com/fedora-remix-loongarch/releases-info Signed-off-by: Haoran Jiang Signed-off-by: zhangxi Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250425095042.838824-1-jianghaoran@kylinos.cn --- samples/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5b632635e00dde..95a4fa1f1e4474 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -376,7 +376,7 @@ $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ - -I$(LIBBPF_INCLUDE) \ + -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ From e7dcd1304b9ac08c428c84fdb96ce6a04ff2114f Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Fri, 25 Apr 2025 19:57:02 +0200 Subject: [PATCH 1179/2924] sched_ext: Remove duplicate BTF_ID_FLAGS definitions Some kfuncs specific to the idle CPU selection policy are registered in both the scx_kfunc_ids_any and scx_kfunc_ids_idle blocks, even though they should only be defined in the latter. Remove the duplicates from scx_kfunc_ids_any. Fixes: 337d1b354a297 ("sched_ext: Move built-in idle CPU selection policy to a separate file") Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index ac79067dc87e65..dddb0af36f8daf 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -7369,12 +7369,6 @@ BTF_ID_FLAGS(func, scx_bpf_nr_cpu_ids) BTF_ID_FLAGS(func, scx_bpf_get_possible_cpumask, KF_ACQUIRE) BTF_ID_FLAGS(func, scx_bpf_get_online_cpumask, KF_ACQUIRE) BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE) -BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE) -BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE) -BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE) -BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle) -BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) -BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_cpu_rq) From 6d0417e4e1cf66fd917f06f0454958362714ef7d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 9 Apr 2025 16:08:48 -0400 Subject: [PATCH 1180/2924] Bluetooth: hci_conn: Fix not setting conn_timeout for Broadcast Receiver Broadcast Receiver requires creating PA sync but the command just generates a status so this makes use of __hci_cmd_sync_status_sk to wait for HCI_EV_LE_PA_SYNC_ESTABLISHED, also because of this chance it is not longer necessary to use a custom method to serialize the process of creating the PA sync since the cmd_work_sync itself ensures only one command would be pending which now awaits for HCI_EV_LE_PA_SYNC_ESTABLISHED before proceeding to next connection. Fixes: 4a5e0ba68676 ("Bluetooth: ISO: Do not emit LE PA Create Sync if previous is pending") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 2 + include/net/bluetooth/hci_core.h | 13 +++-- include/net/bluetooth/hci_sync.h | 2 + net/bluetooth/hci_conn.c | 92 +------------------------------- net/bluetooth/hci_event.c | 6 +-- net/bluetooth/hci_sync.c | 87 ++++++++++++++++++++++++++++-- 6 files changed, 95 insertions(+), 107 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a8586c3058c7cd..8ea7a063cc6510 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1931,6 +1931,8 @@ struct hci_cp_le_pa_create_sync { __u8 sync_cte_type; } __packed; +#define HCI_OP_LE_PA_CREATE_SYNC_CANCEL 0x2045 + #define HCI_OP_LE_PA_TERM_SYNC 0x2046 struct hci_cp_le_pa_term_sync { __le16 handle; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 5115da34f88128..f20368b9a5d200 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1113,10 +1113,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, return NULL; } -static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, - __u8 sid, - bdaddr_t *dst, - __u8 dst_type) +static inline struct hci_conn * +hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1124,8 +1122,10 @@ static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || bacmp(&c->dst, dst) || - c->dst_type != dst_type || c->sid != sid) + if (c->type != ISO_LINK) + continue; + + if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags)) continue; rcu_read_unlock(); @@ -1524,7 +1524,6 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); bool hci_iso_setup_path(struct hci_conn *conn); int hci_le_create_cis_pending(struct hci_dev *hdev); -int hci_pa_create_sync_pending(struct hci_dev *hdev); int hci_le_big_create_sync_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 7e2cf0cca939a1..93dac4c7f9e3ef 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -185,3 +185,5 @@ int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, struct hci_conn_params *params); + +int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7e1b53857648df..c3112ce39f6724 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2064,95 +2064,6 @@ static int create_big_sync(struct hci_dev *hdev, void *data) return hci_le_create_big(conn, &conn->iso_qos); } -static void create_pa_complete(struct hci_dev *hdev, void *data, int err) -{ - bt_dev_dbg(hdev, ""); - - if (err) - bt_dev_err(hdev, "Unable to create PA: %d", err); -} - -static bool hci_conn_check_create_pa_sync(struct hci_conn *conn) -{ - if (conn->type != ISO_LINK || conn->sid == HCI_SID_INVALID) - return false; - - return true; -} - -static int create_pa_sync(struct hci_dev *hdev, void *data) -{ - struct hci_cp_le_pa_create_sync cp = {0}; - struct hci_conn *conn; - int err = 0; - - hci_dev_lock(hdev); - - rcu_read_lock(); - - /* The spec allows only one pending LE Periodic Advertising Create - * Sync command at a time. If the command is pending now, don't do - * anything. We check for pending connections after each PA Sync - * Established event. - * - * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E - * page 2493: - * - * If the Host issues this command when another HCI_LE_Periodic_ - * Advertising_Create_Sync command is pending, the Controller shall - * return the error code Command Disallowed (0x0C). - */ - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (test_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags)) - goto unlock; - } - - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (hci_conn_check_create_pa_sync(conn)) { - struct bt_iso_qos *qos = &conn->iso_qos; - - cp.options = qos->bcast.options; - cp.sid = conn->sid; - cp.addr_type = conn->dst_type; - bacpy(&cp.addr, &conn->dst); - cp.skip = cpu_to_le16(qos->bcast.skip); - cp.sync_timeout = cpu_to_le16(qos->bcast.sync_timeout); - cp.sync_cte_type = qos->bcast.sync_cte_type; - - break; - } - } - -unlock: - rcu_read_unlock(); - - hci_dev_unlock(hdev); - - if (bacmp(&cp.addr, BDADDR_ANY)) { - hci_dev_set_flag(hdev, HCI_PA_SYNC); - set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); - - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); - if (!err) - err = hci_update_passive_scan_sync(hdev); - - if (err) { - hci_dev_clear_flag(hdev, HCI_PA_SYNC); - clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); - } - } - - return err; -} - -int hci_pa_create_sync_pending(struct hci_dev *hdev) -{ - /* Queue start pa_create_sync and scan */ - return hci_cmd_sync_queue(hdev, create_pa_sync, - NULL, create_pa_complete); -} - struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos) @@ -2167,10 +2078,11 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, conn->dst_type = dst_type; conn->sid = sid; conn->state = BT_LISTEN; + conn->conn_timeout = msecs_to_jiffies(qos->bcast.sync_timeout * 10); hci_conn_hold(conn); - hci_pa_create_sync_pending(hdev); + hci_connect_pa_sync(hdev, conn); return conn; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5f808f0b0e9a2a..ea7ccafd055a76 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6378,8 +6378,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_PA_SYNC); - conn = hci_conn_hash_lookup_sid(hdev, ev->sid, &ev->bdaddr, - ev->bdaddr_type); + conn = hci_conn_hash_lookup_create_pa_sync(hdev); if (!conn) { bt_dev_err(hdev, "Unable to find connection for dst %pMR sid 0x%2.2x", @@ -6418,9 +6417,6 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, } unlock: - /* Handle any other pending PA sync command */ - hci_pa_create_sync_pending(hdev); - hci_dev_unlock(hdev); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 609b035e5c9041..99c116b056ceeb 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2693,16 +2693,16 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) /* Force address filtering if PA Sync is in progress */ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { - struct hci_cp_le_pa_create_sync *sent; + struct hci_conn *conn; - sent = hci_sent_cmd_data(hdev, HCI_OP_LE_PA_CREATE_SYNC); - if (sent) { + conn = hci_conn_hash_lookup_create_pa_sync(hdev); + if (conn) { struct conn_params pa; memset(&pa, 0, sizeof(pa)); - bacpy(&pa.addr, &sent->addr); - pa.addr_type = sent->addr_type; + bacpy(&pa.addr, &conn->dst); + pa.addr_type = conn->dst_type; /* Clear first since there could be addresses left * behind. @@ -6895,3 +6895,80 @@ int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, return __hci_cmd_sync_status(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } + +static void create_pa_complete(struct hci_dev *hdev, void *data, int err) +{ + bt_dev_dbg(hdev, "err %d", err); + + if (!err) + return; + + hci_dev_clear_flag(hdev, HCI_PA_SYNC); + + if (err == -ECANCELED) + return; + + hci_dev_lock(hdev); + + hci_update_passive_scan_sync(hdev); + + hci_dev_unlock(hdev); +} + +static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) +{ + struct hci_cp_le_pa_create_sync cp; + struct hci_conn *conn = data; + struct bt_iso_qos *qos = &conn->iso_qos; + int err; + + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + + if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC)) + return -EBUSY; + + /* Mark HCI_CONN_CREATE_PA_SYNC so hci_update_passive_scan_sync can + * program the address in the allow list so PA advertisements can be + * received. + */ + set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); + + hci_update_passive_scan_sync(hdev); + + memset(&cp, 0, sizeof(cp)); + cp.options = qos->bcast.options; + cp.sid = conn->sid; + cp.addr_type = conn->dst_type; + bacpy(&cp.addr, &conn->dst); + cp.skip = cpu_to_le16(qos->bcast.skip); + cp.sync_timeout = cpu_to_le16(qos->bcast.sync_timeout); + cp.sync_cte_type = qos->bcast.sync_cte_type; + + /* The spec allows only one pending LE Periodic Advertising Create + * Sync command at a time so we forcefully wait for PA Sync Established + * event since cmd_work can only schedule one command at a time. + * + * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E + * page 2493: + * + * If the Host issues this command when another HCI_LE_Periodic_ + * Advertising_Create_Sync command is pending, the Controller shall + * return the error code Command Disallowed (0x0C). + */ + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_PA_CREATE_SYNC, + sizeof(cp), &cp, + HCI_EV_LE_PA_SYNC_ESTABLISHED, + conn->conn_timeout, NULL); + if (err == -ETIMEDOUT) + __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC_CANCEL, + 0, NULL, HCI_CMD_TIMEOUT); + + return err; +} + +int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn) +{ + return hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, + create_pa_complete); +} From 024421cf39923927ab2b5fe895d1d922b9abe67f Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 16 Apr 2025 15:43:32 -0400 Subject: [PATCH 1181/2924] Bluetooth: hci_conn: Fix not setting timeout for BIG Create Sync BIG Create Sync requires the command to just generates a status so this makes use of __hci_cmd_sync_status_sk to wait for HCI_EVT_LE_BIG_SYNC_ESTABLISHED, also because of this chance it is not longer necessary to use a custom method to serialize the process of creating the BIG sync since the cmd_work_sync itself ensures only one command would be pending which now awaits for HCI_EVT_LE_BIG_SYNC_ESTABLISHED before proceeding to next connection. Fixes: 42ecf1947135 ("Bluetooth: ISO: Do not emit LE BIG Create Sync if previous is pending") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 2 +- include/net/bluetooth/hci_core.h | 7 ++- include/net/bluetooth/hci_sync.h | 1 + net/bluetooth/hci_conn.c | 89 ++------------------------------ net/bluetooth/hci_event.c | 9 ++-- net/bluetooth/hci_sync.c | 63 ++++++++++++++++++++++ net/bluetooth/iso.c | 26 +++++----- 7 files changed, 88 insertions(+), 109 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 8ea7a063cc6510..797992019f9ee5 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2832,7 +2832,7 @@ struct hci_evt_le_create_big_complete { __le16 bis_handle[]; } __packed; -#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d +#define HCI_EVT_LE_BIG_SYNC_ESTABLISHED 0x1d struct hci_evt_le_big_sync_estabilished { __u8 status; __u8 handle; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f20368b9a5d200..522d837a23fa46 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1524,7 +1524,6 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); bool hci_iso_setup_path(struct hci_conn *conn); int hci_le_create_cis_pending(struct hci_dev *hdev); -int hci_le_big_create_sync_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, @@ -1565,9 +1564,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 data_len, __u8 *data); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); -int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, - struct bt_iso_qos *qos, - __u16 sync_handle, __u8 num_bis, __u8 bis[]); +int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, + struct bt_iso_qos *qos, __u16 sync_handle, + __u8 num_bis, __u8 bis[]); int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 93dac4c7f9e3ef..72558c826aa1b4 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -187,3 +187,4 @@ int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, struct hci_conn_params *params); int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn); +int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index c3112ce39f6724..6533e281ada333 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2087,89 +2087,9 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, return conn; } -static bool hci_conn_check_create_big_sync(struct hci_conn *conn) -{ - if (!conn->num_bis) - return false; - - return true; -} - -static void big_create_sync_complete(struct hci_dev *hdev, void *data, int err) -{ - bt_dev_dbg(hdev, ""); - - if (err) - bt_dev_err(hdev, "Unable to create BIG sync: %d", err); -} - -static int big_create_sync(struct hci_dev *hdev, void *data) -{ - DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11); - struct hci_conn *conn; - - rcu_read_lock(); - - pdu->num_bis = 0; - - /* The spec allows only one pending LE BIG Create Sync command at - * a time. If the command is pending now, don't do anything. We - * check for pending connections after each BIG Sync Established - * event. - * - * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E - * page 2586: - * - * If the Host sends this command when the Controller is in the - * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_ - * Established event has not been generated, the Controller shall - * return the error code Command Disallowed (0x0C). - */ - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (test_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags)) - goto unlock; - } - - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (hci_conn_check_create_big_sync(conn)) { - struct bt_iso_qos *qos = &conn->iso_qos; - - set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); - - pdu->handle = qos->bcast.big; - pdu->sync_handle = cpu_to_le16(conn->sync_handle); - pdu->encryption = qos->bcast.encryption; - memcpy(pdu->bcode, qos->bcast.bcode, - sizeof(pdu->bcode)); - pdu->mse = qos->bcast.mse; - pdu->timeout = cpu_to_le16(qos->bcast.timeout); - pdu->num_bis = conn->num_bis; - memcpy(pdu->bis, conn->bis, conn->num_bis); - - break; - } - } - -unlock: - rcu_read_unlock(); - - if (!pdu->num_bis) - return 0; - - return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC, - struct_size(pdu, bis, pdu->num_bis), pdu); -} - -int hci_le_big_create_sync_pending(struct hci_dev *hdev) -{ - /* Queue big_create_sync */ - return hci_cmd_sync_queue_once(hdev, big_create_sync, - NULL, big_create_sync_complete); -} - -int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, - struct bt_iso_qos *qos, - __u16 sync_handle, __u8 num_bis, __u8 bis[]) +int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, + struct bt_iso_qos *qos, __u16 sync_handle, + __u8 num_bis, __u8 bis[]) { int err; @@ -2186,9 +2106,10 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, hcon->num_bis = num_bis; memcpy(hcon->bis, bis, num_bis); + hcon->conn_timeout = msecs_to_jiffies(qos->bcast.timeout * 10); } - return hci_le_big_create_sync_pending(hdev); + return hci_connect_big_sync(hdev, hcon); } static void create_big_complete(struct hci_dev *hdev, void *data, int err) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ea7ccafd055a76..6d6061111ac567 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6928,7 +6928,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); - if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_BIG_SYNC_ESTABILISHED, + if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_BIG_SYNC_ESTABLISHED, flex_array_size(ev, bis, ev->num_bis))) return; @@ -6999,9 +6999,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, } unlock: - /* Handle any other pending BIG sync command */ - hci_le_big_create_sync_pending(hdev); - hci_dev_unlock(hdev); } @@ -7123,8 +7120,8 @@ static const struct hci_le_ev { hci_le_create_big_complete_evt, sizeof(struct hci_evt_le_create_big_complete), HCI_MAX_EVENT_SIZE), - /* [0x1d = HCI_EV_LE_BIG_SYNC_ESTABILISHED] */ - HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABILISHED, + /* [0x1d = HCI_EV_LE_BIG_SYNC_ESTABLISHED] */ + HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABLISHED, hci_le_big_sync_established_evt, sizeof(struct hci_evt_le_big_sync_estabilished), HCI_MAX_EVENT_SIZE), diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 99c116b056ceeb..e56b1cbedab908 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6972,3 +6972,66 @@ int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn) return hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, create_pa_complete); } + +static void create_big_complete(struct hci_dev *hdev, void *data, int err) +{ + struct hci_conn *conn = data; + + bt_dev_dbg(hdev, "err %d", err); + + if (err == -ECANCELED) + return; + + if (hci_conn_valid(hdev, conn)) + clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); +} + +static int hci_le_big_create_sync(struct hci_dev *hdev, void *data) +{ + DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, 0x11); + struct hci_conn *conn = data; + struct bt_iso_qos *qos = &conn->iso_qos; + int err; + + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + + set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); + + memset(cp, 0, sizeof(*cp)); + cp->handle = qos->bcast.big; + cp->sync_handle = cpu_to_le16(conn->sync_handle); + cp->encryption = qos->bcast.encryption; + memcpy(cp->bcode, qos->bcast.bcode, sizeof(cp->bcode)); + cp->mse = qos->bcast.mse; + cp->timeout = cpu_to_le16(qos->bcast.timeout); + cp->num_bis = conn->num_bis; + memcpy(cp->bis, conn->bis, conn->num_bis); + + /* The spec allows only one pending LE BIG Create Sync command at + * a time, so we forcefully wait for BIG Sync Established event since + * cmd_work can only schedule one command at a time. + * + * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E + * page 2586: + * + * If the Host sends this command when the Controller is in the + * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_ + * Established event has not been generated, the Controller shall + * return the error code Command Disallowed (0x0C). + */ + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_BIG_CREATE_SYNC, + struct_size(cp, bis, cp->num_bis), cp, + HCI_EVT_LE_BIG_SYNC_ESTABLISHED, + conn->conn_timeout, NULL); + if (err == -ETIMEDOUT) + hci_le_big_terminate_sync(hdev, cp->handle); + + return err; +} + +int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn) +{ + return hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn, + create_big_complete); +} diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 3501a991f1c64e..2819cda616bce8 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1462,14 +1462,13 @@ static void iso_conn_big_sync(struct sock *sk) lock_sock(sk); if (!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { - err = hci_le_big_create_sync(hdev, iso_pi(sk)->conn->hcon, - &iso_pi(sk)->qos, - iso_pi(sk)->sync_handle, - iso_pi(sk)->bc_num_bis, - iso_pi(sk)->bc_bis); + err = hci_conn_big_create_sync(hdev, iso_pi(sk)->conn->hcon, + &iso_pi(sk)->qos, + iso_pi(sk)->sync_handle, + iso_pi(sk)->bc_num_bis, + iso_pi(sk)->bc_bis); if (err) - bt_dev_err(hdev, "hci_le_big_create_sync: %d", - err); + bt_dev_err(hdev, "hci_big_create_sync: %d", err); } release_sock(sk); @@ -1922,7 +1921,7 @@ static void iso_conn_ready(struct iso_conn *conn) hcon); } else if (test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) { ev = hci_recv_event_data(hcon->hdev, - HCI_EVT_LE_BIG_SYNC_ESTABILISHED); + HCI_EVT_LE_BIG_SYNC_ESTABLISHED); /* Get reference to PA sync parent socket, if it exists */ parent = iso_get_sock(&hcon->src, &hcon->dst, @@ -2113,12 +2112,11 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { - err = hci_le_big_create_sync(hdev, - hcon, - &iso_pi(sk)->qos, - iso_pi(sk)->sync_handle, - iso_pi(sk)->bc_num_bis, - iso_pi(sk)->bc_bis); + err = hci_conn_big_create_sync(hdev, hcon, + &iso_pi(sk)->qos, + iso_pi(sk)->sync_handle, + iso_pi(sk)->bc_num_bis, + iso_pi(sk)->bc_bis); if (err) { bt_dev_err(hdev, "hci_le_big_create_sync: %d", err); From d1af1f02ef8653dea4573e444136c8331189cd59 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Thu, 17 Apr 2025 09:18:42 +0530 Subject: [PATCH 1182/2924] Bluetooth: btintel_pcie: Avoid redundant buffer allocation Reuse the skb buffer provided by the PCIe driver to pass it onto the stack, instead of copying it to a new skb. Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Kiran K Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 33 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index c1e69fcc9c4fac..efe6ad6d4dc0f8 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -957,8 +957,10 @@ static int btintel_pcie_recv_event(struct hci_dev *hdev, struct sk_buff *skb) /* This is a debug event that comes from IML and OP image when it * starts execution. There is no need pass this event to stack. */ - if (skb->data[2] == 0x97) + if (skb->data[2] == 0x97) { + hci_recv_diag(hdev, skb); return 0; + } } return hci_recv_frame(hdev, skb); @@ -974,7 +976,6 @@ static int btintel_pcie_recv_frame(struct btintel_pcie_data *data, u8 pkt_type; u16 plen; u32 pcie_pkt_type; - struct sk_buff *new_skb; void *pdata; struct hci_dev *hdev = data->hdev; @@ -1051,24 +1052,20 @@ static int btintel_pcie_recv_frame(struct btintel_pcie_data *data, bt_dev_dbg(hdev, "pkt_type: 0x%2.2x len: %u", pkt_type, plen); - new_skb = bt_skb_alloc(plen, GFP_ATOMIC); - if (!new_skb) { - bt_dev_err(hdev, "Failed to allocate memory for skb of len: %u", - skb->len); - ret = -ENOMEM; - goto exit_error; - } - - hci_skb_pkt_type(new_skb) = pkt_type; - skb_put_data(new_skb, skb->data, plen); + hci_skb_pkt_type(skb) = pkt_type; hdev->stat.byte_rx += plen; + skb_trim(skb, plen); if (pcie_pkt_type == BTINTEL_PCIE_HCI_EVT_PKT) - ret = btintel_pcie_recv_event(hdev, new_skb); + ret = btintel_pcie_recv_event(hdev, skb); else - ret = hci_recv_frame(hdev, new_skb); + ret = hci_recv_frame(hdev, skb); + skb = NULL; /* skb is freed in the callee */ exit_error: + if (skb) + kfree_skb(skb); + if (ret) hdev->stat.err_rx++; @@ -1202,8 +1199,6 @@ static void btintel_pcie_rx_work(struct work_struct *work) struct btintel_pcie_data *data = container_of(work, struct btintel_pcie_data, rx_work); struct sk_buff *skb; - int err; - struct hci_dev *hdev = data->hdev; if (test_bit(BTINTEL_PCIE_HWEXP_INPROGRESS, &data->flags)) { /* Unlike usb products, controller will not send hardware @@ -1224,11 +1219,7 @@ static void btintel_pcie_rx_work(struct work_struct *work) /* Process the sk_buf in queue and send to the HCI layer */ while ((skb = skb_dequeue(&data->rx_skb_q))) { - err = btintel_pcie_recv_frame(data, skb); - if (err) - bt_dev_err(hdev, "Failed to send received frame: %d", - err); - kfree_skb(skb); + btintel_pcie_recv_frame(data, skb); } } From 0317b033abcd1d8dd2798f0e2de5e84543d0bd22 Mon Sep 17 00:00:00 2001 From: En-Wei Wu Date: Mon, 21 Apr 2025 21:00:37 +0800 Subject: [PATCH 1183/2924] Bluetooth: btusb: avoid NULL pointer dereference in skb_dequeue() A NULL pointer dereference can occur in skb_dequeue() when processing a QCA firmware crash dump on WCN7851 (0489:e0f3). [ 93.672166] Bluetooth: hci0: ACL memdump size(589824) [ 93.672475] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 93.672517] Workqueue: hci0 hci_devcd_rx [bluetooth] [ 93.672598] RIP: 0010:skb_dequeue+0x50/0x80 The issue stems from handle_dump_pkt_qca() returning 0 even when a dump packet is successfully processed. This is because it incorrectly forwards the return value of hci_devcd_init() (which returns 0 on success). As a result, the caller (btusb_recv_acl_qca() or btusb_recv_evt_qca()) assumes the packet was not handled and passes it to hci_recv_frame(), leading to premature kfree() of the skb. Later, hci_devcd_rx() attempts to dequeue the same skb from the dump queue, resulting in a NULL pointer dereference. Fix this by: 1. Making handle_dump_pkt_qca() return 0 on success and negative errno on failure, consistent with kernel conventions. 2. Splitting dump packet detection into separate functions for ACL and event packets for better structure and readability. This ensures dump packets are properly identified and consumed, avoiding double handling and preventing NULL pointer access. Fixes: 20981ce2d5a5 ("Bluetooth: btusb: Add WCN6855 devcoredump support") Signed-off-by: En-Wei Wu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 101 +++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 28 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5012b5ff92c8a3..a42dedb78e0a07 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3010,22 +3010,16 @@ static void btusb_coredump_qca(struct hci_dev *hdev) bt_dev_err(hdev, "%s: triggle crash failed (%d)", __func__, err); } -/* - * ==0: not a dump pkt. - * < 0: fails to handle a dump pkt - * > 0: otherwise. - */ +/* Return: 0 on success, negative errno on failure. */ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) { - int ret = 1; + int ret = 0; u8 pkt_type; u8 *sk_ptr; unsigned int sk_len; u16 seqno; u32 dump_size; - struct hci_event_hdr *event_hdr; - struct hci_acl_hdr *acl_hdr; struct qca_dump_hdr *dump_hdr; struct btusb_data *btdata = hci_get_drvdata(hdev); struct usb_device *udev = btdata->udev; @@ -3035,30 +3029,14 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) sk_len = skb->len; if (pkt_type == HCI_ACLDATA_PKT) { - acl_hdr = hci_acl_hdr(skb); - if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) - return 0; sk_ptr += HCI_ACL_HDR_SIZE; sk_len -= HCI_ACL_HDR_SIZE; - event_hdr = (struct hci_event_hdr *)sk_ptr; - } else { - event_hdr = hci_event_hdr(skb); } - if ((event_hdr->evt != HCI_VENDOR_PKT) - || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) - return 0; - sk_ptr += HCI_EVENT_HDR_SIZE; sk_len -= HCI_EVENT_HDR_SIZE; dump_hdr = (struct qca_dump_hdr *)sk_ptr; - if ((sk_len < offsetof(struct qca_dump_hdr, data)) - || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) - || (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) - return 0; - - /*it is dump pkt now*/ seqno = le16_to_cpu(dump_hdr->seqno); if (seqno == 0) { set_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags); @@ -3132,17 +3110,84 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) return ret; } +/* Return: true if the ACL packet is a dump packet, false otherwise. */ +static bool acl_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) +{ + u8 *sk_ptr; + unsigned int sk_len; + + struct hci_event_hdr *event_hdr; + struct hci_acl_hdr *acl_hdr; + struct qca_dump_hdr *dump_hdr; + + sk_ptr = skb->data; + sk_len = skb->len; + + acl_hdr = hci_acl_hdr(skb); + if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) + return false; + + sk_ptr += HCI_ACL_HDR_SIZE; + sk_len -= HCI_ACL_HDR_SIZE; + event_hdr = (struct hci_event_hdr *)sk_ptr; + + if ((event_hdr->evt != HCI_VENDOR_PKT) || + (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) + return false; + + sk_ptr += HCI_EVENT_HDR_SIZE; + sk_len -= HCI_EVENT_HDR_SIZE; + + dump_hdr = (struct qca_dump_hdr *)sk_ptr; + if ((sk_len < offsetof(struct qca_dump_hdr, data)) || + (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + return false; + + return true; +} + +/* Return: true if the event packet is a dump packet, false otherwise. */ +static bool evt_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) +{ + u8 *sk_ptr; + unsigned int sk_len; + + struct hci_event_hdr *event_hdr; + struct qca_dump_hdr *dump_hdr; + + sk_ptr = skb->data; + sk_len = skb->len; + + event_hdr = hci_event_hdr(skb); + + if ((event_hdr->evt != HCI_VENDOR_PKT) + || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) + return false; + + sk_ptr += HCI_EVENT_HDR_SIZE; + sk_len -= HCI_EVENT_HDR_SIZE; + + dump_hdr = (struct qca_dump_hdr *)sk_ptr; + if ((sk_len < offsetof(struct qca_dump_hdr, data)) || + (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + return false; + + return true; +} + static int btusb_recv_acl_qca(struct hci_dev *hdev, struct sk_buff *skb) { - if (handle_dump_pkt_qca(hdev, skb)) - return 0; + if (acl_pkt_is_dump_qca(hdev, skb)) + return handle_dump_pkt_qca(hdev, skb); return hci_recv_frame(hdev, skb); } static int btusb_recv_evt_qca(struct hci_dev *hdev, struct sk_buff *skb) { - if (handle_dump_pkt_qca(hdev, skb)) - return 0; + if (evt_pkt_is_dump_qca(hdev, skb)) + return handle_dump_pkt_qca(hdev, skb); return hci_recv_frame(hdev, skb); } From 07e90048e356a29079fbc011cfc2e1fa1d1c5ac9 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 22 Apr 2025 09:21:55 +0800 Subject: [PATCH 1184/2924] Bluetooth: btmtksdio: Check function enabled before doing close Check BTMTKSDIO_FUNC_ENABLED flag before doing close to prevent btmtksdio_close been called twice. Fixes: 6ac4233afb9a ("Bluetooth: btmtksdio: Prevent enabling interrupts after IRQ handler removal") Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtksdio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index edd5eead1e93b0..e5a119ca724350 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -723,6 +723,10 @@ static int btmtksdio_close(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); + /* Skip btmtksdio_close if BTMTKSDIO_FUNC_ENABLED isn't set */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) + return 0; + sdio_claim_host(bdev->func); /* Disable interrupt */ From 0b6d58bc6ea85e57de25c828444928e4a0aa79cb Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 22 Apr 2025 09:21:56 +0800 Subject: [PATCH 1185/2924] Bluetooth: btmtksdio: Do close if SDIO card removed without close To prevent Bluetooth SDIO card from be physically removed suddenly, driver needs to ensure btmtksdio_close is called before btmtksdio_remove to disable interrupts and txrx workqueue. Fixes: 6ac4233afb9a ("Bluetooth: btmtksdio: Prevent enabling interrupts after IRQ handler removal") Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtksdio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index e5a119ca724350..1d26207b2ba70a 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1447,11 +1447,15 @@ static void btmtksdio_remove(struct sdio_func *func) if (!bdev) return; + hdev = bdev->hdev; + + /* Make sure to call btmtksdio_close before removing sdio card */ + if (test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) + btmtksdio_close(hdev); + /* Be consistent the state in btmtksdio_probe */ pm_runtime_get_noresume(bdev->dev); - hdev = bdev->hdev; - sdio_set_drvdata(func, NULL); hci_unregister_dev(hdev); hci_free_dev(hdev); From 1c7664957e4edb234c69de2db4be1f740d2df564 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Sun, 20 Apr 2025 07:21:56 +0530 Subject: [PATCH 1186/2924] Bluetooth: btintel_pcie: Add additional to checks to clear TX/RX paths Due to a hardware issue, there is a possibility that the driver may miss an MSIx interrupt on the RX/TX data path. Since the TX and RX paths are independent, when a TX MSIx interrupt occurs, the driver can check the RX queue for any pending data and process it if present. The same approach applies to the RX path. Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index efe6ad6d4dc0f8..0a759ea26fd38f 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1272,10 +1272,8 @@ static void btintel_pcie_msix_rx_handle(struct btintel_pcie_data *data) bt_dev_dbg(hdev, "RXQ: cr_hia: %u cr_tia: %u", cr_hia, cr_tia); /* Check CR_TIA and CR_HIA for change */ - if (cr_tia == cr_hia) { - bt_dev_warn(hdev, "RXQ: no new CD found"); + if (cr_tia == cr_hia) return; - } rxq = &data->rxq; @@ -1311,6 +1309,16 @@ static irqreturn_t btintel_pcie_msix_isr(int irq, void *data) return IRQ_WAKE_THREAD; } +static inline bool btintel_pcie_is_rxq_empty(struct btintel_pcie_data *data) +{ + return data->ia.cr_hia[BTINTEL_PCIE_RXQ_NUM] == data->ia.cr_tia[BTINTEL_PCIE_RXQ_NUM]; +} + +static inline bool btintel_pcie_is_txackq_empty(struct btintel_pcie_data *data) +{ + return data->ia.cr_tia[BTINTEL_PCIE_TXQ_NUM] == data->ia.cr_hia[BTINTEL_PCIE_TXQ_NUM]; +} + static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) { struct msix_entry *entry = dev_id; @@ -1342,12 +1350,18 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) btintel_pcie_msix_gp0_handler(data); /* For TX */ - if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_0) + if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_0) { btintel_pcie_msix_tx_handle(data); + if (!btintel_pcie_is_rxq_empty(data)) + btintel_pcie_msix_rx_handle(data); + } /* For RX */ - if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_1) + if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_1) { btintel_pcie_msix_rx_handle(data); + if (!btintel_pcie_is_txackq_empty(data)) + btintel_pcie_msix_tx_handle(data); + } /* * Before sending the interrupt the HW disables it to prevent a nested From 3908feb1bd7f319a10e18d84369a48163264cc7d Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 24 Apr 2025 22:51:03 +0300 Subject: [PATCH 1187/2924] Bluetooth: L2CAP: copy RX timestamp to new fragments Copy timestamp too when allocating new skb for received fragment. Fixes missing RX timestamps with fragmentation. Fixes: 4d7ea8ee90e4 ("Bluetooth: L2CAP: Fix handling fragmented length") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5ca7ac43c58d4b..73472756618a0c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7415,6 +7415,9 @@ static int l2cap_recv_frag(struct l2cap_conn *conn, struct sk_buff *skb, return -ENOMEM; /* Init rx_len */ conn->rx_len = len; + + skb_set_delivery_time(conn->rx_skb, skb->tstamp, + skb->tstamp_type); } /* Copy as much as the rx_skb can hold */ From 14ae3003e73e777c9b36385a7c86f754b50a1821 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 21 Apr 2025 09:31:34 -0700 Subject: [PATCH 1188/2924] Drivers: hv: Fix bad ref to hv_synic_eventring_tail when CPU goes offline When a CPU goes offline, hv_common_cpu_die() frees the hv_synic_eventring_tail memory for the CPU. But in a normal VM (i.e., not running in the root partition) the per-CPU memory has not been allocated, resulting in a bad memory reference and oops when computing the argument to kfree(). Fix this by freeing the memory only when running in the root partition. Fixes: 04df7ac39943 ("Drivers: hv: Introduce per-cpu event ring tail") Signed-off-by: Michael Kelley Reviewed-by: Nuno Das Neves Link: https://lore.kernel.org/r/20250421163134.2024-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20250421163134.2024-1-mhklinux@outlook.com> --- drivers/hv/hv_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index a7d7494feaca13..59792e00cecf38 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -566,9 +566,11 @@ int hv_common_cpu_die(unsigned int cpu) * originally allocated memory is reused in hv_common_cpu_init(). */ - synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); - kfree(*synic_eventring_tail); - *synic_eventring_tail = NULL; + if (hv_root_partition()) { + synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); + kfree(*synic_eventring_tail); + *synic_eventring_tail = NULL; + } return 0; } From d934a93bbcccd551c142206b8129903d18126261 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Mon, 10 Feb 2025 23:45:05 +0100 Subject: [PATCH 1189/2924] clk: rockchip: rk3576: define clk_otp_phy_g The phy clock of the OTP block is also present, but was not defined so far. Though its clk-id already existed, so just define its location. Tested-by: Nicolas Frattaroli Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250210224510.1194963-2-heiko@sntech.de Signed-off-by: Heiko Stuebner --- drivers/clk/rockchip/clk-rk3576.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/rockchip/clk-rk3576.c b/drivers/clk/rockchip/clk-rk3576.c index 595e010341f73a..be703f250197af 100644 --- a/drivers/clk/rockchip/clk-rk3576.c +++ b/drivers/clk/rockchip/clk-rk3576.c @@ -541,6 +541,8 @@ static struct rockchip_clk_branch rk3576_clk_branches[] __initdata = { RK3576_CLKGATE_CON(5), 14, GFLAGS), GATE(CLK_OTPC_AUTO_RD_G, "clk_otpc_auto_rd_g", "xin24m", 0, RK3576_CLKGATE_CON(5), 15, GFLAGS), + GATE(CLK_OTP_PHY_G, "clk_otp_phy_g", "xin24m", 0, + RK3576_CLKGATE_CON(6), 0, GFLAGS), COMPOSITE(CLK_MIPI_CAMERAOUT_M0, "clk_mipi_cameraout_m0", mux_24m_spll_gpll_cpll_p, 0, RK3576_CLKSEL_CON(38), 8, 2, MFLAGS, 0, 8, DFLAGS, RK3576_CLKGATE_CON(6), 3, GFLAGS), From e86e9134e1d1c90a960dd57f59ce574d27b9a124 Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Sat, 19 Apr 2025 19:59:28 +0100 Subject: [PATCH 1190/2924] ksmbd: fix use-after-free in kerberos authentication Setting sess->user = NULL was introduced to fix the dangling pointer created by ksmbd_free_user. However, it is possible another thread could be operating on the session and make use of sess->user after it has been passed to ksmbd_free_user but before sess->user is set to NULL. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/auth.c | 14 +++++++++++++- fs/smb/server/smb2pdu.c | 5 ----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 83caa384974932..b3d121052408cc 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c @@ -550,7 +550,19 @@ int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob, retval = -ENOMEM; goto out; } - sess->user = user; + + if (!sess->user) { + /* First successful authentication */ + sess->user = user; + } else { + if (!ksmbd_compare_user(sess->user, user)) { + ksmbd_debug(AUTH, "different user tried to reuse session\n"); + retval = -EPERM; + ksmbd_free_user(user); + goto out; + } + ksmbd_free_user(user); + } memcpy(sess->sess_key, resp->payload, resp->session_key_len); memcpy(out_blob, resp->payload + resp->session_key_len, diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 372021b3f2632c..acc05657cfc72a 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1607,11 +1607,6 @@ static int krb5_authenticate(struct ksmbd_work *work, if (prev_sess_id && prev_sess_id != sess->id) destroy_previous_session(conn, sess->user, prev_sess_id); - if (sess->state == SMB2_SESSION_VALID) { - ksmbd_free_user(sess->user); - sess->user = NULL; - } - retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); if (retval) { From 2fc9feff45d92a92cd5f96487655d5be23fb7e2b Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Mon, 21 Apr 2025 15:39:29 +0000 Subject: [PATCH 1191/2924] ksmbd: fix use-after-free in session logoff The sess->user object can currently be in use by another thread, for example if another connection has sent a session setup request to bind to the session being free'd. The handler for that connection could be in the smb2_sess_setup function which makes use of sess->user. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index acc05657cfc72a..46aa082457424b 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2249,10 +2249,6 @@ int smb2_session_logoff(struct ksmbd_work *work) sess->state = SMB2_SESSION_EXPIRED; up_write(&conn->session_lock); - if (sess->user) { - ksmbd_free_user(sess->user); - sess->user = NULL; - } ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_SETUP); rsp->StructureSize = cpu_to_le16(4); From f0007910784a61556e94c42b401a38116a899c73 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Fri, 25 Apr 2025 21:37:10 +0000 Subject: [PATCH 1192/2924] selftests/bpf: Correct typo in __clang_major__ macro Make sure that CAN_USE_BPF_ST test (compute_live_registers/store) is enabled when __clang_major__ >= 18. Fixes: 2ea8f6a1cda7 ("selftests/bpf: test cases for compute_live_registers()") Signed-off-by: Peilin Ye Link: https://lore.kernel.org/r/20250425213712.1542077-1-yepeilin@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/bpf_misc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 13a2e22f546583..863df7c0fdd027 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -221,7 +221,7 @@ #define CAN_USE_GOTOL #endif -#if _clang_major__ >= 18 +#if __clang_major__ >= 18 #define CAN_USE_BPF_ST #endif From 4c2227656d9003f4d77afc76f34dd81b95e4c2c4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 23 Apr 2025 15:36:00 +0200 Subject: [PATCH 1193/2924] vmxnet3: Fix malformed packet sizing in vmxnet3_process_xdp vmxnet3 driver's XDP handling is buggy for packet sizes using ring0 (that is, packet sizes between 128 - 3k bytes). We noticed MTU-related connectivity issues with Cilium's service load- balancing in case of vmxnet3 as NIC underneath. A simple curl to a HTTP backend service where the XDP LB was doing IPIP encap led to overly large packet sizes but only for *some* of the packets (e.g. HTTP GET request) while others (e.g. the prior TCP 3WHS) looked completely fine on the wire. In fact, the pcap recording on the backend node actually revealed that the node with the XDP LB was leaking uninitialized kernel data onto the wire for the affected packets, for example, while the packets should have been 152 bytes their actual size was 1482 bytes, so the remainder after 152 bytes was padded with whatever other data was in that page at the time (e.g. we saw user/payload data from prior processed packets). We only noticed this through an MTU issue, e.g. when the XDP LB node and the backend node both had the same MTU (e.g. 1500) then the curl request got dropped on the backend node's NIC given the packet was too large even though the IPIP-encapped packet normally would never even come close to the MTU limit. Lowering the MTU on the XDP LB (e.g. 1480) allowed to let the curl request succeed (which also indicates that the kernel ignored the padding, and thus the issue wasn't very user-visible). Commit e127ce7699c1 ("vmxnet3: Fix missing reserved tailroom") was too eager to also switch xdp_prepare_buff() from rcd->len to rbi->len. It really needs to stick to rcd->len which is the actual packet length from the descriptor. The latter we also feed into vmxnet3_process_xdp_small(), by the way, and it indicates the correct length needed to initialize the xdp->{data,data_end} parts. For e127ce7699c1 ("vmxnet3: Fix missing reserved tailroom") the relevant part was adapting xdp_init_buff() to address the warning given the xdp_data_hard_end() depends on xdp->frame_sz. With that fixed, traffic on the wire looks good again. Fixes: e127ce7699c1 ("vmxnet3: Fix missing reserved tailroom") Signed-off-by: Daniel Borkmann Tested-by: Andrew Sauber Cc: Anton Protopopov Cc: William Tu Cc: Martin Zaharinov Cc: Ronak Doshi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250423133600.176689-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_xdp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c index 616ecc38d1726c..5f470499e60024 100644 --- a/drivers/net/vmxnet3/vmxnet3_xdp.c +++ b/drivers/net/vmxnet3/vmxnet3_xdp.c @@ -397,7 +397,7 @@ vmxnet3_process_xdp(struct vmxnet3_adapter *adapter, xdp_init_buff(&xdp, PAGE_SIZE, &rq->xdp_rxq); xdp_prepare_buff(&xdp, page_address(page), rq->page_pool->p.offset, - rbi->len, false); + rcd->len, false); xdp_buff_clear_frags_flag(&xdp); xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog); From 5ec6d7d737a491256cd37e33910f7ac1978db591 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Apr 2025 01:37:33 +0300 Subject: [PATCH 1194/2924] net: mscc: ocelot: delete PVID VLAN when readding it as non-PVID The following set of commands: ip link add br0 type bridge vlan_filtering 1 # vlan_default_pvid 1 is implicit ip link set swp0 master br0 bridge vlan add dev swp0 vid 1 should result in the dropping of untagged and 802.1p-tagged traffic, but we see that it continues to be accepted. Whereas, had we deleted VID 1 instead, the aforementioned dropping would have worked This is because the ANA_PORT_DROP_CFG update logic doesn't run, because ocelot_vlan_add() only calls ocelot_port_set_pvid() if the new VLAN has the BRIDGE_VLAN_INFO_PVID flag. Similar to other drivers like mt7530_port_vlan_add() which handle this case correctly, we need to test whether the VLAN we're changing used to have the BRIDGE_VLAN_INFO_PVID flag, but lost it now. That amounts to a PVID deletion and should be treated as such. Regarding blame attribution: this never worked properly since the introduction of bridge VLAN filtering in commit 7142529f1688 ("net: mscc: ocelot: add VLAN filtering"). However, there was a significant paradigm shift which aligned the ANA_PORT_DROP_CFG register with the PVID concept rather than with the native VLAN concept, and that change wasn't targeted for 'stable'. Realistically, that is as far as this fix needs to be propagated to. Fixes: be0576fed6d3 ("net: mscc: ocelot: move the logic to drop 802.1p traffic to the pvid deletion") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250424223734.3096202-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ef93df52088710..08bee56aea35f3 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -830,6 +830,7 @@ EXPORT_SYMBOL(ocelot_vlan_prepare); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; int err; /* Ignore VID 0 added to our RX filter by the 8021q module, since @@ -849,6 +850,11 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, ocelot_bridge_vlan_find(ocelot, vid)); if (err) return err; + } else if (ocelot_port->pvid_vlan && + ocelot_bridge_vlan_find(ocelot, vid) == ocelot_port->pvid_vlan) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; } /* Untagged egress vlan clasification */ From bf9de1dcd0eecd16020a677c900a70ea9b0a9714 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 25 Apr 2025 01:37:34 +0300 Subject: [PATCH 1195/2924] selftests: net: bridge_vlan_aware: test untagged/8021p-tagged with and without PVID Recent discussions around commit ad1afb003939 ("vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)") have sparked the question what happens with the DSA (and possibly other switchdev) data path when the bridge says that ports should have no PVID VLAN, but the 8021q module, as the result of a NETDEV_UP event, decides it should add VID 0 to the RX filter of those bridge ports. Do those bridge ports receive packets tagged with VID 0 or not, now? We don't know, there is no test. In the veth realm, this passes trivially, because veth is not VLAN filtering and this, the 8021q module lacks the instinct to add VID 0 in the first place. In the realm of VLAN filtering NICs with no switchdev offload, this should also pass, because the VLAN groups of the software bridge are consulted, where it can clearly be seen that a PVID is missing, even though the packet was initially accepted by the NIC. The test only poses a challenge for switchdev drivers, which usually have to program to hardware both VLANs from RX filtering, as well as from switchdev. Especially when a switchdev port joins a VLAN-aware bridge, it is unavoidable that it gains the NETIF_F_HW_VLAN_CTAG_FILTER feature, i.e. any 8021q uppers that the bridge port may have must also be committed to the RX filtering table of the interface. When a VLAN-tagged packet is physically received by the port, it is initially indistinguishable whether it will reach the bridge data path or the 8021q upper data path. That is rather the final step of the new tests that we introduce. We need to build context up to that stage, which means the following: - we need to test that 802.1p (VID 0) tagged traffic is received in the first place (on bridge ports with a valid PVID). This is the "8021p" test. - we need to test that the usual paths of reaching a configuration with no PVID on a bridge port are all covered and they all reach the same state. Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20250424223734.3096202-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- .../net/forwarding/bridge_vlan_aware.sh | 96 ++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 90f8a244ea9015..e59fba366a0a67 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -194,6 +194,100 @@ other_tpid() tc qdisc del dev $h2 clsact } +8021p_do() +{ + local should_fail=$1; shift + local mac=de:ad:be:ef:13:37 + + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err_fail $should_fail $? "802.1p-tagged reception" + + tc filter del dev $h2 ingress pref 1 +} + +8021p() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with the default_pvid, 1, packets tagged with VID 0 are + # accepted. + 8021p_do 0 + + # Test that packets tagged with VID 0 are still accepted after changing + # the default_pvid. + ip link set br0 type bridge vlan_default_pvid 10 + 8021p_do 0 + + log_test "Reception of 802.1p-tagged traffic" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + +send_untagged_and_8021p() +{ + ping_do $h1 192.0.2.2 + check_fail $? + + 8021p_do 1 +} + +drop_untagged() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with no PVID, untagged and 802.1p-tagged traffic is + # dropped. + ip link set br0 type bridge vlan_default_pvid 1 + + # First we reconfigure the default_pvid, 1, as a non-PVID VLAN. + bridge vlan add dev $swp1 vid 1 untagged + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Next we try to delete VID 1 altogether + bridge vlan del dev $swp1 vid 1 + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Set up the bridge without a default_pvid, then check that the 8021q + # module, when the bridge port goes down and then up again, does not + # accidentally re-enable untagged packet reception. + ip link set br0 type bridge vlan_default_pvid 0 + ip link set $swp1 down + ip link set $swp1 up + setup_wait + send_untagged_and_8021p + + # Remove swp1 as a bridge port and let it rejoin the bridge while it + # has no default_pvid. + ip link set $swp1 nomaster + ip link set $swp1 master br0 + send_untagged_and_8021p + + # Restore settings + ip link set br0 type bridge vlan_default_pvid 1 + + log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare From 765f253e28909f161b0211f85cf0431cfee7d6df Mon Sep 17 00:00:00 2001 From: Christian Heusel Date: Thu, 24 Apr 2025 16:00:28 +0200 Subject: [PATCH 1196/2924] Revert "rndis_host: Flag RNDIS modems as WWAN devices" This reverts commit 67d1a8956d2d62fe6b4c13ebabb57806098511d8. Since this commit has been proven to be problematic for the setup of USB-tethered ethernet connections and the related breakage is very noticeable for users it should be reverted until a fixed version of the change can be rolled out. Closes: https://lore.kernel.org/all/e0df2d85-1296-4317-b717-bd757e3ab928@heusel.eu/ Link: https://chaos.social/@gromit/114377862699921553 Link: https://bugzilla.kernel.org/show_bug.cgi?id=220002 Link: https://bugs.gentoo.org/953555 Link: https://bbs.archlinux.org/viewtopic.php?id=304892 Cc: stable@vger.kernel.org Acked-by: Lubomir Rintel Signed-off-by: Christian Heusel Link: https://patch.msgid.link/20250424-usb-tethering-fix-v1-1-b65cf97c740e@heusel.eu Signed-off-by: Jakub Kicinski --- drivers/net/usb/rndis_host.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index bb0bf141587274..7b3739b29c8f72 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -630,16 +630,6 @@ static const struct driver_info zte_rndis_info = { .tx_fixup = rndis_tx_fixup, }; -static const struct driver_info wwan_rndis_info = { - .description = "Mobile Broadband RNDIS device", - .flags = FLAG_WWAN | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, - .bind = rndis_bind, - .unbind = rndis_unbind, - .status = rndis_status, - .rx_fixup = rndis_rx_fixup, - .tx_fixup = rndis_tx_fixup, -}; - /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { @@ -676,11 +666,9 @@ static const struct usb_device_id products [] = { USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), .driver_info = (unsigned long) &rndis_info, }, { - /* Mobile Broadband Modem, seen in Novatel Verizon USB730L and - * Telit FN990A (RNDIS) - */ + /* Novatel Verizon USB730L */ USB_INTERFACE_INFO(USB_CLASS_MISC, 4, 1), - .driver_info = (unsigned long)&wwan_rndis_info, + .driver_info = (unsigned long) &rndis_info, }, { }, // END }; From 8548c84c004be3da4ffbe35ed0589041a4050c03 Mon Sep 17 00:00:00 2001 From: Sathesh B Edara Date: Thu, 24 Apr 2025 06:39:44 -0700 Subject: [PATCH 1197/2924] octeon_ep_vf: Resolve netdevice usage count issue The netdevice usage count increases during transmit queue timeouts because netdev_hold is called in ndo_tx_timeout, scheduling a task to reinitialize the card. Although netdev_put is called at the end of the scheduled work, rtnl_unlock checks the reference count during cleanup. This could cause issues if transmit timeout is called on multiple queues. Fixes: cb7dd712189f ("octeon_ep_vf: Add driver framework and device initialization") Signed-off-by: Sathesh B Edara Link: https://patch.msgid.link/20250424133944.28128-1-sedara@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c index 18c922dd5fc64d..ccb69bc5c95292 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c @@ -835,7 +835,9 @@ static void octep_vf_tx_timeout(struct net_device *netdev, unsigned int txqueue) struct octep_vf_device *oct = netdev_priv(netdev); netdev_hold(netdev, NULL, GFP_ATOMIC); - schedule_work(&oct->tx_timeout_task); + if (!schedule_work(&oct->tx_timeout_task)) + netdev_put(netdev, NULL); + } static int octep_vf_set_mac(struct net_device *netdev, void *p) From 8f7ae5a85137b913cb97e2d24409d36548d0bab1 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 24 Apr 2025 05:55:47 -0700 Subject: [PATCH 1198/2924] bnxt_en: improve TX timestamping FIFO configuration Reconfiguration of netdev may trigger close/open procedure which can break FIFO status by adjusting the amount of empty slots for TX timestamps. But it is not really needed because timestamps for the packets sent over the wire still can be retrieved. On the other side, during netdev close procedure any skbs waiting for TX timestamps can be leaked because there is no cleaning procedure called. Free skbs waiting for TX timestamps when closing netdev. Fixes: 8aa2a79e9b95 ("bnxt_en: Increase the max total outstanding PTP TX packets to 4") Reviewed-by: Michael Chan Reviewed-by: Pavan Chebbi Signed-off-by: Vadim Fedorenko Link: https://patch.msgid.link/20250424125547.460632-1-vadfed@meta.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 29 ++++++++++++++----- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 1 + 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c8e3468eee612a..2c8e2c19d85484 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3414,6 +3414,9 @@ static void bnxt_free_tx_skbs(struct bnxt *bp) bnxt_free_one_tx_ring_skbs(bp, txr, i); } + + if (bp->ptp_cfg && !(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) + bnxt_ptp_free_txts_skbs(bp->ptp_cfg); } static void bnxt_free_one_rx_ring(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) @@ -12797,8 +12800,6 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); - if (bp->ptp_cfg && !(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) - WRITE_ONCE(bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 2d4e19b96ee744..0669d43472f51b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -794,6 +794,27 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) return HZ; } +void bnxt_ptp_free_txts_skbs(struct bnxt_ptp_cfg *ptp) +{ + struct bnxt_ptp_tx_req *txts_req; + u16 cons = ptp->txts_cons; + + /* make sure ptp aux worker finished with + * possible BNXT_STATE_OPEN set + */ + ptp_cancel_worker_sync(ptp->ptp_clock); + + ptp->tx_avail = BNXT_MAX_TX_TS; + while (cons != ptp->txts_prod) { + txts_req = &ptp->txts_req[cons]; + if (!IS_ERR_OR_NULL(txts_req->tx_skb)) + dev_kfree_skb_any(txts_req->tx_skb); + cons = NEXT_TXTS(cons); + } + ptp->txts_cons = cons; + ptp_schedule_worker(ptp->ptp_clock, 0); +} + int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod) { spin_lock_bh(&ptp->ptp_tx_lock); @@ -1105,7 +1126,6 @@ int bnxt_ptp_init(struct bnxt *bp) void bnxt_ptp_clear(struct bnxt *bp) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; - int i; if (!ptp) return; @@ -1117,12 +1137,5 @@ void bnxt_ptp_clear(struct bnxt *bp) kfree(ptp->ptp_info.pin_config); ptp->ptp_info.pin_config = NULL; - for (i = 0; i < BNXT_MAX_TX_TS; i++) { - if (ptp->txts_req[i].tx_skb) { - dev_kfree_skb_any(ptp->txts_req[i].tx_skb); - ptp->txts_req[i].tx_skb = NULL; - } - } - bnxt_unmap_ptp_regs(bp); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index a95f05e9c579b7..0481161d26ef5d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -162,6 +162,7 @@ int bnxt_ptp_cfg_tstamp_filters(struct bnxt *bp); void bnxt_ptp_reapply_pps(struct bnxt *bp); int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); +void bnxt_ptp_free_txts_skbs(struct bnxt_ptp_cfg *ptp); int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod); void bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb, u16 prod); int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); From fb8e9f59d6f292c3d9fea6c155c22ea5fc3053ab Mon Sep 17 00:00:00 2001 From: Yuli Wang Date: Thu, 24 Apr 2025 20:15:22 +0800 Subject: [PATCH 1199/2924] LoongArch: Select ARCH_USE_MEMTEST As of commit dce44566192e ("mm/memtest: add ARCH_USE_MEMTEST"), architectures must select ARCH_USE_MEMTESET to enable CONFIG_MEMTEST. Commit 628c3bb40e9a ("LoongArch: Add boot and setup routines") added support for early_memtest but did not select ARCH_USE_MEMTESET. Fixes: 628c3bb40e9a ("LoongArch: Add boot and setup routines") Tested-by: Erpeng Xu Tested-by: Yuli Wang Signed-off-by: Yuli Wang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 067c0b994648cc..1a2cf012b8f2f5 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -73,6 +73,7 @@ config LOONGARCH select ARCH_SUPPORTS_RT select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_BPF_JIT From bb0511d59db9b3e40c8d51f0d151ccd0fd44071d Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 24 Apr 2025 20:15:41 +0800 Subject: [PATCH 1200/2924] LoongArch: Make regs_irqs_disabled() more clear In the current code, the definition of regs_irqs_disabled() is actually "!(regs->csr_prmd & CSR_CRMD_IE)" because arch_irqs_disabled_flags() is defined as "!(flags & CSR_CRMD_IE)", it looks a little strange. Define regs_irqs_disabled() as !(regs->csr_prmd & CSR_PRMD_PIE) directly to make it more clear, no functional change. While at it, the return value of regs_irqs_disabled() is true or false, so change its type to reflect that and also make it always inline. Fixes: 803b0fc5c3f2 ("LoongArch: Add process management") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index f3ddaed9ef7f08..a5b63c84f8541a 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -33,9 +33,9 @@ struct pt_regs { unsigned long __last[]; } __aligned(8); -static inline int regs_irqs_disabled(struct pt_regs *regs) +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) { - return arch_irqs_disabled_flags(regs->csr_prmd); + return !(regs->csr_prmd & CSR_PRMD_PIE); } static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) From cc73cc6bcdb5f959670e3ff9abdc62461452ddff Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 24 Apr 2025 20:15:41 +0800 Subject: [PATCH 1201/2924] LoongArch: Make do_xyz() exception handlers more robust Currently, interrupts need to be disabled before single-step mode is set, it requires that CSR_PRMD_PIE be cleared in save_local_irqflag() which is called by setup_singlestep(), this is reasonable. But in the first kprobe breakpoint exception, if the irq is enabled at the beginning of do_bp(), it will not be disabled at the end of do_bp() due to the CSR_PRMD_PIE has been cleared in save_local_irqflag(). So for this case, it may corrupt exception context when restoring the exception after do_bp() in handle_bp(), this is not reasonable. In order to restore exception safely in handle_bp(), it needs to ensure the irq is disabled at the end of do_bp(), so just add a local variable to record the original interrupt status in the parent context, then use it as the check condition to enable and disable irq in do_bp(). While at it, do the similar thing for other do_xyz() exception handlers to make them more robust. Fixes: 6d4cc40fb5f5 ("LoongArch: Add kprobes support") Suggested-by: Jinyang He Suggested-by: Huacai Chen Co-developed-by: Tianyang Zhang Signed-off-by: Tianyang Zhang Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 2ec3106c0da3d1..47fc2de6d15018 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -553,9 +553,10 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); #else + bool pie = regs_irqs_disabled(regs); unsigned int *pc; - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); @@ -582,7 +583,7 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); #endif irqentry_exit(regs, state); @@ -621,12 +622,13 @@ static void bug_handler(struct pt_regs *regs) asmlinkage void noinstr do_bce(struct pt_regs *regs) { bool user = user_mode(regs); + bool pie = regs_irqs_disabled(regs); unsigned long era = exception_era(regs); u64 badv = 0, lower = 0, upper = ULONG_MAX; union loongarch_instruction insn; irqentry_state_t state = irqentry_enter(regs); - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); current->thread.trap_nr = read_csr_excode(); @@ -692,7 +694,7 @@ asmlinkage void noinstr do_bce(struct pt_regs *regs) force_sig_bnderr((void __user *)badv, (void __user *)lower, (void __user *)upper); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); @@ -710,11 +712,12 @@ asmlinkage void noinstr do_bce(struct pt_regs *regs) asmlinkage void noinstr do_bp(struct pt_regs *regs) { bool user = user_mode(regs); + bool pie = regs_irqs_disabled(regs); unsigned int opcode, bcode; unsigned long era = exception_era(regs); irqentry_state_t state = irqentry_enter(regs); - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); if (__get_inst(&opcode, (u32 *)era, user)) @@ -780,7 +783,7 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) } out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); @@ -1015,6 +1018,7 @@ static void init_restore_lbt(void) asmlinkage void noinstr do_lbt(struct pt_regs *regs) { + bool pie = regs_irqs_disabled(regs); irqentry_state_t state = irqentry_enter(regs); /* @@ -1024,7 +1028,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs) * (including the user using 'MOVGR2GCSR' to turn on TM, which * will not trigger the BTE), we need to check PRMD first. */ - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); if (!cpu_has_lbt) { @@ -1038,7 +1042,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs) preempt_enable(); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); From 2ef174b13344b3b4554d3d28e6f9e2a2c1d3138f Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 24 Apr 2025 20:15:41 +0800 Subject: [PATCH 1202/2924] LoongArch: Handle fp, lsx, lasx and lbt assembly symbols Like the other relevant symbols, export some fp, lsx, lasx and lbt assembly symbols and put the function declarations in header files rather than source files. While at it, use "asmlinkage" for the other existing C prototypes of assembly functions and also do not use the "extern" keyword with function declarations according to the document coding-style.rst. Cc: stable@vger.kernel.org # 6.6+ Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/fpu.h | 39 +++++++++++++++++++------------- arch/loongarch/include/asm/lbt.h | 10 +++++--- arch/loongarch/kernel/fpu.S | 6 +++++ arch/loongarch/kernel/lbt.S | 4 ++++ arch/loongarch/kernel/signal.c | 21 ----------------- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 3177674228f896..45514f314664d8 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -22,22 +22,29 @@ struct sigcontext; #define kernel_fpu_available() cpu_has_fpu -extern void kernel_fpu_begin(void); -extern void kernel_fpu_end(void); - -extern void _init_fpu(unsigned int); -extern void _save_fp(struct loongarch_fpu *); -extern void _restore_fp(struct loongarch_fpu *); - -extern void _save_lsx(struct loongarch_fpu *fpu); -extern void _restore_lsx(struct loongarch_fpu *fpu); -extern void _init_lsx_upper(void); -extern void _restore_lsx_upper(struct loongarch_fpu *fpu); - -extern void _save_lasx(struct loongarch_fpu *fpu); -extern void _restore_lasx(struct loongarch_fpu *fpu); -extern void _init_lasx_upper(void); -extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +void kernel_fpu_begin(void); +void kernel_fpu_end(void); + +asmlinkage void _init_fpu(unsigned int); +asmlinkage void _save_fp(struct loongarch_fpu *); +asmlinkage void _restore_fp(struct loongarch_fpu *); +asmlinkage int _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +asmlinkage int _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); + +asmlinkage void _save_lsx(struct loongarch_fpu *fpu); +asmlinkage void _restore_lsx(struct loongarch_fpu *fpu); +asmlinkage void _init_lsx_upper(void); +asmlinkage void _restore_lsx_upper(struct loongarch_fpu *fpu); +asmlinkage int _save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +asmlinkage int _restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); + +asmlinkage void _save_lasx(struct loongarch_fpu *fpu); +asmlinkage void _restore_lasx(struct loongarch_fpu *fpu); +asmlinkage void _init_lasx_upper(void); +asmlinkage void _restore_lasx_upper(struct loongarch_fpu *fpu); +asmlinkage int _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +asmlinkage int _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); static inline void enable_lsx(void); static inline void disable_lsx(void); diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h index e671978bf5523f..38566574e56214 100644 --- a/arch/loongarch/include/asm/lbt.h +++ b/arch/loongarch/include/asm/lbt.h @@ -12,9 +12,13 @@ #include #include -extern void _init_lbt(void); -extern void _save_lbt(struct loongarch_lbt *); -extern void _restore_lbt(struct loongarch_lbt *); +asmlinkage void _init_lbt(void); +asmlinkage void _save_lbt(struct loongarch_lbt *); +asmlinkage void _restore_lbt(struct loongarch_lbt *); +asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); +asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); +asmlinkage int _save_ftop_context(void __user *ftop); +asmlinkage int _restore_ftop_context(void __user *ftop); static inline int is_lbt_enabled(void) { diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 6ab640101457cc..28caf416ae36e6 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -458,6 +458,7 @@ SYM_FUNC_START(_save_fp_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_fp_context) +EXPORT_SYMBOL_GPL(_save_fp_context) /* * a0: fpregs @@ -471,6 +472,7 @@ SYM_FUNC_START(_restore_fp_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_fp_context) +EXPORT_SYMBOL_GPL(_restore_fp_context) /* * a0: fpregs @@ -484,6 +486,7 @@ SYM_FUNC_START(_save_lsx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lsx_context) +EXPORT_SYMBOL_GPL(_save_lsx_context) /* * a0: fpregs @@ -497,6 +500,7 @@ SYM_FUNC_START(_restore_lsx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lsx_context) +EXPORT_SYMBOL_GPL(_restore_lsx_context) /* * a0: fpregs @@ -510,6 +514,7 @@ SYM_FUNC_START(_save_lasx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lasx_context) +EXPORT_SYMBOL_GPL(_save_lasx_context) /* * a0: fpregs @@ -523,6 +528,7 @@ SYM_FUNC_START(_restore_lasx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lasx_context) +EXPORT_SYMBOL_GPL(_restore_lasx_context) .L_fpu_fault: li.w a0, -EFAULT # failure diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S index 001f061d226ab5..71678912d24ce2 100644 --- a/arch/loongarch/kernel/lbt.S +++ b/arch/loongarch/kernel/lbt.S @@ -90,6 +90,7 @@ SYM_FUNC_START(_save_lbt_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lbt_context) +EXPORT_SYMBOL_GPL(_save_lbt_context) /* * a0: scr @@ -110,6 +111,7 @@ SYM_FUNC_START(_restore_lbt_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lbt_context) +EXPORT_SYMBOL_GPL(_restore_lbt_context) /* * a0: ftop @@ -120,6 +122,7 @@ SYM_FUNC_START(_save_ftop_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_ftop_context) +EXPORT_SYMBOL_GPL(_save_ftop_context) /* * a0: ftop @@ -150,6 +153,7 @@ SYM_FUNC_START(_restore_ftop_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_ftop_context) +EXPORT_SYMBOL_GPL(_restore_ftop_context) .L_lbt_fault: li.w a0, -EFAULT # failure diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 7a555b60017193..4740cb5b238898 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -51,27 +51,6 @@ #define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) #define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) -/* Assembly functions to move context to/from the FPU */ -extern asmlinkage int -_save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); -extern asmlinkage int -_restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); -extern asmlinkage int -_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); - -#ifdef CONFIG_CPU_HAS_LBT -extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); -extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); -extern asmlinkage int _save_ftop_context(void __user *ftop); -extern asmlinkage int _restore_ftop_context(void __user *ftop); -#endif - struct rt_sigframe { struct siginfo rs_info; struct ucontext rs_uctx; From c37325cbd91abe3bfab280b3b09947155abe8e07 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 24 Apr 2025 20:15:41 +0800 Subject: [PATCH 1203/2924] LoongArch: Remove a bogus reference to ZONE_DMA Remove dead code. LoongArch does not have a DMA memory zone (24bit DMA). The architecture does not even define MAX_DMA_PFN. Cc: stable@vger.kernel.org Reviewed-by: Mike Rapoport (Microsoft) Signed-off-by: Petr Tesarik Signed-off-by: Huacai Chen --- arch/loongarch/mm/init.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index fdb7f73ad16016..06f11d9e4ec113 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -65,9 +65,6 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; -#ifdef CONFIG_ZONE_DMA - max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; -#endif #ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; #endif From bd51834d1cf65a2c801295d230c220aeebf87a73 Mon Sep 17 00:00:00 2001 From: Ming Wang Date: Thu, 24 Apr 2025 20:15:47 +0800 Subject: [PATCH 1204/2924] LoongArch: Return NULL from huge_pte_offset() for invalid PMD LoongArch's huge_pte_offset() currently returns a pointer to a PMD slot even if the underlying entry points to invalid_pte_table (indicating no mapping). Callers like smaps_hugetlb_range() fetch this invalid entry value (the address of invalid_pte_table) via this pointer. The generic is_swap_pte() check then incorrectly identifies this address as a swap entry on LoongArch, because it satisfies the "!pte_present() && !pte_none()" conditions. This misinterpretation, combined with a coincidental match by is_migration_entry() on the address bits, leads to kernel crashes in pfn_swap_entry_to_page(). Fix this at the architecture level by modifying huge_pte_offset() to check the PMD entry's content using pmd_none() before returning. If the entry is invalid (i.e., it points to invalid_pte_table), return NULL instead of the pointer to the slot. Cc: stable@vger.kernel.org Acked-by: Peter Xu Co-developed-by: Hongchen Zhang Signed-off-by: Hongchen Zhang Signed-off-by: Ming Wang Signed-off-by: Huacai Chen --- arch/loongarch/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index e4068906143b33..cea84d7f2b91a1 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -47,7 +47,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd = pmd_offset(pud, addr); } } - return (pte_t *) pmd; + return pmd_none(pmdp_get(pmd)) ? NULL : (pte_t *) pmd; } uint64_t pmd_to_entrylo(unsigned long pmd_val) From 8b2d01fec800081dd68271c01e4d239ef4d7115e Mon Sep 17 00:00:00 2001 From: Yulong Han Date: Thu, 24 Apr 2025 20:15:52 +0800 Subject: [PATCH 1205/2924] LoongArch: KVM: Fix multiple typos of KVM code Fix multiple typos inside arch/loongarch/kvm. Cc: stable@vger.kernel.org Reviewed-by: Yuli Wang Reviewed-by: Bibo Mao Signed-off-by: Yulong Han Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/ipi.c | 4 ++-- arch/loongarch/kvm/main.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 93f4acd445236e..fe734dc062ed47 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -111,7 +111,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { - kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); + kvm_err("%s: : read data from addr %llx failed\n", __func__, addr); return ret; } /* Construct the mask by scanning the bit 27-30 */ @@ -127,7 +127,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) - kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); + kvm_err("%s: : write data to addr %llx failed\n", __func__, addr); return ret; } diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index d165cd38c6bb38..80ea63d465b8ea 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -296,10 +296,10 @@ int kvm_arch_enable_virtualization_cpu(void) /* * Enable virtualization features granting guest direct control of * certain features: - * GCI=2: Trap on init or unimplement cache instruction. + * GCI=2: Trap on init or unimplemented cache instruction. * TORU=0: Trap on Root Unimplement. * CACTRL=1: Root control cache. - * TOP=0: Trap on Previlege. + * TOP=0: Trap on Privilege. * TOE=0: Trap on Exception. * TIT=0: Trap on Timer. */ From 9ea86232a5520d9d21832d06031ea80f055a6ff8 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 24 Apr 2025 20:15:52 +0800 Subject: [PATCH 1206/2924] LoongArch: KVM: Fully clear some CSRs when VM reboot Some registers such as LOONGARCH_CSR_ESTAT and LOONGARCH_CSR_GINTC are partly cleared with function _kvm_setcsr(). This comes from the hardware specification, some bits are read only in VM mode, and however they can be written in host mode. So they are partly cleared in VM mode, and can be fully cleared in host mode. These read only bits show pending interrupt or exception status. When VM reset, the read-only bits should be cleared, otherwise vCPU will receive unknown interrupts in boot stage. Here registers LOONGARCH_CSR_ESTAT/LOONGARCH_CSR_GINTC are fully cleared in ioctl KVM_REG_LOONGARCH_VCPU_RESET vCPU reset path. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 8e427b37966123..2d3c2a2d1d1cb6 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -902,6 +902,13 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, vcpu->arch.st.guest_addr = 0; memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending)); memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear)); + + /* + * When vCPU reset, clear the ESTAT and GINTC registers + * Other CSR registers are cleared with function _kvm_setcsr(). + */ + kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_GINTC, 0); + kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_ESTAT, 0); break; default: ret = -EINVAL; From 5add0dbbebd60628b55e5eb8426612dedab7311a Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 24 Apr 2025 20:15:52 +0800 Subject: [PATCH 1207/2924] LoongArch: KVM: Fix PMU pass-through issue if VM exits to host finally In function kvm_pre_enter_guest(), it prepares to enter guest and check whether there are pending signals or events. And it will not enter guest if there are, PMU pass-through preparation for guest should be cancelled and host should own PMU hardware. Cc: stable@vger.kernel.org Fixes: f4e40ea9f78f ("LoongArch: KVM: Add PMU support for guest") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 2d3c2a2d1d1cb6..5af32ec62cb16a 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -294,6 +294,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { + kvm_lose_pmu(vcpu); /* make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); From 68f9d8974b545668e1be2422240b25a92e304b14 Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Thu, 24 Apr 2025 12:04:44 +0800 Subject: [PATCH 1208/2924] rtase: Modify the condition used to detect overflow in rtase_calc_time_mitigation Fix the following compile error reported by the kernel test robot by modifying the condition used to detect overflow in rtase_calc_time_mitigation. In file included from include/linux/mdio.h:10:0, from drivers/net/ethernet/realtek/rtase/rtase_main.c:58: In function 'u16_encode_bits', inlined from 'rtase_calc_time_mitigation.constprop' at drivers/net/ ethernet/realtek/rtase/rtase_main.c:1915:13, inlined from 'rtase_init_software_variable.isra.41' at drivers/net/ ethernet/realtek/rtase/rtase_main.c:1961:13, inlined from 'rtase_init_one' at drivers/net/ethernet/realtek/ rtase/rtase_main.c:2111:2: >> include/linux/bitfield.h:178:3: error: call to '__field_overflow' declared with attribute error: value doesn't fit into mask __field_overflow(); \ ^~~~~~~~~~~~~~~~~~ include/linux/bitfield.h:198:2: note: in expansion of macro '____MAKE_OP' ____MAKE_OP(u##size,u##size,,) ^~~~~~~~~~~ include/linux/bitfield.h:200:1: note: in expansion of macro '__MAKE_OP' __MAKE_OP(16) ^~~~~~~~~ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503182158.nkAlbJWX-lkp@intel.com/ Fixes: a36e9f5cfe9e ("rtase: Add support for a pci table in this module") Signed-off-by: Justin Lai Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250424040444.5530-1-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 2aacc1996796db..55b8d36661530c 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1925,8 +1925,8 @@ static u16 rtase_calc_time_mitigation(u32 time_us) time_us = min_t(int, time_us, RTASE_MITI_MAX_TIME); - msb = fls(time_us); - if (msb >= RTASE_MITI_COUNT_BIT_NUM) { + if (time_us > RTASE_MITI_TIME_COUNT_MASK) { + msb = fls(time_us); time_unit = msb - RTASE_MITI_COUNT_BIT_NUM; time_count = time_us >> (msb - RTASE_MITI_COUNT_BIT_NUM); } else { From 6fe0866014486736cc3ba1c6fd4606d3dbe55c9c Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Thu, 24 Apr 2025 10:38:48 +0200 Subject: [PATCH 1209/2924] net: ethernet: mtk-star-emac: fix spinlock recursion issues on rx/tx poll Use spin_lock_irqsave and spin_unlock_irqrestore instead of spin_lock and spin_unlock in mtk_star_emac driver to avoid spinlock recursion occurrence that can happen when enabling the DMA interrupts again in rx/tx poll. ``` BUG: spinlock recursion on CPU#0, swapper/0/0 lock: 0xffff00000db9cf20, .magic: dead4ead, .owner: swapper/0/0, .owner_cpu: 0 CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.15.0-rc2-next-20250417-00001-gf6a27738686c-dirty #28 PREEMPT Hardware name: MediaTek MT8365 Open Platform EVK (DT) Call trace: show_stack+0x18/0x24 (C) dump_stack_lvl+0x60/0x80 dump_stack+0x18/0x24 spin_dump+0x78/0x88 do_raw_spin_lock+0x11c/0x120 _raw_spin_lock+0x20/0x2c mtk_star_handle_irq+0xc0/0x22c [mtk_star_emac] __handle_irq_event_percpu+0x48/0x140 handle_irq_event+0x4c/0xb0 handle_fasteoi_irq+0xa0/0x1bc handle_irq_desc+0x34/0x58 generic_handle_domain_irq+0x1c/0x28 gic_handle_irq+0x4c/0x120 do_interrupt_handler+0x50/0x84 el1_interrupt+0x34/0x68 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x6c/0x70 regmap_mmio_read32le+0xc/0x20 (P) _regmap_bus_reg_read+0x6c/0xac _regmap_read+0x60/0xdc regmap_read+0x4c/0x80 mtk_star_rx_poll+0x2f4/0x39c [mtk_star_emac] __napi_poll+0x38/0x188 net_rx_action+0x164/0x2c0 handle_softirqs+0x100/0x244 __do_softirq+0x14/0x20 ____do_softirq+0x10/0x20 call_on_irq_stack+0x24/0x64 do_softirq_own_stack+0x1c/0x40 __irq_exit_rcu+0xd4/0x10c irq_exit_rcu+0x10/0x1c el1_interrupt+0x38/0x68 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x6c/0x70 cpuidle_enter_state+0xac/0x320 (P) cpuidle_enter+0x38/0x50 do_idle+0x1e4/0x260 cpu_startup_entry+0x34/0x3c rest_init+0xdc/0xe0 console_on_rootfs+0x0/0x6c __primary_switched+0x88/0x90 ``` Fixes: 0a8bd81fd6aa ("net: ethernet: mtk-star-emac: separate tx/rx handling with two NAPIs") Signed-off-by: Louis-Alexis Eyraud Reviewed-by: Maxime Chevallier Acked-by: Bartosz Golaszewski Link: https://patch.msgid.link/20250424-mtk_star_emac-fix-spinlock-recursion-issue-v2-1-f3fde2e529d8@collabora.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 76f202d7f05537..23115881d8e892 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1163,6 +1163,7 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget) struct net_device *ndev = priv->ndev; unsigned int head = ring->head; unsigned int entry = ring->tail; + unsigned long flags; while (entry != head && count < (MTK_STAR_RING_NUM_DESCS - 1)) { ret = mtk_star_tx_complete_one(priv); @@ -1182,9 +1183,9 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget) netif_wake_queue(ndev); if (napi_complete(napi)) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, false, true); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } return 0; @@ -1341,6 +1342,7 @@ static int mtk_star_rx(struct mtk_star_priv *priv, int budget) static int mtk_star_rx_poll(struct napi_struct *napi, int budget) { struct mtk_star_priv *priv; + unsigned long flags; int work_done = 0; priv = container_of(napi, struct mtk_star_priv, rx_napi); @@ -1348,9 +1350,9 @@ static int mtk_star_rx_poll(struct napi_struct *napi, int budget) work_done = mtk_star_rx(priv, budget); if (work_done < budget) { napi_complete_done(napi, work_done); - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, true, false); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } return work_done; From e54b4db35e201a9173da9cb7abc8377e12abaf87 Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Thu, 24 Apr 2025 10:38:49 +0200 Subject: [PATCH 1210/2924] net: ethernet: mtk-star-emac: rearm interrupts in rx_poll only when advised In mtk_star_rx_poll function, on event processing completion, the mtk_star_emac driver calls napi_complete_done but ignores its return code and enable RX DMA interrupts inconditionally. This return code gives the info if a device should avoid rearming its interrupts or not, so fix this behaviour by taking it into account. Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver") Signed-off-by: Louis-Alexis Eyraud Acked-by: Bartosz Golaszewski Link: https://patch.msgid.link/20250424-mtk_star_emac-fix-spinlock-recursion-issue-v2-2-f3fde2e529d8@collabora.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 23115881d8e892..b175119a6a7da5 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1348,8 +1348,7 @@ static int mtk_star_rx_poll(struct napi_struct *napi, int budget) priv = container_of(napi, struct mtk_star_priv, rx_napi); work_done = mtk_star_rx(priv, budget); - if (work_done < budget) { - napi_complete_done(napi, work_done); + if (work_done < budget && napi_complete_done(napi, work_done)) { spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, true, false); spin_unlock_irqrestore(&priv->lock, flags); From 3318dc299b072a0511d6dfd8367f3304fb6d9827 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Apr 2025 17:16:16 +0100 Subject: [PATCH 1211/2924] irqchip/gic-v2m: Prevent use after free of gicv2m_get_fwnode() With ACPI in place, gicv2m_get_fwnode() is registered with the pci subsystem as pci_msi_get_fwnode_cb(), which may get invoked at runtime during a PCI host bridge probe. But, the call back is wrongly marked as __init, causing it to be freed, while being registered with the PCI subsystem and could trigger: Unable to handle kernel paging request at virtual address ffff8000816c0400 gicv2m_get_fwnode+0x0/0x58 (P) pci_set_bus_msi_domain+0x74/0x88 pci_register_host_bridge+0x194/0x548 This is easily reproducible on a Juno board with ACPI boot. Retain the function for later use. Fixes: 0644b3daca28 ("irqchip/gic-v2m: acpi: Introducing GICv2m ACPI support") Signed-off-by: Suzuki K Poulose Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org --- drivers/irqchip/irq-gic-v2m.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index c6989486186667..dc98c39d2b2034 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -421,7 +421,7 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, #ifdef CONFIG_ACPI static int acpi_num_msi; -static __init struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) +static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) { struct v2m_data *data; From bbce3de72be56e4b5f68924b7da9630cc89aa1a8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 25 Apr 2025 01:51:24 -0700 Subject: [PATCH 1212/2924] sched/eevdf: Fix se->slice being set to U64_MAX and resulting crash There is a code path in dequeue_entities() that can set the slice of a sched_entity to U64_MAX, which sometimes results in a crash. The offending case is when dequeue_entities() is called to dequeue a delayed group entity, and then the entity's parent's dequeue is delayed. In that case: 1. In the if (entity_is_task(se)) else block at the beginning of dequeue_entities(), slice is set to cfs_rq_min_slice(group_cfs_rq(se)). If the entity was delayed, then it has no queued tasks, so cfs_rq_min_slice() returns U64_MAX. 2. The first for_each_sched_entity() loop dequeues the entity. 3. If the entity was its parent's only child, then the next iteration tries to dequeue the parent. 4. If the parent's dequeue needs to be delayed, then it breaks from the first for_each_sched_entity() loop _without updating slice_. 5. The second for_each_sched_entity() loop sets the parent's ->slice to the saved slice, which is still U64_MAX. This throws off subsequent calculations with potentially catastrophic results. A manifestation we saw in production was: 6. In update_entity_lag(), se->slice is used to calculate limit, which ends up as a huge negative number. 7. limit is used in se->vlag = clamp(vlag, -limit, limit). Because limit is negative, vlag > limit, so se->vlag is set to the same huge negative number. 8. In place_entity(), se->vlag is scaled, which overflows and results in another huge (positive or negative) number. 9. The adjusted lag is subtracted from se->vruntime, which increases or decreases se->vruntime by a huge number. 10. pick_eevdf() calls entity_eligible()/vruntime_eligible(), which incorrectly returns false because the vruntime is so far from the other vruntimes on the queue, causing the (vruntime - cfs_rq->min_vruntime) * load calulation to overflow. 11. Nothing appears to be eligible, so pick_eevdf() returns NULL. 12. pick_next_entity() tries to dereference the return value of pick_eevdf() and crashes. Dumping the cfs_rq states from the core dumps with drgn showed tell-tale huge vruntime ranges and bogus vlag values, and I also traced se->slice being set to U64_MAX on live systems (which was usually "benign" since the rest of the runqueue needed to be in a particular state to crash). Fix it in dequeue_entities() by always setting slice from the first non-empty cfs_rq. Fixes: aef6987d8954 ("sched/eevdf: Propagate min_slice up the cgroup hierarchy") Signed-off-by: Omar Sandoval Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/f0c2d1072be229e1bdddc73c0703919a8b00c652.1745570998.git.osandov@fb.com --- kernel/sched/fair.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e43993a4e5807e..0fb9bf995a4795 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7081,9 +7081,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) h_nr_idle = task_has_idle_policy(p); if (task_sleep || task_delayed || !se->sched_delayed) h_nr_runnable = 1; - } else { - cfs_rq = group_cfs_rq(se); - slice = cfs_rq_min_slice(cfs_rq); } for_each_sched_entity(se) { @@ -7093,6 +7090,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) if (p && &p->se == se) return -1; + slice = cfs_rq_min_slice(cfs_rq); break; } From 831e3f545b0771f91fa94cdb8aa569a73b9ec580 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 24 Apr 2025 09:27:35 -0400 Subject: [PATCH 1213/2924] Revert "sunrpc: clean cache_detail immediately when flush is written frequently" Ondrej reports that certain SELinux tests are failing after commit fc2a169c56de ("sunrpc: clean cache_detail immediately when flush is written frequently"), merged during the v6.15 merge window. Reported-by: Ondrej Mosnacek Fixes: fc2a169c56de ("sunrpc: clean cache_detail immediately when flush is written frequently") Signed-off-by: Chuck Lever --- net/sunrpc/cache.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 004cdb59f0106c..7ce5e28a6c0316 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1536,13 +1536,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf, * or by one second if it has already reached the current time. * Newly added cache entries will always have ->last_refresh greater * that ->flush_time, so they don't get flushed prematurely. - * - * If someone frequently calls the flush interface, we should - * immediately clean the corresponding cache_detail instead of - * continuously accumulating nextcheck. */ - if (cd->flush_time >= now && cd->flush_time < (now + 5)) + if (cd->flush_time >= now) now = cd->flush_time + 1; cd->flush_time = now; From 7ec0987da2c903d58167573e58b39d7ed19fc627 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 24 Apr 2025 10:47:29 +0200 Subject: [PATCH 1214/2924] arm64: dts: rockchip: Align wifi node name with bindings in CB2 Since commit 3c3606793f7e ("dt-bindings: wireless: bcm4329-fmac: Use wireless-controller.yaml schema"), bindings expect 'wifi' as node name: rk3566-bigtreetech-cb2-manta.dtb: sdio-wifi@1: $nodename:0: 'sdio-wifi@1' does not match '^wifi(@.*)?$' Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250424084729.105182-1-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi index a483514717640f..e7ba477e75f9bf 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi @@ -775,7 +775,7 @@ rockchip,default-sample-phase = <90>; status = "okay"; - sdio-wifi@1 { + wifi@1 { compatible = "brcm,bcm4329-fmac"; reg = <1>; interrupt-parent = <&gpio2>; From 5e6a4ee9799b202fefa8c6264647971f892f0264 Mon Sep 17 00:00:00 2001 From: Tom Vincent Date: Thu, 17 Apr 2025 09:17:53 +0100 Subject: [PATCH 1215/2924] arm64: dts: rockchip: Assign RT5616 MCLK rate on rk3588-friendlyelec-cm3588 The Realtek RT5616 audio codec on the FriendlyElec CM3588 module fails to probe correctly due to the missing clock properties. This results in distorted analogue audio output. Assign MCLK to 12.288 MHz, which allows the codec to advertise most of the standard sample rates per other RK3588 devices. Fixes: e23819cf273c ("arm64: dts: rockchip: Add FriendlyElec CM3588 NAS board") Signed-off-by: Tom Vincent Link: https://lore.kernel.org/r/20250417081753.644950-1-linux@tlvince.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi index 1af0a30866f619..af431fdcbea7a6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi @@ -222,6 +222,10 @@ compatible = "realtek,rt5616"; reg = <0x1b>; #sound-dai-cells = <0>; + assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; + assigned-clock-rates = <12288000>; + clocks = <&cru I2S0_8CH_MCLKOUT>; + clock-names = "mclk"; }; }; From 573f99c7585f597630f14596550c79e73ffaeef4 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 13 Apr 2025 15:58:48 +0200 Subject: [PATCH 1216/2924] Revert "arm64: dts: allwinner: h6: Use RSB for AXP805 PMIC connection" This reverts commit 531fdbeedeb89bd32018a35c6e137765c9cc9e97. Hardware that uses I2C wasn't designed with high speeds in mind, so communication with PMIC via RSB can intermittently fail. Go back to I2C as higher speed and efficiency isn't worth the trouble. Fixes: 531fdbeedeb8 ("arm64: dts: allwinner: h6: Use RSB for AXP805 PMIC connection") Link: https://github.com/LibreELEC/LibreELEC.tv/issues/7731 Signed-off-by: Jernej Skrabec Link: https://patch.msgid.link/20250413135848.67283-1-jernej.skrabec@gmail.com Signed-off-by: Chen-Yu Tsai --- .../dts/allwinner/sun50i-h6-beelink-gs1.dts | 38 +++++++++---------- .../dts/allwinner/sun50i-h6-orangepi-3.dts | 14 +++---- .../dts/allwinner/sun50i-h6-orangepi.dtsi | 22 +++++------ 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 13a0e63afeaf3d..2c64d834a2c4f7 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -152,28 +152,12 @@ vcc-pg-supply = <®_aldo1>; }; -&r_ir { - linux,rc-map-name = "rc-beelink-gs1"; - status = "okay"; -}; - -&r_pio { - /* - * FIXME: We can't add that supply for now since it would - * create a circular dependency between pinctrl, the regulator - * and the RSB Bus. - * - * vcc-pl-supply = <®_aldo1>; - */ - vcc-pm-supply = <®_aldo1>; -}; - -&r_rsb { +&r_i2c { status = "okay"; - axp805: pmic@745 { + axp805: pmic@36 { compatible = "x-powers,axp805", "x-powers,axp806"; - reg = <0x745>; + reg = <0x36>; interrupt-parent = <&r_intc>; interrupts = ; interrupt-controller; @@ -291,6 +275,22 @@ }; }; +&r_ir { + linux,rc-map-name = "rc-beelink-gs1"; + status = "okay"; +}; + +&r_pio { + /* + * PL0 and PL1 are used for PMIC I2C + * don't enable the pl-supply else + * it will fail at boot + * + * vcc-pl-supply = <®_aldo1>; + */ + vcc-pm-supply = <®_aldo1>; +}; + &spdif { pinctrl-names = "default"; pinctrl-0 = <&spdif_tx_pin>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts index ab87c3447cd782..f005072c68a167 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts @@ -176,16 +176,12 @@ vcc-pg-supply = <®_vcc_wifi_io>; }; -&r_ir { - status = "okay"; -}; - -&r_rsb { +&r_i2c { status = "okay"; - axp805: pmic@745 { + axp805: pmic@36 { compatible = "x-powers,axp805", "x-powers,axp806"; - reg = <0x745>; + reg = <0x36>; interrupt-parent = <&r_intc>; interrupts = ; interrupt-controller; @@ -296,6 +292,10 @@ }; }; +&r_ir { + status = "okay"; +}; + &rtc { clocks = <&ext_osc32k>; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi index d05dc5d6e6b9f7..e34dbb9920216d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi @@ -113,20 +113,12 @@ vcc-pg-supply = <®_aldo1>; }; -&r_ir { - status = "okay"; -}; - -&r_pio { - vcc-pm-supply = <®_bldo3>; -}; - -&r_rsb { +&r_i2c { status = "okay"; - axp805: pmic@745 { + axp805: pmic@36 { compatible = "x-powers,axp805", "x-powers,axp806"; - reg = <0x745>; + reg = <0x36>; interrupt-parent = <&r_intc>; interrupts = ; interrupt-controller; @@ -241,6 +233,14 @@ }; }; +&r_ir { + status = "okay"; +}; + +&r_pio { + vcc-pm-supply = <®_bldo3>; +}; + &rtc { clocks = <&ext_osc32k>; }; From e8fa236e28811473db1594c597f974da0e9b753b Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Fri, 25 Apr 2025 18:36:18 +0800 Subject: [PATCH 1217/2924] ALSA: hda: Apply volume control on speaker+lineout for HP EliteStudio AIO This hardware has ALC274 codec with speaker NID 0x17 and line out NID 0x16 for audio output. The line out is routed correctly but the speaker is not. Thus the volume can't be controlled. This patch removes DAC NID 0x06 (without volume control) from the connection list for speaker NID 0x17. Routing both speaker and line out pins to DAC NID 0x02 which controls the output volume. Signed-off-by: Chris Chiu Link: https://patch.msgid.link/20250425103618.534951-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8ed613932c5b0e..38ed264c3a49db 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6742,6 +6742,25 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec, codec->power_save_node = 0; } +/* avoid DAC 0x06 for speaker switch 0x17; it has no volume control */ +static void alc274_fixup_hp_aio_bind_dacs(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const hda_nid_t conn[] = { 0x02, 0x03 }; /* exclude 0x06 */ + /* The speaker is routed to the Node 0x06 by a mistake, thus the + * speaker's volume can't be adjusted since the node doesn't have + * Amp-out capability. Assure the speaker and lineout pin to be + * coupled with DAC NID 0x02. + */ + static const hda_nid_t preferred_pairs[] = { + 0x16, 0x02, 0x17, 0x02, 0x21, 0x03, 0 + }; + struct alc_spec *spec = codec->spec; + + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + spec->gen.preferred_dacs = preferred_pairs; +} + /* avoid DAC 0x06 for bass speaker 0x17; it has no volume control */ static void alc289_fixup_asus_ga401(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -7970,6 +7989,7 @@ enum { ALC294_FIXUP_BASS_SPEAKER_15, ALC283_FIXUP_DELL_HP_RESUME, ALC294_FIXUP_ASUS_CS35L41_SPI_2, + ALC274_FIXUP_HP_AIO_BIND_DACS, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -10340,6 +10360,10 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC, }, + [ALC274_FIXUP_HP_AIO_BIND_DACS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc274_fixup_hp_aio_bind_dacs, + }, }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -10774,6 +10798,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d18, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 14 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d86, "HP Elite X360 14 G12", ALC285_FIXUP_HP_GPIO_LED), @@ -10793,6 +10818,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8da1, "HP 16 Clipper OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da7, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8dd4, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), From 56651128e2fbad80f632f388d6bf1f39c928267a Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Thu, 3 Apr 2025 18:11:42 +0200 Subject: [PATCH 1218/2924] MIPS: Fix idle VS timer enqueue MIPS re-enables interrupts on its idle routine and performs a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep. The IRQs firing between the check and the 'wait' instruction may set the TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs interrupting __r4k_wait() rollback their return address to the beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked again before going back to sleep. However idle IRQs can also queue timers that may require a tick reprogramming through a new generic idle loop iteration but those timers would go unnoticed here because __r4k_wait() only checks TIF_NEED_RESCHED. It doesn't check for pending timers. Fix this with fast-forwarding idle IRQs return address to the end of the idle routine instead of the beginning, so that the generic idle loop handles both TIF_NEED_RESCHED and pending timers. CONFIG_CPU_MICROMIPS has been removed along with the nop instructions. There, NOPs are 2 byte in size, so change the code with 3 _ssnop which are always 4 byte and remove the ifdef. Added ehb to make sure the hazard is always cleared. Fixes: c65a5480ff29 ("[MIPS] Fix potential latency problem due to non-atomic cpu_wait.") Signed-off-by: Marco Crivellari Signed-off-by: Maciej W. Rozycki Acked-by: Frederic Weisbecker Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/idle.h | 3 +- arch/mips/kernel/genex.S | 62 +++++++++++++++++++++--------------- arch/mips/kernel/idle.c | 7 ---- 3 files changed, 37 insertions(+), 35 deletions(-) diff --git a/arch/mips/include/asm/idle.h b/arch/mips/include/asm/idle.h index 0992cad9c632ec..2bc3678455ed0f 100644 --- a/arch/mips/include/asm/idle.h +++ b/arch/mips/include/asm/idle.h @@ -6,8 +6,7 @@ #include extern void (*cpu_wait)(void); -extern void r4k_wait(void); -extern asmlinkage void __r4k_wait(void); +extern asmlinkage void r4k_wait(void); extern void r4k_wait_irqoff(void); static inline int using_rollback_handler(void) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index a572ce36a24f21..46d975d00298dc 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -104,42 +104,52 @@ handle_vcei: __FINIT - .align 5 /* 32 byte rollback region */ -LEAF(__r4k_wait) - .set push - .set noreorder - /* start of rollback region */ - LONG_L t0, TI_FLAGS($28) - nop - andi t0, _TIF_NEED_RESCHED - bnez t0, 1f - nop - nop - nop -#ifdef CONFIG_CPU_MICROMIPS - nop - nop - nop - nop -#endif + /* Align to 32 bytes for the maximum idle interrupt region size. */ + .align 5 +LEAF(r4k_wait) + /* Keep the ISA bit clear for calculations on local labels here. */ +0: .fill 0 + /* Start of idle interrupt region. */ + local_irq_enable + /* + * If an interrupt lands here, before going idle on the next + * instruction, we must *NOT* go idle since the interrupt could + * have set TIF_NEED_RESCHED or caused a timer to need resched. + * Fall through -- see rollback_handler below -- and have the + * idle loop take care of things. + */ +1: .fill 0 + /* The R2 EI/EHB sequence takes 8 bytes, otherwise pad up. */ + .if 1b - 0b > 32 + .error "overlong idle interrupt region" + .elseif 1b - 0b > 8 + .align 4 + .endif +2: .fill 0 + .equ r4k_wait_idle_size, 2b - 0b + /* End of idle interrupt region; size has to be a power of 2. */ .set MIPS_ISA_ARCH_LEVEL_RAW +r4k_wait_insn: wait - /* end of rollback region (the region size must be power of two) */ -1: +r4k_wait_exit: + .set mips0 + local_irq_disable jr ra - nop - .set pop - END(__r4k_wait) + END(r4k_wait) + .previous .macro BUILD_ROLLBACK_PROLOGUE handler FEXPORT(rollback_\handler) .set push .set noat MFC0 k0, CP0_EPC - PTR_LA k1, __r4k_wait - ori k0, 0x1f /* 32 byte rollback region */ - xori k0, 0x1f + /* Subtract/add 2 to let the ISA bit propagate through the mask. */ + PTR_LA k1, r4k_wait_insn - 2 + ori k0, r4k_wait_idle_size - 2 + .set noreorder bne k0, k1, \handler + PTR_ADDIU k0, r4k_wait_exit - r4k_wait_insn + 2 + .set reorder MTC0 k0, CP0_EPC .set pop .endm diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 5abc8b7340f887..80e8a04a642e05 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -35,13 +35,6 @@ static void __cpuidle r3081_wait(void) write_c0_conf(cfg | R30XX_CONF_HALT); } -void __cpuidle r4k_wait(void) -{ - raw_local_irq_enable(); - __r4k_wait(); - raw_local_irq_disable(); -} - /* * This variant is preferable as it allows testing need_resched and going to * sleep depending on the outcome atomically. Unfortunately the "It is From b713f27e32d87c35737ec942dd6f5ed6b7475f48 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Thu, 3 Apr 2025 18:11:43 +0200 Subject: [PATCH 1219/2924] MIPS: Move r4k_wait() to .cpuidle.text section Fix missing .cpuidle.text section assignment for r4k_wait() to correct backtracing with nmi_backtrace(). Fixes: 97c8580e85cf ("MIPS: Annotate cpu_wait implementations with __cpuidle") Signed-off-by: Marco Crivellari Signed-off-by: Maciej W. Rozycki Acked-by: Frederic Weisbecker Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/genex.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 46d975d00298dc..2cf312d9a3b096 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -104,6 +104,7 @@ handle_vcei: __FINIT + .section .cpuidle.text,"ax" /* Align to 32 bytes for the maximum idle interrupt region size. */ .align 5 LEAF(r4k_wait) From be0c40da888840fe91b45474cb70779e6cbaf7ca Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 27 Apr 2025 10:10:34 +0200 Subject: [PATCH 1220/2924] ALSA: hda/realtek: Add quirk for HP Spectre x360 15-df1xxx HP Spectre x360 15-df1xxx with SSID 13c:863e requires similar workarounds that were applied to another HP Spectre x360 models; it has a mute LED only, no micmute LEDs, and needs the speaker GPIO seup. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220054 Link: https://patch.msgid.link/20250427081035.11567-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 38ed264c3a49db..1799203af35a85 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6982,6 +6982,41 @@ static void alc285_fixup_hp_spectre_x360_eb1(struct hda_codec *codec, } } +/* GPIO1 = amplifier on/off */ +static void alc285_fixup_hp_spectre_x360_df1(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + static const hda_nid_t conn[] = { 0x02 }; + static const struct hda_pintbl pincfgs[] = { + { 0x14, 0x90170110 }, /* front/high speakers */ + { 0x17, 0x90170130 }, /* back/bass speakers */ + { } + }; + + // enable mute led + alc285_fixup_hp_mute_led_coefbit(codec, fix, action); + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + /* needed for amp of back speakers */ + spec->gpio_mask |= 0x01; + spec->gpio_dir |= 0x01; + snd_hda_apply_pincfgs(codec, pincfgs); + /* share DAC to have unified volume control */ + snd_hda_override_conn_list(codec, 0x14, ARRAY_SIZE(conn), conn); + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + break; + case HDA_FIXUP_ACT_INIT: + /* need to toggle GPIO to enable the amp of back speakers */ + alc_update_gpio_data(codec, 0x01, true); + msleep(100); + alc_update_gpio_data(codec, 0x01, false); + break; + } +} + static void alc285_fixup_hp_spectre_x360(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -7780,6 +7815,7 @@ enum { ALC280_FIXUP_HP_9480M, ALC245_FIXUP_HP_X360_AMP, ALC285_FIXUP_HP_SPECTRE_X360_EB1, + ALC285_FIXUP_HP_SPECTRE_X360_DF1, ALC285_FIXUP_HP_ENVY_X360, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -9857,6 +9893,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_spectre_x360_eb1 }, + [ALC285_FIXUP_HP_SPECTRE_X360_DF1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_spectre_x360_df1 + }, [ALC285_FIXUP_HP_ENVY_X360] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_envy_x360, @@ -10588,6 +10628,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x86c1, "HP Laptop 15-da3001TU", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x863e, "HP Spectre x360 15-df1xxx", ALC285_FIXUP_HP_SPECTRE_X360_DF1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86f9, "HP Spectre x360 13-aw0xxx", ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), @@ -11520,6 +11561,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360_EB1, .name = "alc285-hp-spectre-x360-eb1"}, + {.id = ALC285_FIXUP_HP_SPECTRE_X360_DF1, .name = "alc285-hp-spectre-x360-df1"}, {.id = ALC285_FIXUP_HP_ENVY_X360, .name = "alc285-hp-envy-x360"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, .name = "alc287-yoga9-bass-spk-pin"}, From cc3e3d3a9d09456cf21694b7ea8b9d781e85fda3 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Sat, 5 Apr 2025 16:37:05 +0200 Subject: [PATCH 1221/2924] MIPS: rename rollback_handler with skipover_handler Recently the rollback region has been changed into an idle interrupt region [1]. This patch make the appropriate changes renaming functions and macro, to reflect the change. [1] https://lore.kernel.org/linux-mips/20250403161143.361461-2-marco.crivellari@suse.com/ Signed-off-by: Marco Crivellari Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/idle.h | 2 +- arch/mips/kernel/genex.S | 10 +++++----- arch/mips/kernel/traps.c | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/mips/include/asm/idle.h b/arch/mips/include/asm/idle.h index 2bc3678455ed0f..c7d75807d13fe6 100644 --- a/arch/mips/include/asm/idle.h +++ b/arch/mips/include/asm/idle.h @@ -9,7 +9,7 @@ extern void (*cpu_wait)(void); extern asmlinkage void r4k_wait(void); extern void r4k_wait_irqoff(void); -static inline int using_rollback_handler(void) +static inline int using_skipover_handler(void) { return cpu_wait == r4k_wait; } diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 2cf312d9a3b096..08c0a01d9a298c 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -116,7 +116,7 @@ LEAF(r4k_wait) * If an interrupt lands here, before going idle on the next * instruction, we must *NOT* go idle since the interrupt could * have set TIF_NEED_RESCHED or caused a timer to need resched. - * Fall through -- see rollback_handler below -- and have the + * Fall through -- see skipover_handler below -- and have the * idle loop take care of things. */ 1: .fill 0 @@ -139,8 +139,8 @@ r4k_wait_exit: END(r4k_wait) .previous - .macro BUILD_ROLLBACK_PROLOGUE handler - FEXPORT(rollback_\handler) + .macro BUILD_SKIPOVER_PROLOGUE handler + FEXPORT(skipover_\handler) .set push .set noat MFC0 k0, CP0_EPC @@ -156,7 +156,7 @@ r4k_wait_exit: .endm .align 5 -BUILD_ROLLBACK_PROLOGUE handle_int +BUILD_SKIPOVER_PROLOGUE handle_int NESTED(handle_int, PT_SIZE, sp) .cfi_signal_frame #ifdef CONFIG_TRACE_IRQFLAGS @@ -276,7 +276,7 @@ NESTED(except_vec_ejtag_debug, 0, sp) * This prototype is copied to ebase + n*IntCtl.VS and patched * to invoke the handler */ -BUILD_ROLLBACK_PROLOGUE except_vec_vi +BUILD_SKIPOVER_PROLOGUE except_vec_vi NESTED(except_vec_vi, 0, sp) SAVE_SOME docfi=1 SAVE_AT docfi=1 diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 39e248d0ed59ac..8ec1e185b35ce0 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -77,7 +77,7 @@ #include "access-helper.h" extern void check_wait(void); -extern asmlinkage void rollback_handle_int(void); +extern asmlinkage void skipover_handle_int(void); extern asmlinkage void handle_int(void); extern asmlinkage void handle_adel(void); extern asmlinkage void handle_ades(void); @@ -2066,7 +2066,7 @@ void *set_vi_handler(int n, vi_handler_t addr) { extern const u8 except_vec_vi[]; extern const u8 except_vec_vi_ori[], except_vec_vi_end[]; - extern const u8 rollback_except_vec_vi[]; + extern const u8 skipover_except_vec_vi[]; unsigned long handler; unsigned long old_handler = vi_handlers[n]; int srssets = current_cpu_data.srsets; @@ -2095,7 +2095,7 @@ void *set_vi_handler(int n, vi_handler_t addr) change_c0_srsmap(0xf << n*4, 0 << n*4); } - vec_start = using_rollback_handler() ? rollback_except_vec_vi : + vec_start = using_skipover_handler() ? skipover_except_vec_vi : except_vec_vi; #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN) ori_offset = except_vec_vi_ori - vec_start + 2; @@ -2426,8 +2426,8 @@ void __init trap_init(void) if (board_be_init) board_be_init(); - set_except_vector(EXCCODE_INT, using_rollback_handler() ? - rollback_handle_int : handle_int); + set_except_vector(EXCCODE_INT, using_skipover_handler() ? + skipover_handle_int : handle_int); set_except_vector(EXCCODE_MOD, handle_tlbm); set_except_vector(EXCCODE_TLBL, handle_tlbl); set_except_vector(EXCCODE_TLBS, handle_tlbs); From 7f74c066e5d920b3a2f0f936060984e3b3709250 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 7 Apr 2025 18:32:21 +0200 Subject: [PATCH 1222/2924] MIPS: CPS: Fix potential NULL pointer dereferences in cps_prepare_cpus() Check the return values of kcalloc() and exit early to avoid potential NULL pointer dereferences. Compile-tested only. Cc: stable@vger.kernel.org Fixes: 75fa6a583882e ("MIPS: CPS: Introduce struct cluster_boot_config") Fixes: 0856c143e1cd3 ("MIPS: CPS: Boot CPUs in secondary clusters") Signed-off-by: Thorsten Blum Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/smp-cps.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index e85bd087467e8c..cc26d56f3ab61a 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -332,6 +332,8 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) mips_cps_cluster_bootcfg = kcalloc(nclusters, sizeof(*mips_cps_cluster_bootcfg), GFP_KERNEL); + if (!mips_cps_cluster_bootcfg) + goto err_out; if (nclusters > 1) mips_cm_update_property(); @@ -348,6 +350,8 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) mips_cps_cluster_bootcfg[cl].core_power = kcalloc(BITS_TO_LONGS(ncores), sizeof(unsigned long), GFP_KERNEL); + if (!mips_cps_cluster_bootcfg[cl].core_power) + goto err_out; /* Allocate VPE boot configuration structs */ for (c = 0; c < ncores; c++) { From 5591ce0069ddda97cdbbea596bed53e698f399c2 Mon Sep 17 00:00:00 2001 From: Wojciech Dubowik Date: Thu, 24 Apr 2025 11:59:14 +0200 Subject: [PATCH 1223/2924] arm64: dts: imx8mm-verdin: Link reg_usdhc2_vqmmc to usdhc2 Define vqmmc regulator-gpio for usdhc2 with vin-supply coming from LDO5. Without this definition LDO5 will be powered down, disabling SD card after bootup. This has been introduced in commit f5aab0438ef1 ("regulator: pca9450: Fix enable register for LDO5"). Fixes: 6a57f224f734 ("arm64: dts: freescale: add initial support for verdin imx8m mini") Fixes: f5aab0438ef1 ("regulator: pca9450: Fix enable register for LDO5") Tested-by: Manuel Traut Reviewed-by: Philippe Schenker Tested-by: Francesco Dolcini Reviewed-by: Francesco Dolcini Cc: stable@vger.kernel.org Signed-off-by: Wojciech Dubowik Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mm-verdin.dtsi | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index 7251ad3a0017c8..b46566f3ce2056 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -144,6 +144,19 @@ startup-delay-us = <20000>; }; + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc2_vsel>; + gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>; + regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; + states = <1800000 0x1>, + <3300000 0x0>; + regulator-name = "PMIC_USDHC_VSELECT"; + vin-supply = <®_nvcc_sd>; + }; + reserved-memory { #address-cells = <2>; #size-cells = <2>; @@ -269,7 +282,7 @@ "SODIMM_19", "", "", - "", + "PMIC_USDHC_VSELECT", "", "", "", @@ -785,6 +798,7 @@ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_cd>; pinctrl-3 = <&pinctrl_usdhc2_sleep>, <&pinctrl_usdhc2_cd_sleep>; vmmc-supply = <®_usdhc2_vmmc>; + vqmmc-supply = <®_usdhc2_vqmmc>; }; &wdog1 { @@ -1206,13 +1220,17 @@ ; /* SODIMM 76 */ }; + pinctrl_usdhc2_vsel: usdhc2vselgrp { + fsl,pins = + ; /* PMIC_USDHC_VSELECT */ + }; + /* * Note: Due to ERR050080 we use discrete external on-module resistors pulling-up to the * on-module +V3.3_1.8_SD (LDO5) rail and explicitly disable the internal pull-ups here. */ pinctrl_usdhc2: usdhc2grp { fsl,pins = - , , /* SODIMM 78 */ , /* SODIMM 74 */ , /* SODIMM 80 */ @@ -1223,7 +1241,6 @@ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = - , , , , @@ -1234,7 +1251,6 @@ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = - , , , , @@ -1246,7 +1262,6 @@ /* Avoid backfeeding with removed card power */ pinctrl_usdhc2_sleep: usdhc2slpgrp { fsl,pins = - , , , , From b4432656b36e5cc1d50a1f2dc15357543add530e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Apr 2025 15:19:23 -0700 Subject: [PATCH 1224/2924] Linux 6.15-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 07f818186151a1..5aa9ee52a765b7 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* From 2d7f844ffac683dea7b1697c016e63081199383e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 6 Apr 2025 10:38:52 +0200 Subject: [PATCH 1225/2924] NFSv4: Handle fatal ENETDOWN and ENETUNREACH errors Ensure that the NFSv4 error handling code recognises the RPC_TASK_NETUNREACH_FATAL flag, and handles the ENETDOWN and ENETUNREACH errors accordingly. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton --- fs/nfs/nfs4proc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 970f28dbf2539e..1f7cc260b007ef 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -671,6 +671,15 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, struct nfs_client *clp = server->nfs_client; int ret; + if ((task->tk_rpc_status == -ENETDOWN || + task->tk_rpc_status == -ENETUNREACH) && + task->tk_flags & RPC_TASK_NETUNREACH_FATAL) { + exception->delay = 0; + exception->recovering = 0; + exception->retry = 0; + return -EIO; + } + ret = nfs4_do_handle_exception(server, errorcode, exception); if (exception->delay) { int ret2 = nfs4_exception_should_retrans(server, exception); From 440caf8ee2743d44ae5a6da209854188110993fa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 6 Apr 2025 11:05:27 +0200 Subject: [PATCH 1226/2924] NFSv4/pnfs: Layoutreturn on close must handle fatal networking errors If we have a fatal ENETDOWN or ENETUNREACH error, then the layoutreturn on close code should also handle that as fatal, and free the layouts. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton --- fs/nfs/pnfs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5f582713bf05eb..10fdd065a61c23 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1661,6 +1661,18 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, /* Was there an RPC level error? If not, retry */ if (task->tk_rpc_status == 0) break; + /* + * Is there a fatal network level error? + * If so release the layout, but flag the error. + */ + if ((task->tk_rpc_status == -ENETDOWN || + task->tk_rpc_status == -ENETUNREACH) && + task->tk_flags & RPC_TASK_NETUNREACH_FATAL) { + *ret = 0; + (*respp)->lrs_present = 0; + retval = -EIO; + break; + } /* If the call was not sent, let caller handle it */ if (!RPC_WAS_SENT(task)) return 0; From bead8b4953f466514f28e552269110b23a19d6f0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 7 Apr 2025 14:36:41 +0200 Subject: [PATCH 1227/2924] pNFS/flexfiles: Record the RPC errors in the I/O tracepoints When debugging I/O issues, we want to see not just the NFS level errors, but also the RPC level problems, so record both in the tracepoints. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 6 ++--- fs/nfs/nfs4trace.h | 34 +++++++++++++++++--------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 61ad269c825ff0..e6909cafab6864 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1329,7 +1329,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, hdr->args.offset, hdr->args.count, &hdr->res.op_status, OP_READ, task->tk_status); - trace_ff_layout_read_error(hdr); + trace_ff_layout_read_error(hdr, task->tk_status); } err = ff_layout_async_handle_error(task, hdr->args.context->state, @@ -1502,7 +1502,7 @@ static int ff_layout_write_done_cb(struct rpc_task *task, hdr->args.offset, hdr->args.count, &hdr->res.op_status, OP_WRITE, task->tk_status); - trace_ff_layout_write_error(hdr); + trace_ff_layout_write_error(hdr, task->tk_status); } err = ff_layout_async_handle_error(task, hdr->args.context->state, @@ -1551,7 +1551,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, data->args.offset, data->args.count, &data->res.op_status, OP_COMMIT, task->tk_status); - trace_ff_layout_commit_error(data); + trace_ff_layout_commit_error(data, task->tk_status); } err = ff_layout_async_handle_error(task, NULL, data->ds_clp, diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index bc67fe6801b138..deab4c0e21a064 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2051,13 +2051,15 @@ TRACE_EVENT(fl_getdevinfo, DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_PROTO( - const struct nfs_pgio_header *hdr + const struct nfs_pgio_header *hdr, + int error ), - TP_ARGS(hdr), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(unsigned long, error) + __field(unsigned long, nfs_error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -2073,7 +2075,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_fast_assign( const struct inode *inode = hdr->inode; - __entry->error = hdr->res.op_status; + __entry->error = -error; + __entry->nfs_error = hdr->res.op_status; __entry->fhandle = nfs_fhandle_hash(hdr->args.fh); __entry->fileid = NFS_FILEID(inode); __entry->dev = inode->i_sb->s_dev; @@ -2088,7 +2091,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s", + "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s " + "nfs_error=%lu (%s)", -__entry->error, show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -2096,28 +2100,32 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, __entry->fhandle, __entry->offset, __entry->count, __entry->stateid_seq, __entry->stateid_hash, - __get_str(dstaddr) + __get_str(dstaddr), __entry->nfs_error, + show_nfs4_status(__entry->nfs_error) ) ); #define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \ DEFINE_EVENT(nfs4_flexfiles_io_event, name, \ TP_PROTO( \ - const struct nfs_pgio_header *hdr \ + const struct nfs_pgio_header *hdr, \ + int error \ ), \ - TP_ARGS(hdr)) + TP_ARGS(hdr, error)) DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error); DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error); TRACE_EVENT(ff_layout_commit_error, TP_PROTO( - const struct nfs_commit_data *data + const struct nfs_commit_data *data, + int error ), - TP_ARGS(data), + TP_ARGS(data, error), TP_STRUCT__entry( __field(unsigned long, error) + __field(unsigned long, nfs_error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -2131,7 +2139,8 @@ TRACE_EVENT(ff_layout_commit_error, TP_fast_assign( const struct inode *inode = data->inode; - __entry->error = data->res.op_status; + __entry->error = -error; + __entry->nfs_error = data->res.op_status; __entry->fhandle = nfs_fhandle_hash(data->args.fh); __entry->fileid = NFS_FILEID(inode); __entry->dev = inode->i_sb->s_dev; @@ -2142,14 +2151,15 @@ TRACE_EVENT(ff_layout_commit_error, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%llu count=%u dstaddr=%s", + "offset=%llu count=%u dstaddr=%s nfs_error=%lu (%s)", -__entry->error, show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->offset, __entry->count, - __get_str(dstaddr) + __get_str(dstaddr), __entry->nfs_error, + show_nfs4_status(__entry->nfs_error) ) ); From c457dc1ec770a22636b473ce5d35614adfe97636 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Thu, 17 Apr 2025 15:25:08 +0800 Subject: [PATCH 1228/2924] nfs: handle failure of nfs_get_lock_context in unlock path When memory is insufficient, the allocation of nfs_lock_context in nfs_get_lock_context() fails and returns -ENOMEM. If we mistakenly treat an nfs4_unlockdata structure (whose l_ctx member has been set to -ENOMEM) as valid and proceed to execute rpc_run_task(), this will trigger a NULL pointer dereference in nfs4_locku_prepare. For example: BUG: kernel NULL pointer dereference, address: 000000000000000c PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP PTI CPU: 15 UID: 0 PID: 12 Comm: kworker/u64:0 Not tainted 6.15.0-rc2-dirty #60 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 Workqueue: rpciod rpc_async_schedule RIP: 0010:nfs4_locku_prepare+0x35/0xc2 Code: 89 f2 48 89 fd 48 c7 c7 68 69 ef b5 53 48 8b 8e 90 00 00 00 48 89 f3 RSP: 0018:ffffbbafc006bdb8 EFLAGS: 00010246 RAX: 000000000000004b RBX: ffff9b964fc1fa00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: fffffffffffffff4 RDI: ffff9ba53fddbf40 RBP: ffff9ba539934000 R08: 0000000000000000 R09: ffffbbafc006bc38 R10: ffffffffb6b689c8 R11: 0000000000000003 R12: ffff9ba539934030 R13: 0000000000000001 R14: 0000000004248060 R15: ffffffffb56d1c30 FS: 0000000000000000(0000) GS:ffff9ba5881f0000(0000) knlGS:00000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000000c CR3: 000000093f244000 CR4: 00000000000006f0 Call Trace: __rpc_execute+0xbc/0x480 rpc_async_schedule+0x2f/0x40 process_one_work+0x232/0x5d0 worker_thread+0x1da/0x3d0 ? __pfx_worker_thread+0x10/0x10 kthread+0x10d/0x240 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Modules linked in: CR2: 000000000000000c ---[ end trace 0000000000000000 ]--- Free the allocated nfs4_unlockdata when nfs_get_lock_context() fails and return NULL to terminate subsequent rpc_run_task, preventing NULL pointer dereference. Fixes: f30cb757f680 ("NFS: Always wait for I/O completion before unlock") Signed-off-by: Li Lingfeng Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20250417072508.3850532-1-lilingfeng3@huawei.com Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1f7cc260b007ef..b1d2122bd5a749 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7083,10 +7083,18 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, struct nfs4_unlockdata *p; struct nfs4_state *state = lsp->ls_state; struct inode *inode = state->inode; + struct nfs_lock_context *l_ctx; p = kzalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) return NULL; + l_ctx = nfs_get_lock_context(ctx); + if (!IS_ERR(l_ctx)) { + p->l_ctx = l_ctx; + } else { + kfree(p); + return NULL; + } p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; p->arg.seqid = seqid; @@ -7094,7 +7102,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->lsp = lsp; /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); - p->l_ctx = nfs_get_lock_context(ctx); locks_init_lock(&p->fl); locks_copy_lock(&p->fl, fl); p->server = NFS_SERVER(inode); From 6b9785dc8b13d9fb75ceec8cf4ea7ec3f3b1edbc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Apr 2025 16:42:03 -0400 Subject: [PATCH 1229/2924] nfs: don't share pNFS DS connections between net namespaces Currently, different NFS clients can share the same DS connections, even when they are in different net namespaces. If a containerized client creates a DS connection, another container can find and use it. When the first client exits, the connection will close which can lead to stalls in other clients. Add a net namespace pointer to struct nfs4_pnfs_ds, and compare those value to the caller's netns in _data_server_lookup_locked() when searching for a nfs4_pnfs_ds to match. Reported-by: Omar Sandoval Reported-by: Sargun Dillon Closes: https://lore.kernel.org/linux-nfs/Z_ArpQC_vREh_hEA@telecaster/ Tested-by: Sargun Dillon Signed-off-by: Jeff Layton Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/20250410-nfs-ds-netns-v2-1-f80b7979ba80@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayoutdev.c | 6 +++--- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 6 +++--- fs/nfs/pnfs.h | 4 +++- fs/nfs/pnfs_nfs.c | 9 +++++---- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 4fa304fa5bc4b2..29d9234d5c085f 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -76,6 +76,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, struct page *scratch; struct list_head dsaddrs; struct nfs4_pnfs_ds_addr *da; + struct net *net = server->nfs_client->cl_net; /* set up xdr stream */ scratch = alloc_page(gfp_flags); @@ -159,8 +160,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, mp_count = be32_to_cpup(p); /* multipath count */ for (j = 0; j < mp_count; j++) { - da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, - &stream, gfp_flags); + da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } @@ -170,7 +170,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out_err_free_deviceid; } - dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags); if (!dsaddr->ds_list[i]) goto out_err_drain_dsaddrs; trace_fl_getdevinfo(server, &pdev->dev_id, dsaddr->ds_list[i]->ds_remotestr); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e58bedfb1dcc14..4a304cf17c4b07 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -49,6 +49,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, struct nfs4_pnfs_ds_addr *da; struct nfs4_ff_layout_ds *new_ds = NULL; struct nfs4_ff_ds_version *ds_versions = NULL; + struct net *net = server->nfs_client->cl_net; u32 mp_count; u32 version_count; __be32 *p; @@ -80,8 +81,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, for (i = 0; i < mp_count; i++) { /* multipath ds */ - da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, - &stream, gfp_flags); + da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } @@ -149,7 +149,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, new_ds->ds_versions = ds_versions; new_ds->ds_versions_cnt = version_count; - new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + new_ds->ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags); if (!new_ds->ds) goto out_err_drain_dsaddrs; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 30d2613e912b88..91ff877185c8af 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -60,6 +60,7 @@ struct nfs4_pnfs_ds { struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ char *ds_remotestr; /* comma sep list of addrs */ struct list_head ds_addrs; + const struct net *ds_net; struct nfs_client *ds_clp; refcount_t ds_count; unsigned long ds_state; @@ -415,7 +416,8 @@ int pnfs_generic_commit_pagelist(struct inode *inode, int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max); void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); -struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, +struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(const struct net *net, + struct list_head *dsaddrs, gfp_t gfp_flags); void nfs4_pnfs_v3_ds_connect_unload(void); int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index dbef837e871ad4..2ee20a0f0b36d3 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -604,12 +604,12 @@ _same_data_server_addrs_locked(const struct list_head *dsaddrs1, * Lookup DS by addresses. nfs4_ds_cache_lock is held */ static struct nfs4_pnfs_ds * -_data_server_lookup_locked(const struct list_head *dsaddrs) +_data_server_lookup_locked(const struct net *net, const struct list_head *dsaddrs) { struct nfs4_pnfs_ds *ds; list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) - if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) + if (ds->ds_net == net && _same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) return ds; return NULL; } @@ -716,7 +716,7 @@ nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) * uncached and return cached struct nfs4_pnfs_ds. */ struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) +nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags) { struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; char *remotestr; @@ -734,13 +734,14 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(dsaddrs); + tmp_ds = _data_server_lookup_locked(net, dsaddrs); if (tmp_ds == NULL) { INIT_LIST_HEAD(&ds->ds_addrs); list_splice_init(dsaddrs, &ds->ds_addrs); ds->ds_remotestr = remotestr; refcount_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); + ds->ds_net = net; ds->ds_clp = NULL; list_add(&ds->ds_node, &nfs4_data_server_cache); dprintk("%s add new data server %s\n", __func__, From d5fb22a7c585b12ec3e6cef150689f7386e8cfd0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Apr 2025 16:42:04 -0400 Subject: [PATCH 1230/2924] nfs: move the nfs4_data_server_cache into struct nfs_net Since struct nfs4_pnfs_ds should not be shared between net namespaces, move from a global list of objects to a per-netns list and spinlock. Tested-by: Sargun Dillon Signed-off-by: Jeff Layton Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/20250410-nfs-ds-netns-v2-2-f80b7979ba80@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 7 +++++++ fs/nfs/netns.h | 6 +++++- fs/nfs/pnfs_nfs.c | 31 +++++++++++++++++-------------- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 02c916a550205f..2115c1189c2df1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1199,6 +1199,10 @@ void nfs_clients_init(struct net *net) INIT_LIST_HEAD(&nn->nfs_volume_list); #if IS_ENABLED(CONFIG_NFS_V4) idr_init(&nn->cb_ident_idr); +#endif +#if IS_ENABLED(CONFIG_NFS_V4_1) + INIT_LIST_HEAD(&nn->nfs4_data_server_cache); + spin_lock_init(&nn->nfs4_data_server_lock); #endif spin_lock_init(&nn->nfs_client_lock); nn->boot_time = ktime_get_real(); @@ -1216,6 +1220,9 @@ void nfs_clients_exit(struct net *net) nfs_cleanup_cb_ident_idr(net); WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); +#if IS_ENABLED(CONFIG_NFS_V4_1) + WARN_ON_ONCE(!list_empty(&nn->nfs4_data_server_cache)); +#endif } #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index a68b21603ea9a8..6ba3ea39e928c0 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -31,7 +31,11 @@ struct nfs_net { unsigned short nfs_callback_tcpport; unsigned short nfs_callback_tcpport6; int cb_users[NFS4_MAX_MINOR_VERSION + 1]; -#endif +#endif /* CONFIG_NFS_V4 */ +#if IS_ENABLED(CONFIG_NFS_V4_1) + struct list_head nfs4_data_server_cache; + spinlock_t nfs4_data_server_lock; +#endif /* CONFIG_NFS_V4_1 */ struct nfs_netns_client *nfs_client; spinlock_t nfs_client_lock; ktime_t boot_time; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 2ee20a0f0b36d3..91ef486f40b943 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -16,6 +16,7 @@ #include "nfs4session.h" #include "internal.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_PNFS @@ -504,14 +505,14 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); /* * Data server cache * - * Data servers can be mapped to different device ids. - * nfs4_pnfs_ds reference counting + * Data servers can be mapped to different device ids, but should + * never be shared between net namespaces. + * + * nfs4_pnfs_ds reference counting: * - set to 1 on allocation * - incremented when a device id maps a data server already in the cache. * - decremented when deviceid is removed from the cache. */ -static DEFINE_SPINLOCK(nfs4_ds_cache_lock); -static LIST_HEAD(nfs4_data_server_cache); /* Debug routines */ static void @@ -604,12 +605,12 @@ _same_data_server_addrs_locked(const struct list_head *dsaddrs1, * Lookup DS by addresses. nfs4_ds_cache_lock is held */ static struct nfs4_pnfs_ds * -_data_server_lookup_locked(const struct net *net, const struct list_head *dsaddrs) +_data_server_lookup_locked(const struct nfs_net *nn, const struct list_head *dsaddrs) { struct nfs4_pnfs_ds *ds; - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) - if (ds->ds_net == net && _same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) + list_for_each_entry(ds, &nn->nfs4_data_server_cache, ds_node) + if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) return ds; return NULL; } @@ -653,10 +654,11 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds) void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds) { - if (refcount_dec_and_lock(&ds->ds_count, - &nfs4_ds_cache_lock)) { + struct nfs_net *nn = net_generic(ds->ds_net, nfs_net_id); + + if (refcount_dec_and_lock(&ds->ds_count, &nn->nfs4_data_server_lock)) { list_del_init(&ds->ds_node); - spin_unlock(&nfs4_ds_cache_lock); + spin_unlock(&nn->nfs4_data_server_lock); destroy_ds(ds); } } @@ -718,6 +720,7 @@ nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) struct nfs4_pnfs_ds * nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags) { + struct nfs_net *nn = net_generic(net, nfs_net_id); struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; char *remotestr; @@ -733,8 +736,8 @@ nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_fla /* this is only used for debugging, so it's ok if its NULL */ remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); - spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(net, dsaddrs); + spin_lock(&nn->nfs4_data_server_lock); + tmp_ds = _data_server_lookup_locked(nn, dsaddrs); if (tmp_ds == NULL) { INIT_LIST_HEAD(&ds->ds_addrs); list_splice_init(dsaddrs, &ds->ds_addrs); @@ -743,7 +746,7 @@ nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_fla INIT_LIST_HEAD(&ds->ds_node); ds->ds_net = net; ds->ds_clp = NULL; - list_add(&ds->ds_node, &nfs4_data_server_cache); + list_add(&ds->ds_node, &nn->nfs4_data_server_cache); dprintk("%s add new data server %s\n", __func__, ds->ds_remotestr); } else { @@ -755,7 +758,7 @@ nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_fla refcount_read(&tmp_ds->ds_count)); ds = tmp_ds; } - spin_unlock(&nfs4_ds_cache_lock); + spin_unlock(&nn->nfs4_data_server_lock); out: return ds; } From d82e86c15364d42706eb8b5249640a839d61a681 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Tue, 8 Apr 2025 23:53:42 +0300 Subject: [PATCH 1231/2924] nfs: direct: drop useless initializer in nfs_direct_write_completion() In nfs_direct_write_completion(), the local variable req isn't used outside the *while* loop and is assigned to right at the start of that loop's body, so its initializer appears useless -- drop it; then move the declaration to the loop body (which happens to have a pointless empty line anyway)... Found by Linux Verification Center (linuxtesting.org) with the Svace static analysis tool. Signed-off-by: Sergey Shtylyov Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/416219f5-7983-484b-b5a7-5fb7da9561f7@omp.ru Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f32f8d7c9122bf..48d89716193a7e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -757,7 +757,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; - struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct inode *inode = dreq->inode; int flags = NFS_ODIRECT_DONE; @@ -786,6 +785,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_unlock(&inode->i_lock); while (!list_empty(&hdr->pages)) { + struct nfs_page *req; req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); From c367eea5041c2e5ef6836fe0ba8c5dc75a965b1b Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Wed, 9 Apr 2025 23:36:33 +0300 Subject: [PATCH 1232/2924] nfs: nfs3acl: drop useless assignment in nfs3_get_acl() In nfs3_get_acl(), the local variable status is assigned the result of nfs_refresh_inode() inside the *switch* statement, but that value gets overwritten in the next *if* statement's true branch and is completely ignored if that branch isn't taken... Found by Linux Verification Center (linuxtesting.org) with the Svace static analysis tool. Signed-off-by: Sergey Shtylyov Reviewed-by: Benjamin Coddington Link: https://lore.kernel.org/r/c32dced7-a4fa-43c0-aafe-ef6c819c2f91@omp.ru Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 18d8f6529f615e..a126eb31f62fae 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -104,7 +104,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu) switch (status) { case 0: - status = nfs_refresh_inode(inode, res.fattr); + nfs_refresh_inode(inode, res.fattr); break; case -EPFNOSUPPORT: case -EPROTONOSUPPORT: From 1149719442d28c96dc63cad432b5a6db7c300e1a Mon Sep 17 00:00:00 2001 From: Joachim Priesner Date: Mon, 28 Apr 2025 07:36:06 +0200 Subject: [PATCH 1233/2924] ALSA: usb-audio: Add second USB ID for Jabra Evolve 65 headset There seem to be multiple USB device IDs used for these; the one I have reports as 0b0e:030c when powered on. (When powered off, it reports as 0b0e:0311.) Signed-off-by: Joachim Priesner Cc: Link: https://patch.msgid.link/20250428053606.9237-1-joachim.priesner@web.de Signed-off-by: Takashi Iwai --- sound/usb/format.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/format.c b/sound/usb/format.c index 9d32b21a5fbb02..0ba4641a0eb11d 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -260,7 +260,8 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof } /* Jabra Evolve 65 headset */ - if (chip->usb_id == USB_ID(0x0b0e, 0x030b)) { + if (chip->usb_id == USB_ID(0x0b0e, 0x030b) || + chip->usb_id == USB_ID(0x0b0e, 0x030c)) { /* only 48kHz for playback while keeping 16kHz for capture */ if (fp->nr_rates != 1) return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); From 76047483fe94414edf409dc498498abf346e22f1 Mon Sep 17 00:00:00 2001 From: Sunil Khatri Date: Wed, 23 Apr 2025 09:54:42 +0530 Subject: [PATCH 1234/2924] drm/ttm: fix the warning for hit_low and evict_low MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix the below warning messages: ttm/ttm_bo.c:1098: warning: Function parameter or struct member 'hit_low' not described in 'ttm_bo_swapout_walk' ttm/ttm_bo.c:1098: warning: Function parameter or struct member 'evict_low' not described in 'ttm_bo_swapout_walk' Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Signed-off-by: Sunil Khatri Reviewed-by: Maarten Lankhorst Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250423042442.762108-1-sunil.khatri@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 95b86003c50ded..5bf3c969907c6a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1093,7 +1093,8 @@ struct ttm_bo_swapout_walk { struct ttm_lru_walk walk; /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */ gfp_t gfp_flags; - + /** @hit_low: Whether we should attempt to swap BO's with low watermark threshold */ + /** @evict_low: If we cannot swap a bo when @try_low is false (first pass) */ bool hit_low, evict_low; }; From 8b0ba61df5a1c44e2b3cf683831a4fc5e24ea99d Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 24 Apr 2025 11:28:20 -0400 Subject: [PATCH 1235/2924] fs/xattr.c: fix simple_xattr_list to always include security.* xattrs The vfs has long had a fallback to obtain the security.* xattrs from the LSM when the filesystem does not implement its own listxattr, but shmem/tmpfs and kernfs later gained their own xattr handlers to support other xattrs. Unfortunately, as a side effect, tmpfs and kernfs-based filesystems like sysfs no longer return the synthetic security.* xattr names via listxattr unless they are explicitly set by userspace or initially set upon inode creation after policy load. coreutils has recently switched from unconditionally invoking getxattr for security.* for ls -Z via libselinux to only doing so if listxattr returns the xattr name, breaking ls -Z of such inodes. Before: $ getfattr -m.* /run/initramfs $ getfattr -m.* /sys/kernel/fscaps $ setfattr -n user.foo /run/initramfs $ getfattr -m.* /run/initramfs user.foo After: $ getfattr -m.* /run/initramfs security.selinux $ getfattr -m.* /sys/kernel/fscaps security.selinux $ setfattr -n user.foo /run/initramfs $ getfattr -m.* /run/initramfs security.selinux user.foo Link: https://lore.kernel.org/selinux/CAFqZXNtF8wDyQajPCdGn=iOawX4y77ph0EcfcqcUUj+T87FKyA@mail.gmail.com/ Link: https://lore.kernel.org/selinux/20250423175728.3185-2-stephen.smalley.work@gmail.com/ Signed-off-by: Stephen Smalley Link: https://lore.kernel.org/20250424152822.2719-1-stephen.smalley.work@gmail.com Fixes: b09e0fa4b4ea66266058ee ("tmpfs: implement generic xattr support") Signed-off-by: Christian Brauner --- fs/xattr.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/xattr.c b/fs/xattr.c index fabb2a04501ee7..8ec5b0204bfdc5 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1428,6 +1428,15 @@ static bool xattr_is_trusted(const char *name) return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } +static bool xattr_is_maclabel(const char *name) +{ + const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; + + return !strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN) && + security_ismaclabel(suffix); +} + /** * simple_xattr_list - list all xattr objects * @inode: inode from which to get the xattrs @@ -1460,6 +1469,17 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, if (err) return err; + err = security_inode_listsecurity(inode, buffer, remaining_size); + if (err < 0) + return err; + + if (buffer) { + if (remaining_size < err) + return -ERANGE; + buffer += err; + } + remaining_size -= err; + read_lock(&xattrs->lock); for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) { xattr = rb_entry(rbp, struct simple_xattr, rb_node); @@ -1468,6 +1488,10 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, if (!trusted && xattr_is_trusted(xattr->name)) continue; + /* skip MAC labels; these are provided by LSM above */ + if (xattr_is_maclabel(xattr->name)) + continue; + err = xattr_list_one(&buffer, &remaining_size, xattr->name); if (err) break; From b71f9804f66c2592d4c3a2397b7374a4039005a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 18 Apr 2025 22:46:52 -0700 Subject: [PATCH 1236/2924] timekeeping: Prevent coarse clocks going backwards Lei Chen raised an issue with CLOCK_MONOTONIC_COARSE seeing time inconsistencies. Lei tracked down that this was being caused by the adjustment: tk->tkr_mono.xtime_nsec -= offset; which is made to compensate for the unaccumulated cycles in offset when the multiplicator is adjusted forward, so that the non-_COARSE clockids don't see inconsistencies. However, the _COARSE clockid getter functions use the adjusted xtime_nsec value directly and do not compensate the negative offset via the clocksource delta multiplied with the new multiplicator. In that case the caller can observe time going backwards in consecutive calls. By design, this negative adjustment should be fine, because the logic run from timekeeping_adjust() is done after it accumulated approximately multiplicator * interval_cycles into xtime_nsec. The accumulated value is always larger then the mult_adj * offset value, which is subtracted from xtime_nsec. Both operations are done together under the tk_core.lock, so the net change to xtime_nsec is always always be positive. However, do_adjtimex() calls into timekeeping_advance() as well, to apply the NTP frequency adjustment immediately. In this case, timekeeping_advance() does not return early when the offset is smaller then interval_cycles. In that case there is no time accumulated into xtime_nsec. But the subsequent call into timekeeping_adjust(), which modifies the multiplicator, subtracts from xtime_nsec to correct for the new multiplicator. Here because there was no accumulation, xtime_nsec becomes smaller than before, which opens a window up to the next accumulation, where the _COARSE clockid getters, which don't compensate for the offset, can observe the inconsistency. This has been tried to be fixed by forwarding the timekeeper in the case that adjtimex() adjusts the multiplier, which resets the offset to zero: 757b000f7b93 ("timekeeping: Fix possible inconsistencies in _COARSE clockids") That works correctly, but unfortunately causes a regression on the adjtimex() side. There are two issues: 1) The forwarding of the base time moves the update out of the original period and establishes a new one. 2) The clearing of the accumulated NTP error is changing the behaviour as well. User-space expects that multiplier/frequency updates are in effect, when the syscall returns, so delaying the update to the next tick is not solving the problem either. Commit 757b000f7b93 was reverted so that the established expectations of user space implementations (ntpd, chronyd) are restored, but that obviously brought the inconsistencies back. One of the initial approaches to fix this was to establish a separate storage for the coarse time getter nanoseconds part by calculating it from the offset. That was dropped on the floor because not having yet another state to maintain was simpler. But given the result of the above exercise, this solution turns out to be the right one. Bring it back in a slightly modified form. Thus introduce timekeeper::coarse_nsec and store that nanoseconds part in it, switch the time getter functions and the VDSO update to use that value. coarse_nsec is set on operations which forward or initialize the timekeeper and after time was accumulated during a tick. If there is no accumulation the timestamp is unchanged. This leaves the adjtimex() behaviour unmodified and prevents coarse time from going backwards. [ jstultz: Simplified the coarse_nsec calculation and kept behavior so coarse clockids aren't adjusted on each inter-tick adjtimex call, slightly reworked the comments and commit message ] Fixes: da15cfdae033 ("time: Introduce CLOCK_REALTIME_COARSE") Reported-by: Lei Chen Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/all/20250419054706.2319105-1-jstultz@google.com Closes: https://lore.kernel.org/lkml/20250310030004.3705801-1-lei.chen@smartx.com/ --- include/linux/timekeeper_internal.h | 8 +++-- kernel/time/timekeeping.c | 50 ++++++++++++++++++++++++----- kernel/time/vsyscall.c | 4 +-- 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e39d4d563b1975..785048a3b3e604 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -51,7 +51,7 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai - * @tai_offset: The current UTC to TAI offset in seconds + * @coarse_nsec: The nanoseconds part for coarse time getters * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events @@ -76,6 +76,7 @@ struct tk_read_base { * ntp shifted nano seconds. * @ntp_err_mult: Multiplication factor for scaled math conversion * @skip_second_overflow: Flag used to avoid updating NTP twice with same second + * @tai_offset: The current UTC to TAI offset in seconds * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffy times) @@ -100,7 +101,7 @@ struct tk_read_base { * which results in the following cacheline layout: * * 0: seqcount, tkr_mono - * 1: xtime_sec ... tai_offset + * 1: xtime_sec ... coarse_nsec * 2: tkr_raw, raw_sec * 3,4: Internal variables * @@ -121,7 +122,7 @@ struct timekeeper { ktime_t offs_real; ktime_t offs_boot; ktime_t offs_tai; - s32 tai_offset; + u32 coarse_nsec; /* Cacheline 2: */ struct tk_read_base tkr_raw; @@ -144,6 +145,7 @@ struct timekeeper { u32 ntp_error_shift; u32 ntp_err_mult; u32 skip_second_overflow; + s32 tai_offset; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1e67d076f1955a..a009c91f7b05fc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -164,10 +164,34 @@ static inline struct timespec64 tk_xtime(const struct timekeeper *tk) return ts; } +static inline struct timespec64 tk_xtime_coarse(const struct timekeeper *tk) +{ + struct timespec64 ts; + + ts.tv_sec = tk->xtime_sec; + ts.tv_nsec = tk->coarse_nsec; + return ts; +} + +/* + * Update the nanoseconds part for the coarse time keepers. They can't rely + * on xtime_nsec because xtime_nsec could be adjusted by a small negative + * amount when the multiplication factor of the clock is adjusted, which + * could cause the coarse clocks to go slightly backwards. See + * timekeeping_apply_adjustment(). Thus we keep a separate copy for the coarse + * clockids which only is updated when the clock has been set or we have + * accumulated time. + */ +static inline void tk_update_coarse_nsecs(struct timekeeper *tk) +{ + tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; +} + static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; + tk_update_coarse_nsecs(tk); } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) @@ -175,6 +199,7 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) tk->xtime_sec += ts->tv_sec; tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; tk_normalize_xtime(tk); + tk_update_coarse_nsecs(tk); } static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) @@ -708,6 +733,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk_normalize_xtime(tk); delta -= incr; } + tk_update_coarse_nsecs(tk); } /** @@ -804,8 +830,8 @@ EXPORT_SYMBOL_GPL(ktime_get_with_offset); ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs) { struct timekeeper *tk = &tk_core.timekeeper; - unsigned int seq; ktime_t base, *offset = offsets[offs]; + unsigned int seq; u64 nsecs; WARN_ON(timekeeping_suspended); @@ -813,7 +839,7 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs) do { seq = read_seqcount_begin(&tk_core.seq); base = ktime_add(tk->tkr_mono.base, *offset); - nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsecs = tk->coarse_nsec; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -2161,7 +2187,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) struct timekeeper *real_tk = &tk_core.timekeeper; unsigned int clock_set = 0; int shift = 0, maxshift; - u64 offset; + u64 offset, orig_offset; guard(raw_spinlock_irqsave)(&tk_core.lock); @@ -2172,7 +2198,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), tk->tkr_mono.cycle_last, tk->tkr_mono.mask, tk->tkr_mono.clock->max_raw_delta); - + orig_offset = offset; /* Check if there's really nothing to do */ if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) return false; @@ -2205,6 +2231,14 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) */ clock_set |= accumulate_nsecs_to_secs(tk); + /* + * To avoid inconsistencies caused adjtimex TK_ADV_FREQ calls + * making small negative adjustments to the base xtime_nsec + * value, only update the coarse clocks if we accumulated time + */ + if (orig_offset != offset) + tk_update_coarse_nsecs(tk); + timekeeping_update_from_shadow(&tk_core, clock_set); return !!clock_set; @@ -2248,7 +2282,7 @@ void ktime_get_coarse_real_ts64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - *ts = tk_xtime(tk); + *ts = tk_xtime_coarse(tk); } while (read_seqcount_retry(&tk_core.seq, seq)); } EXPORT_SYMBOL(ktime_get_coarse_real_ts64); @@ -2271,7 +2305,7 @@ void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - *ts = tk_xtime(tk); + *ts = tk_xtime_coarse(tk); offset = tk_core.timekeeper.offs_real; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -2350,12 +2384,12 @@ void ktime_get_coarse_ts64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - now = tk_xtime(tk); + now = tk_xtime_coarse(tk); mono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec, - now.tv_nsec + mono.tv_nsec); + now.tv_nsec + mono.tv_nsec); } EXPORT_SYMBOL(ktime_get_coarse_ts64); diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 01c2ab1e897193..32ef27c71b57aa 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -98,12 +98,12 @@ void update_vsyscall(struct timekeeper *tk) /* CLOCK_REALTIME_COARSE */ vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; vdso_ts->sec = tk->xtime_sec; - vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + vdso_ts->nsec = tk->coarse_nsec; /* CLOCK_MONOTONIC_COARSE */ vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE]; vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsec = tk->coarse_nsec; nsec = nsec + tk->wall_to_monotonic.tv_nsec; vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec); From 8d16dd7b651b75ce7c79da3539553a25e60668f5 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Thu, 24 Apr 2025 11:06:53 +0800 Subject: [PATCH 1237/2924] MAINTAINERS: erofs: add myself as reviewer I have a solid background in file systems and since much of my recent work has focused on EROFS, I am familiar with it. Now I have the time and am willing to help review EROFS patches. I hope my participation can be helpful to the EROFS patch review process. Signed-off-by: Hongbo Li Acked-by: Chao Yu Acked-by: Gao Xiang Link: https://lore.kernel.org/r/20250424030653.3308358-1-lihongbo22@huawei.com Signed-off-by: Gao Xiang --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3cbf9ac0d83f61..b53a268e4a92f3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8726,6 +8726,7 @@ M: Chao Yu R: Yue Hu R: Jeffle Xu R: Sandeep Dhavale +R: Hongbo Li L: linux-erofs@lists.ozlabs.org S: Maintained W: https://erofs.docs.kernel.org From 4fb7b8fceb0beebbe00712c3daf49ade0386076a Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Fri, 25 Apr 2025 07:26:39 -0700 Subject: [PATCH 1238/2924] EDAC/altera: Test the correct error reg offset Test correct structure member, ecc_cecnt_offset, before using it. [ bp: Massage commit message. ] Fixes: 73bcc942f427 ("EDAC, altera: Add Arria10 EDAC support") Signed-off-by: Niravkumar L Rabara Signed-off-by: Matthew Gerlach Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@kernel.org Link: https://lore.kernel.org/20250425142640.33125-2-matthew.gerlach@altera.com --- drivers/edac/altera_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 3e971f90236330..88d9d2f458ee39 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -99,7 +99,7 @@ static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) if (status & priv->ecc_stat_ce_mask) { regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset, &err_addr); - if (priv->ecc_uecnt_offset) + if (priv->ecc_cecnt_offset) regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset, &err_count); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, From 6dbe3c5418c4368e824bff6ae4889257dd544892 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Fri, 25 Apr 2025 07:26:40 -0700 Subject: [PATCH 1239/2924] EDAC/altera: Set DDR and SDMMC interrupt mask before registration Mask DDR and SDMMC in probe function to avoid spurious interrupts before registration. Removed invalid register write to system manager. Fixes: 1166fde93d5b ("EDAC, altera: Add Arria10 ECC memory init functions") Signed-off-by: Niravkumar L Rabara Signed-off-by: Matthew Gerlach Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@kernel.org Link: https://lore.kernel.org/20250425142640.33125-3-matthew.gerlach@altera.com --- drivers/edac/altera_edac.c | 7 ++++--- drivers/edac/altera_edac.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 88d9d2f458ee39..dcd7008fe06b05 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1005,9 +1005,6 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, } } - /* Interrupt mode set to every SBERR */ - regmap_write(ecc_mgr_map, ALTR_A10_ECC_INTMODE_OFST, - ALTR_A10_ECC_INTMODE); /* Enable ECC */ ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); @@ -2127,6 +2124,10 @@ static int altr_edac_a10_probe(struct platform_device *pdev) return PTR_ERR(edac->ecc_mgr_map); } + /* Set irq mask for DDR SBE to avoid any pending irq before registration */ + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, + (A10_SYSMGR_ECC_INTMASK_SDMMCB | A10_SYSMGR_ECC_INTMASK_DDR0)); + edac->irq_chip.name = pdev->dev.of_node->name; edac->irq_chip.irq_mask = a10_eccmgr_irq_mask; edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 3727e72c8c2e70..7248d24c4908d7 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -249,6 +249,8 @@ struct altr_sdram_mc_data { #define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 #define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 #define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) +#define A10_SYSMGR_ECC_INTMASK_SDMMCB BIT(16) +#define A10_SYSMGR_ECC_INTMASK_DDR0 BIT(17) #define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C #define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 From 2c8a7c66c90832432496616a9a3c07293f1364f3 Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Fri, 18 Apr 2025 11:16:42 +0800 Subject: [PATCH 1240/2924] iommu/vt-d: Apply quirk_iommu_igfx for 8086:0044 (QM57/QS57) On the Lenovo ThinkPad X201, when Intel VT-d is enabled in the BIOS, the kernel boots with errors related to DMAR, the graphical interface appeared quite choppy, and the system resets erratically within a minute after it booted: DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Write NO_PASID] Request device [00:02.0] fault addr 0xb97ff000 [fault reason 0x05] PTE Write access is not set Upon comparing boot logs with VT-d on/off, I found that the Intel Calpella quirk (`quirk_calpella_no_shadow_gtt()') correctly applied the igfx IOMMU disable/quirk correctly: pci 0000:00:00.0: DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics Whereas with VT-d on, it went into the "else" branch, which then triggered the DMAR handling fault above: ... else if (!disable_igfx_iommu) { /* we have to ensure the gfx device is idle before we flush */ pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); iommu_set_dma_strict(); } Now, this is not exactly scientific, but moving 0x0044 to quirk_iommu_igfx seems to have fixed the aforementioned issue. Running a few `git blame' runs on the function, I have found that the quirk was originally introduced as a fix specific to ThinkPad X201: commit 9eecabcb9a92 ("intel-iommu: Abort IOMMU setup for igfx if BIOS gave no shadow GTT space") Which was later revised twice to the "else" branch we saw above: - 2011: commit 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") - 2024: commit ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") I'm uncertain whether further testings on this particular laptops were done in 2011 and (honestly I'm not sure) 2024, but I would be happy to do some distro-specific testing if that's what would be required to verify this patch. P.S., I also see IDs 0x0040, 0x0062, and 0x006a listed under the same `quirk_calpella_no_shadow_gtt()' quirk, but I'm not sure how similar these chipsets are (if they share the same issue with VT-d or even, indeed, if this issue is specific to a bug in the Lenovo BIOS). With regards to 0x0062, it seems to be a Centrino wireless card, but not a chipset? I have also listed a couple (distro and kernel) bug reports below as references (some of them are from 7-8 years ago!), as they seem to be similar issue found on different Westmere/Ironlake, Haswell, and Broadwell hardware setups. Cc: stable@vger.kernel.org Fixes: 6fbcfb3e467a ("intel-iommu: Workaround IOTLB hang on Ironlake GPU") Fixes: ba00196ca41c ("iommu/vt-d: Decouple igfx_off from graphic identity mapping") Link: https://groups.google.com/g/qubes-users/c/4NP4goUds2c?pli=1 Link: https://bugs.archlinux.org/task/65362 Link: https://bbs.archlinux.org/viewtopic.php?id=230323 Reported-by: Wenhao Sun Closes: https://bugzilla.kernel.org/show_bug.cgi?id=197029 Signed-off-by: Mingcong Bai Link: https://lore.kernel.org/r/20250415133330.12528-1-jeffbai@aosc.io Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e60b699e7b8cee..cb0b993bebb4dd 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4439,6 +4439,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); +/* QM57/QS57 integrated gfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); + /* Broadwell igfx malfunctions with dmar */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); @@ -4516,7 +4519,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); From 5a2a6c428190f945c5cbf5791f72dbea83e97f66 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 15 Apr 2025 00:17:16 -0400 Subject: [PATCH 1241/2924] dm: always update the array size in realloc_argv on success realloc_argv() was only updating the array size if it was called with old_argv already allocated. The first time it was called to create an argv array, it would allocate the array but return the array size as zero. dm_split_args() would think that it couldn't store any arguments in the array and would call realloc_argv() again, causing it to reallocate the initial slots (this time using GPF_KERNEL) and finally return a size. Aside from being wasteful, this could cause deadlocks on targets that need to process messages without starting new IO. Instead, realloc_argv should always update the allocated array size on success. Fixes: a0651926553c ("dm table: don't copy from a NULL pointer in realloc_argv()") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 53759dbbe9d605..9e175c5e0634b4 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -523,9 +523,10 @@ static char **realloc_argv(unsigned int *size, char **old_argv) gfp = GFP_NOIO; } argv = kmalloc_array(new_size, sizeof(*argv), gfp); - if (argv && old_argv) { - memcpy(argv, old_argv, *size * sizeof(*argv)); + if (argv) { *size = new_size; + if (old_argv) + memcpy(argv, old_argv, *size * sizeof(*argv)); } kfree(old_argv); From b79028039f440e7d2c4df6ab243060c4e3803e84 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 25 Apr 2025 13:36:21 +0200 Subject: [PATCH 1242/2924] cpufreq: Fix setting policy limits when frequency tables are used Commit 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max") overlooked the fact that policy->min and policy->max were accessed directly in cpufreq_frequency_table_target() and in the functions called by it. Consequently, the changes made by that commit led to problems with setting policy limits. Address this by passing the target frequency limits to __resolve_freq() and cpufreq_frequency_table_target() and propagating them to the functions called by the latter. Fixes: 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max") Cc: 5.16+ # 5.16+ Closes: https://lore.kernel.org/linux-pm/aAplED3IA_J0eZN0@linaro.org/ Reported-by: Stephan Gerhold Signed-off-by: Rafael J. Wysocki Tested-by: Stephan Gerhold Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/5896780.DvuYhMxLoT@rjwysocki.net --- drivers/cpufreq/cpufreq.c | 22 +++++--- drivers/cpufreq/cpufreq_ondemand.c | 3 +- drivers/cpufreq/freq_table.c | 6 +-- include/linux/cpufreq.h | 83 +++++++++++++++++++----------- 4 files changed, 73 insertions(+), 41 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index acf19b0042bb6a..f45ded62b0e082 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -536,14 +536,18 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); static unsigned int __resolve_freq(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) + unsigned int target_freq, + unsigned int min, unsigned int max, + unsigned int relation) { unsigned int idx; + target_freq = clamp_val(target_freq, min, max); + if (!policy->freq_table) return target_freq; - idx = cpufreq_frequency_table_target(policy, target_freq, relation); + idx = cpufreq_frequency_table_target(policy, target_freq, min, max, relation); policy->cached_resolved_idx = idx; policy->cached_target_freq = target_freq; return policy->freq_table[idx].frequency; @@ -577,8 +581,7 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, if (unlikely(min > max)) min = max; - return __resolve_freq(policy, clamp_val(target_freq, min, max), - CPUFREQ_RELATION_LE); + return __resolve_freq(policy, target_freq, min, max, CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2397,8 +2400,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - target_freq = clamp_val(target_freq, policy->min, policy->max); - target_freq = __resolve_freq(policy, target_freq, relation); + target_freq = __resolve_freq(policy, target_freq, policy->min, + policy->max, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -2727,8 +2730,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * compiler optimizations around them because they may be accessed * concurrently by cpufreq_driver_resolve_freq() during the update. */ - WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H)); - new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, + new_data.min, new_data.max, + CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, new_data.min, + new_data.max, CPUFREQ_RELATION_L); WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min); trace_cpu_frequency_limits(policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a7c38b8b3e7890..0e65d37c923113 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -76,7 +76,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_next; } - index = cpufreq_frequency_table_target(policy, freq_next, relation); + index = cpufreq_frequency_table_target(policy, freq_next, policy->min, + policy->max, relation); freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index c03a91502f8480..35de513af6c94e 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -115,8 +115,8 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -147,7 +147,7 @@ int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, cpufreq_for_each_valid_entry_idx(pos, table, i) { freq = pos->frequency; - if ((freq < policy->min) || (freq > policy->max)) + if (freq < min || freq > max) continue; if (freq == target_freq) { optimal.driver_data = i; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 400fee6427a5c0..7a5b391dcc0176 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -776,8 +776,8 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); @@ -840,12 +840,12 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_al(policy, target_freq, @@ -855,6 +855,14 @@ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq, @@ -908,12 +916,12 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ah(policy, target_freq, @@ -923,6 +931,14 @@ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find closest freq to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq, @@ -993,12 +1009,12 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ac(policy, target_freq, @@ -1008,7 +1024,17 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } -static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies); +} + +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max, + int idx) { unsigned int freq; @@ -1017,11 +1043,13 @@ static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) freq = policy->freq_table[idx].frequency; - return freq == clamp_val(freq, policy->min, policy->max); + return freq == clamp_val(freq, min, max); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int min, + unsigned int max, unsigned int relation) { bool efficiencies = policy->efficiencies_available && @@ -1032,29 +1060,26 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, relation &= ~CPUFREQ_RELATION_E; if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) - return cpufreq_table_index_unsorted(policy, target_freq, - relation); + return cpufreq_table_index_unsorted(policy, target_freq, min, + max, relation); retry: switch (relation) { case CPUFREQ_RELATION_L: - idx = cpufreq_table_find_index_l(policy, target_freq, - efficiencies); + idx = find_index_l(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_H: - idx = cpufreq_table_find_index_h(policy, target_freq, - efficiencies); + idx = find_index_h(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_C: - idx = cpufreq_table_find_index_c(policy, target_freq, - efficiencies); + idx = find_index_c(policy, target_freq, min, max, efficiencies); break; default: WARN_ON_ONCE(1); return 0; } - /* Limit frequency index to honor policy->min/max */ - if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { + /* Limit frequency index to honor min and max */ + if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) { efficiencies = false; goto retry; } From fa7ab64f1e2fdc8f2603aab8e0dd20de89cb10d9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Apr 2025 14:43:34 -0400 Subject: [PATCH 1243/2924] NFS/localio: Fix a race in nfs_local_open_fh() Once the clp->cl_uuid.lock has been dropped, another CPU could come in and free the struct nfsd_file that was just added. To prevent that from happening, take the RCU read lock before dropping the spin lock. Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client") Signed-off-by: Trond Myklebust Reviewed-by: Mike Snitzer --- fs/nfs/localio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 5c21caeae075c7..4ec952f9f47dde 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -278,6 +278,7 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, new = __nfs_local_open_fh(clp, cred, fh, nfl, mode); if (IS_ERR(new)) return NULL; + rcu_read_lock(); /* try to swap in the pointer */ spin_lock(&clp->cl_uuid.lock); nf = rcu_dereference_protected(*pnf, 1); @@ -287,7 +288,6 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, rcu_assign_pointer(*pnf, nf); } spin_unlock(&clp->cl_uuid.lock); - rcu_read_lock(); } nf = nfs_local_file_get(nf); rcu_read_unlock(); From 5b1834d6202f86180e451ad1a2a8a193a1da18fc Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 18 Apr 2025 17:25:12 +0100 Subject: [PATCH 1244/2924] drm/fdinfo: Protect against driver unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we unbind a driver from the PCI device with an active DRM client, subsequent read of the fdinfo data associated with the file descriptor in question will not end well. Protect the path with a drm_dev_enter/exit() pair. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Lucas De Marchi Cc: Rodrigo Vivi Cc: Umesh Nerlige Ramappa Reviewed-by: Christian König Fixes: 3f09a0cd4ea3 ("drm: Add common fdinfo helper") Cc: # v6.5+ Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250418162512.72324-1-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/drm_file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index c299cd94d3f78f..cf2463090d3acc 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -964,6 +964,10 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) struct drm_file *file = f->private_data; struct drm_device *dev = file->minor->dev; struct drm_printer p = drm_seq_file_printer(m); + int idx; + + if (!drm_dev_enter(dev, &idx)) + return; drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name); drm_printf(&p, "drm-client-id:\t%llu\n", file->client_id); @@ -983,6 +987,8 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) if (dev->driver->show_fdinfo) dev->driver->show_fdinfo(&p, file); + + drm_dev_exit(idx); } EXPORT_SYMBOL(drm_show_fdinfo); From 20a6cff3b283f0601048ace87ad1bc89627e36f2 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Tue, 18 Mar 2025 09:33:33 +0800 Subject: [PATCH 1245/2924] KVM: x86/mmu: Check and free obsolete roots in kvm_mmu_reload() Check request KVM_REQ_MMU_FREE_OBSOLETE_ROOTS to free obsolete roots in kvm_mmu_reload() to prevent kvm_mmu_reload() from seeing a stale obsolete root. Since kvm_mmu_reload() can be called outside the vcpu_enter_guest() path (e.g., kvm_arch_vcpu_pre_fault_memory()), it may be invoked after a root has been marked obsolete and before vcpu_enter_guest() is invoked to process KVM_REQ_MMU_FREE_OBSOLETE_ROOTS and set root.hpa to invalid. This causes kvm_mmu_reload() to fail to load a new root, which can lead to kvm_arch_vcpu_pre_fault_memory() being stuck in the while loop in kvm_tdp_map_page() since RET_PF_RETRY is always returned due to is_page_fault_stale(). Keep the existing check of KVM_REQ_MMU_FREE_OBSOLETE_ROOTS in vcpu_enter_guest() since the cost of kvm_check_request() is negligible, especially a check that's guarded by kvm_request_pending(). Export symbol of kvm_mmu_free_obsolete_roots() as kvm_mmu_reload() is inline and may be called outside of kvm.ko. Fixes: 6e01b7601dfe ("KVM: x86: Implement kvm_arch_vcpu_pre_fault_memory()") Suggested-by: Sean Christopherson Signed-off-by: Yan Zhao Link: https://lore.kernel.org/r/20250318013333.5817-1-yan.y.zhao@intel.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu.h | 3 +++ arch/x86/kvm/mmu/mmu.c | 1 + 2 files changed, 4 insertions(+) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 050a0e229a4d17..f2b36d32ef40e5 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -104,6 +104,9 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) { + if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu)) + kvm_mmu_free_obsolete_roots(vcpu); + /* * Checking root.hpa is sufficient even when KVM has mirror root. * We can have either: diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 63bb77ee1bb16b..387464ebe8e8df 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5974,6 +5974,7 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu) __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu); __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu); } +EXPORT_SYMBOL_GPL(kvm_mmu_free_obsolete_roots); static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes) From bc43f7114a0e8173968085b21535d57b8030d571 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:13 +0200 Subject: [PATCH 1246/2924] drm: adp: Use spin_lock_irqsave for drm device event_lock The lock is used in the interrupt handler so use spin_lock_irqsave to disable interrupts and avoid deadlocks with the irq handler. Fixes: 332122eba628 ("drm: adp: Add Apple Display Pipe driver") Reviewed-by: Alyssa Rosenzweig Signed-off-by: Janne Grunau Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-1-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index c98c647f981d53..157298a8ff42b9 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -310,6 +310,7 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) { u32 frame_num = 1; + unsigned long flags; struct adp_drv_private *adp = crtc_to_adp(crtc); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); u64 new_size = ALIGN(new_state->mode.hdisplay * @@ -330,13 +331,13 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, } writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO); //FIXME: use adbe flush interrupt - spin_lock_irq(&crtc->dev->event_lock); + spin_lock_irqsave(&crtc->dev->event_lock, flags); if (crtc->state->event) { drm_crtc_vblank_get(crtc); adp->event = crtc->state->event; } crtc->state->event = NULL; - spin_unlock_irq(&crtc->dev->event_lock); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } static const struct drm_crtc_funcs adp_crtc_funcs = { From 7a7d6681d5adde7dc7e648dcc6b9e9be6ca93d5d Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:14 +0200 Subject: [PATCH 1247/2924] drm: adp: Handle drm_crtc_vblank_get() errors drm_crtc_vblank_get() may fail when it's called before drm_crtc_vblank_on() on a resetted CRTC. This occurs in drm_crtc_helper_funcs' atomic_flush() calls after drm_atomic_helper_crtc_reset() for example directly after probe. Send the vblank event directly in such cases. Avoids following warning in the subsequent drm_crtc_vblank_put() call from the vblank irq handler as below: adp 228200000.display-pipe: [drm] drm_WARN_ON(atomic_read(&vblank->refcount) == 0) WARNING: CPU: 5 PID: 1206 at drivers/gpu/drm/drm_vblank.c:1247 drm_vblank_put+0x158/0x170 ... Call trace: drm_vblank_put+0x158/0x170 (P) drm_crtc_vblank_put+0x24/0x38 adp_fe_irq+0xd8/0xe8 [adpdrm] __handle_irq_event_percpu+0x94/0x318 handle_irq_event+0x54/0xd0 handle_fasteoi_irq+0xa8/0x240 handle_irq_desc+0x3c/0x68 generic_handle_domain_irq+0x24/0x40 Modifying `crtc->state->event` here is fine as crtc->mutex is locked by the non-async atomic commit. In retrospect this looks so obvious that it doesn't warrant a comment in the file. Signed-off-by: Janne Grunau Reviewed-by: Alyssa Rosenzweig Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-2-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index 157298a8ff42b9..bdf27ee742ea01 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -331,13 +331,19 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, } writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO); //FIXME: use adbe flush interrupt - spin_lock_irqsave(&crtc->dev->event_lock, flags); if (crtc->state->event) { - drm_crtc_vblank_get(crtc); - adp->event = crtc->state->event; + struct drm_pending_vblank_event *event = crtc->state->event; + + crtc->state->event = NULL; + spin_lock_irqsave(&crtc->dev->event_lock, flags); + + if (drm_crtc_vblank_get(crtc) != 0) + drm_crtc_send_vblank_event(crtc, event); + else + adp->event = event; + + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } - crtc->state->event = NULL; - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } static const struct drm_crtc_funcs adp_crtc_funcs = { From c082a52125d9007b488d590c412fd126aa78c345 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:15 +0200 Subject: [PATCH 1248/2924] drm: adp: Enable vblank interrupts in crtc's .atomic_enable Calling drm_crtc_vblank_on() drm_crtc_helper_funcs' atomic_enable is expected to enable vblank interrupts. It may have been avoided here to due to drm_crtc_vblank_get()'s error behavior after drm_crtc_vblank_reset(). With that fixed in the preceding change the driver can call drm_crtc_vblank_on() from adp_crtc_atomic_enable(). Reviewed-by: Alyssa Rosenzweig Signed-off-by: Janne Grunau Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-3-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index bdf27ee742ea01..50d26c73301c02 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -288,6 +288,7 @@ static void adp_crtc_atomic_enable(struct drm_crtc *crtc, writel(BIT(0), adp->be + ADBE_BLEND_EN3); writel(BIT(0), adp->be + ADBE_BLEND_BYPASS); writel(BIT(0), adp->be + ADBE_BLEND_EN4); + drm_crtc_vblank_on(crtc); } static void adp_crtc_atomic_disable(struct drm_crtc *crtc, @@ -519,8 +520,7 @@ static int adp_drm_bind(struct device *dev) struct adp_drv_private *adp = to_adp(drm); int err; - adp_disable_vblank(adp); - writel(ADP_CTRL_FIFO_ON | ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL); + writel(ADP_CTRL_FIFO_ON, adp->fe + ADP_CTRL); adp->next_bridge = drmm_of_get_bridge(&adp->drm, dev->of_node, 0, 0); if (IS_ERR(adp->next_bridge)) { From 8f6dfc4d7037e88cc0a4be4f290829946999341f Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 28 Apr 2025 13:37:16 +0200 Subject: [PATCH 1249/2924] drm: adp: Remove pointless irq_lock spin lock Interrupt handlers run with interrupts disabled so it is not necessary to protect them against reentrancy. Reviewed-by: Alyssa Rosenzweig Signed-off-by: Janne Grunau Link: https://lore.kernel.org/r/20250428-drm_adp_fixes-v2-4-912e081e55d8@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/adp/adp_drv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index 50d26c73301c02..54cde090c3f42a 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -121,7 +121,6 @@ struct adp_drv_private { dma_addr_t mask_iova; int be_irq; int fe_irq; - spinlock_t irq_lock; struct drm_pending_vblank_event *event; }; @@ -490,8 +489,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) u32 int_status; u32 int_ctl; - spin_lock(&adp->irq_lock); - int_status = readl(adp->fe + ADP_INT_STATUS); if (int_status & ADP_INT_STATUS_VBLANK) { drm_crtc_handle_vblank(&adp->crtc); @@ -509,7 +506,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) writel(int_status, adp->fe + ADP_INT_STATUS); - spin_unlock(&adp->irq_lock); return IRQ_HANDLED; } @@ -574,8 +570,6 @@ static int adp_probe(struct platform_device *pdev) if (IS_ERR(adp)) return PTR_ERR(adp); - spin_lock_init(&adp->irq_lock); - dev_set_drvdata(&pdev->dev, &adp->drm); err = adp_parse_of(pdev, adp); From 32dce6b1949a696dc7abddc04de8cbe35c260217 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 4 Mar 2025 20:12:14 +0100 Subject: [PATCH 1250/2924] drm: Select DRM_KMS_HELPER from DRM_DEBUG_DP_MST_TOPOLOGY_REFS Using "depends on" and "select" for the same Kconfig symbol is known to cause circular dependencies (cmp. "Kconfig recursive dependency limitations" in Documentation/kbuild/kconfig-language.rst. DRM drivers are selecting drm helpers so do the same for DRM_DEBUG_DP_MST_TOPOLOGY_REFS. Fixes following circular dependency reported on x86 for the downstream Asahi Linux tree: error: recursive dependency detected! symbol DRM_KMS_HELPER is selected by DRM_GEM_SHMEM_HELPER symbol DRM_GEM_SHMEM_HELPER is selected by RUST_DRM_GEM_SHMEM_HELPER symbol RUST_DRM_GEM_SHMEM_HELPER is selected by DRM_ASAHI symbol DRM_ASAHI depends on RUST symbol RUST depends on CALL_PADDING symbol CALL_PADDING depends on OBJTOOL symbol OBJTOOL is selected by STACK_VALIDATION symbol STACK_VALIDATION depends on UNWINDER_FRAME_POINTER symbol UNWINDER_FRAME_POINTER is part of choice block at arch/x86/Kconfig.debug:224 symbol unknown is visible depending on UNWINDER_GUESS symbol UNWINDER_GUESS prompt is visible depending on STACKDEPOT symbol STACKDEPOT is selected by DRM_DEBUG_DP_MST_TOPOLOGY_REFS symbol DRM_DEBUG_DP_MST_TOPOLOGY_REFS depends on DRM_KMS_HELPER Fixes: 12a280c72868 ("drm/dp_mst: Add topology ref history tracking for debugging") Cc: stable@vger.kernel.org Signed-off-by: Janne Grunau Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250304-drm_debug_dp_mst_topo_kconfig-v1-1-e16fd152f258@jannau.net Signed-off-by: Alyssa Rosenzweig --- drivers/gpu/drm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2cba2b6ebe1c11..f01925ed8176b5 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -188,7 +188,7 @@ config DRM_DEBUG_DP_MST_TOPOLOGY_REFS bool "Enable refcount backtrace history in the DP MST helpers" depends on STACKTRACE_SUPPORT select STACKDEPOT - depends on DRM_KMS_HELPER + select DRM_KMS_HELPER depends on DEBUG_KERNEL depends on EXPERT help From 9a046c1d21f0ae14c73b5e106e5a501dd902b6a9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 9 Apr 2025 14:22:55 +0200 Subject: [PATCH 1251/2924] Input: stmpe-ts - use module alias instead of device table When compile tested with W=1 on x86_64 with driver as built-in: stmpe-ts.c:371:34: error: unused variable 'stmpe_ts_ids' [-Werror,-Wunused-const-variable] Ideally this would be referenced from the platform_driver, but since the compatible string is already matched by the mfd driver for its parent device, that would break probing. In this case, the of_device_id table just serves as a module alias for loading the driver, while the device itself is probed using the platform device name. Remove the table and instead use a module alias that reflects how the driver is actually probed. Link: https://lore.kernel.org/lkml/20240403080702.3509288-8-arnd@kernel.org/ Link: https://lore.kernel.org/lkml/181dbdb8-c050-4966-8cb4-2f39495ff3f9@app.fastmail.com/ Signed-off-by: Arnd Bergmann Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250409122314.2848028-3-arnd@kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/stmpe-ts.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c index a94a1997f96b7d..af0fb38bcfdcd2 100644 --- a/drivers/input/touchscreen/stmpe-ts.c +++ b/drivers/input/touchscreen/stmpe-ts.c @@ -366,12 +366,7 @@ static struct platform_driver stmpe_ts_driver = { }; module_platform_driver(stmpe_ts_driver); -static const struct of_device_id stmpe_ts_ids[] = { - { .compatible = "st,stmpe-ts", }, - { }, -}; -MODULE_DEVICE_TABLE(of, stmpe_ts_ids); - +MODULE_ALIAS("platform:stmpe-ts"); MODULE_AUTHOR("Luotao Fu "); MODULE_DESCRIPTION("STMPEXXX touchscreen driver"); MODULE_LICENSE("GPL"); From b8ac485a179d8819ae291afbf13a2fd89d76d4f3 Mon Sep 17 00:00:00 2001 From: Mattijs Korpershoek Date: Mon, 28 Apr 2025 10:35:13 +0200 Subject: [PATCH 1252/2924] dt-bindings: mediatek,mt6779-keypad: Update Mattijs' email address Update Mattijs Korpershoek's email address to @kernel.org. Signed-off-by: Mattijs Korpershoek Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250428-keypad-email-v1-1-dde6ac76725b@kernel.org Signed-off-by: Dmitry Torokhov --- .../devicetree/bindings/input/mediatek,mt6779-keypad.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml index 517a4ac1bea3df..e365413732e7b9 100644 --- a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml +++ b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Mediatek's Keypad Controller maintainers: - - Mattijs Korpershoek + - Mattijs Korpershoek allOf: - $ref: /schemas/input/matrix-keymap.yaml# From 6a10a2f1e0502c1f23e3095291c985d9bd8c8488 Mon Sep 17 00:00:00 2001 From: Mattijs Korpershoek Date: Mon, 28 Apr 2025 10:40:17 +0200 Subject: [PATCH 1253/2924] MAINTAINERS: .mailmap: update Mattijs Korpershoek's email address Update Mattijs Korpershoek's email address to @kernel.org. Signed-off-by: Mattijs Korpershoek Link: https://lore.kernel.org/r/20250428-keypad-maintainers-v1-1-4e9c4afba415@kernel.org Signed-off-by: Dmitry Torokhov --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index a897c16d3baef9..86e8c8e55ed992 100644 --- a/.mailmap +++ b/.mailmap @@ -472,6 +472,7 @@ Matthias Fuchs Matthieu Baerts Matthieu CASTET Matti Vaittinen +Mattijs Korpershoek Matt Ranostay Matt Ranostay Matt Ranostay Matthew Ranostay diff --git a/MAINTAINERS b/MAINTAINERS index 82eb3de2d76c72..765219b393dba8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14744,7 +14744,7 @@ F: Documentation/devicetree/bindings/media/mediatek-jpeg-*.yaml F: drivers/media/platform/mediatek/jpeg/ MEDIATEK KEYPAD DRIVER -M: Mattijs Korpershoek +M: Mattijs Korpershoek S: Supported F: Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml F: drivers/input/keyboard/mt6779-keypad.c From 7675b5efd81fe6d524e29d5a541f43201e98afa8 Mon Sep 17 00:00:00 2001 From: Mikael Gonella-Bolduc Date: Wed, 23 Apr 2025 09:52:43 -0400 Subject: [PATCH 1254/2924] Input: cyttsp5 - fix power control issue on wakeup The power control function ignores the "on" argument when setting the report ID, and thus is always sending HID_POWER_SLEEP. This causes a problem when trying to wakeup. Fix by sending the state variable, which contains the proper HID_POWER_ON or HID_POWER_SLEEP based on the "on" argument. Fixes: 3c98b8dbdced ("Input: cyttsp5 - implement proper sleep and wakeup procedures") Cc: stable@vger.kernel.org Signed-off-by: Mikael Gonella-Bolduc Signed-off-by: Hugo Villeneuve Reviewed-by: Alistair Francis Link: https://lore.kernel.org/r/20250423135243.1261460-1-hugo@hugovil.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/cyttsp5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c index 14c43f0a6c217d..071b7c9bf566eb 100644 --- a/drivers/input/touchscreen/cyttsp5.c +++ b/drivers/input/touchscreen/cyttsp5.c @@ -580,7 +580,7 @@ static int cyttsp5_power_control(struct cyttsp5 *ts, bool on) int rc; SET_CMD_REPORT_TYPE(cmd[0], 0); - SET_CMD_REPORT_ID(cmd[0], HID_POWER_SLEEP); + SET_CMD_REPORT_ID(cmd[0], state); SET_CMD_OPCODE(cmd[1], HID_CMD_SET_POWER); rc = cyttsp5_write(ts, HID_COMMAND_REG, cmd, sizeof(cmd)); From 22cd66a5db56a07d9e621367cb4d16ff0f6baf56 Mon Sep 17 00:00:00 2001 From: Lode Willems Date: Tue, 22 Apr 2025 13:24:27 +0200 Subject: [PATCH 1255/2924] Input: xpad - add support for 8BitDo Ultimate 2 Wireless Controller This patch adds support for the 8BitDo Ultimate 2 Wireless Controller. Tested using the wireless dongle and plugged in. Signed-off-by: Lode Willems Link: https://lore.kernel.org/r/20250422112457.6728-1-me@lodewillems.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index c33e6f33265ba0..c933e47173bd1e 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -387,6 +387,7 @@ static const struct xpad_device { { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, + { 0x2dc8, 0x310b, "8BitDo Ultimate 2 Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, From a2f546330ef9f3471ab9dd5f59e9685733b6c0dc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 20:07:24 -0400 Subject: [PATCH 1256/2924] bcachefs: Fix losing return code in next_fiemap_extent() Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0f1d61aab90bd5..72b722d80813a0 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1464,8 +1464,8 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans, unsigned sectors = cur->kbuf.k->k.size; s64 offset_into_extent = 0; enum btree_id data_btree = BTREE_ID_extents; - int ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, - &cur->kbuf); + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, + &cur->kbuf); if (ret) goto err; From c83311c5b90d12ea22e847fd9390e2fdb6a34f68 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 11:38:58 -0400 Subject: [PATCH 1257/2924] bcachefs: Use generic_set_sb_d_ops for standard casefolding d_ops Suggested-by: Al Viro Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 11 ++++++++--- fs/bcachefs/namei.c | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 72b722d80813a0..113db85b6ef9d0 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -66,6 +66,8 @@ static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_in if (bch2_inode_casefold(c, &inode->ei_inode)) inode->v.i_flags |= S_CASEFOLD; + else + inode->v.i_flags &= ~S_CASEFOLD; } void bch2_inode_update_after_write(struct btree_trans *trans, @@ -848,10 +850,8 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, set_nlink(&inode->v, 0); } - if (IS_CASEFOLDED(vdir)) { + if (IS_CASEFOLDED(vdir)) d_invalidate(dentry); - d_prune_aliases(&inode->v); - } err: bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); @@ -2571,6 +2571,11 @@ static int bch2_fs_get_tree(struct fs_context *fc) if (ret) goto err_put_super; +#ifdef CONFIG_UNICODE + sb->s_encoding = c->cf_encoding; +#endif + generic_set_sb_d_ops(sb); + vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); bch_err_msg(c, ret, "mounting: error getting root inode"); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 46f3c8b100a952..52c58c6d53d243 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -343,6 +343,9 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, bool ret = false; for (id = 0; id < Inode_opt_nr; id++) { + if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold) + continue; + /* Skip attributes that were explicitly set on this inode */ if (dst_u->bi_fields_set & (1 << id)) continue; From 70c3d89f49523933365d91010f88206855bc1990 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 12:09:33 -0400 Subject: [PATCH 1258/2924] bcachefs: Emit unicode version message on startup fstests expects this Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e4ab0595c0aef6..32fccca350ed8e 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -823,25 +823,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; -#ifdef CONFIG_UNICODE - /* Default encoding until we can potentially have more as an option. */ - c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); - if (IS_ERR(c->cf_encoding)) { - printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); - ret = -EINVAL; - goto err; - } -#else - if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { - printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); - ret = -EINVAL; - goto err; - } -#endif - pr_uuid(&name, c->sb.user_uuid.b); ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0; if (ret) @@ -941,6 +922,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; +#ifdef CONFIG_UNICODE + /* Default encoding until we can potentially have more as an option. */ + c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); + if (IS_ERR(c->cf_encoding)) { + printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); + ret = -EINVAL; + goto err; + } + bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); +#else + if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { + printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); + ret = -EINVAL; + goto err; + } +#endif + for (i = 0; i < c->sb.nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) continue; From bdc32a10a29c3993b3c6c38b21951b66bea525d7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 12:19:47 -0400 Subject: [PATCH 1259/2924] bcachefs: Add missing utf8_unload() Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 32fccca350ed8e..27943082c093c3 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -531,6 +531,10 @@ static void __bch2_fs_free(struct bch_fs *c) for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); +#ifdef CONFIG_UNICODE + utf8_unload(c->cf_encoding); +#endif + bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); bch2_free_fsck_errs(c); From 3c24020119a5f55ec902b5fdc24d8666d76340b3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 11:05:32 -0400 Subject: [PATCH 1260/2924] bcachefs: Run BCH_RECOVERY_PASS_reconstruct_snapshots on missing subvol -> snapshot Fix this repair path. Signed-off-by: Kent Overstreet --- fs/bcachefs/subvolume.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 5537283d0beaeb..d0209f7658bb87 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -6,6 +6,7 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "recovery_passes.h" #include "snapshot.h" #include "subvolume.h" @@ -44,8 +45,8 @@ static int check_subvol(struct btree_trans *trans, ret = bch2_snapshot_lookup(trans, snapid, &snapshot); if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "subvolume %llu points to nonexistent snapshot %u", - k.k->p.offset, snapid); + return bch2_run_explicit_recovery_pass(c, + BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; if (ret) return ret; From 9e9c28acfdc78292100fdd0e46587bf43a174451 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 11:05:32 -0400 Subject: [PATCH 1261/2924] bcachefs: Add upgrade table entry from 0.14 There are a few errors that needed to be marked as autofix. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 4 ++++ fs/bcachefs/sb-errors_format.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index acb5d845841e53..badd0e17ada5a0 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -20,6 +20,10 @@ * x(version, recovery_passes, errors...) */ #define UPGRADE_TABLE() \ + x(snapshot_2, \ + RECOVERY_PASS_ALL_FSCK, \ + BCH_FSCK_ERR_subvol_root_wrong_bi_subvol, \ + BCH_FSCK_ERR_subvol_not_master_and_not_snapshot) \ x(backpointers, \ RECOVERY_PASS_ALL_FSCK) \ x(inode_v3, \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index dc53d25c7cbb00..3711aa5009ac35 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -205,9 +205,9 @@ enum bch_fsck_flags { x(snapshot_bad_depth, 184, 0) \ x(snapshot_bad_skiplist, 185, 0) \ x(subvol_pos_bad, 186, 0) \ - x(subvol_not_master_and_not_snapshot, 187, 0) \ + x(subvol_not_master_and_not_snapshot, 187, FSCK_AUTOFIX) \ x(subvol_to_missing_root, 188, 0) \ - x(subvol_root_wrong_bi_subvol, 189, 0) \ + x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \ x(bkey_in_missing_snapshot, 190, 0) \ x(inode_pos_inode_nonzero, 191, 0) \ x(inode_pos_blockdev_range, 192, 0) \ From 002466446abae31a15e8b89adb54ee08653eccd1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 12:09:53 -0400 Subject: [PATCH 1262/2924] bcachefs: fix bch2_dev_buckets_resize() The resize memcpy path was totally busted. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 4ef261e8db4f5c..e1efae43982a87 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1307,13 +1307,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); if (resize) { - bucket_gens->nbuckets = min(bucket_gens->nbuckets, - old_bucket_gens->nbuckets); - bucket_gens->nbuckets_minus_first = - bucket_gens->nbuckets - bucket_gens->first_bucket; + u64 copy = min(bucket_gens->nbuckets, + old_bucket_gens->nbuckets); memcpy(bucket_gens->b, old_bucket_gens->b, - bucket_gens->nbuckets); + sizeof(bucket_gens->b[0]) * copy); } rcu_assign_pointer(ca->bucket_gens, bucket_gens); From e7f1a52849a01c63c796a9dfe6697d05fff23324 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 12:01:51 -0400 Subject: [PATCH 1263/2924] bcachefs: Improve bch2_dev_bucket_missing() More useful error message. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 6 ++++-- fs/bcachefs/sb-members.h | 13 ++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 116131f95815f3..72779912939b6f 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -15,9 +15,11 @@ void bch2_dev_missing(struct bch_fs *c, unsigned dev) bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); } -void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) +void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) { - bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset); + bch2_fs_inconsistent(ca->fs, + "pointer to nonexistent bucket %llu on device %s (valid range %u-%llu)", + bucket, ca->name, ca->mi.first_bucket, ca->mi.nbuckets); } #define x(t, n, ...) [n] = #t, diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 06bb41a3f3605b..42786657522ce1 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -249,20 +249,23 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev) static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, struct bpos bucket) { struct bch_dev *ca = bch2_dev_tryget_noerror(c, bucket.inode); - if (ca && !bucket_valid(ca, bucket.offset)) { + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { bch2_dev_put(ca); ca = NULL; } return ca; } -void bch2_dev_bucket_missing(struct bch_fs *, struct bpos); +void bch2_dev_bucket_missing(struct bch_dev *, u64); static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bpos bucket) { - struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, bucket); - if (!ca) - bch2_dev_bucket_missing(c, bucket); + struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { + bch2_dev_bucket_missing(ca, bucket.offset); + bch2_dev_put(ca); + ca = NULL; + } return ca; } From eca5b56ccfdf583a8781503646fb39554f8624bd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 12:11:31 -0400 Subject: [PATCH 1264/2924] bcachefs: Don't generate alloc updates to invalid buckets Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e1efae43982a87..31fbc2716d8bf5 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -604,6 +604,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans, } struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); + if (!bucket_valid(ca, bucket.offset)) { + if (insert) { + bch2_dev_bucket_missing(ca, bucket.offset); + ret = -BCH_ERR_trigger_pointer; + } + goto err; + } if (flags & BTREE_TRIGGER_transactional) { struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); From c366b1672d74cb008974f6e36e34dc191621f3bb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 09:31:23 -0400 Subject: [PATCH 1265/2924] bcachefs: btree_node_data_missing is now autofix Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 3711aa5009ac35..a4ad5924107b23 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -46,7 +46,7 @@ enum bch_fsck_flags { x(btree_node_unsupported_version, 34, 0) \ x(btree_node_bset_older_than_sb_min, 35, 0) \ x(btree_node_bset_newer_than_sb, 36, 0) \ - x(btree_node_data_missing, 37, 0) \ + x(btree_node_data_missing, 37, FSCK_AUTOFIX) \ x(btree_node_bset_after_end, 38, 0) \ x(btree_node_replicas_sectors_written_mismatch, 39, 0) \ x(btree_node_replicas_data_mismatch, 40, 0) \ From f04dd30f1bef1ed2e74a4050af6e5e5e3869bac3 Mon Sep 17 00:00:00 2001 From: Vishal Badole Date: Thu, 24 Apr 2025 18:32:48 +0530 Subject: [PATCH 1266/2924] amd-xgbe: Fix to ensure dependent features are toggled with RX checksum offload According to the XGMAC specification, enabling features such as Layer 3 and Layer 4 Packet Filtering, Split Header and Virtualized Network support automatically selects the IPC Full Checksum Offload Engine on the receive side. When RX checksum offload is disabled, these dependent features must also be disabled to prevent abnormal behavior caused by mismatched feature dependencies. Ensure that toggling RX checksum offload (disabling or enabling) properly disables or enables all dependent features, maintaining consistent and expected behavior in the network device. Cc: stable@vger.kernel.org Fixes: 1a510ccf5869 ("amd-xgbe: Add support for VXLAN offload capabilities") Signed-off-by: Vishal Badole Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250424130248.428865-1-Vishal.Badole@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 9 +++++++-- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 24 +++++++++++++++++++++-- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 11 +++++++++-- drivers/net/ethernet/amd/xgbe/xgbe.h | 4 ++++ 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 230726d7b74f63..d41b58fad37bbf 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -373,8 +373,13 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata, } /* Set up the header page info */ - xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, - XGBE_SKB_ALLOC_SIZE); + if (pdata->netdev->features & NETIF_F_RXCSUM) { + xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, + XGBE_SKB_ALLOC_SIZE); + } else { + xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, + pdata->rx_buf_size); + } /* Set up the buffer page info */ xgbe_set_buffer_data(&rdata->rx.buf, &ring->rx_buf_pa, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index f393228d41c7be..f1b0fb02b3cd14 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -320,6 +320,18 @@ static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE); } +static void xgbe_disable_sph_mode(struct xgbe_prv_data *pdata) +{ + unsigned int i; + + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) + break; + + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 0); + } +} + static int xgbe_write_rss_reg(struct xgbe_prv_data *pdata, unsigned int type, unsigned int index, unsigned int val) { @@ -3545,8 +3557,12 @@ static int xgbe_init(struct xgbe_prv_data *pdata) xgbe_config_tx_coalesce(pdata); xgbe_config_rx_buffer_size(pdata); xgbe_config_tso_mode(pdata); - xgbe_config_sph_mode(pdata); - xgbe_config_rss(pdata); + + if (pdata->netdev->features & NETIF_F_RXCSUM) { + xgbe_config_sph_mode(pdata); + xgbe_config_rss(pdata); + } + desc_if->wrapper_tx_desc_init(pdata); desc_if->wrapper_rx_desc_init(pdata); xgbe_enable_dma_interrupts(pdata); @@ -3702,5 +3718,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->disable_vxlan = xgbe_disable_vxlan; hw_if->set_vxlan_id = xgbe_set_vxlan_id; + /* For Split Header*/ + hw_if->enable_sph = xgbe_config_sph_mode; + hw_if->disable_sph = xgbe_disable_sph_mode; + DBGPR("<--xgbe_init_function_ptrs\n"); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index d84a310dfcd40e..8e09ad8fa022a3 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2257,10 +2257,17 @@ static int xgbe_set_features(struct net_device *netdev, if (ret) return ret; - if ((features & NETIF_F_RXCSUM) && !rxcsum) + if ((features & NETIF_F_RXCSUM) && !rxcsum) { + hw_if->enable_sph(pdata); + hw_if->enable_vxlan(pdata); hw_if->enable_rx_csum(pdata); - else if (!(features & NETIF_F_RXCSUM) && rxcsum) + schedule_work(&pdata->restart_work); + } else if (!(features & NETIF_F_RXCSUM) && rxcsum) { + hw_if->disable_sph(pdata); + hw_if->disable_vxlan(pdata); hw_if->disable_rx_csum(pdata); + schedule_work(&pdata->restart_work); + } if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !rxvlan) hw_if->enable_rx_vlan_stripping(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index d85386cac8d166..ed5d43c16d0e23 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -865,6 +865,10 @@ struct xgbe_hw_if { void (*enable_vxlan)(struct xgbe_prv_data *); void (*disable_vxlan)(struct xgbe_prv_data *); void (*set_vxlan_id)(struct xgbe_prv_data *); + + /* For Split Header */ + void (*enable_sph)(struct xgbe_prv_data *pdata); + void (*disable_sph)(struct xgbe_prv_data *pdata); }; /* This structure represents implementation specific routines for an From 8c47d5753a119f1c986bc3ed92e9178d2624e1e8 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Fri, 25 Apr 2025 05:29:53 +0100 Subject: [PATCH 1267/2924] net: ethernet: mtk_eth_soc: sync mtk_clks_source_name array When removing the clock bits for clocks which aren't used by the Ethernet driver their names should also have been removed from the mtk_clks_source_name array. Remove them now as enum mtk_clks_map needs to match the mtk_clks_source_name array so the driver can make sure that all required clocks are present and correctly name missing clocks. Fixes: 887b1d1adb2e ("net: ethernet: mtk_eth_soc: drop clocks unused by Ethernet driver") Signed-off-by: Daniel Golle Reviewed-by: Simon Horman Link: https://patch.msgid.link/d075e706ff1cebc07f9ec666736d0b32782fd487.1745555321.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 47807b20231042..83068925c589d9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -269,12 +269,8 @@ static const char * const mtk_clks_source_name[] = { "ethwarp_wocpu2", "ethwarp_wocpu1", "ethwarp_wocpu0", - "top_usxgmii0_sel", - "top_usxgmii1_sel", "top_sgm0_sel", "top_sgm1_sel", - "top_xfi_phy0_xtal_sel", - "top_xfi_phy1_xtal_sel", "top_eth_gmii_sel", "top_eth_refck_50m_sel", "top_eth_sys_200m_sel", From 10c34b7d71a4ff8c06d926f1846edf8295ed75bf Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Fri, 25 Apr 2025 19:14:18 +0200 Subject: [PATCH 1268/2924] netlink: specs: ethtool: Remove UAPI duplication of phy-upstream enum The phy-upstream enum is already defined in the ethtool.h UAPI header and used by the ethtool userspace tool. However, the ethtool spec does not reference it, causing YNL to auto-generate a duplicate and redundant enum. Fix this by updating the spec to reference the existing UAPI enum in ethtool.h. Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20250425171419.947352-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 4 +++- include/uapi/linux/ethtool_netlink_generated.h | 5 ----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 655d8d10fe248d..c650cd3dcb80bc 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -89,8 +89,10 @@ definitions: doc: Group of short_detected states - name: phy-upstream-type - enum-name: + enum-name: phy-upstream + header: linux/ethtool.h type: enum + name-prefix: phy-upstream entries: [ mac, phy ] - name: tcp-data-split diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index fe24c3459ac0f2..30c8dad6214e9a 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -31,11 +31,6 @@ enum ethtool_header_flags { ETHTOOL_FLAG_STATS = 4, }; -enum { - ETHTOOL_PHY_UPSTREAM_TYPE_MAC, - ETHTOOL_PHY_UPSTREAM_TYPE_PHY, -}; - enum ethtool_tcp_data_split { ETHTOOL_TCP_DATA_SPLIT_UNKNOWN, ETHTOOL_TCP_DATA_SPLIT_DISABLED, From dfd76010f8e821b66116dec3c7d90dd2403d1396 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 25 Apr 2025 13:38:57 -0700 Subject: [PATCH 1269/2924] pds_core: remove write-after-free of client_id A use-after-free error popped up in stress testing: [Mon Apr 21 21:21:33 2025] BUG: KFENCE: use-after-free write in pdsc_auxbus_dev_del+0xef/0x160 [pds_core] [Mon Apr 21 21:21:33 2025] Use-after-free write at 0x000000007013ecd1 (in kfence-#47): [Mon Apr 21 21:21:33 2025] pdsc_auxbus_dev_del+0xef/0x160 [pds_core] [Mon Apr 21 21:21:33 2025] pdsc_remove+0xc0/0x1b0 [pds_core] [Mon Apr 21 21:21:33 2025] pci_device_remove+0x24/0x70 [Mon Apr 21 21:21:33 2025] device_release_driver_internal+0x11f/0x180 [Mon Apr 21 21:21:33 2025] driver_detach+0x45/0x80 [Mon Apr 21 21:21:33 2025] bus_remove_driver+0x83/0xe0 [Mon Apr 21 21:21:33 2025] pci_unregister_driver+0x1a/0x80 The actual device uninit usually happens on a separate thread scheduled after this code runs, but there is no guarantee of order of thread execution, so this could be a problem. There's no actual need to clear the client_id at this point, so simply remove the offending code. Fixes: 10659034c622 ("pds_core: add the aux client API") Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250425203857.71547-1-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/auxbus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index c9aac27883a3cb..92f359f2b44920 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -186,7 +186,6 @@ void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf, pds_client_unregister(pf, padev->client_id); auxiliary_device_delete(&padev->aux_dev); auxiliary_device_uninit(&padev->aux_dev); - padev->client_id = 0; *pd_ptr = NULL; mutex_unlock(&pf->config_lock); From f99a3fbf023e20b626be4b0f042463d598050c9a Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:05 -0300 Subject: [PATCH 1270/2924] net_sched: drr: Fix double list add in class with netem as child qdisc As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of drr, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. In addition to checking for qlen being zero, this patch checks whether the class was already added to the active_list (cl_is_active) before adding to the list to cover for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-2-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_drr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index e0a81d313aa767..9b6d79bd873712 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -35,6 +35,11 @@ struct drr_sched { struct Qdisc_class_hash clhash; }; +static bool cl_is_active(struct drr_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) { struct drr_sched *q = qdisc_priv(sch); @@ -337,7 +342,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; int err = 0; - bool first; cl = drr_classify(skb, sch, &err); if (cl == NULL) { @@ -347,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -357,7 +360,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (first) { + if (!cl_is_active(cl)) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } From 141d34391abbb315d68556b7c67ad97885407547 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:06 -0300 Subject: [PATCH 1271/2924] net_sched: hfsc: Fix a UAF vulnerability in class with netem as child qdisc As described in Gerrard's report [1], we have a UAF case when an hfsc class has a netem child qdisc. The crux of the issue is that hfsc is assuming that checking for cl->qdisc->q.qlen == 0 guarantees that it hasn't inserted the class in the vttree or eltree (which is not true for the netem duplicate case). This patch checks the n_active class variable to make sure that the code won't insert the class in the vttree or eltree twice, catering for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Reported-by: Gerrard Tai Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-3-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_hfsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 6c8ef826cec0b6..cb8c525ea20eab 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1569,7 +1569,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } - if (first) { + if (first && !cl->cl_nactive) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) From 1a6d0c00fa07972384b0c308c72db091d49988b6 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:07 -0300 Subject: [PATCH 1272/2924] net_sched: ets: Fix double list add in class with netem as child qdisc As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of ets, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. In addition to checking for qlen being zero, this patch checks whether the class was already added to the active_list (cl_is_active) before doing the addition to cater for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-4-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_ets.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index c3bdeb14185bea..2c069f0181c62b 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -74,6 +74,11 @@ static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = { [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 }, }; +static bool cl_is_active(struct ets_class *cl) +{ + return !list_empty(&cl->alist); +} + static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr, unsigned int *quantum, struct netlink_ext_ack *extack) @@ -416,7 +421,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct ets_sched *q = qdisc_priv(sch); struct ets_class *cl; int err = 0; - bool first; cl = ets_classify(skb, sch, &err); if (!cl) { @@ -426,7 +430,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -436,7 +439,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (first && !ets_class_is_strict(q, cl)) { + if (!cl_is_active(cl) && !ets_class_is_strict(q, cl)) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } From f139f37dcdf34b67f5bf92bc8e0f7f6b3ac63aa4 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:08 -0300 Subject: [PATCH 1273/2924] net_sched: qfq: Fix double list add in class with netem as child qdisc As described in Gerrard's report [1], there are use cases where a netem child qdisc will make the parent qdisc's enqueue callback reentrant. In the case of qfq, there won't be a UAF, but the code will add the same classifier to the list twice, which will cause memory corruption. This patch checks whether the class was already added to the agg->active list (cl_is_active) before doing the addition to cater for the reentrant case. [1] https://lore.kernel.org/netdev/CAHcdcOm+03OD2j6R0=YHKqmy=VgJ8xEOKuP6c7mSgnp-TEJJbw@mail.gmail.com/ Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-5-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_qfq.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 687a932eb9b2f6..bf1282cb22ebae 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -202,6 +202,11 @@ struct qfq_sched { */ enum update_reason {enqueue, requeue}; +static bool cl_is_active(struct qfq_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) { struct qfq_sched *q = qdisc_priv(sch); @@ -1215,7 +1220,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct qfq_class *cl; struct qfq_aggregate *agg; int err = 0; - bool first; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -1237,7 +1241,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); @@ -1253,8 +1256,8 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, ++sch->q.qlen; agg = cl->agg; - /* if the queue was not empty, then done here */ - if (!first) { + /* if the class is active, then done here */ + if (cl_is_active(cl)) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) == cl && cl->deficit < len) From a6e1c5aa16dd5d351603c9d3ae259a069eabdcc2 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Fri, 25 Apr 2025 19:07:09 -0300 Subject: [PATCH 1274/2924] selftests: tc-testing: Add TDC tests that exercise reentrant enqueue behaviour Add 5 TDC tests that exercise the reentrant enqueue behaviour in drr, ets, qfq, and hfsc: - Test DRR's enqueue reentrant behaviour with netem (which caused a double list add) - Test ETS's enqueue reentrant behaviour with netem (which caused a double list add) - Test QFQ's enqueue reentrant behaviour with netem (which caused a double list add) - Test HFSC's enqueue reentrant behaviour with netem (which caused a UAF) - Test nested DRR's enqueue reentrant behaviour with netem (which caused a double list add) Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Link: https://patch.msgid.link/20250425220710.3964791-6-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index e26bbc1697832f..0843f6d37e9c77 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -352,5 +352,191 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "90ec", + "name": "Test DRR's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "drr", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "1f1f", + "name": "Test ETS's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "ets" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root ets bands 2", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 ets quantum 1500", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s class show dev $DUMMY", + "matchJSON": [ + { + "class": "ets", + "handle": "1:1", + "stats": { + "bytes": 196, + "packets": 2 + } + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "5e6d", + "name": "Test QFQ's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root qfq", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 qfq weight 100 maxpkt 1500", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "qfq", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "bf1d", + "name": "Test HFSC's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root hfsc", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", + "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", + "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c 1 10.10.10.2 -I$DUMMY > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "hfsc", + "handle": "1:", + "bytes": 392, + "packets": 4 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "7c3b", + "name": "Test nested DRR's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "$TC qdisc add dev $DUMMY handle 2:0 parent 1:1 drr", + "$TC class add dev $DUMMY classid 2:1 parent 2:0 drr", + "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 2:1", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "drr", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] From 3ffcd7b657c9d96eb3ffe174449b4248dd7fc6a9 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Fri, 25 Apr 2025 15:26:31 -0700 Subject: [PATCH 1275/2924] ice: fix Get Tx Topology AQ command error on E830 The Get Tx Topology AQ command (opcode 0x0418) has different read flag requirements depending on the hardware/firmware. For E810, E822, and E823 firmware the read flag must be set, and for newer hardware (E825 and E830) it must not be set. This results in failure to configure Tx topology and the following warning message during probe: DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again The current implementation only handles E825-C but not E830. It is confusing as we first check ice_is_e825c() and then set the flag in the set case. Finally, we check ice_is_e825c() again and set the flag for all other hardware in both the set and get case. Instead, notice that we always need the read flag for set, but only need the read flag for get on E810, E822, and E823 firmware. Fix the logic to check the MAC type and set the read flag in get only on the older devices which require it. Fixes: ba1124f58afd ("ice: Add E830 device IDs, MAC type and registers") Signed-off-by: Paul Greenwalt Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250425222636.3188441-2-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ddp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index 69d5b1a28491db..59323c019544fc 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -2345,15 +2345,15 @@ ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size, cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM | ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW; - if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); } else { ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo); cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM; - } - if (hw->mac_type != ICE_MAC_GENERIC_3K_E825) - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + if (hw->mac_type == ICE_MAC_E810 || + hw->mac_type == ICE_MAC_GENERIC) + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + } status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); if (status) From 425c5f266b2edeee0ce16fedd8466410cdcfcfe3 Mon Sep 17 00:00:00 2001 From: Xuanqiang Luo Date: Fri, 25 Apr 2025 15:26:32 -0700 Subject: [PATCH 1276/2924] ice: Check VF VSI Pointer Value in ice_vc_add_fdir_fltr() As mentioned in the commit baeb705fd6a7 ("ice: always check VF VSI pointer values"), we need to perform a null pointer check on the return value of ice_get_vf_vsi() before using it. Fixes: 6ebbe97a4881 ("ice: Add a per-VF limit on number of FDIR filters") Signed-off-by: Xuanqiang Luo Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20250425222636.3188441-3-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 7752920d7a8ee4..1cca9b2262e866 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -2097,6 +2097,11 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) pf = vf->pf; dev = ice_pf_to_dev(pf); vf_vsi = ice_get_vf_vsi(vf); + if (!vf_vsi) { + dev_err(dev, "Can not get FDIR vf_vsi for VF %u\n", vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err_exit; + } #define ICE_VF_MAX_FDIR_FILTERS 128 if (!ice_fdir_num_avail_fltr(&pf->hw, vf_vsi) || From 713dd6c2deca88cba0596b1e2576f7b7a8e5c59e Mon Sep 17 00:00:00 2001 From: Madhu Chittim Date: Fri, 25 Apr 2025 15:26:33 -0700 Subject: [PATCH 1277/2924] idpf: fix offloads support for encapsulated packets Split offloads into csum, tso and other offloads so that tunneled packets do not by default have all the offloads enabled. Stateless offloads for encapsulated packets are not yet supported in firmware/software but in the driver we were setting the features same as non encapsulated features. Fixed naming to clarify CSUM bits are being checked for Tx. Inherit netdev features to VLAN interfaces as well. Fixes: 0fe45467a104 ("idpf: add create vport and netdev configuration") Reviewed-by: Sridhar Samudrala Signed-off-by: Madhu Chittim Tested-by: Zachary Goldstein Tested-by: Samuel Salin Signed-off-by: Tony Nguyen Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250425222636.3188441-4-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf.h | 18 +++---- drivers/net/ethernet/intel/idpf/idpf_lib.c | 57 ++++++++-------------- 2 files changed, 27 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 66544faab710aa..aef0e9775a3305 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -629,13 +629,13 @@ bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 |\ VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6) -#define IDPF_CAP_RX_CSUM_L4V4 (\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP) +#define IDPF_CAP_TX_CSUM_L4V4 (\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP |\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP) -#define IDPF_CAP_RX_CSUM_L4V6 (\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) +#define IDPF_CAP_TX_CSUM_L4V6 (\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP |\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP) #define IDPF_CAP_RX_CSUM (\ VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 |\ @@ -644,11 +644,9 @@ bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) -#define IDPF_CAP_SCTP_CSUM (\ +#define IDPF_CAP_TX_SCTP_CSUM (\ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |\ - VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP) + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP) #define IDPF_CAP_TUNNEL_TX_CSUM (\ VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL |\ diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index aa755dedb41d98..730a9c7a59f2b0 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -703,8 +703,10 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; struct idpf_vport_config *vport_config; + netdev_features_t other_offloads = 0; + netdev_features_t csum_offloads = 0; + netdev_features_t tso_offloads = 0; netdev_features_t dflt_features; - netdev_features_t offloads = 0; struct idpf_netdev_priv *np; struct net_device *netdev; u16 idx = vport->idx; @@ -766,53 +768,32 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) if (idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) dflt_features |= NETIF_F_RXHASH; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V4)) - dflt_features |= NETIF_F_IP_CSUM; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V6)) - dflt_features |= NETIF_F_IPV6_CSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_CSUM_L4V4)) + csum_offloads |= NETIF_F_IP_CSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_CSUM_L4V6)) + csum_offloads |= NETIF_F_IPV6_CSUM; if (idpf_is_cap_ena(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM)) - dflt_features |= NETIF_F_RXCSUM; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_SCTP_CSUM)) - dflt_features |= NETIF_F_SCTP_CRC; + csum_offloads |= NETIF_F_RXCSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_SCTP_CSUM)) + csum_offloads |= NETIF_F_SCTP_CRC; if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_TCP)) - dflt_features |= NETIF_F_TSO; + tso_offloads |= NETIF_F_TSO; if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV6_TCP)) - dflt_features |= NETIF_F_TSO6; + tso_offloads |= NETIF_F_TSO6; if (idpf_is_cap_ena_all(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_UDP | VIRTCHNL2_CAP_SEG_IPV6_UDP)) - dflt_features |= NETIF_F_GSO_UDP_L4; + tso_offloads |= NETIF_F_GSO_UDP_L4; if (idpf_is_cap_ena_all(adapter, IDPF_RSC_CAPS, IDPF_CAP_RSC)) - offloads |= NETIF_F_GRO_HW; - /* advertise to stack only if offloads for encapsulated packets is - * supported - */ - if (idpf_is_cap_ena(vport->adapter, IDPF_SEG_CAPS, - VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL)) { - offloads |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM | - NETIF_F_GSO_PARTIAL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_IPXIP4 | - NETIF_F_GSO_IPXIP6 | - 0; - - if (!idpf_is_cap_ena_all(vport->adapter, IDPF_CSUM_CAPS, - IDPF_CAP_TUNNEL_TX_CSUM)) - netdev->gso_partial_features |= - NETIF_F_GSO_UDP_TUNNEL_CSUM; - - netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; - offloads |= NETIF_F_TSO_MANGLEID; - } + other_offloads |= NETIF_F_GRO_HW; if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_LOOPBACK)) - offloads |= NETIF_F_LOOPBACK; + other_offloads |= NETIF_F_LOOPBACK; - netdev->features |= dflt_features; - netdev->hw_features |= dflt_features | offloads; - netdev->hw_enc_features |= dflt_features | offloads; + netdev->features |= dflt_features | csum_offloads | tso_offloads; + netdev->hw_features |= netdev->features | other_offloads; + netdev->vlan_features |= netdev->features | other_offloads; + netdev->hw_enc_features |= dflt_features | other_offloads; idpf_set_ethtool_ops(netdev); netif_set_affinity_auto(netdev); SET_NETDEV_DEV(netdev, &adapter->pdev->dev); From 9c51f24c1ac7cbde9cc94a54137775dc52aae491 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 22 Apr 2025 18:03:47 +0100 Subject: [PATCH 1278/2924] scsi: myrb: Fix spelling mistake "statux" -> "status" There is a spelling mistake in a dev_err() message. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20250422170347.66792-1-colin.i.king@gmail.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/myrb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index dc4bd422b60191..486db5b2f05d2d 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -891,7 +891,7 @@ static bool myrb_enable_mmio(struct myrb_hba *cb, mbox_mmio_init_t mmio_init_fn) status = mmio_init_fn(pdev, base, &mbox); if (status != MYRB_STATUS_SUCCESS) { dev_err(&pdev->dev, - "Failed to enable mailbox, statux %02X\n", + "Failed to enable mailbox, status %02X\n", status); return false; } From 0e9693b97a0eee1df7bae33aec207c975fbcbdb8 Mon Sep 17 00:00:00 2001 From: Keoseong Park Date: Fri, 25 Apr 2025 10:06:05 +0900 Subject: [PATCH 1279/2924] scsi: ufs: core: Remove redundant query_complete trace The query_complete trace was not removed after ufshcd_issue_dev_cmd() was called from the bsg path, resulting in duplicate output. Below is an example of the trace: ufs-utils-773 [000] ..... 218.176933: ufshcd_upiu: query_send: 0000:00:04.0: HDR:16 00 00 1f 00 01 00 00 00 00 00 00, OSF:03 07 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ufs-utils-773 [000] ..... 218.177145: ufshcd_upiu: query_complete: 0000:00:04.0: HDR:36 00 00 1f 00 01 00 00 00 00 00 00, OSF:03 07 00 00 00 00 00 00 00 00 00 08 00 00 00 00 ufs-utils-773 [000] ..... 218.177146: ufshcd_upiu: query_complete: 0000:00:04.0: HDR:36 00 00 1f 00 01 00 00 00 00 00 00, OSF:03 07 00 00 00 00 00 00 00 00 00 08 00 00 00 00 Remove the redundant trace call in the bsg path, preventing duplication. Signed-off-by: Keoseong Park Link: https://lore.kernel.org/r/20250425010605epcms2p67e89b351398832fe0fd547404d3afc65@epcms2p6 Fixes: 71aabb747d5f ("scsi: ufs: core: Reuse exec_dev_cmd") Reviewed-by: Avri Altman Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 5cb6132b8147a3..7735421e399182 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7265,8 +7265,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, err = -EINVAL; } } - ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP, - (struct utp_upiu_req *)lrbp->ucd_rsp_ptr); return err; } From 652dd6558b8b5a1a04fef129e0231ee493e24951 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:12:01 -0400 Subject: [PATCH 1280/2924] bcachefs: btree_root_unreadable_and_scan_found_nothing autofix for non data btrees If loosing a btree won't cause data loss - i.e. it's an alloc btree, or we can easily reconstruct it - we shouldn't require user action to continue repair. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 7b98ba2dec6472..37b69d89341f4f 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -47,6 +47,27 @@ #define DROP_PREV_NODE 11 #define DID_FILL_FROM_SCAN 12 +/* + * Returns true if it's a btree we can easily reconstruct, or otherwise won't + * cause data loss if it's missing: + */ +static bool btree_id_important(enum btree_id btree) +{ + if (btree_id_is_alloc(btree)) + return false; + + switch (btree) { + case BTREE_ID_quotas: + case BTREE_ID_snapshot_trees: + case BTREE_ID_logged_ops: + case BTREE_ID_rebalance_work: + case BTREE_ID_subvolume_children: + return false; + default: + return true; + } +} + static const char * const bch2_gc_phase_strs[] = { #define x(n) #n, GC_PHASES() @@ -534,8 +555,10 @@ int bch2_check_topology(struct bch_fs *c) r->error = 0; if (!bch2_btree_has_scanned_nodes(c, i)) { - mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); + __fsck_err(trans, + FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); } else { bch2_btree_root_alloc_fake_trans(trans, i, 1); From e5a3b8cf3330a774e5f5f06a2b7cf20116447297 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:25:15 -0400 Subject: [PATCH 1281/2924] bcachefs: More informative error message when shutting down due to error Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 925b0b54ea2f93..6b8695b1349c96 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -478,7 +478,9 @@ int __bch2_fsck_err(struct bch_fs *c, } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { - prt_str(out, ", shutting down"); + prt_str_indented(out, ", shutting down\n" + "error not marked as autofix and not in fsck\n" + "run fsck, and forward to devs so error can be marked for self-healing"); inconsistent = true; print = true; ret = -BCH_ERR_fsck_errors_not_fixed; From 9a4a858c9b365d817231f6f3592dc26e9d4191bb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:28:58 -0400 Subject: [PATCH 1282/2924] bcachefs: Use bch2_kvmalloc() for journal keys array We can hit this limit fairly easy when we have to reconstuct large amounts of alloc info on large filesystems. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_journal_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 7d6c971db23c33..ade3b5addd7592 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -288,7 +288,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, .size = max_t(size_t, keys->size, 8) * 2, }; - new_keys.data = kvmalloc_array(new_keys.size, sizeof(new_keys.data[0]), GFP_KERNEL); + new_keys.data = bch2_kvmalloc(new_keys.size * sizeof(new_keys.data[0]), GFP_KERNEL); if (!new_keys.data) { bch_err(c, "%s: error allocating new key array (size %zu)", __func__, new_keys.size); From dbe4674802ec8e43835900490b3492299464ad27 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 20:38:04 -0400 Subject: [PATCH 1283/2924] bcachefs: Topology error after insert is now an ERO A user hit this, and this will naturally be easier to debug if we don't panic. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 49 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 44b5fe430370f7..00307356d7c814 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1389,7 +1389,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, printbuf_exit(&buf); } -static void +static int bch2_btree_insert_keys_interior(struct btree_update *as, struct btree_trans *trans, struct btree_path *path, @@ -1411,7 +1411,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, insert = bkey_next(insert)) bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); - if (bch2_btree_node_check_topology(trans, b)) { + int ret = bch2_btree_node_check_topology(trans, b); + if (ret) { struct printbuf buf = PRINTBUF; for (struct bkey_i *k = keys->keys; @@ -1421,11 +1422,15 @@ bch2_btree_insert_keys_interior(struct btree_update *as, prt_newline(&buf); } - panic("%s(): check_topology error: inserted keys\n%s", __func__, buf.buf); + bch2_fs_fatal_error(as->c, "%ps -> %s(): check_topology error %s: inserted keys\n%s", + (void *) _RET_IP_, __func__, bch2_err_str(ret), buf.buf); + dump_stack(); + return ret; } memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data); keys->top_p -= insert->_data - keys->keys_p; + return 0; } static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos) @@ -1559,11 +1564,11 @@ static void __btree_split_node(struct btree_update *as, * nodes that were coalesced, and thus in the middle of a child node post * coalescing: */ -static void btree_split_insert_keys(struct btree_update *as, - struct btree_trans *trans, - btree_path_idx_t path_idx, - struct btree *b, - struct keylist *keys) +static int btree_split_insert_keys(struct btree_update *as, + struct btree_trans *trans, + btree_path_idx_t path_idx, + struct btree *b, + struct keylist *keys) { struct btree_path *path = trans->paths + path_idx; @@ -1573,8 +1578,12 @@ static void btree_split_insert_keys(struct btree_update *as, bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + int ret = bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + if (ret) + return ret; } + + return 0; } static int btree_split(struct btree_update *as, struct btree_trans *trans, @@ -1607,8 +1616,10 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, __btree_split_node(as, trans, b, n, keys); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); - btree_split_insert_keys(as, trans, path, n2, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys) ?: + btree_split_insert_keys(as, trans, path, n2, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1654,7 +1665,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; - btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + ret = btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + if (ret) + goto err; } } else { trace_and_count(c, btree_node_compact, trans, b); @@ -1662,7 +1675,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n1 = bch2_btree_node_alloc_replacement(as, trans, b); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1809,15 +1824,15 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t goto split; } - ret = bch2_btree_node_check_topology(trans, b); + + ret = bch2_btree_node_check_topology(trans, b) ?: + bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); if (ret) { bch2_btree_node_unlock_write(trans, path, b); return ret; } - bch2_btree_insert_keys_interior(as, trans, path, b, - path->l[b->c.level].iter, keys); - trans_for_each_path_with_node(trans, b, linked, i) bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); From bbfe756dc3062c1e934f06e5ba39c239aa953b92 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 29 Apr 2025 01:09:33 +0200 Subject: [PATCH 1284/2924] fs/erofs/fileio: call erofs_onlinefolio_split() after bio_add_folio() If bio_add_folio() fails (because it is full), erofs_fileio_scan_folio() needs to submit the I/O request via erofs_fileio_rq_submit() and allocate a new I/O request with an empty `struct bio`. Then it retries the bio_add_folio() call. However, at this point, erofs_onlinefolio_split() has already been called which increments `folio->private`; the retry will call erofs_onlinefolio_split() again, but there will never be a matching erofs_onlinefolio_end() call. This leaves the folio locked forever and all waiters will be stuck in folio_wait_bit_common(). This bug has been added by commit ce63cb62d794 ("erofs: support unencoded inodes for fileio"), but was practically unreachable because there was room for 256 folios in the `struct bio` - until commit 9f74ae8c9ac9 ("erofs: shorten bvecs[] for file-backed mounts") which reduced the array capacity to 16 folios. It was now trivial to trigger the bug by manually invoking readahead from userspace, e.g.: posix_fadvise(fd, 0, st.st_size, POSIX_FADV_WILLNEED); This should be fixed by invoking erofs_onlinefolio_split() only after bio_add_folio() has succeeded. This is safe: asynchronous completions invoking erofs_onlinefolio_end() will not unlock the folio because erofs_fileio_scan_folio() is still holding a reference to be released by erofs_onlinefolio_end() at the end. Fixes: ce63cb62d794 ("erofs: support unencoded inodes for fileio") Fixes: 9f74ae8c9ac9 ("erofs: shorten bvecs[] for file-backed mounts") Cc: stable@vger.kernel.org Signed-off-by: Max Kellermann Reviewed-by: Gao Xiang Tested-by: Hongbo Li Link: https://lore.kernel.org/r/20250428230933.3422273-1-max.kellermann@ionos.com Signed-off-by: Gao Xiang --- fs/erofs/fileio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 4fa0a0121288bd..60c7cc4c105c67 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -150,10 +150,10 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> 9; attached = 0; } - if (!attached++) - erofs_onlinefolio_split(folio); if (!bio_add_folio(&io->rq->bio, folio, len, cur)) goto io_retry; + if (!attached++) + erofs_onlinefolio_split(folio); io->dev.m_pa += len; } cur += len; From c1c9cad50c5c35cd4de1b54af59a28bf07451593 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Sun, 23 Mar 2025 05:49:06 -0700 Subject: [PATCH 1285/2924] drm/xe/svm: fix dereferencing error pointer in drm_gpusvm_range_alloc() xe_svm_range_alloc() returns ERR_PTR(-ENOMEM) on failure and there is a dereference of "range" after that: --> range->gpusvm = gpusvm; In xe_svm_range_alloc(), when memory allocation fails return NULL instead to handle this situation. Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/adaef4dd-5866-48ca-bc22-4a1ddef20381@stanley.mountain/ Signed-off-by: Harshit Mogalapalli Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250323124907.3946370-1-harshit.m.mogalapalli@oracle.com (cherry picked from commit 7a0322122cfdd9a6f10fc7701023d75c98eb3d22) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index f8c128524d9f33..0b6547c06961a0 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -79,7 +79,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm) range = kzalloc(sizeof(*range), GFP_KERNEL); if (!range) - return ERR_PTR(-ENOMEM); + return NULL; INIT_LIST_HEAD(&range->garbage_collector_link); xe_vm_get(gpusvm_to_vm(gpusvm)); From 5e639707ddb8f080fbde805a1bfa6668a1b45298 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 17 Apr 2025 12:52:12 -0700 Subject: [PATCH 1286/2924] drm/xe/guc: Fix capture of steering registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The list of registers to capture on a GPU hang includes some that require steering. Unfortunately, the flag to say this was being wiped to due a missing OR on the assignment of the next flag field. Fix that. Fixes: b170d696c1e2 ("drm/xe/guc: Add XE_LP steered register lists") Cc: Zhanjun Dong Cc: Alan Previn Cc: Matt Roper Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: Rodrigo Vivi Cc: intel-xe@lists.freedesktop.org Signed-off-by: John Harrison Reviewed-by: Matt Roper Reviewed-by: Zhanjun Dong Link: https://lore.kernel.org/r/20250417195215.3002210-2-John.C.Harrison@Intel.com (cherry picked from commit 532da44b54a10d50ebad14a8a02bd0b78ec23e8b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_capture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index f6d523e4c5feb7..9095618648bcbc 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -359,7 +359,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, ext->reg = XE_REG(extlist->reg.__reg.addr); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); - ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); + ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); ext->regname = extlist->name; } From b1852c5de2f2a37dd4462f7837c9e3e678f9e546 Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 21 Apr 2025 14:23:41 +0800 Subject: [PATCH 1287/2924] i2c: imx-lpi2c: Fix clock count when probe defers Deferred probe with pm_runtime_put() may delay clock disable, causing incorrect clock usage count. Use pm_runtime_put_sync() to ensure the clock is disabled immediately. Fixes: 13d6eb20fc79 ("i2c: imx-lpi2c: add runtime pm support") Signed-off-by: Clark Wang Signed-off-by: Carlos Song Cc: # v4.16+ Link: https://lore.kernel.org/r/20250421062341.2471922-1-carlos.song@nxp.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-imx-lpi2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 0d4b3935e68732..342d47e675869d 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -1380,9 +1380,9 @@ static int lpi2c_imx_probe(struct platform_device *pdev) return 0; rpm_disable: - pm_runtime_put(&pdev->dev); - pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); return ret; } From 12b8a672d2aa053064151659f49e7310674d42d3 Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Tue, 29 Apr 2025 09:32:29 +0530 Subject: [PATCH 1288/2924] pinctrl: qcom: Fix PINGROUP definition for sm8750 On newer SoCs intr_target_bit position is at 8 instead of 5. Fix it. Also add missing intr_wakeup_present_bit and intr_wakeup_enable_bit which enables forwarding of GPIO interrupts to parent PDC interrupt controller. Fixes: afe9803e3b82 ("pinctrl: qcom: Add sm8750 pinctrl driver") Signed-off-by: Maulik Shah Reviewed-by: Abel Vesa Reviewed-by: Dmitry Baryshkov Reviewed-by: Melody Olvera Link: https://lore.kernel.org/20250429-pinctrl_sm8750-v2-1-87d45dd3bd82@oss.qualcomm.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-sm8750.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8750.c b/drivers/pinctrl/qcom/pinctrl-sm8750.c index 1af11cd95fb0e6..b94fb4ee0ec380 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8750.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8750.c @@ -46,7 +46,9 @@ .out_bit = 1, \ .intr_enable_bit = 0, \ .intr_status_bit = 0, \ - .intr_target_bit = 5, \ + .intr_wakeup_present_bit = 6, \ + .intr_wakeup_enable_bit = 7, \ + .intr_target_bit = 8, \ .intr_target_kpss_val = 3, \ .intr_raw_status_bit = 4, \ .intr_polarity_bit = 1, \ From 13a6d4265665201a795a2ff5a3e6e4d183fc9c33 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 28 Apr 2025 13:47:52 +0200 Subject: [PATCH 1289/2924] pmdomain: renesas: rcar: Remove obsolete nullify checks All nullify users and helpers were removed, but the R-Car SYSC drivers still checked for nullified domains. Remove the obsolete checks. Fixes: c8d87704444a8ac7 ("pmdomain: renesas: rcar-sysc: Remove rcar_sysc_nullify() helper") Signed-off-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/107f2bf9f13b29f0f623d2959a5347ec151fb089.1745840768.git.geert+renesas@glider.be Signed-off-by: Ulf Hansson --- drivers/pmdomain/renesas/rcar-gen4-sysc.c | 5 ----- drivers/pmdomain/renesas/rcar-sysc.c | 5 ----- 2 files changed, 10 deletions(-) diff --git a/drivers/pmdomain/renesas/rcar-gen4-sysc.c b/drivers/pmdomain/renesas/rcar-gen4-sysc.c index 66409cff2083fc..e001b5c25bed00 100644 --- a/drivers/pmdomain/renesas/rcar-gen4-sysc.c +++ b/drivers/pmdomain/renesas/rcar-gen4-sysc.c @@ -338,11 +338,6 @@ static int __init rcar_gen4_sysc_pd_init(void) struct rcar_gen4_sysc_pd *pd; size_t n; - if (!area->name) { - /* Skip NULLified area */ - continue; - } - n = strlen(area->name) + 1; pd = kzalloc(sizeof(*pd) + n, GFP_KERNEL); if (!pd) { diff --git a/drivers/pmdomain/renesas/rcar-sysc.c b/drivers/pmdomain/renesas/rcar-sysc.c index dce1a6d37e8012..047495f54e8adc 100644 --- a/drivers/pmdomain/renesas/rcar-sysc.c +++ b/drivers/pmdomain/renesas/rcar-sysc.c @@ -396,11 +396,6 @@ static int __init rcar_sysc_pd_init(void) struct rcar_sysc_pd *pd; size_t n; - if (!area->name) { - /* Skip NULLified area */ - continue; - } - n = strlen(area->name) + 1; pd = kzalloc(sizeof(*pd) + n, GFP_KERNEL); if (!pd) { From 730d837979bac203c786f2c5b0707f5426275c0d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:36 +0800 Subject: [PATCH 1290/2924] selftests: ublk: fix UBLK_F_NEED_GET_DATA Commit 57e13a2e8cd2 ("selftests: ublk: support user recovery") starts to support UBLK_F_NEED_GET_DATA for covering recovery feature, however the ublk utility implementation isn't done correctly. Fix it by supporting UBLK_F_NEED_GET_DATA correctly. Also add test generic_07 for covering UBLK_F_NEED_GET_DATA. Reviewed-by: Caleb Sander Mateos Fixes: 57e13a2e8cd2 ("selftests: ublk: support user recovery") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 1 + tools/testing/selftests/ublk/kublk.c | 22 +++++++++------ tools/testing/selftests/ublk/kublk.h | 1 + .../testing/selftests/ublk/test_generic_07.sh | 28 +++++++++++++++++++ .../testing/selftests/ublk/test_stress_05.sh | 8 +++--- 5 files changed, 48 insertions(+), 12 deletions(-) create mode 100755 tools/testing/selftests/ublk/test_generic_07.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index ec4624a283bce2..f34ac0bac69623 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -9,6 +9,7 @@ TEST_PROGS += test_generic_03.sh TEST_PROGS += test_generic_04.sh TEST_PROGS += test_generic_05.sh TEST_PROGS += test_generic_06.sh +TEST_PROGS += test_generic_07.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index e57a1486bb48d8..842b40736a9b81 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -536,12 +536,17 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) if (!(io->flags & UBLKSRV_IO_FREE)) return 0; - /* we issue because we need either fetching or committing */ + /* + * we issue because we need either fetching or committing or + * getting data + */ if (!(io->flags & - (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA))) return 0; - if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + if (io->flags & UBLKSRV_NEED_GET_DATA) + cmd_op = UBLK_U_IO_NEED_GET_DATA; + else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; else if (io->flags & UBLKSRV_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; @@ -658,6 +663,9 @@ static void ublk_handle_cqe(struct io_uring *r, assert(tag < q->q_depth); if (q->tgt_ops->queue_io) q->tgt_ops->queue_io(q, tag); + } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { + io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; + ublk_queue_io_cmd(q, io, tag); } else { /* * COMMIT_REQ will be completed immediately since no fetching @@ -1237,7 +1245,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); - printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n"); + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n"); printf("\t[-e 0|1 ] [-i 0|1]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1313,7 +1321,7 @@ int main(int argc, char *argv[]) opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:az", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1351,9 +1359,7 @@ int main(int argc, char *argv[]) ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; break; case 'g': - value = strtol(optarg, NULL, 10); - if (value) - ctx.flags |= UBLK_F_NEED_GET_DATA; + ctx.flags |= UBLK_F_NEED_GET_DATA; break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 918db5cd633fc1..44ee1e4ac55b2d 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -115,6 +115,7 @@ struct ublk_io { #define UBLKSRV_NEED_FETCH_RQ (1UL << 0) #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) #define UBLKSRV_IO_FREE (1UL << 2) +#define UBLKSRV_NEED_GET_DATA (1UL << 3) unsigned short flags; unsigned short refs; /* used by target code only */ diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh new file mode 100755 index 00000000000000..cba86451fa5e57 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_07.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_07" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_NEED_GET_DATA" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? +if [ "$ERR_CODE" -eq 0 ]; then + _mkfs_mount_test /dev/ublkb"${dev_id}" + ERR_CODE=$? +fi + +_cleanup_test "generic" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index a7071b10224d9f..88601b48f1cd37 100755 --- a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -47,15 +47,15 @@ _create_backfile 0 256M _create_backfile 1 256M for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g 1 -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g 1 -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & wait done if _have_feature "ZERO_COPY"; then for reissue in $(seq 0 1); do - ublk_io_and_remove 8G -t null -q 4 -g 1 -z -r 1 -i "$reissue" & - ublk_io_and_remove 256M -t loop -q 4 -g 1 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & wait done fi From 69edf98be844375807f299397c516fb1e962b3cc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:37 +0800 Subject: [PATCH 1291/2924] ublk: decouple zero copy from user copy UBLK_F_USER_COPY and UBLK_F_SUPPORT_ZERO_COPY are two different features, and shouldn't be coupled together. Commit 1f6540e2aabb ("ublk: zc register/unregister bvec") enables user copy automatically in case of UBLK_F_SUPPORT_ZERO_COPY, this way isn't correct. So decouple zero copy from user copy, and use independent helper to check each one. Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 40f971a66d3eb2..0a3a3c64316d7e 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -205,11 +205,6 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, int tag); -static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub) -{ - return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); -} - static inline bool ublk_dev_is_zoned(const struct ublk_device *ub) { return ub->dev_info.flags & UBLK_F_ZONED; @@ -609,14 +604,19 @@ static void ublk_apply_params(struct ublk_device *ub) ublk_dev_param_zoned_apply(ub); } +static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq) +{ + return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY; +} + static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) { - return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); + return ubq->flags & UBLK_F_USER_COPY; } static inline bool ublk_need_map_io(const struct ublk_queue *ubq) { - return !ublk_support_user_copy(ubq); + return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq); } static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) @@ -624,8 +624,11 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) /* * read()/write() is involved in user copy, so request reference * has to be grabbed + * + * for zero copy, request buffer need to be registered to io_uring + * buffer table, so reference is needed */ - return ublk_support_user_copy(ubq); + return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq); } static inline void ublk_init_req_ref(const struct ublk_queue *ubq, @@ -2245,6 +2248,9 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb, if (!ubq) return ERR_PTR(-EINVAL); + if (!ublk_support_user_copy(ubq)) + return ERR_PTR(-EACCES); + if (tag >= ubq->q_depth) return ERR_PTR(-EINVAL); @@ -2783,13 +2789,18 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE | UBLK_F_URING_CMD_COMP_IN_TASK; - /* GET_DATA isn't needed any more with USER_COPY */ - if (ublk_dev_is_user_copy(ub)) + /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */ + if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA; - /* Zoned storage support requires user copy feature */ + /* + * Zoned storage support requires reuse `ublksrv_io_cmd->addr` for + * returning write_append_lba, which is only allowed in case of + * user copy or zero copy + */ if (ublk_dev_is_zoned(ub) && - (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !ublk_dev_is_user_copy(ub))) { + (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !(ub->dev_info.flags & + (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)))) { ret = -EINVAL; goto out_free_dev_number; } From 6240f43b29f285a40eebeb789756673af7a7d67c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:38 +0800 Subject: [PATCH 1292/2924] ublk: enhance check for register/unregister io buffer command The simple check of UBLK_IO_FLAG_OWNED_BY_SRV can avoid incorrect register/unregister io buffer easily, so check it before calling starting to register/un-register io buffer. Also only allow io buffer register/unregister uring_cmd in case of UBLK_F_SUPPORT_ZERO_COPY. Also mark argument 'ublk_queue *' of ublk_register_io_buf as const. Reviewed-by: Caleb Sander Mateos Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 0a3a3c64316d7e..c624d8f653ae37 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -201,7 +201,7 @@ struct ublk_params_header { static void ublk_stop_dev_unlocked(struct ublk_device *ub); static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - struct ublk_queue *ubq, int tag, size_t offset); + const struct ublk_queue *ubq, int tag, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, int tag); @@ -1949,13 +1949,20 @@ static void ublk_io_release(void *priv) } static int ublk_register_io_buf(struct io_uring_cmd *cmd, - struct ublk_queue *ubq, unsigned int tag, + const struct ublk_queue *ubq, unsigned int tag, unsigned int index, unsigned int issue_flags) { struct ublk_device *ub = cmd->file->private_data; + const struct ublk_io *io = &ubq->ios[tag]; struct request *req; int ret; + if (!ublk_support_zero_copy(ubq)) + return -EINVAL; + + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + return -EINVAL; + req = __ublk_check_and_get_req(ub, ubq, tag, 0); if (!req) return -EINVAL; @@ -1971,8 +1978,17 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd, } static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, + const struct ublk_queue *ubq, unsigned int tag, unsigned int index, unsigned int issue_flags) { + const struct ublk_io *io = &ubq->ios[tag]; + + if (!ublk_support_zero_copy(ubq)) + return -EINVAL; + + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + return -EINVAL; + return io_buffer_unregister_bvec(cmd, index, issue_flags); } @@ -2076,7 +2092,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, case UBLK_IO_REGISTER_IO_BUF: return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_UNREGISTER_IO_BUF: - return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags); + return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_FETCH_REQ: ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); if (ret) @@ -2128,7 +2144,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, } static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - struct ublk_queue *ubq, int tag, size_t offset) + const struct ublk_queue *ubq, int tag, size_t offset) { struct request *req; From a584b2630b0d31f8a20e4ccb4de370b160177b8a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 29 Apr 2025 10:29:39 +0800 Subject: [PATCH 1293/2924] ublk: remove the check of ublk_need_req_ref() from __ublk_check_and_get_req __ublk_check_and_get_req() is only called from ublk_check_and_get_req() and ublk_register_io_buf(), the same check has been covered in the two calling sites. So remove the check from __ublk_check_and_get_req(). Suggested-by: Caleb Sander Mateos Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250429022941.1718671-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c624d8f653ae37..f9032076bc0615 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2148,9 +2148,6 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, { struct request *req; - if (!ublk_need_req_ref(ubq)) - return NULL; - req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); if (!req) return NULL; From 56f1f30e6795b890463d9b20b11e576adf5a2f77 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Apr 2025 14:48:41 +0200 Subject: [PATCH 1294/2924] ALSA: ump: Fix buffer overflow at UMP SysEx message conversion The conversion function from MIDI 1.0 to UMP packet contains an internal buffer to keep the incoming MIDI bytes, and its size is 4, as it was supposed to be the max size for a MIDI1 UMP packet data. However, the implementation overlooked that SysEx is handled in a different format, and it can be up to 6 bytes, as found in do_convert_to_ump(). It leads eventually to a buffer overflow, and may corrupt the memory when a longer SysEx message is received. The fix is simply to extend the buffer size to 6 to fit with the SysEx UMP message. Fixes: 0b5288f5fe63 ("ALSA: ump: Add legacy raw MIDI support") Reported-by: Argusee Link: https://patch.msgid.link/20250429124845.25128-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/ump_convert.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index d099ae27f8491a..682499b871eac4 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -19,7 +19,7 @@ struct ump_cvt_to_ump_bank { /* context for converting from MIDI1 byte stream to UMP packet */ struct ump_cvt_to_ump { /* MIDI1 intermediate buffer */ - unsigned char buf[4]; + unsigned char buf[6]; /* up to 6 bytes for SysEx */ int len; int cmd_bytes; From a75401227eeb827b1a162df1aa9d5b33da921c43 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 24 Apr 2025 10:18:01 -0700 Subject: [PATCH 1295/2924] nvme-pci: fix queue unquiesce check on slot_reset A zero return means the reset was successfully scheduled. We don't want to unquiesce the queues while the reset_work is pending, as that will just flush out requeued requests to a failed completion. Fixes: 71a5bb153be104 ("nvme: ensure disabling pairs with unquiesce") Reported-by: Dhankaran Singh Ajravat Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b178d52eac1b7f..c9e2a5450bc0f3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3575,7 +3575,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - if (!nvme_try_sched_reset(&dev->ctrl)) + if (nvme_try_sched_reset(&dev->ctrl)) nvme_unquiesce_io_queues(&dev->ctrl); return PCI_ERS_RESULT_RECOVERED; } From 5b960f92ac3e5b4d7f60a506a6b6735eead1da01 Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Tue, 22 Apr 2025 20:17:25 +0800 Subject: [PATCH 1296/2924] nvme-pci: add quirks for device 126f:1001 This commit adds NVME_QUIRK_NO_DEEPEST_PS and NVME_QUIRK_BOGUS_NID for device [126f:1001]. It is similar to commit e89086c43f05 ("drivers/nvme: Add quirks for device 126f:2262") Diff is according the dmesg, use NVME_QUIRK_IGNORE_DEV_SUBNQN. dmesg | grep -i nvme0: nvme nvme0: pci function 0000:01:00.0 nvme nvme0: missing or invalid SUBNQN field. nvme nvme0: 12/0/0 default/read/poll queues Link:https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e89086c43f0500bc7c4ce225495b73b8ce234c1f Signed-off-by: Wentao Guan Signed-off-by: WangYuli Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c9e2a5450bc0f3..a8d4443edc4956 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3623,6 +3623,9 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1217, 0x8760), /* O2 Micro 64GB Steam Deck */ .driver_data = NVME_QUIRK_DMAPOOL_ALIGN_512, }, + { PCI_DEVICE(0x126f, 0x1001), /* Silicon Motion generic */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_BOGUS_NID, }, From ab35ad950d439ec3409509835d229b3d93d3c7f9 Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Thu, 24 Apr 2025 10:40:10 +0800 Subject: [PATCH 1297/2924] nvme-pci: add quirks for WDC Blue SN550 15b7:5009 Add two quirks for the WDC Blue SN550 (PCI ID 15b7:5009) based on user reports and hardware analysis: - NVME_QUIRK_NO_DEEPEST_PS: liaozw talked to me the problem and solved with nvme_core.default_ps_max_latency_us=0, so add the quirk. I also found some reports in the following link. - NVME_QUIRK_BROKEN_MSI: after get the lspci from Jack Rio. I think that the disk also have NVME_QUIRK_BROKEN_MSI. described in commit d5887dc6b6c0 ("nvme-pci: Add quirk for broken MSIs") as sean said in link which match the MSI 1/32 and MSI-X 17. Log: lspci -nn | grep -i memory 03:00.0 Non-Volatile memory controller [0108]: Sandisk Corp SanDisk Ultra 3D / WD PC SN530, IX SN530, Blue SN550 NVMe SSD (DRAM-less) [15b7:5009] (rev 01) lspci -v -d 15b7:5009 03:00.0 Non-Volatile memory controller: Sandisk Corp SanDisk Ultra 3D / WD PC SN530, IX SN530, Blue SN550 NVMe SSD (DRAM-less) (rev 01) (prog-if 02 [NVM Express]) Subsystem: Sandisk Corp WD Blue SN550 NVMe SSD Flags: bus master, fast devsel, latency 0, IRQ 35, IOMMU group 10 Memory at fe800000 (64-bit, non-prefetchable) [size=16K] Memory at fe804000 (64-bit, non-prefetchable) [size=256] Capabilities: [80] Power Management version 3 Capabilities: [90] MSI: Enable- Count=1/32 Maskable- 64bit+ Capabilities: [b0] MSI-X: Enable+ Count=17 Masked- Capabilities: [c0] Express Endpoint, MSI 00 Capabilities: [100] Advanced Error Reporting Capabilities: [150] Device Serial Number 00-00-00-00-00-00-00-00 Capabilities: [1b8] Latency Tolerance Reporting Capabilities: [300] Secondary PCI Express Capabilities: [900] L1 PM Substates Kernel driver in use: nvme dmesg | grep nvme [ 0.000000] Command line: BOOT_IMAGE=/vmlinuz-6.12.20-amd64-desktop-rolling root=UUID= ro splash quiet nvme_core.default_ps_max_latency_us=0 DEEPIN_GFXMODE= [ 0.059301] Kernel command line: BOOT_IMAGE=/vmlinuz-6.12.20-amd64-desktop-rolling root=UUID= ro splash quiet nvme_core.default_ps_max_latency_us=0 DEEPIN_GFXMODE= [ 0.542430] nvme nvme0: pci function 0000:03:00.0 [ 0.560426] nvme nvme0: allocated 32 MiB host memory buffer. [ 0.562491] nvme nvme0: 16/0/0 default/read/poll queues [ 0.567764] nvme0n1: p1 p2 p3 p4 p5 p6 p7 p8 p9 [ 6.388726] EXT4-fs (nvme0n1p7): mounted filesystem ro with ordered data mode. Quota mode: none. [ 6.893421] EXT4-fs (nvme0n1p7): re-mounted r/w. Quota mode: none. [ 7.125419] Adding 16777212k swap on /dev/nvme0n1p8. Priority:-2 extents:1 across:16777212k SS [ 7.157588] EXT4-fs (nvme0n1p6): mounted filesystem r/w with ordered data mode. Quota mode: none. [ 7.165021] EXT4-fs (nvme0n1p9): mounted filesystem r/w with ordered data mode. Quota mode: none. [ 8.036932] nvme nvme0: using unchecked data buffer [ 8.096023] block nvme0n1: No UUID available providing old NGUID Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d5887dc6b6c054d0da3cd053afc15b7be1f45ff6 Link: https://lore.kernel.org/all/20240422162822.3539156-1-sean.anderson@linux.dev/ Reported-by: liaozw Closes: https://bbs.deepin.org.cn/post/286300 Reported-by: rugk Closes: https://bugzilla.kernel.org/show_bug.cgi?id=208123 Signed-off-by: Wentao Guan Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a8d4443edc4956..2e30e9be7408cb 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3649,6 +3649,9 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x15b7, 0x5008), /* Sandisk SN530 */ .driver_data = NVME_QUIRK_BROKEN_MSI }, + { PCI_DEVICE(0x15b7, 0x5009), /* Sandisk SN550 */ + .driver_data = NVME_QUIRK_BROKEN_MSI | + NVME_QUIRK_NO_DEEPEST_PS }, { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ From 690d722e02819ef978f90cd7553973eba1007e6c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 23 Apr 2025 08:18:44 -0500 Subject: [PATCH 1298/2924] drivers/platform/x86/amd: pmf: Check for invalid sideloaded Smart PC Policies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a policy is passed into amd_pmf_get_pb_data() that causes the engine to fail to start there is a memory leak. Free the memory in this failure path. Fixes: 10817f28e5337 ("platform/x86/amd/pmf: Add capability to sideload of policy binary") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20250423132002.3984997-2-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/tee-if.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 14b99d8b63d2fc..e008ac079fade3 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -364,9 +364,14 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf, amd_pmf_hex_dump_pb(dev); ret = amd_pmf_start_policy_engine(dev); if (ret < 0) - return ret; + goto cleanup; return length; + +cleanup: + kfree(dev->policy_buf); + dev->policy_buf = NULL; + return ret; } static const struct file_operations pb_fops = { From 8e81b9cd6e95188d12c9cc25d40b61dd5ea05ace Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 23 Apr 2025 08:18:45 -0500 Subject: [PATCH 1299/2924] drivers/platform/x86/amd: pmf: Check for invalid Smart PC Policies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 376a8c2a14439 ("platform/x86/amd/pmf: Update PMF Driver for Compatibility with new PMF-TA") added support for platforms that support an updated TA, however it also exposed a number of platforms that although they have support for the updated TA don't actually populate a policy binary. Add an explicit check that the policy binary isn't empty before initializing the TA. Reported-by: Christian Heusel Closes: https://lore.kernel.org/platform-driver-x86/ae644428-5bf2-4b30-81ba-0b259ed3449b@heusel.eu/ Fixes: 376a8c2a14439 ("platform/x86/amd/pmf: Update PMF Driver for Compatibility with new PMF-TA") Signed-off-by: Mario Limonciello Tested-by: Christian Heusel Link: https://lore.kernel.org/r/20250423132002.3984997-3-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/tee-if.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index e008ac079fade3..d3bd12ad036ae0 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -334,6 +334,11 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) return 0; } +static inline bool amd_pmf_pb_valid(struct amd_pmf_dev *dev) +{ + return memchr_inv(dev->policy_buf, 0xff, dev->policy_sz); +} + #ifdef CONFIG_AMD_PMF_DEBUG static void amd_pmf_hex_dump_pb(struct amd_pmf_dev *dev) { @@ -361,6 +366,11 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf, dev->policy_buf = new_policy_buf; dev->policy_sz = length; + if (!amd_pmf_pb_valid(dev)) { + ret = -EINVAL; + goto cleanup; + } + amd_pmf_hex_dump_pb(dev); ret = amd_pmf_start_policy_engine(dev); if (ret < 0) @@ -533,6 +543,12 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz); + if (!amd_pmf_pb_valid(dev)) { + dev_info(dev->dev, "No Smart PC policy present\n"); + ret = -EINVAL; + goto err_free_policy; + } + amd_pmf_hex_dump_pb(dev); dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); From 0581d384f344ed0a963dd27cbff3c7af80c189e7 Mon Sep 17 00:00:00 2001 From: Suma Hegde Date: Fri, 25 Apr 2025 10:23:57 +0000 Subject: [PATCH 1300/2924] platform/x86/amd/hsmp: Make amd_hsmp and hsmp_acpi as mutually exclusive drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amd_hsmp and hsmp_acpi are intended to be mutually exclusive drivers and amd_hsmp is for legacy platforms. To achieve this, it is essential to check for the presence of the ACPI device in plat.c. If the hsmp ACPI device entry is found, allow the hsmp_acpi driver to manage the hsmp and return an error from plat.c. Additionally, rename the driver from amd_hsmp to hsmp_acpi to prevent "Driver 'amd_hsmp' is already registered, aborting..." error in case both drivers are loaded simultaneously. Also, support both platform device based and ACPI based probing for family 0x1A models 0x00 to 0x0F, implement only ACPI based probing for family 0x1A, models 0x10 to 0x1F. Return false from legacy_hsmp_support() for this platform. This aligns with the condition check in is_f1a_m0h(). Link: https://lore.kernel.org/platform-driver-x86/aALZxvHWmphNL1wa@gourry-fedora-PF4VCD3F/ Fixes: 7d3135d16356 ("platform/x86/amd/hsmp: Create separate ACPI, plat and common drivers") Reviewed-by: Naveen Krishna Chatradhi Co-developed-by: Gregory Price Signed-off-by: Gregory Price Signed-off-by: Suma Hegde Link: https://lore.kernel.org/r/20250425102357.266790-1-suma.hegde@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/hsmp/acpi.c | 3 +-- drivers/platform/x86/amd/hsmp/hsmp.h | 1 + drivers/platform/x86/amd/hsmp/plat.c | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c index c1eccb3c80c5c9..eaae044e4f824c 100644 --- a/drivers/platform/x86/amd/hsmp/acpi.c +++ b/drivers/platform/x86/amd/hsmp/acpi.c @@ -27,9 +27,8 @@ #include "hsmp.h" -#define DRIVER_NAME "amd_hsmp" +#define DRIVER_NAME "hsmp_acpi" #define DRIVER_VERSION "2.3" -#define ACPI_HSMP_DEVICE_HID "AMDI0097" /* These are the strings specified in ACPI table */ #define MSG_IDOFF_STR "MsgIdOffset" diff --git a/drivers/platform/x86/amd/hsmp/hsmp.h b/drivers/platform/x86/amd/hsmp/hsmp.h index af8b21f821d668..d58d4f0c20d552 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.h +++ b/drivers/platform/x86/amd/hsmp/hsmp.h @@ -23,6 +23,7 @@ #define HSMP_CDEV_NAME "hsmp_cdev" #define HSMP_DEVNODE_NAME "hsmp" +#define ACPI_HSMP_DEVICE_HID "AMDI0097" struct hsmp_mbaddr_info { u32 base_addr; diff --git a/drivers/platform/x86/amd/hsmp/plat.c b/drivers/platform/x86/amd/hsmp/plat.c index b9782a078dbd2f..81931e808bbc81 100644 --- a/drivers/platform/x86/amd/hsmp/plat.c +++ b/drivers/platform/x86/amd/hsmp/plat.c @@ -11,6 +11,7 @@ #include +#include #include #include #include @@ -266,7 +267,7 @@ static bool legacy_hsmp_support(void) } case 0x1A: switch (boot_cpu_data.x86_model) { - case 0x00 ... 0x1F: + case 0x00 ... 0x0F: return true; default: return false; @@ -288,6 +289,9 @@ static int __init hsmp_plt_init(void) return ret; } + if (acpi_dev_present(ACPI_HSMP_DEVICE_HID, NULL, -1)) + return -ENODEV; + hsmp_pdev = get_hsmp_pdev(); if (!hsmp_pdev) return -ENOMEM; From 48ccf21fa8dc595c8aa4f1d347b593dcae0727d0 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 8 Apr 2025 16:07:58 +0200 Subject: [PATCH 1301/2924] drm/tests: shmem: Fix memleak The drm_gem_shmem_test_get_pages_sgt() gets a scatter-gather table using the drm_gem_shmem_get_sg_table() function and rightfully calls sg_free_table() on it. However, it's also supposed to kfree() the returned sg_table, but doesn't. This leads to a memory leak, reported by kmemleak. Fix it by adding a kunit action to kfree the sgt when the test ends. Reported-by: Philipp Stanner Closes: https://lore.kernel.org/dri-devel/a7655158a6367ac46194d57f4b7433ef0772a73e.camel@mailbox.org/ Fixes: 93032ae634d4 ("drm/test: add a test suite for GEM objects backed by shmem") Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250408140758.1831333-1-mripard@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_gem_shmem_test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index fd4215e2f982d2..925fbc2cda700a 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -216,6 +216,9 @@ static void drm_gem_shmem_test_get_pages_sgt(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); KUNIT_EXPECT_NULL(test, shmem->sgt); + ret = kunit_add_action_or_reset(test, kfree_wrapper, sgt); + KUNIT_ASSERT_EQ(test, ret, 0); + ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt); KUNIT_ASSERT_EQ(test, ret, 0); From 1a8bc0fe8039e1e57f68c4a588f0403d98bfeb1f Mon Sep 17 00:00:00 2001 From: Russell Cloran Date: Mon, 14 Apr 2025 22:32:59 -0700 Subject: [PATCH 1302/2924] drm/mipi-dbi: Fix blanking for non-16 bit formats On r6x2b6x2g6x2 displays not enough blank data is sent to blank the entire screen. When support for these displays was added, the dirty function was updated to handle the different amount of data, but blanking was not, and remained hardcoded as 2 bytes per pixel. This change applies almost the same algorithm used in the dirty function to the blank function, but there is no fb available at that point, and no concern about having to transform any data, so the dbidev pixel format is always used for calculating the length. Fixes: 4aebb79021f3 ("drm/mipi-dbi: Add support for DRM_FORMAT_RGB888") Signed-off-by: Russell Cloran Link: https://lore.kernel.org/r/20250415053259.79572-1-rcloran@gmail.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_mipi_dbi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index 89e05a5bed1de8..a4cd476f9b3026 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -404,12 +404,16 @@ static void mipi_dbi_blank(struct mipi_dbi_dev *dbidev) u16 height = drm->mode_config.min_height; u16 width = drm->mode_config.min_width; struct mipi_dbi *dbi = &dbidev->dbi; - size_t len = width * height * 2; + const struct drm_format_info *dst_format; + size_t len; int idx; if (!drm_dev_enter(drm, &idx)) return; + dst_format = drm_format_info(dbidev->pixel_format); + len = drm_format_info_min_pitch(dst_format, 0, width) * height; + memset(dbidev->tx_buf, 0, len); mipi_dbi_set_window_address(dbidev, 0, width - 1, 0, height - 1); From de2b2107d5a41a91ab603e135fb6e408abbee28e Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:06:58 +0200 Subject: [PATCH 1303/2924] arm64: dts: st: Adjust interrupt-controller for stm32mp25 SoCs Use gic-400 compatible and remove address-cells = <1> on aarch64 Fixes: 5d30d03aaf785 ("arm64: dts: st: introduce stm32mp25 SoCs family") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-2-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp251.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index f3c6cdfd7008c5..379e290313dc0e 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -115,9 +115,8 @@ }; intc: interrupt-controller@4ac00000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; #interrupt-cells = <3>; - #address-cells = <1>; interrupt-controller; reg = <0x0 0x4ac10000 0x0 0x1000>, <0x0 0x4ac20000 0x0 0x2000>, From 06c231fe953a26f4bc9d7a37ba1b9b288a59c7c2 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:06:59 +0200 Subject: [PATCH 1304/2924] arm64: dts: st: Use 128kB size for aliased GIC400 register access on stm32mp25 SoCs Adjust the size of 8kB GIC regions to 128kB so that each 4kB is mapped 16 times over a 64kB region. The offset is then adjusted in the irq-gic driver. see commit 12e14066f4835 ("irqchip/GIC: Add workaround for aliased GIC400") Fixes: 5d30d03aaf785 ("arm64: dts: st: introduce stm32mp25 SoCs family") Suggested-by: Marc Zyngier Signed-off-by: Christian Bruel Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20250415111654.2103767-3-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp251.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index 379e290313dc0e..87110f91e4895a 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -119,9 +119,9 @@ #interrupt-cells = <3>; interrupt-controller; reg = <0x0 0x4ac10000 0x0 0x1000>, - <0x0 0x4ac20000 0x0 0x2000>, - <0x0 0x4ac40000 0x0 0x2000>, - <0x0 0x4ac60000 0x0 0x2000>; + <0x0 0x4ac20000 0x0 0x20000>, + <0x0 0x4ac40000 0x0 0x20000>, + <0x0 0x4ac60000 0x0 0x20000>; }; psci { From 02dc83f09c7262533123e7e4dae9c4f9fc40ed17 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:00 +0200 Subject: [PATCH 1305/2924] arm64: dts: st: Adjust interrupt-controller for stm32mp21 SoCs Use gic-400 compatible for aarch64 Fixes: 7a57b1bb1afbf ("arm64: dts: st: introduce stm32mp21 SoCs family") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-4-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp211.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/st/stm32mp211.dtsi b/arch/arm64/boot/dts/st/stm32mp211.dtsi index 6dd1377f3e1d8b..52a8209471b8d1 100644 --- a/arch/arm64/boot/dts/st/stm32mp211.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp211.dtsi @@ -116,7 +116,7 @@ }; intc: interrupt-controller@4ac10000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; reg = <0x4ac10000 0x0 0x1000>, <0x4ac20000 0x0 0x2000>, <0x4ac40000 0x0 0x2000>, From 1bc229e9bb9cd502caeec639cb3cff50aea078f0 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:01 +0200 Subject: [PATCH 1306/2924] arm64: dts: st: Use 128kB size for aliased GIC400 register access on stm32mp21 SoCs Adjust the size of 8kB GIC regions to 128kB so that each 4kB is mapped 16 times over a 64kB region. The offset is then adjusted in the irq-gic driver. see commit 12e14066f4835 ("irqchip/GIC: Add workaround for aliased GIC400") Fixes: 7a57b1bb1afbf ("arm64: dts: st: introduce stm32mp21 SoCs family") Suggested-by: Marc Zyngier Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-5-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp211.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp211.dtsi b/arch/arm64/boot/dts/st/stm32mp211.dtsi index 52a8209471b8d1..bf888d60cd4f01 100644 --- a/arch/arm64/boot/dts/st/stm32mp211.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp211.dtsi @@ -118,9 +118,9 @@ intc: interrupt-controller@4ac10000 { compatible = "arm,gic-400"; reg = <0x4ac10000 0x0 0x1000>, - <0x4ac20000 0x0 0x2000>, - <0x4ac40000 0x0 0x2000>, - <0x4ac60000 0x0 0x2000>; + <0x4ac20000 0x0 0x20000>, + <0x4ac40000 0x0 0x20000>, + <0x4ac60000 0x0 0x20000>; #interrupt-cells = <3>; interrupt-controller; }; From 3a1e1082097b8fa2e5d420de105f4cce804c38b2 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:02 +0200 Subject: [PATCH 1307/2924] arm64: dts: st: Adjust interrupt-controller for stm32mp23 SoCs Use gic-400 compatible and remove address-cells = <1> for aarch64 Fixes: e9b03ef21386e ("arm64: dts: st: introduce stm32mp23 SoCs family") Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-6-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp231.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp231.dtsi b/arch/arm64/boot/dts/st/stm32mp231.dtsi index 8820d219a33e6e..3f73cf1d443d43 100644 --- a/arch/arm64/boot/dts/st/stm32mp231.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp231.dtsi @@ -1201,13 +1201,12 @@ }; intc: interrupt-controller@4ac10000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; reg = <0x4ac10000 0x1000>, <0x4ac20000 0x2000>, <0x4ac40000 0x2000>, <0x4ac60000 0x2000>; #interrupt-cells = <3>; - #address-cells = <1>; interrupt-controller; }; }; From 2ef5c66cba6171feab05e62e1b22df970b238544 Mon Sep 17 00:00:00 2001 From: Christian Bruel Date: Mon, 28 Apr 2025 14:07:03 +0200 Subject: [PATCH 1308/2924] arm64: dts: st: Use 128kB size for aliased GIC400 register access on stm32mp23 SoCs Adjust the size of 8kB GIC regions to 128kB so that each 4kB is mapped 16 times over a 64kB region. The offset is then adjusted in the irq-gic driver. see commit 12e14066f4835 ("irqchip/GIC: Add workaround for aliased GIC400") Fixes: e9b03ef21386e ("arm64: dts: st: introduce stm32mp23 SoCs family") Suggested-by: Marc Zyngier Signed-off-by: Christian Bruel Link: https://lore.kernel.org/r/20250415111654.2103767-7-christian.bruel@foss.st.com Signed-off-by: Alexandre Torgue Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/st/stm32mp231.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/st/stm32mp231.dtsi b/arch/arm64/boot/dts/st/stm32mp231.dtsi index 3f73cf1d443d43..75697acd1345b2 100644 --- a/arch/arm64/boot/dts/st/stm32mp231.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp231.dtsi @@ -1203,9 +1203,9 @@ intc: interrupt-controller@4ac10000 { compatible = "arm,gic-400"; reg = <0x4ac10000 0x1000>, - <0x4ac20000 0x2000>, - <0x4ac40000 0x2000>, - <0x4ac60000 0x2000>; + <0x4ac20000 0x20000>, + <0x4ac40000 0x20000>, + <0x4ac60000 0x20000>; #interrupt-cells = <3>; interrupt-controller; }; From 11cdb506d0fbf5ac05bf55f5afcb3a215c316490 Mon Sep 17 00:00:00 2001 From: Gary Bisson Date: Tue, 29 Apr 2025 09:16:29 -0700 Subject: [PATCH 1309/2924] Input: mtk-pmic-keys - fix possible null pointer dereference In mtk_pmic_keys_probe, the regs parameter is only set if the button is parsed in the device tree. However, on hardware where the button is left floating, that node will most likely be removed not to enable that input. In that case the code will try to dereference a null pointer. Let's use the regs struct instead as it is defined for all supported platforms. Note that it is ok setting the key reg even if that latter is disabled as the interrupt won't be enabled anyway. Fixes: b581acb49aec ("Input: mtk-pmic-keys - transfer per-key bit in mtk_pmic_keys_regs") Signed-off-by: Gary Bisson Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/mtk-pmic-keys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/keyboard/mtk-pmic-keys.c b/drivers/input/keyboard/mtk-pmic-keys.c index 5ad6be9141603a..061d48350df661 100644 --- a/drivers/input/keyboard/mtk-pmic-keys.c +++ b/drivers/input/keyboard/mtk-pmic-keys.c @@ -147,8 +147,8 @@ static void mtk_pmic_keys_lp_reset_setup(struct mtk_pmic_keys *keys, u32 value, mask; int error; - kregs_home = keys->keys[MTK_PMIC_HOMEKEY_INDEX].regs; - kregs_pwr = keys->keys[MTK_PMIC_PWRKEY_INDEX].regs; + kregs_home = ®s->keys_regs[MTK_PMIC_HOMEKEY_INDEX]; + kregs_pwr = ®s->keys_regs[MTK_PMIC_PWRKEY_INDEX]; error = of_property_read_u32(keys->dev->of_node, "power-off-time-sec", &long_press_debounce); From e38be1c7647c8c78304ce6d931b3b654e27948b3 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 28 Apr 2025 23:43:20 +0200 Subject: [PATCH 1310/2924] sched_ext: Fix rq lock state in hotplug ops The ops.cpu_online() and ops.cpu_offline() callbacks incorrectly assume that the rq involved in the operation is locked, which is not the case during hotplug, triggering the following warning: WARNING: CPU: 1 PID: 20 at kernel/sched/sched.h:1504 handle_hotplug+0x280/0x340 Fix by not tracking the target rq as locked in the context of ops.cpu_online() and ops.cpu_offline(). Fixes: 18853ba782bef ("sched_ext: Track currently locked rq") Reported-by: Tejun Heo Signed-off-by: Andrea Righi Tested-by: Changwoo Min Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index dddb0af36f8daf..4e37b40ce280cb 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3477,9 +3477,9 @@ static void handle_hotplug(struct rq *rq, bool online) scx_idle_update_selcpu_topology(&scx_ops); if (online && SCX_HAS_OP(cpu_online)) - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, rq, cpu); + SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, NULL, cpu); else if (!online && SCX_HAS_OP(cpu_offline)) - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, rq, cpu); + SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, NULL, cpu); else scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG, "cpu %d going %s, exiting scheduler", cpu, From 0759e77a6d9bd34a874da73721ce4a7dc6665023 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Apr 2025 20:36:15 +0200 Subject: [PATCH 1311/2924] ALSA: usb-audio: Fix duplicated name in MIDI substream names The MIDI substream name string is constructed from the combination of the card shortname (which is taken from USB iProduct) and the USB iJack. The problem is that some devices put the product name to the iJack field, too. For example, aplaymidi -l output on the Lanchkey MK 49 are like: % aplaymidi -l Port Client name Port name 44:0 Launchkey MK4 49 Launchkey MK4 49 Launchkey MK4 44:1 Launchkey MK4 49 Launchkey MK4 49 Launchkey MK4 where the actual iJack name can't be seen because it's truncated due to the doubly words. For resolving those situations, this patch compares the iJack string with the card shortname, and drops if both start with the same words. Then the result becomes like: % aplaymidi -l Port Client name Port name 40:0 Launchkey MK4 49 Launchkey MK4 49 MIDI In 40:1 Launchkey MK4 49 Launchkey MK4 49 DAW In A caveat is that there are some pre-defined names for certain devices in the driver code, and this workaround shouldn't be applied to them. Similarly, when the iJack isn't specified, we should skip this check, too. The patch added those checks in addition to the string comparison. Suggested-by: Paul Davis Tested-by: Paul Davis Link: https://lore.kernel.org/CAFa_cKmEDQWcJatbYWi6A58Zg4Ma9_6Nr3k5LhqwyxC-P_kXtw@mail.gmail.com Link: https://patch.msgid.link/20250429183626.20773-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index dcdd7e9e1ae974..cfed000f243ab9 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1885,10 +1885,18 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, } port_info = find_port_info(umidi, number); - name_format = port_info ? port_info->name : - (jack_name != default_jack_name ? "%s %s" : "%s %s %d"); - snprintf(substream->name, sizeof(substream->name), - name_format, umidi->card->shortname, jack_name, number + 1); + if (port_info || jack_name == default_jack_name || + strncmp(umidi->card->shortname, jack_name, strlen(umidi->card->shortname)) != 0) { + name_format = port_info ? port_info->name : + (jack_name != default_jack_name ? "%s %s" : "%s %s %d"); + snprintf(substream->name, sizeof(substream->name), + name_format, umidi->card->shortname, jack_name, number + 1); + } else { + /* The manufacturer included the iProduct name in the jack + * name, do not use both + */ + strscpy(substream->name, jack_name); + } *rsubstream = substream; } From e7e5ae71831c44d58627a991e603845a2fed2cab Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 25 Apr 2025 16:50:47 +0100 Subject: [PATCH 1312/2924] net: dlink: Correct endianness handling of led_mode As it's name suggests, parse_eeprom() parses EEPROM data. This is done by reading data, 16 bits at a time as follows: for (i = 0; i < 128; i++) ((__le16 *) sromdata)[i] = cpu_to_le16(read_eeprom(np, i)); sromdata is at the same memory location as psrom. And the type of psrom is a pointer to struct t_SROM. As can be seen in the loop above, data is stored in sromdata, and thus psrom, as 16-bit little-endian values. However, the integer fields of t_SROM are host byte order integers. And in the case of led_mode this leads to a little endian value being incorrectly treated as host byte order. Looking at rio_set_led_mode, this does appear to be a bug as that code masks led_mode with 0x1, 0x2 and 0x8. Logic that would be effected by a reversed byte order. This problem would only manifest on big endian hosts. Found by inspection while investigating a sparse warning regarding the crc field of t_SROM. I believe that warning is a false positive. And although I plan to send a follow-up to use little-endian types for other the integer fields of PSROM_t I do not believe that will involve any bug fixes. Compile tested only. Fixes: c3f45d322cbd ("dl2k: Add support for IP1000A-based cards") Signed-off-by: Simon Horman Link: https://patch.msgid.link/20250425-dlink-led-mode-v1-1-6bae3c36e736@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dlink/dl2k.c | 2 +- drivers/net/ethernet/dlink/dl2k.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index d88fbecdab4b89..232e839a9d0719 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -352,7 +352,7 @@ parse_eeprom (struct net_device *dev) eth_hw_addr_set(dev, psrom->mac_addr); if (np->chip_id == CHIP_IP1000A) { - np->led_mode = psrom->led_mode; + np->led_mode = le16_to_cpu(psrom->led_mode); return 0; } diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h index 195dc6cfd8955c..0e33e2eaae9606 100644 --- a/drivers/net/ethernet/dlink/dl2k.h +++ b/drivers/net/ethernet/dlink/dl2k.h @@ -335,7 +335,7 @@ typedef struct t_SROM { u16 sub_system_id; /* 0x06 */ u16 pci_base_1; /* 0x08 (IP1000A only) */ u16 pci_base_2; /* 0x0a (IP1000A only) */ - u16 led_mode; /* 0x0c (IP1000A only) */ + __le16 led_mode; /* 0x0c (IP1000A only) */ u16 reserved1[9]; /* 0x0e-0x1f */ u8 mac_addr[6]; /* 0x20-0x25 */ u8 reserved2[10]; /* 0x26-0x2f */ From b23285e93bef729e67519a5209d5b7fde3b4af50 Mon Sep 17 00:00:00 2001 From: Da Xue Date: Fri, 25 Apr 2025 15:20:09 -0400 Subject: [PATCH 1313/2924] net: mdio: mux-meson-gxl: set reversed bit when using internal phy This bit is necessary to receive packets from the internal PHY. Without this bit set, no activity occurs on the interface. Normally u-boot sets this bit, but if u-boot is compiled without net support, the interface will be up but without any activity. If bit is set once, it will work until the IP is powered down or reset. The vendor SDK sets this bit along with the PHY_ID bits. Signed-off-by: Da Xue Fixes: 9a24e1ff4326 ("net: mdio: add amlogic gxl mdio mux support") Link: https://patch.msgid.link/20250425192009.1439508-1-da@libre.computer Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-mux-meson-gxl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-mux-meson-gxl.c b/drivers/net/mdio/mdio-mux-meson-gxl.c index 00c66240136b10..3dd12a8c8b03e9 100644 --- a/drivers/net/mdio/mdio-mux-meson-gxl.c +++ b/drivers/net/mdio/mdio-mux-meson-gxl.c @@ -17,6 +17,7 @@ #define REG2_LEDACT GENMASK(23, 22) #define REG2_LEDLINK GENMASK(25, 24) #define REG2_DIV4SEL BIT(27) +#define REG2_REVERSED BIT(28) #define REG2_ADCBYPASS BIT(30) #define REG2_CLKINSEL BIT(31) #define ETH_REG3 0x4 @@ -65,7 +66,7 @@ static void gxl_enable_internal_mdio(struct gxl_mdio_mux *priv) * The only constraint is that it must match the one in * drivers/net/phy/meson-gxl.c to properly match the PHY. */ - writel(FIELD_PREP(REG2_PHYID, EPHY_GXL_ID), + writel(REG2_REVERSED | FIELD_PREP(REG2_PHYID, EPHY_GXL_ID), priv->regs + ETH_REG2); /* Enable the internal phy */ From 8988c4b91945173a6b5505764915d470f0238fdc Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 29 Apr 2025 15:22:18 +0100 Subject: [PATCH 1314/2924] perf tools: Fix in-source libperf build When libperf is built alone in-source, $(OUTPUT) isn't set. This causes the generated uapi path to resolve to '/../arch' which results in a permissions error: mkdir: cannot create directory '/../arch': Permission denied Fix it by removing the preceding '/..' which means that it gets generated either in the tools/lib/perf part of the tree or the OUTPUT folder. Some other rules that rely on OUTPUT further refine this conditionally depending on whether it's an in-source or out-of-source build, but I don't think we need the extra complexity here. And this rule is slightly different to others because the header is needed by both libperf and Perf. This is further complicated by the fact that Perf always passes O=... to libperf even for in source builds, meaning that OUTPUT isn't set consistently between projects. Because we're no longer going one level up to try to generate the file in the tools/ folder, Perf's include rule needs to descend into libperf. Also fix the clean rule while we're here. Reported-by: Thorsten Leemhuis Closes: https://lore.kernel.org/linux-perf-users/7703f88e-ccb7-4c98-9da4-8aad224e780f@leemhuis.info/ Fixes: bfb713ea53c7 ("perf tools: Fix arm64 build by generating unistd_64.h") Signed-off-by: James Clark Tested-by: Thorsten Leemhuis Link: https://lore.kernel.org/r/20250429-james-perf-fix-libperf-in-source-build-v1-1-a1a827ac15e5@linaro.org Signed-off-by: Namhyung Kim --- tools/lib/perf/Makefile | 6 +++--- tools/perf/Makefile.config | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 1a19b5013f4549..7fbb50b74c00b3 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,7 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ --I$(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi \ +-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -100,7 +100,7 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -uapi-asm := $(OUTPUT)/../arch/$(SRCARCH)/include/generated/uapi/asm +uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm ifeq ($(SRCARCH),arm64) syscall-y := $(uapi-asm)/unistd_64.h endif @@ -130,7 +130,7 @@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \ - $(TESTS_STATIC) $(TESTS_SHARED) + $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y) TESTS_IN = tests-in.o diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a52482654d4b77..b7769a22fe1afa 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -29,7 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated -CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi +CFLAGS += -I$(OUTPUT)libperf/arch/$(SRCARCH)/include/generated/uapi # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) From 4bf593be2e462623c4c34c7e3b604eb3f8f9de45 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Tue, 29 Apr 2025 18:51:55 +0200 Subject: [PATCH 1315/2924] arm64: dts: rockchip: fix Sige5 RTC interrupt pin Someone made a typo when they added the RTC to the Sige5 DTS, which resulted in it using interrupts from GPIO0 B0 instead of GPIO0 A0. The pinctrl entry for it wasn't typoed though, curiously enough. The Sige5 v1.1 schematic was used to verify that GPIO0 A0 is the correct pin for the RTC wakeup interrupt, so let's change it to that. Fixes: 40f742b07ab2 ("arm64: dts: rockchip: Add rk3576-armsom-sige5 board") Signed-off-by: Nicolas Frattaroli Link: https://lore.kernel.org/r/20250429-sige5-rtc-oopsie-v1-1-8686767d0f1f@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts index 828bde7fab68dc..314067ba6f3c4f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts @@ -610,7 +610,7 @@ reg = <0x51>; clock-output-names = "hym8563"; interrupt-parent = <&gpio0>; - interrupts = ; + interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; wakeup-source; From 8a558cbda51bef09773c72bf74a32047479110c7 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 4 Apr 2025 12:54:21 +0200 Subject: [PATCH 1316/2924] idpf: fix potential memory leak on kcalloc() failure In case of failing on rss_data->rss_key allocation the function is freeing vport without freeing earlier allocated q_vector_idxs. Fix it. Move from freeing in error branch to goto scheme. Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") Reviewed-by: Pavan Kumar Linga Reviewed-by: Aleksandr Loktionov Suggested-by: Pavan Kumar Linga Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 730a9c7a59f2b0..82f09b4030bce2 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1113,11 +1113,9 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, num_max_q = max(max_q->max_txq, max_q->max_rxq); vport->q_vector_idxs = kcalloc(num_max_q, sizeof(u16), GFP_KERNEL); - if (!vport->q_vector_idxs) { - kfree(vport); + if (!vport->q_vector_idxs) + goto free_vport; - return NULL; - } idpf_vport_init(vport, max_q); /* This alloc is done separate from the LUT because it's not strictly @@ -1127,11 +1125,9 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, */ rss_data = &adapter->vport_config[idx]->user_config.rss_data; rss_data->rss_key = kzalloc(rss_data->rss_key_size, GFP_KERNEL); - if (!rss_data->rss_key) { - kfree(vport); + if (!rss_data->rss_key) + goto free_vector_idxs; - return NULL; - } /* Initialize default rss key */ netdev_rss_key_fill((void *)rss_data->rss_key, rss_data->rss_key_size); @@ -1144,6 +1140,13 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, adapter->next_vport = idpf_get_free_slot(adapter); return vport; + +free_vector_idxs: + kfree(vport->q_vector_idxs); +free_vport: + kfree(vport); + + return NULL; } /** From ed375b182140eeb9c73609b17939c8a29b27489e Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 10 Apr 2025 13:52:23 +0200 Subject: [PATCH 1317/2924] idpf: protect shutdown from reset Before the referenced commit, the shutdown just called idpf_remove(), this way IDPF_REMOVE_IN_PROG was protecting us from the serv_task rescheduling reset. Without this flag set the shutdown process is vulnerable to HW reset or any other triggering conditions (such as default mailbox being destroyed). When one of conditions checked in idpf_service_task becomes true, vc_event_task can be rescheduled during shutdown, this leads to accessing freed memory e.g. idpf_req_rel_vector_indexes() trying to read vport->q_vector_idxs. This in turn causes the system to become defunct during e.g. systemctl kexec. Considering using IDPF_REMOVE_IN_PROG would lead to more heavy shutdown process, instead just cancel the serv_task before cancelling adapter->serv_task before cancelling adapter->vc_event_task to ensure that reset will not be scheduled while we are doing a shutdown. Fixes: 4c9106f4906a ("idpf: fix adapter NULL pointer dereference on reboot") Reviewed-by: Michal Swiatkowski Signed-off-by: Larysa Zaremba Reviewed-by: Simon Horman Reviewed-by: Emil Tantilov Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index bec4a02c53733e..b35713036a54ab 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -89,6 +89,7 @@ static void idpf_shutdown(struct pci_dev *pdev) { struct idpf_adapter *adapter = pci_get_drvdata(pdev); + cancel_delayed_work_sync(&adapter->serv_task); cancel_delayed_work_sync(&adapter->vc_event_task); idpf_vc_core_deinit(adapter); idpf_deinit_dflt_mbx(adapter); From c7d6cb96d5c33b5148f3dc76fcd30a9b8cd9e973 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Apr 2025 14:03:09 -0700 Subject: [PATCH 1318/2924] igc: fix lock order in igc_ptp_reset Commit 1a931c4f5e68 ("igc: add lock preventing multiple simultaneous PTM transactions") added a new mutex to protect concurrent PTM transactions. This lock is acquired in igc_ptp_reset() in order to ensure the PTM registers are properly disabled after a device reset. The flow where the lock is acquired already holds a spinlock, so acquiring a mutex leads to a sleep-while-locking bug, reported both by smatch, and the kernel test robot. The critical section in igc_ptp_reset() does correctly use the readx_poll_timeout_atomic variants, but the standard PTM flow uses regular sleeping variants. This makes converting the mutex to a spinlock a bit tricky. Instead, re-order the locking in igc_ptp_reset. Acquire the mutex first, and then the tmreg_lock spinlock. This is safe because there is no other ordering dependency on these locks, as this is the only place where both locks were acquired simultaneously. Indeed, any other flow acquiring locks in that order would be wrong regardless. Signed-off-by: Jacob Keller Fixes: 1a931c4f5e68 ("igc: add lock preventing multiple simultaneous PTM transactions") Link: https://lore.kernel.org/intel-wired-lan/Z_-P-Hc1yxcw0lTB@stanley.mountain/ Link: https://lore.kernel.org/intel-wired-lan/202504211511.f7738f5d-lkp@intel.com/T/#u Reviewed-by: Przemek Kitszel Reviewed-by: Vitaly Lifshits Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 612ed26a29c5d4..efc7b30e421133 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -1290,6 +1290,8 @@ void igc_ptp_reset(struct igc_adapter *adapter) /* reset the tstamp_config */ igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + mutex_lock(&adapter->ptm_lock); + spin_lock_irqsave(&adapter->tmreg_lock, flags); switch (adapter->hw.mac.type) { @@ -1308,7 +1310,6 @@ void igc_ptp_reset(struct igc_adapter *adapter) if (!igc_is_crosststamp_supported(adapter)) break; - mutex_lock(&adapter->ptm_lock); wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT); wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT); @@ -1332,7 +1333,6 @@ void igc_ptp_reset(struct igc_adapter *adapter) netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); igc_ptm_reset(hw); - mutex_unlock(&adapter->ptm_lock); break; default: /* No work to do. */ @@ -1349,5 +1349,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) out: spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + mutex_unlock(&adapter->ptm_lock); + wrfl(); } From 6e0490fc36cdac696f96e57b61d93b9ae32e0f4c Mon Sep 17 00:00:00 2001 From: Chad Monroe Date: Sun, 27 Apr 2025 02:05:44 +0100 Subject: [PATCH 1319/2924] net: ethernet: mtk_eth_soc: fix SER panic with 4GB+ RAM If the mtk_poll_rx() function detects the MTK_RESETTING flag, it will jump to release_desc and refill the high word of the SDP on the 4GB RFB. Subsequently, mtk_rx_clean will process an incorrect SDP, leading to a panic. Add patch from MediaTek's SDK to resolve this. Fixes: 2d75891ebc09 ("net: ethernet: mtk_eth_soc: support 36-bit DMA addressing on MT7988") Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/71f47ea785699c6aa3b922d66c2bdc1a43da25b1 Signed-off-by: Chad Monroe Link: https://patch.msgid.link/4adc2aaeb0fb1b9cdc56bf21cf8e7fa328daa345.1745715843.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 83068925c589d9..8fda4ce80d811d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2248,14 +2248,18 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, ring->data[idx] = new_data; rxd->rxd1 = (unsigned int)dma_addr; release_desc: + if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) { + if (unlikely(dma_addr == DMA_MAPPING_ERROR)) + addr64 = FIELD_GET(RX_DMA_ADDR64_MASK, + rxd->rxd2); + else + addr64 = RX_DMA_PREP_ADDR64(dma_addr); + } + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) rxd->rxd2 = RX_DMA_LSO; else - rxd->rxd2 = RX_DMA_PREP_PLEN0(ring->buf_size); - - if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA) && - likely(dma_addr != DMA_MAPPING_ERROR)) - rxd->rxd2 |= RX_DMA_PREP_ADDR64(dma_addr); + rxd->rxd2 = RX_DMA_PREP_PLEN0(ring->buf_size) | addr64; ring->calc_idx = idx; done++; From 426d487bca38b34f39c483edfc6313a036446b33 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:55 +0300 Subject: [PATCH 1320/2924] net: dsa: felix: fix broken taprio gate states after clock jump Simplest setup to reproduce the issue: connect 2 ports of the LS1028A-RDB together (eno0 with swp0) and run: $ ip link set eno0 up && ip link set swp0 up $ tc qdisc replace dev swp0 parent root handle 100 taprio num_tc 8 \ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ sched-entry S 20 300000 sched-entry S 48 200000 \ sched-entry S 20 300000 sched-entry S 83 200000 \ sched-entry S 40 300000 sched-entry S 00 200000 flags 2 $ ptp4l -i eno0 -f /etc/linuxptp/configs/gPTP.cfg -m & $ ptp4l -i swp0 -f /etc/linuxptp/configs/gPTP.cfg -m One will observe that the PTP state machine on swp0 starts synchronizing, then it attempts to do a clock step, and after that, it never fails to recover from the condition below. ptp4l[82.427]: selected best master clock 00049f.fffe.05f627 ptp4l[82.428]: port 1 (swp0): MASTER to UNCALIBRATED on RS_SLAVE ptp4l[83.252]: port 1 (swp0): UNCALIBRATED to SLAVE on MASTER_CLOCK_SELECTED ptp4l[83.886]: rms 4537731277 max 9075462553 freq -18518 +/- 11467 delay 818 +/- 0 ptp4l[84.170]: timed out while polling for tx timestamp ptp4l[84.171]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[84.172]: port 1 (swp0): send peer delay request failed ptp4l[84.173]: port 1 (swp0): clearing fault immediately ptp4l[84.269]: port 1 (swp0): SLAVE to LISTENING on INIT_COMPLETE ptp4l[85.303]: timed out while polling for tx timestamp ptp4l[84.171]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[84.172]: port 1 (swp0): send peer delay request failed ptp4l[84.173]: port 1 (swp0): clearing fault immediately ptp4l[84.269]: port 1 (swp0): SLAVE to LISTENING on INIT_COMPLETE ptp4l[85.303]: timed out while polling for tx timestamp ptp4l[85.304]: increasing tx_timestamp_timeout or increasing kworker priority may correct this issue, but a driver bug likely causes it ptp4l[85.305]: port 1 (swp0): send peer delay response failed ptp4l[85.306]: port 1 (swp0): clearing fault immediately ptp4l[86.304]: timed out while polling for tx timestamp A hint is given by the non-zero statistics for dropped packets which were expecting hardware TX timestamps: $ ethtool --include-statistics -T swp0 (...) Statistics: tx_pkts: 30 tx_lost: 11 tx_err: 0 We know that when PTP clock stepping takes place (from ocelot_ptp_settime64() or from ocelot_ptp_adjtime()), vsc9959_tas_clock_adjust() is called. Another interesting hint is that placing an early return in vsc9959_tas_clock_adjust(), so as to neutralize this function, fixes the issue and TX timestamps are no longer dropped. The debugging function written by me and included below is intended to read the GCL RAM, after the admin schedule became operational, through the two status registers available for this purpose: QSYS_GCL_STATUS_REG_1 and QSYS_GCL_STATUS_REG_2. static void vsc9959_print_tas_gcl(struct ocelot *ocelot) { u32 val, list_length, interval, gate_state; int i, err; err = read_poll_timeout(ocelot_read, val, !(val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING), 10, 100000, false, ocelot, QSYS_PARAM_STATUS_REG_8); if (err) { dev_err(ocelot->dev, "Failed to wait for TAS config pending bit to clear: %pe\n", ERR_PTR(err)); return; } val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_3); list_length = QSYS_PARAM_STATUS_REG_3_LIST_LENGTH_X(val); dev_info(ocelot->dev, "GCL length: %u\n", list_length); for (i = 0; i < list_length; i++) { ocelot_rmw(ocelot, QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM(i), QSYS_GCL_STATUS_REG_1_GCL_ENTRY_NUM_M, QSYS_GCL_STATUS_REG_1); interval = ocelot_read(ocelot, QSYS_GCL_STATUS_REG_2); val = ocelot_read(ocelot, QSYS_GCL_STATUS_REG_1); gate_state = QSYS_GCL_STATUS_REG_1_GATE_STATE_X(val); dev_info(ocelot->dev, "GCL entry %d: states 0x%x interval %u\n", i, gate_state, interval); } } Calling it from two places: after the initial QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE performed by vsc9959_qos_port_tas_set(), and after the one done by vsc9959_tas_clock_adjust(), I notice the following difference. From the tc-taprio process context, where the schedule was initially configured, the GCL looks like this: mscc_felix 0000:00:00.5: GCL length: 8 mscc_felix 0000:00:00.5: GCL entry 0: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 1: states 0x10 interval 200000 mscc_felix 0000:00:00.5: GCL entry 2: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 3: states 0x48 interval 200000 mscc_felix 0000:00:00.5: GCL entry 4: states 0x20 interval 300000 mscc_felix 0000:00:00.5: GCL entry 5: states 0x83 interval 200000 mscc_felix 0000:00:00.5: GCL entry 6: states 0x40 interval 300000 mscc_felix 0000:00:00.5: GCL entry 7: states 0x0 interval 200000 But from the ptp4l clock stepping process context, when the vsc9959_tas_clock_adjust() hook is called, the GCL RAM of the operational schedule now looks like this: mscc_felix 0000:00:00.5: GCL length: 8 mscc_felix 0000:00:00.5: GCL entry 0: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 1: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 2: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 3: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 4: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 5: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 6: states 0x0 interval 0 mscc_felix 0000:00:00.5: GCL entry 7: states 0x0 interval 0 I do not have a formal explanation, just experimental conclusions. It appears that after triggering QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE for a port's TAS, the GCL entry RAM is updated anyway, despite what the documentation claims: "Specify the time interval in QSYS::GCL_CFG_REG_2.TIME_INTERVAL. This triggers the actual RAM write with the gate state and the time interval for the entry number specified". We don't touch that register (through vsc9959_tas_gcl_set()) from vsc9959_tas_clock_adjust(), yet the GCL RAM is updated anyway. It seems to be updated with effectively stale memory, which in my testing can hold a variety of things, including even pieces of the previously applied schedule, for particular schedule lengths. As such, in most circumstances it is very difficult to pinpoint this issue, because the newly updated schedule would "behave strangely", but ultimately might still pass traffic to some extent, due to some gate entries still being present in the stale GCL entry RAM. It is easy to miss. With the particular schedule given at the beginning, the GCL RAM "happens" to be reproducibly rewritten with all zeroes, and this is consistent with what we see: when the time-aware shaper has gate entries with all gates closed, traffic is dropped on TX, no wonder we can't retrieve TX timestamps. Rewriting the GCL entry RAM when reapplying the new base time fixes the observed issue. Fixes: 8670dc33f48b ("net: dsa: felix: update base time of time-aware shaper when adjusting PTP time") Reported-by: Richie Pearn Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 940f1b71226d64..7b35d24c38d765 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1543,7 +1543,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) struct tc_taprio_qopt_offload *taprio; struct ocelot_port *ocelot_port; struct timespec64 base_ts; - int port; + int i, port; u32 val; mutex_lock(&ocelot->fwd_domain_lock); @@ -1575,6 +1575,9 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M, QSYS_PARAM_CFG_REG_3); + for (i = 0; i < taprio->num_entries; i++) + vsc9959_tas_gcl_set(ocelot, i, &taprio->entries[i]); + ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, QSYS_TAS_PARAM_CFG_CTRL); From efa6eb7d77aaf5b05eed25c0ecbf7754cc325c83 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:56 +0300 Subject: [PATCH 1321/2924] selftests: net: tsn_lib: create common helper for counting received packets This snippet will be necessary for a future isochron-based test, so provide a simpler high-level interface for counting the received packets. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-3-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ocelot/psfp.sh | 7 +------ tools/testing/selftests/net/forwarding/tsn_lib.sh | 11 +++++++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh index bed748dde4b066..f96a4bc7120f39 100755 --- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -272,12 +272,7 @@ run_test() "" \ "${isochron_dat}" - # Count all received packets by looking at the non-zero RX timestamps - received=$(isochron report \ - --input-file "${isochron_dat}" \ - --printf-format "%u\n" --printf-args "R" | \ - grep -w -v '0' | wc -l) - + received=$(isochron_report_num_received "${isochron_dat}") if [ "${received}" = "${expected}" ]; then RET=0 else diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index b91bcd8008a993..19da1ccceac823 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -247,3 +247,14 @@ isochron_do() cpufreq_restore ${ISOCHRON_CPU} } + +isochron_report_num_received() +{ + local isochron_dat=$1; shift + + # Count all received packets by looking at the non-zero RX timestamps + isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u\n" --printf-args "R" | \ + grep -w -v '0' | wc -l +} From f52fe6efd61f54c5cb0e19ef1fde96cf23048a70 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:57 +0300 Subject: [PATCH 1322/2924] selftests: net: tsn_lib: add window_size argument to isochron_do() Make out-of-band testing (send a packet when its traffic class gate is closed, expecting it to be delayed) more predictable by allowing the window size to be customized by isochron_do(). From man isochron-send, the window size alters the advance time (the delta between the transmission time of the packet, and its expected TX time when using SO_TXTIME or tc-taprio on the sender). In absence of the argument, isochron-send defaults to maximizing the advance time (making it equal to the cycle length). The default behavior is exactly what is problematic. An advance time that is too large will make packets intended to be out-of-band still be potentially in-band with an open gate from the schedule's previous cycle. We need to allow that advance time to be reduced. Perhaps a bit confusingly, isochron_do() has a shift_time argument currently, but that does not help here. The shift time shifts both the user space wakeup time and the expected TX time by equal amounts, it is unable of bringing them closer to one another. Set the window size properly for the Ocelot PSFP selftest as well. That used to work due to a very carefully chosen SHIFT_TIME_NS. I've re-tested that the test still works properly. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-4-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ocelot/psfp.sh | 1 + tools/testing/selftests/net/forwarding/tsn_lib.sh | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh index f96a4bc7120f39..8972f42dfe03ec 100755 --- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -266,6 +266,7 @@ run_test() "${base_time}" \ "${CYCLE_TIME_NS}" \ "${SHIFT_TIME_NS}" \ + "${GATE_DURATION_NS}" \ "${NUM_PKTS}" \ "${STREAM_VID}" \ "${STREAM_PRIO}" \ diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index 19da1ccceac823..bcee7960a39f24 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -182,6 +182,7 @@ isochron_do() local base_time=$1; shift local cycle_time=$1; shift local shift_time=$1; shift + local window_size=$1; shift local num_pkts=$1; shift local vid=$1; shift local priority=$1; shift @@ -212,6 +213,10 @@ isochron_do() extra_args="${extra_args} --shift-time=${shift_time}" fi + if ! [ -z "${window_size}" ]; then + extra_args="${extra_args} --window-size=${window_size}" + fi + if [ "${use_l2}" = "true" ]; then extra_args="${extra_args} --l2 --etype=0xdead ${vid}" receiver_extra_args="--l2 --etype=0xdead" From 4eb9da050f005fbbb7d301e8e99cfdb6e4771a0d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 26 Apr 2025 17:48:58 +0300 Subject: [PATCH 1323/2924] selftests: net: tc_taprio: new test Add a forwarding path test for tc-taprio, based on isochron. This is specifically intended for NICs with an offloaded data path (switchdev/DSA) and requires taprio 'flags 2'. Also, $h1 and $h2 must support hardware timestamping, and $h1 tc-etf offload, for isochron to work. Packets received by a switch while the egress port has a taprio schedule with an open gate for the traffic class must be sent right away. Packets received by the switch while the traffic class gate must be delayed until it opens. Packets received by the switch must be dropped if the gate for the traffic class never opens. Packets should pass if the maximum SDU for the traffic class allows it, and should be dropped otherwise. The schedule should auto-update itself if clock jumps take place while taprio is installed. Repeat most of the above tests after forcing two clock jumps, one backwards (in Jan 1970) and one back into the present. Symlink it from tools/testing/selftests/drivers/net/dsa, because usually DSA ports have the same MAC address, and we need STABLE_MAC_ADDRS=yes from its forwarding.config for the test to run successfully. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250426144859.3128352-5-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/dsa/tc_taprio.sh | 1 + .../selftests/net/forwarding/tc_taprio.sh | 421 ++++++++++++++++++ .../selftests/net/forwarding/tsn_lib.sh | 10 + 3 files changed, 432 insertions(+) create mode 120000 tools/testing/selftests/drivers/net/dsa/tc_taprio.sh create mode 100755 tools/testing/selftests/net/forwarding/tc_taprio.sh diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh new file mode 120000 index 00000000000000..d16a65e7595d9c --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh new file mode 100755 index 00000000000000..8992aeabfe0b43 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh @@ -0,0 +1,421 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" \ + test_clock_jump_backward \ + test_taprio_after_ptp \ + test_max_sdu \ + test_clock_jump_backward_forward \ +" +NUM_NETIFS=4 +source tc_common.sh +source lib.sh +source tsn_lib.sh + +require_command python3 + +# The test assumes the usual topology from the README, where h1 is connected to +# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge. +# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2. +# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to +# swp1 (and both to CLOCK_REALTIME). +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +UDS_ADDRESS_H1="/var/run/ptp4l_h1" +UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1" + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# Tunables +NUM_PKTS=100 +STREAM_VID=10 +STREAM_PRIO_1=6 +STREAM_PRIO_2=5 +STREAM_PRIO_3=4 +# PTP uses TC 0 +ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2)) +# Use a conservative cycle of 10 ms to allow the test to still pass when the +# kernel has some extra overhead like lockdep etc +CYCLE_TIME_NS=10000000 +# Create two Gate Control List entries, one OPEN and one CLOSE, of equal +# durations +GATE_DURATION_NS=$((CYCLE_TIME_NS / 2)) +# Give 2/3 of the cycle time to user space and 1/3 to the kernel +FUDGE_FACTOR=$((CYCLE_TIME_NS / 3)) +# Shift the isochron base time by half the gate time, so that packets are +# always received by swp1 close to the middle of the time slot, to minimize +# inaccuracies due to network sync +SHIFT_TIME_NS=$((GATE_DURATION_NS / 2)) + +path_delay= + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +switch_create() +{ + local h2_mac_addr=$(mac_get $h2) + + ip link set $swp1 up + ip link set $swp2 up + + ip link add br0 type bridge vlan_filtering 1 + ip link set $swp1 master br0 + ip link set $swp2 master br0 + ip link set br0 up + + bridge vlan add dev $swp2 vid $STREAM_VID + bridge vlan add dev $swp1 vid $STREAM_VID + bridge fdb add dev $swp2 \ + $h2_mac_addr vlan $STREAM_VID static master +} + +switch_destroy() +{ + ip link del br0 +} + +ptp_setup() +{ + # Set up swp1 as a master PHC for h1, synchronized to the local + # CLOCK_REALTIME. + phc2sys_start $UDS_ADDRESS_SWP1 + ptp4l_start $h1 true $UDS_ADDRESS_H1 + ptp4l_start $swp1 false $UDS_ADDRESS_SWP1 +} + +ptp_cleanup() +{ + ptp4l_stop $swp1 + ptp4l_stop $h1 + phc2sys_stop +} + +txtime_setup() +{ + local if_name=$1 + + tc qdisc add dev $if_name clsact + # Classify PTP on TC 7 and isochron on TC 6 + tc filter add dev $if_name egress protocol 0x88f7 \ + flower action skbedit priority 7 + tc filter add dev $if_name egress protocol 802.1Q \ + flower vlan_ethtype 0xdead action skbedit priority 6 + tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + hw 1 + # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1. + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR +} + +txtime_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name clsact + tc qdisc del dev $if_name root +} + +taprio_replace() +{ + local if_name="$1"; shift + local extra_args="$1"; shift + + # STREAM_PRIO_1 always has an open gate. + # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time) + # STREAM_PRIO_3 always has a closed gate. + tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \ + sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \ + base-time 0 flags 0x2 $extra_args + taprio_wait_for_admin $if_name +} + +taprio_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name root +} + +probe_path_delay() +{ + local isochron_dat="$(mktemp)" + local received + + log_info "Probing path delay" + + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \ + "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \ + "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + if [ "$received" != "$NUM_PKTS" ]; then + echo "Cannot establish basic data path between $h1 and $h2" + exit $ksft_fail + fi + + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(np.max(w))) + EOF + path_delay=$(python3 ./isochron_postprocess.py) + + log_info "Path delay from $h1 to $h2 estimated at $path_delay ns" + + if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then + echo "Path delay larger than gate duration, aborting" + exit $ksft_fail + fi + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create + + txtime_setup $h1 + + # Temporarily set up PTP just to probe the end-to-end path delay. + ptp_setup + probe_path_delay + ptp_cleanup +} + +cleanup() +{ + pre_cleanup + + isochron_recv_stop + txtime_cleanup $h1 + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +run_test() +{ + local base_time=$1; shift + local stream_prio=$1; shift + local expected_delay=$1; shift + local should_fail=$1; shift + local test_name=$1; shift + local isochron_dat="$(mktemp)" + local received + local median_delay + + RET=0 + + # Set the shift time equal to the cycle time, which effectively + # cancels the default advance time. Packets won't be sent early in + # software, which ensures that they won't prematurely enter through + # the open gate in __test_out_of_band(). Also, the gate is open for + # long enough that this won't cause a problem in __test_in_band(). + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \ + "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \ + "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + [ "$received" = "$NUM_PKTS" ] + check_err_fail $should_fail $? "Reception of $NUM_PKTS packets" + + if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(int(np.median(w)))) + EOF + median_delay=$(python3 ./isochron_postprocess.py) + + # If the condition below is true, packets were delayed by a closed gate + [ "$median_delay" -gt $((path_delay + expected_delay)) ] + check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay" + + # If the condition below is true, packets were sent expecting them to + # hit a closed gate in the switch, but were not delayed + [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ] + check_fail $? "Median delay $median_delay is less than expected delay $expected_delay" + fi + + log_test "$test_name" + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +__test_always_open() +{ + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open" +} + +__test_always_closed() +{ + run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed" +} + +__test_in_band() +{ + # Send packets in-band with the OPEN gate entry + run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate" +} + +__test_out_of_band() +{ + # Send packets in-band with the CLOSE gate entry + run_test 0.005000000 $STREAM_PRIO_2 \ + $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \ + "Out of band with gate" +} + +run_subtests() +{ + __test_always_open + __test_always_closed + __test_in_band + __test_out_of_band +} + +test_taprio_after_ptp() +{ + log_info "Setting up taprio after PTP" + ptp_setup + taprio_replace $swp2 + run_subtests + taprio_cleanup $swp2 + ptp_cleanup +} + +__test_under_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0" + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU" +} + +__test_over_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0" + run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU" +} + +test_max_sdu() +{ + ptp_setup + __test_under_max_sdu + __test_over_max_sdu + taprio_cleanup $swp2 + ptp_cleanup +} + +# Perform a clock jump in the past without synchronization running, so that the +# time base remains where it was set by phc_ctl. +test_clock_jump_backward() +{ + # This is a more complex schedule specifically crafted in a way that + # has been problematic on NXP LS1028A. Not much to test with it other + # than the fact that it passes traffic. + tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ + base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ + sched-entry S 20 300000 sched-entry S 48 200000 \ + sched-entry S 20 300000 sched-entry S 83 200000 \ + sched-entry S 40 300000 sched-entry S 00 200000 flags 2 + + log_info "Forcing a backward clock jump" + phc_ctl $swp1 set 0 + + ping_test $h1 192.0.2.2 + taprio_cleanup $swp2 +} + +# Test that taprio tolerates clock jumps. +# Since ptp4l and phc2sys are running, it is expected for the time to +# eventually recover (through yet another clock jump). Isochron waits +# until that is the case. +test_clock_jump_backward_forward() +{ + log_info "Forcing a backward and a forward clock jump" + taprio_replace $swp2 + phc_ctl $swp1 set 0 + ptp_setup + ping_test $h1 192.0.2.2 + run_subtests + ptp_cleanup + taprio_cleanup $swp2 +} + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_test_skip "Could not test offloaded functionality" + exit $EXIT_STATUS +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index bcee7960a39f24..08c044ff6689d4 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright 2021-2022 NXP +tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts + REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes} REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes} @@ -18,6 +20,7 @@ fi if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then require_command phc2sys require_command ptp4l + require_command phc_ctl fi phc2sys_start() @@ -263,3 +266,10 @@ isochron_report_num_received() --printf-format "%u\n" --printf-args "R" | \ grep -w -v '0' | wc -l } + +taprio_wait_for_admin() +{ + local if_name="$1"; shift + + "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name" +} From b936a9b8d4a585ccb6d454921c36286bfe63e01d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 26 Apr 2025 17:32:09 +0200 Subject: [PATCH 1324/2924] net: ipv6: fix UDPv6 GSO segmentation with NAT If any address or port is changed, update it in all packets and recalculate checksum. Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Signed-off-by: Felix Fietkau Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250426153210.14044-1-nbd@nbd.name Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 61 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 2c0725583be39f..9a8142ccbabe44 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -247,6 +247,62 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs) return segs; } +static void __udpv6_gso_segment_csum(struct sk_buff *seg, + struct in6_addr *oldip, + const struct in6_addr *newip, + __be16 *oldport, __be16 newport) +{ + struct udphdr *uh = udp_hdr(seg); + + if (ipv6_addr_equal(oldip, newip) && *oldport == newport) + return; + + if (uh->check) { + inet_proto_csum_replace16(&uh->check, seg, oldip->s6_addr32, + newip->s6_addr32, true); + + inet_proto_csum_replace2(&uh->check, seg, *oldport, newport, + false); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + + *oldip = *newip; + *oldport = newport; +} + +static struct sk_buff *__udpv6_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct ipv6hdr *iph; + const struct udphdr *uh; + struct ipv6hdr *iph2; + struct sk_buff *seg; + struct udphdr *uh2; + + seg = segs; + uh = udp_hdr(seg); + iph = ipv6_hdr(seg); + uh2 = udp_hdr(seg->next); + iph2 = ipv6_hdr(seg->next); + + if (!(*(const u32 *)&uh->source ^ *(const u32 *)&uh2->source) && + ipv6_addr_equal(&iph->saddr, &iph2->saddr) && + ipv6_addr_equal(&iph->daddr, &iph2->daddr)) + return segs; + + while ((seg = seg->next)) { + uh2 = udp_hdr(seg); + iph2 = ipv6_hdr(seg); + + __udpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr, + &uh2->source, uh->source); + __udpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr, + &uh2->dest, uh->dest); + } + + return segs; +} + static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, netdev_features_t features, bool is_ipv6) @@ -259,7 +315,10 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss); - return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb); + if (is_ipv6) + return __udpv6_gso_segment_list_csum(skb); + else + return __udpv4_gso_segment_list_csum(skb); } struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, From 95b2536137eeb66f20947e0fb0d0c100c8d6a140 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 29 Apr 2025 09:35:19 +0200 Subject: [PATCH 1325/2924] ASoC: Intel: catpt: avoid type mismatch in dev_dbg() format Depending on the architecture __ffs() returns either an 'unsigned long' or 'unsigned int' result. Compile-testing this driver on targets that use the latter produces a warning: sound/soc/intel/catpt/dsp.c: In function 'catpt_dsp_set_srampge': sound/soc/intel/catpt/dsp.c:181:44: error: format '%ld' expects argument of type 'long int', but argument 4 has type 'u32' {aka 'unsigned int'} [-Werror=format=] 181 | dev_dbg(cdev->dev, "sanitize block %ld: off 0x%08x\n", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Change the type of the local variable to match the format string and avoid the warning on any architecture. Signed-off-by: Arnd Bergmann Acked-by: Cezary Rojewski Link: https://patch.msgid.link/20250429073545.3558494-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/intel/catpt/dsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c index 5993819cc58a26..008a20a2acbda7 100644 --- a/sound/soc/intel/catpt/dsp.c +++ b/sound/soc/intel/catpt/dsp.c @@ -156,7 +156,7 @@ static void catpt_dsp_set_srampge(struct catpt_dev *cdev, struct resource *sram, { unsigned long old; u32 off = sram->start; - u32 b = __ffs(mask); + unsigned long b = __ffs(mask); old = catpt_readl_pci(cdev, VDRTCTL0) & mask; dev_dbg(cdev->dev, "SRAMPGE [0x%08lx] 0x%08lx -> 0x%08lx", From 36fd6275818e93d5bc44140d546bf2a45e88feee Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Mon, 28 Apr 2025 09:30:55 +0200 Subject: [PATCH 1326/2924] spi: spi-qpic-snand: fix NAND_READ_LOCATION_2 register handling The precomputed value for the NAND_READ_LOCATION_2 register should be stored in 'snandc->regs->read_location2'. Fix the qcom_spi_set_read_loc_first() function accordingly. Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Reviewed-by: Md Sadre Alam Link: https://patch.msgid.link/20250428-qpic-snand-readloc2-fix-v1-1-50ce0877ff72@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-qpic-snand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index ae32c452d0bcf8..94948c8781e83f 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -142,7 +142,7 @@ static void qcom_spi_set_read_loc_first(struct qcom_nand_controller *snandc, else if (reg == NAND_READ_LOCATION_1) snandc->regs->read_location1 = locreg_val; else if (reg == NAND_READ_LOCATION_2) - snandc->regs->read_location1 = locreg_val; + snandc->regs->read_location2 = locreg_val; else if (reg == NAND_READ_LOCATION_3) snandc->regs->read_location3 = locreg_val; } From 9fcd53c3206618166facf03ee3d465f66a107e01 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Tue, 29 Apr 2025 07:50:56 +0000 Subject: [PATCH 1327/2924] erofs: remove unused enum type Opt_err is not used in EROFS, we can remove it. Signed-off-by: Hongbo Li Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20250429075056.689570-1-lihongbo22@huawei.com Signed-off-by: Gao Xiang --- fs/erofs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index cadec6b1b55451..da6ee7c39290d5 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -357,7 +357,6 @@ static void erofs_default_options(struct erofs_sb_info *sbi) enum { Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum, Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, - Opt_err }; static const struct constant_table erofs_param_cache_strategy[] = { From 4d5b71b487291da9f92e352c0a7e39f256d60db8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Apr 2025 07:31:41 +0200 Subject: [PATCH 1328/2924] ALSA: hda/realtek: Fix built-mic regression on other ASUS models A few ASUS models use the ALC256_FIXUP_ASUS_HEADSET_MODE although they have no built-in mic pin on NID 0x13, as found in the commit c1732ede5e80 ("ALSA: hda/realtek - Fix headset and mic on several Asus laptops with ALC256"). This was relatively harmless in the past as NID 0x13 was assigned as the secondary mic. But since the fix for the pin sort order, this pin became the primary one, hence user started noticing the broken input, and we've fixed already for a few ASUS models to switch to ALC256_FIXUP_ASUS_MIC_NO_PRESENCE. This patch corrects the other ASUS models to use the right quirk entry for fixing the built-in mic regression. Here we cover X541SA (1043:12e0), X541UV (1043:12f0), Z550SA (1043:13bf0) and X555UB (1043:1ccd). Fixes: 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort") Link: https://bugzilla.kernel.org/show_bug.cgi?id=220058 Link: https://patch.msgid.link/20250430053210.31776-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1799203af35a85..7810d5f9b5aac7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10910,10 +10910,10 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x12b4, "ASUS B3405CCA / P3405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), - SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1460, "Asus VivoBook 15", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -10967,7 +10967,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), - SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1ccf, "ASUS G814JU/JV/JI", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1cdf, "ASUS G814JY/JZ/JG", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1cef, "ASUS G834JY/JZ/JI/JG", ALC285_FIXUP_ASUS_HEADSET_MIC), From 417fae2c40896f0a67ce7fa7d9b8c6056ec36dd9 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 25 Apr 2025 16:32:55 +0200 Subject: [PATCH 1329/2924] xfrm: ipcomp: fix truesize computation on receive ipcomp_post_acomp currently drops all frags (via pskb_trim_unique(skb, 0)), and then subtracts the old skb->data_len from truesize. This adjustment has already be done during trimming (in skb_condense), so we don't need to do it again. This shows up for example when running fragmented traffic over ipcomp, we end up hitting the WARN_ON_ONCE in skb_try_coalesce. Fixes: eb2953d26971 ("xfrm: ipcomp: Use crypto_acomp interface") Signed-off-by: Sabrina Dubroca Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_ipcomp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 0c14205343941d..907c3ccb440dab 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -48,7 +48,6 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) { struct acomp_req *req = ipcomp_cb(skb)->req; struct ipcomp_req_extra *extra; - const int plen = skb->data_len; struct scatterlist *dsg; int len, dlen; @@ -64,7 +63,7 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) /* Only update truesize on input. */ if (!hlen) - skb->truesize += dlen - plen; + skb->truesize += dlen; skb->data_len = dlen; skb->len += dlen; From 3e6244429ba38f8dee3336b8b805948276b281ab Mon Sep 17 00:00:00 2001 From: Ze Huang Date: Mon, 28 Apr 2025 17:24:36 +0800 Subject: [PATCH 1330/2924] riscv: dts: sophgo: fix DMA data-width configuration for CV18xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "snps,data-width" property[1] defines the AXI data width of the DMA controller as: width = 8 × (2^n) bits (0 = 8 bits, 1 = 16 bits, 2 = 32 bits, ..., 6 = 512 bits) where "n" is the value of "snps,data-width". For the CV18xx DMA controller, the correct AXI data width is 32 bits, corresponding to "snps,data-width = 2". Test results on Milkv Duo S can be found here [2]. Link: https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/dma/snps%2Cdw-axi-dmac.yaml#L74 [1] Link: https://gist.github.com/Sutter099/4fa99bb2d89e5af975983124704b3861 [2] Fixes: 514951a81a5e ("riscv: dts: sophgo: cv18xx: add DMA controller") Co-developed-by: Yu Yuan Signed-off-by: Yu Yuan Signed-off-by: Ze Huang Link: https://lore.kernel.org/r/20250428-duo-dma-config-v1-1-eb6ad836ca42@whut.edu.cn Signed-off-by: Inochi Amaoto Signed-off-by: Chen Wang Signed-off-by: Chen Wang --- arch/riscv/boot/dts/sophgo/cv18xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/sophgo/cv18xx.dtsi b/arch/riscv/boot/dts/sophgo/cv18xx.dtsi index c18822ec849f35..58cd546392e056 100644 --- a/arch/riscv/boot/dts/sophgo/cv18xx.dtsi +++ b/arch/riscv/boot/dts/sophgo/cv18xx.dtsi @@ -341,7 +341,7 @@ 1024 1024 1024 1024>; snps,priority = <0 1 2 3 4 5 6 7>; snps,dma-masters = <2>; - snps,data-width = <4>; + snps,data-width = <2>; status = "disabled"; }; From c4eb2f88d2796ab90c5430e11c48709716181364 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Fri, 25 Apr 2025 11:28:22 +0200 Subject: [PATCH 1331/2924] accel/ivpu: Increase state dump msg timeout Increase JMS message state dump command timeout to 100 ms. On some platforms, the FW may take a bit longer than 50 ms to dump its state to the log buffer and we don't want to miss any debug info during TDR. Fixes: 5e162f872d7a ("accel/ivpu: Add FW state dump on TDR") Cc: stable@vger.kernel.org # v6.13+ Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250425092822.2194465-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index ec9a3629da3a92..633160470c939f 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -119,7 +119,7 @@ static void timeouts_init(struct ivpu_device *vdev) else vdev->timeout.autosuspend = 100; vdev->timeout.d0i3_entry_msg = 5; - vdev->timeout.state_dump_msg = 10; + vdev->timeout.state_dump_msg = 100; } } From f2ecc700d1ef53127c14a3463442a8ac2a6a5cf5 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Fri, 25 Apr 2025 11:33:40 +0200 Subject: [PATCH 1332/2924] accel/ivpu: Fix pm related deadlocks in cmdq ioctls Fix deadlocks in ivpu_cmdq_create_ioctl() and ivpu_cmdq_destroy_ioctl() related to runtime suspend. Runtime suspend acquires file_priv->lock mutex by calling ivpu_cmdq_reset_all_contexts(). The same lock is acquired in the cmdq ioctls. If one of the cmdq ioctls is called while runtime suspend is in progress, it can lead to a deadlock. Call stacks from example deadlock below. Runtime suspend thread: [ 3443.179717] Call Trace: [ 3443.179724] __schedule+0x4b6/0x16b0 [ 3443.179732] ? __mod_timer+0x27d/0x3a0 [ 3443.179738] schedule+0x2f/0x140 [ 3443.179741] schedule_preempt_disabled+0x19/0x30 [ 3443.179743] __mutex_lock.constprop.0+0x335/0x7d0 [ 3443.179745] ? xas_find+0x1ed/0x260 [ 3443.179747] ? xa_find+0x8e/0xf0 [ 3443.179749] __mutex_lock_slowpath+0x13/0x20 [ 3443.179751] mutex_lock+0x41/0x60 [ 3443.179757] ivpu_cmdq_reset_all_contexts+0x82/0x150 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.179786] ivpu_pm_runtime_suspend_cb+0x1f1/0x3f0 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.179850] pci_pm_runtime_suspend+0x6e/0x1f0 [ 3443.179870] ? __pfx_pci_pm_runtime_suspend+0x10/0x10 [ 3443.179886] __rpm_callback+0x48/0x130 [ 3443.179899] rpm_callback+0x64/0x70 [ 3443.179911] rpm_suspend+0x12c/0x630 [ 3443.179922] ? __schedule+0x4be/0x16b0 [ 3443.179941] pm_runtime_work+0xca/0xf0 [ 3443.179955] process_one_work+0x188/0x3d0 [ 3443.179971] worker_thread+0x2b9/0x3c0 [ 3443.179984] kthread+0xfb/0x220 [ 3443.180001] ? __pfx_worker_thread+0x10/0x10 [ 3443.180013] ? __pfx_kthread+0x10/0x10 [ 3443.180029] ret_from_fork+0x47/0x70 [ 3443.180044] ? __pfx_kthread+0x10/0x10 [ 3443.180059] ret_from_fork_asm+0x1a/0x30 User space thread: [ 3443.180128] Call Trace: [ 3443.180138] __schedule+0x4b6/0x16b0 [ 3443.180159] schedule+0x2f/0x140 [ 3443.180163] rpm_resume+0x1a7/0x6a0 [ 3443.180165] ? __pfx_autoremove_wake_function+0x10/0x10 [ 3443.180169] __pm_runtime_resume+0x56/0x90 [ 3443.180171] ivpu_rpm_get+0x28/0xb0 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180181] ivpu_ipc_send_receive+0x6d/0x120 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180193] ? free_frozen_pages+0x395/0x670 [ 3443.180199] ? __free_pages+0xa7/0xc0 [ 3443.180202] ivpu_jsm_hws_destroy_cmdq+0x76/0xf0 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180213] ? locks_dispose_list+0x6c/0xa0 [ 3443.180219] ? kmem_cache_free+0x342/0x470 [ 3443.180222] ? vm_area_free+0x19/0x30 [ 3443.180225] ? xas_load+0x17/0xf0 [ 3443.180229] ? xa_load+0x72/0xb0 [ 3443.180230] ivpu_cmdq_unregister.isra.0+0xb1/0x100 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180241] ivpu_cmdq_destroy_ioctl+0x8d/0x130 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180251] ? __pfx_ivpu_cmdq_destroy_ioctl+0x10/0x10 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180260] drm_ioctl_kernel+0xb3/0x110 [ 3443.180265] drm_ioctl+0x2ca/0x580 [ 3443.180266] ? __pfx_ivpu_cmdq_destroy_ioctl+0x10/0x10 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180275] ? __fput+0x1ae/0x2f0 [ 3443.180279] ? kmem_cache_free+0x342/0x470 [ 3443.180282] __x64_sys_ioctl+0xa9/0xe0 [ 3443.180286] x64_sys_call+0x13b7/0x26f0 [ 3443.180289] do_syscall_64+0x62/0x180 [ 3443.180291] entry_SYSCALL_64_after_hwframe+0x71/0x79 Fixes: 465a3914b254 ("accel/ivpu: Add API for command queue create/destroy/submit") Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250425093341.2202895-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 863e3cd6ace51e..e17b3deda2012c 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -874,15 +874,21 @@ int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file * int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_cmdq_create *args = data; struct ivpu_cmdq *cmdq; + int ret; - if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) + if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) return -ENODEV; if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false); @@ -891,6 +897,8 @@ int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file * mutex_unlock(&file_priv->lock); + ivpu_rpm_put(vdev); + return cmdq ? 0 : -ENOMEM; } @@ -900,28 +908,35 @@ int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_cmdq_destroy *args = data; struct ivpu_cmdq *cmdq; - u32 cmdq_id; + u32 cmdq_id = 0; int ret; if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) return -ENODEV; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id); if (!cmdq || cmdq->is_legacy) { ret = -ENOENT; - goto err_unlock; + } else { + cmdq_id = cmdq->id; + ivpu_cmdq_destroy(file_priv, cmdq); + ret = 0; } - cmdq_id = cmdq->id; - ivpu_cmdq_destroy(file_priv, cmdq); mutex_unlock(&file_priv->lock); - ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); - return 0; -err_unlock: - mutex_unlock(&file_priv->lock); + /* Abort any pending jobs only if cmdq was destroyed */ + if (!ret) + ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); + + ivpu_rpm_put(vdev); + return ret; } From 75680b7cd461b169c7ccd2a0fba7542868b7fce2 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Fri, 25 Apr 2025 11:36:56 +0200 Subject: [PATCH 1333/2924] accel/ivpu: Correct mutex unlock order in job submission The mutex unlock for vdev->submitted_jobs_lock was incorrectly placed before unlocking file_priv->lock. Change order of unlocks to avoid potential race conditions. Fixes: 5bbccadaf33e ("accel/ivpu: Abort all jobs after command queue unregister") Signed-off-by: Karol Wachowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250425093656.2228168-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index e17b3deda2012c..b28da35c30b675 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -681,8 +681,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) err_erase_xa: xa_erase(&vdev->submitted_jobs_xa, job->job_id); err_unlock: - mutex_unlock(&vdev->submitted_jobs_lock); mutex_unlock(&file_priv->lock); + mutex_unlock(&vdev->submitted_jobs_lock); ivpu_rpm_put(vdev); return ret; } From 58f6217e5d0132a9f14e401e62796916aa055c1b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 25 Apr 2025 17:13:55 -0700 Subject: [PATCH 1334/2924] perf/x86/intel: KVM: Mask PEBS_ENABLE loaded for guest with vCPU's value. When generating the MSR_IA32_PEBS_ENABLE value that will be loaded on VM-Entry to a KVM guest, mask the value with the vCPU's desired PEBS_ENABLE value. Consulting only the host kernel's host vs. guest masks results in running the guest with PEBS enabled even when the guest doesn't want to use PEBS. Because KVM uses perf events to proxy the guest virtual PMU, simply looking at exclude_host can't differentiate between events created by host userspace, and events created by KVM on behalf of the guest. Running the guest with PEBS unexpectedly enabled typically manifests as crashes due to a near-infinite stream of #PFs. E.g. if the guest hasn't written MSR_IA32_DS_AREA, the CPU will hit page faults on address '0' when trying to record PEBS events. The issue is most easily reproduced by running `perf kvm top` from before commit 7b100989b4f6 ("perf evlist: Remove __evlist__add_default") (after which, `perf kvm top` effectively stopped using PEBS). The userspace side of perf creates a guest-only PEBS event, which intel_guest_get_msrs() misconstrues a guest-*owned* PEBS event. Arguably, this is a userspace bug, as enabling PEBS on guest-only events simply cannot work, and userspace can kill VMs in many other ways (there is no danger to the host). However, even if this is considered to be bad userspace behavior, there's zero downside to perf/KVM restricting PEBS to guest-owned events. Note, commit 854250329c02 ("KVM: x86/pmu: Disable guest PEBS temporarily in two rare situations") fixed the case where host userspace is profiling KVM *and* userspace, but missed the case where userspace is profiling only KVM. Fixes: c59a1f106f5c ("KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS") Closes: https://lore.kernel.org/all/Z_VUswFkWiTYI0eD@do-x1carbon Reported-by: Seth Forshee Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Tested-by: "Seth Forshee (DigitalOcean)" Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250426001355.1026530-1-seanjc@google.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 00dfe487bd00ea..c5f385413392b1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4395,7 +4395,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) arr[pebs_enable] = (struct perf_guest_switch_msr){ .msr = MSR_IA32_PEBS_ENABLE, .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, - .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable, }; if (arr[pebs_enable].host) { From 9ab7a709c926c16b4433cf02d04fcbcf35aaab2b Mon Sep 17 00:00:00 2001 From: Shravya KN Date: Mon, 28 Apr 2025 15:58:56 -0700 Subject: [PATCH 1335/2924] bnxt_en: Fix error handling path in bnxt_init_chip() WARN_ON() is triggered in __flush_work() if bnxt_init_chip() fails because we call cancel_work_sync() on dim work that has not been initialized. WARNING: CPU: 37 PID: 5223 at kernel/workqueue.c:4201 __flush_work.isra.0+0x212/0x230 The driver relies on the BNXT_STATE_NAPI_DISABLED bit to check if dim work has already been cancelled. But in the bnxt_open() path, BNXT_STATE_NAPI_DISABLED is not set and this causes the error path to think that it needs to cancel the uninitalized dim work. Fix it by setting BNXT_STATE_NAPI_DISABLED during initialization. The bit will be cleared when we enable NAPI and initialize dim work. Fixes: 40452969a506 ("bnxt_en: Fix DIM shutdown") Suggested-by: Somnath Kotur Reviewed-by: Somnath Kotur Signed-off-by: Shravya KN Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2c8e2c19d85484..c4bccc683597c4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11602,6 +11602,9 @@ static void bnxt_init_napi(struct bnxt *bp) poll_fn = bnxt_poll_p5; else if (BNXT_CHIP_TYPE_NITRO_A0(bp)) cp_nr_rings--; + + set_bit(BNXT_STATE_NAPI_DISABLED, &bp->state); + for (i = 0; i < cp_nr_rings; i++) { bnapi = bp->bnapi[i]; netif_napi_add_config_locked(bp->dev, &bnapi->napi, poll_fn, From 8e6cc9045380f3f0c48ebda2bda5e1abe263388d Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 28 Apr 2025 15:58:57 -0700 Subject: [PATCH 1336/2924] bnxt_en: Fix ethtool selftest output in one of the failure cases When RDMA driver is loaded, running offline self test is not supported and driver returns failure early. But it is not clearing the input buffer and hence the application prints some junk characters for individual test results. Fix it by clearing the buffer before returning. Fixes: 895621f1c816 ("bnxt_en: Don't support offline self test when RoCE driver is loaded") Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 48dd5922e4dd8b..7be37976f3e490 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -4991,6 +4991,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, if (!bp->num_tests || !BNXT_PF(bp)) return; + memset(buf, 0, sizeof(u64) * bp->num_tests); if (etest->flags & ETH_TEST_FL_OFFLINE && bnxt_ulp_registered(bp->edev)) { etest->flags |= ETH_TEST_FL_FAILED; @@ -4998,7 +4999,6 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, return; } - memset(buf, 0, sizeof(u64) * bp->num_tests); if (!netif_running(dev)) { etest->flags |= ETH_TEST_FL_FAILED; return; From a63db07e4ecd45b027718168faf7d798bb47bf58 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Mon, 28 Apr 2025 15:58:58 -0700 Subject: [PATCH 1337/2924] bnxt_en: Add missing skb_mark_for_recycle() in bnxt_rx_vlan() If bnxt_rx_vlan() fails because the VLAN protocol ID is invalid, the SKB is freed but we're missing the call to recycle it. This may cause the warning: "page_pool_release_retry() stalled pool shutdown" Add the missing skb_mark_for_recycle() in bnxt_rx_vlan(). Fixes: 86b05508f775 ("bnxt_en: Use the unified RX page pool buffers for XDP and non-XDP") Reviewed-by: Kalesh AP Reviewed-by: Pavan Chebbi Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c4bccc683597c4..cfc9ccab39bf8c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2015,6 +2015,7 @@ static struct sk_buff *bnxt_rx_vlan(struct sk_buff *skb, u8 cmp_type, } return skb; vlan_err: + skb_mark_for_recycle(skb); dev_kfree_skb(skb); return NULL; } From 1ae04e489dd757e1e61999362f33e7c554c3b9e3 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 28 Apr 2025 15:58:59 -0700 Subject: [PATCH 1338/2924] bnxt_en: call pci_alloc_irq_vectors() after bnxt_reserve_rings() On some architectures (e.g. ARM), calling pci_alloc_irq_vectors() will immediately cause the MSIX table to be written. This will not work if we haven't called bnxt_reserve_rings() to properly map the MSIX table to the MSIX vectors reserved by FW. Fix the FW error recovery path to delay the bnxt_init_int_mode() -> pci_alloc_irq_vectors() call by removing it from bnxt_hwrm_if_change(). bnxt_request_irq() later in the code path will call it and by then the MSIX table is properly mapped. Fixes: 4343838ca5eb ("bnxt_en: Replace deprecated PCI MSIX APIs") Suggested-by: Michael Chan Signed-off-by: Kashyap Desai Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index cfc9ccab39bf8c..3b2ea36f25a2a7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12399,13 +12399,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) set_bit(BNXT_STATE_ABORT_ERR, &bp->state); return rc; } + /* IRQ will be initialized later in bnxt_request_irq()*/ bnxt_clear_int_mode(bp); - rc = bnxt_init_int_mode(bp); - if (rc) { - clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state); - netdev_err(bp->dev, "init int mode failed\n"); - return rc; - } } rc = bnxt_cancel_reservations(bp, fw_reset); } From c2d20a3814d1b57dea6db82229edd702bde9c878 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 28 Apr 2025 15:59:00 -0700 Subject: [PATCH 1339/2924] bnxt_en: delay pci_alloc_irq_vectors() in the AER path This patch is similar to the last patch to delay the pci_alloc_irq_vectors() call in the AER path until after calling bnxt_reserve_rings(). bnxt_reserve_rings() needs to properly map the MSIX table first before we call pci_alloc_irq_vectors() which may immediately write to the MSIX table in some architectures. Move the bnxt_init_int_mode() call from bnxt_io_slot_reset() to bnxt_io_resume() after calling bnxt_reserve_rings(). With this change, the AER path may call bnxt_open() -> bnxt_hwrm_if_change() with bp->irq_tbl set to NULL. bp->irq_tbl is cleared when we call bnxt_clear_int_mode() in bnxt_io_slot_reset(). So we cannot use !bp->irq_tbl to detect aborted FW reset. Add a new BNXT_FW_RESET_STATE_ABORT to detect aborted FW reset in bnxt_hwrm_if_change(). Signed-off-by: Kashyap Desai Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 +++++++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3b2ea36f25a2a7..78e496b0ec2625 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12325,12 +12325,15 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) { struct hwrm_func_drv_if_change_output *resp; struct hwrm_func_drv_if_change_input *req; - bool fw_reset = !bp->irq_tbl; bool resc_reinit = false; bool caps_change = false; int rc, retry = 0; + bool fw_reset; u32 flags = 0; + fw_reset = (bp->fw_reset_state == BNXT_FW_RESET_STATE_ABORT); + bp->fw_reset_state = 0; + if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE)) return 0; @@ -14833,7 +14836,7 @@ static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) bnxt_dl_health_fw_status_update(bp, false); - bp->fw_reset_state = 0; + bp->fw_reset_state = BNXT_FW_RESET_STATE_ABORT; netif_close(bp->dev); } @@ -16931,10 +16934,9 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) if (!err) result = PCI_ERS_RESULT_RECOVERED; + /* IRQ will be initialized later in bnxt_io_resume */ bnxt_ulp_irq_stop(bp); bnxt_clear_int_mode(bp); - err = bnxt_init_int_mode(bp); - bnxt_ulp_irq_restart(bp, err); } reset_exit: @@ -16963,10 +16965,13 @@ static void bnxt_io_resume(struct pci_dev *pdev) err = bnxt_hwrm_func_qcaps(bp); if (!err) { - if (netif_running(netdev)) + if (netif_running(netdev)) { err = bnxt_open(netdev); - else + } else { err = bnxt_reserve_rings(bp, true); + if (!err) + err = bnxt_init_int_mode(bp); + } } if (!err) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 21726cf565866d..bc8b3b7e915d3b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2614,6 +2614,7 @@ struct bnxt { #define BNXT_FW_RESET_STATE_POLL_FW 4 #define BNXT_FW_RESET_STATE_OPENING 5 #define BNXT_FW_RESET_STATE_POLL_FW_DOWN 6 +#define BNXT_FW_RESET_STATE_ABORT 7 u16 fw_reset_min_dsecs; #define BNXT_DFLT_FW_RST_MIN_DSECS 20 From ea9376cf68230e05492f22ca45d329f16e262c7b Mon Sep 17 00:00:00 2001 From: Shruti Parab Date: Mon, 28 Apr 2025 15:59:01 -0700 Subject: [PATCH 1340/2924] bnxt_en: Fix coredump logic to free allocated buffer When handling HWRM_DBG_COREDUMP_LIST FW command in bnxt_hwrm_dbg_dma_data(), the allocated buffer info->dest_buf is not freed in the error path. In the normal path, info->dest_buf is assigned to coredump->data and it will eventually be freed after the coredump is collected. Free info->dest_buf immediately inside bnxt_hwrm_dbg_dma_data() in the error path. Fixes: c74751f4c392 ("bnxt_en: Return error if FW returns more data than dump length") Reported-by: Michael Chan Reviewed-by: Kalesh AP Signed-off-by: Shruti Parab Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 5576e7cf846317..9a74793648f692 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -116,6 +116,11 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, memcpy(info->dest_buf + off, dma_buf, len); } else { rc = -ENOBUFS; + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) { + kfree(info->dest_buf); + info->dest_buf = NULL; + } break; } } From 6b87bd94f34370bbf1dfa59352bed8efab5bf419 Mon Sep 17 00:00:00 2001 From: Shruti Parab Date: Mon, 28 Apr 2025 15:59:02 -0700 Subject: [PATCH 1341/2924] bnxt_en: Fix out-of-bound memcpy() during ethtool -w When retrieving the FW coredump using ethtool, it can sometimes cause memory corruption: BUG: KFENCE: memory corruption in __bnxt_get_coredump+0x3ef/0x670 [bnxt_en] Corrupted memory at 0x000000008f0f30e8 [ ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ] (in kfence-#45): __bnxt_get_coredump+0x3ef/0x670 [bnxt_en] ethtool_get_dump_data+0xdc/0x1a0 __dev_ethtool+0xa1e/0x1af0 dev_ethtool+0xa8/0x170 dev_ioctl+0x1b5/0x580 sock_do_ioctl+0xab/0xf0 sock_ioctl+0x1ce/0x2e0 __x64_sys_ioctl+0x87/0xc0 do_syscall_64+0x5c/0xf0 entry_SYSCALL_64_after_hwframe+0x78/0x80 ... This happens when copying the coredump segment list in bnxt_hwrm_dbg_dma_data() with the HWRM_DBG_COREDUMP_LIST FW command. The info->dest_buf buffer is allocated based on the number of coredump segments returned by the FW. The segment list is then DMA'ed by the FW and the length of the DMA is returned by FW. The driver then copies this DMA'ed segment list to info->dest_buf. In some cases, this DMA length may exceed the info->dest_buf length and cause the above BUG condition. Fix it by capping the copy length to not exceed the length of info->dest_buf. The extra DMA data contains no useful information. This code path is shared for the HWRM_DBG_COREDUMP_LIST and the HWRM_DBG_COREDUMP_RETRIEVE FW commands. The buffering is different for these 2 FW commands. To simplify the logic, we need to move the line to adjust the buffer length for HWRM_DBG_COREDUMP_RETRIEVE up, so that the new check to cap the copy length will work for both commands. Fixes: c74751f4c392 ("bnxt_en: Return error if FW returns more data than dump length") Reviewed-by: Kalesh AP Signed-off-by: Shruti Parab Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_coredump.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 9a74793648f692..a000d3f630bd3b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -110,10 +110,19 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, } } + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) + info->dest_buf_size += len; + if (info->dest_buf) { if ((info->seg_start + off + len) <= BNXT_COREDUMP_BUF_LEN(info->buf_len)) { - memcpy(info->dest_buf + off, dma_buf, len); + u16 copylen = min_t(u16, len, + info->dest_buf_size - off); + + memcpy(info->dest_buf + off, dma_buf, copylen); + if (copylen < len) + break; } else { rc = -ENOBUFS; if (cmn_req->req_type == @@ -125,10 +134,6 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, } } - if (cmn_req->req_type == - cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) - info->dest_buf_size += len; - if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE)) break; From 02e8be5a032cae0f4ca33c6053c44d83cf4acc93 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 28 Apr 2025 15:59:03 -0700 Subject: [PATCH 1342/2924] bnxt_en: Fix ethtool -d byte order for 32-bit values For version 1 register dump that includes the PCIe stats, the existing code incorrectly assumes that all PCIe stats are 64-bit values. Fix it by using an array containing the starting and ending index of the 32-bit values. The loop in bnxt_get_regs() will use the array to do proper endian swap for the 32-bit values. Fixes: b5d600b027eb ("bnxt_en: Add support for 'ethtool -d'") Reviewed-by: Shruti Parab Reviewed-by: Kalesh AP Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 7be37976f3e490..f5d490bf997e34 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2062,6 +2062,17 @@ static int bnxt_get_regs_len(struct net_device *dev) return reg_len; } +#define BNXT_PCIE_32B_ENTRY(start, end) \ + { offsetof(struct pcie_ctx_hw_stats, start), \ + offsetof(struct pcie_ctx_hw_stats, end) } + +static const struct { + u16 start; + u16 end; +} bnxt_pcie_32b_entries[] = { + BNXT_PCIE_32B_ENTRY(pcie_ltssm_histogram[0], pcie_ltssm_histogram[3]), +}; + static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { @@ -2094,12 +2105,27 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr); rc = hwrm_req_send(bp, req); if (!rc) { - __le64 *src = (__le64 *)hw_pcie_stats; - u64 *dst = (u64 *)(_p + BNXT_PXP_REG_LEN); - int i; - - for (i = 0; i < sizeof(*hw_pcie_stats) / sizeof(__le64); i++) - dst[i] = le64_to_cpu(src[i]); + u8 *dst = (u8 *)(_p + BNXT_PXP_REG_LEN); + u8 *src = (u8 *)hw_pcie_stats; + int i, j; + + for (i = 0, j = 0; i < sizeof(*hw_pcie_stats); ) { + if (i >= bnxt_pcie_32b_entries[j].start && + i <= bnxt_pcie_32b_entries[j].end) { + u32 *dst32 = (u32 *)(dst + i); + + *dst32 = le32_to_cpu(*(__le32 *)(src + i)); + i += 4; + if (i > bnxt_pcie_32b_entries[j].end && + j < ARRAY_SIZE(bnxt_pcie_32b_entries) - 1) + j++; + } else { + u64 *dst64 = (u64 *)(dst + i); + + *dst64 = le64_to_cpu(*(__le64 *)(src + i)); + i += 8; + } + } } hwrm_req_drop(bp, req); } From 77e40bbce93059658aee02786a32c5c98a240a8a Mon Sep 17 00:00:00 2001 From: Michael Liang Date: Tue, 29 Apr 2025 10:42:01 -0600 Subject: [PATCH 1343/2924] nvme-tcp: fix premature queue removal and I/O failover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch addresses a data corruption issue observed in nvme-tcp during testing. In an NVMe native multipath setup, when an I/O timeout occurs, all inflight I/Os are canceled almost immediately after the kernel socket is shut down. These canceled I/Os are reported as host path errors, triggering a failover that succeeds on a different path. However, at this point, the original I/O may still be outstanding in the host's network transmission path (e.g., the NIC’s TX queue). From the user-space app's perspective, the buffer associated with the I/O is considered completed since they're acked on the different path and may be reused for new I/O requests. Because nvme-tcp enables zero-copy by default in the transmission path, this can lead to corrupted data being sent to the original target, ultimately causing data corruption. We can reproduce this data corruption by injecting delay on one path and triggering i/o timeout. To prevent this issue, this change ensures that all inflight transmissions are fully completed from host's perspective before returning from queue stop. To handle concurrent I/O timeout from multiple namespaces under the same controller, always wait in queue stop regardless of queue's state. This aligns with the behavior of queue stopping in other NVMe fabric transports. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Michael Liang Reviewed-by: Mohamed Khalfella Reviewed-by: Randy Jennings Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 72d260201d8cfe..aba365f97cf6b4 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1946,7 +1946,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) cancel_work_sync(&queue->io_work); } -static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1965,6 +1965,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + int timeout = 100; + + while (timeout > 0) { + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) || + !sk_wmem_alloc_get(queue->sock->sk)) + return; + msleep(2); + timeout -= 2; + } + dev_warn(nctrl->device, + "qid %d: timeout draining sock wmem allocation expired\n", + qid); +} + +static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +{ + nvme_tcp_stop_queue_nowait(nctrl, qid); + nvme_tcp_wait_queue(nctrl, qid); +} + + static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) { write_lock_bh(&queue->sock->sk->sk_callback_lock); @@ -2032,7 +2057,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) int i; for (i = 1; i < ctrl->queue_count; i++) - nvme_tcp_stop_queue(ctrl, i); + nvme_tcp_stop_queue_nowait(ctrl, i); + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_wait_queue(ctrl, i); } static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, From 521987940ad4fd37fe3d0340ec6f39c4e8e91e36 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 30 Apr 2025 08:40:25 +1000 Subject: [PATCH 1344/2924] nvme-tcp: select CONFIG_TLS from CONFIG_NVME_TCP_TLS Ensure that TLS support is enabled in the kernel when CONFIG_NVME_TCP_TLS is enabled. Without this the code compiles, but does not actually work unless something else enables CONFIG_TLS. Fixes: be8e82caa68 ("nvme-tcp: enable TLS handshake upcall") Signed-off-by: Alistair Francis Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index d47dfa80fb9560..4d64b6935bb915 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -102,6 +102,7 @@ config NVME_TCP_TLS depends on NVME_TCP select NET_HANDSHAKE select KEYS + select TLS help Enables TLS encryption for NVMe TCP using the netlink handshake API. From ac38b7ef704c0659568fd4b2c7e6c1255fc51798 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 30 Apr 2025 08:23:47 +1000 Subject: [PATCH 1345/2924] nvmet-tcp: select CONFIG_TLS from CONFIG_NVME_TARGET_TCP_TLS Ensure that TLS support is enabled in the kernel when CONFIG_NVME_TARGET_TCP_TLS is enabled. Without this the code compiles, but does not actually work unless something else enables CONFIG_TLS. Fixes: 675b453e0241 ("nvmet-tcp: enable TLS handshake upcall") Signed-off-by: Alistair Francis Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index fb7446d6d6829b..4c253b433bf78d 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -98,6 +98,7 @@ config NVME_TARGET_TCP_TLS bool "NVMe over Fabrics TCP target TLS encryption support" depends on NVME_TARGET_TCP select NET_HANDSHAKE + select TLS help Enables TLS encryption for the NVMe TCP target using the netlink handshake API. From 46d22b47df2741996af277a2838b95f130436c13 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Wed, 23 Apr 2025 16:06:21 +1000 Subject: [PATCH 1346/2924] nvmet-tcp: don't restore null sk_state_change queue->state_change is set as part of nvmet_tcp_set_queue_sock(), but if the TCP connection isn't established when nvmet_tcp_set_queue_sock() is called then queue->state_change isn't set and sock->sk->sk_state_change isn't replaced. As such we don't need to restore sock->sk->sk_state_change if queue->state_change is NULL. This avoids NULL pointer dereferences such as this: [ 286.462026][ C0] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 286.462814][ C0] #PF: supervisor instruction fetch in kernel mode [ 286.463796][ C0] #PF: error_code(0x0010) - not-present page [ 286.464392][ C0] PGD 8000000140620067 P4D 8000000140620067 PUD 114201067 PMD 0 [ 286.465086][ C0] Oops: Oops: 0010 [#1] SMP KASAN PTI [ 286.465559][ C0] CPU: 0 UID: 0 PID: 1628 Comm: nvme Not tainted 6.15.0-rc2+ #11 PREEMPT(voluntary) [ 286.466393][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 [ 286.467147][ C0] RIP: 0010:0x0 [ 286.467420][ C0] Code: Unable to access opcode bytes at 0xffffffffffffffd6. [ 286.467977][ C0] RSP: 0018:ffff8883ae008580 EFLAGS: 00010246 [ 286.468425][ C0] RAX: 0000000000000000 RBX: ffff88813fd34100 RCX: ffffffffa386cc43 [ 286.469019][ C0] RDX: 1ffff11027fa68b6 RSI: 0000000000000008 RDI: ffff88813fd34100 [ 286.469545][ C0] RBP: ffff88813fd34160 R08: 0000000000000000 R09: ffffed1027fa682c [ 286.470072][ C0] R10: ffff88813fd34167 R11: 0000000000000000 R12: ffff88813fd344c3 [ 286.470585][ C0] R13: ffff88813fd34112 R14: ffff88813fd34aec R15: ffff888132cdd268 [ 286.471070][ C0] FS: 00007fe3c04c7d80(0000) GS:ffff88840743f000(0000) knlGS:0000000000000000 [ 286.471644][ C0] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 286.472543][ C0] CR2: ffffffffffffffd6 CR3: 000000012daca000 CR4: 00000000000006f0 [ 286.473500][ C0] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 286.474467][ C0] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400 [ 286.475453][ C0] Call Trace: [ 286.476102][ C0] [ 286.476719][ C0] tcp_fin+0x2bb/0x440 [ 286.477429][ C0] tcp_data_queue+0x190f/0x4e60 [ 286.478174][ C0] ? __build_skb_around+0x234/0x330 [ 286.478940][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.479659][ C0] ? __pfx_tcp_data_queue+0x10/0x10 [ 286.480431][ C0] ? tcp_try_undo_loss+0x640/0x6c0 [ 286.481196][ C0] ? seqcount_lockdep_reader_access.constprop.0+0x82/0x90 [ 286.482046][ C0] ? kvm_clock_get_cycles+0x14/0x30 [ 286.482769][ C0] ? ktime_get+0x66/0x150 [ 286.483433][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.484146][ C0] tcp_rcv_established+0x6e4/0x2050 [ 286.484857][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.485523][ C0] ? ipv4_dst_check+0x160/0x2b0 [ 286.486203][ C0] ? __pfx_tcp_rcv_established+0x10/0x10 [ 286.486917][ C0] ? lock_release+0x217/0x2c0 [ 286.487595][ C0] tcp_v4_do_rcv+0x4d6/0x9b0 [ 286.488279][ C0] tcp_v4_rcv+0x2af8/0x3e30 [ 286.488904][ C0] ? raw_local_deliver+0x51b/0xad0 [ 286.489551][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.490198][ C0] ? __pfx_tcp_v4_rcv+0x10/0x10 [ 286.490813][ C0] ? __pfx_raw_local_deliver+0x10/0x10 [ 286.491487][ C0] ? __pfx_nf_confirm+0x10/0x10 [nf_conntrack] [ 286.492275][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.492900][ C0] ip_protocol_deliver_rcu+0x8f/0x370 [ 286.493579][ C0] ip_local_deliver_finish+0x297/0x420 [ 286.494268][ C0] ip_local_deliver+0x168/0x430 [ 286.494867][ C0] ? __pfx_ip_local_deliver+0x10/0x10 [ 286.495498][ C0] ? __pfx_ip_local_deliver_finish+0x10/0x10 [ 286.496204][ C0] ? ip_rcv_finish_core+0x19a/0x1f20 [ 286.496806][ C0] ? lock_release+0x217/0x2c0 [ 286.497414][ C0] ip_rcv+0x455/0x6e0 [ 286.497945][ C0] ? __pfx_ip_rcv+0x10/0x10 [ 286.498550][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.499137][ C0] ? __pfx_ip_rcv_finish+0x10/0x10 [ 286.499763][ C0] ? lock_release+0x217/0x2c0 [ 286.500327][ C0] ? dl_scaled_delta_exec+0xd1/0x2c0 [ 286.500922][ C0] ? __pfx_ip_rcv+0x10/0x10 [ 286.501480][ C0] __netif_receive_skb_one_core+0x166/0x1b0 [ 286.502173][ C0] ? __pfx___netif_receive_skb_one_core+0x10/0x10 [ 286.502903][ C0] ? lock_acquire+0x2b2/0x310 [ 286.503487][ C0] ? process_backlog+0x372/0x1350 [ 286.504087][ C0] ? lock_release+0x217/0x2c0 [ 286.504642][ C0] process_backlog+0x3b9/0x1350 [ 286.505214][ C0] ? process_backlog+0x372/0x1350 [ 286.505779][ C0] __napi_poll.constprop.0+0xa6/0x490 [ 286.506363][ C0] net_rx_action+0x92e/0xe10 [ 286.506889][ C0] ? __pfx_net_rx_action+0x10/0x10 [ 286.507437][ C0] ? timerqueue_add+0x1f0/0x320 [ 286.507977][ C0] ? sched_clock_cpu+0x68/0x540 [ 286.508492][ C0] ? lock_acquire+0x2b2/0x310 [ 286.509043][ C0] ? kvm_sched_clock_read+0xd/0x20 [ 286.509607][ C0] ? handle_softirqs+0x1aa/0x7d0 [ 286.510187][ C0] handle_softirqs+0x1f2/0x7d0 [ 286.510754][ C0] ? __pfx_handle_softirqs+0x10/0x10 [ 286.511348][ C0] ? irqtime_account_irq+0x181/0x290 [ 286.511937][ C0] ? __dev_queue_xmit+0x85d/0x3450 [ 286.512510][ C0] do_softirq.part.0+0x89/0xc0 [ 286.513100][ C0] [ 286.513548][ C0] [ 286.513953][ C0] __local_bh_enable_ip+0x112/0x140 [ 286.514522][ C0] ? __dev_queue_xmit+0x85d/0x3450 [ 286.515072][ C0] __dev_queue_xmit+0x872/0x3450 [ 286.515619][ C0] ? nft_do_chain+0xe16/0x15b0 [nf_tables] [ 286.516252][ C0] ? __pfx___dev_queue_xmit+0x10/0x10 [ 286.516817][ C0] ? selinux_ip_postroute+0x43c/0xc50 [ 286.517433][ C0] ? __pfx_selinux_ip_postroute+0x10/0x10 [ 286.518061][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.518606][ C0] ? ip_output+0x164/0x4a0 [ 286.519149][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.519671][ C0] ? ip_finish_output2+0x17d5/0x1fb0 [ 286.520258][ C0] ip_finish_output2+0xb4b/0x1fb0 [ 286.520787][ C0] ? __pfx_ip_finish_output2+0x10/0x10 [ 286.521355][ C0] ? __ip_finish_output+0x15d/0x750 [ 286.521890][ C0] ip_output+0x164/0x4a0 [ 286.522372][ C0] ? __pfx_ip_output+0x10/0x10 [ 286.522872][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.523402][ C0] ? _raw_spin_unlock_irqrestore+0x4c/0x60 [ 286.524031][ C0] ? __pfx_ip_finish_output+0x10/0x10 [ 286.524605][ C0] ? __ip_queue_xmit+0x999/0x2260 [ 286.525200][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.525744][ C0] ? ipv4_dst_check+0x16a/0x2b0 [ 286.526279][ C0] ? lock_release+0x217/0x2c0 [ 286.526793][ C0] __ip_queue_xmit+0x1883/0x2260 [ 286.527324][ C0] ? __skb_clone+0x54c/0x730 [ 286.527827][ C0] __tcp_transmit_skb+0x209b/0x37a0 [ 286.528374][ C0] ? __pfx___tcp_transmit_skb+0x10/0x10 [ 286.528952][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.529472][ C0] ? seqcount_lockdep_reader_access.constprop.0+0x82/0x90 [ 286.530152][ C0] ? trace_hardirqs_on+0x12/0x120 [ 286.530691][ C0] tcp_write_xmit+0xb81/0x88b0 [ 286.531224][ C0] ? mod_memcg_state+0x4d/0x60 [ 286.531736][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.532253][ C0] __tcp_push_pending_frames+0x90/0x320 [ 286.532826][ C0] tcp_send_fin+0x141/0xb50 [ 286.533352][ C0] ? __pfx_tcp_send_fin+0x10/0x10 [ 286.533908][ C0] ? __local_bh_enable_ip+0xab/0x140 [ 286.534495][ C0] inet_shutdown+0x243/0x320 [ 286.535077][ C0] nvme_tcp_alloc_queue+0xb3b/0x2590 [nvme_tcp] [ 286.535709][ C0] ? do_raw_spin_lock+0x129/0x260 [ 286.536314][ C0] ? __pfx_nvme_tcp_alloc_queue+0x10/0x10 [nvme_tcp] [ 286.536996][ C0] ? do_raw_spin_unlock+0x54/0x1e0 [ 286.537550][ C0] ? _raw_spin_unlock+0x29/0x50 [ 286.538127][ C0] ? do_raw_spin_lock+0x129/0x260 [ 286.538664][ C0] ? __pfx_do_raw_spin_lock+0x10/0x10 [ 286.539249][ C0] ? nvme_tcp_alloc_admin_queue+0xd5/0x340 [nvme_tcp] [ 286.539892][ C0] ? __wake_up+0x40/0x60 [ 286.540392][ C0] nvme_tcp_alloc_admin_queue+0xd5/0x340 [nvme_tcp] [ 286.541047][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.541589][ C0] nvme_tcp_setup_ctrl+0x8b/0x7a0 [nvme_tcp] [ 286.542254][ C0] ? _raw_spin_unlock_irqrestore+0x4c/0x60 [ 286.542887][ C0] ? __pfx_nvme_tcp_setup_ctrl+0x10/0x10 [nvme_tcp] [ 286.543568][ C0] ? trace_hardirqs_on+0x12/0x120 [ 286.544166][ C0] ? _raw_spin_unlock_irqrestore+0x35/0x60 [ 286.544792][ C0] ? nvme_change_ctrl_state+0x196/0x2e0 [nvme_core] [ 286.545477][ C0] nvme_tcp_create_ctrl+0x839/0xb90 [nvme_tcp] [ 286.546126][ C0] nvmf_dev_write+0x3db/0x7e0 [nvme_fabrics] [ 286.546775][ C0] ? rw_verify_area+0x69/0x520 [ 286.547334][ C0] vfs_write+0x218/0xe90 [ 286.547854][ C0] ? do_syscall_64+0x9f/0x190 [ 286.548408][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.549037][ C0] ? syscall_exit_to_user_mode+0x93/0x280 [ 286.549659][ C0] ? __pfx_vfs_write+0x10/0x10 [ 286.550259][ C0] ? do_syscall_64+0x9f/0x190 [ 286.550840][ C0] ? syscall_exit_to_user_mode+0x8e/0x280 [ 286.551516][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.552180][ C0] ? syscall_exit_to_user_mode+0x93/0x280 [ 286.552834][ C0] ? ksys_read+0xf5/0x1c0 [ 286.553386][ C0] ? __pfx_ksys_read+0x10/0x10 [ 286.553964][ C0] ksys_write+0xf5/0x1c0 [ 286.554499][ C0] ? __pfx_ksys_write+0x10/0x10 [ 286.555072][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.555698][ C0] ? syscall_exit_to_user_mode+0x93/0x280 [ 286.556319][ C0] ? do_syscall_64+0x54/0x190 [ 286.556866][ C0] do_syscall_64+0x93/0x190 [ 286.557420][ C0] ? rcu_read_unlock+0x17/0x60 [ 286.557986][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.558526][ C0] ? lock_release+0x217/0x2c0 [ 286.559087][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.559659][ C0] ? count_memcg_events.constprop.0+0x4a/0x60 [ 286.560476][ C0] ? exc_page_fault+0x7a/0x110 [ 286.561064][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.561647][ C0] ? lock_release+0x217/0x2c0 [ 286.562257][ C0] ? do_user_addr_fault+0x171/0xa00 [ 286.562839][ C0] ? do_user_addr_fault+0x4a2/0xa00 [ 286.563453][ C0] ? irqentry_exit_to_user_mode+0x84/0x270 [ 286.564112][ C0] ? rcu_is_watching+0x11/0xb0 [ 286.564677][ C0] ? irqentry_exit_to_user_mode+0x84/0x270 [ 286.565317][ C0] ? trace_hardirqs_on_prepare+0xdb/0x120 [ 286.565922][ C0] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 286.566542][ C0] RIP: 0033:0x7fe3c05e6504 [ 286.567102][ C0] Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d c5 8b 10 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 [ 286.568931][ C0] RSP: 002b:00007fff76444f58 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 [ 286.569807][ C0] RAX: ffffffffffffffda RBX: 000000003b40d930 RCX: 00007fe3c05e6504 [ 286.570621][ C0] RDX: 00000000000000cf RSI: 000000003b40d930 RDI: 0000000000000003 [ 286.571443][ C0] RBP: 0000000000000003 R08: 00000000000000cf R09: 000000003b40d930 [ 286.572246][ C0] R10: 0000000000000000 R11: 0000000000000202 R12: 000000003b40cd60 [ 286.573069][ C0] R13: 00000000000000cf R14: 00007fe3c07417f8 R15: 00007fe3c073502e [ 286.573886][ C0] Closes: https://lore.kernel.org/linux-nvme/5hdonndzoqa265oq3bj6iarwtfk5dewxxjtbjvn5uqnwclpwt6@a2n6w3taxxex/ Signed-off-by: Alistair Francis Reviewed-by: Sagi Grimberg Tested-by: Shin'ichiro Kawasaki Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f2d0c920269b94..12a5cb8641ca30 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1560,6 +1560,9 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) { struct socket *sock = queue->sock; + if (!queue->state_change) + return; + write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_data_ready = queue->data_ready; sock->sk->sk_state_change = queue->state_change; From 8edb86b2ed1d63cc400aecae8eb8c8114837171a Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 25 Apr 2025 11:34:34 +0200 Subject: [PATCH 1347/2924] nvmet-auth: always free derived key data After calling nvme_auth_derive_tls_psk() we need to free the resulting psk data, as either TLS is disable (and we don't need the data anyway) or the psk data is copied into the resulting key (and can be free, too). Fixes: fa2e0f8bbc68 ("nvmet-tcp: support secure channel concatenation") Reported-by: Yi Zhang Suggested-by: Maurizio Lombardi Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Tested-by: Yi Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/target/auth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index cef8d77f477b95..9429b821840856 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -600,13 +600,12 @@ void nvmet_auth_insert_psk(struct nvmet_sq *sq) pr_warn("%s: ctrl %d qid %d failed to refresh key, error %ld\n", __func__, sq->ctrl->cntlid, sq->qid, PTR_ERR(tls_key)); tls_key = NULL; - kfree_sensitive(tls_psk); } if (sq->ctrl->tls_key) key_put(sq->ctrl->tls_key); sq->ctrl->tls_key = tls_key; #endif - + kfree_sensitive(tls_psk); out_free_digest: kfree_sensitive(digest); out_free_psk: From f024d3a8ded0d8d2129ae123d7a5305c29ca44ce Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Apr 2025 07:17:17 -0600 Subject: [PATCH 1348/2924] io_uring/fdinfo: annotate racy sq/cq head/tail reads syzbot complains about the cached sq head read, and it's totally right. But we don't need to care, it's just reading fdinfo, and reading the CQ or SQ tail/head entries are known racy in that they are just a view into that very instant and may of course be outdated by the time they are reported. Annotate both the SQ head and CQ tail read with data_race() to avoid this syzbot complaint. Link: https://lore.kernel.org/io-uring/6811f6dc.050a0220.39e3a1.0d0e.GAE@google.com/ Reported-by: syzbot+3e77fd302e99f5af9394@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index f60d0a9d505e25..9414ca6d101c0e 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -123,11 +123,11 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) seq_printf(m, "SqMask:\t0x%x\n", sq_mask); seq_printf(m, "SqHead:\t%u\n", sq_head); seq_printf(m, "SqTail:\t%u\n", sq_tail); - seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head); + seq_printf(m, "CachedSqHead:\t%u\n", data_race(ctx->cached_sq_head)); seq_printf(m, "CqMask:\t0x%x\n", cq_mask); seq_printf(m, "CqHead:\t%u\n", cq_head); seq_printf(m, "CqTail:\t%u\n", cq_tail); - seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail); + seq_printf(m, "CachedCqTail:\t%u\n", data_race(ctx->cached_cq_tail)); seq_printf(m, "SQEs:\t%u\n", sq_tail - sq_head); sq_entries = min(sq_tail - sq_head, ctx->sq_entries); for (i = 0; i < sq_entries; i++) { From 533a8a67cc3b072a24122d1ea10a8db66a5c9393 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 28 Apr 2025 21:50:07 +0200 Subject: [PATCH 1349/2924] soundwire: intel_auxdevice: Fix system suspend/resume handling Before commit bca84a7b93fd ("PM: sleep: Use DPM_FLAG_SMART_SUSPEND conditionally") the runtime PM status of the device in intel_resume() had always been RPM_ACTIVE because setting DPM_FLAG_SMART_SUSPEND had caused the core to call pm_runtime_set_active() for that device during the "noirq" resume phase. For this reason, the pm_runtime_suspended() check in intel_resume() had never triggered and the code depending on it had never run. That had not caused any observable functional issues to appear, so effectively the code in question had never been needed. After commit bca84a7b93fd the core does not call pm_runtime_set_active() for all devices with DPM_FLAG_SMART_SUSPEND set any more and the code depending on the pm_runtime_suspended() check in intel_resume() runs if the device is runtime-suspended prior to a system-wide suspend transition. Unfortunately, when it runs, it breaks things due to the attempt to runtime-resume bus->dev which most likely is not ready for a runtime resume at that point. It also does other more-or-less questionable things. Namely, it calls pm_runtime_idle() for a device with a nonzero runtime PM usage counter which has no effect (all devices have nonzero runtime PM usage counters during system-wide suspend and resume). It also calls pm_runtime_mark_last_busy() for the device even though devices cannot runtime-suspend during system-wide suspend and resume (because their runtime PM usage counters are nonzero) and an analogous call is made in the same function later. Moreover, it sets the runtime PM status of the device to RPM_ACTIVE before activating it. For the reasons listed above, remove that code altogether. On top of that, add a pm_runtime_disable() call to intel_suspend() to prevent the device from being runtime-resumed at any point after intel_suspend() has started to manipulate it because the changes made by that function would be undone by a runtime-suspend of the device. Next, once runtime PM has been disabled, the runtime PM status of the device cannot change, so pm_runtime_status_suspended() can be used instead of pm_runtime_suspended() in intel_suspend(). Finally, make intel_resume() call pm_runtime_set_active() at the end to set the runtime PM status of the device to "active" because it has just been activated and re-enable runtime PM for it after that. Additionally, drop the setting of DPM_FLAG_SMART_SUSPEND from the driver because it has no effect on devices handled by it. Fixes: bca84a7b93fd ("PM: sleep: Use DPM_FLAG_SMART_SUSPEND conditionally") Reported-by: Bard Liao Tested-by: Bard Liao Signed-off-by: Rafael J. Wysocki Acked-by: Bard Liao Link: https://patch.msgid.link/12680420.O9o76ZdvQC@rjwysocki.net --- drivers/soundwire/intel_auxdevice.c | 36 +++++++++++------------------ 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 5ea6399e6c9b01..10a602d4843ae8 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -353,9 +353,6 @@ static int intel_link_probe(struct auxiliary_device *auxdev, /* use generic bandwidth allocation algorithm */ sdw->cdns.bus.compute_params = sdw_compute_params; - /* avoid resuming from pm_runtime suspend if it's not required */ - dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - ret = sdw_bus_master_add(bus, dev, dev->fwnode); if (ret) { dev_err(dev, "sdw_bus_master_add fail: %d\n", ret); @@ -640,7 +637,10 @@ static int __maybe_unused intel_suspend(struct device *dev) return 0; } - if (pm_runtime_suspended(dev)) { + /* Prevent runtime PM from racing with the code below. */ + pm_runtime_disable(dev); + + if (pm_runtime_status_suspended(dev)) { dev_dbg(dev, "pm_runtime status: suspended\n"); clock_stop_quirks = sdw->link_res->clock_stop_quirks; @@ -648,7 +648,7 @@ static int __maybe_unused intel_suspend(struct device *dev) if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || !clock_stop_quirks) { - if (pm_runtime_suspended(dev->parent)) { + if (pm_runtime_status_suspended(dev->parent)) { /* * paranoia check: this should not happen with the .prepare * resume to full power @@ -715,7 +715,6 @@ static int __maybe_unused intel_resume(struct device *dev) struct sdw_cdns *cdns = dev_get_drvdata(dev); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_bus *bus = &cdns->bus; - int link_flags; int ret; if (bus->prop.hw_disabled || !sdw->startup_done) { @@ -724,23 +723,6 @@ static int __maybe_unused intel_resume(struct device *dev) return 0; } - if (pm_runtime_suspended(dev)) { - dev_dbg(dev, "pm_runtime status was suspended, forcing active\n"); - - /* follow required sequence from runtime_pm.rst */ - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_mark_last_busy(dev); - pm_runtime_enable(dev); - - pm_runtime_resume(bus->dev); - - link_flags = md_flags >> (bus->link_id * 8); - - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) - pm_runtime_idle(dev); - } - ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s failed: %d\n", __func__, ret); @@ -760,6 +742,14 @@ static int __maybe_unused intel_resume(struct device *dev) return ret; } + /* + * Runtime PM has been disabled in intel_suspend(), so set the status + * to active because the device has just been resumed and re-enable + * runtime PM. + */ + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + /* * after system resume, the pm_runtime suspend() may kick in * during the enumeration, before any children device force the From ac4e04d9e378f5aa826c2406ad7871ae1b6a6fb9 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 29 Apr 2025 14:07:11 -0700 Subject: [PATCH 1350/2924] cpufreq: intel_pstate: Unchecked MSR aceess in legacy mode When turbo mode is unavailable on a Skylake-X system, executing the command: # echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo results in an unchecked MSR access error: WRMSR to 0x199 (attempted to write 0x0000000100001300). This issue was reproduced on an OEM (Original Equipment Manufacturer) system and is not a common problem across all Skylake-X systems. This error occurs because the MSR 0x199 Turbo Engage Bit (bit 32) is set when turbo mode is disabled. The issue arises when intel_pstate fails to detect that turbo mode is disabled. Here intel_pstate relies on MSR_IA32_MISC_ENABLE bit 38 to determine the status of turbo mode. However, on this system, bit 38 is not set even when turbo mode is disabled. According to the Intel Software Developer's Manual (SDM), the BIOS sets this bit during platform initialization to enable or disable opportunistic processor performance operations. Logically, this bit should be set in such cases. However, the SDM also specifies that "OS and applications must use CPUID leaf 06H to detect processors with opportunistic processor performance operations enabled." Therefore, in addition to checking MSR_IA32_MISC_ENABLE bit 38, verify that CPUID.06H:EAX[1] is 0 to accurately determine if turbo mode is disabled. Fixes: 4521e1a0ce17 ("cpufreq: intel_pstate: Reflect current no_turbo state correctly") Signed-off-by: Srinivas Pandruvada Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f41ed0b9e61015..ba9bf06f1c7736 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -598,6 +598,9 @@ static bool turbo_is_disabled(void) { u64 misc_en; + if (!cpu_feature_enabled(X86_FEATURE_IDA)) + return true; + rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); From 74c72419ec8da5cbc9c49410d3c44bb954538bdd Mon Sep 17 00:00:00 2001 From: Jethro Donaldson Date: Wed, 30 Apr 2025 00:59:15 +1200 Subject: [PATCH 1351/2924] smb: client: fix zero length for mkdir POSIX create context SMB create requests issued via smb311_posix_mkdir() have an incorrect length of zero bytes for the POSIX create context data. ksmbd server rejects such requests and logs "cli req too short" causing mkdir to fail with "invalid argument" on the client side. It also causes subsequent rmmod to crash in cifs_destroy_request_bufs() Inspection of packets sent by cifs.ko using wireshark show valid data for the SMB2_POSIX_CREATE_CONTEXT is appended with the correct offset, but with an incorrect length of zero bytes. Fails with ksmbd+cifs.ko only as Windows server/client does not use POSIX extensions. Fix smb311_posix_mkdir() to set req->CreateContextsLength as part of appending the POSIX creation context to the request. Signed-off-by: Jethro Donaldson Acked-by: Paulo Alcantara (Red Hat) Reviewed-by: Namjae Jeon Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c4d52bebd37d30..a9a49555d43bf6 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2921,6 +2921,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, req->CreateContextsOffset = cpu_to_le32( sizeof(struct smb2_create_req) + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, iov[n_iov-1].iov_len); pc_buf = iov[n_iov-1].iov_base; } From 1041c117a2c33cdffc4f695ac4b469e9124d24d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 30 Dec 2024 20:34:18 +0100 Subject: [PATCH 1352/2924] cifs: Fix and improve cifs_query_path_info() and cifs_query_file_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CAP_NT_SMBS was not negotiated then do not issue CIFSSMBQPathInfo() and CIFSSMBQFileInfo() commands. CIFSSMBQPathInfo() is not supported by non-NT Win9x SMB server and CIFSSMBQFileInfo() returns from Win9x SMB server bogus data in Attributes field (for example lot of files are marked as reparse points, even Win9x does not support them and read-only bit is not marked for read-only files). Correct information is returned by CIFSFindFirst() or SMBQueryInformation() command. So as a fallback in cifs_query_path_info() function use CIFSFindFirst() with SMB_FIND_FILE_FULL_DIRECTORY_INFO level which is supported by both NT and non-NT servers and as a last option use SMBQueryInformation() as it was before. And in function cifs_query_file_info() immediately returns -EOPNOTSUPP when not communicating with NT server. Client then revalidate inode entry by the cifs_query_path_info() call, which is working fine. So fstat() syscall on already opened file will receive correct information. Note that both fallback functions in non-UNICODE mode expands wildcards. Therefore those fallback functions cannot be used on paths which contain SMB wildcard characters (* ? " > <). CIFSFindFirst() returns all 4 time attributes as opposite of SMBQueryInformation() which returns only one. With this change it is possible to query all 4 times attributes from Win9x server and at the same time, client minimize sending of unsupported commands to server. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/smb1ops.c | 103 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 95 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 0adeec652dc191..5c7a80bce5bd54 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -543,24 +543,104 @@ static int cifs_query_path_info(const unsigned int xid, const char *full_path, struct cifs_open_info_data *data) { - int rc; + int rc = -EOPNOTSUPP; FILE_ALL_INFO fi = {}; + struct cifs_search_info search_info = {}; + bool non_unicode_wildcard = false; data->reparse_point = false; data->adjust_tz = false; - /* could do find first instead but this returns more info */ - rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, cifs_sb->local_nls, - cifs_remap(cifs_sb)); /* - * BB optimize code so we do not make the above call when server claims - * no NT SMB support and the above call failed at least once - set flag - * in tcon or mount. + * First try CIFSSMBQPathInfo() function which returns more info + * (NumberOfLinks) than CIFSFindFirst() fallback function. + * Some servers like Win9x do not support SMB_QUERY_FILE_ALL_INFO over + * TRANS2_QUERY_PATH_INFORMATION, but supports it with filehandle over + * TRANS2_QUERY_FILE_INFORMATION (function CIFSSMBQFileInfo(). But SMB + * Open command on non-NT servers works only for files, does not work + * for directories. And moreover Win9x SMB server returns bogus data in + * SMB_QUERY_FILE_ALL_INFO Attributes field. So for non-NT servers, + * do not even use CIFSSMBQPathInfo() or CIFSSMBQFileInfo() function. + */ + if (tcon->ses->capabilities & CAP_NT_SMBS) + rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, + cifs_sb->local_nls, cifs_remap(cifs_sb)); + + /* + * Non-UNICODE variant of fallback functions below expands wildcards, + * so they cannot be used for querying paths with wildcard characters. */ - if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { + if (rc && !(tcon->ses->capabilities & CAP_UNICODE) && strpbrk(full_path, "*?\"><")) + non_unicode_wildcard = true; + + /* + * Then fallback to CIFSFindFirst() which works also with non-NT servers + * but does not does not provide NumberOfLinks. + */ + if ((rc == -EOPNOTSUPP || rc == -EINVAL) && + !non_unicode_wildcard) { + if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) + search_info.info_level = SMB_FIND_FILE_INFO_STANDARD; + else + search_info.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; + rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb, NULL, + CIFS_SEARCH_CLOSE_ALWAYS | CIFS_SEARCH_CLOSE_AT_END, + &search_info, false); + if (rc == 0) { + if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) { + FIND_FILE_STANDARD_INFO *di; + int offset = tcon->ses->server->timeAdj; + + di = (FIND_FILE_STANDARD_INFO *)search_info.srch_entries_start; + fi.CreationTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->CreationDate, di->CreationTime, offset))); + fi.LastAccessTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->LastAccessDate, di->LastAccessTime, offset))); + fi.LastWriteTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->LastWriteDate, di->LastWriteTime, offset))); + fi.ChangeTime = fi.LastWriteTime; + fi.Attributes = cpu_to_le32(le16_to_cpu(di->Attributes)); + fi.AllocationSize = cpu_to_le64(le32_to_cpu(di->AllocationSize)); + fi.EndOfFile = cpu_to_le64(le32_to_cpu(di->DataSize)); + } else { + FILE_FULL_DIRECTORY_INFO *di; + + di = (FILE_FULL_DIRECTORY_INFO *)search_info.srch_entries_start; + fi.CreationTime = di->CreationTime; + fi.LastAccessTime = di->LastAccessTime; + fi.LastWriteTime = di->LastWriteTime; + fi.ChangeTime = di->ChangeTime; + fi.Attributes = di->ExtFileAttributes; + fi.AllocationSize = di->AllocationSize; + fi.EndOfFile = di->EndOfFile; + fi.EASize = di->EaSize; + } + fi.NumberOfLinks = cpu_to_le32(1); + fi.DeletePending = 0; + fi.Directory = !!(le32_to_cpu(fi.Attributes) & ATTR_DIRECTORY); + cifs_buf_release(search_info.ntwrk_buf_start); + } else if (!full_path[0]) { + /* + * CIFSFindFirst() does not work on root path if the + * root path was exported on the server from the top + * level path (drive letter). + */ + rc = -EOPNOTSUPP; + } + } + + /* + * If everything failed then fallback to the legacy SMB command + * SMB_COM_QUERY_INFORMATION which works with all servers, but + * provide just few information. + */ + if ((rc == -EOPNOTSUPP || rc == -EINVAL) && !non_unicode_wildcard) { rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls, cifs_remap(cifs_sb)); data->adjust_tz = true; + } else if ((rc == -EOPNOTSUPP || rc == -EINVAL) && non_unicode_wildcard) { + /* Path with non-UNICODE wildcard character cannot exist. */ + rc = -ENOENT; } if (!rc) { @@ -639,6 +719,13 @@ static int cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, int rc; FILE_ALL_INFO fi = {}; + /* + * CIFSSMBQFileInfo() for non-NT servers returns bogus data in + * Attributes fields. So do not use this command for non-NT servers. + */ + if (!(tcon->ses->capabilities & CAP_NT_SMBS)) + return -EOPNOTSUPP; + if (cfile->symlink_target) { data->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL); if (!data->symlink_target) From f122121796f91168d0894c2710b8dd71330a34f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 30 Dec 2024 21:32:39 +0100 Subject: [PATCH 1353/2924] cifs: Fix changing times and read-only attr over SMB1 smb_set_file_info() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function CIFSSMBSetPathInfo() is not supported by non-NT servers and returns error. Fallback code via open filehandle and CIFSSMBSetFileInfo() does not work neither because CIFS_open() works also only on NT server. Therefore currently the whole smb_set_file_info() function as a SMB1 callback for the ->set_file_info() does not work with older non-NT SMB servers, like Win9x and others. This change implements fallback code in smb_set_file_info() which will works with any server and allows to change time values and also to set or clear read-only attributes. To make existing fallback code via CIFSSMBSetFileInfo() working with also non-NT servers, it is needed to change open function from CIFS_open() (which is NT specific) to cifs_open_file() which works with any server (this is just a open wrapper function which choose the correct open function supported by the server). CIFSSMBSetFileInfo() is working also on non-NT servers, but zero time values are not treated specially. So first it is needed to fill all time values if some of them are missing, via cifs_query_path_info() call. There is another issue, opening file in write-mode (needed for changing attributes) is not possible when the file has read-only attribute set. The only option how to clear read-only attribute is via SMB_COM_SETATTR command. And opening directory is not possible neither and here the SMB_COM_SETATTR command is the only option how to change attributes. And CIFSSMBSetFileInfo() does not honor setting read-only attribute, so for setting is also needed to use SMB_COM_SETATTR command. Existing code in cifs_query_path_info() is already using SMB_COM_GETATTR as a fallback code path (function SMBQueryInformation()), so introduce a new function SMBSetInformation which will implement SMB_COM_SETATTR command. My testing showed that Windows XP SMB1 client is also using SMB_COM_SETATTR command for setting or clearing read-only attribute against non-NT server. So this can prove that this is the correct way how to do it. With this change it is possible set all 4 time values and all attributes, including clearing and setting read-only bit on non-NT SMB servers. Tested against Win98 SMB1 server. This change fixes "touch" command which was failing when called on existing file. And fixes also "chmod +w" and "chmod -w" commands which were also failing (as they are changing read-only attribute). Note that this change depends on following change "cifs: Improve cifs_query_path_info() and cifs_query_file_info()" as it require to query all 4 time attribute values. Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/cifspdu.h | 5 +- fs/smb/client/cifsproto.h | 4 ++ fs/smb/client/cifssmb.c | 57 +++++++++++++++++++ fs/smb/client/smb1ops.c | 112 +++++++++++++++++++++++++++++++++++--- 4 files changed, 166 insertions(+), 12 deletions(-) diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 18d67ab113f0ec..1b79fe07476f65 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -1266,10 +1266,9 @@ typedef struct smb_com_query_information_rsp { typedef struct smb_com_setattr_req { struct smb_hdr hdr; /* wct = 8 */ __le16 attr; - __le16 time_low; - __le16 time_high; + __le32 last_write_time; __le16 reserved[5]; /* must be zero */ - __u16 ByteCount; + __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ unsigned char fileName[]; } __attribute__((packed)) SETATTR_REQ; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 59f6fdfe560ef3..ecf774a8f1ca01 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -395,6 +395,10 @@ extern int CIFSSMBQFSUnixInfo(const unsigned int xid, struct cifs_tcon *tcon); extern int CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); +extern int SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __le32 attributes, __le64 write_time, + const struct nls_table *nls_codepage, + struct cifs_sb_info *cifs_sb); extern int CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const FILE_BASIC_INFO *data, const struct nls_table *nls_codepage, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 60cb264a01e511..f55457b4b82e36 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -5171,6 +5171,63 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +int +SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __le32 attributes, __le64 write_time, + const struct nls_table *nls_codepage, + struct cifs_sb_info *cifs_sb) +{ + SETATTR_REQ *pSMB; + SETATTR_RSP *pSMBr; + struct timespec64 ts; + int bytes_returned; + int name_len; + int rc; + + cifs_dbg(FYI, "In %s path %s\n", __func__, fileName); + +retry: + rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB, + (void **) &pSMBr); + if (rc) + return rc; + + if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { + name_len = + cifsConvertToUTF16((__le16 *) pSMB->fileName, + fileName, PATH_MAX, nls_codepage, + cifs_remap(cifs_sb)); + name_len++; /* trailing null */ + name_len *= 2; + } else { + name_len = copy_path_name(pSMB->fileName, fileName); + } + /* Only few attributes can be set by this command, others are not accepted by Win9x. */ + pSMB->attr = cpu_to_le16(le32_to_cpu(attributes) & + (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | ATTR_ARCHIVE)); + /* Zero write time value (in both NT and SETATTR formats) means to not change it. */ + if (le64_to_cpu(write_time) != 0) { + ts = cifs_NTtimeToUnix(write_time); + pSMB->last_write_time = cpu_to_le32(ts.tv_sec); + } + pSMB->BufferFormat = 0x04; + name_len++; /* account for buffer type byte */ + inc_rfc1001_len(pSMB, (__u16)name_len); + pSMB->ByteCount = cpu_to_le16(name_len); + + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned, 0); + if (rc) + cifs_dbg(FYI, "Send error in %s = %d\n", __func__, rc); + + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto retry; + + return rc; +} + /* Some legacy servers such as NT4 require that the file times be set on an open handle, rather than by pathname - this is awkward due to potential access conflicts on the open, but it is unavoidable for these diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 5c7a80bce5bd54..ab26b9e9b0e70f 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -896,6 +896,9 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifsFileInfo *open_file; + FILE_BASIC_INFO new_buf; + struct cifs_open_info_data query_data; + __le64 write_time = buf->LastWriteTime; struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; @@ -903,20 +906,58 @@ smb_set_file_info(struct inode *inode, const char *full_path, /* if the file is already open for write, just use that fileid */ open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY); + if (open_file) { fid.netfid = open_file->fid.netfid; netpid = open_file->pid; tcon = tlink_tcon(open_file->tlink); - goto set_via_filehandle; + } else { + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + tlink = NULL; + goto out; + } + tcon = tlink_tcon(tlink); } - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) { - rc = PTR_ERR(tlink); - tlink = NULL; - goto out; + /* + * Non-NT servers interprets zero time value in SMB_SET_FILE_BASIC_INFO + * over TRANS2_SET_FILE_INFORMATION as a valid time value. NT servers + * interprets zero time value as do not change existing value on server. + * API of ->set_file_info() callback expects that zero time value has + * the NT meaning - do not change. Therefore if server is non-NT and + * some time values in "buf" are zero, then fetch missing time values. + */ + if (!(tcon->ses->capabilities & CAP_NT_SMBS) && + (!buf->CreationTime || !buf->LastAccessTime || + !buf->LastWriteTime || !buf->ChangeTime)) { + rc = cifs_query_path_info(xid, tcon, cifs_sb, full_path, &query_data); + if (rc) { + if (open_file) { + cifsFileInfo_put(open_file); + open_file = NULL; + } + goto out; + } + /* + * Original write_time from buf->LastWriteTime is preserved + * as SMBSetInformation() interprets zero as do not change. + */ + new_buf = *buf; + buf = &new_buf; + if (!buf->CreationTime) + buf->CreationTime = query_data.fi.CreationTime; + if (!buf->LastAccessTime) + buf->LastAccessTime = query_data.fi.LastAccessTime; + if (!buf->LastWriteTime) + buf->LastWriteTime = query_data.fi.LastWriteTime; + if (!buf->ChangeTime) + buf->ChangeTime = query_data.fi.ChangeTime; } - tcon = tlink_tcon(tlink); + + if (open_file) + goto set_via_filehandle; rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls, cifs_sb); @@ -937,8 +978,45 @@ smb_set_file_info(struct inode *inode, const char *full_path, .fid = &fid, }; - cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); - rc = CIFS_open(xid, &oparms, &oplock, NULL); + if (S_ISDIR(inode->i_mode) && !(tcon->ses->capabilities & CAP_NT_SMBS)) { + /* Opening directory path is not possible on non-NT servers. */ + rc = -EOPNOTSUPP; + } else { + /* + * Use cifs_open_file() instead of CIFS_open() as the + * cifs_open_file() selects the correct function which + * works also on non-NT servers. + */ + rc = cifs_open_file(xid, &oparms, &oplock, NULL); + /* + * Opening path for writing on non-NT servers is not + * possible when the read-only attribute is already set. + * Non-NT server in this case returns -EACCES. For those + * servers the only possible way how to clear the read-only + * bit is via SMB_COM_SETATTR command. + */ + if (rc == -EACCES && + (cinode->cifsAttrs & ATTR_READONLY) && + le32_to_cpu(buf->Attributes) != 0 && /* 0 = do not change attrs */ + !(le32_to_cpu(buf->Attributes) & ATTR_READONLY) && + !(tcon->ses->capabilities & CAP_NT_SMBS)) + rc = -EOPNOTSUPP; + } + + /* Fallback to SMB_COM_SETATTR command when absolutelty needed. */ + if (rc == -EOPNOTSUPP) { + cifs_dbg(FYI, "calling SetInformation since SetPathInfo for attrs/times not supported by this server\n"); + rc = SMBSetInformation(xid, tcon, full_path, + buf->Attributes != 0 ? buf->Attributes : cpu_to_le32(cinode->cifsAttrs), + write_time, + cifs_sb->local_nls, cifs_sb); + if (rc == 0) + cinode->cifsAttrs = le32_to_cpu(buf->Attributes); + else + rc = -EACCES; + goto out; + } + if (rc != 0) { if (rc == -EIO) rc = -EINVAL; @@ -946,6 +1024,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, } netpid = current->tgid; + cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for attrs/times not supported by this server\n"); set_via_filehandle: rc = CIFSSMBSetFileInfo(xid, tcon, buf, fid.netfid, netpid); @@ -956,6 +1035,21 @@ smb_set_file_info(struct inode *inode, const char *full_path, CIFSSMBClose(xid, tcon, fid.netfid); else cifsFileInfo_put(open_file); + + /* + * Setting the read-only bit is not honered on non-NT servers when done + * via open-semantics. So for setting it, use SMB_COM_SETATTR command. + * This command works only after the file is closed, so use it only when + * operation was called without the filehandle. + */ + if (open_file == NULL && + !(tcon->ses->capabilities & CAP_NT_SMBS) && + le32_to_cpu(buf->Attributes) & ATTR_READONLY) { + SMBSetInformation(xid, tcon, full_path, + buf->Attributes, + 0 /* do not change write time */, + cifs_sb->local_nls, cifs_sb); + } out: if (tlink != NULL) cifs_put_tlink(tlink); From 2feaa92c7c0123013a2a3e3d02ff8a5f5a794e96 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 23:33:06 -0400 Subject: [PATCH 1354/2924] bcachefs: improve missing journal write device error message Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 2a54ac79189bf1..63cdf885c9e2a4 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1782,7 +1782,7 @@ static CLOSURE_CALLBACK(journal_write_submit) struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE); if (!ca) { /* XXX: fix this */ - bch_err(c, "missing device for journal write\n"); + bch_err(c, "missing device %u for journal write", ptr->dev); continue; } From 5e63d579e752549dc256a952bcb35ade398ee921 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 29 Apr 2025 14:30:01 -0400 Subject: [PATCH 1355/2924] bcachefs: readdir fixes - Don't call bch2_trans_relock() after dir_emit(); taking a transaction restart here will cause us to emit the same dirent to userspace twice - Fix incorrect checking of the return value on dir_emit(): "true" means success, keep going, but bch2_dir_emit() needs to return true when we're finished iterating. https://github.com/koverstreet/bcachefs/issues/867 Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 92ee59d9e00ece..8a680e52c1ed12 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -685,7 +685,7 @@ static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subv vfs_d_type(d.v->d_type)); if (ret) ctx->pos = d.k->p.offset + 1; - return ret; + return !ret; } int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) @@ -710,7 +710,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) if (ret2 > 0) continue; - ret2 ?: drop_locks_do(trans, bch2_dir_emit(ctx, dirent, target)); + ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target)); }))); bch2_bkey_buf_exit(&sk, c); From 63f5235e0291152a2ac2c4ef3c1196cb6dfb3ef7 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Wed, 30 Apr 2025 18:18:43 +0800 Subject: [PATCH 1356/2924] ALSA: hda/realtek - Add more HP laptops which need mute led fixup More HP EliteBook with Realtek HDA codec ALC3247 and combined CS35L56 Amplifiers need quirk ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED. Signed-off-by: Chris Chiu Cc: Link: https://patch.msgid.link/20250430101843.150833-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7810d5f9b5aac7..8a2b09e4a7d5ac 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10863,8 +10863,11 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ded, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dee, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8def, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df0, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8df1, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dfc, "HP EliteBook 645 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dfe, "HP EliteBook 665 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e11, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), From 650266ac4c7230c89bcd1307acf5c9c92cfa85e2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 30 Apr 2025 11:05:54 +0300 Subject: [PATCH 1357/2924] dm: add missing unlock on in dm_keyslot_evict() We need to call dm_put_live_table() even if dm_get_live_table() returns NULL. Fixes: 9355a9eb21a5 ("dm: support key eviction from keyslot managers of underlying devices") Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Dan Carpenter Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9e175c5e0634b4..31d67a1a91dd60 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1173,7 +1173,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, t = dm_get_live_table(md, &srcu_idx); if (!t) - return 0; + goto put_live_table; for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); @@ -1184,6 +1184,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, (void *)key); } +put_live_table: dm_put_live_table(md, srcu_idx); return 0; } From c44572e0cc13c9afff83fd333135a0aa9b27ba26 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 27 Apr 2025 13:34:24 +0200 Subject: [PATCH 1358/2924] MIPS: Fix MAX_REG_OFFSET Fix MAX_REG_OFFSET to point to the last register in 'pt_regs' and not to the marker itself, which could allow regs_get_register() to return an invalid offset. Fixes: 40e084a506eb ("MIPS: Add uprobes support.") Suggested-by: Maciej W. Rozycki Signed-off-by: Thorsten Blum Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/ptrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index 85fa9962266a2b..ef72c46b556887 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -65,7 +65,8 @@ static inline void instruction_pointer_set(struct pt_regs *regs, /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); -#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last)) +#define MAX_REG_OFFSET \ + (offsetof(struct pt_regs, __last) - sizeof(unsigned long)) /** * regs_get_register() - get register value from its offset From e6a3fc4f10b872d02e25f83227e725c79b25d893 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 30 Apr 2025 14:48:37 +0200 Subject: [PATCH 1359/2924] genirq/msi: Prevent NULL pointer dereference in msi_domain_debug_show() irq_domain_debug_show_one() calls msi_domain_debug_show() with a non-NULL domain pointer and a NULL irq_data pointer. irq_debug_show_data() calls it with a NULL domain pointer. The domain pointer is not used, but the irq_data pointer is required to be non-NULL and lacks a NULL pointer check. Add the missing NULL pointer check to ensure there is a non-NULL irq_data pointer in msi_domain_debug_show() before dereferencing it. [ tglx: Massaged change log ] Fixes: 01499ae673dc ("genirq/msi: Expose MSI message data in debugfs") Signed-off-by: Andrew Jones Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250430124836.49964-2-ajones@ventanamicro.com --- kernel/irq/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 5c8d43cdb0a318..c05ba7ca00faad 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -761,7 +761,7 @@ static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fw static void msi_domain_debug_show(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind) { - struct msi_desc *desc = irq_data_get_msi_desc(irqd); + struct msi_desc *desc = irqd ? irq_data_get_msi_desc(irqd) : NULL; if (!desc) return; From edea92770a3b6454dc796fc5436a3315bb402181 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 30 Apr 2025 18:52:08 +0200 Subject: [PATCH 1360/2924] ASoC: stm32: sai: skip useless iterations on kernel rate loop the frequency of the kernel clock must be greater than or equal to the bitclock rate. When searching for a convenient kernel clock rate in stm32_sai_set_parent_rate() function, it is useless to continue the loop below bitclock rate, as it will result in a invalid kernel clock rate. Change the loop output condition. Fixes: 2cfe1ff22555 ("ASoC: stm32: sai: add stm32mp25 support") Signed-off-by: Olivier Moysan Link: https://patch.msgid.link/20250430165210.321273-2-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index e8c1abf1ae0a70..4d018b4bc3f0fa 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -409,11 +409,11 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, unsigned int rate) { struct platform_device *pdev = sai->pdev; - unsigned int sai_ck_rate, sai_ck_max_rate, sai_curr_rate, sai_new_rate; + unsigned int sai_ck_rate, sai_ck_max_rate, sai_ck_min_rate, sai_curr_rate, sai_new_rate; int div, ret; /* - * Set maximum expected kernel clock frequency + * Set minimum and maximum expected kernel clock frequency * - mclk on or spdif: * f_sai_ck = MCKDIV * mclk-fs * fs * Here typical 256 ratio is assumed for mclk-fs @@ -423,13 +423,16 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, * Set constraint MCKDIV * FRL <= 256, to ensure MCKDIV is in available range * f_sai_ck = sai_ck_max_rate * pow_of_two(FRL) / 256 */ + sai_ck_min_rate = rate * 256; if (!(rate % SAI_RATE_11K)) sai_ck_max_rate = SAI_MAX_SAMPLE_RATE_11K * 256; else sai_ck_max_rate = SAI_MAX_SAMPLE_RATE_8K * 256; - if (!sai->sai_mclk && !STM_SAI_PROTOCOL_IS_SPDIF(sai)) + if (!sai->sai_mclk && !STM_SAI_PROTOCOL_IS_SPDIF(sai)) { + sai_ck_min_rate = rate * sai->fs_length; sai_ck_max_rate /= DIV_ROUND_CLOSEST(256, roundup_pow_of_two(sai->fs_length)); + } /* * Request exclusivity, as the clock is shared by SAI sub-blocks and by @@ -472,7 +475,7 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, /* Try a lower frequency */ div++; sai_ck_rate = sai_ck_max_rate / div; - } while (sai_ck_rate > rate); + } while (sai_ck_rate >= sai_ck_min_rate); /* No accurate rate found */ dev_err(&pdev->dev, "Failed to find an accurate rate"); From cce34d113e2a592806abcdc02c7f8513775d8b20 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 30 Apr 2025 18:52:09 +0200 Subject: [PATCH 1361/2924] ASoC: stm32: sai: add a check on minimal kernel frequency On MP2 SoCs SAI kernel clock rate is managed through stm32_sai_set_parent_rate() function. If the kernel clock rate was set previously to a low frequency, this frequency may be too low to support the newly requested audio stream rate. However the stm32_sai_rate_accurate() will only check accuracy against the maximum kernel clock rate. The function will return leaving the kernel clock rate unchanged. Add a check on minimal frequency requirement, to avoid this. Fixes: 2cfe1ff22555 ("ASoC: stm32: sai: add stm32mp25 support") Signed-off-by: Olivier Moysan Link: https://patch.msgid.link/20250430165210.321273-3-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 4d018b4bc3f0fa..bf5299ba11c3c9 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -447,7 +447,10 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, * return immediately. */ sai_curr_rate = clk_get_rate(sai->sai_ck); - if (stm32_sai_rate_accurate(sai_ck_max_rate, sai_curr_rate)) + dev_dbg(&pdev->dev, "kernel clock rate: min [%u], max [%u], current [%u]", + sai_ck_min_rate, sai_ck_max_rate, sai_curr_rate); + if (stm32_sai_rate_accurate(sai_ck_max_rate, sai_curr_rate) && + sai_curr_rate >= sai_ck_min_rate) return 0; /* From 02b44a2b2bdcee03cbb92484d31e9ca1b91b2a38 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 30 Apr 2025 11:31:19 +0100 Subject: [PATCH 1362/2924] ASoC: intel/sdw_utils: Add volume limit to cs42l43 speakers The volume control for cs42l43 speakers has a maximum gain of +31.5 dB. However, for many use cases, this can cause distorted audio, depending various factors, such as other signal-processing elements in the chain, for example if the audio passes through a gain control before reaching the codec or the signal path has been tuned for a particular maximum gain in the codec. In the case of systems which use the soc_sdw_cs42l43 driver, audio will likely be distorted in all cases above 0 dB, therefore add a volume limit of 128, which is 0 dB maximum volume inside this driver. Signed-off-by: Stefan Binding Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20250430103134.24579-2-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_cs42l43.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/sdw_utils/soc_sdw_cs42l43.c b/sound/soc/sdw_utils/soc_sdw_cs42l43.c index 668c9d28a1c12d..b415d45d520d0c 100644 --- a/sound/soc/sdw_utils/soc_sdw_cs42l43.c +++ b/sound/soc/sdw_utils/soc_sdw_cs42l43.c @@ -20,6 +20,8 @@ #include #include +#define CS42L43_SPK_VOLUME_0DB 128 /* 0dB Max */ + static const struct snd_soc_dapm_route cs42l43_hs_map[] = { { "Headphone", NULL, "cs42l43 AMP3_OUT" }, { "Headphone", NULL, "cs42l43 AMP4_OUT" }, @@ -117,6 +119,14 @@ int asoc_sdw_cs42l43_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_so return -ENOMEM; } + ret = snd_soc_limit_volume(card, "cs42l43 Speaker Digital Volume", + CS42L43_SPK_VOLUME_0DB); + if (ret) + dev_err(card->dev, "cs42l43 speaker volume limit failed: %d\n", ret); + else + dev_info(card->dev, "Setting CS42L43 Speaker volume limit to %d\n", + CS42L43_SPK_VOLUME_0DB); + ret = snd_soc_dapm_add_routes(&card->dapm, cs42l43_spk_map, ARRAY_SIZE(cs42l43_spk_map)); if (ret) From d5463e531c128ff1b141fdba2e13345cd50028a4 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Wed, 30 Apr 2025 11:31:20 +0100 Subject: [PATCH 1363/2924] ASoC: intel/sdw_utils: Add volume limit to cs35l56 speakers The volume control for cs35l56 speakers has a maximum gain of +12 dB. However, for many use cases, this can cause distorted audio, depending various factors, such as other signal-processing elements in the chain, for example if the audio passes through a gain control before reaching the amp or the signal path has been tuned for a particular maximum gain in the amp. In the case of systems which use the soc_sdw_* driver, audio will likely be distorted in all cases above 0 dB, therefore add a volume limit of 400, which is 0 dB maximum volume inside this driver. The volume limit should be applied to both soundwire and soundwire bridge configurations. Signed-off-by: Stefan Binding Link: https://patch.msgid.link/20250430103134.24579-3-sbinding@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 1 + sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c | 4 ++++ sound/soc/sdw_utils/soc_sdw_cs_amp.c | 24 ++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 36a4a1e1d8ca28..d8bd5d37131aa0 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -226,6 +226,7 @@ int asoc_sdw_cs_amp_init(struct snd_soc_card *card, bool playback); int asoc_sdw_cs_spk_feedback_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs35l56_volume_limit(struct snd_soc_card *card, const char *name_prefix); /* MAXIM codec support */ int asoc_sdw_maxim_init(struct snd_soc_card *card, diff --git a/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c b/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c index 246e5c2e0af55f..c7e55f4433514c 100644 --- a/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c +++ b/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c @@ -60,6 +60,10 @@ static int asoc_sdw_bridge_cs35l56_asp_init(struct snd_soc_pcm_runtime *rtd) /* 4 x 16-bit sample slots and FSYNC=48000, BCLK=3.072 MHz */ for_each_rtd_codec_dais(rtd, i, codec_dai) { + ret = asoc_sdw_cs35l56_volume_limit(card, codec_dai->component->name_prefix); + if (ret) + return ret; + ret = snd_soc_dai_set_tdm_slot(codec_dai, tx_mask, rx_mask, 4, 16); if (ret < 0) return ret; diff --git a/sound/soc/sdw_utils/soc_sdw_cs_amp.c b/sound/soc/sdw_utils/soc_sdw_cs_amp.c index 4b6181cf29716f..35b550bcd4ded5 100644 --- a/sound/soc/sdw_utils/soc_sdw_cs_amp.c +++ b/sound/soc/sdw_utils/soc_sdw_cs_amp.c @@ -16,6 +16,25 @@ #define CODEC_NAME_SIZE 8 #define CS_AMP_CHANNELS_PER_AMP 4 +#define CS35L56_SPK_VOLUME_0DB 400 /* 0dB Max */ + +int asoc_sdw_cs35l56_volume_limit(struct snd_soc_card *card, const char *name_prefix) +{ + char *volume_ctl_name; + int ret; + + volume_ctl_name = kasprintf(GFP_KERNEL, "%s Speaker Volume", name_prefix); + if (!volume_ctl_name) + return -ENOMEM; + + ret = snd_soc_limit_volume(card, volume_ctl_name, CS35L56_SPK_VOLUME_0DB); + if (ret) + dev_err(card->dev, "%s limit set failed: %d\n", volume_ctl_name, ret); + + kfree(volume_ctl_name); + return ret; +} +EXPORT_SYMBOL_NS(asoc_sdw_cs35l56_volume_limit, "SND_SOC_SDW_UTILS"); int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai) { @@ -40,6 +59,11 @@ int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai snprintf(widget_name, sizeof(widget_name), "%s SPK", codec_dai->component->name_prefix); + + ret = asoc_sdw_cs35l56_volume_limit(card, codec_dai->component->name_prefix); + if (ret) + return ret; + ret = snd_soc_dapm_add_routes(&card->dapm, &route, 1); if (ret) return ret; From 3cc393d2232ec770b5f79bf0673d67702a3536c3 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 29 Apr 2025 11:49:10 +0200 Subject: [PATCH 1364/2924] ASoC: simple-card-utils: Fix pointer check in graph_util_parse_link_direction Actually check if the passed pointers are valid, before writing to them. This also fixes a USBAN warning: UBSAN: invalid-load in ../sound/soc/fsl/imx-card.c:687:25 load of value 255 is not a valid value for type '_Bool' This is because playback_only is uninitialized and is not written to, as the playback-only property is absent. Fixes: 844de7eebe97 ("ASoC: audio-graph-card2: expand dai_link property part") Signed-off-by: Alexander Stein Link: https://patch.msgid.link/20250429094910.1150970-1-alexander.stein@ew.tq-group.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index a1ccc300e68ca8..3ae2a212a2e38a 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -1174,9 +1174,9 @@ void graph_util_parse_link_direction(struct device_node *np, bool is_playback_only = of_property_read_bool(np, "playback-only"); bool is_capture_only = of_property_read_bool(np, "capture-only"); - if (is_playback_only) + if (playback_only) *playback_only = is_playback_only; - if (is_capture_only) + if (capture_only) *capture_only = is_capture_only; } EXPORT_SYMBOL_GPL(graph_util_parse_link_direction); From 7f91f012c1df07af6b915d1f8cece202774bb50e Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 1 May 2025 01:24:43 +0530 Subject: [PATCH 1365/2924] ASoC: amd: ps: fix for irq handler return status If any Soundwire manager interrupt is reported, and wake interrupt is not reported, in this scenario irq_flag will be set to zero, which results in interrupt handler return status as IRQ_NONE. Add new irq flag 'wake_irq_flag' check for SoundWire wake interrupt handling to fix incorrect irq handling return status. Fixes: 3898b189079c8 ("ASoC: amd: ps: add soundwire wake interrupt handling") Signed-off-by: Vijendar Mukunda Link: https://patch.msgid.link/20250430195517.3065308-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/ps/pci-ps.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c index 8e57f31ef7f7b8..7936b31736323b 100644 --- a/sound/soc/amd/ps/pci-ps.c +++ b/sound/soc/amd/ps/pci-ps.c @@ -193,6 +193,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) struct amd_sdw_manager *amd_manager; u32 ext_intr_stat, ext_intr_stat1; u16 irq_flag = 0; + u16 wake_irq_flag = 0; u16 sdw_dma_irq_flag = 0; adata = dev_id; @@ -231,7 +232,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) } if (adata->acp_rev >= ACP70_PCI_REV) - irq_flag = check_and_handle_acp70_sdw_wake_irq(adata); + wake_irq_flag = check_and_handle_acp70_sdw_wake_irq(adata); if (ext_intr_stat & BIT(PDM_DMA_STAT)) { ps_pdm_data = dev_get_drvdata(&adata->pdm_dev->dev); @@ -245,7 +246,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) if (sdw_dma_irq_flag) return IRQ_WAKE_THREAD; - if (irq_flag) + if (irq_flag | wake_irq_flag) return IRQ_HANDLED; else return IRQ_NONE; From 4f79eaa2ceac86a0e0f304b0bab556cca5bf4f30 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 30 Apr 2025 15:56:34 -0700 Subject: [PATCH 1366/2924] kbuild: Properly disable -Wunterminated-string-initialization for clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang and GCC have different behaviors around disabling warnings included in -Wall and -Wextra and the order in which flags are specified, which is exposed by clang's new support for -Wunterminated-string-initialization. $ cat test.c const char foo[3] = "FOO"; const char bar[3] __attribute__((__nonstring__)) = "BAR"; $ clang -fsyntax-only -Wextra test.c test.c:1:21: warning: initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute? [-Wunterminated-string-initialization] 1 | const char foo[3] = "FOO"; | ^~~~~ $ clang -fsyntax-only -Wextra -Wno-unterminated-string-initialization test.c $ clang -fsyntax-only -Wno-unterminated-string-initialization -Wextra test.c test.c:1:21: warning: initializer-string for character array is too long, array size is 3 but initializer has size 4 (including the null terminating character); did you mean to use the 'nonstring' attribute? [-Wunterminated-string-initialization] 1 | const char foo[3] = "FOO"; | ^~~~~ $ gcc -fsyntax-only -Wextra test.c test.c:1:21: warning: initializer-string for array of ‘char’ truncates NUL terminator but destination lacks ‘nonstring’ attribute (4 chars into 3 available) [-Wunterminated-string-initialization] 1 | const char foo[3] = "FOO"; | ^~~~~ $ gcc -fsyntax-only -Wextra -Wno-unterminated-string-initialization test.c $ gcc -fsyntax-only -Wno-unterminated-string-initialization -Wextra test.c Move -Wextra up right below -Wall in Makefile.extrawarn to ensure these flags are at the beginning of the warning options list. Move the couple of warning options that have been added to the main Makefile since commit e88ca24319e4 ("kbuild: consolidate warning flags in scripts/Makefile.extrawarn") to scripts/Makefile.extrawarn after -Wall / -Wextra to ensure they get properly disabled for all compilers. Fixes: 9d7a0577c9db ("gcc-15: disable '-Wunterminated-string-initialization' entirely for now") Link: https://github.com/llvm/llvm-project/issues/10359 Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds --- Makefile | 7 ------- scripts/Makefile.extrawarn | 9 ++++++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 5aa9ee52a765b7..94be5dfb81fb3c 100644 --- a/Makefile +++ b/Makefile @@ -1052,13 +1052,6 @@ NOSTDINC_FLAGS += -nostdinc # perform bounds checking. KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) -#Currently, disable -Wstringop-overflow for GCC 11, globally. -KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-disable-warning, stringop-overflow) -KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) - -#Currently, disable -Wunterminated-string-initialization as broken -KBUILD_CFLAGS += $(call cc-disable-warning, unterminated-string-initialization) - # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 2d6e59561c9d04..d88acdf4085524 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -8,6 +8,7 @@ # Default set of warnings, always enabled KBUILD_CFLAGS += -Wall +KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -Werror=implicit-function-declaration KBUILD_CFLAGS += -Werror=implicit-int @@ -56,6 +57,13 @@ KBUILD_CFLAGS += -Wno-pointer-sign # globally built with -Wcast-function-type. KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type) +# Currently, disable -Wstringop-overflow for GCC 11, globally. +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-disable-warning, stringop-overflow) +KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) + +# Currently, disable -Wunterminated-string-initialization as broken +KBUILD_CFLAGS += $(call cc-disable-warning, unterminated-string-initialization) + # The allocators already balk at large sizes, so silence the compiler # warnings for bounds checks involving those possible values. While # -Wno-alloc-size-larger-than would normally be used here, earlier versions @@ -82,7 +90,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # Warn if there is an enum types mismatch KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) -KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wunused # From 4a9c3c3215491f25bc66d615faa921c814b1a479 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 30 Apr 2025 10:53:25 +0100 Subject: [PATCH 1367/2924] clk: sunxi-ng: fix order of arguments in clock macro When introducing the SUNXI_CCU_MP_DATA_WITH_MUX_GATE_FEAT macro, the order of the last two arguments was different between the users and the definition: features became flags and flags became features. This just didn't end up in a disaster yet because most users ended up passing 0 for both arguments, and other clocks (for the new A523 SoC) are not yet used. Swap the order of the arguments in the definition, so that users stay untouched. Fixes: cdbb9d0d09db ("clk: sunxi-ng: mp: provide wrappers for setting feature flags") Signed-off-by: Andre Przywara Reviewed-by: Jernej Skrabec Link: https://patch.msgid.link/20250430095325.477311-1-andre.przywara@arm.com [wens@csie.org: fix typo in commit message] Signed-off-by: Chen-Yu Tsai --- drivers/clk/sunxi-ng/ccu_mp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu_mp.h b/drivers/clk/sunxi-ng/ccu_mp.h index b35aeec70484d6..8fc7fdb7ef4941 100644 --- a/drivers/clk/sunxi-ng/ccu_mp.h +++ b/drivers/clk/sunxi-ng/ccu_mp.h @@ -109,8 +109,7 @@ struct ccu_mp { _mshift, _mwidth, \ _pshift, _pwidth, \ _muxshift, _muxwidth, \ - _gate, _features, \ - _flags) \ + _gate, _flags, _features) \ struct ccu_mp _struct = { \ .enable = _gate, \ .m = _SUNXI_CCU_DIV(_mshift, _mwidth), \ From 05450c48a35810c5025cffb41dbf566cfb122415 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 30 Apr 2025 23:18:49 -0400 Subject: [PATCH 1368/2924] bcachefs: Kill ERO in __bch2_i_sectors_acct() We won't be root causing this in the immediate future, and it's fairly innocuous - so just log it in the superblock. https://github.com/koverstreet/bcachefs/issues/869 Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 23 +++++++++++++++++++---- fs/bcachefs/sb-errors_format.h | 5 ++++- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 65c2c33d253dff..28619b6bd32703 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -144,10 +144,25 @@ int __must_check bch2_write_inode_size(struct bch_fs *c, void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, struct quota_res *quota_res, s64 sectors) { - bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, - "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, sectors, - inode->ei_inode.bi_sectors); + if (unlikely((s64) inode->v.i_blocks + sectors < 0)) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, sectors, + inode->ei_inode.bi_sectors); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, buf.buf, &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + + if (sectors < 0) + sectors = -inode->v.i_blocks; + else + sectors = 0; + } + inode->v.i_blocks += sectors; #ifdef CONFIG_BCACHEFS_QUOTA diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index a4ad5924107b23..582e77cbaf8fc3 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -236,6 +236,7 @@ enum bch_fsck_flags { x(inode_has_child_snapshots_wrong, 287, 0) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ + x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ @@ -317,7 +318,9 @@ enum bch_fsck_flags { x(directory_size_mismatch, 303, FSCK_AUTOFIX) \ x(dirent_cf_name_too_big, 304, 0) \ x(dirent_stray_data_after_cf_name, 305, 0) \ - x(MAX, 308, 0) + x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ + x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ + x(MAX, 312, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From 3a72e369412d6bf7f6a8af410ac4bdd7d48deb62 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 30 Apr 2025 23:56:00 -0400 Subject: [PATCH 1369/2924] bcachefs: check for inode.bi_sectors underflow Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 21 +++++++++++++++++++++ fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index a418fa62f09d23..c1237da079ede2 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -255,6 +255,27 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, } if (i_sectors_delta) { + s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors); + if (unlikely(bi_sectors + i_sectors_delta < 0)) { + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0", + extent_iter->pos.inode, bi_sectors, i_sectors_delta); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, inode_i_sectors_underflow, buf.buf, + &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + + if (i_sectors_delta < 0) + i_sectors_delta = -bi_sectors; + else + i_sectors_delta = 0; + } + le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta); inode_update_flags = 0; } diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 582e77cbaf8fc3..822a0b42432d90 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -236,6 +236,7 @@ enum bch_fsck_flags { x(inode_has_child_snapshots_wrong, 287, 0) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ + x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ @@ -320,7 +321,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 312, 0) + x(MAX, 313, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From e660d7ca7488214ae19d964d196f89b4237ec829 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 1 May 2025 00:01:29 -0400 Subject: [PATCH 1370/2924] bcachefs: Kill ERO for i_blocks check in truncate Replace with logging the error in the superblock. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io.c | 21 ++++++++++++++++----- fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 28619b6bd32703..9657144666b8b2 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -517,11 +517,22 @@ int bchfs_truncate(struct mnt_idmap *idmap, goto err; } - bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && - !bch2_journal_error(&c->journal), c, - "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, - inode->ei_inode.bi_sectors); + if (unlikely(!inode->v.i_size && inode->v.i_blocks && + !bch2_journal_error(&c->journal))) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, + inode->ei_inode.bi_sectors); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, buf.buf, + &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + } ret = bch2_setattr_nonsize(idmap, inode, iattr); err: diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 822a0b42432d90..3b69a924086fd5 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -238,6 +238,7 @@ enum bch_fsck_flags { x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ + x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ @@ -321,7 +322,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 313, 0) + x(MAX, 314, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From 87ec7d5249bb8ebf40261420da069fa238c21789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 3 Apr 2025 13:25:24 +0200 Subject: [PATCH 1371/2924] KVM: RISC-V: reset smstateen CSRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not resetting smstateen is a potential security hole, because VU might be able to access state that VS does not properly context-switch. Fixes: 81f0f314fec9 ("RISCV: KVM: Add sstateen0 context save/restore") Signed-off-by: Radim Krčmář Link: https://lore.kernel.org/r/20250403112522.1566629-8-rkrcmar@ventanamicro.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 60d684c76c5873..02635bac91f175 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -77,6 +77,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) memcpy(cntx, reset_cntx, sizeof(*cntx)); spin_unlock(&vcpu->arch.reset_cntx_lock); + memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr)); + kvm_riscv_vcpu_fp_reset(vcpu); kvm_riscv_vcpu_vector_reset(vcpu); From c59f7c9661b9d3ee33a21d7b4f1dd4b77079e3e7 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 30 Apr 2025 20:15:48 -0300 Subject: [PATCH 1372/2924] smb: client: ensure aligned IO sizes Make all IO sizes multiple of PAGE_SIZE, either negotiated by the server or passed through rsize, wsize and bsize mount options, to prevent from breaking DIO reads and writes against servers that enforce alignment as specified in MS-FSA 2.1.5.3 and 2.1.5.4. Cc: linux-cifs@vger.kernel.org Reviewed-by: David Howells Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/connect.c | 23 +------------------ fs/smb/client/file.c | 6 ++--- fs/smb/client/fs_context.c | 25 +++++--------------- fs/smb/client/fs_context.h | 47 ++++++++++++++++++++++++++++++++++++++ fs/smb/client/smb1ops.c | 8 +++---- fs/smb/client/smb2pdu.c | 8 ++----- 6 files changed, 62 insertions(+), 55 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index df976ce6aed92f..6bf04d9a549138 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3753,28 +3753,7 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) } } - /* - * Clamp the rsize/wsize mount arguments if they are too big for the server - * and set the rsize/wsize to the negotiated values if not passed in by - * the user on mount - */ - if ((cifs_sb->ctx->wsize == 0) || - (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) { - cifs_sb->ctx->wsize = - round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); - /* - * in the very unlikely event that the server sent a max write size under PAGE_SIZE, - * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096 - */ - if (cifs_sb->ctx->wsize == 0) { - cifs_sb->ctx->wsize = PAGE_SIZE; - cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n"); - } - } - if ((cifs_sb->ctx->rsize == 0) || - (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) - cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); - + cifs_negotiate_iosize(server, cifs_sb->ctx, tcon); /* * The cookie is initialized from volume info returned above. * Inside cifs_fscache_get_super_cookie it checks diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9e8f404b9e56ae..851b74f557c10e 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -160,10 +160,8 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; - if (cifs_sb->ctx->rsize == 0) - cifs_sb->ctx->rsize = - server->ops->negotiate_rsize(tlink_tcon(req->cfile->tlink), - cifs_sb->ctx); + cifs_negotiate_rsize(server, cifs_sb->ctx, + tlink_tcon(req->cfile->tlink)); rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &size, &rdata->credits); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 2980941b96679c..a634a34d4086a0 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1021,6 +1021,7 @@ static int smb3_reconfigure(struct fs_context *fc) struct dentry *root = fc->root; struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); struct cifs_ses *ses = cifs_sb_master_tcon(cifs_sb)->ses; + unsigned int rsize = ctx->rsize, wsize = ctx->wsize; char *new_password = NULL, *new_password2 = NULL; bool need_recon = false; int rc; @@ -1103,11 +1104,8 @@ static int smb3_reconfigure(struct fs_context *fc) STEAL_STRING(cifs_sb, ctx, iocharset); /* if rsize or wsize not passed in on remount, use previous values */ - if (ctx->rsize == 0) - ctx->rsize = cifs_sb->ctx->rsize; - if (ctx->wsize == 0) - ctx->wsize = cifs_sb->ctx->wsize; - + ctx->rsize = rsize ? CIFS_ALIGN_RSIZE(fc, rsize) : cifs_sb->ctx->rsize; + ctx->wsize = wsize ? CIFS_ALIGN_WSIZE(fc, wsize) : cifs_sb->ctx->wsize; smb3_cleanup_fs_context_contents(cifs_sb->ctx); rc = smb3_fs_context_dup(cifs_sb->ctx, ctx); @@ -1312,7 +1310,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, __func__); goto cifs_parse_mount_err; } - ctx->bsize = result.uint_32; + ctx->bsize = CIFS_ALIGN_BSIZE(fc, result.uint_32); ctx->got_bsize = true; break; case Opt_rasize: @@ -1336,24 +1334,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->rasize = result.uint_32; break; case Opt_rsize: - ctx->rsize = result.uint_32; + ctx->rsize = CIFS_ALIGN_RSIZE(fc, result.uint_32); ctx->got_rsize = true; ctx->vol_rsize = ctx->rsize; break; case Opt_wsize: - ctx->wsize = result.uint_32; + ctx->wsize = CIFS_ALIGN_WSIZE(fc, result.uint_32); ctx->got_wsize = true; - if (ctx->wsize % PAGE_SIZE != 0) { - ctx->wsize = round_down(ctx->wsize, PAGE_SIZE); - if (ctx->wsize == 0) { - ctx->wsize = PAGE_SIZE; - cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE); - } else { - cifs_dbg(VFS, - "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n", - ctx->wsize, PAGE_SIZE); - } - } ctx->vol_wsize = ctx->wsize; break; case Opt_acregmax: diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index d1d29249bcdb97..9e83302ce4b801 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -20,6 +20,21 @@ cifs_dbg(VFS, fmt, ## __VA_ARGS__); \ } while (0) +static inline size_t cifs_io_align(struct fs_context *fc, + const char *name, size_t size) +{ + if (!size || !IS_ALIGNED(size, PAGE_SIZE)) { + cifs_errorf(fc, "unaligned %s, making it a multiple of %lu bytes\n", + name, PAGE_SIZE); + size = umax(round_down(size, PAGE_SIZE), PAGE_SIZE); + } + return size; +} + +#define CIFS_ALIGN_WSIZE(_fc, _size) cifs_io_align(_fc, "wsize", _size) +#define CIFS_ALIGN_RSIZE(_fc, _size) cifs_io_align(_fc, "rsize", _size) +#define CIFS_ALIGN_BSIZE(_fc, _size) cifs_io_align(_fc, "bsize", _size) + enum smb_version { Smb_1 = 1, Smb_20, @@ -361,4 +376,36 @@ static inline void cifs_mount_unlock(void) mutex_unlock(&cifs_mount_mutex); } +static inline void cifs_negotiate_rsize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + unsigned int size; + + size = umax(server->ops->negotiate_rsize(tcon, ctx), PAGE_SIZE); + if (ctx->rsize) + size = umax(umin(ctx->rsize, size), PAGE_SIZE); + ctx->rsize = round_down(size, PAGE_SIZE); +} + +static inline void cifs_negotiate_wsize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + unsigned int size; + + size = umax(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); + if (ctx->wsize) + size = umax(umin(ctx->wsize, size), PAGE_SIZE); + ctx->wsize = round_down(size, PAGE_SIZE); +} + +static inline void cifs_negotiate_iosize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + cifs_negotiate_rsize(server, ctx, tcon); + cifs_negotiate_wsize(server, ctx, tcon); +} + #endif diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index ab26b9e9b0e70f..b27a182629ece9 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -432,7 +432,7 @@ cifs_negotiate(const unsigned int xid, } static unsigned int -cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +smb1_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -467,7 +467,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) } static unsigned int -cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +smb1_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -1342,8 +1342,8 @@ struct smb_version_operations smb1_operations = { .check_trans2 = cifs_check_trans2, .need_neg = cifs_need_neg, .negotiate = cifs_negotiate, - .negotiate_wsize = cifs_negotiate_wsize, - .negotiate_rsize = cifs_negotiate_rsize, + .negotiate_wsize = smb1_negotiate_wsize, + .negotiate_rsize = smb1_negotiate_rsize, .sess_setup = CIFS_SessSetup, .logoff = CIFSSMBLogoff, .tree_connect = CIFSTCon, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index a9a49555d43bf6..0b35816d551f7a 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4093,12 +4093,8 @@ static void cifs_renegotiate_iosize(struct TCP_Server_Info *server, return; spin_lock(&tcon->sb_list_lock); - list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) { - cifs_sb->ctx->rsize = - server->ops->negotiate_rsize(tcon, cifs_sb->ctx); - cifs_sb->ctx->wsize = - server->ops->negotiate_wsize(tcon, cifs_sb->ctx); - } + list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) + cifs_negotiate_iosize(server, cifs_sb->ctx, tcon); spin_unlock(&tcon->sb_list_lock); } From 927069d5c40c1cfa7b2d13cfc6d7d58bc6f85c50 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 30 Apr 2025 10:03:43 -0700 Subject: [PATCH 1373/2924] bnxt_en: fix module unload sequence Recent updates to the PTP part of bnxt changed the way PTP FIFO is cleared, skbs waiting for TX timestamps are now cleared during ndo_close() call. To do clearing procedure, the ptp structure must exist and point to a valid address. Module destroy sequence had ptp clear code running before netdev close causing invalid memory access and kernel crash. Change the sequence to destroy ptp structure after device close. Fixes: 8f7ae5a85137 ("bnxt_en: improve TX timestamping FIFO configuration") Reported-by: Taehee Yoo Closes: https://lore.kernel.org/netdev/CAMArcTWDe2cd41=ub=zzvYifaYcYv-N-csxfqxUvejy_L0D6UQ@mail.gmail.com/ Signed-off-by: Vadim Fedorenko Reviewed-by: Jacob Keller Reviewed-by: Michael Chan Tested-by: Taehee Yoo Link: https://patch.msgid.link/20250430170343.759126-1-vadfed@meta.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 78e496b0ec2625..86a5de44b6f3fe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16006,8 +16006,8 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_rdma_aux_device_del(bp); - bnxt_ptp_clear(bp); unregister_netdev(dev); + bnxt_ptp_clear(bp); bnxt_rdma_aux_device_uninit(bp); From f920436a44295ca791ebb6dae3f4190142eec703 Mon Sep 17 00:00:00 2001 From: Jibin Zhang Date: Tue, 29 Apr 2025 09:59:48 +0800 Subject: [PATCH 1374/2924] net: use sock_gen_put() when sk_state is TCP_TIME_WAIT It is possible for a pointer of type struct inet_timewait_sock to be returned from the functions __inet_lookup_established() and __inet6_lookup_established(). This can cause a crash when the returned pointer is of type struct inet_timewait_sock and sock_put() is called on it. The following is a crash call stack that shows sk->sk_wmem_alloc being accessed in sk_free() during the call to sock_put() on a struct inet_timewait_sock pointer. To avoid this issue, use sock_gen_put() instead of sock_put() when sk->sk_state is TCP_TIME_WAIT. mrdump.ko ipanic() + 120 vmlinux notifier_call_chain(nr_to_call=-1, nr_calls=0) + 132 vmlinux atomic_notifier_call_chain(val=0) + 56 vmlinux panic() + 344 vmlinux add_taint() + 164 vmlinux end_report() + 136 vmlinux kasan_report(size=0) + 236 vmlinux report_tag_fault() + 16 vmlinux do_tag_recovery() + 16 vmlinux __do_kernel_fault() + 88 vmlinux do_bad_area() + 28 vmlinux do_tag_check_fault() + 60 vmlinux do_mem_abort() + 80 vmlinux el1_abort() + 56 vmlinux el1h_64_sync_handler() + 124 vmlinux > 0xFFFFFFC080011294() vmlinux __lse_atomic_fetch_add_release(v=0xF2FFFF82A896087C) vmlinux __lse_atomic_fetch_sub_release(v=0xF2FFFF82A896087C) vmlinux arch_atomic_fetch_sub_release(i=1, v=0xF2FFFF82A896087C) + 8 vmlinux raw_atomic_fetch_sub_release(i=1, v=0xF2FFFF82A896087C) + 8 vmlinux atomic_fetch_sub_release(i=1, v=0xF2FFFF82A896087C) + 8 vmlinux __refcount_sub_and_test(i=1, r=0xF2FFFF82A896087C, oldp=0) + 8 vmlinux __refcount_dec_and_test(r=0xF2FFFF82A896087C, oldp=0) + 8 vmlinux refcount_dec_and_test(r=0xF2FFFF82A896087C) + 8 vmlinux sk_free(sk=0xF2FFFF82A8960700) + 28 vmlinux sock_put() + 48 vmlinux tcp6_check_fraglist_gro() + 236 vmlinux tcp6_gro_receive() + 624 vmlinux ipv6_gro_receive() + 912 vmlinux dev_gro_receive() + 1116 vmlinux napi_gro_receive() + 196 ccmni.ko ccmni_rx_callback() + 208 ccmni.ko ccmni_queue_recv_skb() + 388 ccci_dpmaif.ko dpmaif_rxq_push_thread() + 1088 vmlinux kthread() + 268 vmlinux 0xFFFFFFC08001F30C() Fixes: c9d1d23e5239 ("net: add heuristic for enabling TCP fraglist GRO") Signed-off-by: Jibin Zhang Signed-off-by: Shiming Cheng Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250429020412.14163-1-shiming.cheng@mediatek.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_offload.c | 2 +- net/ipv6/tcpv6_offload.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 934f777f29d36c..d293087b426df7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -439,7 +439,7 @@ static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, iif, sdif); NAPI_GRO_CB(skb)->is_flist = !sk; if (sk) - sock_put(sk); + sock_gen_put(sk); } INDIRECT_CALLABLE_SCOPE diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index d9b11fe41bf0c9..a8a04f441e7888 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -42,7 +42,7 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, iif, sdif); NAPI_GRO_CB(skb)->is_flist = !sk; if (sk) - sock_put(sk); + sock_gen_put(sk); #endif /* IS_ENABLED(CONFIG_IPV6) */ } From e98386d79a23c57cf179fe4138322e277aa3aa74 Mon Sep 17 00:00:00 2001 From: Sagi Maimon Date: Tue, 29 Apr 2025 10:33:20 +0300 Subject: [PATCH 1375/2924] ptp: ocp: Fix NULL dereference in Adva board SMA sysfs operations On Adva boards, SMA sysfs store/get operations can call __handle_signal_outputs() or __handle_signal_inputs() while the `irig` and `dcf` pointers are uninitialized, leading to a NULL pointer dereference in __handle_signal() and causing a kernel crash. Adva boards don't use `irig` or `dcf` functionality, so add Adva-specific callbacks `ptp_ocp_sma_adva_set_outputs()` and `ptp_ocp_sma_adva_set_inputs()` that avoid invoking `irig` or `dcf` input/output routines. Fixes: ef61f5528fca ("ptp: ocp: add Adva timecard support") Signed-off-by: Sagi Maimon Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250429073320.33277-1-maimon.sagi@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 52 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index faf6e027f89ab4..2ccdca4f69604b 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -2578,12 +2578,60 @@ static const struct ocp_sma_op ocp_fb_sma_op = { .set_output = ptp_ocp_sma_fb_set_output, }; +static int +ptp_ocp_sma_adva_set_output(struct ptp_ocp *bp, int sma_nr, u32 val) +{ + u32 reg, mask, shift; + unsigned long flags; + u32 __iomem *gpio; + + gpio = sma_nr > 2 ? &bp->sma_map1->gpio2 : &bp->sma_map2->gpio2; + shift = sma_nr & 1 ? 0 : 16; + + mask = 0xffff << (16 - shift); + + spin_lock_irqsave(&bp->lock, flags); + + reg = ioread32(gpio); + reg = (reg & mask) | (val << shift); + + iowrite32(reg, gpio); + + spin_unlock_irqrestore(&bp->lock, flags); + + return 0; +} + +static int +ptp_ocp_sma_adva_set_inputs(struct ptp_ocp *bp, int sma_nr, u32 val) +{ + u32 reg, mask, shift; + unsigned long flags; + u32 __iomem *gpio; + + gpio = sma_nr > 2 ? &bp->sma_map2->gpio1 : &bp->sma_map1->gpio1; + shift = sma_nr & 1 ? 0 : 16; + + mask = 0xffff << (16 - shift); + + spin_lock_irqsave(&bp->lock, flags); + + reg = ioread32(gpio); + reg = (reg & mask) | (val << shift); + + iowrite32(reg, gpio); + + spin_unlock_irqrestore(&bp->lock, flags); + + return 0; +} + static const struct ocp_sma_op ocp_adva_sma_op = { .tbl = { ptp_ocp_adva_sma_in, ptp_ocp_adva_sma_out }, .init = ptp_ocp_sma_fb_init, .get = ptp_ocp_sma_fb_get, - .set_inputs = ptp_ocp_sma_fb_set_inputs, - .set_output = ptp_ocp_sma_fb_set_output, + .set_inputs = ptp_ocp_sma_adva_set_inputs, + .set_output = ptp_ocp_sma_adva_set_output, }; static int From 2d52e2e38b85c8b7bc00dca55c2499f46f8c8198 Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Tue, 29 Apr 2025 10:55:27 +0530 Subject: [PATCH 1376/2924] net: lan743x: Fix memleak issue when GSO enabled Always map the `skb` to the LS descriptor. Previously skb was mapped to EXT descriptor when the number of fragments is zero with GSO enabled. Mapping the skb to EXT descriptor prevents it from being freed, leading to a memory leak Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Thangaraj Samynathan Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250429052527.10031-1-thangaraj.s@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 8 ++++++-- drivers/net/ethernet/microchip/lan743x_main.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 23760b613d3ecf..e2d6bfb5d69334 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1815,6 +1815,7 @@ static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx, if (nr_frags <= 0) { tx->frame_data0 |= TX_DESC_DATA0_LS_; tx->frame_data0 |= TX_DESC_DATA0_IOC_; + tx->frame_last = tx->frame_first; } tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); @@ -1884,6 +1885,7 @@ static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx, tx->frame_first = 0; tx->frame_data0 = 0; tx->frame_tail = 0; + tx->frame_last = 0; return -ENOMEM; } @@ -1924,16 +1926,18 @@ static void lan743x_tx_frame_end(struct lan743x_tx *tx, TX_DESC_DATA0_DTYPE_DATA_) { tx->frame_data0 |= TX_DESC_DATA0_LS_; tx->frame_data0 |= TX_DESC_DATA0_IOC_; + tx->frame_last = tx->frame_tail; } - tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; - buffer_info = &tx->buffer_info[tx->frame_tail]; + tx_descriptor = &tx->ring_cpu_ptr[tx->frame_last]; + buffer_info = &tx->buffer_info[tx->frame_last]; buffer_info->skb = skb; if (time_stamp) buffer_info->flags |= TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED; if (ignore_sync) buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC; + tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); tx->last_tail = tx->frame_tail; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 7f73d66854bee4..db5fc73e41cca5 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -980,6 +980,7 @@ struct lan743x_tx { u32 frame_first; u32 frame_data0; u32 frame_tail; + u32 frame_last; struct lan743x_tx_buffer_info *buffer_info; From a179aad12badc43201cbf45d1e8ed2c1383c76b9 Mon Sep 17 00:00:00 2001 From: Mattias Barthel Date: Tue, 29 Apr 2025 11:08:26 +0200 Subject: [PATCH 1377/2924] net: fec: ERR007885 Workaround for conventional TX Activate TX hang workaround also in fec_enet_txq_submit_skb() when TSO is not enabled. Errata: ERR007885 Symptoms: NETDEV WATCHDOG: eth0 (fec): transmit queue 0 timed out commit 37d6017b84f7 ("net: fec: Workaround for imx6sx enet tx hang when enable three queues") There is a TDAR race condition for mutliQ when the software sets TDAR and the UDMA clears TDAR simultaneously or in a small window (2-4 cycles). This will cause the udma_tx and udma_tx_arbiter state machines to hang. So, the Workaround is checking TDAR status four time, if TDAR cleared by hardware and then write TDAR, otherwise don't set TDAR. Fixes: 53bb20d1faba ("net: fec: add variable reg_desc_active to speed things up") Signed-off-by: Mattias Barthel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250429090826.3101258-1-mattiasbarthel@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a86cfebedaa8b5..17e9bddb9ddd58 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -714,7 +714,12 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, txq->bd.cur = bdp; /* Trigger transmission start */ - writel(0, txq->bd.reg_desc_active); + if (!(fep->quirks & FEC_QUIRK_ERR007885) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active)) + writel(0, txq->bd.reg_desc_active); return 0; } From 34f42736b325287a7b2ce37e415838f539767bda Mon Sep 17 00:00:00 2001 From: Sathesh B Edara Date: Tue, 29 Apr 2025 04:46:24 -0700 Subject: [PATCH 1378/2924] octeon_ep: Fix host hang issue during device reboot When the host loses heartbeat messages from the device, the driver calls the device-specific ndo_stop function, which frees the resources. If the driver is unloaded in this scenario, it calls ndo_stop again, attempting to free resources that have already been freed, leading to a host hang issue. To resolve this, dev_close should be called instead of the device-specific stop function.dev_close internally calls ndo_stop to stop the network interface and performs additional cleanup tasks. During the driver unload process, if the device is already down, ndo_stop is not called. Fixes: 5cb96c29aa0e ("octeon_ep: add heartbeat monitor") Signed-off-by: Sathesh B Edara Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250429114624.19104-1-sedara@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 0a679e95196fed..24499bb36c0057 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1223,7 +1223,7 @@ static void octep_hb_timeout_task(struct work_struct *work) miss_cnt); rtnl_lock(); if (netif_running(oct->netdev)) - octep_stop(oct->netdev); + dev_close(oct->netdev); rtnl_unlock(); } From ef2383d078edcbe3055032436b16cdf206f26de2 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 30 Apr 2025 17:30:49 +0800 Subject: [PATCH 1379/2924] net: hns3: store rx VLAN tag offload state for VF The VF driver missed to store the rx VLAN tag strip state when user change the rx VLAN tag offload state. And it will default to enable the rx vlan tag strip when re-init VF device after reset. So if user disable rx VLAN tag offload, and trig reset, then the HW will still strip the VLAN tag from packet nad fill into RX BD, but the VF driver will ignore it for rx VLAN tag offload disabled. It may cause the rx VLAN tag dropped. Fixes: b2641e2ad456 ("net: hns3: Add support of hardware rx-vlan-offload to HNS3 VF driver") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250430093052.2400464-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../hisilicon/hns3/hns3vf/hclgevf_main.c | 25 ++++++++++++++----- .../hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 9ba767740a043f..dada42e7e0ec96 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1292,9 +1292,8 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) rtnl_unlock(); } -static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) +static int hclgevf_en_hw_strip_rxvtag_cmd(struct hclgevf_dev *hdev, bool enable) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclge_vf_to_pf_msg send_msg; hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, @@ -1303,6 +1302,19 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } +static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int ret; + + ret = hclgevf_en_hw_strip_rxvtag_cmd(hdev, enable); + if (ret) + return ret; + + hdev->rxvtag_strip_en = enable; + return 0; +} + static int hclgevf_reset_tqp(struct hnae3_handle *handle) { #define HCLGEVF_RESET_ALL_QUEUE_DONE 1U @@ -2204,12 +2216,13 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) tc_valid, tc_size); } -static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) +static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev, + bool rxvtag_strip_en) { struct hnae3_handle *nic = &hdev->nic; int ret; - ret = hclgevf_en_hw_strip_rxvtag(nic, true); + ret = hclgevf_en_hw_strip_rxvtag(nic, rxvtag_strip_en); if (ret) { dev_err(&hdev->pdev->dev, "failed to enable rx vlan offload, ret = %d\n", ret); @@ -2879,7 +2892,7 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) if (ret) return ret; - ret = hclgevf_init_vlan_config(hdev); + ret = hclgevf_init_vlan_config(hdev, hdev->rxvtag_strip_en); if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to initialize VLAN config\n", ret); @@ -2994,7 +3007,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) goto err_config; } - ret = hclgevf_init_vlan_config(hdev); + ret = hclgevf_init_vlan_config(hdev, true); if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to initialize VLAN config\n", ret); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index cccef32284616b..0208425ab594f5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -253,6 +253,7 @@ struct hclgevf_dev { int *vector_irq; bool gro_en; + bool rxvtag_strip_en; unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)]; From 8e6b9c6ea5a55045eed6526d8ee49e93192d1a58 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Wed, 30 Apr 2025 17:30:50 +0800 Subject: [PATCH 1380/2924] net: hns3: fix an interrupt residual problem When a VF is passthrough to a VM, and the VM is killed, the reported interrupt may not been handled, it will remain, and won't be clear by the nic engine even with a flr or tqp reset. When the VM restart, the interrupt of the first vector may be dropped by the second enable_irq in vfio, see the issue below: https://gitlab.com/qemu-project/qemu/-/issues/2884#note_2423361621 We notice that the vfio has always behaved this way, and the interrupt is a residue of the nic engine, so we fix the problem by moving the vector enable process out of the enable_irq loop. Fixes: 08a100689d4b ("net: hns3: re-organize vector handle") Signed-off-by: Yonglong Liu Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20250430093052.2400464-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hns3/hns3_enet.c | 82 +++++++++---------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 9ff797fb36c456..b03b8758c7774e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -473,20 +473,14 @@ static void hns3_mask_vector_irq(struct hns3_enet_tqp_vector *tqp_vector, writel(mask_en, tqp_vector->mask_addr); } -static void hns3_vector_enable(struct hns3_enet_tqp_vector *tqp_vector) +static void hns3_irq_enable(struct hns3_enet_tqp_vector *tqp_vector) { napi_enable(&tqp_vector->napi); enable_irq(tqp_vector->vector_irq); - - /* enable vector */ - hns3_mask_vector_irq(tqp_vector, 1); } -static void hns3_vector_disable(struct hns3_enet_tqp_vector *tqp_vector) +static void hns3_irq_disable(struct hns3_enet_tqp_vector *tqp_vector) { - /* disable vector */ - hns3_mask_vector_irq(tqp_vector, 0); - disable_irq(tqp_vector->vector_irq); napi_disable(&tqp_vector->napi); cancel_work_sync(&tqp_vector->rx_group.dim.work); @@ -707,11 +701,42 @@ static int hns3_set_rx_cpu_rmap(struct net_device *netdev) return 0; } +static void hns3_enable_irqs_and_tqps(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + u16 i; + + for (i = 0; i < priv->vector_num; i++) + hns3_irq_enable(&priv->tqp_vector[i]); + + for (i = 0; i < priv->vector_num; i++) + hns3_mask_vector_irq(&priv->tqp_vector[i], 1); + + for (i = 0; i < h->kinfo.num_tqps; i++) + hns3_tqp_enable(h->kinfo.tqp[i]); +} + +static void hns3_disable_irqs_and_tqps(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + u16 i; + + for (i = 0; i < h->kinfo.num_tqps; i++) + hns3_tqp_disable(h->kinfo.tqp[i]); + + for (i = 0; i < priv->vector_num; i++) + hns3_mask_vector_irq(&priv->tqp_vector[i], 0); + + for (i = 0; i < priv->vector_num; i++) + hns3_irq_disable(&priv->tqp_vector[i]); +} + static int hns3_nic_net_up(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; - int i, j; int ret; ret = hns3_nic_reset_all_ring(h); @@ -720,23 +745,13 @@ static int hns3_nic_net_up(struct net_device *netdev) clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - /* enable the vectors */ - for (i = 0; i < priv->vector_num; i++) - hns3_vector_enable(&priv->tqp_vector[i]); - - /* enable rcb */ - for (j = 0; j < h->kinfo.num_tqps; j++) - hns3_tqp_enable(h->kinfo.tqp[j]); + hns3_enable_irqs_and_tqps(netdev); /* start the ae_dev */ ret = h->ae_algo->ops->start ? h->ae_algo->ops->start(h) : 0; if (ret) { set_bit(HNS3_NIC_STATE_DOWN, &priv->state); - while (j--) - hns3_tqp_disable(h->kinfo.tqp[j]); - - for (j = i - 1; j >= 0; j--) - hns3_vector_disable(&priv->tqp_vector[j]); + hns3_disable_irqs_and_tqps(netdev); } return ret; @@ -823,17 +838,9 @@ static void hns3_reset_tx_queue(struct hnae3_handle *h) static void hns3_nic_net_down(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = hns3_get_handle(netdev); const struct hnae3_ae_ops *ops; - int i; - /* disable vectors */ - for (i = 0; i < priv->vector_num; i++) - hns3_vector_disable(&priv->tqp_vector[i]); - - /* disable rcb */ - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_disable(h->kinfo.tqp[i]); + hns3_disable_irqs_and_tqps(netdev); /* stop ae_dev */ ops = priv->ae_handle->ae_algo->ops; @@ -5864,8 +5871,6 @@ int hns3_set_channels(struct net_device *netdev, void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) { struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; - int i; if (!if_running) return; @@ -5876,11 +5881,7 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) netif_carrier_off(ndev); netif_tx_disable(ndev); - for (i = 0; i < priv->vector_num; i++) - hns3_vector_disable(&priv->tqp_vector[i]); - - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_disable(h->kinfo.tqp[i]); + hns3_disable_irqs_and_tqps(ndev); /* delay ring buffer clearing to hns3_reset_notify_uninit_enet * during reset process, because driver may not be able @@ -5896,7 +5897,6 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) { struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; - int i; if (!if_running) return; @@ -5912,11 +5912,7 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - for (i = 0; i < priv->vector_num; i++) - hns3_vector_enable(&priv->tqp_vector[i]); - - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_enable(h->kinfo.tqp[i]); + hns3_enable_irqs_and_tqps(ndev); netif_tx_wake_all_queues(ndev); From e317aebeefcb3b0c71f2305af3c22871ca6b3833 Mon Sep 17 00:00:00 2001 From: Hao Lan Date: Wed, 30 Apr 2025 17:30:51 +0800 Subject: [PATCH 1381/2924] net: hns3: fixed debugfs tm_qset size The size of the tm_qset file of debugfs is limited to 64 KB, which is too small in the scenario with 1280 qsets. The size needs to be expanded to 1 MB. Fixes: 5e69ea7ee2a6 ("net: hns3: refactor the debugfs process") Signed-off-by: Hao Lan Signed-off-by: Peiyang Wang Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20250430093052.2400464-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 09749e9f739868..4e5d8bc39a1bf3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -61,7 +61,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "tm_qset", .cmd = HNAE3_DBG_CMD_TM_QSET, .dentry = HNS3_DBG_DENTRY_TM, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_1MB, .init = hns3_dbg_common_file_init, }, { From 4971394d9d624f91689d766f31ce668d169d9959 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 30 Apr 2025 17:30:52 +0800 Subject: [PATCH 1382/2924] net: hns3: defer calling ptp_clock_register() Currently the ptp_clock_register() is called before relative ptp resource ready. It may cause unexpected result when upper layer called the ptp API during the timewindow. Fix it by moving the ptp_clock_register() to the function end. Fixes: 0bf5eb788512 ("net: hns3: add support for PTP") Signed-off-by: Jian Shen Signed-off-by: Jijie Shao Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250430093052.2400464-5-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 59cc9221185f28..ec581d4b696f59 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -440,6 +440,13 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) ptp->info.settime64 = hclge_ptp_settime; ptp->info.n_alarm = 0; + + spin_lock_init(&ptp->lock); + ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; + ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; + ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; + hdev->ptp = ptp; + ptp->clock = ptp_clock_register(&ptp->info, &hdev->pdev->dev); if (IS_ERR(ptp->clock)) { dev_err(&hdev->pdev->dev, @@ -451,12 +458,6 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) return -ENODEV; } - spin_lock_init(&ptp->lock); - ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; - ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; - ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; - hdev->ptp = ptp; - return 0; } From 55f362885951b2d00fd7fbb02ef0227deea572c2 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:40 +0200 Subject: [PATCH 1383/2924] net: vertexcom: mse102x: Fix possible stuck of SPI interrupt The MSE102x doesn't provide any SPI commands for interrupt handling. So in case the interrupt fired before the driver requests the IRQ, the interrupt will never fire again. In order to fix this always poll for pending packets after opening the interface. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-2-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/vertexcom/mse102x.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 89dc4c401a8de4..92ebf163315989 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -509,6 +509,7 @@ static irqreturn_t mse102x_irq(int irq, void *_mse) static int mse102x_net_open(struct net_device *ndev) { struct mse102x_net *mse = netdev_priv(ndev); + struct mse102x_net_spi *mses = to_mse102x_spi(mse); int ret; ret = request_threaded_irq(ndev->irq, NULL, mse102x_irq, IRQF_ONESHOT, @@ -524,6 +525,13 @@ static int mse102x_net_open(struct net_device *ndev) netif_carrier_on(ndev); + /* The SPI interrupt can stuck in case of pending packet(s). + * So poll for possible packet(s) to re-arm the interrupt. + */ + mutex_lock(&mses->lock); + mse102x_rx_pkt_spi(mse); + mutex_unlock(&mses->lock); + netif_dbg(mse, ifup, ndev, "network device up\n"); return 0; From 74987089ec678b4018dba0a609e9f4bf6ef7f4ad Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:41 +0200 Subject: [PATCH 1384/2924] net: vertexcom: mse102x: Fix LEN_MASK The LEN_MASK for CMD_RTS doesn't cover the whole parameter mask. The Bit 11 is reserved, so adjust LEN_MASK accordingly. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-3-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/vertexcom/mse102x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 92ebf163315989..3edf2c3753f0eb 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -33,7 +33,7 @@ #define CMD_CTR (0x2 << CMD_SHIFT) #define CMD_MASK GENMASK(15, CMD_SHIFT) -#define LEN_MASK GENMASK(CMD_SHIFT - 1, 0) +#define LEN_MASK GENMASK(CMD_SHIFT - 2, 0) #define DET_CMD_LEN 4 #define DET_SOF_LEN 2 From d4dda902dac194e3231a1ed0f76c6c3b6340ba8a Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:42 +0200 Subject: [PATCH 1385/2924] net: vertexcom: mse102x: Add range check for CMD_RTS Since there is no protection in the SPI protocol against electrical interferences, the driver shouldn't blindly trust the length payload of CMD_RTS. So introduce a bounds check for incoming frames. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-4-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/vertexcom/mse102x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 3edf2c3753f0eb..2c06d1d05164fc 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -337,8 +338,9 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) } rxlen = cmd_resp & LEN_MASK; - if (!rxlen) { - net_dbg_ratelimited("%s: No frame length defined\n", __func__); + if (rxlen < ETH_ZLEN || rxlen > VLAN_ETH_FRAME_LEN) { + net_dbg_ratelimited("%s: Invalid frame length: %d\n", __func__, + rxlen); mse->stats.invalid_len++; return; } From ee512922ddd7d64afe2b28830a88f19063217649 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 30 Apr 2025 15:30:43 +0200 Subject: [PATCH 1386/2924] net: vertexcom: mse102x: Fix RX error handling In case the CMD_RTS got corrupted by interferences, the MSE102x doesn't allow a retransmission of the command. Instead the Ethernet frame must be shifted out of the SPI FIFO. Since the actual length is unknown, assume the maximum possible value. Fixes: 2f207cbf0dd4 ("net: vertexcom: Add MSE102x SPI support") Signed-off-by: Stefan Wahren Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250430133043.7722-5-wahrenst@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/vertexcom/mse102x.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 2c06d1d05164fc..e4d993f3137407 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -263,7 +263,7 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, } static int mse102x_rx_frame_spi(struct mse102x_net *mse, u8 *buff, - unsigned int frame_len) + unsigned int frame_len, bool drop) { struct mse102x_net_spi *mses = to_mse102x_spi(mse); struct spi_transfer *xfer = &mses->spi_xfer; @@ -281,6 +281,9 @@ static int mse102x_rx_frame_spi(struct mse102x_net *mse, u8 *buff, netdev_err(mse->ndev, "%s: spi_sync() failed: %d\n", __func__, ret); mse->stats.xfer_err++; + } else if (drop) { + netdev_dbg(mse->ndev, "%s: Drop frame\n", __func__); + ret = -EINVAL; } else if (*sof != cpu_to_be16(DET_SOF)) { netdev_dbg(mse->ndev, "%s: SPI start of frame is invalid (0x%04x)\n", __func__, *sof); @@ -308,6 +311,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) struct sk_buff *skb; unsigned int rxalign; unsigned int rxlen; + bool drop = false; __be16 rx = 0; u16 cmd_resp; u8 *rxpkt; @@ -330,7 +334,8 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) net_dbg_ratelimited("%s: Unexpected response (0x%04x)\n", __func__, cmd_resp); mse->stats.invalid_rts++; - return; + drop = true; + goto drop; } net_dbg_ratelimited("%s: Unexpected response to first CMD\n", @@ -342,9 +347,16 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) net_dbg_ratelimited("%s: Invalid frame length: %d\n", __func__, rxlen); mse->stats.invalid_len++; - return; + drop = true; } + /* In case of a invalid CMD_RTS, the frame must be consumed anyway. + * So assume the maximum possible frame length. + */ +drop: + if (drop) + rxlen = VLAN_ETH_FRAME_LEN; + rxalign = ALIGN(rxlen + DET_SOF_LEN + DET_DFT_LEN, 4); skb = netdev_alloc_skb_ip_align(mse->ndev, rxalign); if (!skb) @@ -355,7 +367,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) * They are copied, but ignored. */ rxpkt = skb_put(skb, rxlen) - DET_SOF_LEN; - if (mse102x_rx_frame_spi(mse, rxpkt, rxlen)) { + if (mse102x_rx_frame_spi(mse, rxpkt, rxlen, drop)) { mse->ndev->stats.rx_errors++; dev_kfree_skb(skb); return; From be593d9d91c5a3a363d456b9aceb71029aeb3f1d Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Thu, 17 Apr 2025 16:50:05 -0500 Subject: [PATCH 1387/2924] drm/amd/display: Fix slab-use-after-free in hdcp The HDCP code in amdgpu_dm_hdcp.c copies pointers to amdgpu_dm_connector objects without incrementing the kref reference counts. When using a USB-C dock, and the dock is unplugged, the corresponding amdgpu_dm_connector objects are freed, creating dangling pointers in the HDCP code. When the dock is plugged back, the dangling pointers are dereferenced, resulting in a slab-use-after-free: [ 66.775837] BUG: KASAN: slab-use-after-free in event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.776171] Read of size 4 at addr ffff888127804120 by task kworker/0:1/10 [ 66.776179] CPU: 0 UID: 0 PID: 10 Comm: kworker/0:1 Not tainted 6.14.0-rc7-00180-g54505f727a38-dirty #233 [ 66.776183] Hardware name: HP HP Pavilion Aero Laptop 13-be0xxx/8916, BIOS F.17 12/18/2024 [ 66.776186] Workqueue: events event_property_validate [amdgpu] [ 66.776494] Call Trace: [ 66.776496] [ 66.776497] dump_stack_lvl+0x70/0xa0 [ 66.776504] print_report+0x175/0x555 [ 66.776507] ? __virt_addr_valid+0x243/0x450 [ 66.776510] ? kasan_complete_mode_report_info+0x66/0x1c0 [ 66.776515] kasan_report+0xeb/0x1c0 [ 66.776518] ? event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.776819] ? event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.777121] __asan_report_load4_noabort+0x14/0x20 [ 66.777124] event_property_validate+0x42f/0x6c0 [amdgpu] [ 66.777342] ? __lock_acquire+0x6b40/0x6b40 [ 66.777347] ? enable_assr+0x250/0x250 [amdgpu] [ 66.777571] process_one_work+0x86b/0x1510 [ 66.777575] ? pwq_dec_nr_in_flight+0xcf0/0xcf0 [ 66.777578] ? assign_work+0x16b/0x280 [ 66.777580] ? lock_is_held_type+0xa3/0x130 [ 66.777583] worker_thread+0x5c0/0xfa0 [ 66.777587] ? process_one_work+0x1510/0x1510 [ 66.777588] kthread+0x3a2/0x840 [ 66.777591] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777594] ? trace_hardirqs_on+0x4f/0x60 [ 66.777597] ? _raw_spin_unlock_irq+0x27/0x60 [ 66.777599] ? calculate_sigpending+0x77/0xa0 [ 66.777602] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777605] ret_from_fork+0x40/0x90 [ 66.777607] ? kthread_is_per_cpu+0xd0/0xd0 [ 66.777609] ret_from_fork_asm+0x11/0x20 [ 66.777614] [ 66.777643] Allocated by task 10: [ 66.777646] kasan_save_stack+0x39/0x60 [ 66.777649] kasan_save_track+0x14/0x40 [ 66.777652] kasan_save_alloc_info+0x37/0x50 [ 66.777655] __kasan_kmalloc+0xbb/0xc0 [ 66.777658] __kmalloc_cache_noprof+0x1c8/0x4b0 [ 66.777661] dm_dp_add_mst_connector+0xdd/0x5c0 [amdgpu] [ 66.777880] drm_dp_mst_port_add_connector+0x47e/0x770 [drm_display_helper] [ 66.777892] drm_dp_send_link_address+0x1554/0x2bf0 [drm_display_helper] [ 66.777901] drm_dp_check_and_send_link_address+0x187/0x1f0 [drm_display_helper] [ 66.777909] drm_dp_mst_link_probe_work+0x2b8/0x410 [drm_display_helper] [ 66.777917] process_one_work+0x86b/0x1510 [ 66.777919] worker_thread+0x5c0/0xfa0 [ 66.777922] kthread+0x3a2/0x840 [ 66.777925] ret_from_fork+0x40/0x90 [ 66.777927] ret_from_fork_asm+0x11/0x20 [ 66.777932] Freed by task 1713: [ 66.777935] kasan_save_stack+0x39/0x60 [ 66.777938] kasan_save_track+0x14/0x40 [ 66.777940] kasan_save_free_info+0x3b/0x60 [ 66.777944] __kasan_slab_free+0x52/0x70 [ 66.777946] kfree+0x13f/0x4b0 [ 66.777949] dm_dp_mst_connector_destroy+0xfa/0x150 [amdgpu] [ 66.778179] drm_connector_free+0x7d/0xb0 [ 66.778184] drm_mode_object_put.part.0+0xee/0x160 [ 66.778188] drm_mode_object_put+0x37/0x50 [ 66.778191] drm_atomic_state_default_clear+0x220/0xd60 [ 66.778194] __drm_atomic_state_free+0x16e/0x2a0 [ 66.778197] drm_mode_atomic_ioctl+0x15ed/0x2ba0 [ 66.778200] drm_ioctl_kernel+0x17a/0x310 [ 66.778203] drm_ioctl+0x584/0xd10 [ 66.778206] amdgpu_drm_ioctl+0xd2/0x1c0 [amdgpu] [ 66.778375] __x64_sys_ioctl+0x139/0x1a0 [ 66.778378] x64_sys_call+0xee7/0xfb0 [ 66.778381] do_syscall_64+0x87/0x140 [ 66.778385] entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fix this by properly incrementing and decrementing the reference counts when making and deleting copies of the amdgpu_dm_connector pointers. (Mario: rebase on current code and update fixes tag) Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4006 Signed-off-by: Chris Bainbridge Fixes: da3fd7ac0bcf3 ("drm/amd/display: Update CP property based on HW query") Reviewed-by: Alex Hung Link: https://lore.kernel.org/r/20250417215005.37964-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit d4673f3c3b3dcb74e36e53cdfc880baa7a87b330) Cc: stable@vger.kernel.org --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 5198a079b46343..8f22ad9665430a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -173,6 +173,9 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -220,7 +223,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); - hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and * we'd expect the Content Protection (CP) property changed back to @@ -236,7 +238,10 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); - + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } process_output(hdcp_w); } @@ -254,6 +259,10 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } } process_output(hdcp_w); @@ -488,6 +497,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct hdcp_workqueue *hdcp_work = handle; struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx; int link_index = aconnector->dc_link->link_index; + unsigned int conn_index = aconnector->base.index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -544,7 +554,10 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); - + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = aconnector; process_output(hdcp_w); } From 9397204ffae887bd557e7053609174b3eb9d6f5c Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 17 Apr 2025 12:02:09 -0400 Subject: [PATCH 1388/2924] drm/amdgpu: Fail DMABUF map of XGMI-accessible memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If peer memory is XGMI-accessible, we should never access it through PCIe P2P DMA mappings. PCIe P2P is slower, has different coherence behaviour, limited or no support for atomics, or may not work at all. Fail with a warning if DMABUF mappings of such memory are attempted. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit dbe4c63689bc6b5fd3ab72650ea4b6a667e96a68) --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e6913fcf2c7be2..44e120f9f76497 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -199,6 +199,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, break; case TTM_PL_VRAM: + /* XGMI-accessible memory should never be DMA-mapped */ + if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible( + dma_buf_attach_adev(attach), bo))) + return ERR_PTR(-EINVAL); + r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0, bo->tbo.base.size, attach->dev, dir, &sgt); From 79af0604eb80ca1f86a1f265a0b1f9d4fccbc18f Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 21 Apr 2025 13:25:51 +0530 Subject: [PATCH 1389/2924] drm/amdgpu: Fix offset for HDP remap in nbio v7.11 APUs in passthrough mode use HDP flush. 0x7F000 offset used for remapping HDP flush is mapped to VPE space which could get power gated. Use another unused offset in BIF space. Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit d8116a32cdbe456c7f511183eb9ab187e3d590fb) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 2ece3ae75ec125..bed5ef4d878892 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -360,7 +360,7 @@ static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_BIF_LS; } -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) +#define MMIO_REG_HOLE_OFFSET 0x44000 static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev) { From 6718b10a5b98ad6629cd6b2004b0628fe68beac0 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Wed, 23 Apr 2025 12:32:01 -0400 Subject: [PATCH 1390/2924] drm/amdgpu: Add DPG pause for VCN v5.0.1 For vcn5.0.1 only, enable DPG PAUSE to avoid DPG resets. Signed-off-by: Sonny Jiang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher (cherry picked from commit 3e5f86c14c3440171f2a3e7a68ceb739297726e9) --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 54 +++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 581d8629b9d956..e0e84ef7f56866 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -502,6 +502,52 @@ static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { } +/** + * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode + * + * @vinst: VCN instance + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) +{ + struct amdgpu_device *adev = vinst->adev; + uint32_t reg_data = 0; + int vcn_inst; + + vcn_inst = GET_INST(VCN, vinst->inst); + + /* pause/unpause if state is changed */ + if (vinst->pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n", + vinst->pause_state.fw_based, new_state->fw_based, + new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE"); + reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } else { + /* unpause DPG, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + } + vinst->pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + + /** * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode * @@ -518,6 +564,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; int vcn_inst; uint32_t tmp; @@ -582,6 +629,9 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + /* Pause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); @@ -775,9 +825,13 @@ static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) int inst_idx = vinst->inst; uint32_t tmp; int vcn_inst; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; vcn_inst = GET_INST(VCN, inst_idx); + /* Unpause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); From 59820fde001500c167342257650541280c622b73 Mon Sep 17 00:00:00 2001 From: Wayne Chang Date: Fri, 18 Apr 2025 16:12:28 +0800 Subject: [PATCH 1391/2924] usb: gadget: tegra-xudc: ACK ST_RC after clearing CTRL_RUN We identified a bug where the ST_RC bit in the status register was not being acknowledged after clearing the CTRL_RUN bit in the control register. This could lead to unexpected behavior in the USB gadget drivers. This patch resolves the issue by adding the necessary code to explicitly acknowledge ST_RC after clearing CTRL_RUN based on the programming sequence, ensuring proper state transition. Fixes: 49db427232fe ("usb: gadget: Add UDC driver for tegra XUSB device mode controller") Cc: stable Signed-off-by: Wayne Chang Link: https://lore.kernel.org/r/20250418081228.1194779-1-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/tegra-xudc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index c7fdbc55fb0b97..2957316fd3d003 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -1749,6 +1749,10 @@ static int __tegra_xudc_ep_disable(struct tegra_xudc_ep *ep) val = xudc_readl(xudc, CTRL); val &= ~CTRL_RUN; xudc_writel(xudc, val, CTRL); + + val = xudc_readl(xudc, ST); + if (val & ST_RC) + xudc_writel(xudc, ST_RC, ST); } dev_info(xudc->dev, "ep %u disabled\n", ep->index); From 241e2ce88e5a494be7a5d44c0697592f1632fbee Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 18 Apr 2025 04:55:16 +0000 Subject: [PATCH 1392/2924] usb: cdnsp: Fix issue with resuming from L1 In very rare cases after resuming controller from L1 to L0 it reads registers before the clock UTMI have been enabled and as the result driver reads incorrect value. Most of registers are in APB domain clock but some of them (e.g. PORTSC) are in UTMI domain clock. After entering to L1 state the UTMI clock can be disabled. When controller transition from L1 to L0 the port status change event is reported and in interrupt runtime function driver reads PORTSC. During this read operation controller synchronize UTMI and APB domain but UTMI clock is still disabled and in result it reads 0xFFFFFFFF value. To fix this issue driver increases APB timeout value. The issue is platform specific and if the default value of APB timeout is not sufficient then this time should be set Individually for each platform. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB953846C57973E4DB134CAA71DDBF2@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 29 +++++++++++++++++++++++++++++ drivers/usb/cdns3/cdnsp-gadget.h | 3 +++ drivers/usb/cdns3/cdnsp-pci.c | 12 ++++++++++-- drivers/usb/cdns3/core.h | 3 +++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 87f31084173509..7f5534db2086e8 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -139,6 +139,26 @@ static void cdnsp_clear_port_change_bit(struct cdnsp_device *pdev, (portsc & PORT_CHANGE_BITS), port_regs); } +static void cdnsp_set_apb_timeout_value(struct cdnsp_device *pdev) +{ + struct cdns *cdns = dev_get_drvdata(pdev->dev); + __le32 __iomem *reg; + void __iomem *base; + u32 offset = 0; + u32 val; + + if (!cdns->override_apb_timeout) + return; + + base = &pdev->cap_regs->hc_capbase; + offset = cdnsp_find_next_ext_cap(base, offset, D_XEC_PRE_REGS_CAP); + reg = base + offset + REG_CHICKEN_BITS_3_OFFSET; + + val = le32_to_cpu(readl(reg)); + val = CHICKEN_APB_TIMEOUT_SET(val, cdns->override_apb_timeout); + writel(cpu_to_le32(val), reg); +} + static void cdnsp_set_chicken_bits_2(struct cdnsp_device *pdev, u32 bit) { __le32 __iomem *reg; @@ -1798,6 +1818,15 @@ static int cdnsp_gen_setup(struct cdnsp_device *pdev) pdev->hci_version = HC_VERSION(pdev->hcc_params); pdev->hcc_params = readl(&pdev->cap_regs->hcc_params); + /* + * Override the APB timeout value to give the controller more time for + * enabling UTMI clock and synchronizing APB and UTMI clock domains. + * This fix is platform specific and is required to fixes issue with + * reading incorrect value from PORTSC register after resuming + * from L1 state. + */ + cdnsp_set_apb_timeout_value(pdev); + cdnsp_get_rev_cap(pdev); /* Make sure the Device Controller is halted. */ diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 84887dfea7635b..87ac0cd113e770 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -520,6 +520,9 @@ struct cdnsp_rev_cap { #define REG_CHICKEN_BITS_2_OFFSET 0x48 #define CHICKEN_XDMA_2_TP_CACHE_DIS BIT(28) +#define REG_CHICKEN_BITS_3_OFFSET 0x4C +#define CHICKEN_APB_TIMEOUT_SET(p, val) (((p) & ~GENMASK(21, 0)) | (val)) + /* XBUF Extended Capability ID. */ #define XBUF_CAP_ID 0xCB #define XBUF_RX_TAG_MASK_0_OFFSET 0x1C diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c index a51144504ff337..8c361b8394e959 100644 --- a/drivers/usb/cdns3/cdnsp-pci.c +++ b/drivers/usb/cdns3/cdnsp-pci.c @@ -28,6 +28,8 @@ #define PCI_DRIVER_NAME "cdns-pci-usbssp" #define PLAT_DRIVER_NAME "cdns-usbssp" +#define CHICKEN_APB_TIMEOUT_VALUE 0x1C20 + static struct pci_dev *cdnsp_get_second_fun(struct pci_dev *pdev) { /* @@ -139,6 +141,14 @@ static int cdnsp_pci_probe(struct pci_dev *pdev, cdnsp->otg_irq = pdev->irq; } + /* + * Cadence PCI based platform require some longer timeout for APB + * to fixes domain clock synchronization issue after resuming + * controller from L1 state. + */ + cdnsp->override_apb_timeout = CHICKEN_APB_TIMEOUT_VALUE; + pci_set_drvdata(pdev, cdnsp); + if (pci_is_enabled(func)) { cdnsp->dev = dev; cdnsp->gadget_init = cdnsp_gadget_init; @@ -148,8 +158,6 @@ static int cdnsp_pci_probe(struct pci_dev *pdev, goto free_cdnsp; } - pci_set_drvdata(pdev, cdnsp); - device_wakeup_enable(&pdev->dev); if (pci_dev_run_wake(pdev)) pm_runtime_put_noidle(&pdev->dev); diff --git a/drivers/usb/cdns3/core.h b/drivers/usb/cdns3/core.h index 921cccf1ca9db2..801be9e61340e7 100644 --- a/drivers/usb/cdns3/core.h +++ b/drivers/usb/cdns3/core.h @@ -79,6 +79,8 @@ struct cdns3_platform_data { * @pdata: platform data from glue layer * @lock: spinlock structure * @xhci_plat_data: xhci private data structure pointer + * @override_apb_timeout: hold value of APB timeout. For value 0 the default + * value in CHICKEN_BITS_3 will be preserved. * @gadget_init: pointer to gadget initialization function */ struct cdns { @@ -117,6 +119,7 @@ struct cdns { struct cdns3_platform_data *pdata; spinlock_t lock; struct xhci_plat_priv *xhci_plat_data; + u32 override_apb_timeout; int (*gadget_init)(struct cdns *cdns); }; From 732f35cf8bdfece582f6e4a9c659119036577308 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Tue, 22 Apr 2025 19:40:01 +0800 Subject: [PATCH 1393/2924] usb: host: tegra: Prevent host controller crash when OTG port is used When a USB device is connected to the OTG port, the tegra_xhci_id_work() routine transitions the PHY to host mode and calls xhci_hub_control() with the SetPortFeature command to enable port power. In certain cases, the XHCI controller may be in a low-power state when this operation occurs. If xhci_hub_control() is invoked while the controller is suspended, the PORTSC register may return 0xFFFFFFFF, indicating a read failure. This causes xhci_hc_died() to be triggered, leading to host controller shutdown. Example backtrace: [ 105.445736] Workqueue: events tegra_xhci_id_work [ 105.445747] dump_backtrace+0x0/0x1e8 [ 105.445759] xhci_hc_died.part.48+0x40/0x270 [ 105.445769] tegra_xhci_set_port_power+0xc0/0x240 [ 105.445774] tegra_xhci_id_work+0x130/0x240 To prevent this, ensure the controller is fully resumed before interacting with hardware registers by calling pm_runtime_get_sync() prior to the host mode transition and xhci_hub_control(). Fixes: f836e7843036 ("usb: xhci-tegra: Add OTG support") Cc: stable Signed-off-by: Jim Lin Signed-off-by: Wayne Chang Link: https://lore.kernel.org/r/20250422114001.126367-1-waynec@nvidia.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index b5c362c2051d70..0c7af44d4dae50 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1364,6 +1364,7 @@ static void tegra_xhci_id_work(struct work_struct *work) tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion(tegra->padctl, tegra->otg_usb2_port); + pm_runtime_get_sync(tegra->dev); if (tegra->host_mode) { /* switch to host mode */ if (tegra->otg_usb3_port >= 0) { @@ -1393,6 +1394,7 @@ static void tegra_xhci_id_work(struct work_struct *work) } tegra_xhci_set_port_power(tegra, true, true); + pm_runtime_mark_last_busy(tegra->dev); } else { if (tegra->otg_usb3_port >= 0) @@ -1400,6 +1402,7 @@ static void tegra_xhci_id_work(struct work_struct *work) tegra_xhci_set_port_power(tegra, true, false); } + pm_runtime_put_autosuspend(tegra->dev); } #if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_PM_SLEEP) From 8e3820271c517ceb89ab7442656ba49fa23ee1d0 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 22 Apr 2025 16:02:29 +0530 Subject: [PATCH 1394/2924] usb: gadget: f_ecm: Add get_status callback When host sends GET_STATUS to ECM interface, handle the request from the function driver. Since the interface is wakeup capable, set the corresponding bit, and set RW bit if the function is already armed for wakeup by the host. Cc: stable Fixes: 481c225c4802 ("usb: gadget: Handle function suspend feature selector") Signed-off-by: Prashanth K Reviewed-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250422103231.1954387-2-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ecm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index 80841de845b091..027226325039f0 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -892,6 +892,12 @@ static void ecm_resume(struct usb_function *f) gether_resume(&ecm->port); } +static int ecm_get_status(struct usb_function *f) +{ + return (f->func_wakeup_armed ? USB_INTRF_STAT_FUNC_RW : 0) | + USB_INTRF_STAT_FUNC_RW_CAP; +} + static void ecm_free(struct usb_function *f) { struct f_ecm *ecm; @@ -960,6 +966,7 @@ static struct usb_function *ecm_alloc(struct usb_function_instance *fi) ecm->port.func.disable = ecm_disable; ecm->port.func.free_func = ecm_free; ecm->port.func.suspend = ecm_suspend; + ecm->port.func.get_status = ecm_get_status; ecm->port.func.resume = ecm_resume; return &ecm->port.func; From 5977a58dd5a4865198b0204b998adb0f634abe19 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 22 Apr 2025 16:02:30 +0530 Subject: [PATCH 1395/2924] usb: gadget: Use get_status callback to set remote wakeup capability Currently when the host sends GET_STATUS request for an interface, we use get_status callbacks to set/clear remote wakeup capability of that interface. And if get_status callback isn't present for that interface, then we assume its remote wakeup capability based on bmAttributes. Now consider a scenario, where we have a USB configuration with multiple interfaces (say ECM + ADB), here ECM is remote wakeup capable and as of now ADB isn't. And bmAttributes will indicate the device as wakeup capable. With the current implementation, when host sends GET_STATUS request for both interfaces, we will set FUNC_RW_CAP for both. This results in USB3 CV Chapter 9.15 (Function Remote Wakeup Test) failures as host expects remote wakeup from both interfaces. The above scenario is just an example, and the failure can be observed if we use configuration with any interface except ECM. Hence avoid configuring remote wakeup capability from composite driver based on bmAttributes, instead use get_status callbacks and let the function drivers decide this. Cc: stable Fixes: 481c225c4802 ("usb: gadget: Handle function suspend feature selector") Signed-off-by: Prashanth K Reviewed-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250422103231.1954387-3-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 869ad99afb48bb..8dbc132a505e39 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2011,15 +2011,13 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) if (f->get_status) { status = f->get_status(f); + if (status < 0) break; - } else { - /* Set D0 and D1 bits based on func wakeup capability */ - if (f->config->bmAttributes & USB_CONFIG_ATT_WAKEUP) { - status |= USB_INTRF_STAT_FUNC_RW_CAP; - if (f->func_wakeup_armed) - status |= USB_INTRF_STAT_FUNC_RW; - } + + /* if D5 is not set, then device is not wakeup capable */ + if (!(f->config->bmAttributes & USB_CONFIG_ATT_WAKEUP)) + status &= ~(USB_INTRF_STAT_FUNC_RW_CAP | USB_INTRF_STAT_FUNC_RW); } put_unaligned_le16(status & 0x0000ffff, req->buf); From 2372f1caeca433c4c01c2482f73fbe057f5168ce Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 22 Apr 2025 16:02:31 +0530 Subject: [PATCH 1396/2924] usb: dwc3: gadget: Make gadget_wakeup asynchronous Currently gadget_wakeup() waits for U0 synchronously if it was called from func_wakeup(), this is because we need to send the function wakeup command soon after the link is active. And the call is made synchronous by polling DSTS continuosly for 20000 times in __dwc3_gadget_wakeup(). But it observed that sometimes the link is not active even after polling 20K times, leading to remote wakeup failures. Adding a small delay between each poll helps, but that won't guarantee resolution in future. Hence make the gadget_wakeup completely asynchronous. Since multiple interfaces can issue a function wakeup at once, add a new variable wakeup_pending_funcs which will indicate the functions that has issued func_wakup, this is represented in a bitmap format. If the link is in U3, dwc3_gadget_func_wakeup() will set the bit corresponding to interface_id and bail out. Once link comes back to U0, linksts_change irq is triggered, where the function wakeup command is sent based on bitmap. Cc: stable Fixes: 92c08a84b53e ("usb: dwc3: Add function suspend and function wakeup support") Signed-off-by: Prashanth K Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20250422103231.1954387-4-prashanth.k@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 4 +++ drivers/usb/dwc3/gadget.c | 60 +++++++++++++++------------------------ 2 files changed, 27 insertions(+), 37 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index aaa39e663f60a5..27eae4cf223dfd 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1164,6 +1164,9 @@ struct dwc3_scratchpad_array { * @gsbuscfg0_reqinfo: store GSBUSCFG0.DATRDREQINFO, DESRDREQINFO, * DATWRREQINFO, and DESWRREQINFO value passed from * glue driver. + * @wakeup_pending_funcs: Indicates whether any interface has requested for + * function wakeup in bitmap format where bit position + * represents interface_id. */ struct dwc3 { struct work_struct drd_work; @@ -1394,6 +1397,7 @@ struct dwc3 { int num_ep_resized; struct dentry *debug_root; u32 gsbuscfg0_reqinfo; + u32 wakeup_pending_funcs; }; #define INCRX_BURST_MODE 0 diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8c30d86cc4e3a0..321361288935db 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -276,8 +276,6 @@ int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned int cmd, return ret; } -static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async); - /** * dwc3_send_gadget_ep_cmd - issue an endpoint command * @dep: the endpoint to which the command is going to be issued @@ -2359,10 +2357,8 @@ static int dwc3_gadget_get_frame(struct usb_gadget *g) return __dwc3_gadget_get_frame(dwc); } -static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async) +static int __dwc3_gadget_wakeup(struct dwc3 *dwc) { - int retries; - int ret; u32 reg; @@ -2390,8 +2386,7 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async) return -EINVAL; } - if (async) - dwc3_gadget_enable_linksts_evts(dwc, true); + dwc3_gadget_enable_linksts_evts(dwc, true); ret = dwc3_gadget_set_link_state(dwc, DWC3_LINK_STATE_RECOV); if (ret < 0) { @@ -2410,27 +2405,8 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async) /* * Since link status change events are enabled we will receive - * an U0 event when wakeup is successful. So bail out. + * an U0 event when wakeup is successful. */ - if (async) - return 0; - - /* poll until Link State changes to ON */ - retries = 20000; - - while (retries--) { - reg = dwc3_readl(dwc->regs, DWC3_DSTS); - - /* in HS, means ON */ - if (DWC3_DSTS_USBLNKST(reg) == DWC3_LINK_STATE_U0) - break; - } - - if (DWC3_DSTS_USBLNKST(reg) != DWC3_LINK_STATE_U0) { - dev_err(dwc->dev, "failed to send remote wakeup\n"); - return -EINVAL; - } - return 0; } @@ -2451,7 +2427,7 @@ static int dwc3_gadget_wakeup(struct usb_gadget *g) spin_unlock_irqrestore(&dwc->lock, flags); return -EINVAL; } - ret = __dwc3_gadget_wakeup(dwc, true); + ret = __dwc3_gadget_wakeup(dwc); spin_unlock_irqrestore(&dwc->lock, flags); @@ -2479,14 +2455,10 @@ static int dwc3_gadget_func_wakeup(struct usb_gadget *g, int intf_id) */ link_state = dwc3_gadget_get_link_state(dwc); if (link_state == DWC3_LINK_STATE_U3) { - ret = __dwc3_gadget_wakeup(dwc, false); - if (ret) { - spin_unlock_irqrestore(&dwc->lock, flags); - return -EINVAL; - } - dwc3_resume_gadget(dwc); - dwc->suspended = false; - dwc->link_state = DWC3_LINK_STATE_U0; + dwc->wakeup_pending_funcs |= BIT(intf_id); + ret = __dwc3_gadget_wakeup(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + return ret; } ret = dwc3_send_gadget_generic_command(dwc, DWC3_DGCMD_DEV_NOTIFICATION, @@ -4353,6 +4325,8 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc, { enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK; unsigned int pwropt; + int ret; + int intf_id; /* * WORKAROUND: DWC3 < 2.50a have an issue when configured without @@ -4428,7 +4402,7 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc, switch (next) { case DWC3_LINK_STATE_U0: - if (dwc->gadget->wakeup_armed) { + if (dwc->gadget->wakeup_armed || dwc->wakeup_pending_funcs) { dwc3_gadget_enable_linksts_evts(dwc, false); dwc3_resume_gadget(dwc); dwc->suspended = false; @@ -4451,6 +4425,18 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc, } dwc->link_state = next; + + /* Proceed with func wakeup if any interfaces that has requested */ + while (dwc->wakeup_pending_funcs && (next == DWC3_LINK_STATE_U0)) { + intf_id = ffs(dwc->wakeup_pending_funcs) - 1; + ret = dwc3_send_gadget_generic_command(dwc, DWC3_DGCMD_DEV_NOTIFICATION, + DWC3_DGCMDPAR_DN_FUNC_WAKE | + DWC3_DGCMDPAR_INTF_SEL(intf_id)); + if (ret) + dev_err(dwc->dev, "Failed to send DN wake for intf %d\n", intf_id); + + dwc->wakeup_pending_funcs &= ~BIT(intf_id); + } } static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc, From a5c7973539b010874a37a0e846e62ac6f00553ba Mon Sep 17 00:00:00 2001 From: Alexey Charkov Date: Fri, 25 Apr 2025 18:11:11 +0400 Subject: [PATCH 1397/2924] usb: uhci-platform: Make the clock really optional Device tree bindings state that the clock is optional for UHCI platform controllers, and some existing device trees don't provide those - such as those for VIA/WonderMedia devices. The driver however fails to probe now if no clock is provided, because devm_clk_get returns an error pointer in such case. Switch to devm_clk_get_optional instead, so that it could probe again on those platforms where no clocks are given. Cc: stable Fixes: 26c502701c52 ("usb: uhci: Add clk support to uhci-platform") Signed-off-by: Alexey Charkov Link: https://lore.kernel.org/r/20250425-uhci-clock-optional-v1-1-a1d462592f29@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/uhci-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c index a7c934404ebc7e..62318291f5664c 100644 --- a/drivers/usb/host/uhci-platform.c +++ b/drivers/usb/host/uhci-platform.c @@ -121,7 +121,7 @@ static int uhci_hcd_platform_probe(struct platform_device *pdev) } /* Get and enable clock if any specified */ - uhci->clk = devm_clk_get(&pdev->dev, NULL); + uhci->clk = devm_clk_get_optional(&pdev->dev, NULL); if (IS_ERR(uhci->clk)) { ret = PTR_ERR(uhci->clk); goto err_rmr; From 9f657a92805cfc98e11cf5da9e8f4e02ecff2260 Mon Sep 17 00:00:00 2001 From: Lukasz Czechowski Date: Fri, 25 Apr 2025 17:18:06 +0200 Subject: [PATCH 1398/2924] usb: misc: onboard_usb_dev: fix support for Cypress HX3 hubs The Cypress HX3 USB3.0 hubs use different PID values depending on the product variant. The comment in compatibles table is misleading, as the currently used PIDs (0x6504 and 0x6506 for USB 3.0 and USB 2.0, respectively) are defaults for the CYUSB331x, while CYUSB330x and CYUSB332x variants use different values. Based on the datasheet [1], update the compatible usb devices table to handle different types of the hub. The change also includes vendor mode PIDs, which are used by the hub in I2C Master boot mode, if connected EEPROM contains invalid signature or is blank. This allows to correctly boot the hub even if the EEPROM will have broken content. Number of vcc supplies and timing requirements are the same for all HX variants, so the platform driver's match table does not have to be extended. [1] https://www.infineon.com/dgdl/Infineon-HX3_USB_3_0_Hub_Consumer_Industrial-DataSheet-v22_00-EN.pdf?fileId=8ac78c8c7d0d8da4017d0ecb53f644b8 Table 9. PID Values Fixes: b43cd82a1a40 ("usb: misc: onboard-hub: add support for Cypress HX3 USB 3.0 family") Cc: stable Signed-off-by: Lukasz Czechowski Link: https://lore.kernel.org/r/20250425-onboard_usb_dev-v2-1-4a76a474a010@thaumatec.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/onboard_usb_dev.c b/drivers/usb/misc/onboard_usb_dev.c index 75ac3c6aa92d0d..f5372dfa241a9c 100644 --- a/drivers/usb/misc/onboard_usb_dev.c +++ b/drivers/usb/misc/onboard_usb_dev.c @@ -569,8 +569,14 @@ static void onboard_dev_usbdev_disconnect(struct usb_device *udev) } static const struct usb_device_id onboard_dev_id_table[] = { - { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6504) }, /* CYUSB33{0,1,2}x/CYUSB230x 3.0 HUB */ - { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6506) }, /* CYUSB33{0,1,2}x/CYUSB230x 2.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6500) }, /* CYUSB330x 3.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6502) }, /* CYUSB330x 2.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6503) }, /* CYUSB33{0,1}x 2.0 HUB, Vendor Mode */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6504) }, /* CYUSB331x 3.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6506) }, /* CYUSB331x 2.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6507) }, /* CYUSB332x 2.0 HUB, Vendor Mode */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6508) }, /* CYUSB332x 3.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x650a) }, /* CYUSB332x 2.0 HUB */ { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6570) }, /* CY7C6563x 2.0 HUB */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 HUB */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 HUB */ From 364618c89d4c57c85e5fc51a2446cd939bf57802 Mon Sep 17 00:00:00 2001 From: Andrei Kuchynski Date: Thu, 24 Apr 2025 08:44:28 +0000 Subject: [PATCH 1399/2924] usb: typec: ucsi: displayport: Fix deadlock This patch introduces the ucsi_con_mutex_lock / ucsi_con_mutex_unlock functions to the UCSI driver. ucsi_con_mutex_lock ensures the connector mutex is only locked if a connection is established and the partner pointer is valid. This resolves a deadlock scenario where ucsi_displayport_remove_partner holds con->mutex waiting for dp_altmode_work to complete while dp_altmode_work attempts to acquire it. Cc: stable Fixes: af8622f6a585 ("usb: typec: ucsi: Support for DisplayPort alt mode") Signed-off-by: Andrei Kuchynski Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250424084429.3220757-2-akuchynski@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/displayport.c | 19 +++++++++------- drivers/usb/typec/ucsi/ucsi.c | 34 ++++++++++++++++++++++++++++ drivers/usb/typec/ucsi/ucsi.h | 2 ++ 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 420af5139c70a3..acd053d4e38c30 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -54,7 +54,8 @@ static int ucsi_displayport_enter(struct typec_altmode *alt, u32 *vdo) u8 cur = 0; int ret; - mutex_lock(&dp->con->lock); + if (!ucsi_con_mutex_lock(dp->con)) + return -ENOTCONN; if (!dp->override && dp->initialized) { const struct typec_altmode *p = typec_altmode_get_partner(alt); @@ -100,7 +101,7 @@ static int ucsi_displayport_enter(struct typec_altmode *alt, u32 *vdo) schedule_work(&dp->work); ret = 0; err_unlock: - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return ret; } @@ -112,7 +113,8 @@ static int ucsi_displayport_exit(struct typec_altmode *alt) u64 command; int ret = 0; - mutex_lock(&dp->con->lock); + if (!ucsi_con_mutex_lock(dp->con)) + return -ENOTCONN; if (!dp->override) { const struct typec_altmode *p = typec_altmode_get_partner(alt); @@ -144,7 +146,7 @@ static int ucsi_displayport_exit(struct typec_altmode *alt) schedule_work(&dp->work); out_unlock: - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return ret; } @@ -202,20 +204,21 @@ static int ucsi_displayport_vdm(struct typec_altmode *alt, int cmd = PD_VDO_CMD(header); int svdm_version; - mutex_lock(&dp->con->lock); + if (!ucsi_con_mutex_lock(dp->con)) + return -ENOTCONN; if (!dp->override && dp->initialized) { const struct typec_altmode *p = typec_altmode_get_partner(alt); dev_warn(&p->dev, "firmware doesn't support alternate mode overriding\n"); - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return -EOPNOTSUPP; } svdm_version = typec_altmode_get_svdm_version(alt); if (svdm_version < 0) { - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return svdm_version; } @@ -259,7 +262,7 @@ static int ucsi_displayport_vdm(struct typec_altmode *alt, break; } - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return 0; } diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index e8c7e9dc49309c..01ce858a1a2b34 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1922,6 +1922,40 @@ void ucsi_set_drvdata(struct ucsi *ucsi, void *data) } EXPORT_SYMBOL_GPL(ucsi_set_drvdata); +/** + * ucsi_con_mutex_lock - Acquire the connector mutex + * @con: The connector interface to lock + * + * Returns true on success, false if the connector is disconnected + */ +bool ucsi_con_mutex_lock(struct ucsi_connector *con) +{ + bool mutex_locked = false; + bool connected = true; + + while (connected && !mutex_locked) { + mutex_locked = mutex_trylock(&con->lock) != 0; + connected = UCSI_CONSTAT(con, CONNECTED); + if (connected && !mutex_locked) + msleep(20); + } + + connected = connected && con->partner; + if (!connected && mutex_locked) + mutex_unlock(&con->lock); + + return connected; +} + +/** + * ucsi_con_mutex_unlock - Release the connector mutex + * @con: The connector interface to unlock + */ +void ucsi_con_mutex_unlock(struct ucsi_connector *con) +{ + mutex_unlock(&con->lock); +} + /** * ucsi_create - Allocate UCSI instance * @dev: Device interface to the PPM (Platform Policy Manager) diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 3a2c1762bec1bf..9c5278a0c5d409 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -94,6 +94,8 @@ int ucsi_register(struct ucsi *ucsi); void ucsi_unregister(struct ucsi *ucsi); void *ucsi_get_drvdata(struct ucsi *ucsi); void ucsi_set_drvdata(struct ucsi *ucsi, void *data); +bool ucsi_con_mutex_lock(struct ucsi_connector *con); +void ucsi_con_mutex_unlock(struct ucsi_connector *con); void ucsi_connector_change(struct ucsi *ucsi, u8 num); From 312d79669e71283d05c05cc49a1a31e59e3d9e0e Mon Sep 17 00:00:00 2001 From: Andrei Kuchynski Date: Thu, 24 Apr 2025 08:44:29 +0000 Subject: [PATCH 1400/2924] usb: typec: ucsi: displayport: Fix NULL pointer access This patch ensures that the UCSI driver waits for all pending tasks in the ucsi_displayport_work workqueue to finish executing before proceeding with the partner removal. Cc: stable Fixes: af8622f6a585 ("usb: typec: ucsi: Support for DisplayPort alt mode") Signed-off-by: Andrei Kuchynski Reviewed-by: Heikki Krogerus Reviewed-by: Benson Leung Link: https://lore.kernel.org/r/20250424084429.3220757-3-akuchynski@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/displayport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index acd053d4e38c30..8aae80b457d74d 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -299,6 +299,8 @@ void ucsi_displayport_remove_partner(struct typec_altmode *alt) if (!dp) return; + cancel_work_sync(&dp->work); + dp->data.conf = 0; dp->data.status = 0; dp->initialized = false; From 8614ecdb1570e4fffe87ebdc62b613ed66f1f6a6 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 25 Apr 2025 05:55:40 +0000 Subject: [PATCH 1401/2924] usb: cdnsp: fix L1 resume issue for RTL_REVISION_NEW_LPM version The controllers with rtl version larger than RTL_REVISION_NEW_LPM (0x00002700) has bug which causes that controller doesn't resume from L1 state. It happens if after receiving LPM packet controller starts transitioning to L1 and in this moment the driver force resuming by write operation to PORTSC.PLS. It's corner case and happens when write operation to PORTSC occurs during device delay before transitioning to L1 after transmitting ACK time (TL1TokenRetry). Forcing transition from L1->L0 by driver for revision larger than RTL_REVISION_NEW_LPM is not needed, so driver can simply fix this issue through block call of cdnsp_force_l0_go function. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB9538B55C3A6E71F9ED29E980DD842@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 2 ++ drivers/usb/cdns3/cdnsp-gadget.h | 3 +++ drivers/usb/cdns3/cdnsp-ring.c | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 7f5534db2086e8..4824a10df07e7c 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1793,6 +1793,8 @@ static void cdnsp_get_rev_cap(struct cdnsp_device *pdev) reg += cdnsp_find_next_ext_cap(reg, 0, RTL_REV_CAP); pdev->rev_cap = reg; + pdev->rtl_revision = readl(&pdev->rev_cap->rtl_revision); + dev_info(pdev->dev, "Rev: %08x/%08x, eps: %08x, buff: %08x/%08x\n", readl(&pdev->rev_cap->ctrl_revision), readl(&pdev->rev_cap->rtl_revision), diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 87ac0cd113e770..12534be52f39df 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -1360,6 +1360,7 @@ struct cdnsp_port { * @rev_cap: Controller Capabilities Registers. * @hcs_params1: Cached register copies of read-only HCSPARAMS1 * @hcc_params: Cached register copies of read-only HCCPARAMS1 + * @rtl_revision: Cached controller rtl revision. * @setup: Temporary buffer for setup packet. * @ep0_preq: Internal allocated request used during enumeration. * @ep0_stage: ep0 stage during enumeration process. @@ -1414,6 +1415,8 @@ struct cdnsp_device { __u32 hcs_params1; __u32 hcs_params3; __u32 hcc_params; + #define RTL_REVISION_NEW_LPM 0x2700 + __u32 rtl_revision; /* Lock used in interrupt thread context. */ spinlock_t lock; struct usb_ctrlrequest setup; diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 46852529499d16..fd06cb85c4ea84 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -308,7 +308,8 @@ static bool cdnsp_ring_ep_doorbell(struct cdnsp_device *pdev, writel(db_value, reg_addr); - cdnsp_force_l0_go(pdev); + if (pdev->rtl_revision < RTL_REVISION_NEW_LPM) + cdnsp_force_l0_go(pdev); /* Doorbell was set. */ return true; From 054c5145540e5ad5b80adf23a5e3e2fc281fb8aa Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 30 Apr 2025 15:48:10 +0200 Subject: [PATCH 1402/2924] USB: usbtmc: use interruptible sleep in usbtmc_read usbtmc_read() calls usbtmc_generic_read() which uses interruptible sleep, but usbtmc_read() itself uses uninterruptble sleep for mutual exclusion between threads. That makes no sense. Both should use interruptible sleep. Fixes: 5b775f672cc99 ("USB: add USB test and measurement class driver") Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20250430134810.226015-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 34e46ef308abfd..634c3bcbb413e4 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1380,7 +1380,10 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf, if (!buffer) return -ENOMEM; - mutex_lock(&data->io_mutex); + retval = mutex_lock_interruptible(&data->io_mutex); + if (retval < 0) + goto exit_nolock; + if (data->zombie) { retval = -ENODEV; goto exit; @@ -1503,6 +1506,7 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf, exit: mutex_unlock(&data->io_mutex); +exit_nolock: kfree(buffer); return retval; } From e918d3959b5ae0e793b8f815ce62240e10ba03a4 Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 29 Apr 2025 23:47:01 +0000 Subject: [PATCH 1403/2924] usb: typec: tcpm: delay SNK_TRY_WAIT_DEBOUNCE to SRC_TRYWAIT transition This patch fixes Type-C Compliance Test TD 4.7.6 - Try.SNK DRP Connect SNKAS. The compliance tester moves into SNK_UNATTACHED during toggling and expects the PUT to apply Rp after tPDDebounce of detection. If the port is in SNK_TRY_WAIT_DEBOUNCE, it will move into SRC_TRYWAIT immediately and apply Rp. This violates TD 4.7.5.V.3, where the tester confirms that the PUT attaches Rp after the transitions to Unattached.SNK for tPDDebounce. Change the tcpm_set_state delay between SNK_TRY_WAIT_DEBOUNCE and SRC_TRYWAIT to tPDDebounce. Fixes: a0a3e04e6b2c ("staging: typec: tcpm: Check for Rp for tPDDebounce") Cc: stable Signed-off-by: RD Babiera Reviewed-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250429234703.3748506-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index a99db4e025cd04..8adf6f95463304 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5965,7 +5965,7 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, case SNK_TRY_WAIT_DEBOUNCE: if (!tcpm_port_is_sink(port)) { port->max_wait = 0; - tcpm_set_state(port, SRC_TRYWAIT, 0); + tcpm_set_state(port, SRC_TRYWAIT, PD_T_PD_DEBOUNCE); } break; case SRC_TRY_WAIT: From 95deee37a12364f410d22c6a8383f59738a2fef3 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Thu, 24 Apr 2025 11:04:19 -0700 Subject: [PATCH 1404/2924] platform: Fix race condition during DMA configure at IOMMU probe time To avoid a race between the IOMMU probing thread and the device driver async probing thread during configuration of the platform DMA, update `platform_dma_configure()` to read `dev->driver` once and test if it's NULL before using it. This ensures that we don't de-reference an invalid platform driver pointer if the device driver is asynchronously bound while configuring the DMA. Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Signed-off-by: Will McVicker Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20250424180420.3928523-1-willmcvicker@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 1813cfd0c4bdf4..cfccf3ff36e76e 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1440,7 +1440,7 @@ static void platform_shutdown(struct device *_dev) static int platform_dma_configure(struct device *dev) { - struct platform_driver *drv = to_platform_driver(dev->driver); + struct device_driver *drv = READ_ONCE(dev->driver); struct fwnode_handle *fwnode = dev_fwnode(dev); enum dev_dma_attr attr; int ret = 0; @@ -1451,8 +1451,8 @@ static int platform_dma_configure(struct device *dev) attr = acpi_get_dma_attr(to_acpi_device_node(fwnode)); ret = acpi_dma_configure(dev, attr); } - /* @drv may not be valid when we're called from the IOMMU layer */ - if (ret || !dev->driver || drv->driver_managed_dma) + /* @dev->driver may not be valid when we're called from the IOMMU layer */ + if (ret || !drv || to_platform_driver(drv)->driver_managed_dma) return ret; ret = iommu_device_use_default_domain(dev); From e2699274d5a43b95af1b806aa6e3b1157107665d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 30 Apr 2025 22:37:13 -0400 Subject: [PATCH 1405/2924] bcachefs: Fix __bch2_dev_group_set() bch2_sb_disk_groups_to_cpu() goes off of the superblock member info, so we need to set that first. Reported-by: Stijn Tintel Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_groups.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 1186280b29e903..2ca3cbf12b7135 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -470,23 +470,22 @@ void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) { - struct bch_member *mi; - int ret, v = -1; + lockdep_assert_held(&c->sb_lock); - if (!strlen(name) || !strcmp(name, "none")) - return 0; - v = bch2_disk_path_find_or_create(&c->disk_sb, name); - if (v < 0) - return v; + if (!strlen(name) || !strcmp(name, "none")) { + struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_GROUP(mi, 0); + } else { + int v = bch2_disk_path_find_or_create(&c->disk_sb, name); + if (v < 0) + return v; - ret = bch2_sb_disk_groups_to_cpu(c); - if (ret) - return ret; + struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_GROUP(mi, v + 1); + } - mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_GROUP(mi, v + 1); - return 0; + return bch2_sb_disk_groups_to_cpu(c); } int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) From 5a295bad38b1057dd13811242ac981bb674ab190 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Thu, 17 Apr 2025 17:07:17 -0700 Subject: [PATCH 1406/2924] drm/xe/eustall: Resolve a possible circular locking dependency Use a separate lock in the polling function eu_stall_data_buf_poll() instead of eu_stall->stream_lock. This would prevent a possible circular locking dependency leading to a deadlock as described below. This would also require additional locking with the new lock in the read function. <4> [787.192986] ====================================================== <4> [787.192988] WARNING: possible circular locking dependency detected <4> [787.192991] 6.14.0-rc7-xe+ #1 Tainted: G U <4> [787.192993] ------------------------------------------------------ <4> [787.192994] xe_eu_stall/20093 is trying to acquire lock: <4> [787.192996] ffff88819847e2c0 ((work_completion) (&(&stream->buf_poll_work)->work)), at: __flush_work+0x1f8/0x5e0 <4> [787.193005] but task is already holding lock: <4> [787.193007] ffff88814ce83ba8 (>->eu_stall->stream_lock){3:3}, at: xe_eu_stall_stream_ioctl+0x41/0x6a0 [xe] <4> [787.193090] which lock already depends on the new lock. <4> [787.193093] the existing dependency chain (in reverse order) is: <4> [787.193095] -> #1 (>->eu_stall->stream_lock){+.+.}-{3:3}: <4> [787.193099] __mutex_lock+0xb4/0xe40 <4> [787.193104] mutex_lock_nested+0x1b/0x30 <4> [787.193106] eu_stall_data_buf_poll_work_fn+0x44/0x1d0 [xe] <4> [787.193155] process_one_work+0x21c/0x740 <4> [787.193159] worker_thread+0x1db/0x3c0 <4> [787.193161] kthread+0x10d/0x270 <4> [787.193164] ret_from_fork+0x44/0x70 <4> [787.193168] ret_from_fork_asm+0x1a/0x30 <4> [787.193172] -> #0 ((work_completion)(&(&stream->buf_poll_work)->work)){+.+.}-{0:0}: <4> [787.193176] __lock_acquire+0x1637/0x2810 <4> [787.193180] lock_acquire+0xc9/0x300 <4> [787.193183] __flush_work+0x219/0x5e0 <4> [787.193186] cancel_delayed_work_sync+0x87/0x90 <4> [787.193189] xe_eu_stall_disable_locked+0x9a/0x260 [xe] <4> [787.193237] xe_eu_stall_stream_ioctl+0x5b/0x6a0 [xe] <4> [787.193285] __x64_sys_ioctl+0xa4/0xe0 <4> [787.193289] x64_sys_call+0x131e/0x2650 <4> [787.193292] do_syscall_64+0x91/0x180 <4> [787.193295] entry_SYSCALL_64_after_hwframe+0x76/0x7e <4> [787.193299] other info that might help us debug this: <4> [787.193302] Possible unsafe locking scenario: <4> [787.193304] CPU0 CPU1 <4> [787.193305] ---- ---- <4> [787.193306] lock(>->eu_stall->stream_lock); <4> [787.193308] lock((work_completion) (&(&stream->buf_poll_work)->work)); <4> [787.193311] lock(>->eu_stall->stream_lock); <4> [787.193313] lock((work_completion) (&(&stream->buf_poll_work)->work)); <4> [787.193315] *** DEADLOCK *** Fixes: 760edec939685 ("drm/xe/eustall: Add support to read() and poll() EU stall data") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4598 Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://lore.kernel.org/r/c896932fca84f79db2df5942911997ed77b2b9b6.1744934656.git.harish.chegondi@intel.com (cherry picked from commit c2b1f1b8641372bb2e563c49eb25632623a860fc) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_eu_stall.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index f2bb9168967c2b..78f28f3c5e5c55 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -52,6 +52,8 @@ struct xe_eu_stall_data_stream { struct xe_gt *gt; struct xe_bo *bo; + /* Lock to protect data buffer pointers */ + struct mutex xecore_buf_lock; struct per_xecore_buf *xecore_buf; struct { bool reported_to_user; @@ -378,7 +380,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) u16 group, instance; unsigned int xecore; - mutex_lock(>->eu_stall->stream_lock); + mutex_lock(&stream->xecore_buf_lock); for_each_dss_steering(xecore, gt, group, instance) { xecore_buf = &stream->xecore_buf[xecore]; read_ptr = xecore_buf->read; @@ -396,7 +398,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) set_bit(xecore, stream->data_drop.mask); xecore_buf->write = write_ptr; } - mutex_unlock(>->eu_stall->stream_lock); + mutex_unlock(&stream->xecore_buf_lock); return min_data_present; } @@ -511,11 +513,13 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st unsigned int xecore; int ret = 0; + mutex_lock(&stream->xecore_buf_lock); if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { if (!stream->data_drop.reported_to_user) { stream->data_drop.reported_to_user = true; xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); + mutex_unlock(&stream->xecore_buf_lock); return -EIO; } stream->data_drop.reported_to_user = false; @@ -527,6 +531,7 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st if (ret || count == total_size) break; } + mutex_unlock(&stream->xecore_buf_lock); return total_size ?: (ret ?: -EAGAIN); } @@ -583,6 +588,7 @@ static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) { struct xe_gt *gt = stream->gt; + mutex_destroy(&stream->xecore_buf_lock); gt->eu_stall->stream = NULL; kfree(stream); } @@ -718,6 +724,7 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, } init_waitqueue_head(&stream->poll_wq); + mutex_init(&stream->xecore_buf_lock); INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); stream->per_xecore_buf_size = per_xecore_buf_size; stream->sampling_rate_mult = props->sampling_rate_mult; From 1d622a4fe2b9a30cd4af2e858d793d05f8a82774 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sun, 20 Apr 2025 22:59:01 -0700 Subject: [PATCH 1407/2924] drm/xe/eustall: Do not support EU stall on SRIOV VF EU stall sampling is not supported on SRIOV VF. Do not initialize or open EU stall stream on SRIOV VF. Fixes: 9a0b11d4cf3b ("drm/xe/eustall: Add support to init, enable and disable EU stall sampling") Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://lore.kernel.org/r/10db5d1c7e17aadca7078ff74575b7ffc0d5d6b8.1745215022.git.harish.chegondi@intel.com (cherry picked from commit 6ed20625a4b8189a1bd6598aa58e03147ce378ee) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_eu_stall.c | 3 +++ drivers/gpu/drm/xe/xe_eu_stall.h | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 78f28f3c5e5c55..e2bb156c71fb08 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -210,6 +210,9 @@ int xe_eu_stall_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int ret; + if (!xe_eu_stall_supported_on_platform(xe)) + return 0; + gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); if (!gt->eu_stall) { ret = -ENOMEM; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h index ed9d0f2335664d..d1c76e50379929 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.h +++ b/drivers/gpu/drm/xe/xe_eu_stall.h @@ -7,6 +7,7 @@ #define __XE_EU_STALL_H__ #include "xe_gt_types.h" +#include "xe_sriov.h" size_t xe_eu_stall_get_per_xecore_buf_size(void); size_t xe_eu_stall_data_record_size(struct xe_device *xe); @@ -19,6 +20,6 @@ int xe_eu_stall_stream_open(struct drm_device *dev, static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe) { - return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20; + return !IS_SRIOV_VF(xe) && (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20); } #endif From fee4d171451c1ad9e8aaf65fc0ab7d143a33bd72 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 1 May 2025 11:47:47 +0100 Subject: [PATCH 1408/2924] arm64: errata: Add missing sentinels to Spectre-BHB MIDR arrays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a5951389e58d ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") added some additional CPUs to the Spectre-BHB workaround, including some new arrays for designs that require new 'k' values for the workaround to be effective. Unfortunately, the new arrays omitted the sentinel entry and so is_midr_in_range_list() will walk off the end when it doesn't find a match. With UBSAN enabled, this leads to a crash during boot when is_midr_in_range_list() is inlined (which was more common prior to c8c2647e69be ("arm64: Make  _midr_in_range_list() an exported function")): | Internal error: aarch64 BRK: 00000000f2000001 [#1] PREEMPT SMP | pstate: 804000c5 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : spectre_bhb_loop_affected+0x28/0x30 | lr : is_spectre_bhb_affected+0x170/0x190 | [...] | Call trace: | spectre_bhb_loop_affected+0x28/0x30 | update_cpu_capabilities+0xc0/0x184 | init_cpu_features+0x188/0x1a4 | cpuinfo_store_boot_cpu+0x4c/0x60 | smp_prepare_boot_cpu+0x38/0x54 | start_kernel+0x8c/0x478 | __primary_switched+0xc8/0xd4 | Code: 6b09011f 54000061 52801080 d65f03c0 (d4200020) | ---[ end trace 0000000000000000 ]--- | Kernel panic - not syncing: aarch64 BRK: Fatal exception Add the missing sentinel entries. Cc: Lee Jones Cc: James Morse Cc: Doug Anderson Cc: Shameer Kolothum Cc: Reported-by: Greg Kroah-Hartman Fixes: a5951389e58d ("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected() lists") Signed-off-by: Will Deacon Reviewed-by: Lee Jones Reviewed-by: Douglas Anderson Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20250501104747.28431-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/proton-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index b198dde79e591d..b607f6dfc5e6f6 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -879,10 +879,12 @@ static u8 spectre_bhb_loop_affected(void) static const struct midr_range spectre_bhb_k132_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + {}, }; static const struct midr_range spectre_bhb_k38_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + {}, }; static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), From 28580052e634fe8fa327e6f25c35590374be754b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 1 May 2025 12:38:00 -0400 Subject: [PATCH 1409/2924] bcachefs: add missing sched_annotate_sleep() 00594 ------------[ cut here ]------------ 00594 do not call blocking ops when !TASK_RUNNING; state=2 set at [<000000003e51ef4a>] prepare_to_wait_event+0x5c/0x1c0 00594 WARNING: CPU: 12 PID: 1117 at kernel/sched/core.c:8741 __might_sleep+0x74/0x88 00594 Modules linked in: 00594 CPU: 12 UID: 0 PID: 1117 Comm: umount Not tainted 6.15.0-rc4-ktest-g3a72e369412d #21845 PREEMPT 00594 Hardware name: linux,dummy-virt (DT) 00594 pstate: 60001005 (nZCv daif -PAN -UAO -TCO -DIT +SSBS BTYPE=--) 00594 pc : __might_sleep+0x74/0x88 00594 lr : __might_sleep+0x74/0x88 00594 sp : ffffff80c8d67a90 00594 x29: ffffff80c8d67a90 x28: ffffff80f5903500 x27: 0000000000000000 00594 x26: 0000000000000000 x25: ffffff80cf5002a0 x24: ffffffc087dad000 00594 x23: ffffff80c8d67b40 x22: 0000000000000000 x21: 0000000000000000 00594 x20: 0000000000000242 x19: ffffffc080b92020 x18: 00000000ffffffff 00594 x17: 30303c5b20746120 x16: 74657320323d6574 x15: 617473203b474e49 00594 x14: 0000000000000001 x13: 00000000000c0000 x12: ffffff80facc0000 00594 x11: 0000000000000001 x10: 0000000000000001 x9 : ffffffc0800b0774 00594 x8 : c0000000fffbffff x7 : ffffffc087dac670 x6 : 00000000015fffa8 00594 x5 : ffffff80facbffa8 x4 : ffffff80fbd30b90 x3 : 0000000000000000 00594 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffffff80f5903500 00594 Call trace: 00594 __might_sleep+0x74/0x88 (P) 00594 __mutex_lock+0x64/0x8d8 00594 mutex_lock_nested+0x28/0x38 00594 bch2_fs_ec_flush+0xf8/0x128 00594 __bch2_fs_read_only+0x54/0x1d8 00594 bch2_fs_read_only+0x3e0/0x438 00594 __bch2_fs_stop+0x5c/0x250 00594 bch2_put_super+0x18/0x28 00594 generic_shutdown_super+0x6c/0x140 00594 bch2_kill_sb+0x1c/0x38 00594 deactivate_locked_super+0x54/0xd0 00594 deactivate_super+0x70/0x90 00594 cleanup_mnt+0xec/0x188 00594 __cleanup_mnt+0x18/0x28 00594 task_work_run+0x90/0xd8 00594 do_notify_resume+0x138/0x148 00594 el0_svc+0x9c/0xa0 00594 el0t_64_sync_handler+0x104/0x130 00594 el0t_64_sync+0x154/0x158 Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index a396865e8b176b..fff58b78327c1a 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -2204,10 +2204,10 @@ void bch2_fs_ec_stop(struct bch_fs *c) static bool bch2_fs_ec_flush_done(struct bch_fs *c) { - bool ret; + sched_annotate_sleep(); mutex_lock(&c->ec_stripe_new_lock); - ret = list_empty(&c->ec_stripe_new_list); + bool ret = list_empty(&c->ec_stripe_new_list); mutex_unlock(&c->ec_stripe_new_lock); return ret; From f5178c41bb43444a6008150fe6094497135d07cb Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Tue, 22 Apr 2025 20:30:25 +0900 Subject: [PATCH 1410/2924] tracing: Fix oob write in trace_seq_to_buffer() syzbot reported this bug: ================================================================== BUG: KASAN: slab-out-of-bounds in trace_seq_to_buffer kernel/trace/trace.c:1830 [inline] BUG: KASAN: slab-out-of-bounds in tracing_splice_read_pipe+0x6be/0xdd0 kernel/trace/trace.c:6822 Write of size 4507 at addr ffff888032b6b000 by task syz.2.320/7260 CPU: 1 UID: 0 PID: 7260 Comm: syz.2.320 Not tainted 6.15.0-rc1-syzkaller-00301-g3bde70a2c827 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xc3/0x670 mm/kasan/report.c:521 kasan_report+0xe0/0x110 mm/kasan/report.c:634 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0xef/0x1a0 mm/kasan/generic.c:189 __asan_memcpy+0x3c/0x60 mm/kasan/shadow.c:106 trace_seq_to_buffer kernel/trace/trace.c:1830 [inline] tracing_splice_read_pipe+0x6be/0xdd0 kernel/trace/trace.c:6822 .... ================================================================== It has been reported that trace_seq_to_buffer() tries to copy more data than PAGE_SIZE to buf. Therefore, to prevent this, we should use the smaller of trace_seq_used(&iter->seq) and PAGE_SIZE as an argument. Link: https://lore.kernel.org/20250422113026.13308-1-aha310510@gmail.com Reported-by: syzbot+c8cd2d2c412b868263fb@syzkaller.appspotmail.com Fixes: 3c56819b14b0 ("tracing: splice support for tracing_pipe") Suggested-by: Steven Rostedt Signed-off-by: Jeongjun Park Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8ddf6b17215caa..6d52dc108f0070 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6821,13 +6821,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* Copy the data into the page, so we can start over. */ ret = trace_seq_to_buffer(&iter->seq, page_address(spd.pages[i]), - trace_seq_used(&iter->seq)); + min((size_t)trace_seq_used(&iter->seq), + PAGE_SIZE)); if (ret < 0) { __free_page(spd.pages[i]); break; } spd.partial[i].offset = 0; - spd.partial[i].len = trace_seq_used(&iter->seq); + spd.partial[i].len = ret; trace_seq_init(&iter->seq); } From 3c1d9cfa8458a4d6b6cd9bc3ca7bb1591130a31c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 22 Apr 2025 23:13:35 +0100 Subject: [PATCH 1411/2924] ftrace: Fix NULL memory allocation check The check for a failed memory location is incorrectly checking the wrong level of pointer indirection by checking !filter_hash rather than !*filter_hash. Fix this. Cc: asami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250422221335.89896-1-colin.i.king@gmail.com Fixes: 0ae6b8ce200d ("ftrace: Fix accounting of subop hashes") Signed-off-by: Colin Ian King Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 61130bb34d6c34..6981830c312859 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3436,7 +3436,7 @@ static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash ** /* Copy the subops hash */ *filter_hash = alloc_and_copy_ftrace_hash(size_bits, subops_hash->filter_hash); - if (!filter_hash) + if (!*filter_hash) return -ENOMEM; /* Remove any notrace functions from the copy */ remove_hash(*filter_hash, subops_hash->notrace_hash); From 1be8e54a1e0f0a4bf70e3d65f94ca1738ee4f1f3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 1 May 2025 15:19:09 -0400 Subject: [PATCH 1412/2924] tracing: Fix trace_adjust_address() when there is no modules in scratch area The function trace_adjust_address() is used to map addresses of modules stored in the persistent memory and are also loaded in the current boot to return the current address for the module. If there's only one module entry, it will simply use that, otherwise it performs a bsearch of the entry array to find the modules to offset with. The issue is if there are no modules in the array. The code does not account for that and ends up referencing the first element in the array which does not exist and causes a crash. If nr_entries is zero, exit out early as if this was a core kernel address. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250501151909.65910359@gandalf.local.home Fixes: 35a380ddbc653 ("tracing: Show last module text symbols in the stacktrace") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6d52dc108f0070..5b8db27fb6ef37 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6043,8 +6043,10 @@ unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) tscratch = tr->scratch; /* if there is no tscrach, module_delta must be NULL. */ module_delta = READ_ONCE(tr->module_delta); - if (!module_delta || tscratch->entries[0].mod_addr > addr) + if (!module_delta || !tscratch->nr_entries || + tscratch->entries[0].mod_addr > addr) { return addr + tr->text_delta; + } /* Note that entries must be sorted. */ nr_entries = tscratch->nr_entries; From 4426e6b4ecf632bb75d973051e1179b8bfac2320 Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Wed, 23 Apr 2025 21:03:03 -0500 Subject: [PATCH 1413/2924] spi: tegra114: Don't fail set_cs_timing when delays are zero The original code would skip null delay pointers, but when the pointers were converted to point within the spi_device struct, the check was not updated to skip delays of zero. Hence all spi devices that didn't set delays would fail to probe. Fixes: 04e6bb0d6bb1 ("spi: modify set_cs_timing parameter") Cc: stable@vger.kernel.org Signed-off-by: Aaron Kling Link: https://patch.msgid.link/20250423-spi-tegra114-v1-1-2d608bcc12f9@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-tegra114.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 3822d7c8d8edb9..2a8bb798e95b95 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -728,9 +728,9 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi) u32 inactive_cycles; u8 cs_state; - if (setup->unit != SPI_DELAY_UNIT_SCK || - hold->unit != SPI_DELAY_UNIT_SCK || - inactive->unit != SPI_DELAY_UNIT_SCK) { + if ((setup->unit && setup->unit != SPI_DELAY_UNIT_SCK) || + (hold->unit && hold->unit != SPI_DELAY_UNIT_SCK) || + (inactive->unit && inactive->unit != SPI_DELAY_UNIT_SCK)) { dev_err(&spi->dev, "Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n", SPI_DELAY_UNIT_SCK); From 6846100b00d97d3d6f05766ae86a0d821d849e78 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Fri, 2 May 2025 04:01:31 +0800 Subject: [PATCH 1414/2924] bcachefs: Remove incorrect __counted_by annotation This actually reverts 86e92eeeb237 ("bcachefs: Annotate struct bch_xattr with __counted_by()"). After the x_name, there is a value. According to the disscussion[1], __counted_by assumes that the flexible array member contains exactly the amount of elements that are specified. Now there are users came across a false positive detection of an out of bounds write caused by the __counted_by here[2], so revert that. [1] https://lore.kernel.org/lkml/Zv8VDKWN1GzLRT-_@archlinux/T/#m0ce9541c5070146320efd4f928cc1ff8de69e9b2 [2] https://privatebin.net/?a0d4e97d590d71e1#9bLmp2Kb5NU6X6cZEucchDcu88HzUQwHUah8okKPReEt Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/xattr_format.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/xattr_format.h b/fs/bcachefs/xattr_format.h index c7916011ef34d3..67426e33d04e56 100644 --- a/fs/bcachefs/xattr_format.h +++ b/fs/bcachefs/xattr_format.h @@ -13,7 +13,13 @@ struct bch_xattr { __u8 x_type; __u8 x_name_len; __le16 x_val_len; - __u8 x_name[] __counted_by(x_name_len); + /* + * x_name contains the name and value counted by + * x_name_len + x_val_len. The introduction of + * __counted_by(x_name_len) caused a false positive + * detection of an out of bounds write. + */ + __u8 x_name[]; } __packed __aligned(8); #endif /* _BCACHEFS_XATTR_FORMAT_H */ From 53e3e5babc0963a92d856a5ec0ce92c59f54bc12 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 30 Apr 2025 11:18:28 +0900 Subject: [PATCH 1415/2924] ksmbd: prevent rename with empty string Client can send empty newname string to ksmbd server. It will cause a kernel oops from d_alloc. This patch return the error when attempting to rename a file or directory with an empty new name string. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 46aa082457424b..f2a2be8467c669 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -633,6 +633,11 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) return name; } + if (*name == '\0') { + kfree(name); + return ERR_PTR(-EINVAL); + } + if (*name == '\\') { pr_err("not allow directory name included leading slash\n"); kfree(name); From eb4447bcce915b43b691123118893fca4f372a8f Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Wed, 30 Apr 2025 11:16:23 +0800 Subject: [PATCH 1416/2924] ksmbd: fix memory leak in parse_lease_state() The previous patch that added bounds check for create lease context introduced a memory leak. When the bounds check fails, the function returns NULL without freeing the previously allocated lease_ctx_info structure. This patch fixes the issue by adding kfree(lreq) before returning NULL in both boundary check cases. Fixes: bab703ed8472 ("ksmbd: add bounds check for create lease context") Signed-off-by: Wang Zhaolong Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 81a29857b1e32f..03f606afad93a0 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1496,7 +1496,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease_v2) - 4) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1512,7 +1512,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease)) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1521,6 +1521,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req) lreq->version = 1; } return lreq; +err_out: + kfree(lreq); + return NULL; } /** From 11854fe263eb1b9a8efa33b0c087add7719ea9b4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 15:45:06 -0700 Subject: [PATCH 1417/2924] binfmt_elf: Move brk for static PIE even if ASLR disabled In commit bbdc6076d2e5 ("binfmt_elf: move brk out of mmap when doing direct loader exec"), the brk was moved out of the mmap region when loading static PIE binaries (ET_DYN without INTERP). The common case for these binaries was testing new ELF loaders, so the brk needed to be away from mmap to avoid colliding with stack, future mmaps (of the loader-loaded binary), etc. But this was only done when ASLR was enabled, in an attempt to minimize changes to memory layouts. After adding support to respect alignment requirements for static PIE binaries in commit 3545deff0ec7 ("binfmt_elf: Honor PT_LOAD alignment for static PIE"), it became possible to have a large gap after the final PT_LOAD segment and the top of the mmap region. This means that future mmap allocations might go after the last PT_LOAD segment (where brk might be if ASLR was disabled) instead of before them (where they traditionally ended up). On arm64, running with ASLR disabled, Ubuntu 22.04's "ldconfig" binary, a static PIE, has alignment requirements that leaves a gap large enough after the last PT_LOAD segment to fit the vdso and vvar, but still leave enough space for the brk (which immediately follows the last PT_LOAD segment) to be allocated by the binary. fffff7f20000-fffff7fde000 r-xp 00000000 fe:02 8110426 /sbin/ldconfig.real fffff7fee000-fffff7ff5000 rw-p 000be000 fe:02 8110426 /sbin/ldconfig.real fffff7ff5000-fffff7ffa000 rw-p 00000000 00:00 0 ***[brk will go here at fffff7ffa000]*** fffff7ffc000-fffff7ffe000 r--p 00000000 00:00 0 [vvar] fffff7ffe000-fffff8000000 r-xp 00000000 00:00 0 [vdso] fffffffdf000-1000000000000 rw-p 00000000 00:00 0 [stack] After commit 0b3bc3354eb9 ("arm64: vdso: Switch to generic storage implementation"), the arm64 vvar grew slightly, and suddenly the brk collided with the allocation. fffff7f20000-fffff7fde000 r-xp 00000000 fe:02 8110426 /sbin/ldconfig.real fffff7fee000-fffff7ff5000 rw-p 000be000 fe:02 8110426 /sbin/ldconfig.real fffff7ff5000-fffff7ffa000 rw-p 00000000 00:00 0 ***[oops, no room any more, vvar is at fffff7ffa000!]*** fffff7ffa000-fffff7ffe000 r--p 00000000 00:00 0 [vvar] fffff7ffe000-fffff8000000 r-xp 00000000 00:00 0 [vdso] fffffffdf000-1000000000000 rw-p 00000000 00:00 0 [stack] The solution is to unconditionally move the brk out of the mmap region for static PIE binaries. Whether ASLR is enabled or not does not change if there may be future mmap allocation collisions with a growing brk region. Update memory layout comments (with kernel-doc headings), consolidate the setting of mm->brk to later (it isn't needed early), move static PIE brk out of mmap unconditionally, and make sure brk(2) knows to base brk position off of mm->start_brk not mm->end_data no matter what the cause of moving it is (via current->brk_randomized). For the CONFIG_COMPAT_BRK case, though, leave the logic unchanged, as we can never safely move the brk. These systems, however, are not using specially aligned static PIE binaries. Reported-by: Ryan Roberts Closes: https://lore.kernel.org/lkml/f93db308-4a0e-4806-9faf-98f890f5a5e6@arm.com/ Fixes: bbdc6076d2e5 ("binfmt_elf: move brk out of mmap when doing direct loader exec") Link: https://lore.kernel.org/r/20250425224502.work.520-kees@kernel.org Reviewed-by: Ryan Roberts Tested-by: Ryan Roberts Signed-off-by: Kees Cook --- fs/binfmt_elf.c | 71 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 584fa89bc8776e..4c1ea6b52a53de 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -830,6 +830,7 @@ static int load_elf_binary(struct linux_binprm *bprm) struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; struct elf_phdr *elf_property_phdata = NULL; unsigned long elf_brk; + bool brk_moved = false; int retval, i; unsigned long elf_entry; unsigned long e_entry; @@ -1097,15 +1098,19 @@ static int load_elf_binary(struct linux_binprm *bprm) /* Calculate any requested alignment. */ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); - /* - * There are effectively two types of ET_DYN - * binaries: programs (i.e. PIE: ET_DYN with PT_INTERP) - * and loaders (ET_DYN without PT_INTERP, since they - * _are_ the ELF interpreter). The loaders must - * be loaded away from programs since the program - * may otherwise collide with the loader (especially - * for ET_EXEC which does not have a randomized - * position). For example to handle invocations of + /** + * DOC: PIE handling + * + * There are effectively two types of ET_DYN ELF + * binaries: programs (i.e. PIE: ET_DYN with + * PT_INTERP) and loaders (i.e. static PIE: ET_DYN + * without PT_INTERP, usually the ELF interpreter + * itself). Loaders must be loaded away from programs + * since the program may otherwise collide with the + * loader (especially for ET_EXEC which does not have + * a randomized position). + * + * For example, to handle invocations of * "./ld.so someprog" to test out a new version of * the loader, the subsequent program that the * loader loads must avoid the loader itself, so @@ -1118,6 +1123,9 @@ static int load_elf_binary(struct linux_binprm *bprm) * ELF_ET_DYN_BASE and loaders are loaded into the * independently randomized mmap region (0 load_bias * without MAP_FIXED nor MAP_FIXED_NOREPLACE). + * + * See below for "brk" handling details, which is + * also affected by program vs loader and ASLR. */ if (interpreter) { /* On ET_DYN with PT_INTERP, we do the ASLR. */ @@ -1234,8 +1242,6 @@ static int load_elf_binary(struct linux_binprm *bprm) start_data += load_bias; end_data += load_bias; - current->mm->start_brk = current->mm->brk = ELF_PAGEALIGN(elf_brk); - if (interpreter) { elf_entry = load_elf_interp(interp_elf_ex, interpreter, @@ -1291,27 +1297,44 @@ static int load_elf_binary(struct linux_binprm *bprm) mm->end_data = end_data; mm->start_stack = bprm->p; - if ((current->flags & PF_RANDOMIZE) && (snapshot_randomize_va_space > 1)) { + /** + * DOC: "brk" handling + * + * For architectures with ELF randomization, when executing a + * loader directly (i.e. static PIE: ET_DYN without PT_INTERP), + * move the brk area out of the mmap region and into the unused + * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide + * early with the stack growing down or other regions being put + * into the mmap region by the kernel (e.g. vdso). + * + * In the CONFIG_COMPAT_BRK case, though, everything is turned + * off because we're not allowed to move the brk at all. + */ + if (!IS_ENABLED(CONFIG_COMPAT_BRK) && + IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && + elf_ex->e_type == ET_DYN && !interpreter) { + elf_brk = ELF_ET_DYN_BASE; + /* This counts as moving the brk, so let brk(2) know. */ + brk_moved = true; + } + mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk); + + if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) { /* - * For architectures with ELF randomization, when executing - * a loader directly (i.e. no interpreter listed in ELF - * headers), move the brk area out of the mmap region - * (since it grows up, and may collide early with the stack - * growing down), and into the unused ELF_ET_DYN_BASE region. + * If we didn't move the brk to ELF_ET_DYN_BASE (above), + * leave a gap between .bss and brk. */ - if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && - elf_ex->e_type == ET_DYN && !interpreter) { - mm->brk = mm->start_brk = ELF_ET_DYN_BASE; - } else { - /* Otherwise leave a gap between .bss and brk. */ + if (!brk_moved) mm->brk = mm->start_brk = mm->brk + PAGE_SIZE; - } mm->brk = mm->start_brk = arch_randomize_brk(mm); + brk_moved = true; + } + #ifdef compat_brk_randomized + if (brk_moved) current->brk_randomized = 1; #endif - } if (current->personality & MMAP_PAGE_ZERO) { /* Why this, you ask??? Well SVr4 maps page 0 as read-only, From 0a8f11f8569e7ed16cbcedeb28c4350f6378fea6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 1 May 2025 22:41:28 -0400 Subject: [PATCH 1418/2924] tracing: Do not take trace_event_sem in print_event_fields() On some paths in print_event_fields() it takes the trace_event_sem for read, even though it should always be held when the function is called. Remove the taking of that mutex and add a lockdep_assert_held_read() to make sure the trace_event_sem is held when print_event_fields() is called. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250501224128.0b1f0571@batman.local.home Fixes: 80a76994b2d88 ("tracing: Add "fields" option to show raw trace event fields") Reported-by: syzbot+441582c1592938fccf09@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/6813ff5e.050a0220.14dd7d.001b.GAE@google.com/ Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index fee40ffbd49061..b9ab06c9954323 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1042,11 +1042,12 @@ enum print_line_t print_event_fields(struct trace_iterator *iter, struct trace_event_call *call; struct list_head *head; + lockdep_assert_held_read(&trace_event_sem); + /* ftrace defined events have separate call structures */ if (event->type <= __TRACE_LAST_TYPE) { bool found = false; - down_read(&trace_event_sem); list_for_each_entry(call, &ftrace_events, list) { if (call->event.type == event->type) { found = true; @@ -1056,7 +1057,6 @@ enum print_line_t print_event_fields(struct trace_iterator *iter, if (call->event.type > __TRACE_LAST_TYPE) break; } - up_read(&trace_event_sem); if (!found) { trace_seq_printf(&iter->seq, "UNKNOWN TYPE %d\n", event->type); goto out; From 3393c90daf4e1704c6a5c3833439f461663a2e1d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 21 Apr 2025 08:15:38 -0700 Subject: [PATCH 1419/2924] drm/xe/hwmon: Fix kernel version documentation for temperature The version in the sysfs attribute should correspond to the version in which this is enabled and visible for end users. It usually doesn't correspond to the version in which the patch was developed, but rather a release that will contain it. Update them to 6.15. Fixes: dac328dea701 ("drm/xe/hwmon: expose package and vram temperature") Reported-by: Ulisses Furquim Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4840 Reviewed-by: Rodrigo Vivi Reviewed-by: Raag Jadav Link: https://lore.kernel.org/r/20250421-hwmon-doc-fix-v1-1-9f68db702249@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 8500393a8e6c58e5e7c135133ad792fc6fd5b6f4) Signed-off-by: Lucas De Marchi --- Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 9bce281314dfde..cb207c79680d9a 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -111,7 +111,7 @@ Description: RO. Package current voltage in millivolt. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp2_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. Package temperature in millidegree Celsius. @@ -119,7 +119,7 @@ Description: RO. Package temperature in millidegree Celsius. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp3_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. VRAM temperature in millidegree Celsius. From e8e3a804f3845a147fbdf73f910c12ddb3a2a86f Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Sun, 27 Apr 2025 19:47:52 -0700 Subject: [PATCH 1420/2924] drm/gpusvm: set has_dma_mapping inside mapping loop The 'has_dma_mapping' flag should be set once there is a mapping so it could be unmapped in case of error. v2: - Resend for CI Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Signed-off-by: Dafna Hirschfeld Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250428024752.881292-1-matthew.brost@intel.com (cherry picked from commit f64cf7b681af72d3f715c0d0fd72091a54471c1a) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/drm_gpusvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 38431e8360e783..de424e670995cf 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1469,9 +1469,9 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, } i += 1 << order; num_dma_mapped = i; + range->flags.has_dma_mapping = true; } - range->flags.has_dma_mapping = true; if (zdd) { range->flags.has_devmem_pages = true; range->dpagemap = dpagemap; From cac01bd178d6a2a23727f138d647ce1a0e8a73a1 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 2 May 2025 09:09:39 +0200 Subject: [PATCH 1421/2924] usb: usbtmc: Fix erroneous get_stb ioctl error returns wait_event_interruptible_timeout returns a long The return was being assigned to an int causing an integer overflow when the remaining jiffies > INT_MAX resulting in random error returns. Use a long return value and convert to int ioctl return only on error. When the return value of wait_event_interruptible_timeout was <= INT_MAX the number of remaining jiffies was returned which has no meaning for the user. Return 0 on success. Reported-by: Michael Katzmann Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250502070941.31819-2-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 634c3bcbb413e4..e91f11c1ab5fde 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -482,6 +482,7 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) u8 *buffer; u8 tag; int rv; + long wait_rv; dev_dbg(dev, "Enter ioctl_read_stb iin_ep_present: %d\n", data->iin_ep_present); @@ -511,16 +512,17 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) } if (data->iin_ep_present) { - rv = wait_event_interruptible_timeout( + wait_rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&data->iin_data_valid) != 0, file_data->timeout); - if (rv < 0) { - dev_dbg(dev, "wait interrupted %d\n", rv); + if (wait_rv < 0) { + dev_dbg(dev, "wait interrupted %ld\n", wait_rv); + rv = wait_rv; goto exit; } - if (rv == 0) { + if (wait_rv == 0) { dev_dbg(dev, "wait timed out\n"); rv = -ETIMEDOUT; goto exit; @@ -539,6 +541,8 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) dev_dbg(dev, "stb:0x%02x received %d\n", (unsigned int)*stb, rv); + rv = 0; + exit: /* bump interrupt bTag */ data->iin_bTag += 1; From a9747c9b8b59ab4207effd20eb91a890acb44e16 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 2 May 2025 09:09:40 +0200 Subject: [PATCH 1422/2924] usb: usbtmc: Fix erroneous wait_srq ioctl return wait_event_interruptible_timeout returns a long The return was being assigned to an int causing an integer overflow when the remaining jiffies > INT_MAX resulting in random error returns. Use a long return value, converting to the int ioctl return only on error. Fixes: 739240a9f6ac ("usb: usbtmc: Add ioctl USBTMC488_IOCTL_WAIT_SRQ") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250502070941.31819-3-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index e91f11c1ab5fde..4761a5ecadd6f3 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -606,9 +606,9 @@ static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; - int rv; u32 timeout; unsigned long expire; + long wait_rv; if (!data->iin_ep_present) { dev_dbg(dev, "no interrupt endpoint present\n"); @@ -622,25 +622,24 @@ static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, mutex_unlock(&data->io_mutex); - rv = wait_event_interruptible_timeout( - data->waitq, - atomic_read(&file_data->srq_asserted) != 0 || - atomic_read(&file_data->closing), - expire); + wait_rv = wait_event_interruptible_timeout( + data->waitq, + atomic_read(&file_data->srq_asserted) != 0 || + atomic_read(&file_data->closing), + expire); mutex_lock(&data->io_mutex); /* Note! disconnect or close could be called in the meantime */ if (atomic_read(&file_data->closing) || data->zombie) - rv = -ENODEV; + return -ENODEV; - if (rv < 0) { - /* dev can be invalid now! */ - pr_debug("%s - wait interrupted %d\n", __func__, rv); - return rv; + if (wait_rv < 0) { + dev_dbg(dev, "%s - wait interrupted %ld\n", __func__, wait_rv); + return wait_rv; } - if (rv == 0) { + if (wait_rv == 0) { dev_dbg(dev, "%s - wait timed out\n", __func__); return -ETIMEDOUT; } From 4e77d3ec7c7c0d9535ccf1138827cb9bb5480b9b Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 2 May 2025 09:09:41 +0200 Subject: [PATCH 1423/2924] usb: usbtmc: Fix erroneous generic_read ioctl return wait_event_interruptible_timeout returns a long The return value was being assigned to an int causing an integer overflow when the remaining jiffies > INT_MAX which resulted in random error returns. Use a long return value, converting to the int ioctl return only on error. Fixes: bb99794a4792 ("usb: usbtmc: Add ioctl for vendor specific read") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250502070941.31819-4-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 4761a5ecadd6f3..740d2d2b19fbe0 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -833,6 +833,7 @@ static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, unsigned long expire; int bufcount = 1; int again = 0; + long wait_rv; /* mutex already locked */ @@ -945,19 +946,24 @@ static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, if (!(flags & USBTMC_FLAG_ASYNC)) { dev_dbg(dev, "%s: before wait time %lu\n", __func__, expire); - retval = wait_event_interruptible_timeout( + wait_rv = wait_event_interruptible_timeout( file_data->wait_bulk_in, usbtmc_do_transfer(file_data), expire); - dev_dbg(dev, "%s: wait returned %d\n", - __func__, retval); + dev_dbg(dev, "%s: wait returned %ld\n", + __func__, wait_rv); + + if (wait_rv < 0) { + retval = wait_rv; + goto error; + } - if (retval <= 0) { - if (retval == 0) - retval = -ETIMEDOUT; + if (wait_rv == 0) { + retval = -ETIMEDOUT; goto error; } + } urb = usb_get_from_anchor(&file_data->in_anchor); From 6f9a8ab796c6528d22de3c504c81fce7dde63d8a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 23:23:29 -0700 Subject: [PATCH 1424/2924] btrfs: compression: adjust cb->compressed_folios allocation type In preparation for making the kmalloc() family of allocators type aware, we need to make sure that the returned type from the allocation matches the type of the variable being assigned. (Before, the allocator would always return "void *", which can be implicitly cast to any pointer type.) The assigned type is "struct folio **" but the returned type will be "struct page **". These are the same allocation size (pointer size), but the types don't match. Adjust the allocation type to match the assignment. Reviewed-by: Qu Wenruo Signed-off-by: Kees Cook Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index e7f8ee5d48a4f1..7f11ef559be6e7 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -606,7 +606,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio) free_extent_map(em); cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE); - cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct page *), GFP_NOFS); + cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS); if (!cb->compressed_folios) { ret = BLK_STS_RESOURCE; goto out_free_bio; From bc7e0975093567f51be8e1bdf4aa5900a3cf0b1e Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Fri, 25 Apr 2025 09:25:06 -0400 Subject: [PATCH 1425/2924] btrfs: correct the order of prelim_ref arguments in btrfs__prelim_ref btrfs_prelim_ref() calls the old and new reference variables in the incorrect order. This causes a NULL pointer dereference because oldref is passed as NULL to trace_btrfs_prelim_ref_insert(). Note, trace_btrfs_prelim_ref_insert() is being called with newref as oldref (and oldref as NULL) on purpose in order to print out the values of newref. To reproduce: echo 1 > /sys/kernel/debug/tracing/events/btrfs/btrfs_prelim_ref_insert/enable Perform some writeback operations. Backtrace: BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 115949067 P4D 115949067 PUD 11594a067 PMD 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 1 UID: 0 PID: 1188 Comm: fsstress Not tainted 6.15.0-rc2-tester+ #47 PREEMPT(voluntary) 7ca2cef72d5e9c600f0c7718adb6462de8149622 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014 RIP: 0010:trace_event_raw_event_btrfs__prelim_ref+0x72/0x130 Code: e8 43 81 9f ff 48 85 c0 74 78 4d 85 e4 0f 84 8f 00 00 00 49 8b 94 24 c0 06 00 00 48 8b 0a 48 89 48 08 48 8b 52 08 48 89 50 10 <49> 8b 55 18 48 89 50 18 49 8b 55 20 48 89 50 20 41 0f b6 55 28 88 RSP: 0018:ffffce44820077a0 EFLAGS: 00010286 RAX: ffff8c6b403f9014 RBX: ffff8c6b55825730 RCX: 304994edf9cf506b RDX: d8b11eb7f0fdb699 RSI: ffff8c6b403f9010 RDI: ffff8c6b403f9010 RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000010 R10: 00000000ffffffff R11: 0000000000000000 R12: ffff8c6b4e8fb000 R13: 0000000000000000 R14: ffffce44820077a8 R15: ffff8c6b4abd1540 FS: 00007f4dc6813740(0000) GS:ffff8c6c1d378000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 000000010eb42000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: prelim_ref_insert+0x1c1/0x270 find_parent_nodes+0x12a6/0x1ee0 ? __entry_text_end+0x101f06/0x101f09 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 btrfs_is_data_extent_shared+0x167/0x640 ? fiemap_process_hole+0xd0/0x2c0 extent_fiemap+0xa5c/0xbc0 ? __entry_text_end+0x101f05/0x101f09 btrfs_fiemap+0x7e/0xd0 do_vfs_ioctl+0x425/0x9d0 __x64_sys_ioctl+0x75/0xc0 Signed-off-by: Goldwyn Rodrigues Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 549ab3b4196180..3efc00cc1bcd29 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1928,7 +1928,7 @@ DECLARE_EVENT_CLASS(btrfs__prelim_ref, TP_PROTO(const struct btrfs_fs_info *fs_info, const struct prelim_ref *oldref, const struct prelim_ref *newref, u64 tree_size), - TP_ARGS(fs_info, newref, oldref, tree_size), + TP_ARGS(fs_info, oldref, newref, tree_size), TP_STRUCT__entry_btrfs( __field( u64, root_id ) From d6fe0c69b3aa5c985380b794bdf8e6e9b1811e60 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Fri, 25 Apr 2025 12:47:50 -0700 Subject: [PATCH 1426/2924] btrfs: handle empty eb->folios in num_extent_folios() num_extent_folios() unconditionally calls folio_order() on eb->folios[0]. If that is NULL this will be a segfault. It is reasonable for it to return 0 as the number of folios in the eb when the first entry is NULL, so do that instead. Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2e261892c7bc79..f5b28b5c4908b2 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -298,6 +298,8 @@ static inline int __pure num_extent_pages(const struct extent_buffer *eb) */ static inline int __pure num_extent_folios(const struct extent_buffer *eb) { + if (!eb->folios[0]) + return 0; if (folio_order(eb->folios[0])) return 1; return num_extent_pages(eb); From f95d186255b319c48a365d47b69bd997fecb674e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 29 Apr 2025 15:17:50 +0930 Subject: [PATCH 1427/2924] btrfs: avoid NULL pointer dereference if no valid csum tree [BUG] When trying read-only scrub on a btrfs with rescue=idatacsums mount option, it will crash with the following call trace: BUG: kernel NULL pointer dereference, address: 0000000000000208 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page CPU: 1 UID: 0 PID: 835 Comm: btrfs Tainted: G O 6.15.0-rc3-custom+ #236 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 02/02/2022 RIP: 0010:btrfs_lookup_csums_bitmap+0x49/0x480 [btrfs] Call Trace: scrub_find_fill_first_stripe+0x35b/0x3d0 [btrfs] scrub_simple_mirror+0x175/0x290 [btrfs] scrub_stripe+0x5f7/0x6f0 [btrfs] scrub_chunk+0x9a/0x150 [btrfs] scrub_enumerate_chunks+0x333/0x660 [btrfs] btrfs_scrub_dev+0x23e/0x600 [btrfs] btrfs_ioctl+0x1dcf/0x2f80 [btrfs] __x64_sys_ioctl+0x97/0xc0 do_syscall_64+0x4f/0x120 entry_SYSCALL_64_after_hwframe+0x76/0x7e [CAUSE] Mount option "rescue=idatacsums" will completely skip loading the csum tree, so that any data read will not find any data csum thus we will ignore data checksum verification. Normally call sites utilizing csum tree will check the fs state flag NO_DATA_CSUMS bit, but unfortunately scrub does not check that bit at all. This results in scrub to call btrfs_search_slot() on a NULL pointer and triggered above crash. [FIX] Check both extent and csum tree root before doing any tree search. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2c5edcee94502b..c3b2e29e3e019d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1541,8 +1541,8 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, u64 extent_gen; int ret; - if (unlikely(!extent_root)) { - btrfs_err(fs_info, "no valid extent root for scrub"); + if (unlikely(!extent_root || !csum_root)) { + btrfs_err(fs_info, "no valid extent or csum root for scrub"); return -EUCLEAN; } memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * From 8fb1dcbbcc1ffe6ed7cf3f0f96d2737491dd1fbf Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 17 Jan 2025 09:09:34 +1030 Subject: [PATCH 1428/2924] Revert "btrfs: canonicalize the device path before adding it" This reverts commit 7e06de7c83a746e58d4701e013182af133395188. Commit 7e06de7c83a7 ("btrfs: canonicalize the device path before adding it") tries to make btrfs to use "/dev/mapper/*" name first, then any filename inside "/dev/" as the device path. This is mostly fine when there is only the root namespace involved, but when multiple namespace are involved, things can easily go wrong for the d_path() usage. As d_path() returns a file path that is namespace dependent, the resulted string may not make any sense in another namespace. Furthermore, the "/dev/" prefix checks itself is not reliable, one can still make a valid initramfs without devtmpfs, and fill all needed device nodes manually. Overall the userspace has all its might to pass whatever device path for mount, and we are not going to win the war trying to cover every corner case. So just revert that commit, and do no extra d_path() based file path sanity check. CC: stable@vger.kernel.org # 6.12+ Link: https://lore.kernel.org/linux-fsdevel/20250115185608.GA2223535@zen.localdomain/ Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 91 +--------------------------------------------- 1 file changed, 1 insertion(+), 90 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c8c21c55be53bd..8e6b6fed7429a9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -733,82 +733,6 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb) return has_metadata_uuid ? sb->metadata_uuid : sb->fsid; } -/* - * We can have very weird soft links passed in. - * One example is "/proc/self/fd/", which can be a soft link to - * a block device. - * - * But it's never a good idea to use those weird names. - * Here we check if the path (not following symlinks) is a good one inside - * "/dev/". - */ -static bool is_good_dev_path(const char *dev_path) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - bool is_good = false; - int ret; - - if (!dev_path) - goto out; - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) - goto out; - - /* - * Do not follow soft link, just check if the original path is inside - * "/dev/". - */ - ret = kern_path(dev_path, 0, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) - goto out; - if (strncmp(resolved_path, "/dev/", strlen("/dev/"))) - goto out; - is_good = true; -out: - kfree(path_buf); - path_put(&path); - return is_good; -} - -static int get_canonical_dev_path(const char *dev_path, char *canonical) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - int ret; - - if (!dev_path) { - ret = -EINVAL; - goto out; - } - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) { - ret = -ENOMEM; - goto out; - } - - ret = kern_path(dev_path, LOOKUP_FOLLOW, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) { - ret = PTR_ERR(resolved_path); - goto out; - } - ret = strscpy(canonical, resolved_path, PATH_MAX); -out: - kfree(path_buf); - path_put(&path); - return ret; -} - static bool is_same_device(struct btrfs_device *device, const char *new_path) { struct path old = { .mnt = NULL, .dentry = NULL }; @@ -1513,23 +1437,12 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, bool new_device_added = false; struct btrfs_device *device = NULL; struct file *bdev_file; - char *canonical_path = NULL; u64 bytenr; dev_t devt; int ret; lockdep_assert_held(&uuid_mutex); - if (!is_good_dev_path(path)) { - canonical_path = kmalloc(PATH_MAX, GFP_KERNEL); - if (canonical_path) { - ret = get_canonical_dev_path(path, canonical_path); - if (ret < 0) { - kfree(canonical_path); - canonical_path = NULL; - } - } - } /* * Avoid an exclusive open here, as the systemd-udev may initiate the * device scan which may race with the user's mount or mkfs command, @@ -1574,8 +1487,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, goto free_disk_super; } - device = device_list_add(canonical_path ? : path, disk_super, - &new_device_added); + device = device_list_add(path, disk_super, &new_device_added); if (!IS_ERR(device) && new_device_added) btrfs_free_stale_devices(device->devt, device); @@ -1584,7 +1496,6 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, error_bdev_put: fput(bdev_file); - kfree(canonical_path); return device; } From 38e541051e1d19e8b1479a6af587a7884653e041 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Thu, 1 May 2025 02:10:48 +0800 Subject: [PATCH 1429/2924] btrfs: open code folio_index() in btree_clear_folio_dirty_tag() The folio_index() helper is only needed for mixed usage of page cache and swap cache, for pure page cache usage, the caller can just use folio->index instead. It can't be a swap cache folio here. Swap mapping may only call into fs through 'swap_rw' but btrfs does not use that method for swap. Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Qu Wenruo Signed-off-by: Kairui Song Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8515c31f563b8f..13bdd60da3c738 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3508,8 +3508,8 @@ static void btree_clear_folio_dirty_tag(struct folio *folio) ASSERT(folio_test_locked(folio)); xa_lock_irq(&folio->mapping->i_pages); if (!folio_test_dirty(folio)) - __xa_clear_mark(&folio->mapping->i_pages, - folio_index(folio), PAGECACHE_TAG_DIRTY); + __xa_clear_mark(&folio->mapping->i_pages, folio->index, + PAGECACHE_TAG_DIRTY); xa_unlock_irq(&folio->mapping->i_pages); } From f31fe8165d365379d858c53bef43254c7d6d1cfd Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Fri, 2 May 2025 13:18:10 +0530 Subject: [PATCH 1430/2924] uio_hv_generic: Fix sysfs creation path for ring buffer On regular bootup, devices get registered to VMBus first, so when uio_hv_generic driver for a particular device type is probed, the device is already initialized and added, so sysfs creation in hv_uio_probe() works fine. However, when the device is removed and brought back, the channel gets rescinded and the device again gets registered to VMBus. However this time, the uio_hv_generic driver is already registered to probe for that device and in this case sysfs creation is tried before the device's kobject gets initialized completely. Fix this by moving the core logic of sysfs creation of ring buffer, from uio_hv_generic to HyperV's VMBus driver, where the rest of the sysfs attributes for the channels are defined. While doing that, make use of attribute groups and macros, instead of creating sysfs directly, to ensure better error handling and code flow. Problematic path: vmbus_process_offer (A new offer comes for the VMBus device) vmbus_add_channel_work vmbus_device_register |-> device_register | |... | |-> hv_uio_probe | |... | |-> sysfs_create_bin_file (leads to a warning as | the primary channel's kobject, which is used to | create the sysfs file, is not yet initialized) |-> kset_create_and_add |-> vmbus_add_channel_kobj (initialization of the primary channel's kobject happens later) Above code flow is sequential and the warning is always reproducible in this path. Fixes: 9ab877a6ccf8 ("uio_hv_generic: make ring buffer attribute for primary channel") Cc: stable@kernel.org Suggested-by: Saurabh Sengar Suggested-by: Michael Kelley Reviewed-by: Michael Kelley Tested-by: Michael Kelley Reviewed-by: Dexuan Cui Signed-off-by: Naman Jain Link: https://lore.kernel.org/r/20250502074811.2022-2-namjain@linux.microsoft.com Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hyperv_vmbus.h | 6 +++ drivers/hv/vmbus_drv.c | 100 ++++++++++++++++++++++++++++++++++- drivers/uio/uio_hv_generic.c | 39 ++++++-------- include/linux/hyperv.h | 6 +++ 4 files changed, 128 insertions(+), 23 deletions(-) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 29780f3a747848..0b450e53161e51 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -477,4 +477,10 @@ static inline int hv_debug_add_dev_dir(struct hv_device *dev) #endif /* CONFIG_HYPERV_TESTING */ +/* Create and remove sysfs entry for memory mapped ring buffers for a channel */ +int hv_create_ring_sysfs(struct vmbus_channel *channel, + int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel, + struct vm_area_struct *vma)); +int hv_remove_ring_sysfs(struct vmbus_channel *channel); + #endif /* _HYPERV_VMBUS_H */ diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 8d3cff42bdbb2a..0f16a83cc2d6bf 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1802,6 +1802,27 @@ static ssize_t subchannel_id_show(struct vmbus_channel *channel, } static VMBUS_CHAN_ATTR_RO(subchannel_id); +static int hv_mmap_ring_buffer_wrapper(struct file *filp, struct kobject *kobj, + const struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj); + + /* + * hv_(create|remove)_ring_sysfs implementation ensures that mmap_ring_buffer + * is not NULL. + */ + return channel->mmap_ring_buffer(channel, vma); +} + +static struct bin_attribute chan_attr_ring_buffer = { + .attr = { + .name = "ring", + .mode = 0600, + }, + .size = 2 * SZ_2M, + .mmap = hv_mmap_ring_buffer_wrapper, +}; static struct attribute *vmbus_chan_attrs[] = { &chan_attr_out_mask.attr, &chan_attr_in_mask.attr, @@ -1821,6 +1842,11 @@ static struct attribute *vmbus_chan_attrs[] = { NULL }; +static struct bin_attribute *vmbus_chan_bin_attrs[] = { + &chan_attr_ring_buffer, + NULL +}; + /* * Channel-level attribute_group callback function. Returns the permission for * each attribute, and returns 0 if an attribute is not visible. @@ -1841,9 +1867,24 @@ static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj, return attr->mode; } +static umode_t vmbus_chan_bin_attr_is_visible(struct kobject *kobj, + const struct bin_attribute *attr, int idx) +{ + const struct vmbus_channel *channel = + container_of(kobj, struct vmbus_channel, kobj); + + /* Hide ring attribute if channel's ring_sysfs_visible is set to false */ + if (attr == &chan_attr_ring_buffer && !channel->ring_sysfs_visible) + return 0; + + return attr->attr.mode; +} + static const struct attribute_group vmbus_chan_group = { .attrs = vmbus_chan_attrs, - .is_visible = vmbus_chan_attr_is_visible + .bin_attrs = vmbus_chan_bin_attrs, + .is_visible = vmbus_chan_attr_is_visible, + .is_bin_visible = vmbus_chan_bin_attr_is_visible, }; static const struct kobj_type vmbus_chan_ktype = { @@ -1851,6 +1892,63 @@ static const struct kobj_type vmbus_chan_ktype = { .release = vmbus_chan_release, }; +/** + * hv_create_ring_sysfs() - create "ring" sysfs entry corresponding to ring buffers for a channel. + * @channel: Pointer to vmbus_channel structure + * @hv_mmap_ring_buffer: function pointer for initializing the function to be called on mmap of + * channel's "ring" sysfs node, which is for the ring buffer of that channel. + * Function pointer is of below type: + * int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel, + * struct vm_area_struct *vma)) + * This has a pointer to the channel and a pointer to vm_area_struct, + * used for mmap, as arguments. + * + * Sysfs node for ring buffer of a channel is created along with other fields, however its + * visibility is disabled by default. Sysfs creation needs to be controlled when the use-case + * is running. + * For example, HV_NIC device is used either by uio_hv_generic or hv_netvsc at any given point of + * time, and "ring" sysfs is needed only when uio_hv_generic is bound to that device. To avoid + * exposing the ring buffer by default, this function is reponsible to enable visibility of + * ring for userspace to use. + * Note: Race conditions can happen with userspace and it is not encouraged to create new + * use-cases for this. This was added to maintain backward compatibility, while solving + * one of the race conditions in uio_hv_generic while creating sysfs. + * + * Returns 0 on success or error code on failure. + */ +int hv_create_ring_sysfs(struct vmbus_channel *channel, + int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel, + struct vm_area_struct *vma)) +{ + struct kobject *kobj = &channel->kobj; + + channel->mmap_ring_buffer = hv_mmap_ring_buffer; + channel->ring_sysfs_visible = true; + + return sysfs_update_group(kobj, &vmbus_chan_group); +} +EXPORT_SYMBOL_GPL(hv_create_ring_sysfs); + +/** + * hv_remove_ring_sysfs() - remove ring sysfs entry corresponding to ring buffers for a channel. + * @channel: Pointer to vmbus_channel structure + * + * Hide "ring" sysfs for a channel by changing its is_visible attribute and updating sysfs group. + * + * Returns 0 on success or error code on failure. + */ +int hv_remove_ring_sysfs(struct vmbus_channel *channel) +{ + struct kobject *kobj = &channel->kobj; + int ret; + + channel->ring_sysfs_visible = false; + ret = sysfs_update_group(kobj, &vmbus_chan_group); + channel->mmap_ring_buffer = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(hv_remove_ring_sysfs); + /* * vmbus_add_channel_kobj - setup a sub-directory under device/channels */ diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 1b19b56474950f..69c1df0f4ca541 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -131,15 +131,12 @@ static void hv_uio_rescind(struct vmbus_channel *channel) vmbus_device_unregister(channel->device_obj); } -/* Sysfs API to allow mmap of the ring buffers +/* Function used for mmap of ring buffer sysfs interface. * The ring buffer is allocated as contiguous memory by vmbus_open */ -static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, - const struct bin_attribute *attr, - struct vm_area_struct *vma) +static int +hv_uio_ring_mmap(struct vmbus_channel *channel, struct vm_area_struct *vma) { - struct vmbus_channel *channel - = container_of(kobj, struct vmbus_channel, kobj); void *ring_buffer = page_address(channel->ringbuffer_page); if (channel->state != CHANNEL_OPENED_STATE) @@ -149,15 +146,6 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, channel->ringbuffer_pagecount << PAGE_SHIFT); } -static const struct bin_attribute ring_buffer_bin_attr = { - .attr = { - .name = "ring", - .mode = 0600, - }, - .size = 2 * SZ_2M, - .mmap = hv_uio_ring_mmap, -}; - /* Callback from VMBUS subsystem when new channel created. */ static void hv_uio_new_channel(struct vmbus_channel *new_sc) @@ -178,8 +166,7 @@ hv_uio_new_channel(struct vmbus_channel *new_sc) /* Disable interrupts on sub channel */ new_sc->inbound.ring_buffer->interrupt_mask = 1; set_channel_read_mode(new_sc, HV_CALL_ISR); - - ret = sysfs_create_bin_file(&new_sc->kobj, &ring_buffer_bin_attr); + ret = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap); if (ret) { dev_err(device, "sysfs create ring bin file failed; %d\n", ret); vmbus_close(new_sc); @@ -350,10 +337,18 @@ hv_uio_probe(struct hv_device *dev, goto fail_close; } - ret = sysfs_create_bin_file(&channel->kobj, &ring_buffer_bin_attr); - if (ret) - dev_notice(&dev->device, - "sysfs create ring bin file failed; %d\n", ret); + /* + * This internally calls sysfs_update_group, which returns a non-zero value if it executes + * before sysfs_create_group. This is expected as the 'ring' will be created later in + * vmbus_device_register() -> vmbus_add_channel_kobj(). Thus, no need to check the return + * value and print warning. + * + * Creating/exposing sysfs in driver probe is not encouraged as it can lead to race + * conditions with userspace. For backward compatibility, "ring" sysfs could not be removed + * or decoupled from uio_hv_generic probe. Userspace programs can make use of inotify + * APIs to make sure that ring is created. + */ + hv_create_ring_sysfs(channel, hv_uio_ring_mmap); hv_set_drvdata(dev, pdata); @@ -375,7 +370,7 @@ hv_uio_remove(struct hv_device *dev) if (!pdata) return; - sysfs_remove_bin_file(&dev->channel->kobj, &ring_buffer_bin_attr); + hv_remove_ring_sysfs(dev->channel); uio_unregister_device(&pdata->info); hv_uio_cleanup(dev, pdata); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 675959fb97ba9d..d6ffe01962c2f8 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1002,6 +1002,12 @@ struct vmbus_channel { /* The max size of a packet on this channel */ u32 max_pkt_size; + + /* function to mmap ring buffer memory to the channel's sysfs ring attribute */ + int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma); + + /* boolean to control visibility of sysfs for ring buffer */ + bool ring_sysfs_visible; }; #define lock_requestor(channel, flags) \ From 65995e97a1caacf0024bebda3332b8d1f0f443c4 Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Fri, 2 May 2025 13:18:11 +0530 Subject: [PATCH 1431/2924] Drivers: hv: Make the sysfs node size for the ring buffer dynamic The ring buffer size varies across VMBus channels. The size of sysfs node for the ring buffer is currently hardcoded to 4 MB. Userspace clients either use fstat() or hardcode this size for doing mmap(). To address this, make the sysfs node size dynamic to reflect the actual ring buffer size for each channel. This will ensure that fstat() on ring sysfs node always returns the correct size of ring buffer. Reviewed-by: Michael Kelley Tested-by: Michael Kelley Reviewed-by: Dexuan Cui Signed-off-by: Naman Jain Link: https://lore.kernel.org/r/20250502074811.2022-3-namjain@linux.microsoft.com Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 0f16a83cc2d6bf..e3d51a3163163c 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1820,7 +1820,6 @@ static struct bin_attribute chan_attr_ring_buffer = { .name = "ring", .mode = 0600, }, - .size = 2 * SZ_2M, .mmap = hv_mmap_ring_buffer_wrapper, }; static struct attribute *vmbus_chan_attrs[] = { @@ -1880,11 +1879,21 @@ static umode_t vmbus_chan_bin_attr_is_visible(struct kobject *kobj, return attr->attr.mode; } +static size_t vmbus_chan_bin_size(struct kobject *kobj, + const struct bin_attribute *bin_attr, int a) +{ + const struct vmbus_channel *channel = + container_of(kobj, struct vmbus_channel, kobj); + + return channel->ringbuffer_pagecount << PAGE_SHIFT; +} + static const struct attribute_group vmbus_chan_group = { .attrs = vmbus_chan_attrs, .bin_attrs = vmbus_chan_bin_attrs, .is_visible = vmbus_chan_attr_is_visible, .is_bin_visible = vmbus_chan_bin_attr_is_visible, + .bin_size = vmbus_chan_bin_size, }; static const struct kobj_type vmbus_chan_ktype = { From d9ec73301099ec5975505e1c3effbe768bab9490 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Tue, 29 Apr 2025 20:58:27 +0200 Subject: [PATCH 1432/2924] fs/eventpoll: fix endless busy loop after timeout has expired After commit 0a65bc27bd64 ("eventpoll: Set epoll timeout if it's in the future"), the following program would immediately enter a busy loop in the kernel: ``` int main() { int e = epoll_create1(0); struct epoll_event event = {.events = EPOLLIN}; epoll_ctl(e, EPOLL_CTL_ADD, 0, &event); const struct timespec timeout = {.tv_nsec = 1}; epoll_pwait2(e, &event, 1, &timeout, 0); } ``` This happens because the given (non-zero) timeout of 1 nanosecond usually expires before ep_poll() is entered and then ep_schedule_timeout() returns false, but `timed_out` is never set because the code line that sets it is skipped. This quickly turns into a soft lockup, RCU stalls and deadlocks, inflicting severe headaches to the whole system. When the timeout has expired, we don't need to schedule a hrtimer, but we should set the `timed_out` variable. Therefore, I suggest moving the ep_schedule_timeout() check into the `timed_out` expression instead of skipping it. brauner: Note that there was an earlier fix by Joe Damato in response to my bug report in [1]. Fixes: 0a65bc27bd64 ("eventpoll: Set epoll timeout if it's in the future") Cc: Joe Damato Cc: stable@vger.kernel.org Signed-off-by: Max Kellermann Link: https://lore.kernel.org/20250429153419.94723-1-jdamato@fastly.com [1] Link: https://lore.kernel.org/20250429185827.3564438-1-max.kellermann@ionos.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/eventpoll.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4bc264b854c436..d4dbffdedd08e3 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2111,9 +2111,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, write_unlock_irq(&ep->lock); - if (!eavail && ep_schedule_timeout(to)) - timed_out = !schedule_hrtimeout_range(to, slack, - HRTIMER_MODE_ABS); + if (!eavail) + timed_out = !ep_schedule_timeout(to) || + !schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* From b662b162c3d06f120749eea0351ec9317d9dd905 Mon Sep 17 00:00:00 2001 From: Feng Jiang Date: Wed, 9 Apr 2025 09:46:33 +0800 Subject: [PATCH 1433/2924] drm: Fix potential overflow issue in event_string array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling scnprintf() to append recovery method to event_string, the second argument should be `sizeof(event_string) - len`, otherwise there is a potential overflow problem. Fixes: b7cf9f4ac1b8 ("drm: Introduce device wedged event") Signed-off-by: Feng Jiang Reviewed-by: André Almeida Reviewed-by: Raag Jadav Link: https://lore.kernel.org/r/20250409014633.31303-1-jiangfeng@kylinos.cn Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/drm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 17fc5dc708f472..60e5ac179c151a 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -549,7 +549,7 @@ int drm_dev_wedged_event(struct drm_device *dev, unsigned long method) if (drm_WARN_ONCE(dev, !recovery, "invalid recovery method %u\n", opt)) break; - len += scnprintf(event_string + len, sizeof(event_string), "%s,", recovery); + len += scnprintf(event_string + len, sizeof(event_string) - len, "%s,", recovery); } if (recovery) From 0c314cda93258cd1f0055a278a6576b5d4aeabf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 30 Apr 2025 11:20:13 +0200 Subject: [PATCH 1434/2924] arm64: vdso: Work around invalid absolute relocations from GCC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All vDSO code needs to be completely position independent. Symbol references are marked as hidden so the compiler emits PC-relative relocations. However GCC emits absolute relocations for symbol-relative references with an offset >= 64KiB. After recent refactorings in the vDSO code this is the case in __arch_get_vdso_u_timens_data() with a page size of 64KiB. Work around the issue by preventing the optimizer from seeing the offsets. Fixes: 83a2a6b8cfc5 ("vdso/gettimeofday: Prepare do_hres_timens() for introduction of struct vdso_clock") Reported-by: Jan Stancek Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Link: https://lore.kernel.org/all/20250430-vdso-absolute-reloc-v2-1-5efcc3bc4b26@linutronix.de Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120002 Closes: https://lore.kernel.org/lkml/aApGPAoctq_eoE2g@t14ultra/ --- arch/arm64/include/asm/vdso/gettimeofday.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 92a2b59a9f3df4..3322c7047d84fe 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -99,6 +99,19 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return res; } +#if IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) +{ + const struct vdso_time_data *ret = &vdso_u_time_data; + + /* Work around invalid absolute relocations */ + OPTIMIZER_HIDE_VAR(ret); + + return ret; +} +#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data +#endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */ + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ From 38a05c0b87833f5b188ae43b428b1f792df2b384 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 2 May 2025 13:22:28 +0200 Subject: [PATCH 1435/2924] irqchip/qcom-mpm: Prevent crash when trying to handle non-wake GPIOs On Qualcomm chipsets not all GPIOs are wakeup capable. Those GPIOs do not have a corresponding MPM pin and should not be handled inside the MPM driver. The IRQ domain hierarchy is always applied, so it's required to explicitly disconnect the hierarchy for those. The pinctrl-msm driver marks these with GPIO_NO_WAKE_IRQ. qcom-pdc has a check for this, but irq-qcom-mpm is currently missing the check. This is causing crashes when setting up interrupts for non-wake GPIOs: root@rb1:~# gpiomon -c gpiochip1 10 irq: IRQ159: trimming hierarchy from :soc@0:interrupt-controller@f200000-1 Unable to handle kernel paging request at virtual address ffff8000a1dc3820 Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT) pc : mpm_set_type+0x80/0xcc lr : mpm_set_type+0x5c/0xcc Call trace: mpm_set_type+0x80/0xcc (P) qcom_mpm_set_type+0x64/0x158 irq_chip_set_type_parent+0x20/0x38 msm_gpio_irq_set_type+0x50/0x530 __irq_set_trigger+0x60/0x184 __setup_irq+0x304/0x6bc request_threaded_irq+0xc8/0x19c edge_detector_setup+0x260/0x364 linereq_create+0x420/0x5a8 gpio_ioctl+0x2d4/0x6c0 Fix this by copying the check for GPIO_NO_WAKE_IRQ from qcom-pdc.c, so that MPM is removed entirely from the hierarchy for non-wake GPIOs. Fixes: a6199bb514d8 ("irqchip: Add Qualcomm MPM controller driver") Reported-by: Alexey Klimov Signed-off-by: Stephan Gerhold Signed-off-by: Thomas Gleixner Tested-by: Alexey Klimov Reviewed-by: Bartosz Golaszewski Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250502-irq-qcom-mpm-fix-no-wake-v1-1-8a1eafcd28d4@linaro.org --- drivers/irqchip/irq-qcom-mpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index 7942d8eb3d00ea..f772deb9cba574 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -227,6 +227,9 @@ static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; + if (pin == GPIO_NO_WAKE_IRQ) + return irq_domain_disconnect_hierarchy(domain, virq); + ret = irq_domain_set_hwirq_and_chip(domain, virq, pin, &qcom_mpm_chip, priv); if (ret) From 35e4079bf1a2570abffce6ababa631afcf8ea0e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Wed, 30 Apr 2025 17:51:52 -0300 Subject: [PATCH 1436/2924] drm/v3d: Add job to pending list if the reset was skipped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CL/CSD job times out, we check if the GPU has made any progress since the last timeout. If so, instead of resetting the hardware, we skip the reset and let the timer get rearmed. This gives long-running jobs a chance to complete. However, when `timedout_job()` is called, the job in question is removed from the pending list, which means it won't be automatically freed through `free_job()`. Consequently, when we skip the reset and keep the job running, the job won't be freed when it finally completes. This situation leads to a memory leak, as exposed in [1] and [2]. Similarly to commit 704d3d60fec4 ("drm/etnaviv: don't block scheduler when GPU is still active"), this patch ensures the job is put back on the pending list when extending the timeout. Cc: stable@vger.kernel.org # 6.0 Reported-by: Daivik Bhatia Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12227 [1] Closes: https://github.com/raspberrypi/linux/issues/6817 [2] Reviewed-by: Iago Toral Quiroga Acked-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250430210643.57924-1-mcanal@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_sched.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 4a7701a33cf8d7..eb35482f6fb577 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -744,11 +744,16 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) return DRM_GPU_SCHED_STAT_NOMINAL; } -/* If the current address or return address have changed, then the GPU - * has probably made progress and we should delay the reset. This - * could fail if the GPU got in an infinite loop in the CL, but that - * is pretty unlikely outside of an i-g-t testcase. - */ +static void +v3d_sched_skip_reset(struct drm_sched_job *sched_job) +{ + struct drm_gpu_scheduler *sched = sched_job->sched; + + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); +} + static enum drm_gpu_sched_stat v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 *timedout_ctca, u32 *timedout_ctra) @@ -758,9 +763,16 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); + /* If the current address or return address have changed, then the GPU + * has probably made progress and we should delay the reset. This + * could fail if the GPU got in an infinite loop in the CL, but that + * is pretty unlikely outside of an i-g-t testcase. + */ if (*timedout_ctca != ctca || *timedout_ctra != ctra) { *timedout_ctca = ctca; *timedout_ctra = ctra; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -800,11 +812,13 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver)); - /* If we've made progress, skip reset and let the timer get - * rearmed. + /* If we've made progress, skip reset, add the job to the pending + * list, and let the timer get rearmed. */ if (job->timedout_batches != batches) { job->timedout_batches = batches; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } From 5fea0c6c0ebe0a645c247f29fc683852cf51af70 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 28 Apr 2025 13:55:31 -0500 Subject: [PATCH 1437/2924] KVM: SVM: Update dump_ghcb() to use the GHCB snapshot fields Commit 4e15a0ddc3ff ("KVM: SEV: snapshot the GHCB before accessing it") updated the SEV code to take a snapshot of the GHCB before using it. But the dump_ghcb() function wasn't updated to use the snapshot locations. This results in incorrect output from dump_ghcb() for the "is_valid" and "valid_bitmap" fields. Update dump_ghcb() to use the proper locations. Fixes: 4e15a0ddc3ff ("KVM: SEV: snapshot the GHCB before accessing it") Signed-off-by: Tom Lendacky Reviewed-by: Liam Merwick Link: https://lore.kernel.org/r/8f03878443681496008b1b37b7c4bf77a342b459.1745866531.git.thomas.lendacky@amd.com [sean: add comment and snapshot qualifier] Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/sev.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0bc708ee278877..a7a7dc5073363b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3173,9 +3173,14 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) kvfree(svm->sev_es.ghcb_sa); } +static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control) +{ + return (((u64)control->exit_code_hi) << 32) | control->exit_code; +} + static void dump_ghcb(struct vcpu_svm *svm) { - struct ghcb *ghcb = svm->sev_es.ghcb; + struct vmcb_control_area *control = &svm->vmcb->control; unsigned int nbits; /* Re-use the dump_invalid_vmcb module parameter */ @@ -3184,18 +3189,24 @@ static void dump_ghcb(struct vcpu_svm *svm) return; } - nbits = sizeof(ghcb->save.valid_bitmap) * 8; + nbits = sizeof(svm->sev_es.valid_bitmap) * 8; - pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa); + /* + * Print KVM's snapshot of the GHCB values that were (unsuccessfully) + * used to handle the exit. If the guest has since modified the GHCB + * itself, dumping the raw GHCB won't help debug why KVM was unable to + * handle the VMGEXIT that KVM observed. + */ + pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", - ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb)); + kvm_ghcb_get_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", - ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb)); + control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2", - ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb)); + control->exit_info_2, kvm_ghcb_sw_exit_info_2_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch", - ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb)); - pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap); + svm->sev_es.sw_scratch, kvm_ghcb_sw_scratch_is_valid(svm)); + pr_err("%-20s%*pb\n", "valid_bitmap", nbits, svm->sev_es.valid_bitmap); } static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) @@ -3266,11 +3277,6 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } -static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control) -{ - return (((u64)control->exit_code_hi) << 32) | control->exit_code; -} - static int sev_es_validate_vmgexit(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; From 9129633d568edd36aa22bf703b12835153cec985 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 30 Apr 2025 15:09:54 -0700 Subject: [PATCH 1438/2924] KVM: x86/mmu: Prevent installing hugepages when mem attributes are changing When changing memory attributes on a subset of a potential hugepage, add the hugepage to the invalidation range tracking to prevent installing a hugepage until the attributes are fully updated. Like the actual hugepage tracking updates in kvm_arch_post_set_memory_attributes(), process only the head and tail pages, as any potential hugepages that are entirely covered by the range will already be tracked. Note, only hugepage chunks whose current attributes are NOT mixed need to be added to the invalidation set, as mixed attributes already prevent installing a hugepage, and it's perfectly safe to install a smaller mapping for a gfn whose attributes aren't changing. Fixes: 8dd2eee9d526 ("KVM: x86/mmu: Handle page fault for private memory") Cc: stable@vger.kernel.org Reported-by: Michael Roth Tested-by: Michael Roth Link: https://lore.kernel.org/r/20250430220954.522672-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 69 ++++++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 387464ebe8e8df..8d1b632e33d288 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7670,9 +7670,30 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm) } #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn, + int level) +{ + return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG; +} + +static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn, + int level) +{ + lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG; +} + +static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn, + int level) +{ + lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG; +} + bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range) { + struct kvm_memory_slot *slot = range->slot; + int level; + /* * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM @@ -7687,6 +7708,38 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) return false; + if (WARN_ON_ONCE(range->end <= range->start)) + return false; + + /* + * If the head and tail pages of the range currently allow a hugepage, + * i.e. reside fully in the slot and don't have mixed attributes, then + * add each corresponding hugepage range to the ongoing invalidation, + * e.g. to prevent KVM from creating a hugepage in response to a fault + * for a gfn whose attributes aren't changing. Note, only the range + * of gfns whose attributes are being modified needs to be explicitly + * unmapped, as that will unmap any existing hugepages. + */ + for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { + gfn_t start = gfn_round_for_level(range->start, level); + gfn_t end = gfn_round_for_level(range->end - 1, level); + gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level); + + if ((start != range->start || start + nr_pages > range->end) && + start >= slot->base_gfn && + start + nr_pages <= slot->base_gfn + slot->npages && + !hugepage_test_mixed(slot, start, level)) + kvm_mmu_invalidate_range_add(kvm, start, start + nr_pages); + + if (end == start) + continue; + + if ((end + nr_pages) > range->end && + (end + nr_pages) <= (slot->base_gfn + slot->npages) && + !hugepage_test_mixed(slot, end, level)) + kvm_mmu_invalidate_range_add(kvm, end, end + nr_pages); + } + /* Unmap the old attribute page. */ if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE) range->attr_filter = KVM_FILTER_SHARED; @@ -7696,23 +7749,7 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, return kvm_unmap_gfn_range(kvm, range); } -static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn, - int level) -{ - return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG; -} - -static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn, - int level) -{ - lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG; -} -static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn, - int level) -{ - lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG; -} static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long attrs) From b1525d0a8d3ad98487775232c3a861f3f4d894ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Sat, 3 May 2025 08:53:58 +0200 Subject: [PATCH 1439/2924] landlock: Remove KUnit test that triggers a warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A KUnit test checking boundaries triggers a canary warning, which may be disturbing. Let's remove this test for now. Hopefully, KUnit will soon get support for suppressing warning backtraces [1]. Cc: Alessandro Carminati Cc: Andrew Morton Cc: Günther Noack Reported-by: Tingmao Wang Closes: https://lore.kernel.org/r/20250327213807.12964-1-m@maowtm.org Link: https://lore.kernel.org/r/20250425193249.78b45d2589575c15f483c3d8@linux-foundation.org [1] Link: https://lore.kernel.org/r/20250503065359.3625407-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- security/landlock/audit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/landlock/audit.c b/security/landlock/audit.c index 7e5e0ed0e4e5fe..58d5c40d4d0e1d 100644 --- a/security/landlock/audit.c +++ b/security/landlock/audit.c @@ -175,7 +175,7 @@ static void test_get_hierarchy(struct kunit *const test) KUNIT_EXPECT_EQ(test, 10, get_hierarchy(&dom2, 0)->id); KUNIT_EXPECT_EQ(test, 20, get_hierarchy(&dom2, 1)->id); KUNIT_EXPECT_EQ(test, 30, get_hierarchy(&dom2, 2)->id); - KUNIT_EXPECT_EQ(test, 30, get_hierarchy(&dom2, -1)->id); + /* KUNIT_EXPECT_EQ(test, 30, get_hierarchy(&dom2, -1)->id); */ } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ From 8ed12ab1319b2d8e4a529504777aacacf71371e4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 28 Apr 2025 19:43:22 +0200 Subject: [PATCH 1440/2924] x86/boot/sev: Support memory acceptance in the EFI stub under SVSM Commit: d54d610243a4 ("x86/boot/sev: Avoid shared GHCB page for early memory acceptance") provided a fix for SEV-SNP memory acceptance from the EFI stub when running at VMPL #0. However, that fix was insufficient for SVSM SEV-SNP guests running at VMPL >0, as those rely on a SVSM calling area, which is a shared buffer whose address is programmed into a SEV-SNP MSR, and the SEV init code that sets up this calling area executes much later during the boot. Given that booting via the EFI stub at VMPL >0 implies that the firmware has configured this calling area already, reuse it for performing memory acceptance in the EFI stub. Fixes: fcd042e86422 ("x86/sev: Perform PVALIDATE using the SVSM when not at VMPL0") Tested-by: Tom Lendacky Co-developed-by: Tom Lendacky Signed-off-by: Tom Lendacky Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Cc: Dionna Amalie Glaze Cc: Kevin Loughlin Cc: linux-efi@vger.kernel.org Link: https://lore.kernel.org/r/20250428174322.2780170-2-ardb+git@google.com --- arch/x86/boot/compressed/mem.c | 5 +---- arch/x86/boot/compressed/sev.c | 40 ++++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/sev.h | 2 ++ 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c index f676156d9f3db4..0e9f84ab4bdcd1 100644 --- a/arch/x86/boot/compressed/mem.c +++ b/arch/x86/boot/compressed/mem.c @@ -34,14 +34,11 @@ static bool early_is_tdx_guest(void) void arch_accept_memory(phys_addr_t start, phys_addr_t end) { - static bool sevsnp; - /* Platform-specific memory-acceptance call goes here */ if (early_is_tdx_guest()) { if (!tdx_accept_memory(start, end)) panic("TDX: Failed to accept memory\n"); - } else if (sevsnp || (sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) { - sevsnp = true; + } else if (early_is_sevsnp_guest()) { snp_accept_memory(start, end); } else { error("Cannot accept memory: unknown platform\n"); diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 89ba168f4f0f02..0003e4416efd16 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -645,3 +645,43 @@ void sev_prep_identity_maps(unsigned long top_level_pgt) sev_verify_cbit(top_level_pgt); } + +bool early_is_sevsnp_guest(void) +{ + static bool sevsnp; + + if (sevsnp) + return true; + + if (!(sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) + return false; + + sevsnp = true; + + if (!snp_vmpl) { + unsigned int eax, ebx, ecx, edx; + + /* + * CPUID Fn8000_001F_EAX[28] - SVSM support + */ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax & BIT(28)) { + struct msr m; + + /* Obtain the address of the calling area to use */ + boot_rdmsr(MSR_SVSM_CAA, &m); + boot_svsm_caa = (void *)m.q; + boot_svsm_caa_pa = m.q; + + /* + * The real VMPL level cannot be discovered, but the + * memory acceptance routines make no use of that so + * any non-zero value suffices here. + */ + snp_vmpl = U8_MAX; + } + } + return true; +} diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h index 4e463f33186df4..d3900384b8abb5 100644 --- a/arch/x86/boot/compressed/sev.h +++ b/arch/x86/boot/compressed/sev.h @@ -13,12 +13,14 @@ bool sev_snp_enabled(void); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 sev_get_status(void); +bool early_is_sevsnp_guest(void); #else static inline bool sev_snp_enabled(void) { return false; } static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 sev_get_status(void) { return 0; } +static inline bool early_is_sevsnp_guest(void) { return false; } #endif From b53e523261bf058ea4a518b482222e7a277b186b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 4 May 2025 08:06:28 -0600 Subject: [PATCH 1441/2924] io_uring: always arm linked timeouts prior to issue There are a few spots where linked timeouts are armed, and not all of them adhere to the pre-arm, attempt issue, post-arm pattern. This can be problematic if the linked request returns that it will trigger a callback later, and does so before the linked timeout is fully armed. Consolidate all the linked timeout handling into __io_issue_sqe(), rather than have it spread throughout the various issue entry points. Cc: stable@vger.kernel.org Link: https://github.com/axboe/liburing/issues/1390 Reported-by: Chase Hiltz Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 50 ++++++++++++++------------------------------- 1 file changed, 15 insertions(+), 35 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index a2b256e96d5da3..769814d7115302 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -448,24 +448,6 @@ static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) return req->link; } -static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) -{ - if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) - return NULL; - return __io_prep_linked_timeout(req); -} - -static noinline void __io_arm_ltimeout(struct io_kiocb *req) -{ - io_queue_linked_timeout(__io_prep_linked_timeout(req)); -} - -static inline void io_arm_ltimeout(struct io_kiocb *req) -{ - if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT)) - __io_arm_ltimeout(req); -} - static void io_prep_async_work(struct io_kiocb *req) { const struct io_issue_def *def = &io_issue_defs[req->opcode]; @@ -518,7 +500,6 @@ static void io_prep_async_link(struct io_kiocb *req) static void io_queue_iowq(struct io_kiocb *req) { - struct io_kiocb *link = io_prep_linked_timeout(req); struct io_uring_task *tctx = req->tctx; BUG_ON(!tctx); @@ -543,8 +524,6 @@ static void io_queue_iowq(struct io_kiocb *req) trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work)); io_wq_enqueue(tctx->io_wq, &req->work); - if (link) - io_queue_linked_timeout(link); } static void io_req_queue_iowq_tw(struct io_kiocb *req, io_tw_token_t tw) @@ -1724,15 +1703,22 @@ static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def, return !!req->file; } +#define REQ_ISSUE_SLOW_FLAGS (REQ_F_CREDS | REQ_F_ARM_LTIMEOUT) + static inline int __io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags, const struct io_issue_def *def) { const struct cred *creds = NULL; + struct io_kiocb *link = NULL; int ret; - if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) - creds = override_creds(req->creds); + if (unlikely(req->flags & REQ_ISSUE_SLOW_FLAGS)) { + if ((req->flags & REQ_F_CREDS) && req->creds != current_cred()) + creds = override_creds(req->creds); + if (req->flags & REQ_F_ARM_LTIMEOUT) + link = __io_prep_linked_timeout(req); + } if (!def->audit_skip) audit_uring_entry(req->opcode); @@ -1742,8 +1728,12 @@ static inline int __io_issue_sqe(struct io_kiocb *req, if (!def->audit_skip) audit_uring_exit(!ret, ret); - if (creds) - revert_creds(creds); + if (unlikely(creds || link)) { + if (creds) + revert_creds(creds); + if (link) + io_queue_linked_timeout(link); + } return ret; } @@ -1769,7 +1759,6 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) if (ret == IOU_ISSUE_SKIP_COMPLETE) { ret = 0; - io_arm_ltimeout(req); /* If the op doesn't have a file, we're not polling for it */ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue) @@ -1824,8 +1813,6 @@ void io_wq_submit_work(struct io_wq_work *work) else req_ref_get(req); - io_arm_ltimeout(req); - /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) { fail: @@ -1941,15 +1928,11 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd) static void io_queue_async(struct io_kiocb *req, int ret) __must_hold(&req->ctx->uring_lock) { - struct io_kiocb *linked_timeout; - if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) { io_req_defer_failed(req, ret); return; } - linked_timeout = io_prep_linked_timeout(req); - switch (io_arm_poll_handler(req, 0)) { case IO_APOLL_READY: io_kbuf_recycle(req, 0); @@ -1962,9 +1945,6 @@ static void io_queue_async(struct io_kiocb *req, int ret) case IO_APOLL_OK: break; } - - if (linked_timeout) - io_queue_linked_timeout(linked_timeout); } static inline void io_queue_sqe(struct io_kiocb *req) From de3629baf5a33af1919dec7136d643b0662e85ef Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 3 May 2025 18:24:01 +0200 Subject: [PATCH 1442/2924] parisc: Fix double SIGFPE crash Camm noticed that on parisc a SIGFPE exception will crash an application with a second SIGFPE in the signal handler. Dave analyzed it, and it happens because glibc uses a double-word floating-point store to atomically update function descriptors. As a result of lazy binding, we hit a floating-point store in fpe_func almost immediately. When the T bit is set, an assist exception trap occurs when when the co-processor encounters *any* floating-point instruction except for a double store of register %fr0. The latter cancels all pending traps. Let's fix this by clearing the Trap (T) bit in the FP status register before returning to the signal handler in userspace. The issue can be reproduced with this test program: root@parisc:~# cat fpe.c static void fpe_func(int sig, siginfo_t *i, void *v) { sigset_t set; sigemptyset(&set); sigaddset(&set, SIGFPE); sigprocmask(SIG_UNBLOCK, &set, NULL); printf("GOT signal %d with si_code %ld\n", sig, i->si_code); } int main() { struct sigaction action = { .sa_sigaction = fpe_func, .sa_flags = SA_RESTART|SA_SIGINFO }; sigaction(SIGFPE, &action, 0); feenableexcept(FE_OVERFLOW); return printf("%lf\n",1.7976931348623158E308*1.7976931348623158E308); } root@parisc:~# gcc fpe.c -lm root@parisc:~# ./a.out Floating point exception root@parisc:~# strace -f ./a.out execve("./a.out", ["./a.out"], 0xf9ac7034 /* 20 vars */) = 0 getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0 ... rt_sigaction(SIGFPE, {sa_handler=0x1110a, sa_mask=[], sa_flags=SA_RESTART|SA_SIGINFO}, NULL, 8) = 0 --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0x1078f} --- --- SIGFPE {si_signo=SIGFPE, si_code=FPE_FLTOVF, si_addr=0xf8f21237} --- +++ killed by SIGFPE +++ Floating point exception Signed-off-by: Helge Deller Suggested-by: John David Anglin Reported-by: Camm Maguire Cc: stable@vger.kernel.org --- arch/parisc/math-emu/driver.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 34495446e051c2..71829cb7bc812a 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -97,9 +97,19 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { - force_sig_fault(signalcode >> 24, signalcode & 0xffffff, - (void __user *) regs->iaoq[0]); - return -1; + int sig = signalcode >> 24; + + if (sig == SIGFPE) { + /* + * Clear floating point trap bit to avoid trapping + * again on the first floating-point instruction in + * the userspace signal handler. + */ + regs->fr[0] &= ~(1ULL << 38); + } + force_sig_fault(sig, signalcode & 0xffffff, + (void __user *) regs->iaoq[0]); + return -1; } return signalcode ? -1 : 0; From df2e19a883fdea698cdbed4987987db999b19d58 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 May 2025 13:50:09 -0400 Subject: [PATCH 1443/2924] bcachefs: thread_with_stdio: fix spinning instead of exiting bch2_stdio_redirect_vprintf() was missing a check for stdio->done, i.e. exiting. This caused the thread attempting to print to spin, and since it was being called from the kthread ran by thread_with_stdio, the userspace side hung as well. Change it to return -EPIPE - i.e. writing to a pipe that's been closed. Reported-by: Jan Solanti Signed-off-by: Kent Overstreet --- fs/bcachefs/thread_with_file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index dea73bc1cb51cb..314a24d15d4e7c 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -455,8 +455,10 @@ ssize_t bch2_stdio_redirect_vprintf(struct stdio_redirect *stdio, bool nonblocki struct stdio_buf *buf = &stdio->output; unsigned long flags; ssize_t ret; - again: + if (stdio->done) + return -EPIPE; + spin_lock_irqsave(&buf->lock, flags); ret = bch2_darray_vprintf(&buf->buf, GFP_NOWAIT, fmt, args); spin_unlock_irqrestore(&buf->lock, flags); From 92a09c47464d040866cf2b4cd052bc60555185fb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 May 2025 13:55:04 -0700 Subject: [PATCH 1444/2924] Linux 6.15-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 94be5dfb81fb3c..b29cc321ffd9c6 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* From 68025adfc13e6cd15eebe2293f77659f47daf13b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 4 Apr 2025 17:05:19 +0200 Subject: [PATCH 1445/2924] um: fix _nofault accesses Nathan reported [1] that when built with clang, the um kernel crashes pretty much immediately. This turned out to be an issue with the inline assembly I had added, when clang used %rax/%eax for both operands. Reorder it so current->thread.segv_continue is written first, and then the lifetime of _faulted won't have overlap with the lifetime of segv_continue. In the email thread Benjamin also pointed out that current->mm is only NULL for true kernel tasks, but we could do this for a userspace task, so the current->thread.segv_continue logic must be lifted out of the mm==NULL check. Finally, while looking at this, put a barrier() so the NULL assignment to thread.segv_continue cannot be reorder before the possibly faulting operation. Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/r/20250402221254.GA384@ax162 [1] Fixes: d1d7f01f7cd3 ("um: mark rodata read-only and implement _nofault accesses") Tested-by: Nathan Chancellor Signed-off-by: Johannes Berg --- arch/um/include/asm/uaccess.h | 2 ++ arch/um/kernel/trap.c | 26 ++++++++++++------------ arch/x86/um/shared/sysdep/faultinfo_32.h | 2 +- arch/x86/um/shared/sysdep/faultinfo_64.h | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 3a08f9029a3f9a..1c6e0ae41b0c5a 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -55,6 +55,7 @@ do { \ goto err_label; \ } \ *((type *)dst) = get_unaligned((type *)(src)); \ + barrier(); \ current->thread.segv_continue = NULL; \ } while (0) @@ -66,6 +67,7 @@ do { \ if (__faulted) \ goto err_label; \ put_unaligned(*((type *)src), (type *)(dst)); \ + barrier(); \ current->thread.segv_continue = NULL; \ } while (0) diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ce073150dc20a8..ef2272e92a4321 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -225,20 +225,20 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, panic("Failed to sync kernel TLBs: %d", err); goto out; } - else if (current->mm == NULL) { - if (current->pagefault_disabled) { - if (!mc) { - show_regs(container_of(regs, struct pt_regs, regs)); - panic("Segfault with pagefaults disabled but no mcontext"); - } - if (!current->thread.segv_continue) { - show_regs(container_of(regs, struct pt_regs, regs)); - panic("Segfault without recovery target"); - } - mc_set_rip(mc, current->thread.segv_continue); - current->thread.segv_continue = NULL; - goto out; + else if (current->pagefault_disabled) { + if (!mc) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault with pagefaults disabled but no mcontext"); } + if (!current->thread.segv_continue) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault without recovery target"); + } + mc_set_rip(mc, current->thread.segv_continue); + current->thread.segv_continue = NULL; + goto out; + } + else if (current->mm == NULL) { show_regs(container_of(regs, struct pt_regs, regs)); panic("Segfault with no mm"); } diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h index ab5c8e47049c31..9193a7790a71a4 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_32.h +++ b/arch/x86/um/shared/sysdep/faultinfo_32.h @@ -31,8 +31,8 @@ struct faultinfo { #define ___backtrack_faulted(_faulted) \ asm volatile ( \ - "mov $0, %0\n" \ "movl $__get_kernel_nofault_faulted_%=,%1\n" \ + "mov $0, %0\n" \ "jmp _end_%=\n" \ "__get_kernel_nofault_faulted_%=:\n" \ "mov $1, %0;" \ diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h index 26fb4835d3e9a3..61e4ca1e0ab58d 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_64.h +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h @@ -31,8 +31,8 @@ struct faultinfo { #define ___backtrack_faulted(_faulted) \ asm volatile ( \ - "mov $0, %0\n" \ "movq $__get_kernel_nofault_faulted_%=,%1\n" \ + "mov $0, %0\n" \ "jmp _end_%=\n" \ "__get_kernel_nofault_faulted_%=:\n" \ "mov $1, %0;" \ From 5214a9f6c0f56644acb9d2cbb58facf1856d322b Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 14 Apr 2025 11:59:33 +0200 Subject: [PATCH 1446/2924] x86/microcode: Consolidate the loader enablement checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidate the whole logic which determines whether the microcode loader should be enabled or not into a single function and call it everywhere. Well, almost everywhere - not in mk_early_pgtbl_32() because there the kernel is running without paging enabled and checking dis_ucode_ldr et al would require physical addresses and uglification of the code. But since this is 32-bit, the easier thing to do is to simply map the initrd unconditionally especially since that mapping is getting removed later anyway by zap_early_initrd_mapping() and avoid the uglification. In doing so, address the issue of old 486er machines without CPUID support, not booting current kernels. [ mingo: Fix no previous prototype for ‘microcode_loader_disabled’ [-Wmissing-prototypes] ] Fixes: 4c585af7180c1 ("x86/boot/32: Temporarily map initrd for microcode loading") Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/CANpbe9Wm3z8fy9HbgS8cuhoj0TREYEEkBipDuhgkWFvqX0UoVQ@mail.gmail.com --- arch/x86/include/asm/microcode.h | 2 + arch/x86/kernel/cpu/microcode/amd.c | 6 ++- arch/x86/kernel/cpu/microcode/core.c | 58 ++++++++++++++---------- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/microcode/internal.h | 1 - arch/x86/kernel/head32.c | 4 -- 6 files changed, 41 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 695e569159c1d1..be7cddc414e4fb 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -17,10 +17,12 @@ struct ucode_cpu_info { void load_ucode_bsp(void); void load_ucode_ap(void); void microcode_bsp_resume(void); +bool __init microcode_loader_disabled(void); #else static inline void load_ucode_bsp(void) { } static inline void load_ucode_ap(void) { } static inline void microcode_bsp_resume(void) { } +static inline bool __init microcode_loader_disabled(void) { return false; } #endif extern unsigned long initrd_start_early; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 4a10d35e70aa54..96cb992d50ef55 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -1098,15 +1098,17 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz static int __init save_microcode_in_initrd(void) { - unsigned int cpuid_1_eax = native_cpuid_eax(1); struct cpuinfo_x86 *c = &boot_cpu_data; struct cont_desc desc = { 0 }; + unsigned int cpuid_1_eax; enum ucode_state ret; struct cpio_data cp; - if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) + if (microcode_loader_disabled() || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) return 0; + cpuid_1_eax = native_cpuid_eax(1); + if (!find_blobs_in_containers(&cp)) return -EINVAL; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b3658d11e7b692..079f046ee26d19 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -41,8 +41,8 @@ #include "internal.h" -static struct microcode_ops *microcode_ops; -bool dis_ucode_ldr = true; +static struct microcode_ops *microcode_ops; +static bool dis_ucode_ldr = false; bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV); module_param(force_minrev, bool, S_IRUSR | S_IWUSR); @@ -84,6 +84,9 @@ static bool amd_check_current_patch_level(void) u32 lvl, dummy, i; u32 *levels; + if (x86_cpuid_vendor() != X86_VENDOR_AMD) + return false; + native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); levels = final_levels; @@ -95,27 +98,29 @@ static bool amd_check_current_patch_level(void) return false; } -static bool __init check_loader_disabled_bsp(void) +bool __init microcode_loader_disabled(void) { - static const char *__dis_opt_str = "dis_ucode_ldr"; - const char *cmdline = boot_command_line; - const char *option = __dis_opt_str; + if (dis_ucode_ldr) + return true; /* - * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not - * completely accurate as xen pv guests don't see that CPUID bit set but - * that's good enough as they don't land on the BSP path anyway. + * Disable when: + * + * 1) The CPU does not support CPUID. + * + * 2) Bit 31 in CPUID[1]:ECX is clear + * The bit is reserved for hypervisor use. This is still not + * completely accurate as XEN PV guests don't see that CPUID bit + * set, but that's good enough as they don't land on the BSP + * path anyway. + * + * 3) Certain AMD patch levels are not allowed to be + * overwritten. */ - if (native_cpuid_ecx(1) & BIT(31)) - return true; - - if (x86_cpuid_vendor() == X86_VENDOR_AMD) { - if (amd_check_current_patch_level()) - return true; - } - - if (cmdline_find_option_bool(cmdline, option) <= 0) - dis_ucode_ldr = false; + if (!have_cpuid_p() || + native_cpuid_ecx(1) & BIT(31) || + amd_check_current_patch_level()) + dis_ucode_ldr = true; return dis_ucode_ldr; } @@ -125,7 +130,10 @@ void __init load_ucode_bsp(void) unsigned int cpuid_1_eax; bool intel = true; - if (!have_cpuid_p()) + if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0) + dis_ucode_ldr = true; + + if (microcode_loader_disabled()) return; cpuid_1_eax = native_cpuid_eax(1); @@ -146,9 +154,6 @@ void __init load_ucode_bsp(void) return; } - if (check_loader_disabled_bsp()) - return; - if (intel) load_ucode_intel_bsp(&early_data); else @@ -159,6 +164,11 @@ void load_ucode_ap(void) { unsigned int cpuid_1_eax; + /* + * Can't use microcode_loader_disabled() here - .init section + * hell. It doesn't have to either - the BSP variant must've + * parsed cmdline already anyway. + */ if (dis_ucode_ldr) return; @@ -810,7 +820,7 @@ static int __init microcode_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; int error; - if (dis_ucode_ldr) + if (microcode_loader_disabled()) return -EINVAL; if (c->x86_vendor == X86_VENDOR_INTEL) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 819199bc0119b2..2a397da43923ba 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -389,7 +389,7 @@ static int __init save_builtin_microcode(void) if (xchg(&ucode_patch_va, NULL) != UCODE_BSP_LOADED) return 0; - if (dis_ucode_ldr || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + if (microcode_loader_disabled() || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return 0; uci.mc = get_microcode_blob(&uci, true); diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index 5df621752fefac..50a9702ae4e2b5 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -94,7 +94,6 @@ static inline unsigned int x86_cpuid_family(void) return x86_family(eax); } -extern bool dis_ucode_ldr; extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index de001b2146abf3..375f2d7f1762d4 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -145,10 +145,6 @@ void __init __no_stack_protector mk_early_pgtbl_32(void) *ptr = (unsigned long)ptep + PAGE_OFFSET; #ifdef CONFIG_MICROCODE_INITRD32 - /* Running on a hypervisor? */ - if (native_cpuid_ecx(1) & BIT(31)) - return; - params = (struct boot_params *)__pa_nodebug(&boot_params); if (!params->hdr.ramdisk_size || !params->hdr.ramdisk_image) return; From 936b73feab5cd7eae8fe3d08a7ac9b1f8ac68042 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Mon, 28 Apr 2025 11:35:55 -0700 Subject: [PATCH 1447/2924] drm/i915/slpc: Balance the inc/dec for num_waiters As seen in some recent failures, SLPC num_waiters value is < 0. This happens because the inc/dec are not balanced. We should skip decrement for the same conditions as the increment. Currently, we do that for power saving profile mode. This patch also ensures that num_waiters is incremented in the case min_softlimit is at boost freq. It ensures that we don't reduce the frequency while this request is in flight. v2: Add Fixes tags Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13598 Fixes: f864a29afc32 ("drm/i915/slpc: Optmize waitboost for SLPC") Fixes: 4a82ceb04ad4 ("drm/i915/slpc: Add sysfs for SLPC power profiles") Cc: Sk Anirban Reviewed-by: Sk Anirban Signed-off-by: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250428183555.3250021-1-vinay.belgaumkar@intel.com (cherry picked from commit d26e55085f4b7a63677670db827541209257b313) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_rps.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 64e9317f58fb6f..71ee01d9ef642c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1001,6 +1001,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + /* Don't decrement num_waiters for req where increment was skipped */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) + return; + intel_guc_slpc_dec_waiters(slpc); } else { atomic_dec(&rps->num_waiters); @@ -1029,11 +1033,15 @@ void intel_rps_boost(struct i915_request *rq) if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) return; - if (slpc->min_freq_softlimit >= slpc->boost_freq) - return; - /* Return if old value is non zero */ if (!atomic_fetch_inc(&slpc->num_waiters)) { + /* + * Skip queuing boost work if frequency is already boosted, + * but still increment num_waiters. + */ + if (slpc->min_freq_softlimit >= slpc->boost_freq) + return; + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); queue_work(rps_to_gt(rps)->i915->unordered_wq, From b19fa45715ce9cfcc597ed140df31115e969b39d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 5 May 2025 07:20:52 +0200 Subject: [PATCH 1448/2924] ASoC: mediatek: mt8188-mt6359: select CONFIG_SND_SOC_MT6359_ACCDET The driver support was added without selecting the codec, which leads to a link failure: aarch64-linux-ld: sound/soc/mediatek/mt8188/mt8188-mt6359.o: in function `mt8188_mt6359_init': mt8188-mt6359.c:(.text+0x19f0): undefined reference to `mt6359_accdet_enable_jack_detect' Fixes: f35d834d67ad ("ASoC: mediatek: mt8188-mt6359: Add accdet headset jack detect support") Signed-off-by: Arnd Bergmann Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20250505052106.1811802-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/mediatek/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig index 3033e2d3fe1684..e148d4d9416011 100644 --- a/sound/soc/mediatek/Kconfig +++ b/sound/soc/mediatek/Kconfig @@ -229,6 +229,7 @@ config SND_SOC_MT8188_MT6359 tristate "ASoC Audio driver for MT8188 with MT6359 and I2S codecs" depends on SND_SOC_MT8188 && MTK_PMIC_WRAP depends on I2C + select SND_SOC_MT6359_ACCDET select SND_SOC_MT6359 select SND_SOC_HDMI_CODEC select SND_SOC_DMIC From a73fa3690a1f3014d6677e368dce4e70767a6ba2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 2 May 2025 13:10:35 +0200 Subject: [PATCH 1449/2924] spi: loopback-test: Do not split 1024-byte hexdumps spi_test_print_hex_dump() prints buffers holding less than 1024 bytes in full. Larger buffers are truncated: only the first 512 and the last 512 bytes are printed, separated by a truncation message. The latter is confusing in case the buffer holds exactly 1024 bytes, as all data is printed anyway. Fix this by printing buffers holding up to and including 1024 bytes in full. Fixes: 84e0c4e5e2c4ef42 ("spi: add loopback test driver to allow for spi_master regression tests") Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/37ee1bc90c6554c9347040adabf04188c8f704aa.1746184171.git.geert+renesas@glider.be Signed-off-by: Mark Brown --- drivers/spi/spi-loopback-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c index 31a878d9458d95..7740f94847a883 100644 --- a/drivers/spi/spi-loopback-test.c +++ b/drivers/spi/spi-loopback-test.c @@ -420,7 +420,7 @@ MODULE_LICENSE("GPL"); static void spi_test_print_hex_dump(char *pre, const void *ptr, size_t len) { /* limit the hex_dump */ - if (len < 1024) { + if (len <= 1024) { print_hex_dump(KERN_INFO, pre, DUMP_PREFIX_OFFSET, 16, 1, ptr, len, 0); From 0f67578587bb9e5a8eecfcdf6b8a501b5bd90526 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Sat, 3 May 2025 08:44:43 +0000 Subject: [PATCH 1450/2924] arm64: dts: amlogic: dreambox: fix missing clkc_audio node Add the clkc_audio node to fix audio support on Dreambox One/Two. Fixes: 83a6f4c62cb1 ("arm64: dts: meson: add initial support for Dreambox One/Two") CC: stable@vger.kernel.org Suggested-by: Emanuel Strobel Signed-off-by: Christian Hewitt Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20250503084443.3704866-1-christianshewitt@gmail.com Signed-off-by: Neil Armstrong --- arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi index de35fa2d7a6de3..8e3e3354ed67a9 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi @@ -116,6 +116,10 @@ status = "okay"; }; +&clkc_audio { + status = "okay"; +}; + &frddr_a { status = "okay"; }; From eb16b3727c05ed36420c90eca1e8f0e279514c1c Mon Sep 17 00:00:00 2001 From: Nylon Chen Date: Fri, 11 Apr 2025 15:38:49 +0800 Subject: [PATCH 1451/2924] riscv: misaligned: Add handling for ZCB instructions Add support for the Zcb extension's compressed half-word instructions (C.LHU, C.LH, and C.SH) in the RISC-V misaligned access trap handler. Signed-off-by: Zong Li Signed-off-by: Nylon Chen Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE") Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250411073850.3699180-2-nylon.chen@sifive.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/traps_misaligned.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 4354c87c0376fd..dde5d11dc1b50d 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -88,6 +88,13 @@ #define INSN_MATCH_C_FSWSP 0xe002 #define INSN_MASK_C_FSWSP 0xe003 +#define INSN_MATCH_C_LHU 0x8400 +#define INSN_MASK_C_LHU 0xfc43 +#define INSN_MATCH_C_LH 0x8440 +#define INSN_MASK_C_LH 0xfc43 +#define INSN_MATCH_C_SH 0x8c00 +#define INSN_MASK_C_SH 0xfc43 + #define INSN_LEN(insn) ((((insn) & 0x3) < 0x3) ? 2 : 4) #if defined(CONFIG_64BIT) @@ -431,6 +438,13 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) fp = 1; len = 4; #endif + } else if ((insn & INSN_MASK_C_LHU) == INSN_MATCH_C_LHU) { + len = 2; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LH) == INSN_MATCH_C_LH) { + len = 2; + shift = 8 * (sizeof(ulong) - len); + insn = RVC_RS2S(insn) << SH_RD; } else { regs->epc = epc; return -1; @@ -530,6 +544,9 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs) len = 4; val.data_ulong = GET_F32_RS2C(insn, regs); #endif + } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) { + len = 2; + val.data_ulong = GET_RS2S(insn, regs); } else { regs->epc = epc; return -1; From f5c84eff634ba003326aa034c414e2a9dcb7c6a7 Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Mon, 28 Apr 2025 22:36:26 +0800 Subject: [PATCH 1452/2924] loop: Add sanity check for read/write_iter Some file systems do not support read_iter/write_iter, such as selinuxfs in this issue. So before calling them, first confirm that the interface is supported and then call it. It is releavant in that vfs_iter_read/write have the check, and removal of their used caused szybot to be able to hit this issue. Fixes: f2fed441c69b ("loop: stop using vfs_iter__{read,write} for buffered I/O") Reported-by: syzbot+6af973a3b8dfd2faefdc@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6af973a3b8dfd2faefdc Signed-off-by: Lizhi Xu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250428143626.3318717-1-lizhi.xu@windriver.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 46cba261075f23..b8ba7de0875395 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -505,6 +505,17 @@ static void loop_assign_backing_file(struct loop_device *lo, struct file *file) lo->lo_min_dio_size = loop_query_min_dio_size(lo); } +static int loop_check_backing_file(struct file *file) +{ + if (!file->f_op->read_iter) + return -EINVAL; + + if ((file->f_mode & FMODE_WRITE) && !file->f_op->write_iter) + return -EINVAL; + + return 0; +} + /* * loop_change_fd switched the backing store of a loopback device to * a new file. This is useful for operating system installers to free up @@ -526,6 +537,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!file) return -EBADF; + error = loop_check_backing_file(file); + if (error) + return error; + /* suppress uevents while reconfiguring the device */ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); @@ -963,6 +978,14 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, if (!file) return -EBADF; + + if ((mode & BLK_OPEN_WRITE) && !file->f_op->write_iter) + return -EINVAL; + + error = loop_check_backing_file(file); + if (error) + return error; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ From 94cff94634e506a4a44684bee1875d2dbf782722 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Apr 2025 15:31:16 +0200 Subject: [PATCH 1453/2924] clocksource/i8253: Use raw_spinlock_irqsave() in clockevent_i8253_disable() On x86 during boot, clockevent_i8253_disable() can be invoked via x86_late_time_init -> hpet_time_init() -> pit_timer_init() which happens with enabled interrupts. If some of the old i8253 hardware is actually used then lockdep will notice that i8253_lock is used in hard interrupt context. This causes lockdep to complain because it observed the lock being acquired with interrupts enabled and in hard interrupt context. Make clockevent_i8253_disable() acquire the lock with raw_spinlock_irqsave() to cure this. [ tglx: Massage change log and use guard() ] Fixes: c8c4076723dac ("x86/timer: Skip PIT initialization on modern chipsets") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20250404133116.p-XRWJXf@linutronix.de --- drivers/clocksource/i8253.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c index 39f7c2d736d169..b603c25f3dfaac 100644 --- a/drivers/clocksource/i8253.c +++ b/drivers/clocksource/i8253.c @@ -103,7 +103,7 @@ int __init clocksource_i8253_init(void) #ifdef CONFIG_CLKEVT_I8253 void clockevent_i8253_disable(void) { - raw_spin_lock(&i8253_lock); + guard(raw_spinlock_irqsave)(&i8253_lock); /* * Writing the MODE register should stop the counter, according to @@ -132,8 +132,6 @@ void clockevent_i8253_disable(void) outb_p(0, PIT_CH0); outb_p(0x30, PIT_MODE); - - raw_spin_unlock(&i8253_lock); } static int pit_shutdown(struct clock_event_device *evt) From 0db8a9a943e9b651952a62e6e985effb4161ac5e Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Fri, 18 Apr 2025 10:20:14 -0500 Subject: [PATCH 1454/2924] s390/configs: Enable VDPA on Nvidia ConnectX-6 network card ConnectX-6 is the first VDPA-capable NIC. For earlier NICs, Nvidia implements a VDPA emulation in s/w, which hasn't been validated on s390. Add options necessary for VDPA to work. These options are also added because Fedora has them: CONFIG_VDPA_SIM CONFIG_VDPA_SIM_NET CONFIG_VDPA_SIM_BLOCK CONFIG_VDPA_USER CONFIG_VP_VDPA Signed-off-by: Konstantin Shkolnyy Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 8 ++++++++ arch/s390/configs/defconfig | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6f2c9ce1b15484..68ba3bedfe00e1 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -628,8 +628,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index f18a7d97ac216d..57b34f9fabf57c 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -618,8 +618,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y From d2b8111c22d7d82f28df574acc54aec44ce3d45c Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Fri, 18 Apr 2025 10:20:15 -0500 Subject: [PATCH 1455/2924] s390/configs: Enable options required for TC flow offload While testing Open vSwitch with Nvidia ConnectX-6 NIC, it was noticed that it didn't offload TC flows into the NIC, and its log contained many messages such as: "failed to offload flow: No such file or directory: " and, upon enabling more versose logging, additionally: "received NAK error=2 - TC classifier not found" The options enabled here are listed as requirements in Nvidia online documentation, among other options that were already enabled. Now all options listed by Nvidia are enabled.. This option is also added because Fedora has it: CONFIG_NET_EMATCH Signed-off-by: Konstantin Shkolnyy Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 6 ++++++ arch/s390/configs/defconfig | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 68ba3bedfe00e1..193d251b4c8fc6 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -395,6 +395,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -405,6 +408,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 57b34f9fabf57c..ec8fdeba4f3e6e 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -385,6 +385,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -395,6 +398,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y From ae952eea6f4a7e2193f8721a5366049946e012e7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 24 Apr 2025 17:07:01 +0200 Subject: [PATCH 1456/2924] s390/entry: Fix last breaking event handling in case of stack corruption In case of stack corruption stack_invalid() is called and the expectation is that register r10 contains the last breaking event address. This dependency is quite subtle and broke a couple of years ago without that anybody noticed. Fix this by getting rid of the dependency and read the last breaking event address from lowcore. Fixes: 56e62a737028 ("s390: convert to generic entry") Acked-by: Ilya Leoshkevich Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dd291c9ad6a61b..9980c17ba22d95 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -602,7 +602,8 @@ SYM_CODE_START(stack_invalid) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) - stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_invalid From 833542b3e3d23f640aef6569ba1fd641bc195fa0 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 29 Apr 2025 18:21:01 +0200 Subject: [PATCH 1457/2924] s390/dcssblk: Fix build error with CONFIG_DAX=m and CONFIG_DCSSBLK=y After commit 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED support") moved the "select DAX" from config DCSSBLK to the new config DCSSBLK_DAX, randconfig tests could result in build errors like this: s390-linux-ld: drivers/s390/block/dcssblk.o: in function `dcssblk_shared_store': drivers/s390/block/dcssblk.c:417: undefined reference to `kill_dax' s390-linux-ld: drivers/s390/block/dcssblk.c:418: undefined reference to `put_dax' This is because it's now possible to have CONFIG_DCSSBLK=y, but CONFIG_DAX=m. Fix this by adding "depends on DAX || DAX=n" to config DCSSBLK, to make it explicit that we want either no DAX, or the same "y/m" for both config DAX and DCSSBLK, similar to config BLK_DEV_DM. This also requires removing the "select DAX" from config DCSSBLK_DAX, or else there would be a recursive dependency detected. DCSSBLK_DAX is marked as BROKEN at the moment, and won't work well with DAX anyway, so it doesn't really matter if it is selected. Fixes: 653d7825c149 ("dcssblk: mark DAX broken, remove FS_DAX_LIMITED support") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504291604.pvjonhWX-lkp@intel.com/ Signed-off-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- drivers/s390/block/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 4bfe469c04aaba..8c1c908d2c6e72 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -5,7 +5,7 @@ comment "S/390 block device drivers" config DCSSBLK def_tristate m prompt "DCSSBLK support" - depends on S390 && BLOCK + depends on S390 && BLOCK && (DAX || DAX=n) help Support for dcss block device @@ -14,7 +14,6 @@ config DCSSBLK_DAX depends on DCSSBLK # requires S390 ZONE_DEVICE support depends on BROKEN - select DAX prompt "DCSSBLK DAX support" help Enable DAX operation for the dcss block device From 3a47b1e3cea247857aa48cfe3d0a07e989e6c57d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 30 Apr 2025 16:41:01 +0200 Subject: [PATCH 1458/2924] s390: Update defconfigs Just the regular update of all defconfigs. Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 14 +++++--------- arch/s390/configs/defconfig | 10 +++------- arch/s390/configs/zfcpdump_defconfig | 1 - 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 193d251b4c8fc6..24b22f6a9e9959 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -38,7 +38,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -92,7 +91,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y CONFIG_SLUB_STATS=y @@ -668,7 +666,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=y CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -738,11 +735,10 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y @@ -755,6 +751,8 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_HARDENED_USERCOPY=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -770,7 +768,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -815,7 +812,6 @@ CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -826,9 +822,9 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index ec8fdeba4f3e6e..2b8b42d569bc93 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -36,7 +36,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -86,7 +85,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y # CONFIG_COMPAT_BRK is not set @@ -655,7 +653,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=m CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -725,6 +722,7 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y @@ -756,7 +754,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -802,7 +799,6 @@ CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -813,10 +809,10 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 853b2326a171bb..8163c1702720bc 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -70,7 +70,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y -# CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set From 6328bdc988d23201c700e1e7e04eb05a1149ac1e Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Mon, 5 May 2025 15:56:29 +0300 Subject: [PATCH 1459/2924] usb: xhci: Don't trust the EP Context cycle bit when moving HW dequeue VIA VL805 doesn't bother updating the EP Context cycle bit when the endpoint halts. This is seen by patching xhci_move_dequeue_past_td() to print the cycle bits of the EP Context and the TRB at hw_dequeue and then disconnecting a flash drive while reading it. Actual cycle state is random as expected, but the EP Context bit is always 1. This means that the cycle state produced by this function is wrong half the time, and then the endpoint stops working. Work around it by looking at the cycle bit of TD's end_trb instead of believing the Endpoint or Stream Context. Specifically: - rename cycle_found to hw_dequeue_found to avoid confusion - initialize new_cycle from td->end_trb instead of hw_dequeue - switch new_cycle toggling to happen after end_trb is found Now a workload which regularly stalls the device works normally for a few hours and clearly demonstrates the HW bug - the EP Context bit is not updated in a new cycle until Set TR Dequeue overwrites it: [ +0,000298] sd 10:0:0:0: [sdc] Attached SCSI disk [ +0,011758] cycle bits: TRB 1 EP Ctx 1 [ +5,947138] cycle bits: TRB 1 EP Ctx 1 [ +0,065731] cycle bits: TRB 0 EP Ctx 1 [ +0,064022] cycle bits: TRB 0 EP Ctx 0 [ +0,063297] cycle bits: TRB 0 EP Ctx 0 [ +0,069823] cycle bits: TRB 0 EP Ctx 0 [ +0,063390] cycle bits: TRB 1 EP Ctx 0 [ +0,063064] cycle bits: TRB 1 EP Ctx 1 [ +0,062293] cycle bits: TRB 1 EP Ctx 1 [ +0,066087] cycle bits: TRB 0 EP Ctx 1 [ +0,063636] cycle bits: TRB 0 EP Ctx 0 [ +0,066360] cycle bits: TRB 0 EP Ctx 0 Also tested on the buggy ASM1042 which moves EP Context dequeue to the next TRB after errors, one problem case addressed by the rework that implemented this loop. In this case hw_dequeue can be enqueue, so simply picking the cycle bit of TRB at hw_dequeue wouldn't work. Commit 5255660b208a ("xhci: add quirk for host controllers that don't update endpoint DCS") tried to solve the stale cycle problem, but it was more complex and got reverted due to a reported issue. Cc: Jonathan Bell Cc: Oliver Neukum Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250505125630.561699-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b906bc2eea5f6f..423bf36495705e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -699,7 +699,7 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, int new_cycle; dma_addr_t addr; u64 hw_dequeue; - bool cycle_found = false; + bool hw_dequeue_found = false; bool td_last_trb_found = false; u32 trb_sct = 0; int ret; @@ -715,25 +715,24 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, hw_dequeue = xhci_get_hw_deq(xhci, dev, ep_index, stream_id); new_seg = ep_ring->deq_seg; new_deq = ep_ring->dequeue; - new_cycle = hw_dequeue & 0x1; + new_cycle = le32_to_cpu(td->end_trb->generic.field[3]) & TRB_CYCLE; /* - * We want to find the pointer, segment and cycle state of the new trb - * (the one after current TD's end_trb). We know the cycle state at - * hw_dequeue, so walk the ring until both hw_dequeue and end_trb are - * found. + * Walk the ring until both the next TRB and hw_dequeue are found (don't + * move hw_dequeue back if it went forward due to a HW bug). Cycle state + * is loaded from a known good TRB, track later toggles to maintain it. */ do { - if (!cycle_found && xhci_trb_virt_to_dma(new_seg, new_deq) + if (!hw_dequeue_found && xhci_trb_virt_to_dma(new_seg, new_deq) == (dma_addr_t)(hw_dequeue & ~0xf)) { - cycle_found = true; + hw_dequeue_found = true; if (td_last_trb_found) break; } if (new_deq == td->end_trb) td_last_trb_found = true; - if (cycle_found && trb_is_link(new_deq) && + if (td_last_trb_found && trb_is_link(new_deq) && link_trb_toggles_cycle(new_deq)) new_cycle ^= 0x1; @@ -745,7 +744,7 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, return -EINVAL; } - } while (!cycle_found || !td_last_trb_found); + } while (!hw_dequeue_found || !td_last_trb_found); /* Don't update the ring cycle state for the producer (us). */ addr = xhci_trb_virt_to_dma(new_seg, new_deq); From cab63934c33b12c0d1e9f4da7450928057f2c142 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 5 May 2025 15:56:30 +0300 Subject: [PATCH 1460/2924] xhci: dbc: Avoid event polling busyloop if pending rx transfers are inactive. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Event polling delay is set to 0 if there are any pending requests in either rx or tx requests lists. Checking for pending requests does not work well for "IN" transfers as the tty driver always queues requests to the list and TRBs to the ring, preparing to receive data from the host. This causes unnecessary busylooping and cpu hogging. Only set the event polling delay to 0 if there are pending tx "write" transfers, or if it was less than 10ms since last active data transfer in any direction. Cc: Łukasz Bartosik Fixes: fb18e5bb9660 ("xhci: dbc: poll at different rate depending on data transfer activity") Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250505125630.561699-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 19 ++++++++++++++++--- drivers/usb/host/xhci-dbgcap.h | 3 +++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index fd7895b24367db..0d4ce5734165ed 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -823,6 +823,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) { dma_addr_t deq; union xhci_trb *evt; + enum evtreturn ret = EVT_DONE; u32 ctrl, portsc; bool update_erdp = false; @@ -909,6 +910,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) break; case TRB_TYPE(TRB_TRANSFER): dbc_handle_xfer_event(dbc, evt); + ret = EVT_XFER_DONE; break; default: break; @@ -927,7 +929,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) lo_hi_writeq(deq, &dbc->regs->erdp); } - return EVT_DONE; + return ret; } static void xhci_dbc_handle_events(struct work_struct *work) @@ -936,6 +938,7 @@ static void xhci_dbc_handle_events(struct work_struct *work) struct xhci_dbc *dbc; unsigned long flags; unsigned int poll_interval; + unsigned long busypoll_timelimit; dbc = container_of(to_delayed_work(work), struct xhci_dbc, event_work); poll_interval = dbc->poll_interval; @@ -954,11 +957,21 @@ static void xhci_dbc_handle_events(struct work_struct *work) dbc->driver->disconnect(dbc); break; case EVT_DONE: - /* set fast poll rate if there are pending data transfers */ + /* + * Set fast poll rate if there are pending out transfers, or + * a transfer was recently processed + */ + busypoll_timelimit = dbc->xfer_timestamp + + msecs_to_jiffies(DBC_XFER_INACTIVITY_TIMEOUT); + if (!list_empty(&dbc->eps[BULK_OUT].list_pending) || - !list_empty(&dbc->eps[BULK_IN].list_pending)) + time_is_after_jiffies(busypoll_timelimit)) poll_interval = 0; break; + case EVT_XFER_DONE: + dbc->xfer_timestamp = jiffies; + poll_interval = 0; + break; default: dev_info(dbc->dev, "stop handling dbc events\n"); return; diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h index 9dc8f4d8077cc4..47ac72c2286d9a 100644 --- a/drivers/usb/host/xhci-dbgcap.h +++ b/drivers/usb/host/xhci-dbgcap.h @@ -96,6 +96,7 @@ struct dbc_ep { #define DBC_WRITE_BUF_SIZE 8192 #define DBC_POLL_INTERVAL_DEFAULT 64 /* milliseconds */ #define DBC_POLL_INTERVAL_MAX 5000 /* milliseconds */ +#define DBC_XFER_INACTIVITY_TIMEOUT 10 /* milliseconds */ /* * Private structure for DbC hardware state: */ @@ -142,6 +143,7 @@ struct xhci_dbc { enum dbc_state state; struct delayed_work event_work; unsigned int poll_interval; /* ms */ + unsigned long xfer_timestamp; unsigned resume_required:1; struct dbc_ep eps[2]; @@ -187,6 +189,7 @@ struct dbc_request { enum evtreturn { EVT_ERR = -1, EVT_DONE, + EVT_XFER_DONE, EVT_GSER, EVT_DISC, }; From 844f766e02d0aba973d78f3ade38ad8bae399347 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 May 2025 15:01:34 -0400 Subject: [PATCH 1461/2924] bcachefs: Improve want_cached_ptr() If promote target isn't set, rebalance should still leave a cached copy on the faster device. Fall back to foreground_target if it's set, or allow a cached copy on any device if neither are set. Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index dca2b8425cc05f..e597fb9c98236c 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1056,8 +1056,9 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts, struct bch_extent_ptr *ptr) { - if (!opts->promote_target || - !bch2_dev_in_target(c, ptr->dev, opts->promote_target)) + unsigned target = opts->promote_target ?: opts->foreground_target; + + if (target && !bch2_dev_in_target(c, ptr->dev, target)) return false; struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); From 50a7b899a0d806f961edadfc3357cb6679826795 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 May 2025 16:31:40 -0400 Subject: [PATCH 1462/2924] bcachefs: Ensure proper write alignment There was a buggy version of bcachefs-tools which picked misaligned bucket sizes when formatting, and we're also about to do dynamic block sizes - which will allow picking logical block size or physical block size of the device per-write, allowing for better compression ratios at the cost of slightly worse write performance (i.e. forcing the device to do RMW or extra buffering). To account for this, tweak bch2_alloc_sectors_start() to properly align open_buckets to the blocksize of the write we're about to do. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index effafc3e0ced44..7ec022e9361ae1 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1422,11 +1422,31 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, wp->sectors_free = UINT_MAX; - open_bucket_for_each(c, &wp->ptrs, ob, i) + open_bucket_for_each(c, &wp->ptrs, ob, i) { + /* + * Ensure proper write alignment - either due to misaligned + * bucket sizes (from buggy bcachefs-tools), or writes that mix + * logical/physical alignment: + */ + struct bch_dev *ca = ob_dev(c, ob); + u64 offset = bucket_to_sector(ca, ob->bucket) + + ca->mi.bucket_size - + ob->sectors_free; + unsigned align = round_up(offset, block_sectors(c)) - offset; + + ob->sectors_free = max_t(int, 0, ob->sectors_free - align); + wp->sectors_free = min(wp->sectors_free, ob->sectors_free); + } wp->sectors_free = rounddown(wp->sectors_free, block_sectors(c)); + /* Did alignment use up space in an open_bucket? */ + if (unlikely(!wp->sectors_free)) { + bch2_alloc_sectors_done(c, wp); + goto retry; + } + BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); return 0; From 7a69fa65718a7258aa89fca06b975f4357daeac3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 5 May 2025 14:13:21 -0400 Subject: [PATCH 1463/2924] bcachefs: Add missing barriers before wake_up_bit() wake_up() doesn't require a barrier - but wake_up_bit() does. This only affected non x86, and primarily lead to lost wakeups after btree node reads. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 9 ++++++++- fs/bcachefs/buckets.h | 1 + fs/bcachefs/ec.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 5fd4a58d2ad27d..60782f3e5aec84 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -41,6 +41,7 @@ void bch2_btree_node_io_unlock(struct btree *b) clear_btree_node_write_in_flight_inner(b); clear_btree_node_write_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); } @@ -1400,6 +1401,7 @@ static void btree_node_read_work(struct work_struct *work) printbuf_exit(&buf); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } @@ -1595,6 +1597,7 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) printbuf_exit(&buf); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } @@ -1721,6 +1724,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, set_btree_node_read_error(b); bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); printbuf_exit(&buf); return; @@ -2061,8 +2065,10 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start if (new & (1U << BTREE_NODE_write_in_flight)) __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type); - else + else { + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); + } } static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time) @@ -2175,6 +2181,7 @@ static void btree_node_write_endio(struct bio *bio) } clear_btree_node_write_in_flight_inner(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner); INIT_WORK(&wb->work, btree_node_write_work); queue_work(c->btree_io_complete_wq, &wb->work); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 8d75b27a141885..af1532de4a3747 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -44,6 +44,7 @@ static inline void bucket_unlock(struct bucket *b) BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock); + smp_mb__after_atomic(); wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 62d27e04d763f5..51893e1ee874f0 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -160,6 +160,7 @@ static inline void gc_stripe_unlock(struct gc_stripe *s) BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &s->lock); + smp_mb__after_atomic(); wake_up_bit((void *) &s->lock, BUCKET_LOCK_BITNR); } From aed4ccbf4595e08f3fa80c7faaf1d2188d61bc70 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 May 2025 18:46:16 -0400 Subject: [PATCH 1464/2924] bcachefs: fix hung task timeout in journal read Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 63cdf885c9e2a4..ded18a94ed021e 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -19,6 +19,7 @@ #include #include +#include void bch2_journal_pos_from_member_info_set(struct bch_fs *c) { @@ -1262,7 +1263,8 @@ int bch2_journal_read(struct bch_fs *c, degraded = true; } - closure_sync(&jlist.cl); + while (closure_sync_timeout(&jlist.cl, sysctl_hung_task_timeout_secs * HZ / 2)) + ; if (jlist.ret) return jlist.ret; From 157dbc4a321f5bb6f8b6c724d12ba720a90f1a7c Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 5 May 2025 19:31:48 +0200 Subject: [PATCH 1465/2924] KVM: arm64: Fix uninitialized memcache pointer in user_mem_abort() Commit fce886a60207 ("KVM: arm64: Plumb the pKVM MMU in KVM") made the initialization of the local memcache variable in user_mem_abort() conditional, leaving a codepath where it is used uninitialized via kvm_pgtable_stage2_map(). This can fail on any path that requires a stage-2 allocation without transition via a permission fault or dirty logging. Fix this by making sure that memcache is always valid. Fixes: fce886a60207 ("KVM: arm64: Plumb the pKVM MMU in KVM") Signed-off-by: Sebastian Ott Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/kvmarm/3f5db4c7-ccce-fb95-595c-692fa7aad227@redhat.com/ Link: https://lore.kernel.org/r/20250505173148.33900-1-sebott@redhat.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/mmu.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 754f2fe0cc6738..eeda92330ade70 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1501,6 +1501,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } + if (!is_protected_kvm_enabled()) + memcache = &vcpu->arch.mmu_page_cache; + else + memcache = &vcpu->arch.pkvm_memcache; + /* * Permission faults just need to update the existing leaf entry, * and so normally don't require allocations from the memcache. The @@ -1510,13 +1515,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!fault_is_perm || (logging_active && write_fault)) { int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); - if (!is_protected_kvm_enabled()) { - memcache = &vcpu->arch.mmu_page_cache; + if (!is_protected_kvm_enabled()) ret = kvm_mmu_topup_memory_cache(memcache, min_pages); - } else { - memcache = &vcpu->arch.pkvm_memcache; + else ret = topup_hyp_memcache(memcache, min_pages); - } + if (ret) return ret; } From 859c60276e12a3a18c65a3f9325e656a068c9b62 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Apr 2025 12:43:26 +0100 Subject: [PATCH 1466/2924] KVM: arm64: Force HCR_EL2.xMO to 1 at all times in VHE mode We keep setting and clearing these bits depending on the role of the host kernel, mimicking what we do for nVHE. But that's actually pretty pointless, as we always want physical interrupts to make it to the host, at EL2. This has also two problems: - it prevents IRQs from being taken when these bits are cleared if the implementation has chosen to implement these bits as masks when HCR_EL2.{TGE,xMO}=={0,0} - it triggers a bad erratum on the AmpereOne HW, which catches fire on clearing these bits while an interrupt is being taken (AC03_CPU_36). Let's kill these two birds with a single stone, and permanently set the xMO bits when running VHE. This involves a bit of surgery on code paths that rely on flipping these bits on and off for other purposes. Note that the earliest setting of hcr_el2 (in the init_hcr_el2 macro) is left untouched as is runs extremely early, with interrupts disabled, and soon enough overwritten with the final value containing the xMO bits. Reported-by: D Scott Phillips Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250429114326.3618875-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_arm.h | 2 +- arch/arm64/kvm/hyp/vgic-v3-sr.c | 36 +++++++++++++++++++------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 974d72b5905b86..bba4b0e9309154 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -100,7 +100,7 @@ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) -#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) +#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO) #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) #define MPAMHCR_HOST_FLAGS 0 diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index ed363aa3027e59..50aa8dbcae75b0 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -429,23 +429,27 @@ u64 __vgic_v3_get_gic_config(void) /* * To check whether we have a MMIO-based (GICv2 compatible) * CPU interface, we need to disable the system register - * view. To do that safely, we have to prevent any interrupt - * from firing (which would be deadly). + * view. * - * Note that this only makes sense on VHE, as interrupts are - * already masked for nVHE as part of the exception entry to - * EL2. - */ - if (has_vhe()) - flags = local_daif_save(); - - /* * Table 11-2 "Permitted ICC_SRE_ELx.SRE settings" indicates * that to be able to set ICC_SRE_EL1.SRE to 0, all the * interrupt overrides must be set. You've got to love this. + * + * As we always run VHE with HCR_xMO set, no extra xMO + * manipulation is required in that case. + * + * To safely disable SRE, we have to prevent any interrupt + * from firing (which would be deadly). This only makes sense + * on VHE, as interrupts are already masked for nVHE as part + * of the exception entry to EL2. */ - sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO); - isb(); + if (has_vhe()) { + flags = local_daif_save(); + } else { + sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO); + isb(); + } + write_gicreg(0, ICC_SRE_EL1); isb(); @@ -453,11 +457,13 @@ u64 __vgic_v3_get_gic_config(void) write_gicreg(sre, ICC_SRE_EL1); isb(); - sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0); - isb(); - if (has_vhe()) + if (has_vhe()) { local_daif_restore(flags); + } else { + sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0); + isb(); + } val = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63); val |= read_gicreg(ICH_VTR_EL2); From 7af7cfbe78e23af0ac3cbe9b2b16da69f1412222 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Apr 2025 12:41:16 +0100 Subject: [PATCH 1467/2924] KVM: arm64: Prevent userspace from disabling AArch64 support at any virtualisable EL A sorry excuse for a selftest is trying to disable AArch64 support. And yes, this goes as well as you can imagine. Let's forbid this sort of things. Normal userspace shouldn't get caught doing that. Signed-off-by: Marc Zyngier Reviewed-by: Ganapatrao Kulkarni Link: https://lore.kernel.org/r/20250429114117.3618800-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/sys_regs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 005ad28f730681..5dde9285afc809 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1945,6 +1945,12 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, if ((hw_val & mpam_mask) == (user_val & mpam_mask)) user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + /* Fail the guest's request to disable the AA64 ISA at EL{0,1,2} */ + if (!FIELD_GET(ID_AA64PFR0_EL1_EL0, user_val) || + !FIELD_GET(ID_AA64PFR0_EL1_EL1, user_val) || + (vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val))) + return -EINVAL; + return set_id_reg(vcpu, rd, user_val); } From b60e285b6acdec6d24fbde36d59f6d806f91cdc6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 Apr 2025 12:41:17 +0100 Subject: [PATCH 1468/2924] KVM: arm64: selftest: Don't try to disable AArch64 support Trying to cut the branch you are sat on is pretty dumb. And so is trying to disable the instruction set you are executing on. Signed-off-by: Marc Zyngier Reviewed-by: Ganapatrao Kulkarni Link: https://lore.kernel.org/r/20250429114117.3618800-3-maz@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/arm64/set_id_regs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 322b9d3b012559..57708de2075df7 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -129,10 +129,10 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1), REG_FTR_END, }; From 9c618560994794d6f477e81f3b695fe799feec8a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 5 May 2025 15:52:57 -0400 Subject: [PATCH 1469/2924] bcachefs: Call bch2_fs_start before getting vfs superblock This reverts 1fdbe0b184c8 bcachefs: Make sure c->vfs_sb is set before starting fs switched up bch2_fs_get_tree() so that we got a superblock before calling bch2_fs_start, so that c->vfs_sb would always be initialized while the filesystem was active. This turned out not to be necessary, because blk_holder_ops were implemented using our own locking, not vfs locking. And this had the side effect of creating a super_block and doing our full recovery (including potentially fsck) before setting SB_BORN, which causes things like sync calls to hang until our recovery is finished. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 113db85b6ef9d0..b6801861c66f1c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2502,10 +2502,9 @@ static int bch2_fs_get_tree(struct fs_context *fc) bch2_opts_apply(&c->opts, opts); - /* - * need to initialise sb and set c->vfs_sb _before_ starting fs, - * for blk_holder_ops - */ + ret = bch2_fs_start(c); + if (ret) + goto err_stop_fs; sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); ret = PTR_ERR_OR_ZERO(sb); @@ -2567,10 +2566,6 @@ static int bch2_fs_get_tree(struct fs_context *fc) sb->s_shrink->seeks = 0; - ret = bch2_fs_start(c); - if (ret) - goto err_put_super; - #ifdef CONFIG_UNICODE sb->s_encoding = c->cf_encoding; #endif From 3769478610135e82b262640252d90f6efb05be71 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 28 Apr 2025 16:29:54 -0700 Subject: [PATCH 1470/2924] sch_htb: make htb_deactivate() idempotent Alan reported a NULL pointer dereference in htb_next_rb_node() after we made htb_qlen_notify() idempotent. It turns out in the following case it introduced some regression: htb_dequeue_tree(): |-> fq_codel_dequeue() |-> qdisc_tree_reduce_backlog() |-> htb_qlen_notify() |-> htb_deactivate() |-> htb_next_rb_node() |-> htb_deactivate() For htb_next_rb_node(), after calling the 1st htb_deactivate(), the clprio[prio]->ptr could be already set to NULL, which means htb_next_rb_node() is vulnerable here. For htb_deactivate(), although we checked qlen before calling it, in case of qlen==0 after qdisc_tree_reduce_backlog(), we may call it again which triggers the warning inside. To fix the issues here, we need to: 1) Make htb_deactivate() idempotent, that is, simply return if we already call it before. 2) Make htb_next_rb_node() safe against ptr==NULL. Many thanks to Alan for testing and for the reproducer. Fixes: 5ba8b837b522 ("sch_htb: make htb_qlen_notify() idempotent") Reported-by: Alan J. Wylie Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250428232955.1740419-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_htb.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 4b9a639b642e1e..14bf71f570570f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -348,7 +348,8 @@ static void htb_add_to_wait_tree(struct htb_sched *q, */ static inline void htb_next_rb_node(struct rb_node **n) { - *n = rb_next(*n); + if (*n) + *n = rb_next(*n); } /** @@ -609,8 +610,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(!cl->prio_activity); - + if (!cl->prio_activity) + return; htb_deactivate_prios(q, cl); cl->prio_activity = 0; } @@ -1485,8 +1486,6 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (!cl->prio_activity) - return; htb_deactivate(qdisc_priv(sch), cl); } @@ -1740,8 +1739,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, if (cl->parent) cl->parent->children--; - if (cl->prio_activity) - htb_deactivate(q, cl); + htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&cl->pq_node, @@ -1949,8 +1947,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_purge_queue(parent->leaf.q); parent_qdisc = parent->leaf.q; - if (parent->prio_activity) - htb_deactivate(q, parent); + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { From 63890286f557aa5c4eed7e90a5a31658de8fdb4d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 28 Apr 2025 16:29:55 -0700 Subject: [PATCH 1471/2924] selftests/tc-testing: Add a test case to cover basic HTB+FQ_CODEL case Integrate the reproducer from Alan into TC selftests and use scapy to generate TCP traffic instead of relying on ping command. Cc: Alan J. Wylie Signed-off-by: Cong Wang Link: https://patch.msgid.link/20250428232955.1740419-3-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 0843f6d37e9c77..a951c0d33cd2d0 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -538,5 +538,40 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "62c4", + "name": "Test HTB with FQ_CODEL - basic functionality", + "category": [ + "qdisc", + "htb", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 root handle 1: htb default 11", + "$TC class add dev $DEV1 parent 1: classid 1:1 htb rate 10kbit", + "$TC class add dev $DEV1 parent 1:1 classid 1:11 htb rate 10kbit prio 0 quantum 1486", + "$TC qdisc add dev $DEV1 parent 1:11 fq_codel quantum 300 noecn", + "sleep 0.5" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/TCP(sport=12345, dport=80)" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1 | grep -A 5 'qdisc fq_codel'", + "matchPattern": "Sent [0-9]+ bytes [0-9]+ pkt", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 handle 1: root" + ] } ] From 1e20324b23f0afba27997434fb978f1e4a1dbcb6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Apr 2025 09:37:58 -0700 Subject: [PATCH 1472/2924] virtio-net: don't re-enable refill work too early when NAPI is disabled Commit 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") fixed a deadlock between reconfig paths and refill work trying to disable the same NAPI instance. The refill work can't run in parallel with reconfig because trying to double-disable a NAPI instance causes a stall under the instance lock, which the reconfig path needs to re-enable the NAPI and therefore unblock the stalled thread. There are two cases where we re-enable refill too early. One is in the virtnet_set_queues() handler. We call it when installing XDP: virtnet_rx_pause_all(vi); ... virtnet_napi_tx_disable(..); ... virtnet_set_queues(..); ... virtnet_rx_resume_all(..); We want the work to be disabled until we call virtnet_rx_resume_all(), but virtnet_set_queues() kicks it before NAPIs were re-enabled. The other case is a more trivial case of mis-ordering in __virtnet_rx_resume() found by code inspection. Taking the spin lock in virtnet_set_queues() (requested during review) may be unnecessary as we are under rtnl_lock and so are all paths writing to ->refill_enabled. Acked-by: Michael S. Tsirkin Reviewed-by: Bui Quang Minh Fixes: 4bc12818b363 ("virtio-net: disable delayed refill when pausing rx") Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()") Link: https://patch.msgid.link/20250430163758.3029367-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 848fab51dfa1b7..b5549d542c0220 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3383,12 +3383,15 @@ static void __virtnet_rx_resume(struct virtnet_info *vi, bool refill) { bool running = netif_running(vi->dev); + bool schedule_refill = false; if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); - + schedule_refill = true; if (running) virtnet_napi_enable(rq); + + if (schedule_refill) + schedule_delayed_work(&vi->refill, 0); } static void virtnet_rx_resume_all(struct virtnet_info *vi) @@ -3728,8 +3731,10 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) succ: vi->curr_queue_pairs = queue_pairs; /* virtnet_open() will refill when device is going to up. */ - if (dev->flags & IFF_UP) + spin_lock_bh(&vi->refill_lock); + if (dev->flags & IFF_UP && vi->refill_enabled) schedule_delayed_work(&vi->refill, 0); + spin_unlock_bh(&vi->refill_lock); return 0; } From 4397684a292a71fbc1e815c3e283f7490ddce5ae Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Apr 2025 09:38:36 -0700 Subject: [PATCH 1473/2924] virtio-net: free xsk_buffs on error in virtnet_xsk_pool_enable() The selftests added to our CI by Bui Quang Minh recently reveals that there is a mem leak on the error path of virtnet_xsk_pool_enable(): unreferenced object 0xffff88800a68a000 (size 2048): comm "xdp_helper", pid 318, jiffies 4294692778 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): __kvmalloc_node_noprof+0x402/0x570 virtnet_xsk_pool_enable+0x293/0x6a0 (drivers/net/virtio_net.c:5882) xp_assign_dev+0x369/0x670 (net/xdp/xsk_buff_pool.c:226) xsk_bind+0x6a5/0x1ae0 __sys_bind+0x15e/0x230 __x64_sys_bind+0x72/0xb0 do_syscall_64+0xc1/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Acked-by: Jason Wang Fixes: e9f3962441c0 ("virtio_net: xsk: rx: support fill with xsk buffer") Link: https://patch.msgid.link/20250430163836.3029761-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b5549d542c0220..f9e3e628ec4df9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5890,8 +5890,10 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, DMA_TO_DEVICE, 0); - if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) - return -ENOMEM; + if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) { + err = -ENOMEM; + goto err_free_buffs; + } err = xsk_pool_dma_map(pool, dma_dev, 0); if (err) @@ -5919,6 +5921,8 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, err_xsk_map: virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, DMA_TO_DEVICE, 0); +err_free_buffs: + kvfree(rq->xsk_buffs); return err; } From e66b0a8f048bc8590eb1047480f946898a3f80c9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 15 Apr 2025 09:52:30 +0200 Subject: [PATCH 1474/2924] i2c: omap: fix deprecated of_property_read_bool() use Using of_property_read_bool() for non-boolean properties is deprecated and results in a warning during runtime since commit c141ecc3cecd ("of: Warn when of_property_read_bool() is used on non-boolean properties"). Fixes: b6ef830c60b6 ("i2c: omap: Add support for setting mux") Cc: Jayesh Choudhary Signed-off-by: Johan Hovold Acked-by: Mukesh Kumar Savaliya Link: https://lore.kernel.org/r/20250415075230.16235-1-johan+linaro@kernel.org Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 16afb9ca19bba9..876791d20ed55e 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1454,7 +1454,7 @@ omap_i2c_probe(struct platform_device *pdev) (1000 * omap->speed / 8); } - if (of_property_read_bool(node, "mux-states")) { + if (of_property_present(node, "mux-states")) { struct mux_state *mux_state; mux_state = devm_mux_state_get(&pdev->dev, NULL); From c360eb0c3ccb95306704fd221442283ee82f1f58 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 30 Apr 2025 11:21:35 -0500 Subject: [PATCH 1475/2924] dt-bindings: net: ethernet-controller: Add informative text about RGMII delays Device Tree and Ethernet MAC driver writers often misunderstand RGMII delays. Rewrite the Normative section in terms of the PCB, is the PCB adding the 2ns delay. This meaning was previous implied by the definition, but often wrongly interpreted due to the ambiguous wording and looking at the definition from the wrong perspective. The new definition concentrates clearly on the hardware, and should be less ambiguous. Add an Informative section to the end of the binding describing in detail what the four RGMII delays mean. This expands on just the PCB meaning, adding in the implications for the MAC and PHY. Additionally, when the MAC or PHY needs to add a delay, which is software configuration, describe how Linux does this, in the hope of reducing errors. Make it clear other users of device tree binding may implement the software configuration in other ways while still conforming to the binding. Fixes: 9d3de3c58347 ("dt-bindings: net: Add YAML schemas for the generic Ethernet options") Signed-off-by: Andrew Lunn Acked-by: Conor Dooley Link: https://patch.msgid.link/20250430-v6-15-rc3-net-rgmii-delays-v2-1-099ae651d5e5@lunn.ch Signed-off-by: Jakub Kicinski --- .../bindings/net/ethernet-controller.yaml | 97 +++++++++++++++++-- 1 file changed, 90 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 45819b2358002b..a2d4c626f659a5 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -74,19 +74,17 @@ properties: - rev-rmii - moca - # RX and TX delays are added by the MAC when required + # RX and TX delays are provided by the PCB. See below - rgmii - # RGMII with internal RX and TX delays provided by the PHY, - # the MAC should not add the RX or TX delays in this case + # RX and TX delays are not provided by the PCB. This is the most + # frequent case. See below - rgmii-id - # RGMII with internal RX delay provided by the PHY, the MAC - # should not add an RX delay in this case + # TX delay is provided by the PCB. See below - rgmii-rxid - # RGMII with internal TX delay provided by the PHY, the MAC - # should not add an TX delay in this case + # RX delay is provided by the PCB. See below - rgmii-txid - rtbi - smii @@ -286,4 +284,89 @@ allOf: additionalProperties: true +# Informative +# =========== +# +# 'phy-modes' & 'phy-connection-type' properties 'rgmii', 'rgmii-id', +# 'rgmii-rxid', and 'rgmii-txid' are frequently used wrongly by +# developers. This informative section clarifies their usage. +# +# The RGMII specification requires a 2ns delay between the data and +# clock signals on the RGMII bus. How this delay is implemented is not +# specified. +# +# One option is to make the clock traces on the PCB longer than the +# data traces. A sufficiently difference in length can provide the 2ns +# delay. If both the RX and TX delays are implemented in this manner, +# 'rgmii' should be used, so indicating the PCB adds the delays. +# +# If the PCB does not add these delays via extra long traces, +# 'rgmii-id' should be used. Here, 'id' refers to 'internal delay', +# where either the MAC or PHY adds the delay. +# +# If only one of the two delays are implemented via extra long clock +# lines, either 'rgmii-rxid' or 'rgmii-txid' should be used, +# indicating the MAC or PHY should implement one of the delays +# internally, while the PCB implements the other delay. +# +# Device Tree describes hardware, and in this case, it describes the +# PCB between the MAC and the PHY, if the PCB implements delays or +# not. +# +# In practice, very few PCBs make use of extra long clock lines. Hence +# any RGMII phy mode other than 'rgmii-id' is probably wrong, and is +# unlikely to be accepted during review without details provided in +# the commit description and comments in the .dts file. +# +# When the PCB does not implement the delays, the MAC or PHY must. As +# such, this is software configuration, and so not described in Device +# Tree. +# +# The following describes how Linux implements the configuration of +# the MAC and PHY to add these delays when the PCB does not. As stated +# above, developers often get this wrong, and the aim of this section +# is reduce the frequency of these errors by Linux developers. Other +# users of the Device Tree may implement it differently, and still be +# consistent with both the normative and informative description +# above. +# +# By default in Linux, when using phylib/phylink, the MAC is expected +# to read the 'phy-mode' from Device Tree, not implement any delays, +# and pass the value to the PHY. The PHY will then implement delays as +# specified by the 'phy-mode'. The PHY should always be reconfigured +# to implement the needed delays, replacing any setting performed by +# strapping or the bootloader, etc. +# +# Experience to date is that all PHYs which implement RGMII also +# implement the ability to add or not add the needed delays. Hence +# this default is expected to work in all cases. Ignoring this default +# is likely to be questioned by Reviews, and require a strong argument +# to be accepted. +# +# There are a small number of cases where the MAC has hard coded +# delays which cannot be disabled. The 'phy-mode' only describes the +# PCB. The inability to disable the delays in the MAC does not change +# the meaning of 'phy-mode'. It does however mean that a 'phy-mode' of +# 'rgmii' is now invalid, it cannot be supported, since both the PCB +# and the MAC and PHY adding delays cannot result in a functional +# link. Thus the MAC should report a fatal error for any modes which +# cannot be supported. When the MAC implements the delay, it must +# ensure that the PHY does not also implement the same delay. So it +# must modify the phy-mode it passes to the PHY, removing the delay it +# has added. Failure to remove the delay will result in a +# non-functioning link. +# +# Sometimes there is a need to fine tune the delays. Often the MAC or +# PHY can perform this fine tuning. In the MAC node, the Device Tree +# properties 'rx-internal-delay-ps' and 'tx-internal-delay-ps' should +# be used to indicate fine tuning performed by the MAC. The values +# expected here are small. A value of 2000ps, i.e 2ns, and a phy-mode +# of 'rgmii' will not be accepted by Reviewers. +# +# If the PHY is to perform fine tuning, the properties +# 'rx-internal-delay-ps' and 'tx-internal-delay-ps' in the PHY node +# should be used. When the PHY is implementing delays, e.g. 'rgmii-id' +# these properties should have a value near to 2000ps. If the PCB is +# implementing delays, e.g. 'rgmii', a small value can be used to fine +# tune the delay added by the PCB. ... From 3e6a0243ff002ddbd7ee18a8974ae61d2e6ed00d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 May 2025 00:57:52 +0200 Subject: [PATCH 1476/2924] gre: Fix again IPv6 link-local address generation. Use addrconf_addr_gen() to generate IPv6 link-local addresses on GRE devices in most cases and fall back to using add_v4_addrs() only in case the GRE configuration is incompatible with addrconf_addr_gen(). GRE used to use addrconf_addr_gen() until commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") restricted this use to gretap and ip6gretap devices, and created add_v4_addrs() (borrowed from SIT) for non-Ethernet GRE ones. The original problem came when commit 9af28511be10 ("addrconf: refuse isatap eui64 for INADDR_ANY") made __ipv6_isatap_ifid() fail when its addr parameter was 0. The commit says that this would create an invalid address, however, I couldn't find any RFC saying that the generated interface identifier would be wrong. Anyway, since gre over IPv4 devices pass their local tunnel address to __ipv6_isatap_ifid(), that commit broke their IPv6 link-local address generation when the local address was unspecified. Then commit e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") tried to fix that case by defining add_v4_addrs() and calling it to generate the IPv6 link-local address instead of using addrconf_addr_gen() (apart for gretap and ip6gretap devices, which would still use the regular addrconf_addr_gen(), since they have a MAC address). That broke several use cases because add_v4_addrs() isn't properly integrated into the rest of IPv6 Neighbor Discovery code. Several of these shortcomings have been fixed over time, but add_v4_addrs() remains broken on several aspects. In particular, it doesn't send any Router Sollicitations, so the SLAAC process doesn't start until the interface receives a Router Advertisement. Also, add_v4_addrs() mostly ignores the address generation mode of the interface (/proc/sys/net/ipv6/conf/*/addr_gen_mode), thus breaking the IN6_ADDR_GEN_MODE_RANDOM and IN6_ADDR_GEN_MODE_STABLE_PRIVACY cases. Fix the situation by using add_v4_addrs() only in the specific scenario where the normal method would fail. That is, for interfaces that have all of the following characteristics: * run over IPv4, * transport IP packets directly, not Ethernet (that is, not gretap interfaces), * tunnel endpoint is INADDR_ANY (that is, 0), * device address generation mode is EUI64. In all other cases, revert back to the regular addrconf_addr_gen(). Also, remove the special case for ip6gre interfaces in add_v4_addrs(), since ip6gre devices now always use addrconf_addr_gen() instead. Note: This patch was originally applied as commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation."). However, it was then reverted by commit fc486c2d060f ("Revert "gre: Fix IPv6 link-local address generation."") because it uncovered another bug that ended up breaking net/forwarding/ip6gre_custom_multipath_hash.sh. That other bug has now been fixed by commit 4d0ab3a6885e ("ipv6: Start path selection from the first nexthop"). Therefore we can now revive this GRE patch (no changes since original commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation."). Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/a88cc5c4811af36007645d610c95102dccb360a6.1746225214.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9ba83f0c992836..c6b22170dc4928 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3214,16 +3214,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3534,7 +3531,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. + */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } From b6a6006b0e3d10e62c465613f07ff49268baedb1 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 3 May 2025 00:57:59 +0200 Subject: [PATCH 1477/2924] selftests: Add IPv6 link-local address generation tests for GRE devices. GRE devices have their special code for IPv6 link-local address generation that has been the source of several regressions in the past. Add selftest to check that all gre, ip6gre, gretap and ip6gretap get an IPv6 link-link local address in accordance with the net.ipv6.conf..addr_gen_mode sysctl. Note: This patch was originally applied as commit 6f50175ccad4 ("selftests: Add IPv6 link-local address generation tests for GRE devices."). However, it was then reverted by commit 355d940f4d5a ("Revert "selftests: Add IPv6 link-local address generation tests for GRE devices."") because the commit it depended on was going to be reverted. Now that the situation is resolved, we can add this selftest again (no changes since original patch, appart from context update in tools/testing/selftests/net/Makefile). Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/2c3a5733cb3a6e3119504361a9b9f89fda570a2d.1746225214.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../testing/selftests/net/gre_ipv6_lladdr.sh | 177 ++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100755 tools/testing/selftests/net/gre_ipv6_lladdr.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 124078b56fa447..70a38f485d4d1c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,6 +31,7 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += test_so_rcv.sh diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 00000000000000..5b34f6e1f8314a --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). +# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 From b344a48cbe5fb24697addc6afbb7358b938a7d31 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 2 May 2025 18:35:16 -0700 Subject: [PATCH 1478/2924] selftests: drv: net: fix test failure on ipv6 sys The `get_interface_info` call has ip version hard-coded which leads to failures on an IPV6 system. The NetDrvEnv class already gathers information about remote interface, so instead of fixing the local implementation switch to using cfg.remote_ifname. Before: ./drivers/net/ping.py Traceback (most recent call last): File "/new_tests/./drivers/net/ping.py", line 217, in main() File "/new_tests/./drivers/net/ping.py", line 204, in main get_interface_info(cfg) File "/new_tests/./drivers/net/ping.py", line 128, in get_interface_info raise KsftFailEx('Can not get remote interface') net.lib.py.ksft.KsftFailEx: Can not get remote interface After: ./drivers/net/ping.py TAP version 13 1..6 ok 1 ping.test_default # SKIP Test requires IPv4 connectivity ok 2 ping.test_xdp_generic_sb # SKIP Test requires IPv4 connectivity ok 3 ping.test_xdp_generic_mb # SKIP Test requires IPv4 connectivity ok 4 ping.test_xdp_native_sb # SKIP Test requires IPv4 connectivity ok 5 ping.test_xdp_native_mb # SKIP Test requires IPv4 connectivity ok 6 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:0 fail:0 xfail:0 xpass:0 skip:6 error:0 Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Mohsin Bashir Reviewed-by: David Wei Link: https://patch.msgid.link/20250503013518.1722913-2-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 22 +++++++++------------ 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 4b682286606617..5272e8b3536dc9 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -9,7 +9,6 @@ from lib.py import bkg, cmd, wait_port_listen, rand_port from lib.py import defer, ethtool, ip -remote_ifname="" no_sleep=False def _test_v4(cfg) -> None: @@ -57,7 +56,7 @@ def _set_offload_checksum(cfg, netnl, on) -> None: def _set_xdp_generic_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") @@ -66,8 +65,8 @@ def _set_xdp_generic_sb_on(cfg) -> None: def _set_xdp_generic_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off") @@ -76,7 +75,7 @@ def _set_xdp_generic_mb_on(cfg) -> None: def _set_xdp_native_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] @@ -93,8 +92,8 @@ def _set_xdp_native_sb_on(cfg) -> None: def _set_xdp_native_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) try: cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") @@ -112,18 +111,15 @@ def _set_xdp_offload_on(cfg) -> None: except Exception as e: raise KsftSkipEx('device does not support offloaded XDP') defer(ip, f"link set dev {cfg.ifname} xdpoffload off") - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) if no_sleep != True: time.sleep(10) def get_interface_info(cfg) -> None: - global remote_ifname global no_sleep - remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout - remote_ifname = remote_info.rstrip('\n') - if remote_ifname == "": + if cfg.remote_ifname == "": raise KsftFailEx('Can not get remote interface') local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": @@ -136,7 +132,7 @@ def set_interface_init(cfg) -> None: cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) def test_default(cfg, netnl) -> None: _set_offload_checksum(cfg, netnl, "off") From 8bb7d8e5cf7f44ea89a445975cbd78888324f234 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 2 May 2025 18:35:17 -0700 Subject: [PATCH 1479/2924] selftests: drv: net: avoid skipping tests On a system with either of the ipv4 or ipv6 information missing, tests are currently skipped. Ideally, the test should run as long as at least one address family is present. This patch make test run whenever possible. Before: ./drivers/net/ping.py TAP version 13 1..6 ok 1 ping.test_default # SKIP Test requires IPv4 connectivity ok 2 ping.test_xdp_generic_sb # SKIP Test requires IPv4 connectivity ok 3 ping.test_xdp_generic_mb # SKIP Test requires IPv4 connectivity ok 4 ping.test_xdp_native_sb # SKIP Test requires IPv4 connectivity ok 5 ping.test_xdp_native_mb # SKIP Test requires IPv4 connectivity ok 6 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:0 fail:0 xfail:0 xpass:0 skip:6 error:0 After: ./drivers/net/ping.py TAP version 13 1..6 ok 1 ping.test_default ok 2 ping.test_xdp_generic_sb ok 3 ping.test_xdp_generic_mb ok 4 ping.test_xdp_native_sb ok 5 ping.test_xdp_native_mb ok 6 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:5 fail:0 xfail:0 xpass:0 skip:1 error:0 Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Mohsin Bashir Link: https://patch.msgid.link/20250503013518.1722913-3-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 5272e8b3536dc9..16b7d3ab0fc889 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -12,7 +12,8 @@ no_sleep=False def _test_v4(cfg) -> None: - cfg.require_ipver("4") + if not cfg.addr_v["4"]: + return cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) @@ -20,7 +21,8 @@ def _test_v4(cfg) -> None: cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) def _test_v6(cfg) -> None: - cfg.require_ipver("6") + if not cfg.addr_v["6"]: + return cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) From 4a9d494ca24b7fd509b3953fcb43d580cb05097b Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 2 May 2025 18:35:18 -0700 Subject: [PATCH 1480/2924] selftests: drv: net: add version indicator Currently, the test result does not differentiate between the cases when either one of the address families are configured or if both the address families are configured. Ideally, the result should report if a particular case was skipped. ./drivers/net/ping.py TAP version 13 1..7 ok 1 ping.test_default_v4 # SKIP Test requires IPv4 connectivity ok 2 ping.test_default_v6 ok 3 ping.test_xdp_generic_sb ok 4 ping.test_xdp_generic_mb ok 5 ping.test_xdp_native_sb ok 6 ping.test_xdp_native_mb ok 7 ping.test_xdp_offload # SKIP device does not support offloaded XDP Totals: pass:5 fail:0 xfail:0 xpass:0 skip:2 error:0 Fixes: 75cc19c8ff89 ("selftests: drv-net: add xdp cases for ping.py") Signed-off-by: Mohsin Bashir Reviewed-by: David Wei Link: https://patch.msgid.link/20250503013518.1722913-4-mohsin.bashr@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/ping.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 16b7d3ab0fc889..af8df2313a3b4b 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -136,13 +136,23 @@ def set_interface_init(cfg) -> None: cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) -def test_default(cfg, netnl) -> None: +def test_default_v4(cfg, netnl) -> None: + cfg.require_ipver("4") + _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) - _test_v6(cfg) _test_tcp(cfg) _set_offload_checksum(cfg, netnl, "on") _test_v4(cfg) + _test_tcp(cfg) + +def test_default_v6(cfg, netnl) -> None: + cfg.require_ipver("6") + + _set_offload_checksum(cfg, netnl, "off") + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") _test_v6(cfg) _test_tcp(cfg) @@ -200,7 +210,8 @@ def main() -> None: with NetDrvEpEnv(__file__) as cfg: get_interface_info(cfg) set_interface_init(cfg) - ksft_run([test_default, + ksft_run([test_default_v4, + test_default_v6, test_xdp_generic_sb, test_xdp_generic_mb, test_xdp_native_sb, From 4720f9707c783f642332dee3d56dccaefa850e42 Mon Sep 17 00:00:00 2001 From: David Wei Date: Fri, 2 May 2025 21:30:50 -0700 Subject: [PATCH 1481/2924] tools: ynl-gen: validate 0 len strings from kernel Strings from the kernel are guaranteed to be null terminated and ynl_attr_validate() checks for this. But it doesn't check if the string has a len of 0, which would cause problems when trying to access data[len - 1]. Fix this by checking that len is positive. Signed-off-by: David Wei Link: https://patch.msgid.link/20250503043050.861238-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index ce32cb35007d6f..c4da34048ef858 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -364,7 +364,7 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) "Invalid attribute (binary %s)", policy->name); return -1; case YNL_PT_NUL_STR: - if ((!policy->len || len <= policy->len) && !data[len - 1]) + if (len && (!policy->len || len <= policy->len) && !data[len - 1]) break; yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, "Invalid attribute (string %s)", policy->name); From 6ba0982c3235a047c953bf73a7b014af7840c4de Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 5 May 2025 12:35:19 -0700 Subject: [PATCH 1482/2924] swapfile: disable swapon for bs > ps devices Devices which have a requirement for bs > ps cannot be supported for swap as swap still needs work. Now that the block device cache sets the min order for block devices we need this stop gap otherwise all swap operations are rejected. Without this you'll end up with errors on these devices as the swap code still needs much love to support min order. With this we at least now put a stop gap of its use, until the swap subsystem completes its major overhaul: mkswap: /dev/nvme3n1: warning: wiping old swap signature. Setting up swapspace version 1, size = 100 GiB (107374178304 bytes) no label, UUID=6af76b5c-7e7b-4902-b7f7-4c24dde6fa36 swapon: /dev/nvme3n1: swapon failed: Invalid argument Reviewed-by: Davidlohr Bueso Signed-off-by: Luis Chamberlain Link: https://lore.kernel.org/aBkS926thy9zvdZb@bombadil.infradead.org Signed-off-by: Christian Brauner --- mm/swapfile.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/swapfile.c b/mm/swapfile.c index 2eff8b51a9455f..74a6c580b00dd8 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3322,6 +3322,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) goto bad_swap_unlock_inode; } + /* + * The swap subsystem needs a major overhaul to support this. + * It doesn't work yet so just disable it for now. + */ + if (mapping_min_folio_order(mapping) > 0) { + error = -EINVAL; + goto bad_swap_unlock_inode; + } + /* * Read the swap header. */ From 2bb04ea9e5b7a2ef583c042ed5f8111804606e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 2 May 2025 15:01:01 +0200 Subject: [PATCH 1483/2924] drm/ttm: Fix ttm_backup kerneldoc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docs were not properly updated from an earlier version of the code. Fixes: e7b5d23e5d47 ("drm/ttm: Provide a shmem backup implementation") Cc: Christian König Cc: Matthew Brost Cc: Matthew Auld Cc: dri-devel@lists.freedesktop.org Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250502130101.3185-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_backup.c | 2 +- include/drm/ttm/ttm_backup.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c index 93c007f18855d2..f58e7393888f4c 100644 --- a/drivers/gpu/drm/ttm/ttm_backup.c +++ b/drivers/gpu/drm/ttm/ttm_backup.c @@ -55,7 +55,7 @@ void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) * @backup: The struct backup pointer used to back up the page. * @dst: The struct page to copy into. * @handle: The handle returned when the page was backed up. - * @intr: Try to perform waits interruptable or at least killable. + * @intr: Try to perform waits interruptible or at least killable. * * Return: 0 on success, Negative error code on failure, notably * -EINTR if @intr was set to true and a signal is pending. diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h index 24ad120b882746..574b932177cc33 100644 --- a/include/drm/ttm/ttm_backup.h +++ b/include/drm/ttm/ttm_backup.h @@ -16,7 +16,7 @@ struct ttm_backup; * @handle: The handle to convert. * * Converts an opaque handle received from the - * struct ttm_backoup_ops::backup_page() function to an (invalid) + * ttm_backup_backup_page() function to an (invalid) * struct page pointer suitable for a struct page array. * * Return: An (invalid) struct page pointer. @@ -45,8 +45,8 @@ static inline bool ttm_backup_page_ptr_is_handle(const struct page *page) * * Return: The handle that was previously used in * ttm_backup_handle_to_page_ptr() to obtain a struct page pointer, suitable - * for use as argument in the struct ttm_backup_ops drop() or - * copy_backed_up_page() functions. + * for use as argument in the struct ttm_backup_drop() or + * ttm_backup_copy_page() functions. */ static inline unsigned long ttm_backup_page_ptr_to_handle(const struct page *page) From d4ad53adfe21df1464bae5ed916085a69d6ffb3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 2 May 2025 15:00:14 +0200 Subject: [PATCH 1484/2924] drm/ttm: Remove the struct ttm_backup abstraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The abstraction was previously added to support separate ttm_backup implementations. However with the current implementation casting from a struct file to a struct ttm_backup, we run into trouble since struct file may have randomized the layout and gcc complains. Remove the struct ttm_backup abstraction Cc: dri-devel@lists.freedesktop.org Cc: Matthew Brost Cc: Dave Airlie Cc: Christian König Cc: Matthew Auld Cc: Al Viro Reported-by: Kees Cook Closes: https://lore.kernel.org/dri-devel/9c8dbbafdaf9f3f089da2cde5a772d69579b3795.camel@linux.intel.com/T/#mb153ab9216cb813b92bdeb36f391ad4808c2ba29 Suggested-by: Christian König Fixes: 70d645deac98 ("drm/ttm: Add helpers for shrinking") Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250502130014.3156-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_backup.c | 42 +++++++++----------------------- drivers/gpu/drm/ttm/ttm_pool.c | 6 ++--- drivers/gpu/drm/ttm/ttm_tt.c | 2 +- include/drm/ttm/ttm_backup.h | 12 ++++----- include/drm/ttm/ttm_tt.h | 2 +- 5 files changed, 21 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c index f58e7393888f4c..9e2d72c447eec9 100644 --- a/drivers/gpu/drm/ttm/ttm_backup.c +++ b/drivers/gpu/drm/ttm/ttm_backup.c @@ -7,20 +7,6 @@ #include #include -/* - * Casting from randomized struct file * to struct ttm_backup * is fine since - * struct ttm_backup is never defined nor dereferenced. - */ -static struct file *ttm_backup_to_file(struct ttm_backup *backup) -{ - return (void *)backup; -} - -static struct ttm_backup *ttm_file_to_backup(struct file *file) -{ - return (void *)file; -} - /* * Need to map shmem indices to handle since a handle value * of 0 means error, following the swp_entry_t convention. @@ -40,12 +26,12 @@ static pgoff_t ttm_backup_handle_to_shmem_idx(pgoff_t handle) * @backup: The struct backup pointer used to obtain the handle * @handle: The handle obtained from the @backup_page function. */ -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) +void ttm_backup_drop(struct file *backup, pgoff_t handle) { loff_t start = ttm_backup_handle_to_shmem_idx(handle); start <<= PAGE_SHIFT; - shmem_truncate_range(file_inode(ttm_backup_to_file(backup)), start, + shmem_truncate_range(file_inode(backup), start, start + PAGE_SIZE - 1); } @@ -60,11 +46,10 @@ void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) * Return: 0 on success, Negative error code on failure, notably * -EINTR if @intr was set to true and a signal is pending. */ -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr) { - struct file *filp = ttm_backup_to_file(backup); - struct address_space *mapping = filp->f_mapping; + struct address_space *mapping = backup->f_mapping; struct folio *from_folio; pgoff_t idx = ttm_backup_handle_to_shmem_idx(handle); @@ -106,12 +91,11 @@ int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, * the folio size- and usage. */ s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp) { - struct file *filp = ttm_backup_to_file(backup); - struct address_space *mapping = filp->f_mapping; + struct address_space *mapping = backup->f_mapping; unsigned long handle = 0; struct folio *to_folio; int ret; @@ -161,9 +145,9 @@ ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, * * After a call to this function, it's illegal to use the @backup pointer. */ -void ttm_backup_fini(struct ttm_backup *backup) +void ttm_backup_fini(struct file *backup) { - fput(ttm_backup_to_file(backup)); + fput(backup); } /** @@ -194,14 +178,10 @@ EXPORT_SYMBOL_GPL(ttm_backup_bytes_avail); * * Create a backup utilizing shmem objects. * - * Return: A pointer to a struct ttm_backup on success, + * Return: A pointer to a struct file on success, * an error pointer on error. */ -struct ttm_backup *ttm_backup_shmem_create(loff_t size) +struct file *ttm_backup_shmem_create(loff_t size) { - struct file *filp; - - filp = shmem_file_setup("ttm shmem backup", size, 0); - - return ttm_file_to_backup(filp); + return shmem_file_setup("ttm shmem backup", size, 0); } diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 83b10706ba896b..c2ea865be65720 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -506,7 +506,7 @@ static void ttm_pool_allocated_page_commit(struct page *allocated, * if successful, populate the page-table and dma-address arrays. */ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore, - struct ttm_backup *backup, + struct file *backup, const struct ttm_operation_ctx *ctx, struct ttm_pool_alloc_state *alloc) @@ -655,7 +655,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; - struct ttm_backup *backup = tt->backup; + struct file *backup = tt->backup; pgoff_t i, nr; for (i = start_page; i < end_page; i += nr, pages += nr) { @@ -963,7 +963,7 @@ void ttm_pool_drop_backed_up(struct ttm_tt *tt) long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, const struct ttm_backup_flags *flags) { - struct ttm_backup *backup = tt->backup; + struct file *backup = tt->backup; struct page *page; unsigned long handle; gfp_t alloc_gfp; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index df0aa6c4b8b8c1..698cd4bf5e4648 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL(ttm_tt_pages_limit); */ int ttm_tt_setup_backup(struct ttm_tt *tt) { - struct ttm_backup *backup = + struct file *backup = ttm_backup_shmem_create(((loff_t)tt->num_pages) << PAGE_SHIFT); if (WARN_ON_ONCE(!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))) diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h index 574b932177cc33..c33cba111171fb 100644 --- a/include/drm/ttm/ttm_backup.h +++ b/include/drm/ttm/ttm_backup.h @@ -9,8 +9,6 @@ #include #include -struct ttm_backup; - /** * ttm_backup_handle_to_page_ptr() - Convert handle to struct page pointer * @handle: The handle to convert. @@ -55,20 +53,20 @@ ttm_backup_page_ptr_to_handle(const struct page *page) return (unsigned long)page >> 1; } -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle); +void ttm_backup_drop(struct file *backup, pgoff_t handle); -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr); s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp); -void ttm_backup_fini(struct ttm_backup *backup); +void ttm_backup_fini(struct file *backup); u64 ttm_backup_bytes_avail(void); -struct ttm_backup *ttm_backup_shmem_create(loff_t size); +struct file *ttm_backup_shmem_create(loff_t size); #endif diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 13cf47f3322f62..406437ad674bf1 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -118,7 +118,7 @@ struct ttm_tt { * ttm_tt_create() callback is responsible for assigning * this field. */ - struct ttm_backup *backup; + struct file *backup; /** * @caching: The current caching state of the pages, see enum * ttm_caching. From 363cd2b81cfdf706bbfc9ec78db000c9b1ecc552 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Fri, 2 May 2025 19:04:12 +0100 Subject: [PATCH 1485/2924] arm64: cpufeature: Move arm64_use_ng_mappings to the .data section to prevent wrong idmap generation The PTE_MAYBE_NG macro sets the nG page table bit according to the value of "arm64_use_ng_mappings". This variable is currently placed in the .bss section. create_init_idmap() is called before the .bss section initialisation which is done in early_map_kernel(). Therefore, data/test_prot in create_init_idmap() could be set incorrectly through the PAGE_KERNEL -> PROT_DEFAULT -> PTE_MAYBE_NG macros. # llvm-objdump-21 --syms vmlinux-gcc | grep arm64_use_ng_mappings ffff800082f242a8 g O .bss 0000000000000001 arm64_use_ng_mappings The create_init_idmap() function disassembly compiled with llvm-21: // create_init_idmap() ffff80008255c058: d10103ff sub sp, sp, #0x40 ffff80008255c05c: a9017bfd stp x29, x30, [sp, #0x10] ffff80008255c060: a90257f6 stp x22, x21, [sp, #0x20] ffff80008255c064: a9034ff4 stp x20, x19, [sp, #0x30] ffff80008255c068: 910043fd add x29, sp, #0x10 ffff80008255c06c: 90003fc8 adrp x8, 0xffff800082d54000 ffff80008255c070: d280e06a mov x10, #0x703 // =1795 ffff80008255c074: 91400409 add x9, x0, #0x1, lsl #12 // =0x1000 ffff80008255c078: 394a4108 ldrb w8, [x8, #0x290] ------------- (1) ffff80008255c07c: f2e00d0a movk x10, #0x68, lsl #48 ffff80008255c080: f90007e9 str x9, [sp, #0x8] ffff80008255c084: aa0103f3 mov x19, x1 ffff80008255c088: aa0003f4 mov x20, x0 ffff80008255c08c: 14000000 b 0xffff80008255c08c <__pi_create_init_idmap+0x34> ffff80008255c090: aa082d56 orr x22, x10, x8, lsl #11 -------- (2) Note (1) is loading the arm64_use_ng_mappings value in w8 and (2) is set the text or data prot with the w8 value to set PTE_NG bit. If the .bss section isn't initialized, x8 could include a garbage value and generate an incorrect mapping. Annotate arm64_use_ng_mappings as __read_mostly so that it is placed in the .data section. Fixes: 84b04d3e6bdb ("arm64: kernel: Create initial ID map from C code") Cc: stable@vger.kernel.org # 6.9.x Tested-by: Nathan Chancellor Signed-off-by: Yeoreum Yun Link: https://lore.kernel.org/r/20250502180412.3774883-1-yeoreum.yun@arm.com [catalin.marinas@arm.com: use __read_mostly instead of __ro_after_init] [catalin.marinas@arm.com: slight tweaking of the code comment] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9c4d6d552b25cb..4c46d80aa64b7c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -114,7 +114,14 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NC DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); -bool arm64_use_ng_mappings = false; +/* + * arm64_use_ng_mappings must be placed in the .data section, otherwise it + * ends up in the .bss section where it is initialized in early_map_kernel() + * after the MMU (with the idmap) was enabled. create_init_idmap() - which + * runs before early_map_kernel() and reads the variable via PTE_MAYBE_NG - + * may end up generating an incorrect idmap page table attributes. + */ +bool arm64_use_ng_mappings __read_mostly = false; EXPORT_SYMBOL(arm64_use_ng_mappings); DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; From 4db6c75124d871fbabf8243f947d34cc7e0697fc Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 5 May 2025 02:07:32 +0100 Subject: [PATCH 1486/2924] net: ethernet: mtk_eth_soc: reset all TX queues on DMA free The purpose of resetting the TX queue is to reset the byte and packet count as well as to clear the software flow control XOFF bit. MediaTek developers pointed out that netdev_reset_queue would only resets queue 0 of the network device. Queues that are not reset may cause unexpected issues. Packets may stop being sent after reset and "transmit timeout" log may be displayed. Import fix from MediaTek's SDK to resolve this issue. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/319c0d9905579a46dc448579f892f364f1f84818 Fixes: f63959c7eec31 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/c9ff9adceac4f152239a0f65c397f13547639175.1746406763.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8fda4ce80d811d..53c39561b6d9a8 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3186,11 +3186,19 @@ static int mtk_dma_init(struct mtk_eth *eth) static void mtk_dma_free(struct mtk_eth *eth) { const struct mtk_soc_data *soc = eth->soc; - int i; + int i, j, txqs = 1; + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + txqs = MTK_QDMA_NUM_QUEUES; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + if (!eth->netdev[i]) + continue; + + for (j = 0; j < txqs; j++) + netdev_tx_reset_subqueue(eth->netdev[i], j); + } - for (i = 0; i < MTK_MAX_DEVS; i++) - if (eth->netdev[i]) - netdev_reset_queue(eth->netdev[i]); if (!MTK_HAS_CAPS(soc->caps, MTK_SRAM) && eth->scratch_ring) { dma_free_coherent(eth->dma_dev, MTK_QDMA_RING_SIZE * soc->tx.desc_size, From e8716b5b0dff1b3d523b4a83fd5e94d57b887c5c Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Mon, 5 May 2025 02:07:58 +0100 Subject: [PATCH 1487/2924] net: ethernet: mtk_eth_soc: do not reset PSE when setting FE Remove redundant PSE reset. When setting FE register there is no need to reset PSE, doing so may cause FE to work abnormal. Link: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/3a5223473e086a4b54a2b9a44df7d9ddcc2bc75a Fixes: dee4dd10c79aa ("net: ethernet: mtk_eth_soc: ppe: add support for multiple PPEs") Signed-off-by: Frank Wunderlich Link: https://patch.msgid.link/18f0ac7d83f82defa3342c11ef0d1362f6b81e88.1746406763.git.daniel@makrotopia.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 53c39561b6d9a8..22a532695fb0dd 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3473,9 +3473,6 @@ static int mtk_open(struct net_device *dev) } mtk_gdm_config(eth, target_mac->id, gdm_config); } - /* Reset and enable PSE */ - mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); - mtk_w32(eth, 0, MTK_RST_GL); napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); From 7c6fa1797a725732981f2d77711c867166737719 Mon Sep 17 00:00:00 2001 From: Kevin Baker Date: Mon, 5 May 2025 12:02:56 -0500 Subject: [PATCH 1488/2924] drm/panel: simple: Update timings for AUO G101EVN010 Switch to panel timings based on datasheet for the AUO G101EVN01.0 LVDS panel. Default timings were tested on the panel. Previous mode-based timings resulted in horizontal display shift. Signed-off-by: Kevin Baker Fixes: 4fb86404a977 ("drm/panel: simple: Add AUO G101EVN010 panel support") Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250505170256.1385113-1-kevinb@ventureresearch.com Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250505170256.1385113-1-kevinb@ventureresearch.com --- drivers/gpu/drm/panel/panel-simple.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 232b03c1a259eb..33a37539de574e 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1027,27 +1027,28 @@ static const struct panel_desc auo_g070vvn01 = { }, }; -static const struct drm_display_mode auo_g101evn010_mode = { - .clock = 68930, - .hdisplay = 1280, - .hsync_start = 1280 + 82, - .hsync_end = 1280 + 82 + 2, - .htotal = 1280 + 82 + 2 + 84, - .vdisplay = 800, - .vsync_start = 800 + 8, - .vsync_end = 800 + 8 + 2, - .vtotal = 800 + 8 + 2 + 6, +static const struct display_timing auo_g101evn010_timing = { + .pixelclock = { 64000000, 68930000, 85000000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 8, 64, 256 }, + .hback_porch = { 8, 64, 256 }, + .hsync_len = { 40, 168, 767 }, + .vactive = { 800, 800, 800 }, + .vfront_porch = { 4, 8, 100 }, + .vback_porch = { 4, 8, 100 }, + .vsync_len = { 8, 16, 223 }, }; static const struct panel_desc auo_g101evn010 = { - .modes = &auo_g101evn010_mode, - .num_modes = 1, + .timings = &auo_g101evn010_timing, + .num_timings = 1, .bpc = 6, .size = { .width = 216, .height = 135, }, .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, .connector_type = DRM_MODE_CONNECTOR_LVDS, }; From f1aff4bc199cb92c055668caed65505e3b4d2656 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 6 May 2025 11:31:50 +0000 Subject: [PATCH 1489/2924] dm: fix copying after src array boundaries The blammed commit copied to argv the size of the reallocated argv, instead of the size of the old_argv, thus reading and copying from past the old_argv allocated memory. Following BUG_ON was hit: [ 3.038929][ T1] kernel BUG at lib/string_helpers.c:1040! [ 3.039147][ T1] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP ... [ 3.056489][ T1] Call trace: [ 3.056591][ T1] __fortify_panic+0x10/0x18 (P) [ 3.056773][ T1] dm_split_args+0x20c/0x210 [ 3.056942][ T1] dm_table_add_target+0x13c/0x360 [ 3.057132][ T1] table_load+0x110/0x3ac [ 3.057292][ T1] dm_ctl_ioctl+0x424/0x56c [ 3.057457][ T1] __arm64_sys_ioctl+0xa8/0xec [ 3.057634][ T1] invoke_syscall+0x58/0x10c [ 3.057804][ T1] el0_svc_common+0xa8/0xdc [ 3.057970][ T1] do_el0_svc+0x1c/0x28 [ 3.058123][ T1] el0_svc+0x50/0xac [ 3.058266][ T1] el0t_64_sync_handler+0x60/0xc4 [ 3.058452][ T1] el0t_64_sync+0x1b0/0x1b4 [ 3.058620][ T1] Code: f800865e a9bf7bfd 910003fd 941f48aa (d4210000) [ 3.058897][ T1] ---[ end trace 0000000000000000 ]--- [ 3.059083][ T1] Kernel panic - not syncing: Oops - BUG: Fatal exception Fix it by copying the size of src, and not the size of dst, as it was. Fixes: 5a2a6c428190 ("dm: always update the array size in realloc_argv on success") Cc: stable@vger.kernel.org Signed-off-by: Tudor Ambarus Signed-off-by: Mikulas Patocka --- drivers/md/dm-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 31d67a1a91dd60..6b23e777e10e7f 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -524,9 +524,9 @@ static char **realloc_argv(unsigned int *size, char **old_argv) } argv = kmalloc_array(new_size, sizeof(*argv), gfp); if (argv) { - *size = new_size; if (old_argv) memcpy(argv, old_argv, *size * sizeof(*argv)); + *size = new_size; } kfree(old_argv); From 0ca6df4f40cf4c32487944aaf48319cb6c25accc Mon Sep 17 00:00:00 2001 From: Norbert Szetei Date: Fri, 2 May 2025 08:21:58 +0900 Subject: [PATCH 1490/2924] ksmbd: prevent out-of-bounds stream writes by validating *pos ksmbd_vfs_stream_write() did not validate whether the write offset (*pos) was within the bounds of the existing stream data length (v_len). If *pos was greater than or equal to v_len, this could lead to an out-of-bounds memory write. This patch adds a check to ensure *pos is less than v_len before proceeding. If the condition fails, -EINVAL is returned. Cc: stable@vger.kernel.org Signed-off-by: Norbert Szetei Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 391d07da586c73..482eba0f4dc11a 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -426,6 +426,13 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, goto out; } + if (v_len <= *pos) { + pr_err("stream write position %lld is out of bounds (stream length: %zd)\n", + *pos, v_len); + err = -EINVAL; + goto out; + } + if (v_len < size) { wbuf = kvzalloc(size, KSMBD_DEFAULT_GFP); if (!wbuf) { From 36991c1ccde2d5a521577c448ffe07fcccfe104d Mon Sep 17 00:00:00 2001 From: Sean Heelan Date: Tue, 6 May 2025 22:04:52 +0900 Subject: [PATCH 1491/2924] ksmbd: Fix UAF in __close_file_table_ids A use-after-free is possible if one thread destroys the file via __ksmbd_close_fd while another thread holds a reference to it. The existing checks on fp->refcount are not sufficient to prevent this. The fix takes ft->lock around the section which removes the file from the file table. This prevents two threads acquiring the same file pointer via __close_file_table_ids, as well as the other functions which retrieve a file from the IDR and which already use this same lock. Cc: stable@vger.kernel.org Signed-off-by: Sean Heelan Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs_cache.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 1f8fa3468173ab..dfed6fce890498 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -661,21 +661,40 @@ __close_file_table_ids(struct ksmbd_file_table *ft, bool (*skip)(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp)) { - unsigned int id; - struct ksmbd_file *fp; - int num = 0; + struct ksmbd_file *fp; + unsigned int id = 0; + int num = 0; + + while (1) { + write_lock(&ft->lock); + fp = idr_get_next(ft->idr, &id); + if (!fp) { + write_unlock(&ft->lock); + break; + } - idr_for_each_entry(ft->idr, fp, id) { - if (skip(tcon, fp)) + if (skip(tcon, fp) || + !atomic_dec_and_test(&fp->refcount)) { + id++; + write_unlock(&ft->lock); continue; + } set_close_state_blocked_works(fp); + idr_remove(ft->idr, fp->volatile_id); + fp->volatile_id = KSMBD_NO_FID; + write_unlock(&ft->lock); + + down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); + up_write(&fp->f_ci->m_lock); - if (!atomic_dec_and_test(&fp->refcount)) - continue; __ksmbd_close_fd(ft, fp); + num++; + id++; } + return num; } From dcaeeb8ae84c5506ebc574732838264f3887738c Mon Sep 17 00:00:00 2001 From: Antonios Salios Date: Fri, 25 Apr 2025 13:17:45 +0200 Subject: [PATCH 1492/2924] can: m_can: m_can_class_allocate_dev(): initialize spin lock on device probe The spin lock tx_handling_spinlock in struct m_can_classdev is not being initialized. This leads the following spinlock bad magic complaint from the kernel, eg. when trying to send CAN frames with cansend from can-utils: | BUG: spinlock bad magic on CPU#0, cansend/95 | lock: 0xff60000002ec1010, .magic: 00000000, .owner: /-1, .owner_cpu: 0 | CPU: 0 UID: 0 PID: 95 Comm: cansend Not tainted 6.15.0-rc3-00032-ga79be02bba5c #5 NONE | Hardware name: MachineWare SIM-V (DT) | Call Trace: | [] dump_backtrace+0x1c/0x24 | [] show_stack+0x28/0x34 | [] dump_stack_lvl+0x4a/0x68 | [] dump_stack+0x14/0x1c | [] spin_dump+0x62/0x6e | [] do_raw_spin_lock+0xd0/0x142 | [] _raw_spin_lock_irqsave+0x20/0x2c | [] m_can_start_xmit+0x90/0x34a | [] dev_hard_start_xmit+0xa6/0xee | [] sch_direct_xmit+0x114/0x292 | [] __dev_queue_xmit+0x3b0/0xaa8 | [] can_send+0xc6/0x242 | [] raw_sendmsg+0x1a8/0x36c | [] sock_write_iter+0x9a/0xee | [] vfs_write+0x184/0x3a6 | [] ksys_write+0xa0/0xc0 | [] __riscv_sys_write+0x14/0x1c | [] do_trap_ecall_u+0x168/0x212 | [] handle_exception+0x146/0x152 Initializing the spin lock in m_can_class_allocate_dev solves that problem. Fixes: 1fa80e23c150 ("can: m_can: Introduce a tx_fifo_in_flight counter") Signed-off-by: Antonios Salios Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20250425111744.37604-2-antonios@mwa.re Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 884a6352c42b7b..326ede9d400fa2 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2379,6 +2379,7 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, SET_NETDEV_DEV(net_dev, dev); m_can_of_parse_mram(class_dev, mram_config_vals); + spin_lock_init(&class_dev->tx_handling_spinlock); out: return class_dev; } From 5e1663810e11c64956aa7e280cf74b2f3284d816 Mon Sep 17 00:00:00 2001 From: Kelsey Maes Date: Wed, 30 Apr 2025 09:15:01 -0700 Subject: [PATCH 1493/2924] can: mcp251xfd: fix TDC setting for low data bit rates The TDC is currently hardcoded enabled. This means that even for lower CAN-FD data bitrates (with a DBRP (data bitrate prescaler) > 2) a TDC is configured. This leads to a bus-off condition. ISO 11898-1 section 11.3.3 says "Transmitter delay compensation" (TDC) is only applicable if DBRP is 1 or 2. To fix the problem, switch the driver to use the TDC calculation provided by the CAN driver framework (which respects ISO 11898-1 section 11.3.3). This has the positive side effect that userspace can control TDC as needed. Demonstration of the feature in action: | $ ip link set can0 up type can bitrate 125000 dbitrate 500000 fd on | $ ip -details link show can0 | 3: can0: mtu 72 qdisc pfifo_fast state UP mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 | bitrate 125000 sample-point 0.875 | tq 50 prop-seg 69 phase-seg1 70 phase-seg2 20 sjw 10 brp 2 | mcp251xfd: tseg1 2..256 tseg2 1..128 sjw 1..128 brp 1..256 brp_inc 1 | dbitrate 500000 dsample-point 0.875 | dtq 125 dprop-seg 6 dphase-seg1 7 dphase-seg2 2 dsjw 1 dbrp 5 | mcp251xfd: dtseg1 1..32 dtseg2 1..16 dsjw 1..16 dbrp 1..256 dbrp_inc 1 | tdcv 0..63 tdco 0..63 | clock 40000000 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 tso_max_size 65536 tso_max_segs 65535 gro_max_size 65536 parentbus spi parentdev spi0.0 | $ ip link set can0 up type can bitrate 1000000 dbitrate 4000000 fd on | $ ip -details link show can0 | 3: can0: mtu 72 qdisc pfifo_fast state UP mode DEFAULT group default qlen 10 | link/can promiscuity 0 allmulti 0 minmtu 0 maxmtu 0 | can state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0 | bitrate 1000000 sample-point 0.750 | tq 25 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 5 brp 1 | mcp251xfd: tseg1 2..256 tseg2 1..128 sjw 1..128 brp 1..256 brp_inc 1 | dbitrate 4000000 dsample-point 0.700 | dtq 25 dprop-seg 3 dphase-seg1 3 dphase-seg2 3 dsjw 1 dbrp 1 | tdco 7 | mcp251xfd: dtseg1 1..32 dtseg2 1..16 dsjw 1..16 dbrp 1..256 dbrp_inc 1 | tdcv 0..63 tdco 0..63 | clock 40000000 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 tso_max_size 65536 tso_max_segs 65535 gro_max_size 65536 parentbus spi parentdev spi0.0 There has been some confusion about the MCP2518FD using a relative or absolute TDCO due to the datasheet specifying a range of [-64,63]. I have a custom board with a 40 MHz clock and an estimated loop delay of 100 to 216 ns. During testing at a data bit rate of 4 Mbit/s I found that using can_get_relative_tdco() resulted in bus-off errors. The final TDCO value was 1 which corresponds to a 10% SSP in an absolute configuration. This behavior is expected if the TDCO value is really absolute and not relative. Using priv->can.tdc.tdco instead results in a final TDCO of 8, setting the SSP at exactly 80%. This configuration works. The automatic, manual, and off TDC modes were tested at speeds up to, and including, 8 Mbit/s on real hardware and behave as expected. Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Reported-by: Kelsey Maes Closes: https://lore.kernel.org/all/C2121586-C87F-4B23-A933-845362C29CA1@vpprocess.com Reviewed-by: Vincent Mailhol Signed-off-by: Kelsey Maes Link: https://patch.msgid.link/20250430161501.79370-1-kelsey@vpprocess.com [mkl: add comment] Signed-off-by: Marc Kleine-Budde --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 3bc56517fe7a99..064d81c724f4c3 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -75,6 +75,24 @@ static const struct can_bittiming_const mcp251xfd_data_bittiming_const = { .brp_inc = 1, }; +/* The datasheet of the mcp2518fd (DS20006027B) specifies a range of + * [-64,63] for TDCO, indicating a relative TDCO. + * + * Manual tests have shown, that using a relative TDCO configuration + * results in bus off, while an absolute configuration works. + * + * For TDCO use the max value (63) from the data sheet, but 0 as the + * minimum. + */ +static const struct can_tdc_const mcp251xfd_tdc_const = { + .tdcv_min = 0, + .tdcv_max = 63, + .tdco_min = 0, + .tdco_max = 63, + .tdcf_min = 0, + .tdcf_max = 0, +}; + static const char *__mcp251xfd_get_model_str(enum mcp251xfd_model model) { switch (model) { @@ -510,8 +528,7 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) { const struct can_bittiming *bt = &priv->can.bittiming; const struct can_bittiming *dbt = &priv->can.data_bittiming; - u32 val = 0; - s8 tdco; + u32 tdcmod, val = 0; int err; /* CAN Control Register @@ -575,11 +592,16 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) return err; /* Transmitter Delay Compensation */ - tdco = clamp_t(int, dbt->brp * (dbt->prop_seg + dbt->phase_seg1), - -64, 63); - val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, - MCP251XFD_REG_TDC_TDCMOD_AUTO) | - FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, tdco); + if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_AUTO) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_AUTO; + else if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_MANUAL) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_MANUAL; + else + tdcmod = MCP251XFD_REG_TDC_TDCMOD_DISABLED; + + val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, tdcmod) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCV_MASK, priv->can.tdc.tdcv) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, priv->can.tdc.tdco); return regmap_write(priv->map_reg, MCP251XFD_REG_TDC, val); } @@ -2083,10 +2105,12 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->can.do_get_berr_counter = mcp251xfd_get_berr_counter; priv->can.bittiming_const = &mcp251xfd_bittiming_const; priv->can.data_bittiming_const = &mcp251xfd_data_bittiming_const; + priv->can.tdc_const = &mcp251xfd_tdc_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING | CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO | - CAN_CTRLMODE_CC_LEN8_DLC; + CAN_CTRLMODE_CC_LEN8_DLC | CAN_CTRLMODE_TDC_AUTO | + CAN_CTRLMODE_TDC_MANUAL; set_bit(MCP251XFD_FLAGS_DOWN, priv->flags); priv->ndev = ndev; priv->spi = spi; From 84f5eb833f53ae192baed4cfb8d9eaab43481fc9 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:44 +0200 Subject: [PATCH 1494/2924] can: mcp251xfd: mcp251xfd_remove(): fix order of unregistration calls If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. With the mcp251xfd driver the removal of the module causes the following warning: | WARNING: CPU: 0 PID: 352 at net/core/dev.c:7342 __netif_napi_del_locked+0xc8/0xd8 as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-1-59a9b131589d@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 064d81c724f4c3..c30b04f8fc0df8 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2198,8 +2198,8 @@ static void mcp251xfd_remove(struct spi_device *spi) struct mcp251xfd_priv *priv = spi_get_drvdata(spi); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); mcp251xfd_unregister(priv); + can_rx_offload_del(&priv->offload); spi->max_speed_hz = priv->spi_max_speed_hz_orig; free_candev(ndev); } From db492e24f9b05547ba12b4783f09c9d943cf42fe Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 6 May 2025 13:27:30 +0200 Subject: [PATCH 1495/2924] block: only update request sector if needed In case of a ZONE APPEND write, regardless of native ZONE APPEND or the emulation layer in the zone write plugging code, the sector the data got written to by the device needs to be updated in the bio. At the moment, this is done for every native ZONE APPEND write and every request that is flagged with 'BIO_ZONE_WRITE_PLUGGING'. But thus superfluously updates the sector for regular writes to a zoned block device. Check if a bio is a native ZONE APPEND write or if the bio is flagged as 'BIO_EMULATES_ZONE_APPEND', meaning the block layer's zone write plugging code handles the ZONE APPEND and translates it into a regular write and back. Only if one of these two criterion is met, update the sector in the bio upon completion. Signed-off-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/dea089581cb6b777c1cd1500b38ac0b61df4b2d1.1746530748.git.jth@kernel.org Signed-off-by: Jens Axboe --- block/blk.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk.h b/block/blk.h index 328075787814a9..594eeba7b9495c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -480,7 +480,8 @@ static inline void blk_zone_update_request_bio(struct request *rq, * the original BIO sector so that blk_zone_write_plug_bio_endio() can * lookup the zone write plug. */ - if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio)) + if (req_op(rq) == REQ_OP_ZONE_APPEND || + bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) bio->bi_iter.bi_sector = rq->__sector; } void blk_zone_write_plug_bio_endio(struct bio *bio); From 037ada7a3181300218e4fd78bef6a741cfa7f808 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:45 +0200 Subject: [PATCH 1496/2924] can: rockchip_canfd: rkcanfd_remove(): fix order of unregistration calls If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. The removal of the module causes a warning, as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: ff60bfbaf67f ("can: rockchip_canfd: add driver for Rockchip CAN-FD controller") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-2-59a9b131589d@pengutronix.de Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rockchip/rockchip_canfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index 7107a37da36c7f..c3fb3176ce4221 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -937,8 +937,8 @@ static void rkcanfd_remove(struct platform_device *pdev) struct rkcanfd_priv *priv = platform_get_drvdata(pdev); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); rkcanfd_unregister(priv); + can_rx_offload_del(&priv->offload); free_candev(ndev); } From 0713a1b3276b98c7dafbeefef00d7bc3a9119a84 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:46 +0200 Subject: [PATCH 1497/2924] can: mcan: m_can_class_unregister(): fix order of unregistration calls If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. The removal of the module causes a warning, as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 1be37d3b0414 ("can: m_can: fix periph RX path: use rx-offload to ensure skbs are sent from softirq context") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-3-59a9b131589d@pengutronix.de Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 326ede9d400fa2..c2c116ce1087c0 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2463,9 +2463,9 @@ EXPORT_SYMBOL_GPL(m_can_class_register); void m_can_class_unregister(struct m_can_classdev *cdev) { + unregister_candev(cdev->net); if (cdev->is_peripheral) can_rx_offload_del(&cdev->offload); - unregister_candev(cdev->net); } EXPORT_SYMBOL_GPL(m_can_class_unregister); From 511e64e13d8cc72853275832e3f372607466c18c Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 29 Apr 2025 09:05:55 +0200 Subject: [PATCH 1498/2924] can: gw: fix RCU/BH usage in cgw_create_job() As reported by Sebastian Andrzej Siewior the use of local_bh_disable() is only feasible in uni processor systems to update the modification rules. The usual use-case to update the modification rules is to update the data of the modifications but not the modification types (AND/OR/XOR/SET) or the checksum functions itself. To omit additional memory allocations to maintain fast modification switching times, the modification description space is doubled at gw-job creation time so that only the reference to the active modification description is changed under rcu protection. Rename cgw_job::mod to cf_mod and make it a RCU pointer. Allocate in cgw_create_job() and free it together with cgw_job in cgw_job_free_rcu(). Update all users to dereference cgw_job::cf_mod with a RCU accessor and if possible once. [bigeasy: Replace mod1/mod2 from the Oliver's original patch with dynamic allocation, use RCU annotation and accessor] Reported-by: Sebastian Andrzej Siewior Closes: https://lore.kernel.org/linux-can/20231031112349.y0aLoBrz@linutronix.de/ Fixes: dd895d7f21b2 ("can: cangw: introduce optional uid to reference created routing jobs") Tested-by: Oliver Hartkopp Signed-off-by: Oliver Hartkopp Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250429070555.cs-7b_eZ@linutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/gw.c | 149 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 90 insertions(+), 59 deletions(-) diff --git a/net/can/gw.c b/net/can/gw.c index ef93293c1fae39..55eccb1c7620c0 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -130,7 +130,7 @@ struct cgw_job { u32 handled_frames; u32 dropped_frames; u32 deleted_frames; - struct cf_mod mod; + struct cf_mod __rcu *cf_mod; union { /* CAN frame data source */ struct net_device *dev; @@ -459,6 +459,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) struct cgw_job *gwj = (struct cgw_job *)data; struct canfd_frame *cf; struct sk_buff *nskb; + struct cf_mod *mod; int modidx = 0; /* process strictly Classic CAN or CAN FD frames */ @@ -506,7 +507,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. */ - if (gwj->mod.modfunc[0]) + mod = rcu_dereference(gwj->cf_mod); + if (mod->modfunc[0]) nskb = skb_copy(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -529,8 +531,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ - while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) - (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); + while (modidx < MAX_MODFUNCTIONS && mod->modfunc[modidx]) + (*mod->modfunc[modidx++])(cf, mod); /* Has the CAN frame been modified? */ if (modidx) { @@ -546,11 +548,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) } /* check for checksum updates */ - if (gwj->mod.csumfunc.crc8) - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (mod->csumfunc.crc8) + (*mod->csumfunc.crc8)(cf, &mod->csum.crc8); - if (gwj->mod.csumfunc.xor) - (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + if (mod->csumfunc.xor) + (*mod->csumfunc.xor)(cf, &mod->csum.xor); } /* clear the skb timestamp if not configured the other way */ @@ -581,9 +583,20 @@ static void cgw_job_free_rcu(struct rcu_head *rcu_head) { struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu); + /* cgw_job::cf_mod is always accessed from the same cgw_job object within + * the same RCU read section. Once cgw_job is scheduled for removal, + * cf_mod can also be removed without mandating an additional grace period. + */ + kfree(rcu_access_pointer(gwj->cf_mod)); kmem_cache_free(cgw_cache, gwj); } +/* Return cgw_job::cf_mod with RTNL protected section */ +static struct cf_mod *cgw_job_cf_mod(struct cgw_job *gwj) +{ + return rcu_dereference_protected(gwj->cf_mod, rtnl_is_locked()); +} + static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { @@ -616,6 +629,7 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, { struct rtcanmsg *rtcan; struct nlmsghdr *nlh; + struct cf_mod *mod; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags); if (!nlh) @@ -650,82 +664,83 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + mod = cgw_job_cf_mod(gwj); if (gwj->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } else { struct cgw_frame_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } - if (gwj->mod.uid) { - if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) + if (mod->uid) { + if (nla_put_u32(skb, CGW_MOD_UID, mod->uid) < 0) goto cancel; } - if (gwj->mod.csumfunc.crc8) { + if (mod->csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, - &gwj->mod.csum.crc8) < 0) + &mod->csum.crc8) < 0) goto cancel; } - if (gwj->mod.csumfunc.xor) { + if (mod->csumfunc.xor) { if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, - &gwj->mod.csum.xor) < 0) + &mod->csum.xor) < 0) goto cancel; } @@ -1059,7 +1074,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct rtcanmsg *r; struct cgw_job *gwj; - struct cf_mod mod; + struct cf_mod *mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; @@ -1078,37 +1093,48 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; - err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); + mod = kmalloc(sizeof(*mod), GFP_KERNEL); + if (!mod) + return -ENOMEM; + + err = cgw_parse_attr(nlh, mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); if (err < 0) - return err; + goto out_free_cf; - if (mod.uid) { + if (mod->uid) { ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { - if (gwj->mod.uid != mod.uid) + struct cf_mod *old_cf; + + old_cf = cgw_job_cf_mod(gwj); + if (old_cf->uid != mod->uid) continue; /* interfaces & filters must be identical */ - if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) - return -EINVAL; + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) { + err = -EINVAL; + goto out_free_cf; + } - /* update modifications with disabled softirq & quit */ - local_bh_disable(); - memcpy(&gwj->mod, &mod, sizeof(mod)); - local_bh_enable(); + rcu_assign_pointer(gwj->cf_mod, mod); + kfree_rcu_mightsleep(old_cf); return 0; } } /* ifindex == 0 is not allowed for job creation */ - if (!ccgw.src_idx || !ccgw.dst_idx) - return -ENODEV; + if (!ccgw.src_idx || !ccgw.dst_idx) { + err = -ENODEV; + goto out_free_cf; + } gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); - if (!gwj) - return -ENOMEM; + if (!gwj) { + err = -ENOMEM; + goto out_free_cf; + } gwj->handled_frames = 0; gwj->dropped_frames = 0; @@ -1118,7 +1144,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->limit_hops = limhops; /* insert already parsed information */ - memcpy(&gwj->mod, &mod, sizeof(mod)); + RCU_INIT_POINTER(gwj->cf_mod, mod); memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; @@ -1152,9 +1178,11 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (!err) hlist_add_head_rcu(&gwj->list, &net->can.cgw_list); out: - if (err) + if (err) { kmem_cache_free(cgw_cache, gwj); - +out_free_cf: + kfree(mod); + } return err; } @@ -1214,19 +1242,22 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { + struct cf_mod *cf_mod; + if (gwj->flags != r->flags) continue; if (gwj->limit_hops != limhops) continue; + cf_mod = cgw_job_cf_mod(gwj); /* we have a match when uid is enabled and identical */ - if (gwj->mod.uid || mod.uid) { - if (gwj->mod.uid != mod.uid) + if (cf_mod->uid || mod.uid) { + if (cf_mod->uid != mod.uid) continue; } else { /* no uid => check for identical modifications */ - if (memcmp(&gwj->mod, &mod, sizeof(mod))) + if (memcmp(cf_mod, &mod, sizeof(mod))) continue; } From d90b023718a17d308d831fde36b3bb6fa3b511e0 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 4 May 2025 18:26:45 -0500 Subject: [PATCH 1499/2924] smb3 client: warn when parse contexts returns error on compounded operation Coverity noticed that the rc on smb2_parse_contexts() was not being checked in the case of compounded operations. Since we don't want to stop parsing the following compounded responses which are likely valid, we can't easily error out here, but at least print a warning message if server has a bug causing us to skip parsing the open response contexts. Addresses-Coverity: 1639191 Acked-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/smb2inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 57d9bfbadd97b2..2a3e46b8e15af6 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -666,6 +666,8 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, /* smb2_parse_contexts() fills idata->fi.IndexNumber */ rc = smb2_parse_contexts(server, &rsp_iov[0], &oparms->fid->epoch, oparms->fid->lease_key, &oplock, &idata->fi, NULL); + if (rc) + cifs_dbg(VFS, "rc: %d parsing context of compound op\n", rc); } for (i = 0; i < num_cmds; i++) { From d4e89d212d401672e9cdfe825d947ee3a9fbe3f5 Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Mon, 5 May 2025 14:35:12 -0700 Subject: [PATCH 1500/2924] x86/bpf: Call branch history clearing sequence on exit Classic BPF programs have been identified as potential vectors for intra-mode Branch Target Injection (BTI) attacks. Classic BPF programs can be run by unprivileged users. They allow unprivileged code to execute inside the kernel. Attackers can use unprivileged cBPF to craft branch history in kernel mode that can influence the target of indirect branches. Introduce a branch history buffer (BHB) clearing sequence during the JIT compilation of classic BPF programs. The clearing sequence is the same as is used in previous mitigations to protect syscalls. Since eBPF programs already have their own mitigations in place, only insert the call on classic programs that aren't run by privileged users. Signed-off-by: Daniel Sneddon Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Acked-by: Daniel Borkmann Reviewed-by: Alexandre Chartre --- arch/x86/net/bpf_jit_comp.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9e5fe2ba858f08..6fb786c8b2aa38 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1502,6 +1502,30 @@ static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr) #define PRIV_STACK_GUARD_SZ 8 #define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL +static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, + struct bpf_prog *bpf_prog) +{ + u8 *prog = *pprog; + u8 *func; + + if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) { + /* The clearing sequence clobbers eax and ecx. */ + EMIT1(0x50); /* push rax */ + EMIT1(0x51); /* push rcx */ + ip += 2; + + func = (u8 *)clear_bhb_loop; + ip += x86_call_depth_emit_accounting(&prog, func, ip); + + if (emit_call(&prog, func, ip)) + return -EINVAL; + EMIT1(0x59); /* pop rcx */ + EMIT1(0x58); /* pop rax */ + } + *pprog = prog; + return 0; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding) { @@ -2544,6 +2568,13 @@ st: if (is_imm8(insn->off)) seen_exit = true; /* Update cleanup_addr */ ctx->cleanup_addr = proglen; + if (bpf_prog_was_classic(bpf_prog) && + !capable(CAP_SYS_ADMIN)) { + u8 *ip = image + addrs[i - 1]; + + if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog)) + return -EINVAL; + } if (bpf_prog->aux->exception_boundary) { pop_callee_regs(&prog, all_callee_regs_used); pop_r12(&prog); From 9f725eec8fc0b39bdc07dcc8897283c367c1a163 Mon Sep 17 00:00:00 2001 From: Daniel Sneddon Date: Mon, 5 May 2025 14:35:12 -0700 Subject: [PATCH 1501/2924] x86/bpf: Add IBHF call at end of classic BPF Classic BPF programs can be run by unprivileged users, allowing unprivileged code to execute inside the kernel. Attackers can use this to craft branch history in kernel mode that can influence the target of indirect branches. BHI_DIS_S provides user-kernel isolation of branch history, but cBPF can be used to bypass this protection by crafting branch history in kernel mode. To stop intra-mode attacks via cBPF programs, Intel created a new instruction Indirect Branch History Fence (IBHF). IBHF prevents the predicted targets of subsequent indirect branches from being influenced by branch history prior to the IBHF. IBHF is only effective while BHI_DIS_S is enabled. Add the IBHF instruction to cBPF jitted code's exit path. Add the new fence when the hardware mitigation is enabled (i.e., X86_FEATURE_CLEAR_BHB_HW is set) or after the software sequence (X86_FEATURE_CLEAR_BHB_LOOP) is being used in a virtual machine. Note that X86_FEATURE_CLEAR_BHB_HW and X86_FEATURE_CLEAR_BHB_LOOP are mutually exclusive, so the JIT compiler will only emit the new fence, not the SW sequence, when X86_FEATURE_CLEAR_BHB_HW is set. Hardware that enumerates BHI_NO basically has BHI_DIS_S protections always enabled, regardless of the value of BHI_DIS_S. Since BHI_DIS_S doesn't protect against intra-mode attacks, enumerate BHI bug on BHI_NO hardware as well. Signed-off-by: Daniel Sneddon Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Acked-by: Daniel Borkmann Reviewed-by: Alexandre Chartre --- arch/x86/kernel/cpu/common.c | 9 ++++++--- arch/x86/net/bpf_jit_comp.c | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 12126adbc3a9a7..5ab13d9241c0ad 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1439,9 +1439,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); - /* When virtualized, eIBRS could be hidden, assume vulnerable */ - if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && - !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + /* + * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with + * BHI_NO still need to use the BHI mitigation to prevent Intra-mode + * attacks. When virtualized, eIBRS could be hidden, assume vulnerable. + */ + if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) && (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || boot_cpu_has(X86_FEATURE_HYPERVISOR))) setup_force_cpu_bug(X86_BUG_BHI); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 6fb786c8b2aa38..e472572392ef6c 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -41,6 +41,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) +#define EMIT5(b1, b2, b3, b4, b5) \ + do { EMIT1(b1); EMIT4(b2, b3, b4, b5); } while (0) #define EMIT1_off32(b1, off) \ do { EMIT1(b1); EMIT(off, 4); } while (0) @@ -1522,6 +1524,23 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, EMIT1(0x59); /* pop rcx */ EMIT1(0x58); /* pop rax */ } + /* Insert IBHF instruction */ + if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) && + cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) || + (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW) && + IS_ENABLED(CONFIG_X86_64))) { + /* + * Add an Indirect Branch History Fence (IBHF). IBHF acts as a + * fence preventing branch history from before the fence from + * affecting indirect branches after the fence. This is + * specifically used in cBPF jitted code to prevent Intra-mode + * BHI attacks. The IBHF instruction is designed to be a NOP on + * hardware that doesn't need or support it. The REP and REX.W + * prefixes are required by the microcode, and they also ensure + * that the NOP is unlikely to be used in existing code. + */ + EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */ + } *pprog = prog; return 0; } From 073fdbe02c69c43fb7c0d547ec265c7747d4a646 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 5 May 2025 14:35:12 -0700 Subject: [PATCH 1502/2924] x86/bhi: Do not set BHI_DIS_S in 32-bit mode With the possibility of intra-mode BHI via cBPF, complete mitigation for BHI is to use IBHF (history fence) instruction with BHI_DIS_S set. Since this new instruction is only available in 64-bit mode, setting BHI_DIS_S in 32-bit mode is only a partial mitigation. Do not set BHI_DIS_S in 32-bit mode so as to avoid reporting misleading mitigated status. With this change IBHF won't be used in 32-bit mode, also remove the CONFIG_X86_64 check from emit_spectre_bhb_barrier(). Suggested-by: Josh Poimboeuf Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre --- arch/x86/kernel/cpu/bugs.c | 6 +++--- arch/x86/net/bpf_jit_comp.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 362602b705cc43..f219f0f4f2d11d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1697,11 +1697,11 @@ static void __init bhi_select_mitigation(void) return; } - /* Mitigate in hardware if supported */ - if (spec_ctrl_bhi_dis()) + if (!IS_ENABLED(CONFIG_X86_64)) return; - if (!IS_ENABLED(CONFIG_X86_64)) + /* Mitigate in hardware if supported */ + if (spec_ctrl_bhi_dis()) return; if (bhi_mitigation == BHI_MITIGATION_VMEXIT_ONLY) { diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index e472572392ef6c..8a0fabb850b772 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1527,8 +1527,7 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, /* Insert IBHF instruction */ if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) || - (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW) && - IS_ENABLED(CONFIG_X86_64))) { + cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW)) { /* * Add an Indirect Branch History Fence (IBHF). IBHF acts as a * fence preventing branch history from before the fence from @@ -1538,6 +1537,8 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, * hardware that doesn't need or support it. The REP and REX.W * prefixes are required by the microcode, and they also ensure * that the NOP is unlikely to be used in existing code. + * + * IBHF is not a valid instruction in 32-bit mode. */ EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */ } From 42e31f0daf80d9f7bc4ab4000f2795ec3ddf5206 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 6 May 2025 13:10:12 +0200 Subject: [PATCH 1503/2924] mm,mm_init: Mark set_high_memory as __init set_high_memory() touches arch_zone_lowest_possible_pfn which is marked as __initdata, which creates a section mismatch. Since the only user of the function is free_area_init() which is also marked as __init, mark set_high_memory() as __init as well. Signed-off-by: Oscar Salvador Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505060901.Qcs06UoB-lkp@intel.com/ Link: https://lore.kernel.org/r/20250506111012.108743-1-osalvador@suse.de Signed-off-by: Mike Rapoport (Microsoft) --- mm/mm_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 9659689b8ace01..327764ca0ee4e2 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1786,7 +1786,7 @@ static bool arch_has_descending_max_zone_pfns(void) return IS_ENABLED(CONFIG_ARC) && !IS_ENABLED(CONFIG_ARC_HAS_PAE40); } -static void set_high_memory(void) +static void __init set_high_memory(void) { phys_addr_t highmem = memblock_end_of_DRAM(); From d0706bfd3ee40923c001c6827b786a309e2a8713 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 6 May 2025 17:10:08 +0200 Subject: [PATCH 1504/2924] RDMA/core: Fix "KASAN: slab-use-after-free Read in ib_register_device" problem Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xc3/0x670 mm/kasan/report.c:521 kasan_report+0xe0/0x110 mm/kasan/report.c:634 strlen+0x93/0xa0 lib/string.c:420 __fortify_strlen include/linux/fortify-string.h:268 [inline] get_kobj_path_length lib/kobject.c:118 [inline] kobject_get_path+0x3f/0x2a0 lib/kobject.c:158 kobject_uevent_env+0x289/0x1870 lib/kobject_uevent.c:545 ib_register_device drivers/infiniband/core/device.c:1472 [inline] ib_register_device+0x8cf/0xe00 drivers/infiniband/core/device.c:1393 rxe_register_device+0x275/0x320 drivers/infiniband/sw/rxe/rxe_verbs.c:1552 rxe_net_add+0x8e/0xe0 drivers/infiniband/sw/rxe/rxe_net.c:550 rxe_newlink+0x70/0x190 drivers/infiniband/sw/rxe/rxe.c:225 nldev_newlink+0x3a3/0x680 drivers/infiniband/core/nldev.c:1796 rdma_nl_rcv_msg+0x387/0x6e0 drivers/infiniband/core/netlink.c:195 rdma_nl_rcv_skb.constprop.0.isra.0+0x2e5/0x450 netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] netlink_unicast+0x53a/0x7f0 net/netlink/af_netlink.c:1339 netlink_sendmsg+0x8d1/0xdd0 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg net/socket.c:727 [inline] ____sys_sendmsg+0xa95/0xc70 net/socket.c:2566 ___sys_sendmsg+0x134/0x1d0 net/socket.c:2620 __sys_sendmsg+0x16d/0x220 net/socket.c:2652 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0x260 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f This problem is similar to the problem that the commit 1d6a9e7449e2 ("RDMA/core: Fix use-after-free when rename device name") fixes. The root cause is: the function ib_device_rename() renames the name with lock. But in the function kobject_uevent(), this name is accessed without lock protection at the same time. The solution is to add the lock protection when this name is accessed in the function kobject_uevent(). Fixes: 779e0bf47632 ("RDMA/core: Do not indicate device ready when device enablement fails") Link: https://patch.msgid.link/r/20250506151008.75701-1-yanjun.zhu@linux.dev Reported-by: syzbot+e2ce9e275ecc70a30b72@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e2ce9e275ecc70a30b72 Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b4e3e4beb7f455..d4263385850a7a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1352,6 +1352,9 @@ static void ib_device_notify_register(struct ib_device *device) down_read(&devices_rwsem); + /* Mark for userspace that device is ready */ + kobject_uevent(&device->dev.kobj, KOBJ_ADD); + ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT); if (ret) goto out; @@ -1468,10 +1471,9 @@ int ib_register_device(struct ib_device *device, const char *name, return ret; } dev_set_uevent_suppress(&device->dev, false); - /* Mark for userspace that device is ready */ - kobject_uevent(&device->dev.kobj, KOBJ_ADD); ib_device_notify_register(device); + ib_device_put(device); return 0; From c1d9dac0db168198b6f63f460665256dedad9b6e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 2 May 2025 16:40:31 -0600 Subject: [PATCH 1505/2924] vfio/pci: Align huge faults to order The vfio-pci huge_fault handler doesn't make any attempt to insert a mapping containing the faulting address, it only inserts mappings if the faulting address and resulting pfn are aligned. This works in a lot of cases, particularly in conjunction with QEMU where DMA mappings linearly fault the mmap. However, there are configurations where we don't get that linear faulting and pages are faulted on-demand. The scenario reported in the bug below is such a case, where the physical address width of the CPU is greater than that of the IOMMU, resulting in a VM where guest firmware has mapped device MMIO beyond the address width of the IOMMU. In this configuration, the MMIO is faulted on demand and tracing indicates that occasionally the faults generate a VM_FAULT_OOM. Given the use case, this results in a "error: kvm run failed Bad address", killing the VM. The host is not under memory pressure in this test, therefore it's suspected that VM_FAULT_OOM is actually the result of a NULL return from __pte_offset_map_lock() in the get_locked_pte() path from insert_pfn(). This suggests a potential race inserting a pte concurrent to a pmd, and maybe indicates some deficiency in the mm layer properly handling such a case. Nevertheless, Peter noted the inconsistency of vfio-pci's huge_fault handler where our mapping granularity depends on the alignment of the faulting address relative to the order rather than aligning the faulting address to the order to more consistently insert huge mappings. This change not only uses the page tables more consistently and efficiently, but as any fault to an aligned page results in the same mapping, the race condition suspected in the VM_FAULT_OOM is avoided. Reported-by: Adolfo Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220057 Fixes: 09dfc8a5f2ce ("vfio/pci: Fallback huge faults for unaligned pfn") Cc: stable@vger.kernel.org Tested-by: Adolfo Co-developed-by: Peter Xu Signed-off-by: Peter Xu Link: https://lore.kernel.org/r/20250502224035.3183451-1-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 35f9046af315ff..6328c3a05bcdd4 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1646,14 +1646,14 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf, { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_core_device *vdev = vma->vm_private_data; - unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff; + unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1); + unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; + unsigned long pfn = vma_to_pfn(vma) + pgoff; vm_fault_t ret = VM_FAULT_SIGBUS; - pfn = vma_to_pfn(vma) + pgoff; - - if (order && (pfn & ((1 << order) - 1) || - vmf->address & ((PAGE_SIZE << order) - 1) || - vmf->address + (PAGE_SIZE << order) > vma->vm_end)) { + if (order && (addr < vma->vm_start || + addr + (PAGE_SIZE << order) > vma->vm_end || + pfn & ((1 << order) - 1))) { ret = VM_FAULT_FALLBACK; goto out; } From 023c1f2f0609218103cbcb48e0104b144d4a16dc Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Thu, 24 Apr 2025 18:01:42 +0530 Subject: [PATCH 1506/2924] wifi: cfg80211: fix out-of-bounds access during multi-link element defragmentation Currently during the multi-link element defragmentation process, the multi-link element length added to the total IEs length when calculating the length of remaining IEs after the multi-link element in cfg80211_defrag_mle(). This could lead to out-of-bounds access if the multi-link element or its corresponding fragment elements are the last elements in the IEs buffer. To address this issue, correctly calculate the remaining IEs length by deducting the multi-link element end offset from total IEs end offset. Cc: stable@vger.kernel.org Fixes: 2481b5da9c6b ("wifi: cfg80211: handle BSS data contained in ML probe responses") Signed-off-by: Veerendranath Jakkam Link: https://patch.msgid.link/20250424-fix_mle_defragmentation_oob_access-v1-1-84412a1743fa@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9865f305275da6..ddd3a97f6609d1 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2681,7 +2681,7 @@ cfg80211_defrag_mle(const struct element *mle, const u8 *ie, size_t ielen, /* Required length for first defragmentation */ buf_len = mle->datalen - 1; for_each_element(elem, mle->data + mle->datalen, - ielen - sizeof(*mle) + mle->datalen) { + ie + ielen - mle->data - mle->datalen) { if (elem->id != WLAN_EID_FRAGMENT) break; From e12a42f64fc3d74872b349eedd47f90c6676b78a Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Mon, 5 May 2025 16:19:46 +0800 Subject: [PATCH 1507/2924] wifi: mac80211: fix the type of status_code for negotiated TID to Link Mapping The status code should be type of __le16. Fixes: 83e897a961b8 ("wifi: ieee80211: add definitions for negotiated TID to Link map") Fixes: 8f500fbc6c65 ("wifi: mac80211: process and save negotiated TID to Link mapping request") Signed-off-by: Michael-CY Lee Link: https://patch.msgid.link/20250505081946.3927214-1-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- net/mac80211/mlme.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 508d466de1cc24..457b4fba88bd00 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1526,7 +1526,7 @@ struct ieee80211_mgmt { struct { u8 action_code; u8 dialog_token; - u8 status_code; + __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5d1f2d6d09ad02..35eaf0812c5b24 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7675,6 +7675,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res); int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; + u16 status_code; skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len); if (!skb) @@ -7697,19 +7698,18 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, WARN_ON(1); fallthrough; case NEG_TTLM_RES_REJECT: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; + status_code = WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; break; case NEG_TTLM_RES_ACCEPT: - mgmt->u.action.u.ttlm_res.status_code = WLAN_STATUS_SUCCESS; + status_code = WLAN_STATUS_SUCCESS; break; case NEG_TTLM_RES_SUGGEST_PREFERRED: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; + status_code = WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm); break; } + mgmt->u.action.u.ttlm_res.status_code = cpu_to_le16(status_code); ieee80211_tx_skb(sdata, skb); } @@ -7875,7 +7875,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, * This can be better implemented in the future, to handle request * rejections. */ - if (mgmt->u.action.u.ttlm_res.status_code != WLAN_STATUS_SUCCESS) + if (le16_to_cpu(mgmt->u.action.u.ttlm_res.status_code) != WLAN_STATUS_SUCCESS) __ieee80211_disconnect(sdata); } From ebedf8b7f05b9c886d68d63025db8d1b12343157 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 May 2025 21:42:59 +0200 Subject: [PATCH 1508/2924] wifi: iwlwifi: add support for Killer on MTL For now, we need another entry for these devices, this will be changed completely for 6.16. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219926 Link: https://patch.msgid.link/20250506214258.2efbdc9e9a82.I31915ec252bd1c74bd53b89a0e214e42a74b6f2e@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index debeea2b3ae577..00056e76ea3dd6 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -588,6 +588,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(0x7A70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x7AF0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), IWL_DEV_INFO(0x7AF0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), + IWL_DEV_INFO(0x7F70, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), + IWL_DEV_INFO(0x7F70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x271C, 0x0214, iwl9260_2ac_cfg, iwl9260_1_name), IWL_DEV_INFO(0x7E40, 0x1691, iwl_cfg_ma, iwl_ax411_killer_1690s_name), From 19f5ca461d5fc09bdf93a9f8e4bd78ed3a49dc71 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 2 May 2025 16:02:33 +0200 Subject: [PATCH 1509/2924] objtool/rust: add one more `noreturn` Rust function for Rust 1.87.0 Starting with Rust 1.87.0 (expected 2025-05-15), `objtool` may report: rust/core.o: warning: objtool: _R..._4core9panicking9panic_fmt() falls through to next function _R..._4core9panicking18panic_nounwind_fmt() rust/core.o: warning: objtool: _R..._4core9panicking18panic_nounwind_fmt() falls through to next function _R..._4core9panicking5panic() The reason is that `rust_begin_unwind` is now mangled: _R..._7___rustc17rust_begin_unwind Thus add the mangled one to the list so that `objtool` knows it is actually `noreturn`. See commit 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") for more details. Alternatively, we could remove the fixed one in `noreturn.h` and relax this test to cover both, but it seems best to be strict as long as we can. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Cc: Josh Poimboeuf Cc: Peter Zijlstra Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250502140237.1659624-2-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3a411064fa34b6..b21b12ec88d960 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -227,6 +227,7 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || + str_ends_with(func->name, "_7___rustc17rust_begin_unwind") || strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || (strstr(func->name, "_4core5slice5index24slice_") && From a39f3087092716f2bd531d6fdc20403c3dc2a879 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 2 May 2025 16:02:34 +0200 Subject: [PATCH 1510/2924] rust: allow Rust 1.87.0's `clippy::ptr_eq` lint Starting with Rust 1.87.0 (expected 2025-05-15) [1], Clippy may expand the `ptr_eq` lint, e.g.: error: use `core::ptr::eq` when comparing raw pointers --> rust/kernel/list.rs:438:12 | 438 | if self.first == item { | ^^^^^^^^^^^^^^^^^^ help: try: `core::ptr::eq(self.first, item)` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#ptr_eq = note: `-D clippy::ptr-eq` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(clippy::ptr_eq)]` It is expected that a PR to relax the lint will be backported [2] by the time Rust 1.87.0 releases, since the lint was considered too eager (at least by default) [3]. Thus allow the lint temporarily just in case. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust-clippy/pull/14339 [1] Link: https://github.com/rust-lang/rust-clippy/pull/14526 [2] Link: https://github.com/rust-lang/rust-clippy/issues/14525 [3] Link: https://lore.kernel.org/r/20250502140237.1659624-3-ojeda@kernel.org [ Converted to `allow`s since backport was confirmed. - Miguel ] Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/kvec.rs | 3 +++ rust/kernel/list.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index ae9d072741cedb..87a71fd40c3cad 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -2,6 +2,9 @@ //! Implementation of [`Vec`]. +// May not be needed in Rust 1.87.0 (pending beta backport). +#![allow(clippy::ptr_eq)] + use super::{ allocator::{KVmalloc, Kmalloc, Vmalloc}, layout::ArrayLayout, diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index a335c3b1ff5e48..2054682c5724ce 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -4,6 +4,9 @@ //! A linked list implementation. +// May not be needed in Rust 1.87.0 (pending beta backport). +#![allow(clippy::ptr_eq)] + use crate::sync::ArcBorrow; use crate::types::Opaque; use core::iter::{DoubleEndedIterator, FusedIterator}; From 7129ea6e242b00938532537da41ddf5fa3e21471 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 2 May 2025 16:02:35 +0200 Subject: [PATCH 1511/2924] rust: clean Rust 1.88.0's `unnecessary_transmutes` lint Starting with Rust 1.88.0 (expected 2025-06-26) [1][2], `rustc` may introduce a new lint that catches unnecessary transmutes, e.g.: error: unnecessary transmute --> rust/uapi/uapi_generated.rs:23242:18 | 23242 | unsafe { ::core::mem::transmute(self._bitfield_1.get(0usize, 1u8) as u8) } | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: replace this with: `(self._bitfield_1.get(0usize, 1u8) as u8 == 1)` | = note: `-D unnecessary-transmutes` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(unnecessary_transmutes)]` There are a lot of them (at least 300), but luckily they are all in `bindgen`-generated code. Thus clean all up by allowing it there. Since unknown lints trigger a lint itself in older compilers, do it conditionally so that we can keep the `unknown_lints` lint enabled. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust/pull/136083 [1] Link: https://github.com/rust-lang/rust/issues/136067 [2] Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250502140237.1659624-4-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- init/Kconfig | 3 +++ rust/bindings/lib.rs | 1 + rust/uapi/lib.rs | 1 + 3 files changed, 5 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 63f5974b9fa6ea..4cdd1049283c15 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -140,6 +140,9 @@ config LD_CAN_USE_KEEP_IN_OVERLAY config RUSTC_HAS_COERCE_POINTEE def_bool RUSTC_VERSION >= 108400 +config RUSTC_HAS_UNNECESSARY_TRANSMUTES + def_bool RUSTC_VERSION >= 108800 + config PAHOLE_VERSION int default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE)) diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 014af0d1fc70cb..a08eb5518cac5d 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -26,6 +26,7 @@ #[allow(dead_code)] #[allow(clippy::undocumented_unsafe_blocks)] +#[cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] mod bindings_raw { // Manual definition for blocklisted types. type __kernel_size_t = usize; diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index 13495910271faf..c98d7a8cde77da 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -24,6 +24,7 @@ unreachable_pub, unsafe_op_in_unsafe_fn )] +#![cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] // Manual definition of blocklisted types. type __kernel_size_t = usize; From c016722fd57551f8a6fcf472c9d2bcf2130ea0ec Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 2 May 2025 16:02:36 +0200 Subject: [PATCH 1512/2924] rust: clean Rust 1.88.0's warning about `clippy::disallowed_macros` configuration Starting with Rust 1.88.0 (expected 2025-06-26) [1], Clippy may start warning about paths that do not resolve in the `disallowed_macros` configuration: warning: `kernel::dbg` does not refer to an existing macro --> .clippy.toml:10:5 | 10 | { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool" }, | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This is a lint we requested at [2], due to the trouble debugging the lint due to false negatives (e.g. [3]), which we use to emulate `clippy::dbg_macro` [4]. See commit 8577c9dca799 ("rust: replace `clippy::dbg_macro` with `disallowed_macros`") for more details. Given the false negatives are not resolved yet, it is expected that Clippy complains about not finding this macro. Thus, until the false negatives are fixed (and, even then, probably we will need to wait for the MSRV to raise enough), use the escape hatch to allow an invalid path. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust-clippy/pull/14397 [1] Link: https://github.com/rust-lang/rust-clippy/issues/11432 [2] Link: https://github.com/rust-lang/rust-clippy/issues/11431 [3] Link: https://github.com/rust-lang/rust-clippy/issues/11303 [4] Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250502140237.1659624-5-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- .clippy.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clippy.toml b/.clippy.toml index 815c94732ed785..137f41d203de37 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -7,5 +7,5 @@ check-private-items = true disallowed-macros = [ # The `clippy::dbg_macro` lint only works with `std::dbg!`, thus we simulate # it here, see: https://github.com/rust-lang/rust-clippy/issues/11303. - { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool" }, + { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool", allow-invalid = true }, ] From 211dcf77856db64c73e0c3b9ce0c624ec855daca Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 2 May 2025 16:02:37 +0200 Subject: [PATCH 1513/2924] rust: clean Rust 1.88.0's `clippy::uninlined_format_args` lint Starting with Rust 1.88.0 (expected 2025-06-26) [1], `rustc` may move back the `uninlined_format_args` to `style` from `pedantic` (it was there waiting for rust-analyzer suppotr), and thus we will start to see lints like: warning: variables can be used directly in the `format!` string --> rust/macros/kunit.rs:105:37 | 105 | let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{}", test); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args help: change this to | 105 - let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{}", test); 105 + let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{test}"); There is even a case that is a pure removal: warning: variables can be used directly in the `format!` string --> rust/macros/module.rs:51:13 | 51 | format!("{field}={content}\0", field = field, content = content) | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args help: change this to | 51 - format!("{field}={content}\0", field = field, content = content) 51 + format!("{field}={content}\0") The lints all seem like nice cleanups, thus just apply them. We may want to disable `allow-mixed-uninlined-format-args` in the future. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust-clippy/pull/14160 [1] Acked-by: Benno Lossin Reviewed-by: Tamir Duberstein Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250502140237.1659624-6-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- drivers/gpu/nova-core/gpu.rs | 2 +- rust/kernel/str.rs | 46 +++++++++++------------ rust/macros/kunit.rs | 13 ++----- rust/macros/module.rs | 19 +++------- rust/macros/paste.rs | 2 +- rust/pin-init/internal/src/pinned_drop.rs | 3 +- 6 files changed, 35 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 17c9660da45034..ab0e5a72a05992 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -93,7 +93,7 @@ impl Chipset { // For now, redirect to fmt::Debug for convenience. impl fmt::Display for Chipset { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?}", self) + write!(f, "{self:?}") } } diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 878111cb77bc84..fb61ce81ea2868 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -73,7 +73,7 @@ impl fmt::Display for BStr { b'\r' => f.write_str("\\r")?, // Printable characters. 0x20..=0x7e => f.write_char(b as char)?, - _ => write!(f, "\\x{:02x}", b)?, + _ => write!(f, "\\x{b:02x}")?, } } Ok(()) @@ -109,7 +109,7 @@ impl fmt::Debug for BStr { b'\\' => f.write_str("\\\\")?, // Printable characters. 0x20..=0x7e => f.write_char(b as char)?, - _ => write!(f, "\\x{:02x}", b)?, + _ => write!(f, "\\x{b:02x}")?, } } f.write_char('"') @@ -447,7 +447,7 @@ impl fmt::Display for CStr { // Printable character. f.write_char(c as char)?; } else { - write!(f, "\\x{:02x}", c)?; + write!(f, "\\x{c:02x}")?; } } Ok(()) @@ -479,7 +479,7 @@ impl fmt::Debug for CStr { // Printable characters. b'\"' => f.write_str("\\\"")?, 0x20..=0x7e => f.write_char(c as char)?, - _ => write!(f, "\\x{:02x}", c)?, + _ => write!(f, "\\x{c:02x}")?, } } f.write_str("\"") @@ -641,13 +641,13 @@ mod tests { #[test] fn test_cstr_display() { let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap(); - assert_eq!(format!("{}", hello_world), "hello, world!"); + assert_eq!(format!("{hello_world}"), "hello, world!"); let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap(); - assert_eq!(format!("{}", non_printables), "\\x01\\x09\\x0a"); + assert_eq!(format!("{non_printables}"), "\\x01\\x09\\x0a"); let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap(); - assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu"); + assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap(); - assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80"); + assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); } #[test] @@ -658,47 +658,47 @@ mod tests { bytes[i as usize] = i.wrapping_add(1); } let cstr = CStr::from_bytes_with_nul(&bytes).unwrap(); - assert_eq!(format!("{}", cstr), ALL_ASCII_CHARS); + assert_eq!(format!("{cstr}"), ALL_ASCII_CHARS); } #[test] fn test_cstr_debug() { let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap(); - assert_eq!(format!("{:?}", hello_world), "\"hello, world!\""); + assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap(); - assert_eq!(format!("{:?}", non_printables), "\"\\x01\\x09\\x0a\""); + assert_eq!(format!("{non_printables:?}"), "\"\\x01\\x09\\x0a\""); let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap(); - assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\""); + assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap(); - assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\""); + assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); } #[test] fn test_bstr_display() { let hello_world = BStr::from_bytes(b"hello, world!"); - assert_eq!(format!("{}", hello_world), "hello, world!"); + assert_eq!(format!("{hello_world}"), "hello, world!"); let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); - assert_eq!(format!("{}", escapes), "_\\t_\\n_\\r_\\_'_\"_"); + assert_eq!(format!("{escapes}"), "_\\t_\\n_\\r_\\_'_\"_"); let others = BStr::from_bytes(b"\x01"); - assert_eq!(format!("{}", others), "\\x01"); + assert_eq!(format!("{others}"), "\\x01"); let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); - assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu"); + assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); - assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80"); + assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); } #[test] fn test_bstr_debug() { let hello_world = BStr::from_bytes(b"hello, world!"); - assert_eq!(format!("{:?}", hello_world), "\"hello, world!\""); + assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); - assert_eq!(format!("{:?}", escapes), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); + assert_eq!(format!("{escapes:?}"), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); let others = BStr::from_bytes(b"\x01"); - assert_eq!(format!("{:?}", others), "\"\\x01\""); + assert_eq!(format!("{others:?}"), "\"\\x01\""); let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); - assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\""); + assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); - assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\""); + assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); } } diff --git a/rust/macros/kunit.rs b/rust/macros/kunit.rs index 4f553ecf40c0a7..99ccac82edde3a 100644 --- a/rust/macros/kunit.rs +++ b/rust/macros/kunit.rs @@ -15,10 +15,7 @@ pub(crate) fn kunit_tests(attr: TokenStream, ts: TokenStream) -> TokenStream { } if attr.len() > 255 { - panic!( - "The test suite name `{}` exceeds the maximum length of 255 bytes", - attr - ) + panic!("The test suite name `{attr}` exceeds the maximum length of 255 bytes") } let mut tokens: Vec<_> = ts.into_iter().collect(); @@ -102,16 +99,14 @@ pub(crate) fn kunit_tests(attr: TokenStream, ts: TokenStream) -> TokenStream { let mut kunit_macros = "".to_owned(); let mut test_cases = "".to_owned(); for test in &tests { - let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{}", test); + let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{test}"); let kunit_wrapper = format!( - "unsafe extern \"C\" fn {}(_test: *mut kernel::bindings::kunit) {{ {}(); }}", - kunit_wrapper_fn_name, test + "unsafe extern \"C\" fn {kunit_wrapper_fn_name}(_test: *mut kernel::bindings::kunit) {{ {test}(); }}" ); writeln!(kunit_macros, "{kunit_wrapper}").unwrap(); writeln!( test_cases, - " kernel::kunit::kunit_case(kernel::c_str!(\"{}\"), {}),", - test, kunit_wrapper_fn_name + " kernel::kunit::kunit_case(kernel::c_str!(\"{test}\"), {kunit_wrapper_fn_name})," ) .unwrap(); } diff --git a/rust/macros/module.rs b/rust/macros/module.rs index a9418fbc9b4453..2f66107847f785 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -48,7 +48,7 @@ impl<'a> ModInfoBuilder<'a> { ) } else { // Loadable modules' modinfo strings go as-is. - format!("{field}={content}\0", field = field, content = content) + format!("{field}={content}\0") }; write!( @@ -126,10 +126,7 @@ impl ModuleInfo { }; if seen_keys.contains(&key) { - panic!( - "Duplicated key \"{}\". Keys can only be specified once.", - key - ); + panic!("Duplicated key \"{key}\". Keys can only be specified once."); } assert_eq!(expect_punct(it), ':'); @@ -143,10 +140,7 @@ impl ModuleInfo { "license" => info.license = expect_string_ascii(it), "alias" => info.alias = Some(expect_string_array(it)), "firmware" => info.firmware = Some(expect_string_array(it)), - _ => panic!( - "Unknown key \"{}\". Valid keys are: {:?}.", - key, EXPECTED_KEYS - ), + _ => panic!("Unknown key \"{key}\". Valid keys are: {EXPECTED_KEYS:?}."), } assert_eq!(expect_punct(it), ','); @@ -158,7 +152,7 @@ impl ModuleInfo { for key in REQUIRED_KEYS { if !seen_keys.iter().any(|e| e == key) { - panic!("Missing required key \"{}\".", key); + panic!("Missing required key \"{key}\"."); } } @@ -170,10 +164,7 @@ impl ModuleInfo { } if seen_keys != ordered_keys { - panic!( - "Keys are not ordered as expected. Order them like: {:?}.", - ordered_keys - ); + panic!("Keys are not ordered as expected. Order them like: {ordered_keys:?}."); } info diff --git a/rust/macros/paste.rs b/rust/macros/paste.rs index 6529a387673fb5..cce712d19855b5 100644 --- a/rust/macros/paste.rs +++ b/rust/macros/paste.rs @@ -50,7 +50,7 @@ fn concat_helper(tokens: &[TokenTree]) -> Vec<(String, Span)> { let tokens = group.stream().into_iter().collect::>(); segments.append(&mut concat_helper(tokens.as_slice())); } - token => panic!("unexpected token in paste segments: {:?}", token), + token => panic!("unexpected token in paste segments: {token:?}"), }; } diff --git a/rust/pin-init/internal/src/pinned_drop.rs b/rust/pin-init/internal/src/pinned_drop.rs index c824dd8b436dfb..c4ca7a70b726a5 100644 --- a/rust/pin-init/internal/src/pinned_drop.rs +++ b/rust/pin-init/internal/src/pinned_drop.rs @@ -28,8 +28,7 @@ pub(crate) fn pinned_drop(_args: TokenStream, input: TokenStream) -> TokenStream // Found the end of the generics, this should be `PinnedDrop`. assert!( matches!(tt, TokenTree::Ident(i) if i.to_string() == "PinnedDrop"), - "expected 'PinnedDrop', found: '{:?}'", - tt + "expected 'PinnedDrop', found: '{tt:?}'" ); pinned_drop_idx = Some(i); break; From 5595c31c370957aabe739ac3996aedba8267603f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Anikiel?= Date: Thu, 10 Apr 2025 11:54:20 +0000 Subject: [PATCH 1514/2924] x86/Kconfig: make CFI_AUTO_DEFAULT depend on !RUST or Rust >= 1.88 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling core::fmt::write() from rust code while FineIBT is enabled results in a kernel panic: [ 4614.199779] kernel BUG at arch/x86/kernel/cet.c:132! [ 4614.205343] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 4614.211781] CPU: 2 UID: 0 PID: 6057 Comm: dmabuf_dump Tainted: G U O 6.12.17-android16-0-g6ab38c534a43 #1 9da040f27673ec3945e23b998a0f8bd64c846599 [ 4614.227832] Tainted: [U]=USER, [O]=OOT_MODULE [ 4614.241247] RIP: 0010:do_kernel_cp_fault+0xea/0xf0 ... [ 4614.398144] RIP: 0010:_RNvXs5_NtNtNtCs3o2tGsuHyou_4core3fmt3num3impyNtB9_7Display3fmt+0x0/0x20 [ 4614.407792] Code: 48 f7 df 48 0f 48 f9 48 89 f2 89 c6 5d e9 18 fd ff ff 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 41 81 ea 14 61 af 2c 74 03 0f 0b 90 <66> 0f 1f 00 55 48 89 e5 48 89 f2 48 8b 3f be 01 00 00 00 5d e9 e7 [ 4614.428775] RSP: 0018:ffffb95acfa4ba68 EFLAGS: 00010246 [ 4614.434609] RAX: 0000000000000000 RBX: 0000000000000010 RCX: 0000000000000000 [ 4614.442587] RDX: 0000000000000007 RSI: ffffb95acfa4ba70 RDI: ffffb95acfa4bc88 [ 4614.450557] RBP: ffffb95acfa4bae0 R08: ffff0a00ffffff05 R09: 0000000000000070 [ 4614.458527] R10: 0000000000000000 R11: ffffffffab67eaf0 R12: ffffb95acfa4bcc8 [ 4614.466493] R13: ffffffffac5d50f0 R14: 0000000000000000 R15: 0000000000000000 [ 4614.474473] ? __cfi__RNvXs5_NtNtNtCs3o2tGsuHyou_4core3fmt3num3impyNtB9_7Display3fmt+0x10/0x10 [ 4614.484118] ? _RNvNtCs3o2tGsuHyou_4core3fmt5write+0x1d2/0x250 This happens because core::fmt::write() calls core::fmt::rt::Argument::fmt(), which currently has CFI disabled: library/core/src/fmt/rt.rs: 171 // FIXME: Transmuting formatter in new and indirectly branching to/calling 172 // it here is an explicit CFI violation. 173 #[allow(inline_no_sanitize)] 174 #[no_sanitize(cfi, kcfi)] 175 #[inline] 176 pub(super) unsafe fn fmt(&self, f: &mut Formatter<'_>) -> Result { This causes a Control Protection exception, because FineIBT has sealed off the original function's endbr64. This makes rust currently incompatible with FineIBT. Add a Kconfig dependency that prevents FineIBT from getting turned on by default if rust is enabled. [ Rust 1.88.0 (scheduled for 2025-06-26) should have this fixed [1], and thus we relaxed the condition with Rust >= 1.88. When `objtool` lands checking for this with e.g. [2], the plan is to ideally run that in upstream Rust's CI to prevent regressions early [3], since we do not control `core`'s source code. Alice tested the Rust PR backported to an older compiler. Peter would like that Rust provides a stable `core` which can be pulled into the kernel: "Relying on that much out of tree code is 'unfortunate'". - Miguel ] Signed-off-by: Paweł Anikiel Reviewed-by: Alice Ryhl Acked-by: Peter Zijlstra Link: https://github.com/rust-lang/rust/pull/139632 [1] Link: https://lore.kernel.org/rust-for-linux/20250410154556.GB9003@noisy.programming.kicks-ass.net/ [2] Link: https://github.com/rust-lang/rust/pull/139632#issuecomment-2801950873 [3] Link: https://lore.kernel.org/r/20250410115420.366349-1-panikiel@google.com Link: https://lore.kernel.org/r/att0-CANiq72kjDM0cKALVy4POEzhfdT4nO7tqz0Pm7xM+3=_0+L1t=A@mail.gmail.com [ Reduced splat. - Miguel ] Signed-off-by: Miguel Ojeda --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4b9f378e05f6be..5873c9e39919cb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2368,6 +2368,7 @@ config STRICT_SIGALTSTACK_SIZE config CFI_AUTO_DEFAULT bool "Attempt to use FineIBT by default at boot time" depends on FINEIBT + depends on !RUST || RUSTC_VERSION >= 108800 default y help Attempt to use FineIBT by default at boot time. If enabled, From 0093cb194a7511d1e68865fa35b763c72e44c2f0 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 5 May 2025 09:19:38 -0700 Subject: [PATCH 1515/2924] ice: use DSN instead of PCI BDF for ice_adapter index Use Device Serial Number instead of PCI bus/device/function for the index of struct ice_adapter. Functions on the same physical device should point to the very same ice_adapter instance, but with two PFs, when at least one of them is PCI-e passed-through to a VM, it is no longer the case - PFs will get seemingly random PCI BDF values, and thus indices, what finally leds to each of them being on their own instance of ice_adapter. That causes them to don't attempt any synchronization of the PTP HW clock usage, or any other future resources. DSN works nicely in place of the index, as it is "immutable" in terms of virtualization. Fixes: 0e2bddf9e5f9 ("ice: add ice_adapter for shared data across PFs on the same NIC") Suggested-by: Jacob Keller Suggested-by: Jakub Kicinski Suggested-by: Jiri Pirko Reviewed-by: Aleksandr Loktionov Signed-off-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250505161939.2083581-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_adapter.c | 47 ++++++++------------ drivers/net/ethernet/intel/ice/ice_adapter.h | 6 ++- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 01a08cfd0090ac..66e070095d1bbe 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright Red Hat -#include #include #include #include @@ -14,32 +13,16 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */ -#define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) -#define INDEX_FIELD_DEV GENMASK(31, 16) -#define INDEX_FIELD_BUS GENMASK(12, 5) -#define INDEX_FIELD_SLOT GENMASK(4, 0) - -static unsigned long ice_adapter_index(const struct pci_dev *pdev) +static unsigned long ice_adapter_index(u64 dsn) { - unsigned int domain = pci_domain_nr(pdev->bus); - - WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN)); - - switch (pdev->device) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: - case ICE_DEV_ID_E825C_SGMII: - return FIELD_PREP(INDEX_FIELD_DEV, pdev->device); - default: - return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | - FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | - FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); - } +#if BITS_PER_LONG == 64 + return dsn; +#else + return (u32)dsn ^ (u32)(dsn >> 32); +#endif } -static struct ice_adapter *ice_adapter_new(void) +static struct ice_adapter *ice_adapter_new(u64 dsn) { struct ice_adapter *adapter; @@ -47,6 +30,7 @@ static struct ice_adapter *ice_adapter_new(void) if (!adapter) return NULL; + adapter->device_serial_number = dsn; spin_lock_init(&adapter->ptp_gltsyn_time_lock); refcount_set(&adapter->refcount, 1); @@ -77,23 +61,26 @@ static void ice_adapter_free(struct ice_adapter *adapter) * Return: Pointer to ice_adapter on success. * ERR_PTR() on error. -ENOMEM is the only possible error. */ -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; int err; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); + WARN_ON_ONCE(adapter->device_serial_number != dsn); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(); + adapter = ice_adapter_new(dsn); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -110,11 +97,13 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) * * Context: Process, may sleep. */ -void ice_adapter_put(const struct pci_dev *pdev) +void ice_adapter_put(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index e233225848b384..ac15c0d2bc1a47 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -32,6 +32,7 @@ struct ice_port_list { * @refcount: Reference count. struct ice_pf objects hold the references. * @ctrl_pf: Control PF of the adapter * @ports: Ports list + * @device_serial_number: DSN cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -40,9 +41,10 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; + u64 device_serial_number; }; -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev); -void ice_adapter_put(const struct pci_dev *pdev); +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); +void ice_adapter_put(struct pci_dev *pdev); #endif /* _ICE_ADAPTER_H */ From 08e9f2d584c4732180edee4cb2dbfa7586d7d5a3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Mon, 5 May 2025 22:47:13 +0300 Subject: [PATCH 1516/2924] net: Lock netdevices during dev_shutdown __qdisc_destroy() calls into various qdiscs .destroy() op, which in turn can call .ndo_setup_tc(), which requires the netdev instance lock. This commit extends the critical section in unregister_netdevice_many_notify() to cover dev_shutdown() (and dev_tcx_uninstall() as a side-effect) and acquires the netdev instance lock in __dev_change_net_namespace() for the other dev_shutdown() call. This should now guarantee that for all qdisc ops, the netdev instance lock is held during .ndo_setup_tc(). Fixes: a0527ee2df3f ("net: hold netdev instance lock during qdisc ndo_setup_tc") Signed-off-by: Cosmin Ratiu Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250505194713.1723399-1-cratiu@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1be7cb73a6024f..92e004c354ea0d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11966,9 +11966,9 @@ void unregister_netdevice_many_notify(struct list_head *head, struct sk_buff *skb = NULL; /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); dev_tcx_uninstall(dev); - netdev_lock_ops(dev); dev_xdp_uninstall(dev); dev_memory_provider_uninstall(dev); netdev_unlock_ops(dev); @@ -12161,7 +12161,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, synchronize_net(); /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); + netdev_unlock_ops(dev); /* Notify protocols, that we are about to destroy * this device. They should clean all the things. From 35076d2223c731f7be75af61e67f90807384d030 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 6 May 2025 18:18:50 +0800 Subject: [PATCH 1517/2924] erofs: ensure the extra temporary copy is valid for shortened bvecs When compressed data deduplication is enabled, multiple logical extents may reference the same compressed physical cluster. The previous commit 94c43de73521 ("erofs: fix wrong primary bvec selection on deduplicated extents") already avoids using shortened bvecs. However, in such cases, the extra temporary buffers also need to be preserved for later use in z_erofs_fill_other_copies() to to prevent data corruption. IOWs, extra temporary buffers have to be retained not only due to varying start relative offsets (`pageofs_out`, as indicated by `pcl->multibases`) but also because of shortened bvecs. android.hardware.graphics.composer@2.1.so : 270696 bytes 0: 0.. 204185 | 204185 : 628019200.. 628084736 | 65536 -> 1: 204185.. 225536 | 21351 : 544063488.. 544129024 | 65536 2: 225536.. 270696 | 45160 : 0.. 0 | 0 com.android.vndk.v28.apex : 93814897 bytes ... 364: 53869896..54095257 | 225361 : 543997952.. 544063488 | 65536 -> 365: 54095257..54309344 | 214087 : 544063488.. 544129024 | 65536 366: 54309344..54514557 | 205213 : 544129024.. 544194560 | 65536 ... Both 204185 and 54095257 have the same start relative offset of 3481, but the logical page 55 of `android.hardware.graphics.composer@2.1.so` ranges from 225280 to 229632, forming a shortened bvec [225280, 225536) that cannot be used for decompressing the range from 54095257 to 54309344 of `com.android.vndk.v28.apex`. Since `pcl->multibases` is already meaningless, just mark `be->keepxcpy` on demand for simplicity. Again, this issue can only lead to data corruption if `-Ededupe` is on. Fixes: 94c43de73521 ("erofs: fix wrong primary bvec selection on deduplicated extents") Reviewed-by: Hongbo Li Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250506101850.191506-1-hsiangkao@linux.alibaba.com --- fs/erofs/zdata.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 5c061aaeeb45b3..b8e6b76c23d5ee 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -79,9 +79,6 @@ struct z_erofs_pcluster { /* L: whether partial decompression or not */ bool partial; - /* L: indicate several pageofs_outs or not */ - bool multibases; - /* L: whether extra buffer allocations are best-effort */ bool besteffort; @@ -1046,8 +1043,6 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, break; erofs_onlinefolio_split(folio); - if (f->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) - f->pcl->multibases = true; if (f->pcl->length < offset + end - map->m_la) { f->pcl->length = offset + end - map->m_la; f->pcl->pageofs_out = map->m_la & ~PAGE_MASK; @@ -1093,7 +1088,6 @@ struct z_erofs_backend { struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES]; struct super_block *sb; struct z_erofs_pcluster *pcl; - /* pages with the longest decompressed length for deduplication */ struct page **decompressed_pages; /* pages to keep the compressed data */ @@ -1102,6 +1096,8 @@ struct z_erofs_backend { struct list_head decompressed_secondary_bvecs; struct page **pagepool; unsigned int onstack_used, nr_pages; + /* indicate if temporary copies should be preserved for later use */ + bool keepxcpy; }; struct z_erofs_bvec_item { @@ -1112,18 +1108,20 @@ struct z_erofs_bvec_item { static void z_erofs_do_decompressed_bvec(struct z_erofs_backend *be, struct z_erofs_bvec *bvec) { + int poff = bvec->offset + be->pcl->pageofs_out; struct z_erofs_bvec_item *item; - unsigned int pgnr; - - if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) && - (bvec->end == PAGE_SIZE || - bvec->offset + bvec->end == be->pcl->length)) { - pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; - DBG_BUGON(pgnr >= be->nr_pages); - if (!be->decompressed_pages[pgnr]) { - be->decompressed_pages[pgnr] = bvec->page; + struct page **page; + + if (!(poff & ~PAGE_MASK) && (bvec->end == PAGE_SIZE || + bvec->offset + bvec->end == be->pcl->length)) { + DBG_BUGON((poff >> PAGE_SHIFT) >= be->nr_pages); + page = be->decompressed_pages + (poff >> PAGE_SHIFT); + if (!*page) { + *page = bvec->page; return; } + } else { + be->keepxcpy = true; } /* (cold path) one pcluster is requested multiple times */ @@ -1289,7 +1287,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) .alg = pcl->algorithmformat, .inplace_io = overlapped, .partial_decoding = pcl->partial, - .fillgaps = pcl->multibases, + .fillgaps = be->keepxcpy, .gfp = pcl->besteffort ? GFP_KERNEL : GFP_NOWAIT | __GFP_NORETRY }, be->pagepool); @@ -1346,7 +1344,6 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) pcl->length = 0; pcl->partial = true; - pcl->multibases = false; pcl->besteffort = false; pcl->bvset.nextpage = NULL; pcl->vcnt = 0; From 78cd408356fe3edbac66598772fd347bf3e32c1f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 5 May 2025 18:19:19 -0700 Subject: [PATCH 1518/2924] net: add missing instance lock to dev_set_promiscuity Accidentally spotted while trying to understand what else needs to be renamed to netif_ prefix. Most of the calls to dev_set_promiscuity are adjacent to dev_set_allmulti or dev_disable_lro so it should be safe to add the lock. Note that new netif_set_promiscuity is currently unused, the locked paths call __dev_set_promiscuity directly. Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250506011919.2882313-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + net/core/dev.c | 14 +------------- net/core/dev_api.c | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d11d013cabedb..7ea022750e4e09 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4972,6 +4972,7 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); diff --git a/net/core/dev.c b/net/core/dev.c index 92e004c354ea0d..11da1e272ec205 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9193,18 +9193,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - * Return 0 if successful or a negative errno code on error. - */ -int dev_set_promiscuity(struct net_device *dev, int inc) +int netif_set_promiscuity(struct net_device *dev, int inc) { unsigned int old_flags = dev->flags; int err; @@ -9216,7 +9205,6 @@ int dev_set_promiscuity(struct net_device *dev, int inc) dev_set_rx_mode(dev); return err; } -EXPORT_SYMBOL(dev_set_promiscuity); int netif_set_allmulti(struct net_device *dev, int inc, bool notify) { diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 90898cd540ced7..f9a160ab596f3a 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -267,6 +267,29 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +/** + * dev_set_promiscuity() - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. + */ +int dev_set_promiscuity(struct net_device *dev, int inc) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_promiscuity(dev, inc); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_promiscuity); + /** * dev_set_allmulti() - update allmulti count on a device * @dev: device From 650415fca0a97472fdd79725e35152614d1aad76 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 2 May 2025 10:58:00 +0200 Subject: [PATCH 1519/2924] nvme: unblock ctrl state transition for firmware update The original nvme subsystem design didn't have a CONNECTING state; the state machine allowed transitions from RESETTING to LIVE directly. With the introduction of nvme fabrics the CONNECTING state was introduce. Over time the nvme-pci started to use the CONNECTING state as well. Eventually, a bug fix for the nvme-fc started to depend that the only valid transition to LIVE was from CONNECTING. Though this change didn't update the firmware update handler which was still depending on RESETTING to LIVE transition. The simplest way to address it for the time being is to switch into CONNECTING state before going to LIVE state. Fixes: d2fe192348f9 ("nvme: only allow entering LIVE from CONNECTING state") Reported-by: Guenter Roeck Signed-off-by: Daniel Wagner Closes: https://lore.kernel.org/all/0134ea15-8d5f-41f7-9e9a-d7e6d82accaa@roeck-us.net Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Guenter Roeck --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index eb6ea8acb3cca7..ac53629fce68d4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4493,7 +4493,8 @@ static void nvme_fw_act_work(struct work_struct *work) msleep(100); } - if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING) || + !nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) return; nvme_unquiesce_io_queues(ctrl); From ffea7c73d181db273be8b306be6bc573dfe2257b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 Apr 2025 11:59:15 +0100 Subject: [PATCH 1520/2924] KVM: arm64: Properly save/restore HCRX_EL2 Rather than restoring HCRX_EL2 to a fixed value on vcpu exit, perform a full save/restore of the register, ensuring that we don't lose bits that would have been set at some point in the host kernel lifetime, such as the GCSEn bit. Fixes: ff5181d8a2a82 ("arm64/gcs: Provide basic EL2 setup to allow GCS usage at EL0 and EL1") Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250430105916.3815157-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/include/hyp/switch.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index b741ea6aefa58f..96f625dc725669 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -235,6 +235,8 @@ static inline void __deactivate_traps_mpam(void) static inline void __activate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); @@ -245,11 +247,8 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * EL1 instead of being trapped to EL2. */ if (system_supports_pmuv3()) { - struct kvm_cpu_context *hctxt; - write_sysreg(0, pmselr_el0); - hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); @@ -269,6 +268,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) hcrx &= ~clr; } + ctxt_sys_reg(hctxt, HCRX_EL2) = read_sysreg_s(SYS_HCRX_EL2); write_sysreg_s(hcrx, SYS_HCRX_EL2); } @@ -278,19 +278,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); write_sysreg(0, hstr_el2); if (system_supports_pmuv3()) { - struct kvm_cpu_context *hctxt; - - hctxt = host_data_ptr(host_ctxt); write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } if (cpus_have_final_cap(ARM64_HAS_HCX)) - write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2); __deactivate_traps_hfgxtr(vcpu); __deactivate_traps_mpam(); From ef296ee98bb19ee2a6dbf76155184c99da0d7c71 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 Apr 2025 11:59:16 +0100 Subject: [PATCH 1521/2924] KVM: arm64: Kill HCRX_HOST_FLAGS HCRX_HOST_FLAGS, like most of these hardcoded setups, are not a good match for options that can be selectively enabled or disabled. Nothing but the early setup is relying on it now, so kill the macro and move the bag of bits where they belong. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250430105916.3815157-3-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/include/asm/el2_setup.h | 2 +- arch/arm64/include/asm/kvm_arm.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index ebceaae3c749b8..d40e427ddad94a 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -52,7 +52,7 @@ mrs x0, id_aa64mmfr1_el1 ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 cbz x0, .Lskip_hcrx_\@ - mov_q x0, HCRX_HOST_FLAGS + mov_q x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) /* Enable GCS if supported */ mrs_s x1, SYS_ID_AA64PFR1_EL1 diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index bba4b0e9309154..e9c8a581e16f44 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -102,7 +102,6 @@ #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO) -#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) #define MPAMHCR_HOST_FLAGS 0 /* TCR_EL2 Registers bits */ From 3949e28786cd0afcd96a46ce6629245203f629e5 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Thu, 1 May 2025 16:24:50 +0000 Subject: [PATCH 1522/2924] KVM: arm64: Fix memory check in host_stage2_set_owner_locked() I found this simple bug while preparing some patches for pKVM. AFAICT, it should be harmless (besides crashing the kernel if it was misbehaving) Fixes: e94a7dea2972 ("KVM: arm64: Move host page ownership tracking to the hyp vmemmap") Signed-off-by: Mostafa Saleh Link: https://lore.kernel.org/r/20250501162450.2784043-1-smostafa@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2a5284f749b427..e80f3ebd3e2a26 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -503,7 +503,7 @@ int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { int ret; - if (!addr_is_memory(addr)) + if (!range_is_memory(addr, addr + size)) return -EPERM; ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, From 55dd5b4db3bf04cf077a8d1712f6295d4517c337 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 7 May 2025 11:49:41 +0200 Subject: [PATCH 1523/2924] udf: Make sure i_lenExtents is uptodate on inode eviction UDF maintains total length of all extents in i_lenExtents. Generally we keep extent lengths (and thus i_lenExtents) block aligned because it makes the file appending logic simpler. However the standard mandates that the inode size must match the length of all extents and thus we trim the last extent when closing the file. To catch possible bugs we also verify that i_lenExtents matches i_size when evicting inode from memory. Commit b405c1e58b73 ("udf: refactor udf_next_aext() to handle error") however broke the code updating i_lenExtents and thus udf_evict_inode() ended up spewing lots of errors about incorrectly sized extents although the extents were actually sized properly. Fix the updating of i_lenExtents to silence the errors. Fixes: b405c1e58b73 ("udf: refactor udf_next_aext() to handle error") CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/udf/truncate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 4f33a4a4888613..b4071c9cf8c951 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -115,7 +115,7 @@ void udf_truncate_tail_extent(struct inode *inode) } /* This inode entry is in-memory only and thus we don't have to mark * the inode dirty */ - if (ret == 0) + if (ret >= 0) iinfo->i_lenExtents = inode->i_size; brelse(epos.bh); } From 42420c50c68f3e95e90de2479464f420602229fc Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 30 Apr 2025 15:26:18 +0200 Subject: [PATCH 1524/2924] s390/pci: Fix missing check for zpci_create_device() error return The zpci_create_device() function returns an error pointer that needs to be checked before dereferencing it as a struct zpci_dev pointer. Add the missing check in __clp_add() where it was missed when adding the scan_list in the fixed commit. Simply not adding the device to the scan list results in the previous behavior. Cc: stable@vger.kernel.org Fixes: 0467cdde8c43 ("s390/pci: Sort PCI functions prior to creating virtual busses") Signed-off-by: Niklas Schnelle Reviewed-by: Gerd Bayer Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_clp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 9a929bbcc39722..241f7251c8730f 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -428,6 +428,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; } zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (IS_ERR(zdev)) + return; list_add_tail(&zdev->entry, scan_list); } From 05a2538f2b48500cf4e8a0a0ce76623cc5bafcf1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 30 Apr 2025 15:26:19 +0200 Subject: [PATCH 1525/2924] s390/pci: Fix duplicate pci_dev_put() in disable_slot() when PF has child VFs With commit bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") the code to ignore power off of a PF that has child VFs was changed from a direct return to a goto to the unlock and pci_dev_put() section. The change however left the existing pci_dev_put() untouched resulting in a doubple put. This can subsequently cause a use after free if the struct pci_dev is released in an unexpected state. Fix this by removing the extra pci_dev_put(). Cc: stable@vger.kernel.org Fixes: bcb5d6c76903 ("s390/pci: introduce lock to synchronize state of zpci_dev's") Signed-off-by: Niklas Schnelle Reviewed-by: Gerd Bayer Signed-off-by: Heiko Carstens --- drivers/pci/hotplug/s390_pci_hpc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 055518ee354dc9..e9e9aaa91770ae 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -59,7 +59,6 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); if (pdev && pci_num_vf(pdev)) { - pci_dev_put(pdev); rc = -EBUSY; goto out; } From a032f29a15412fab9f4352e0032836d51420a338 Mon Sep 17 00:00:00 2001 From: John Chau Date: Mon, 5 May 2025 01:55:13 +0900 Subject: [PATCH 1526/2924] platform/x86: thinkpad_acpi: Support also NEC Lavie X1475JAS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change get_thinkpad_model_data() to check for additional vendor name "NEC" in order to support NEC Lavie X1475JAS notebook (and perhaps more). The reason of this works with minimal changes is because NEC Lavie X1475JAS is a Thinkpad inside. ACPI dumps reveals its OEM ID to be "LENOVO", BIOS version "R2PET30W" matches typical Lenovo BIOS version, the existence of HKEY of LEN0268, with DMI fw string is "R2PHT24W". I compiled and tested with my own machine, attached the dmesg below as proof of work: [ 6.288932] thinkpad_acpi: ThinkPad ACPI Extras v0.26 [ 6.288937] thinkpad_acpi: http://ibm-acpi.sf.net/ [ 6.288938] thinkpad_acpi: ThinkPad BIOS R2PET30W (1.11 ), EC R2PHT24W [ 6.307000] thinkpad_acpi: radio switch found; radios are enabled [ 6.307030] thinkpad_acpi: This ThinkPad has standard ACPI backlight brightness control, supported by the ACPI video driver [ 6.307033] thinkpad_acpi: Disabling thinkpad-acpi brightness events by default... [ 6.320322] thinkpad_acpi: rfkill switch tpacpi_bluetooth_sw: radio is unblocked [ 6.371963] thinkpad_acpi: secondary fan control detected & enabled [ 6.391922] thinkpad_acpi: battery 1 registered (start 0, stop 85, behaviours: 0x7) [ 6.398375] input: ThinkPad Extra Buttons as /devices/platform/thinkpad_acpi/input/input13 Signed-off-by: John Chau Link: https://lore.kernel.org/r/20250504165513.295135-1-johnchau@0atlas.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/thinkpad_acpi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 5790095c175e6f..92b21e49faf62b 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -11478,6 +11478,8 @@ static int __must_check __init get_thinkpad_model_data( tp->vendor = PCI_VENDOR_ID_IBM; else if (dmi_name_in_vendors("LENOVO")) tp->vendor = PCI_VENDOR_ID_LENOVO; + else if (dmi_name_in_vendors("NEC")) + tp->vendor = PCI_VENDOR_ID_LENOVO; else return 0; From 0887817e4953885fbd6a5c1bec2fdd339261eb19 Mon Sep 17 00:00:00 2001 From: Runhua He Date: Wed, 7 May 2025 18:01:03 +0800 Subject: [PATCH 1527/2924] platform/x86/amd/pmc: Declare quirk_spurious_8042 for MECHREVO Wujie 14XA (GX4HRXL) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MECHREVO Wujie 14XA (GX4HRXL) wakes up immediately after s2idle entry. This happens regardless of whether the laptop is plugged into AC power, or whether any peripheral is plugged into the laptop. Similar to commit a55bdad5dfd1 ("platform/x86/amd/pmc: Disable keyboard wakeup on AMD Framework 13"), the MECHREVO Wujie 14XA wakes up almost instantly after s2idle suspend entry (IRQ1 is the keyboard): 2025-04-18 17:23:57,588 DEBUG: PM: Triggering wakeup from IRQ 9 2025-04-18 17:23:57,588 DEBUG: PM: Triggering wakeup from IRQ 1 Add this model to the spurious_8042 quirk to workaround this. This patch does not affect the wake-up function of the built-in keyboard. Because the firmware of this machine adds an insurance for keyboard wake-up events, as it always triggers an additional IRQ 9 to wake up the system. Suggested-by: Mingcong Bai Suggested-by: Xinhui Yang Suggested-by: Rong Zhang Fixes: a55bdad5dfd1 ("platform/x86/amd/pmc: Disable keyboard wakeup on AMD Framework 13") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4166 Cc: Mario Limonciello Link: https://zhuanldan.zhihu.com/p/730538041 Tested-by: Yemu Lu Signed-off-by: Runhua He Link: https://lore.kernel.org/r/20250507100103.995395-1-hua@aosc.io Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc-quirks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index b4f49720c87f62..2e3f6fc67c568d 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -217,6 +217,13 @@ static const struct dmi_system_id fwbug_list[] = { DMI_MATCH(DMI_BIOS_VERSION, "03.05"), } }, + { + .ident = "MECHREVO Wujie 14X (GX4HRXL)", + .driver_data = &quirk_spurious_8042, + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "WUJIE14-GX4HRXL"), + } + }, {} }; From bfcfe6d335a967f8ea0c1980960e6f0205b5de6e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 1 May 2025 15:17:02 +0200 Subject: [PATCH 1528/2924] platform/x86: asus-wmi: Fix wlan_ctrl_by_user detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wlan_ctrl_by_user detection was introduced by commit a50bd128f28c ("asus-wmi: record wlan status while controlled by userapp"). Quoting from that commit's commit message: """ When you call WMIMethod(DSTS, 0x00010011) to get WLAN status, it may return (1) 0x00050001 (On) (2) 0x00050000 (Off) (3) 0x00030001 (On) (4) 0x00030000 (Off) (5) 0x00000002 (Unknown) (1), (2) means that the model has hardware GPIO for WLAN, you can call WMIMethod(DEVS, 0x00010011, 1 or 0) to turn WLAN on/off. (3), (4) means that the model doesn’t have hardware GPIO, you need to use API or driver library to turn WLAN on/off, and call WMIMethod(DEVS, 0x00010012, 1 or 0) to set WLAN LED status. After you set WLAN LED status, you can see the WLAN status is changed with WMIMethod(DSTS, 0x00010011). Because the status is recorded lastly (ex: Windows), you can use it for synchronization. (5) means that the model doesn’t have WLAN device. WLAN is the ONLY special case with upper rule. """ The wlan_ctrl_by_user flag should be set on 0x0003000? ((3), (4) above) return values, but the flag mistakenly also gets set on laptops with 0x0005000? ((1), (2)) return values. This is causing rfkill problems on laptops where 0x0005000? is returned. Fix the check to only set the wlan_ctrl_by_user flag for 0x0003000? return values. Fixes: a50bd128f28c ("asus-wmi: record wlan status while controlled by userapp") Link: https://bugzilla.kernel.org/show_bug.cgi?id=219786 Signed-off-by: Hans de Goede Reviewed-by: Armin Wolf Link: https://lore.kernel.org/r/20250501131702.103360-2-hdegoede@redhat.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/asus-wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 0c697b46f436a3..47cc766624d7bb 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -4779,7 +4779,8 @@ static int asus_wmi_add(struct platform_device *pdev) goto fail_leds; asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result); - if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) + if ((result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) == + (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) asus->driver->wlan_ctrl_by_user = 1; if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) { From cd9c058489053e172a6654cad82ee936d1b09fab Mon Sep 17 00:00:00 2001 From: John Ernberg Date: Fri, 2 May 2025 11:40:55 +0000 Subject: [PATCH 1529/2924] xen: swiotlb: Use swiotlb bouncing if kmalloc allocation demands it Xen swiotlb support was missed when the patch set starting with 4ab5f8ec7d71 ("mm/slab: decouple ARCH_KMALLOC_MINALIGN from ARCH_DMA_MINALIGN") was merged. When running Xen on iMX8QXP, a SoC without IOMMU, the effect was that USB transfers ended up corrupted when there was more than one URB inflight at the same time. Add a call to dma_kmalloc_needs_bounce() to make sure that allocations too small for DMA get bounced via swiotlb. Closes: https://lore.kernel.org/linux-usb/ab2776f0-b838-4cf6-a12a-c208eb6aad59@actia.se/ Fixes: 4ab5f8ec7d71 ("mm/slab: decouple ARCH_KMALLOC_MINALIGN from ARCH_DMA_MINALIGN") Cc: stable@kernel.org # v6.5+ Signed-off-by: John Ernberg Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross Message-ID: <20250502114043.1968976-2-john.ernberg@actia.se> --- drivers/xen/swiotlb-xen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 1f65795cf5d7a2..ef56a2500ed69a 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -217,6 +217,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * buffering it. */ if (dma_capable(dev, dev_addr, size, true) && + !dma_kmalloc_needs_bounce(dev, size, dir) && !range_straddles_page_boundary(phys, size) && !xen_arch_need_swiotlb(dev, phys, dev_addr) && !is_swiotlb_force_bounce(dev)) From 687b2bae0efff9b25e071737d6af5004e6e35af5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 7 May 2025 07:34:24 -0600 Subject: [PATCH 1530/2924] io_uring: ensure deferred completions are flushed for multishot Multishot normally uses io_req_post_cqe() to post completions, but when stopping it, it may finish up with a deferred completion. This is fine, except if another multishot event triggers before the deferred completions get flushed. If this occurs, then CQEs may get reordered in the CQ ring, as new multishot completions get posted before the deferred ones are flushed. This can cause confusion on the application side, if strict ordering is required for the use case. When multishot posting via io_req_post_cqe(), flush any pending deferred completions first, if any. Cc: stable@vger.kernel.org # 6.1+ Reported-by: Norman Maurer Reported-by: Christian Mazakas Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 769814d7115302..541e65a1eebfde 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -848,6 +848,14 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags) struct io_ring_ctx *ctx = req->ctx; bool posted; + /* + * If multishot has already posted deferred completions, ensure that + * those are flushed first before posting this one. If not, CQEs + * could get reordered. + */ + if (!wq_list_empty(&ctx->submit_state.compl_reqs)) + __io_submit_flush_completions(ctx); + lockdep_assert(!io_wq_current_is_worker()); lockdep_assert_held(&ctx->uring_lock); From 90989869baae47ee2aa3bcb6f6eb9fbbe4287958 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Tue, 6 May 2025 16:44:56 -0400 Subject: [PATCH 1531/2924] xenbus: Allow PVH dom0 a non-local xenstore Make xenbus_init() allow a non-local xenstore for a PVH dom0 - it is currently forced to XS_LOCAL. With Hyperlaunch booting dom0 and a xenstore stubdom, dom0 can be handled as a regular XS_HVM following the late init path. Ideally we'd drop the use of xen_initial_domain() and just check for the event channel instead. However, ARM has a xen,enhanced no-xenstore mode, where the event channel and PFN would both be 0. Retain the xen_initial_domain() check, and use that for an additional check when the event channel is 0. Check the full 64bit HVM_PARAM_STORE_EVTCHN value to catch the off chance that high bits are set for the 32bit event channel. Signed-off-by: Jason Andryuk Change-Id: I5506da42e4c6b8e85079fefb2f193c8de17c7437 Reviewed-by: Stefano Stabellini Signed-off-by: Juergen Gross Message-ID: <20250506204456.5220-1-jason.andryuk@amd.com> --- drivers/xen/xenbus/xenbus_probe.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 6d32ffb0113650..86fe6e77905669 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -966,9 +966,15 @@ static int __init xenbus_init(void) if (xen_pv_domain()) xen_store_domain_type = XS_PV; if (xen_hvm_domain()) + { xen_store_domain_type = XS_HVM; - if (xen_hvm_domain() && xen_initial_domain()) - xen_store_domain_type = XS_LOCAL; + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + if (!v && xen_initial_domain()) + xen_store_domain_type = XS_LOCAL; + } if (xen_pv_domain() && !xen_start_info->store_evtchn) xen_store_domain_type = XS_LOCAL; if (xen_pv_domain() && xen_start_info->store_evtchn) @@ -987,10 +993,6 @@ static int __init xenbus_init(void) xen_store_interface = gfn_to_virt(xen_store_gfn); break; case XS_HVM: - err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); - if (err) - goto out_error; - xen_store_evtchn = (int)v; err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); if (err) goto out_error; From 1f0304dfd9d217c2f8b04a9ef4b3258a66eedd27 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Tue, 6 May 2025 17:09:33 -0400 Subject: [PATCH 1532/2924] xenbus: Use kref to track req lifetime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marek reported seeing a NULL pointer fault in the xenbus_thread callstack: BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: e030:__wake_up_common+0x4c/0x180 Call Trace: __wake_up_common_lock+0x82/0xd0 process_msg+0x18e/0x2f0 xenbus_thread+0x165/0x1c0 process_msg+0x18e is req->cb(req). req->cb is set to xs_wake_up(), a thin wrapper around wake_up(), or xenbus_dev_queue_reply(). It seems like it was xs_wake_up() in this case. It seems like req may have woken up the xs_wait_for_reply(), which kfree()ed the req. When xenbus_thread resumes, it faults on the zero-ed data. Linux Device Drivers 2nd edition states: "Normally, a wake_up call can cause an immediate reschedule to happen, meaning that other processes might run before wake_up returns." ... which would match the behaviour observed. Change to keeping two krefs on each request. One for the caller, and one for xenbus_thread. Each will kref_put() when finished, and the last will free it. This use of kref matches the description in Documentation/core-api/kref.rst Link: https://lore.kernel.org/xen-devel/ZO0WrR5J0xuwDIxW@mail-itl/ Reported-by: Marek Marczykowski-Górecki Fixes: fd8aa9095a95 ("xen: optimize xenbus driver for multiple concurrent xenstore accesses") Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross Message-ID: <20250506210935.5607-1-jason.andryuk@amd.com> --- drivers/xen/xenbus/xenbus.h | 2 ++ drivers/xen/xenbus/xenbus_comms.c | 9 ++++----- drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +- drivers/xen/xenbus/xenbus_xs.c | 18 ++++++++++++++++-- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 13821e7e825efb..9ac0427724a301 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -77,6 +77,7 @@ enum xb_req_state { struct xb_req_data { struct list_head list; wait_queue_head_t wq; + struct kref kref; struct xsd_sockmsg msg; uint32_t caller_req_id; enum xsd_sockmsg_type type; @@ -103,6 +104,7 @@ int xb_init_comms(void); void xb_deinit_comms(void); int xs_watch_msg(struct xs_watch_event *event); void xs_request_exit(struct xb_req_data *req); +void xs_free_req(struct kref *kref); int xenbus_match(struct device *_dev, const struct device_driver *_drv); int xenbus_dev_probe(struct device *_dev); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index e5fda0256feb3d..82df2da1b880b8 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -309,8 +309,8 @@ static int process_msg(void) virt_wmb(); req->state = xb_req_state_got_reply; req->cb(req); - } else - kfree(req); + } + kref_put(&req->kref, xs_free_req); } mutex_unlock(&xs_response_mutex); @@ -386,14 +386,13 @@ static int process_writes(void) state.req->msg.type = XS_ERROR; state.req->err = err; list_del(&state.req->list); - if (state.req->state == xb_req_state_aborted) - kfree(state.req); - else { + if (state.req->state != xb_req_state_aborted) { /* write err, then update state */ virt_wmb(); state.req->state = xb_req_state_got_reply; wake_up(&state.req->wq); } + kref_put(&state.req->kref, xs_free_req); mutex_unlock(&xb_write_mutex); diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 46f8916597e53d..f5c21ba64df571 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -406,7 +406,7 @@ void xenbus_dev_queue_reply(struct xb_req_data *req) mutex_unlock(&u->reply_mutex); kfree(req->body); - kfree(req); + kref_put(&req->kref, xs_free_req); kref_put(&u->kref, xenbus_file_free); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index d32c726f7a12d0..dcf9182c8451ad 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -112,6 +112,12 @@ static void xs_suspend_exit(void) wake_up_all(&xs_state_enter_wq); } +void xs_free_req(struct kref *kref) +{ + struct xb_req_data *req = container_of(kref, struct xb_req_data, kref); + kfree(req); +} + static uint32_t xs_request_enter(struct xb_req_data *req) { uint32_t rq_id; @@ -237,6 +243,12 @@ static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg) req->caller_req_id = req->msg.req_id; req->msg.req_id = xs_request_enter(req); + /* + * Take 2nd ref. One for this thread, and the second for the + * xenbus_thread. + */ + kref_get(&req->kref); + mutex_lock(&xb_write_mutex); list_add_tail(&req->list, &xb_write_list); notify = list_is_singular(&xb_write_list); @@ -261,8 +273,8 @@ static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg) if (req->state == xb_req_state_queued || req->state == xb_req_state_wait_reply) req->state = xb_req_state_aborted; - else - kfree(req); + + kref_put(&req->kref, xs_free_req); mutex_unlock(&xb_write_mutex); return ret; @@ -291,6 +303,7 @@ int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par) req->cb = xenbus_dev_queue_reply; req->par = par; req->user_req = true; + kref_init(&req->kref); xs_send(req, msg); @@ -319,6 +332,7 @@ static void *xs_talkv(struct xenbus_transaction t, req->num_vecs = num_vecs; req->cb = xs_wake_up; req->user_req = false; + kref_init(&req->kref); msg.req_id = 0; msg.tx_id = t.id; From 428dc9fc0873989d73918d4a9cc22745b7bbc799 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 May 2025 11:30:39 -1000 Subject: [PATCH 1533/2924] sched_ext: bpf_iter_scx_dsq_new() should always initialize iterator BPF programs may call next() and destroy() on BPF iterators even after new() returns an error value (e.g. bpf_for_each() macro ignores error returns from new()). bpf_iter_scx_dsq_new() could leave the iterator in an uninitialized state after an error return causing bpf_iter_scx_dsq_next() to dereference garbage data. Make bpf_iter_scx_dsq_new() always clear $kit->dsq so that next() and destroy() become noops. Signed-off-by: Tejun Heo Fixes: 650ba21b131e ("sched_ext: Implement DSQ iterator") Cc: stable@vger.kernel.org # v6.12+ Acked-by: Andrea Righi --- kernel/sched/ext.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 4e37b40ce280cb..f5133249fd4d92 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6827,6 +6827,12 @@ __bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, BUILD_BUG_ON(__alignof__(struct bpf_iter_scx_dsq_kern) != __alignof__(struct bpf_iter_scx_dsq)); + /* + * next() and destroy() will be called regardless of the return value. + * Always clear $kit->dsq. + */ + kit->dsq = NULL; + if (flags & ~__SCX_DSQ_ITER_USER_FLAGS) return -EINVAL; From 1e2e3044c1bc64a64aa0eaf7c17f7832c26c9775 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 29 Apr 2025 15:05:59 -0400 Subject: [PATCH 1534/2924] Bluetooth: MGMT: Fix MGMT_OP_ADD_DEVICE invalid device flags Device flags could be updated in the meantime while MGMT_OP_ADD_DEVICE is pending on hci_update_passive_scan_sync so instead of setting the current_flags as cmd->user_data just do a lookup using hci_conn_params_lookup and use the latest stored flags. Fixes: a182d9c84f9c ("Bluetooth: MGMT: Fix Add Device to responding before completing") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c1e1e529e26cc2..46b22708dfbd2d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7506,11 +7506,16 @@ static void add_device_complete(struct hci_dev *hdev, void *data, int err) struct mgmt_cp_add_device *cp = cmd->param; if (!err) { + struct hci_conn_params *params; + + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); + device_added(cmd->sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); device_flags_changed(NULL, hdev, &cp->addr.bdaddr, cp->addr.type, hdev->conn_flags, - PTR_UINT(cmd->user_data)); + params ? params->flags : 0); } mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_ADD_DEVICE, @@ -7613,8 +7618,6 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - cmd->user_data = UINT_PTR(current_flags); - err = hci_cmd_sync_queue(hdev, add_device_sync, cmd, add_device_complete); if (err < 0) { From a6aeb739974ec73e5217c75a7c008a688d3d5cf1 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 7 May 2025 09:50:44 +0300 Subject: [PATCH 1535/2924] module: ensure that kobject_put() is safe for module type kobjects In 'lookup_or_create_module_kobject()', an internal kobject is created using 'module_ktype'. So call to 'kobject_put()' on error handling path causes an attempt to use an uninitialized completion pointer in 'module_kobject_release()'. In this scenario, we just want to release kobject without an extra synchronization required for a regular module unloading process, so adding an extra check whether 'complete()' is actually required makes 'kobject_put()' safe. Reported-by: syzbot+7fb8a372e1f6add936dd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=7fb8a372e1f6add936dd Fixes: 942e443127e9 ("module: Fix mod->mkobj.kobj potentially freed too early") Cc: stable@vger.kernel.org Suggested-by: Petr Pavlu Signed-off-by: Dmitry Antipov Link: https://lore.kernel.org/r/20250507065044.86529-1-dmantipov@yandex.ru Signed-off-by: Petr Pavlu --- kernel/params.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/params.c b/kernel/params.c index e668fc90b83ec5..b92d64161b758d 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -943,7 +943,9 @@ struct kset *module_kset; static void module_kobj_release(struct kobject *kobj) { struct module_kobject *mk = to_module_kobject(kobj); - complete(mk->kobj_completion); + + if (mk->kobj_completion) + complete(mk->kobj_completion); } const struct kobj_type module_ktype = { From 3ca02e63edccb78ef3659bebc68579c7224a6ca2 Mon Sep 17 00:00:00 2001 From: Paul Aurich Date: Tue, 6 May 2025 22:28:09 -0700 Subject: [PATCH 1536/2924] smb: client: Avoid race in open_cached_dir with lease breaks A pre-existing valid cfid returned from find_or_create_cached_dir might race with a lease break, meaning open_cached_dir doesn't consider it valid, and thinks it's newly-constructed. This leaks a dentry reference if the allocation occurs before the queued lease break work runs. Avoid the race by extending holding the cfid_list_lock across find_or_create_cached_dir and when the result is checked. Cc: stable@vger.kernel.org Reviewed-by: Henrique Carvalho Signed-off-by: Paul Aurich Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index fe738623cf1ba9..240d82c6f90806 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -29,7 +29,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, { struct cached_fid *cfid; - spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { if (!strcmp(cfid->path, path)) { /* @@ -38,25 +37,20 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, * being deleted due to a lease break. */ if (!cfid->time || !cfid->has_lease) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } kref_get(&cfid->refcount); - spin_unlock(&cfids->cfid_list_lock); return cfid; } } if (lookup_only) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } if (cfids->num_entries >= max_cached_dirs) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } cfid = init_cached_dir(path); if (cfid == NULL) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } cfid->cfids = cfids; @@ -74,7 +68,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, */ cfid->has_lease = true; - spin_unlock(&cfids->cfid_list_lock); return cfid; } @@ -187,8 +180,10 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, if (!utf16_path) return -ENOMEM; + spin_lock(&cfids->cfid_list_lock); cfid = find_or_create_cached_dir(cfids, path, lookup_only, tcon->max_cached_dirs); if (cfid == NULL) { + spin_unlock(&cfids->cfid_list_lock); kfree(utf16_path); return -ENOENT; } @@ -197,7 +192,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, * Otherwise, it is either a new entry or laundromat worker removed it * from @cfids->entries. Caller will put last reference if the latter. */ - spin_lock(&cfids->cfid_list_lock); if (cfid->has_lease && cfid->time) { spin_unlock(&cfids->cfid_list_lock); *ret_cfid = cfid; From 473f09f362e5979efbff40c9bbce58892ad39d66 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 6 May 2025 00:22:26 -0400 Subject: [PATCH 1537/2924] bcachefs: journal_shutdown is EROFS, not EIO We often filter out EROFS errors to avoid log spew after an emergency shutdown - journal_shutdown is just another emergency shutdown error. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a615e4852deda8..d9ebffa5b3a282 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -269,7 +269,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ - x(EIO, journal_shutdown) \ + x(EROFS, journal_shutdown) \ x(EIO, journal_flush_err) \ x(EIO, journal_write_err) \ x(EIO, btree_node_read_err) \ From 2fea3aa76e352cd071bee71e5c43da339639b310 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 May 2025 13:50:00 -0400 Subject: [PATCH 1538/2924] bcachefs: Filter out harmless EROFS error messages These just indicate that we're shutting down. Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index fc396b9fa75451..dfdbb9259985b6 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -784,7 +784,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, goto err; ret = bch2_btree_write_buffer_tryflush(trans); - bch_err_msg(c, ret, "flushing btree write buffer"); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "flushing btree write buffer"); if (ret) goto err; From da18dabc3784663a088943e613c36cd17aeb52d3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 May 2025 16:54:25 -0400 Subject: [PATCH 1539/2924] bcachefs: Ensure superblock gets written when we go ERO When we go emergency read-only, make sure we do a final write_super() to persist counters and error counts - this can be critical for piecing together what fsck was doing. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 27943082c093c3..84a37d971ffdae 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -377,6 +377,11 @@ void bch2_fs_read_only(struct bch_fs *c) bch_verbose(c, "marking filesystem clean"); bch2_fs_mark_clean(c); } else { + /* Make sure error counts/counters are persisted */ + mutex_lock(&c->sb_lock); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + bch_verbose(c, "done going read-only, filesystem not clean"); } } From 8e4d28036c293241b312b1fceafb32b994f80fcc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 May 2025 13:32:15 -0400 Subject: [PATCH 1540/2924] bcachefs: Don't aggressively discard the journal We frequently use 'bcachefs list_journal -a' for debugging, as it provides a record of all btree transactions, and a history of what happened. But it's not so useful if we immediately discard journal buckets right after they're no longer dirty. This tweaks journal reclaim to only discard when we're low on space, keeping the journal mostly un-discarded. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index ea670c3c43d8a0..976464d8a695ae 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -266,10 +266,11 @@ void bch2_journal_space_available(struct journal *j) static bool should_discard_bucket(struct journal *j, struct journal_device *ja) { - bool ret; - spin_lock(&j->lock); - ret = ja->discard_idx != ja->dirty_idx_ondisk; + unsigned min_free = max(4, ja->nr / 8); + + bool ret = bch2_journal_dev_buckets_available(j, ja, journal_space_discarded) < min_free && + ja->discard_idx != ja->dirty_idx_ondisk; spin_unlock(&j->lock); return ret; From f04f03d3e99bc8f89b6af5debf07ff67d961bc23 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 7 May 2025 14:52:55 -0700 Subject: [PATCH 1541/2924] Input: synaptics - enable SMBus for HP Elitebook 850 G1 The kernel reports that the touchpad for this device can support SMBus mode. Reported-by: jt Link: https://lore.kernel.org/r/iys5dbv3ldddsgobfkxldazxyp54kay4bozzmagga6emy45jop@2ebvuxgaui4u Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 309c360aab5597..fdb47106011856 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -190,6 +190,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2054", /* E480 */ "LEN2055", /* E580 */ "LEN2068", /* T14 Gen 1 */ + "SYN3003", /* HP EliteBook 850 G1 */ "SYN3015", /* HP EliteBook 840 G2 */ "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ From 6d7ea0881000966607772451b789b5fb5766f11d Mon Sep 17 00:00:00 2001 From: Manuel Fombuena Date: Wed, 7 May 2025 12:05:26 -0700 Subject: [PATCH 1542/2924] Input: synaptics - enable InterTouch on Dynabook Portege X30-D [ 5.989588] psmouse serio1: synaptics: Your touchpad (PNP: TOS0213 PNP0f03) says it can support a different bus. If i2c-hid and hid-rmi are not used, you might want to try setting psmouse.synaptics_intertouch to 1 and report this to linux-input@vger.kernel.org. [ 6.039923] psmouse serio1: synaptics: Touchpad model: 1, fw: 9.32, id: 0x1e2a1, caps: 0xf00223/0x840300/0x12e800/0x52d884, board id: 3322, fw id: 2658004 The board is labelled TM3322. Present on the Toshiba / Dynabook Portege X30-D and possibly others. Confirmed working well with psmouse.synaptics_intertouch=1 and local build. Signed-off-by: Manuel Fombuena Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB9597711E7933A08389FEC31DB888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index fdb47106011856..9c63e3c31fa0a3 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -196,6 +196,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ + "TOS0213", /* Dynabook Portege X30-D */ NULL }; #endif From 47d768b32e644b56901bb4bbbdb1feb01ea86c85 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Wed, 7 May 2025 12:06:32 -0700 Subject: [PATCH 1543/2924] Input: synaptics - enable InterTouch on Dynabook Portege X30L-G Enable InterTouch mode on Dynabook Portege X30L-G by adding "TOS01f6" to the list of SMBus-enabled variants. Reported-by: Xuntao Chi Tested-by: Xuntao Chi Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB959786E4AC797160CDA93012B888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 9c63e3c31fa0a3..9a1592378c1b17 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -196,6 +196,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ + "TOS01f6", /* Dynabook Portege X30L-G */ "TOS0213", /* Dynabook Portege X30-D */ NULL }; From 2abc698ac77314e0de5b33a6d96a39c5159d88e4 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Wed, 7 May 2025 12:09:00 -0700 Subject: [PATCH 1544/2924] Input: synaptics - enable InterTouch on TUXEDO InfinityBook Pro 14 v5 Enable InterTouch mode on TUXEDO InfinityBook Pro 14 v5 by adding "SYN1221" to the list of SMBus-enabled variants. Add support for InterTouch on SYN1221 by adding it to the list of SMBus-enabled variants. Reported-by: Matthias Eilert Tested-by: Matthias Eilert Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB9597C033C4BC20EE2A0C4543B888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 9a1592378c1b17..a582bf6540c5da 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -190,6 +190,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2054", /* E480 */ "LEN2055", /* E580 */ "LEN2068", /* T14 Gen 1 */ + "SYN1221", /* TUXEDO InfinityBook Pro 14 v5 */ "SYN3003", /* HP EliteBook 850 G1 */ "SYN3015", /* HP EliteBook 840 G2 */ "SYN3052", /* HP EliteBook 840 G4 */ From a609cb4cc07aa9ab8f50466622814356c06f2c17 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Wed, 7 May 2025 12:12:15 -0700 Subject: [PATCH 1545/2924] Input: synaptics - enable InterTouch on Dell Precision M3800 Enable InterTouch mode on Dell Precision M3800 by adding "DLL060d" to the list of SMBus-enabled variants. Reported-by: Markus Rathgeb Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/PN3PR01MB959789DD6D574E16141E5DC4B888A@PN3PR01MB9597.INDPRD01.PROD.OUTLOOK.COM Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index a582bf6540c5da..c5c88a75a019ec 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -164,6 +164,7 @@ static const char * const topbuttonpad_pnp_ids[] = { #ifdef CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS static const char * const smbus_pnp_ids[] = { /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */ + "DLL060d", /* Dell Precision M3800 */ "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN0049", /* Yoga 11e */ From e34090d7214e0516eb8722aee295cb2507317c07 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 3 May 2025 01:01:18 +0300 Subject: [PATCH 1546/2924] ipvs: fix uninit-value for saddr in do_output_route4 syzbot reports for uninit-value for the saddr argument [1]. commit 4754957f04f5 ("ipvs: do not use random local source address for tunnels") already implies that the input value of saddr should be ignored but the code is still reading it which can prevent to connect the route. Fix it by changing the argument to ret_saddr. [1] BUG: KMSAN: uninit-value in do_output_route4+0x42c/0x4d0 net/netfilter/ipvs/ip_vs_xmit.c:147 do_output_route4+0x42c/0x4d0 net/netfilter/ipvs/ip_vs_xmit.c:147 __ip_vs_get_out_rt+0x403/0x21d0 net/netfilter/ipvs/ip_vs_xmit.c:330 ip_vs_tunnel_xmit+0x205/0x2380 net/netfilter/ipvs/ip_vs_xmit.c:1136 ip_vs_in_hook+0x1aa5/0x35b0 net/netfilter/ipvs/ip_vs_core.c:2063 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf7/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip_local_out+0x758/0x7e0 net/ipv4/ip_output.c:118 ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x6a/0x3c0 net/ipv4/ip_output.c:1501 udp_send_skb+0xfda/0x1b70 net/ipv4/udp.c:1195 udp_sendmsg+0x2fe3/0x33c0 net/ipv4/udp.c:1483 inet_sendmsg+0x1fc/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x267/0x380 net/socket.c:727 ____sys_sendmsg+0x91b/0xda0 net/socket.c:2566 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2620 __sys_sendmmsg+0x41d/0x880 net/socket.c:2702 __compat_sys_sendmmsg net/compat.c:360 [inline] __do_compat_sys_sendmmsg net/compat.c:367 [inline] __se_compat_sys_sendmmsg net/compat.c:364 [inline] __ia32_compat_sys_sendmmsg+0xc8/0x140 net/compat.c:364 ia32_sys_call+0x3ffa/0x41f0 arch/x86/include/generated/asm/syscalls_32.h:346 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x38/0x80 arch/x86/entry/syscall_32.c:331 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:369 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Uninit was created at: slab_post_alloc_hook mm/slub.c:4167 [inline] slab_alloc_node mm/slub.c:4210 [inline] __kmalloc_cache_noprof+0x8fa/0xe00 mm/slub.c:4367 kmalloc_noprof include/linux/slab.h:905 [inline] ip_vs_dest_dst_alloc net/netfilter/ipvs/ip_vs_xmit.c:61 [inline] __ip_vs_get_out_rt+0x35d/0x21d0 net/netfilter/ipvs/ip_vs_xmit.c:323 ip_vs_tunnel_xmit+0x205/0x2380 net/netfilter/ipvs/ip_vs_xmit.c:1136 ip_vs_in_hook+0x1aa5/0x35b0 net/netfilter/ipvs/ip_vs_core.c:2063 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf7/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip_local_out+0x758/0x7e0 net/ipv4/ip_output.c:118 ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x6a/0x3c0 net/ipv4/ip_output.c:1501 udp_send_skb+0xfda/0x1b70 net/ipv4/udp.c:1195 udp_sendmsg+0x2fe3/0x33c0 net/ipv4/udp.c:1483 inet_sendmsg+0x1fc/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x267/0x380 net/socket.c:727 ____sys_sendmsg+0x91b/0xda0 net/socket.c:2566 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2620 __sys_sendmmsg+0x41d/0x880 net/socket.c:2702 __compat_sys_sendmmsg net/compat.c:360 [inline] __do_compat_sys_sendmmsg net/compat.c:367 [inline] __se_compat_sys_sendmmsg net/compat.c:364 [inline] __ia32_compat_sys_sendmmsg+0xc8/0x140 net/compat.c:364 ia32_sys_call+0x3ffa/0x41f0 arch/x86/include/generated/asm/syscalls_32.h:346 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x38/0x80 arch/x86/entry/syscall_32.c:331 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:369 entry_SYSENTER_compat_after_hwframe+0x84/0x8e CPU: 0 UID: 0 PID: 22408 Comm: syz.4.5165 Not tainted 6.15.0-rc3-syzkaller-00019-gbc3372351d0c #0 PREEMPT(undef) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Reported-by: syzbot+04b9a82855c8aed20860@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68138dfa.050a0220.14dd7d.0017.GAE@google.com/ Fixes: 4754957f04f5 ("ipvs: do not use random local source address for tunnels") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_xmit.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3313bceb6cc99d..014f077403695f 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -119,13 +119,12 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) return false; } -/* Get route to daddr, update *saddr, optionally bind route to saddr */ +/* Get route to daddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - int rt_mode, __be32 *saddr) + int rt_mode, __be32 *ret_saddr) { struct flowi4 fl4; struct rtable *rt; - bool loop = false; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; @@ -135,23 +134,17 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, retry: rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { - /* Invalid saddr ? */ - if (PTR_ERR(rt) == -EINVAL && *saddr && - rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { - *saddr = 0; - flowi4_update_output(&fl4, 0, daddr, 0); - goto retry; - } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); return NULL; - } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { + } + if (rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); - *saddr = fl4.saddr; flowi4_update_output(&fl4, 0, daddr, fl4.saddr); - loop = true; + rt_mode = 0; goto retry; } - *saddr = fl4.saddr; + if (ret_saddr) + *ret_saddr = fl4.saddr; return rt; } @@ -344,19 +337,15 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; } else { - __be32 saddr = htonl(INADDR_ANY); - noref = 0; /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = do_output_route4(net, daddr, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, ret_saddr); if (!rt) goto err_unreach; - if (ret_saddr) - *ret_saddr = saddr; } local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; From 8478a729c0462273188263136880480729e9efca Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 7 May 2025 17:01:59 +0200 Subject: [PATCH 1547/2924] netfilter: ipset: fix region locking in hash types Region locking introduced in v5.6-rc4 contained three macros to handle the region locks: ahash_bucket_start(), ahash_bucket_end() which gave back the start and end hash bucket values belonging to a given region lock and ahash_region() which should give back the region lock belonging to a given hash bucket. The latter was incorrect which can lead to a race condition between the garbage collector and adding new elements when a hash type of set is defined with timeouts. Fixes: f66ee0410b1c ("netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" reports") Reported-by: Kota Toda Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index cf3ce72c3de645..5251524b96afac 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -64,7 +64,7 @@ struct hbucket { #define ahash_sizeof_regions(htable_bits) \ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) #define ahash_region(n, htable_bits) \ - ((n) % ahash_numof_locks(htable_bits)) + ((n) / jhash_size(HTABLE_REGION_BITS)) #define ahash_bucket_start(h, htable_bits) \ ((htable_bits) < HTABLE_REGION_BITS ? 0 \ : (h) * jhash_size(HTABLE_REGION_BITS)) From 9984db63742099ee3f3cff35cf71306d10e64356 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 14 Apr 2025 12:56:48 -0400 Subject: [PATCH 1548/2924] drm/amd/display: Fix invalid context error in dml helper [Why] "BUG: sleeping function called from invalid context" error. after: "drm/amd/display: Protect FPU in dml2_validate()/dml21_validate()" The populate_dml_plane_cfg_from_plane_state() uses the GFP_KERNEL flag for memory allocation, which shouldn't be used in atomic contexts. The allocation is needed only for using another helper function get_scaler_data_for_plane(). [How] Modify helpers to pass a pointer to scaler_data within existing context, eliminating the need for dynamic memory allocation/deallocation and copying. Fixes: 366e77cd4923 ("drm/amd/display: Protect FPU in dml2_validate()/dml21_validate()") Reviewed-by: Aurabindo Pillai Signed-off-by: Roman Li Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit bd3e84bc98f81b44f2c43936bdadc3241d654259) Cc: stable@vger.kernel.org --- .../amd/display/dc/dml2/dml2_translation_helper.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 2061d43b92e1b9..ab6baf2698012c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -973,7 +973,9 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p } } -static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context, struct scaler_data *out) +static struct scaler_data *get_scaler_data_for_plane( + const struct dc_plane_state *in, + struct dc_state *context) { int i; struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; @@ -994,7 +996,7 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc } ASSERT(i < MAX_PIPES); - memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); + return &temp_pipe->plane_res.scl_data; } static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, @@ -1057,11 +1059,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out const struct dc_plane_state *in, struct dc_state *context, const struct soc_bounding_box_st *soc) { - struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); - if (!scaler_data) - return; - - get_scaler_data_for_plane(in, context, scaler_data); + struct scaler_data *scaler_data = get_scaler_data_for_plane(in, context); out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; @@ -1126,8 +1124,6 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->DynamicMetadataTransmittedBytes[location] = 0; out->NumberOfCursors[location] = 1; - - kfree(scaler_data); } static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2, From f1c6be3999d2be2673a51a9be0caf9348e254e52 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 16 Apr 2025 11:26:54 -0400 Subject: [PATCH 1549/2924] drm/amd/display: more liberal vmin/vmax update for freesync [Why] FAMS2 expects vmin/vmax to be updated in the case when freesync is off, but supported. But we only update it when freesync is enabled. [How] Change the vsync handler such that dc_stream_adjust_vmin_vmax() its called irrespective of whether freesync is enabled. If freesync is supported, then there is no harm in updating vmin/vmax registers. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3546 Reviewed-by: ChiaHsuan Chung Signed-off-by: Aurabindo Pillai Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit cfb2d41831ee5647a4ae0ea7c24971a92d5dfa0d) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 536f73131c2d15..93dd9e273c6518 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -673,15 +673,21 @@ static void dm_crtc_high_irq(void *interrupt_params) spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); if (acrtc->dm_irq_params.stream && - acrtc->dm_irq_params.vrr_params.supported && - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE) { + acrtc->dm_irq_params.vrr_params.supported) { + bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; + bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; + bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE; + mod_freesync_handle_v_update(adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); + /* update vmin_vmax only if freesync is enabled, or only if PSR and REPLAY are disabled */ + if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { + dc_stream_adjust_vmin_vmax(adev->dm.dc, + acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); + } } /* From 2a24755774ef8db33139e2d9a968cc585c6488da Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 23 Apr 2025 10:02:11 -0600 Subject: [PATCH 1550/2924] drm/amd/display: Remove unnecessary DC_FP_START/DC_FP_END [WHY & HOW] Remove the unnecessary DC_FP_START/DC_FP_END pair to reduce time in preempt_disable. It also fixes "BUG: sleeping function called from invalid context" error messages because of calling kzalloc with GFP_KERNEL which can sleep. Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Hung Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 94da0735b67b3ada90a3e2e017d306f070306f1d) --- .../gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 2a59cc61ed8c91..944650cb13ded7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2114,8 +2114,6 @@ static bool dcn32_resource_construct( #define REG_STRUCT dccg_regs dccg_regs_init(); - DC_FP_START(); - ctx->dc_bios->regs = &bios_regs; pool->base.res_cap = &res_cap_dcn32; @@ -2501,14 +2499,10 @@ static bool dcn32_resource_construct( if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0)) dc->config.sdpif_request_limit_words_per_umc = 16; - DC_FP_END(); - return true; create_fail: - DC_FP_END(); - dcn32_resource_destruct(pool); return false; From eba692ca3abca258b3214a6e4126afefad1822f0 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Thu, 17 Apr 2025 10:24:29 -0400 Subject: [PATCH 1551/2924] drm/amd/display: Call FP Protect Before Mode Programming/Mode Support [Why] Memory allocation occurs within dml21_validate() for adding phantom planes. May cause kernel to be tainted due to usage of FP Start. [How] Move FP start from dml21_validate to before mode programming/mode support. Calculations requiring floating point are all done within mode programming or mode support. Reviewed-by: Alvin Lee Signed-off-by: Austin Zheng Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit fe3250f10819b411808ab9ae1d824c5fc9b59170) --- drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 5d16f36ec95c8b..ed6584535e898e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -234,7 +234,9 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s if (!result) return false; + DC_FP_START(); result = dml2_build_mode_programming(mode_programming); + DC_FP_END(); if (!result) return false; @@ -277,7 +279,9 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co mode_support->dml2_instance = dml_init->dml2_instance; dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; + DC_FP_START(); is_supported = dml2_check_mode_supported(mode_support); + DC_FP_END(); if (!is_supported) return false; @@ -288,16 +292,12 @@ bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml { bool out = false; - DC_FP_START(); - /* Use dml_validate_only for fast_validate path */ if (fast_validate) out = dml21_check_mode_support(in_dc, context, dml_ctx); else out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); - DC_FP_END(); - return out; } From 5a3846648c0523fd850b7f0aec78c0139453ab8b Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 18 Apr 2025 16:31:59 +0800 Subject: [PATCH 1552/2924] drm/amd/display: Shift DMUB AUX reply command if necessary [Why] Defined value of dmub AUX reply command field get updated but didn't adjust dm receiving side accordingly. [How] Check the received reply command value to see if it's updated version or not. Adjust it if necessary. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit d5c9ade755a9afa210840708a12a8f44c0d532f4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 93dd9e273c6518..d9292da5694802 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12758,8 +12758,11 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( goto out; } + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command & 0xF; + if (adev->dm.dmub_notify->aux_reply.command & 0xF0) + /* The reply is stored in the top nibble of the command. */ + payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; if (!payload->write && p_notify->aux_reply.length && (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { From bc70e11b550d37fbd9eaed0f113ba560894f1609 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 16:29:07 +0800 Subject: [PATCH 1553/2924] drm/amd/display: Fix the checking condition in dmub aux handling [Why & How] Fix the checking condition for detecting AUX_RET_ERROR_PROTOCOL_ERROR. It was wrongly checking by "not equals to" Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 1db6c9e9b62e1a8912f0a281c941099fca678da3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d9292da5694802..68bbe7d5777356 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12749,7 +12749,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( * Transient states before tunneling is enabled could * lead to this error. We can ignore this for now. */ - if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { + if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) { DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", payload->address, payload->length, p_notify->result); From 396dc51b3b7ea524bf8061f478332d0039e96d5d Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 16:56:54 +0800 Subject: [PATCH 1554/2924] drm/amd/display: Remove incorrect checking in dmub aux handler [Why & How] "Request length != reply length" is expected behavior defined in spec. It's not an invalid reply. Besides, replied data handling logic is not designed to be written in amdgpu_dm_process_dmub_aux_transfer_sync(). Remove the incorrectly handling section. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 81b5c6fa62af62fe89ae9576f41aae37830b94cb) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 68bbe7d5777356..d63f219de88ec2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12764,19 +12764,9 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { - - if (payload->length != p_notify->aux_reply.length) { - DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", - p_notify->aux_reply.length, - payload->address, payload->length); - *operation_result = AUX_RET_ERROR_INVALID_REPLY; - goto out; - } - + (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); - } /* success */ ret = p_notify->aux_reply.length; From 3924f45d4de7250a603fd7b50379237a6a0e5adf Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 17:50:03 +0800 Subject: [PATCH 1555/2924] drm/amd/display: Copy AUX read reply data whenever length > 0 [Why] amdgpu_dm_process_dmub_aux_transfer_sync() should return all exact data reply from the sink side. Don't do the analysis job in it. [How] Remove unnecessary check condition AUX_TRANSACTION_REPLY_AUX_ACK. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 9b540e3fe6796fec4fb1344f3be8952fc2f084d4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d63f219de88ec2..64df8ca448b3b8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12763,8 +12763,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( /* The reply is stored in the top nibble of the command. */ payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) + if (!payload->write && p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); From 65924ec69b29296845c7f628112353438e63ea56 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 19:22:14 +0800 Subject: [PATCH 1556/2924] drm/amd/display: Fix wrong handling for AUX_DEFER case [Why] We incorrectly ack all bytes get written when the reply actually is defer. When it's defer, means sink is not ready for the request. We should retry the request. [How] Only reply all data get written when receive I2C_ACK|AUX_ACK. Otherwise, reply the number of actual written bytes received from the sink. Add some messages to facilitate debugging as well. Fixes: ad6756b4d773 ("drm/amd/display: Shift dc link aux to aux_payload") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 3637e457eb0000bc37d8bbbec95964aad2fb29fd) Cc: stable@vger.kernel.org --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7ceedf626d23fe..074b79fd5822e7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -51,6 +51,9 @@ #define PEAK_FACTOR_X1000 1006 +/* + * This function handles both native AUX and I2C-Over-AUX transactions. + */ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -87,15 +90,25 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, if (adev->dm.aux_hpd_discon_quirk) { if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && operation_result == AUX_RET_ERROR_HPD_DISCON) { - result = 0; + result = msg->size; operation_result = AUX_RET_SUCCESS; } } - if (payload.write && result >= 0) - result = msg->size; + /* + * result equals to 0 includes the cases of AUX_DEFER/I2C_DEFER + */ + if (payload.write && result >= 0) { + if (result) { + /*one byte indicating partially written bytes. Force 0 to retry*/ + drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + result = 0; + } else if (!payload.reply[0]) + /*I2C_ACK|AUX_ACK*/ + result = msg->size; + } - if (result < 0) + if (result < 0) { switch (operation_result) { case AUX_RET_SUCCESS: break; @@ -114,6 +127,13 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } + drm_info(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + } + + if (payload.reply[0]) + drm_info(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + payload.reply[0]); + return result; } From b7e84fb708392b37e5dbb2a95db9b94a0e3f0aa2 Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Fri, 2 May 2025 11:19:26 -0400 Subject: [PATCH 1557/2924] drm/amdgpu/vcn: using separate VCN1_AON_SOC offset VCN1_AON_SOC_ADDRESS_3_0 offset varies on different VCN generations, the issue in vcn4.0.5 is caused by a different VCN1_AON_SOC_ADDRESS_3_0 offset. This patch does the following: 1. use the same offset for other VCN generations. 2. use the vcn4.0.5 special offset 3. update vcn_4_0 and vcn_5_0 Acked-by: Saleemkhan Jamadar Reviewed-by: Leo Liu Signed-off-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit 5c89ceda9984498b28716944633a9a01cbb2c90d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 - drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 3 ++- 8 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index cdcdae7f71ce97..83adf81defc711 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -66,7 +66,6 @@ #define VCN_ENC_CMD_REG_WAIT 0x0000000c #define VCN_AON_SOC_ADDRESS_2_0 0x1f800 -#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8e7a36f26e9cb3..b8d835c9e17eda 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index d716510b8dd686..3eec1b8feaeea4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 22ae1939476f0a..0b19f0ab4480da 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -40,6 +40,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index c6f6392c1c20b6..1f777c125b00de 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_HARVEST_MMSCH 0 @@ -614,7 +615,8 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3e176b4b7c69dd..012f6ea928ec66 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index ba603b2246e2ee..a1171e6152ed2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) +#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 0x38000) #define VCN_HARVEST_MMSCH 0 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index d99d05f42f1d93..b90da3d3e1406b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -533,7 +533,8 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); return; } From d0ce1aaa8531a4a4707711cab5721374751c51b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 May 2025 13:00:16 -0400 Subject: [PATCH 1558/2924] Revert "drm/amd: Stop evicting resources on APUs in suspend" This reverts commit 3a9626c816db901def438dc2513622e281186d39. This breaks S4 because we end up setting the s3/s0ix flags even when we are entering s4 since prepare is used by both flows. The causes both the S3/s0ix and s4 flags to be set which breaks several checks in the driver which assume they are mutually exclusive. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3634 Cc: Mario Limonciello Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit ce8f7d95899c2869b47ea6ce0b3e5bf304b2fff4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 18 ------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++--------- 3 files changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ef6e78224fdfa7..c3641331d4de73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1614,11 +1614,9 @@ static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_cap #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); -void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } -static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif void amdgpu_register_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b7f8f2ff143dd1..707e131f89d237 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1533,22 +1533,4 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } -/** - * amdgpu_choose_low_power_state - * - * @adev: amdgpu_device_pointer - * - * Choose the target low power state for the GPU - */ -void amdgpu_choose_low_power_state(struct amdgpu_device *adev) -{ - if (adev->in_runpm) - return; - - if (amdgpu_acpi_is_s0ix_active(adev)) - adev->in_s0ix = true; - else if (amdgpu_acpi_is_s3_active(adev)) - adev->in_s3 = true; -} - #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7f354cd532dc14..5ac7bd5942d014 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4949,15 +4949,13 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; - amdgpu_choose_low_power_state(adev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - goto unprepare; + return r; flush_delayed_work(&adev->gfx.gfx_off_delay_work); @@ -4968,15 +4966,10 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]); if (r) - goto unprepare; + return r; } return 0; - -unprepare: - adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; - - return r; } /** From 4aaffc85751da5722e858e4333e8cf0aa4b6c78f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 May 2025 13:46:46 -0400 Subject: [PATCH 1559/2924] drm/amdgpu: fix pm notifier handling Set the s3/s0ix and s4 flags in the pm notifier so that we can skip the resource evictions properly in pm prepare based on whether we are suspending or hibernating. Drop the eviction as processes are not frozen at this time, we we can end up getting stuck trying to evict VRAM while applications continue to submit work which causes the buffers to get pulled back into VRAM. v2: Move suspend flags out of pm notifier (Mario) Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4178 Fixes: 2965e6355dcd ("drm/amd: Add Suspend/Hibernate notification callback support") Cc: Mario Limonciello Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 06f2dcc241e7e5c681f81fbc46cacdf4bfd7d6d7) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 +++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +--------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5ac7bd5942d014..f8b3e04d71eda1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4907,28 +4907,20 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * @data: data * * This function is called when the system is about to suspend or hibernate. - * It is used to evict resources from the device before the system goes to - * sleep while there is still access to swap. + * It is used to set the appropriate flags so that eviction can be optimized + * in the pm prepare callback. */ static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, void *data) { struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); - int r; switch (mode) { case PM_HIBERNATION_PREPARE: adev->in_s4 = true; - fallthrough; - case PM_SUSPEND_PREPARE: - r = amdgpu_device_evict_resources(adev); - /* - * This is considered non-fatal at this time because - * amdgpu_device_prepare() will also fatally evict resources. - * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 - */ - if (r) - drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); + break; + case PM_POST_HIBERNATION: + adev->in_s4 = false; break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 24ee4710f807f5..72c807f5822e34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2615,13 +2615,8 @@ static int amdgpu_pmops_freeze(struct device *dev) static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; - - r = amdgpu_device_resume(drm_dev, true); - adev->in_s4 = false; - return r; + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) @@ -2634,9 +2629,6 @@ static int amdgpu_pmops_poweroff(struct device *dev) static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - - adev->in_s4 = false; return amdgpu_device_resume(drm_dev, true); } From f690e3974755a650259a45d71456decc9c96a282 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:45:04 -0400 Subject: [PATCH 1560/2924] drm/amdgpu/hdp4: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: c9b8dcabb52a ("drm/amdgpu/hdp4.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 5c937b4a6050316af37ef214825b6340b5e9e391) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index f1dc13b3ab38e6..cbbeadeb53f72d 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -41,7 +41,12 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 6beb6835c1fbb3f676aebb51a5fee6b77fed9308 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 6 May 2025 16:28:54 +0200 Subject: [PATCH 1561/2924] openvswitch: Fix unsafe attribute parsing in output_userspace() This patch replaces the manual Netlink attribute iteration in output_userspace() with nla_for_each_nested(), which ensures that only well-formed attributes are processed. Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Signed-off-by: Eelco Chaudron Acked-by: Ilya Maximets Acked-by: Aaron Conole Link: https://patch.msgid.link/0bd65949df61591d9171c0dc13e42cea8941da10.1746541734.git.echaudro@redhat.com Signed-off-by: Jakub Kicinski --- net/openvswitch/actions.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 61fea7baae5d5c..2f22ca59586f25 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -975,8 +975,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.mru = OVS_CB(skb)->mru; - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { + nla_for_each_nested(a, attr, rem) { switch (nla_type(a)) { case OVS_USERSPACE_ATTR_USERDATA: upcall.userdata = a; From 4a7843cc8a41b9612becccc07715ed017770eb89 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 6 May 2025 18:56:47 +0200 Subject: [PATCH 1562/2924] net: airoha: Add missing field to ppe_mbox_data struct The official Airoha EN7581 firmware requires adding max_packet field in ppe_mbox_data struct while the unofficial one used to develop the Airoha EN7581 flowtable support does not require this field. This patch does not introduce any real backwards compatible issue since EN7581 fw is not publicly available in linux-firmware or other repositories (e.g. OpenWrt) yet and the official fw version will use this new layout. For this reason this change needs to be backported. Moreover, make explicit the padding added by the compiler introducing the rsv array in init_info struct. At the same time use u32 instead of int for init_info and set_info struct definitions in ppe_mbox_data struct. Fixes: 23290c7bc190d ("net: airoha: Introduce Airoha NPU support") Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250506-airoha-en7581-fix-ppe_mbox_data-v5-1-29cabed6864d@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_npu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 7a5710f9ccf6a4..ead0625e781f57 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -104,12 +104,14 @@ struct ppe_mbox_data { u8 xpon_hal_api; u8 wan_xsi; u8 ct_joyme4; - int ppe_type; - int wan_mode; - int wan_sel; + u8 max_packet; + u8 rsv[3]; + u32 ppe_type; + u32 wan_mode; + u32 wan_sel; } init_info; struct { - int func_id; + u32 func_id; u32 size; u32 data; } set_info; From c4327229948879814229b46aa26a750718888503 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 5 May 2025 21:58:04 +0200 Subject: [PATCH 1563/2924] bpf: Scrub packet on bpf_redirect_peer When bpf_redirect_peer is used to redirect packets to a device in another network namespace, the skb isn't scrubbed. That can lead skb information from one namespace to be "misused" in another namespace. As one example, this is causing Cilium to drop traffic when using bpf_redirect_peer to redirect packets that just went through IPsec decryption to a container namespace. The following pwru trace shows (1) the packet path from the host's XFRM layer to the container's XFRM layer where it's dropped and (2) the number of active skb extensions at each function. NETNS MARK IFACE TUPLE FUNC 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 xfrm_rcv_cb .active_extensions = (__u8)2, 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 xfrm4_rcv_cb .active_extensions = (__u8)2, 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 gro_cells_receive .active_extensions = (__u8)2, [...] 4026533547 0 eth0 10.244.3.124:35473->10.244.2.158:53 skb_do_redirect .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 ip_rcv .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 ip_rcv_core .active_extensions = (__u8)2, [...] 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 udp_queue_rcv_one_skb .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 __xfrm_policy_check .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 __xfrm_decode_session .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 security_xfrm_decode_session .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 kfree_skb_reason(SKB_DROP_REASON_XFRM_POLICY) .active_extensions = (__u8)2, In this case, there are no XFRM policies in the container's network namespace so the drop is unexpected. When we decrypt the IPsec packet, the XFRM state used for decryption is set in the skb extensions. This information is preserved across the netns switch. When we reach the XFRM policy check in the container's netns, __xfrm_policy_check drops the packet with LINUX_MIB_XFRMINNOPOLS because a (container-side) XFRM policy can't be found that matches the (host-side) XFRM state used for decryption. This patch fixes this by scrubbing the packet when using bpf_redirect_peer, as is done on typical netns switches via veth devices except skb->mark and skb->tstamp are not zeroed. Fixes: 9aa1206e8f482 ("bpf: Add redirect_peer helper") Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/1728ead5e0fe45e7a6542c36bd4e3ca07a73b7d6.1746460653.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index 79cab4d78dc3c6..577a4504e26fa0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2509,6 +2509,7 @@ int skb_do_redirect(struct sk_buff *skb) goto out_drop; skb->dev = dev; dev_sw_netstats_rx_add(dev, skb->len); + skb_scrub_packet(skb, false); return -EAGAIN; } return flags & BPF_F_NEIGH ? From f5c79ffdc250bc8c90fd4fdf1e5d7ac4647912d5 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 5 May 2025 21:58:39 +0200 Subject: [PATCH 1564/2924] bpf: Clarify handling of mark and tstamp by redirect_peer When switching network namespaces with the bpf_redirect_peer helper, the skb->mark and skb->tstamp fields are not zeroed out like they can be on a typical netns switch. This patch clarifies that in the helper description. Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/ccc86af26d43c5c0b776bcba2601b7479c0d46d0.1746460653.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/bpf.h | 3 +++ tools/include/uapi/linux/bpf.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28705ae677849f..fd404729b1154b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 28705ae677849f..fd404729b1154b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The From e5641daa0ea1932e2b0fa7f27843e712244f6538 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:44 +0530 Subject: [PATCH 1565/2924] net: ti: icssg-prueth: Set XDP feature flags for ndev xdp_features demonstrates what all XDP capabilities are supported on a given network device. The driver needs to set these xdp_features flag to let the network stack know what XDP features a given driver is supporting. These flags need to be set for a given ndev irrespective of any XDP program being loaded or not. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-2-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 443f90fa65575a..ee35fecf61e7c6 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1109,11 +1109,6 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf) { struct bpf_prog *prog = bpf->prog; - xdp_features_t val; - - val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT; - xdp_set_features_flag(emac->ndev, val); if (!emac->xdpi.prog && !prog) return 0; @@ -1291,6 +1286,10 @@ static int prueth_netdev_init(struct prueth *prueth, ndev->hw_features = NETIF_F_SG; ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; ndev->hw_features |= NETIF_PRUETH_HSR_OFFLOAD_FEATURES; + xdp_set_features_flag(ndev, + NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT); netif_napi_add(ndev, &emac->napi_rx, icssg_napi_rx_poll); hrtimer_setup(&emac->rx_hrtimer, &emac_rx_timer_callback, CLOCK_MONOTONIC, From 8b3fae3e2376b70b7a76005263bc34d034b9c7bf Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:45 +0530 Subject: [PATCH 1566/2924] net: ti: icssg-prueth: Fix kernel panic during concurrent Tx queue access Add __netif_tx_lock() to ensure that only one packet is being transmitted at a time to avoid race conditions in the netif_txq struct and prevent packet data corruption. Failing to do so causes kernel panic with the following error: [ 2184.746764] ------------[ cut here ]------------ [ 2184.751412] kernel BUG at lib/dynamic_queue_limits.c:99! [ 2184.756728] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP logs: https://gist.github.com/MeghanaMalladiTI/9c7aa5fc3b7fb03f87c74aad487956e9 The lock is acquired before calling emac_xmit_xdp_frame() and released after the call returns. This ensures that the TX queue is protected from concurrent access during the transmission of XDP frames. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-3-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 7 ++++++- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index b4be76e13a2fb7..38a80e004d1cf2 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -650,6 +650,8 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, struct page *page, u32 *len) { struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct bpf_prog *xdp_prog; struct xdp_frame *xdpf; u32 pkt_len = *len; @@ -669,8 +671,11 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, goto drop; } - q_idx = smp_processor_id() % emac->tx_ch_num; + q_idx = cpu % emac->tx_ch_num; + netif_txq = netdev_get_tx_queue(ndev, q_idx); + __netif_tx_lock(netif_txq, cpu); result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); + __netif_tx_unlock(netif_txq); if (result == ICSSG_XDP_CONSUMED) { ndev->stats.tx_dropped++; goto drop; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index ee35fecf61e7c6..86fc1278127c74 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1075,17 +1075,21 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame { struct prueth_emac *emac = netdev_priv(dev); struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct xdp_frame *xdpf; unsigned int q_idx; int nxmit = 0; u32 err; int i; - q_idx = smp_processor_id() % emac->tx_ch_num; + q_idx = cpu % emac->tx_ch_num; + netif_txq = netdev_get_tx_queue(ndev, q_idx); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; + __netif_tx_lock(netif_txq, cpu); for (i = 0; i < n; i++) { xdpf = frames[i]; err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx); @@ -1095,6 +1099,7 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame } nxmit++; } + __netif_tx_unlock(netif_txq); return nxmit; } From 1884fc85ae6ed0652fa324c66b1d89e25174e73b Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:46 +0530 Subject: [PATCH 1567/2924] net: ti: icssg-prueth: Report BQL before sending XDP packets When sending out any kind of traffic, it is essential that the driver keeps reporting BQL of the number of bytes that have been sent so that BQL can track the amount of data in the queue and prevents it from overflowing. If BQL is not reported, the driver may continue sending packets even when the queue is full, leading to packet loss, congestion and decreased network performance. Currently this is missing in emac_xmit_xdp_frame() and this patch fixes it. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-4-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 38a80e004d1cf2..d88a0180294e0f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -187,7 +187,6 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, xdp_return_frame(xdpf); break; default: - netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type); prueth_xmit_free(tx_chn, desc_tx); ndev->stats.tx_dropped++; continue; @@ -567,6 +566,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, { struct cppi5_host_desc_t *first_desc; struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; struct prueth_tx_chn *tx_chn; dma_addr_t desc_dma, buf_dma; struct prueth_swdata *swdata; @@ -620,12 +620,17 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, swdata->data.xdpf = xdpf; } + /* Report BQL before sending the packet */ + netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); + netdev_tx_sent_queue(netif_txq, xdpf->len); + cppi5_hdesc_set_pktlen(first_desc, xdpf->len); desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc); ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); if (ret) { netdev_err(ndev, "xdp tx: push failed: %d\n", ret); + netdev_tx_completed_queue(netif_txq, 1, xdpf->len); goto drop_free_descs; } @@ -984,6 +989,7 @@ enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); if (ret) { netdev_err(ndev, "tx: push failed: %d\n", ret); + netdev_tx_completed_queue(netif_txq, 1, pkt_len); goto drop_free_descs; } From e979a7c79fbc706f6dac913af379ef4caa04d3d5 Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Tue, 6 May 2025 13:36:59 -0500 Subject: [PATCH 1568/2924] spi: tegra114: Use value to check for invalid delays A delay unit of 0 is a valid entry, thus it is not valid to check for unused delays. Instead, check the value field; if that is zero, the given delay is unset. Fixes: 4426e6b4ecf6 ("spi: tegra114: Don't fail set_cs_timing when delays are zero") Cc: stable@vger.kernel.org Signed-off-by: Aaron Kling Reviewed-by: Jon Hunter Link: https://patch.msgid.link/20250506-spi-tegra114-fixup-v1-1-136dc2f732f3@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-tegra114.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 2a8bb798e95b95..795a8482c2c700 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -728,9 +728,9 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi) u32 inactive_cycles; u8 cs_state; - if ((setup->unit && setup->unit != SPI_DELAY_UNIT_SCK) || - (hold->unit && hold->unit != SPI_DELAY_UNIT_SCK) || - (inactive->unit && inactive->unit != SPI_DELAY_UNIT_SCK)) { + if ((setup->value && setup->unit != SPI_DELAY_UNIT_SCK) || + (hold->value && hold->unit != SPI_DELAY_UNIT_SCK) || + (inactive->value && inactive->unit != SPI_DELAY_UNIT_SCK)) { dev_err(&spi->dev, "Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n", SPI_DELAY_UNIT_SCK); From 5f93185a757ff38b36f849c659aeef368db15a68 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:00 +0200 Subject: [PATCH 1569/2924] net: dsa: b53: allow leaky reserved multicast Allow reserved multicast to ignore VLAN membership so STP and other management protocols work without a PVID VLAN configured when using a vlan aware bridge. Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index e5ba718979064b..62866165ad03c3 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -373,9 +373,11 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, b53_read8(dev, B53_VLAN_PAGE, B53_VLAN_CTRL5, &vc5); } + vc1 &= ~VC1_RX_MCST_FWD_EN; + if (enable) { vc0 |= VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID; - vc1 |= VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN; + vc1 |= VC1_RX_MCST_UNTAG_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; if (enable_filtering) { vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; @@ -393,7 +395,7 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, } else { vc0 &= ~(VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID); - vc1 &= ~(VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN); + vc1 &= ~VC1_RX_MCST_UNTAG_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; vc5 &= ~VC5_DROP_VTABLE_MISS; From 425f11d4cc9bd9e97e6825d9abb2c51a068ca7b5 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:01 +0200 Subject: [PATCH 1570/2924] net: dsa: b53: keep CPU port always tagged again The Broadcom management header does not carry the original VLAN tag state information, just the ingress port, so for untagged frames we do not know from which VLAN they originated. Therefore keep the CPU port always tagged except for VLAN 0. Fixes the following setup: $ ip link add br0 type bridge vlan_filtering 1 $ ip link set sw1p1 master br0 $ bridge vlan add dev br0 pvid untagged self $ ip link add sw1p2.10 link sw1p2 type vlan id 10 Where VID 10 would stay untagged on the CPU port. Fixes: 2c32a3d3c233 ("net: dsa: b53: Do not force CPU to be always tagged") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-3-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 62866165ad03c3..9d4fb54b4ced6b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1135,6 +1135,11 @@ static int b53_setup(struct dsa_switch *ds) */ ds->untag_bridge_pvid = dev->tag_protocol == DSA_TAG_PROTO_NONE; + /* The switch does not tell us the original VLAN for untagged + * packets, so keep the CPU port always tagged. + */ + ds->untag_vlan_aware_bridge_pvid = true; + ret = b53_reset_switch(dev); if (ret) { dev_err(ds->dev, "failed to reset switch\n"); @@ -1545,6 +1550,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (vlan->vid == 0 && vlan->vid == b53_default_pvid(dev)) untagged = true; + if (vlan->vid > 0 && dsa_is_cpu_port(ds, port)) + untagged = false; + vl->members |= BIT(port); if (untagged && !b53_vlan_port_needs_forced_tagged(ds, port)) vl->untag |= BIT(port); From f480851981043d9bb6447ca9883ade9247b9a0ad Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:02 +0200 Subject: [PATCH 1571/2924] net: dsa: b53: fix clearing PVID of a port Currently the PVID of ports are only set when adding/updating VLANs with PVID set or removing VLANs, but not when clearing the PVID flag of a VLAN. E.g. the following flow $ ip link add br0 type bridge vlan_filtering 1 $ ip link set sw1p1 master bridge $ bridge vlan add dev sw1p1 vid 10 pvid untagged $ bridge vlan add dev sw1p1 vid 10 untagged Would keep the PVID set as 10, despite the flag being cleared. Fix this by checking if we need to unset the PVID on vlan updates. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-4-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 9d4fb54b4ced6b..65d74c455c573a 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1537,12 +1537,21 @@ int b53_vlan_add(struct dsa_switch *ds, int port, bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct b53_vlan *vl; + u16 old_pvid, new_pvid; int err; err = b53_vlan_prepare(ds, port, vlan); if (err) return err; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); + if (pvid) + new_pvid = vlan->vid; + else if (!pvid && vlan->vid == old_pvid) + new_pvid = b53_default_pvid(dev); + else + new_pvid = old_pvid; + vl = &dev->vlans[vlan->vid]; b53_get_vlan_entry(dev, vlan->vid, vl); @@ -1562,9 +1571,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); - if (pvid && !dsa_is_cpu_port(ds, port)) { + if (!dsa_is_cpu_port(ds, port) && new_pvid != old_pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), - vlan->vid); + new_pvid); b53_fast_age_vlan(dev, vlan->vid); } From 083c6b28c0cbcd83b6af1a10f2c82937129b3438 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:03 +0200 Subject: [PATCH 1572/2924] net: dsa: b53: fix flushing old pvid VLAN on pvid change Presumably the intention here was to flush the VLAN of the old pvid, not the added VLAN again, which we already flushed before. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-5-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 65d74c455c573a..c67c0b5fbc1bec 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1574,7 +1574,7 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (!dsa_is_cpu_port(ds, port) && new_pvid != old_pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), new_pvid); - b53_fast_age_vlan(dev, vlan->vid); + b53_fast_age_vlan(dev, old_pvid); } return 0; From a1c1901c5cc881425cc45992ab6c5418174e9e5a Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:04 +0200 Subject: [PATCH 1573/2924] net: dsa: b53: fix VLAN ID for untagged vlan on bridge leave The untagged default VLAN is added to the default vlan, which may be one, but we modify the VLAN 0 entry on bridge leave. Fix this to use the correct VLAN entry for the default pvid. Fixes: fea83353177a ("net: dsa: b53: Fix default VLAN ID") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-6-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c67c0b5fbc1bec..c60b552b945cee 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1986,7 +1986,7 @@ EXPORT_SYMBOL(b53_br_join); void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) { struct b53_device *dev = ds->priv; - struct b53_vlan *vl = &dev->vlans[0]; + struct b53_vlan *vl; s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; unsigned int i; u16 pvlan, reg, pvid; @@ -2012,6 +2012,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) dev->ports[port].vlan_ctl_mask = pvlan; pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; /* Make this port join all VLANs without VLAN entries */ if (is58xx(dev)) { From 13b152ae40495966501697693f048f47430c50fd Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:05 +0200 Subject: [PATCH 1574/2924] net: dsa: b53: always rejoin default untagged VLAN on bridge leave While JOIN_ALL_VLAN allows to join all VLANs, we still need to keep the default VLAN enabled so that untagged traffic stays untagged. So rejoin the default VLAN even for switches with JOIN_ALL_VLAN support. Fixes: 48aea33a77ab ("net: dsa: b53: Add JOIN_ALL_VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-7-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c60b552b945cee..4871e117f5ef05 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2021,12 +2021,12 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) if (!(reg & BIT(cpu_port))) reg |= BIT(cpu_port); b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } else { - b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); - b53_set_vlan_entry(dev, pvid, vl); } + + b53_get_vlan_entry(dev, pvid, vl); + vl->members |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port) | BIT(cpu_port); + b53_set_vlan_entry(dev, pvid, vl); } EXPORT_SYMBOL(b53_br_leave); From 45e9d59d39503bb3e6ab4d258caea4ba6496e2dc Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:06 +0200 Subject: [PATCH 1575/2924] net: dsa: b53: do not allow to configure VLAN 0 Since we cannot set forwarding destinations per VLAN, we should not have a VLAN 0 configured, as it would allow untagged traffic to work across ports on VLAN aware bridges regardless if a PVID untagged VLAN exists. So remove the VLAN 0 on join, an re-add it on leave. But only do so if we have a VLAN aware bridge, as without it, untagged traffic would become tagged with VID 0 on a VLAN unaware bridge. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-8-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 36 ++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 4871e117f5ef05..0b28791cca526b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1544,6 +1544,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (err) return err; + if (vlan->vid == 0) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); if (pvid) new_pvid = vlan->vid; @@ -1556,10 +1559,7 @@ int b53_vlan_add(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vlan->vid, vl); - if (vlan->vid == 0 && vlan->vid == b53_default_pvid(dev)) - untagged = true; - - if (vlan->vid > 0 && dsa_is_cpu_port(ds, port)) + if (dsa_is_cpu_port(ds, port)) untagged = false; vl->members |= BIT(port); @@ -1589,6 +1589,9 @@ int b53_vlan_del(struct dsa_switch *ds, int port, struct b53_vlan *vl; u16 pvid; + if (vlan->vid == 0) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); vl = &dev->vlans[vlan->vid]; @@ -1935,8 +1938,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, bool *tx_fwd_offload, struct netlink_ext_ack *extack) { struct b53_device *dev = ds->priv; + struct b53_vlan *vl; s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - u16 pvlan, reg; + u16 pvlan, reg, pvid; unsigned int i; /* On 7278, port 7 which connects to the ASP should only receive @@ -1945,6 +1949,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, if (dev->chip_id == BCM7278_DEVICE_ID && port == 7) return -EINVAL; + pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; + /* Make this port leave the all VLANs join since we will have proper * VLAN entries from now on */ @@ -1956,6 +1963,15 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); } + if (ds->vlan_filtering) { + b53_get_vlan_entry(dev, pvid, vl); + vl->members &= ~BIT(port); + if (vl->members == BIT(cpu_port)) + vl->members &= ~BIT(cpu_port); + vl->untag = vl->members; + b53_set_vlan_entry(dev, pvid, vl); + } + b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), &pvlan); b53_for_each_port(dev, i) { @@ -2023,10 +2039,12 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); } - b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); - b53_set_vlan_entry(dev, pvid, vl); + if (ds->vlan_filtering) { + b53_get_vlan_entry(dev, pvid, vl); + vl->members |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port) | BIT(cpu_port); + b53_set_vlan_entry(dev, pvid, vl); + } } EXPORT_SYMBOL(b53_br_leave); From f089652b6b16452535dcc5cbaa6e2bb05acd3f93 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:07 +0200 Subject: [PATCH 1576/2924] net: dsa: b53: do not program vlans when vlan filtering is off Documentation/networking/switchdev.rst says: - with VLAN filtering turned off: the bridge is strictly VLAN unaware and its data path will process all Ethernet frames as if they are VLAN-untagged. The bridge VLAN database can still be modified, but the modifications should have no effect while VLAN filtering is turned off. This breaks if we immediately apply the VLAN configuration, so skip writing it when vlan_filtering is off. Fixes: 0ee2af4ebbe3 ("net: dsa: set configure_vlan_while_not_filtering to true by default") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-9-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 48 +++++++++++++++++++------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0b28791cca526b..ee2f1be626184c 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1547,6 +1547,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; + if (!ds->vlan_filtering) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); if (pvid) new_pvid = vlan->vid; @@ -1592,6 +1595,9 @@ int b53_vlan_del(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; + if (!ds->vlan_filtering) + return 0; + b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); vl = &dev->vlans[vlan->vid]; @@ -1952,18 +1958,20 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - /* Make this port leave the all VLANs join since we will have proper - * VLAN entries from now on - */ - if (is58xx(dev)) { - b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); - reg &= ~BIT(port); - if ((reg & BIT(cpu_port)) == BIT(cpu_port)) - reg &= ~BIT(cpu_port); - b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } - if (ds->vlan_filtering) { + /* Make this port leave the all VLANs join since we will have + * proper VLAN entries from now on + */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, + ®); + reg &= ~BIT(port); + if ((reg & BIT(cpu_port)) == BIT(cpu_port)) + reg &= ~BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, + reg); + } + b53_get_vlan_entry(dev, pvid, vl); vl->members &= ~BIT(port); if (vl->members == BIT(cpu_port)) @@ -2030,16 +2038,16 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - /* Make this port join all VLANs without VLAN entries */ - if (is58xx(dev)) { - b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); - reg |= BIT(port); - if (!(reg & BIT(cpu_port))) - reg |= BIT(cpu_port); - b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } - if (ds->vlan_filtering) { + /* Make this port join all VLANs without VLAN entries */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); + reg |= BIT(port); + if (!(reg & BIT(cpu_port))) + reg |= BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); + } + b53_get_vlan_entry(dev, pvid, vl); vl->members |= BIT(port) | BIT(cpu_port); vl->untag |= BIT(port) | BIT(cpu_port); From 2dc2bd57111582895e10f54ea380329c89873f1c Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:08 +0200 Subject: [PATCH 1577/2924] net: dsa: b53: fix toggling vlan_filtering To allow runtime switching between vlan aware and vlan non-aware mode, we need to properly keep track of any bridge VLAN configuration. Likewise, we need to know when we actually switch between both modes, to not have to rewrite the full VLAN table every time we update the VLANs. So keep track of the current vlan_filtering mode, and on changes, apply the appropriate VLAN configuration. Fixes: 0ee2af4ebbe3 ("net: dsa: set configure_vlan_while_not_filtering to true by default") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-10-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 104 ++++++++++++++++++++++--------- drivers/net/dsa/b53/b53_priv.h | 2 + 2 files changed, 75 insertions(+), 31 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index ee2f1be626184c..0a7749b416f777 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -763,6 +763,22 @@ static bool b53_vlan_port_needs_forced_tagged(struct dsa_switch *ds, int port) return dev->tag_protocol == DSA_TAG_PROTO_NONE && dsa_is_cpu_port(ds, port); } +static bool b53_vlan_port_may_join_untagged(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + struct dsa_port *dp; + + if (!dev->vlan_filtering) + return true; + + dp = dsa_to_port(ds, port); + + if (dsa_port_is_cpu(dp)) + return true; + + return dp->bridge == NULL; +} + int b53_configure_vlan(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; @@ -781,7 +797,7 @@ int b53_configure_vlan(struct dsa_switch *ds) b53_do_vlan_op(dev, VTA_CMD_CLEAR); } - b53_enable_vlan(dev, -1, dev->vlan_enabled, ds->vlan_filtering); + b53_enable_vlan(dev, -1, dev->vlan_enabled, dev->vlan_filtering); /* Create an untagged VLAN entry for the default PVID in case * CONFIG_VLAN_8021Q is disabled and there are no calls to @@ -789,26 +805,39 @@ int b53_configure_vlan(struct dsa_switch *ds) * entry. Do this only when the tagging protocol is not * DSA_TAG_PROTO_NONE */ + v = &dev->vlans[def_vid]; b53_for_each_port(dev, i) { - v = &dev->vlans[def_vid]; - v->members |= BIT(i); + if (!b53_vlan_port_may_join_untagged(ds, i)) + continue; + + vl.members |= BIT(i); if (!b53_vlan_port_needs_forced_tagged(ds, i)) - v->untag = v->members; - b53_write16(dev, B53_VLAN_PAGE, - B53_VLAN_PORT_DEF_TAG(i), def_vid); + vl.untag = vl.members; + b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(i), + def_vid); } + b53_set_vlan_entry(dev, def_vid, &vl); - /* Upon initial call we have not set-up any VLANs, but upon - * system resume, we need to restore all VLAN entries. - */ - for (vid = def_vid; vid < dev->num_vlans; vid++) { - v = &dev->vlans[vid]; + if (dev->vlan_filtering) { + /* Upon initial call we have not set-up any VLANs, but upon + * system resume, we need to restore all VLAN entries. + */ + for (vid = def_vid + 1; vid < dev->num_vlans; vid++) { + v = &dev->vlans[vid]; - if (!v->members) - continue; + if (!v->members) + continue; + + b53_set_vlan_entry(dev, vid, v); + b53_fast_age_vlan(dev, vid); + } - b53_set_vlan_entry(dev, vid, v); - b53_fast_age_vlan(dev, vid); + b53_for_each_port(dev, i) { + if (!dsa_is_cpu_port(ds, i)) + b53_write16(dev, B53_VLAN_PAGE, + B53_VLAN_PORT_DEF_TAG(i), + dev->ports[i].pvid); + } } return 0; @@ -1127,7 +1156,9 @@ EXPORT_SYMBOL(b53_setup_devlink_resources); static int b53_setup(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; + struct b53_vlan *vl; unsigned int port; + u16 pvid; int ret; /* Request bridge PVID untagged when DSA_TAG_PROTO_NONE is set @@ -1146,6 +1177,15 @@ static int b53_setup(struct dsa_switch *ds) return ret; } + /* setup default vlan for filtering mode */ + pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; + b53_for_each_port(dev, port) { + vl->members |= BIT(port); + if (!b53_vlan_port_needs_forced_tagged(ds, port)) + vl->untag |= BIT(port); + } + b53_reset_mib(dev); ret = b53_apply_config(dev); @@ -1499,7 +1539,10 @@ int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, { struct b53_device *dev = ds->priv; - b53_enable_vlan(dev, port, dev->vlan_enabled, vlan_filtering); + if (dev->vlan_filtering != vlan_filtering) { + dev->vlan_filtering = vlan_filtering; + b53_apply_config(dev); + } return 0; } @@ -1524,7 +1567,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port, if (vlan->vid >= dev->num_vlans) return -ERANGE; - b53_enable_vlan(dev, port, true, ds->vlan_filtering); + b53_enable_vlan(dev, port, true, dev->vlan_filtering); return 0; } @@ -1547,21 +1590,17 @@ int b53_vlan_add(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; - if (!ds->vlan_filtering) - return 0; - - b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &old_pvid); + old_pvid = dev->ports[port].pvid; if (pvid) new_pvid = vlan->vid; else if (!pvid && vlan->vid == old_pvid) new_pvid = b53_default_pvid(dev); else new_pvid = old_pvid; + dev->ports[port].pvid = new_pvid; vl = &dev->vlans[vlan->vid]; - b53_get_vlan_entry(dev, vlan->vid, vl); - if (dsa_is_cpu_port(ds, port)) untagged = false; @@ -1571,6 +1610,9 @@ int b53_vlan_add(struct dsa_switch *ds, int port, else vl->untag &= ~BIT(port); + if (!dev->vlan_filtering) + return 0; + b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); @@ -1595,23 +1637,22 @@ int b53_vlan_del(struct dsa_switch *ds, int port, if (vlan->vid == 0) return 0; - if (!ds->vlan_filtering) - return 0; - - b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); + pvid = dev->ports[port].pvid; vl = &dev->vlans[vlan->vid]; - b53_get_vlan_entry(dev, vlan->vid, vl); - vl->members &= ~BIT(port); if (pvid == vlan->vid) pvid = b53_default_pvid(dev); + dev->ports[port].pvid = pvid; if (untagged && !b53_vlan_port_needs_forced_tagged(ds, port)) vl->untag &= ~(BIT(port)); + if (!dev->vlan_filtering) + return 0; + b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); @@ -1958,7 +1999,7 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - if (ds->vlan_filtering) { + if (dev->vlan_filtering) { /* Make this port leave the all VLANs join since we will have * proper VLAN entries from now on */ @@ -2038,7 +2079,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) pvid = b53_default_pvid(dev); vl = &dev->vlans[pvid]; - if (ds->vlan_filtering) { + if (dev->vlan_filtering) { /* Make this port join all VLANs without VLAN entries */ if (is58xx(dev)) { b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); @@ -2803,6 +2844,7 @@ struct b53_device *b53_switch_alloc(struct device *base, ds->ops = &b53_switch_ops; ds->phylink_mac_ops = &b53_phylink_mac_ops; dev->vlan_enabled = true; + dev->vlan_filtering = false; /* Let DSA handle the case were multiple bridges span the same switch * device and different VLAN awareness settings are requested, which * would be breaking filtering semantics for any of the other bridge diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 0166c37a13a7fc..4636e27fd1ee91 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -96,6 +96,7 @@ struct b53_pcs { struct b53_port { u16 vlan_ctl_mask; + u16 pvid; struct ethtool_keee eee; }; @@ -147,6 +148,7 @@ struct b53_device { unsigned int num_vlans; struct b53_vlan *vlans; bool vlan_enabled; + bool vlan_filtering; unsigned int num_ports; struct b53_port *ports; From 9f34ad89bcf0e6df6f8b01f1bdab211493fc66d1 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:09 +0200 Subject: [PATCH 1578/2924] net: dsa: b53: fix learning on VLAN unaware bridges When VLAN filtering is off, we configure the switch to forward, but not learn on VLAN table misses. This effectively disables learning while not filtering. Fix this by switching to forward and learn. Setting the learning disable register will still control whether learning actually happens. Fixes: dad8d7c6452b ("net: dsa: b53: Properly account for VLAN filtering") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-11-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 0a7749b416f777..a2c0b44fc6bef1 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -383,7 +383,7 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; vc5 |= VC5_DROP_VTABLE_MISS; } else { - vc4 |= VC4_ING_VID_VIO_FWD << VC4_ING_VID_CHECK_S; + vc4 |= VC4_NO_ING_VID_CHK << VC4_ING_VID_CHECK_S; vc5 &= ~VC5_DROP_VTABLE_MISS; } From 2e7179c628d3cb9aee75e412473813b099e11ed4 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 29 Apr 2025 22:17:10 +0200 Subject: [PATCH 1579/2924] net: dsa: b53: do not set learning and unicast/multicast on up When a port gets set up, b53 disables learning and enables the port for flooding. This can undo any bridge configuration on the port. E.g. the following flow would disable learning on a port: $ ip link add br0 type bridge $ ip link set sw1p1 master br0 <- enables learning for sw1p1 $ ip link set br0 up $ ip link set sw1p1 up <- disables learning again Fix this by populating dsa_switch_ops::port_setup(), and set up initial config there. Fixes: f9b3827ee66c ("net: dsa: b53: Support setting learning on port") Signed-off-by: Jonas Gorski Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250429201710.330937-12-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 21 +++++++++++++-------- drivers/net/dsa/b53/b53_priv.h | 1 + drivers/net/dsa/bcm_sf2.c | 1 + 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a2c0b44fc6bef1..9eb39cfa5fb275 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -578,6 +578,18 @@ static void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable) b53_write16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, reg); } +int b53_setup_port(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + + b53_port_set_ucast_flood(dev, port, true); + b53_port_set_mcast_flood(dev, port, true); + b53_port_set_learning(dev, port, false); + + return 0; +} +EXPORT_SYMBOL(b53_setup_port); + int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; @@ -590,10 +602,6 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - b53_port_set_ucast_flood(dev, port, true); - b53_port_set_mcast_flood(dev, port, true); - b53_port_set_learning(dev, port, false); - if (dev->ops->irq_enable) ret = dev->ops->irq_enable(dev, port); if (ret) @@ -724,10 +732,6 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port) b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), port_ctrl); b53_brcm_hdr_setup(dev->ds, port); - - b53_port_set_ucast_flood(dev, port, true); - b53_port_set_mcast_flood(dev, port, true); - b53_port_set_learning(dev, port, false); } static void b53_enable_mib(struct b53_device *dev) @@ -2387,6 +2391,7 @@ static const struct dsa_switch_ops b53_switch_ops = { .phy_read = b53_phy_read16, .phy_write = b53_phy_write16, .phylink_get_caps = b53_phylink_get_caps, + .port_setup = b53_setup_port, .port_enable = b53_enable_port, .port_disable = b53_disable_port, .support_eee = b53_support_eee, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 4636e27fd1ee91..2cf3e6a81e3785 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -384,6 +384,7 @@ enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); void b53_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); +int b53_setup_port(struct dsa_switch *ds, int port); int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy); void b53_disable_port(struct dsa_switch *ds, int port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index fa2bf3fa90191a..454a8c7fd7eea5 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1230,6 +1230,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .resume = bcm_sf2_sw_resume, .get_wol = bcm_sf2_sw_get_wol, .set_wol = bcm_sf2_sw_set_wol, + .port_setup = b53_setup_port, .port_enable = bcm_sf2_port_setup, .port_disable = bcm_sf2_port_disable, .support_eee = b53_support_eee, From df84d2fd35c6f2d6b2241e47f26e1829eab26186 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Thu, 17 Apr 2025 23:30:41 +0800 Subject: [PATCH 1580/2924] mailmap: add entries for Lance Yang I'm moving to @linux.dev and mapping my old addresses to it. Link: https://lkml.kernel.org/r/20250417153041.38977-1-lance.yang@linux.dev Signed-off-by: Lance Yang Signed-off-by: Andrew Morton --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 9afde79e1936e5..6f0a4e965d6a4e 100644 --- a/.mailmap +++ b/.mailmap @@ -447,6 +447,8 @@ Luca Ceresoli Luca Weiss Lukasz Luba Luo Jie +Lance Yang +Lance Yang Maciej W. Rozycki Maciej W. Rozycki Maharaja Kennadyrajan From bd1261b16d9131d79723d982d54295e7f309797a Mon Sep 17 00:00:00 2001 From: Heming Zhao Date: Mon, 14 Apr 2025 14:01:23 +0800 Subject: [PATCH 1581/2924] ocfs2: fix the issue with discontiguous allocation in the global_bitmap commit 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high") introduced another regression. The following ocfs2-test case can trigger this issue: > discontig_runner.sh => activate_discontig_bg.sh => resv_unwritten: > ${RESV_UNWRITTEN_BIN} -f ${WORK_PLACE}/large_testfile -s 0 -l \ > $((${FILE_MAJOR_SIZE_M}*1024*1024)) In my env, test disk size (by "fdisk -l "): > 53687091200 bytes, 104857600 sectors. Above command is: > /usr/local/ocfs2-test/bin/resv_unwritten -f \ > /mnt/ocfs2/ocfs2-activate-discontig-bg-dir/large_testfile -s 0 -l \ > 53187969024 Error log: > [*] Reserve 50724M space for a LARGE file, reserve 200M space for future test. > ioctl error 28: "No space left on device" > resv allocation failed Unknown error -1 > reserve unwritten region from 0 to 53187969024. Call flow: __ocfs2_change_file_space //by ioctl OCFS2_IOC_RESVSP64 ocfs2_allocate_unwritten_extents //start:0 len:53187969024 while() + ocfs2_get_clusters //cpos:0, alloc_size:1623168 (cluster number) + ocfs2_extend_allocation + ocfs2_lock_allocators | + choose OCFS2_AC_USE_MAIN & ocfs2_cluster_group_search | + ocfs2_add_inode_data ocfs2_add_clusters_in_btree __ocfs2_claim_clusters ocfs2_claim_suballoc_bits + During the allocation of the final part of the large file (after ~47GB), no chain had the required contiguous bits_wanted. Consequently, the allocation failed. How to fix: When OCFS2 is encountering fragmented allocation, the file system should stop attempting bits_wanted contiguous allocation and instead provide the largest available contiguous free bits from the cluster groups. Link: https://lkml.kernel.org/r/20250414060125.19938-2-heming.zhao@suse.com Fixes: 4eb7b93e0310 ("ocfs2: improve write IO performance when fragmentation is high") Signed-off-by: Heming Zhao Reported-by: Gautham Ananthakrishna Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/suballoc.c | 38 ++++++++++++++++++++++++++++++++------ fs/ocfs2/suballoc.h | 1 + 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f7b483f0de2add..6ac4dcd54588cf 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, ac, cl); - if (PTR_ERR(bg_bh) == -ENOSPC) + if (PTR_ERR(bg_bh) == -ENOSPC) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; bg_bh = ocfs2_block_group_alloc_discontig(handle, alloc_inode, ac, cl); + } if (IS_ERR(bg_bh)) { status = PTR_ERR(bg_bh); bg_bh = NULL; @@ -1794,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, { int status; u16 chain; + u32 contig_bits; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; @@ -1819,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use * the 1st group with any empty bits. */ - while ((status = ac->ac_group_search(alloc_inode, group_bh, - bits_wanted, min_bits, - ac->ac_max_block, - res)) == -ENOSPC) { + while (1) { + if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) { + contig_bits = le16_to_cpu(bg->bg_contig_free_bits); + if (!contig_bits) + contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap, + le16_to_cpu(bg->bg_bits), 0); + if (bits_wanted > contig_bits && contig_bits >= min_bits) + bits_wanted = contig_bits; + } + + status = ac->ac_group_search(alloc_inode, group_bh, + bits_wanted, min_bits, + ac->ac_max_block, res); + if (status != -ENOSPC) + break; if (!bg->bg_next_group) break; @@ -1982,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; +search: status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); if (!status) { @@ -2022,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, } } + /* Chains can't supply the bits_wanted contiguous space. + * We should switch to using every single bit when allocating + * from the global bitmap. */ + if (i == le16_to_cpu(cl->cl_next_free_rec) && + status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; + ac->ac_chain = victim; + goto search; + } + set_hint: if (status != -ENOSPC) { /* If the next search of this group is not likely to @@ -2365,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle, BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL - && ac->ac_which != OCFS2_AC_USE_MAIN); + && ac->ac_which != OCFS2_AC_USE_MAIN + && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG); if (ac->ac_which == OCFS2_AC_USE_LOCAL) { WARN_ON(min_clusters > 1); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index b481b834857d33..bcf2ed4a86310b 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -29,6 +29,7 @@ struct ocfs2_alloc_context { #define OCFS2_AC_USE_MAIN 2 #define OCFS2_AC_USE_INODE 3 #define OCFS2_AC_USE_META 4 +#define OCFS2_AC_USE_MAIN_DISCONTIG 5 u32 ac_which; /* these are used by the chain search */ From 00a241f528427b63c415a410293b86e66098888e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 16 Apr 2025 18:09:50 -0700 Subject: [PATCH 1582/2924] x86: disable image size check for test builds 64-bit allyesconfig builds fail with x86_64-linux-ld: kernel image bigger than KERNEL_IMAGE_SIZE Bisect points to commit 6f110a5e4f99 ("Disable SLUB_TINY for build testing") as the responsible commit. Reverting that patch does indeed fix the problem. Further analysis shows that disabling SLUB_TINY enables KASAN, and that KASAN is responsible for the image size increase. Solve the build problem by disabling the image size check for test builds. [akpm@linux-foundation.org: add comment, fix nearby typo (sink->sync)] [akpm@linux-foundation.org: fix comment snafu Link: https://lore.kernel.org/oe-kbuild-all/202504191813.4r9H6Glt-lkp@intel.com/ Link: https://lkml.kernel.org/r/20250417010950.2203847-1-linux@roeck-us.net Fixes: 6f110a5e4f99 ("Disable SLUB_TINY for build testing") Signed-off-by: Guenter Roeck Cc: Linus Torvalds Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Borislav Betkov Cc: Dmitriy Vyukov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleinxer Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton --- arch/x86/kernel/vmlinux.lds.S | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index ccdc45e5b75961..aa4d0221583c23 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -466,10 +466,18 @@ SECTIONS } /* - * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + * COMPILE_TEST kernels can be large - CONFIG_KASAN, for example, can cause + * this. Let's assume that nobody will be running a COMPILE_TEST kernel and + * let's assert that fuller build coverage is more valuable than being able to + * run a COMPILE_TEST kernel. + */ +#ifndef CONFIG_COMPILE_TEST +/* + * The ASSERT() sync to . is intentional, for binutils 2.14 compatibility: */ . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE"); +#endif /* needed for Clang - see arch/x86/entry/entry.S */ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard); From e81224f0ba22e13038381d41a8356ab256d56f23 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 18 Apr 2025 16:00:52 +0100 Subject: [PATCH 1583/2924] MAINTAINERS: add reverse mapping section Separate out the reverse mapping part of memory management and assign appropriate maintainers and reviewers. David has long been invovled in work with the reverse mapping and continues to do so, so is well suited to maintain this area of the kernel. I have a lot of experience working with the anonymous reverse mapping and continue to work in this area, and also have good knowledge of the walking code and code related to VMAs. This helps people identify who to ask for help, and also additionally makes life easier in review. Link: https://lkml.kernel.org/r/20250418150052.299220-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Liam R. Howlett Acked-by: Harry Yoo Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- MAINTAINERS | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 69511c3b2b76fb..c90842b4239e87 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15505,7 +15505,6 @@ F: include/linux/mm_*.h F: include/linux/mmzone.h F: include/linux/mmu_notifier.h F: include/linux/pagewalk.h -F: include/linux/rmap.h F: include/trace/events/ksm.h F: mm/ F: tools/mm/ @@ -15545,6 +15544,19 @@ F: mm/page_alloc.c F: include/linux/gfp.h F: include/linux/compaction.h +MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) +M: Andrew Morton +M: David Hildenbrand +M: Lorenzo Stoakes +R: Rik van Riel +R: Liam R. Howlett +R: Vlastimil Babka +R: Harry Yoo +L: linux-mm@kvack.org +S: Maintained +F: include/linux/rmap.h +F: mm/rmap.c + MEMORY MANAGEMENT - SECRETMEM M: Andrew Morton M: Mike Rapoport From be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 Mon Sep 17 00:00:00 2001 From: Gavin Guo Date: Mon, 21 Apr 2025 19:35:36 +0800 Subject: [PATCH 1584/2924] mm/huge_memory: fix dereferencing invalid pmd migration entry When migrating a THP, concurrent access to the PMD migration entry during a deferred split scan can lead to an invalid address access, as illustrated below. To prevent this invalid access, it is necessary to check the PMD migration entry and return early. In this context, there is no need to use pmd_to_swp_entry and pfn_swap_entry_to_page to verify the equality of the target folio. Since the PMD migration entry is locked, it cannot be served as the target. Mailing list discussion and explanation from Hugh Dickins: "An anon_vma lookup points to a location which may contain the folio of interest, but might instead contain another folio: and weeding out those other folios is precisely what the "folio != pmd_folio((*pmd)" check (and the "risk of replacing the wrong folio" comment a few lines above it) is for." BUG: unable to handle page fault for address: ffffea60001db008 CPU: 0 UID: 0 PID: 2199114 Comm: tee Not tainted 6.14.0+ #4 NONE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:split_huge_pmd_locked+0x3b5/0x2b60 Call Trace: try_to_migrate_one+0x28c/0x3730 rmap_walk_anon+0x4f6/0x770 unmap_folio+0x196/0x1f0 split_huge_page_to_list_to_order+0x9f6/0x1560 deferred_split_scan+0xac5/0x12a0 shrinker_debugfs_scan_write+0x376/0x470 full_proxy_write+0x15c/0x220 vfs_write+0x2fc/0xcb0 ksys_write+0x146/0x250 do_syscall_64+0x6a/0x120 entry_SYSCALL_64_after_hwframe+0x76/0x7e The bug is found by syzkaller on an internal kernel, then confirmed on upstream. Link: https://lkml.kernel.org/r/20250421113536.3682201-1-gavinguo@igalia.com Link: https://lore.kernel.org/all/20250414072737.1698513-1-gavinguo@igalia.com/ Link: https://lore.kernel.org/all/20250418085802.2973519-1-gavinguo@igalia.com/ Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path") Signed-off-by: Gavin Guo Acked-by: David Hildenbrand Acked-by: Hugh Dickins Acked-by: Zi Yan Reviewed-by: Gavin Shan Cc: Florent Revest Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2a47682d1ab777..47d76d03ce30b4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3075,6 +3075,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, bool freeze, struct folio *folio) { + bool pmd_migration = is_pmd_migration_entry(*pmd); + VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio)); VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE)); VM_WARN_ON_ONCE(folio && !folio_test_locked(folio)); @@ -3085,9 +3087,12 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, * require a folio to check the PMD against. Otherwise, there * is a risk of replacing the wrong folio. */ - if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || - is_pmd_migration_entry(*pmd)) { - if (folio && folio != pmd_folio(*pmd)) + if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || pmd_migration) { + /* + * Do not apply pmd_folio() to a migration entry; and folio lock + * guarantees that it must be of the wrong folio anyway. + */ + if (folio && (pmd_migration || folio != pmd_folio(*pmd))) return; __split_huge_pmd_locked(vma, pmd, address, freeze); } From 31d4cd4eb2f8d9b87ebfa6a5e443a59e3b3d7b8c Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Fri, 11 Apr 2025 11:31:24 -0500 Subject: [PATCH 1585/2924] ocfs2: fix panic in failed foilio allocation commit 7e119cff9d0a ("ocfs2: convert w_pages to w_folios") and commit 9a5e08652dc4b ("ocfs2: use an array of folios instead of an array of pages") save -ENOMEM in the folio array upon allocation failure and call the folio array free code. The folio array free code expects either valid folio pointers or NULL. Finding the -ENOMEM will result in a panic. Fix by NULLing the error folio entry. Link: https://lkml.kernel.org/r/c879a52b-835c-4fa0-902b-8b2e9196dcbd@oracle.com Fixes: 7e119cff9d0a ("ocfs2: convert w_pages to w_folios") Fixes: 9a5e08652dc4b ("ocfs2: use an array of folios instead of an array of pages") Signed-off-by: Mark Tinguely Reviewed-by: Matthew Wilcox (Oracle) Cc: Changwei Ge Cc: Joel Becker Cc: Junxiao Bi Cc: Mark Fasheh Cc: Nathan Chancellor Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b8ac85b548c7e5..821cb7874685e1 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6918,6 +6918,7 @@ static int ocfs2_grab_folios(struct inode *inode, loff_t start, loff_t end, if (IS_ERR(folios[numfolios])) { ret = PTR_ERR(folios[numfolios]); mlog_errno(ret); + folios[numfolios] = NULL; goto out; } From a47694ecb8bcf2d60d665eaade913f4f59956c44 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Wed, 23 Apr 2025 13:30:42 +0100 Subject: [PATCH 1586/2924] MAINTAINERS: add core mm section In furtherance of ongoing efforts to ensure people are aware of who de-facto maintains/has an interest in specific parts of mm, as well trying to avoid get_maintainers.pl listing only Andrew and the mailing list for mm files - establish a 'core' memory management section establishing David as co-maintainer alongside Andrew (thanks David for volunteering!) along with a number of relevant reviewers. We try to keep things as fine-grained as possible, so we place only obviously 'general' mm things here. For files which are specific to a particular part of mm, we prefer new entries. Link: https://lkml.kernel.org/r/20250423123042.59082-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Mike Rapoport (Microsoft) Acked-by: Liam R. Howlett Acked-by: David Hildenbrand Cc: Lorenzo Stoakes Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- MAINTAINERS | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c90842b4239e87..5b5d07abdf329c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15494,14 +15494,11 @@ F: Documentation/mm/ F: include/linux/gfp.h F: include/linux/gfp_types.h F: include/linux/memfd.h -F: include/linux/memory.h F: include/linux/memory_hotplug.h F: include/linux/memory-tiers.h F: include/linux/mempolicy.h F: include/linux/mempool.h F: include/linux/memremap.h -F: include/linux/mm.h -F: include/linux/mm_*.h F: include/linux/mmzone.h F: include/linux/mmu_notifier.h F: include/linux/pagewalk.h @@ -15511,6 +15508,31 @@ F: tools/mm/ F: tools/testing/selftests/mm/ N: include/linux/page[-_]* +MEMORY MANAGEMENT - CORE +M: Andrew Morton +M: David Hildenbrand +R: Lorenzo Stoakes +R: Liam R. Howlett +R: Vlastimil Babka +R: Mike Rapoport +R: Suren Baghdasaryan +R: Michal Hocko +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: include/linux/memory.h +F: include/linux/mm.h +F: include/linux/mm_*.h +F: include/linux/mmdebug.h +F: include/linux/pagewalk.h +F: mm/Kconfig +F: mm/debug.c +F: mm/init-mm.c +F: mm/memory.c +F: mm/pagewalk.c +F: mm/util.c + MEMORY MANAGEMENT - EXECMEM M: Andrew Morton M: Mike Rapoport From ab00ddd802f80e31fc9639c652d736fe3913feae Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 23 Apr 2025 18:36:45 +0800 Subject: [PATCH 1587/2924] selftests/mm: compaction_test: support platform with huge mount of memory When running mm selftest to verify mm patches, 'compaction_test' case failed on an x86 server with 1TB memory. And the root cause is that it has too much free memory than what the test supports. The test case tries to allocate 100000 huge pages, which is about 200 GB for that x86 server, and when it succeeds, it expects it's large than 1/3 of 80% of the free memory in system. This logic only works for platform with 750 GB ( 200 / (1/3) / 80% ) or less free memory, and may raise false alarm for others. Fix it by changing the fixed page number to self-adjustable number according to the real number of free memory. Link: https://lkml.kernel.org/r/20250423103645.2758-1-feng.tang@linux.alibaba.com Fixes: bd67d5c15cc1 ("Test compaction of mlocked memory") Signed-off-by: Feng Tang Acked-by: Dev Jain Reviewed-by: Baolin Wang Tested-by: Baolin Wang Cc: Shuah Khan Cc: Sri Jayaramappa Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/compaction_test.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index 2c3a0eb6b22d31..9bc4591c7b1699 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int compaction_index = 0; char nr_hugepages[20] = {0}; char init_nr_hugepages[24] = {0}; + char target_nr_hugepages[24] = {0}; + int slen; snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), "%lu", initial_nr_hugepages); @@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, goto out; } - /* Request a large number of huge pages. The Kernel will allocate - as much as it can */ - if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", - strerror(errno)); + /* + * Request huge pages for about half of the free memory. The Kernel + * will allocate as much as it can, and we expect it will get at least 1/3 + */ + nr_hugepages_ul = mem_free / hugepage_size / 2; + snprintf(target_nr_hugepages, sizeof(target_nr_hugepages), + "%lu", nr_hugepages_ul); + + slen = strlen(target_nr_hugepages); + if (write(fd, target_nr_hugepages, slen) != slen) { + ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n", + nr_hugepages_ul, strerror(errno)); goto close_fd; } From 95567729173e62e0e60a1f8ad9eb2e1320a8ccac Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 24 Apr 2025 17:57:28 -0400 Subject: [PATCH 1588/2924] mm/userfaultfd: fix uninitialized output field for -EAGAIN race While discussing some userfaultfd relevant issues recently, Andrea noticed a potential ABI breakage with -EAGAIN on almost all userfaultfd ioctl()s. Quote from Andrea, explaining how -EAGAIN was processed, and how this should fix it (taking example of UFFDIO_COPY ioctl): The "mmap_changing" and "stale pmd" conditions are already reported as -EAGAIN written in the copy field, this does not change it. This change removes the subnormal case that left copy.copy uninitialized and required apps to explicitly set the copy field to get deterministic behavior (which is a requirement contrary to the documentation in both the manpage and source code). In turn there's no alteration to backwards compatibility as result of this change because userland will find the copy field consistently set to -EAGAIN, and not anymore sometime -EAGAIN and sometime uninitialized. Even then the change only can make a difference to non cooperative users of userfaultfd, so when UFFD_FEATURE_EVENT_* is enabled, which is not true for the vast majority of apps using userfaultfd or this unintended uninitialized field may have been noticed sooner. Meanwhile, since this bug existed for years, it also almost affects all ioctl()s that was introduced later. Besides UFFDIO_ZEROPAGE, these also get affected in the same way: - UFFDIO_CONTINUE - UFFDIO_POISON - UFFDIO_MOVE This patch should have fixed all of them. Link: https://lkml.kernel.org/r/20250424215729.194656-2-peterx@redhat.com Fixes: df2cc96e7701 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races") Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl") Fixes: fc71884a5f59 ("mm: userfaultfd: add new UFFDIO_POISON ioctl") Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Peter Xu Reported-by: Andrea Arcangeli Suggested-by: Andrea Arcangeli Reviewed-by: David Hildenbrand Cc: Mike Rapoport Cc: Axel Rasmussen Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- fs/userfaultfd.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index d80f943461992f..22f4bf956ba1c4 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1585,8 +1585,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, user_uffdio_copy = (struct uffdio_copy __user *) arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_copy->copy))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_copy, user_uffdio_copy, @@ -1641,8 +1644,11 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage, @@ -1744,8 +1750,11 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) user_uffdio_continue = (struct uffdio_continue __user *)arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_continue->mapped))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_continue, user_uffdio_continue, @@ -1801,8 +1810,11 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long user_uffdio_poison = (struct uffdio_poison __user *)arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_poison->updated))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_poison, user_uffdio_poison, @@ -1870,8 +1882,12 @@ static int userfaultfd_move(struct userfaultfd_ctx *ctx, user_uffdio_move = (struct uffdio_move __user *) arg; - if (atomic_read(&ctx->mmap_changing)) - return -EAGAIN; + ret = -EAGAIN; + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_move->move))) + return -EFAULT; + goto out; + } if (copy_from_user(&uffdio_move, user_uffdio_move, /* don't copy "move" last field */ From 09fc97b3abe9e72c799a1c757acc47e746844619 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 24 Apr 2025 12:16:32 +0100 Subject: [PATCH 1589/2924] MAINTAINERS: add mm THP section As part of the ongoing efforts to sub-divide memory management maintainership and reviewership, establish a section for Transparent Huge Page support and add appropriate maintainers and reviewers. [lorenzo.stoakes@oracle.com: add Dev Jain as THP reviewer] Link: https://lkml.kernel.org/r/327e6f2f-0f0f-48af-9ca2-3f8cadf0d8bf@lucifer.local Link: https://lkml.kernel.org/r/20250424111632.103637-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Reviewed-by: Ryan Roberts Reviewed-by: Baolin Wang Acked-by: Zi Yan Cc: Dev Jain Cc: Liam Howlett Cc: Mariano Pache Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- MAINTAINERS | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5b5d07abdf329c..f62b266630cf15 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15587,6 +15587,30 @@ S: Maintained F: include/linux/secretmem.h F: mm/secretmem.c +MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) +M: Andrew Morton +M: David Hildenbrand +R: Zi Yan +R: Baolin Wang +R: Lorenzo Stoakes +R: Liam R. Howlett +R: Nico Pache +R: Ryan Roberts +R: Dev Jain +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: Documentation/admin-guide/mm/transhuge.rst +F: include/linux/huge_mm.h +F: include/linux/khugepaged.h +F: include/trace/events/huge_memory.h +F: mm/huge_memory.c +F: mm/khugepaged.c +F: tools/testing/selftests/mm/khugepaged.c +F: tools/testing/selftests/mm/split_huge_page_test.c +F: tools/testing/selftests/mm/transhuge-stress.c + MEMORY MANAGEMENT - USERFAULTFD M: Andrew Morton R: Peter Xu From 80fbee76ebbdf92acd9f293178850bf0b103a6b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 24 Apr 2025 08:02:51 +0200 Subject: [PATCH 1590/2924] mailmap: map Uwe's BayLibre addresses to a single one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When I started working for BayLibre I wasn't aware that the mailserver rewrote the sender address and so a few commits entered kernel history with a working but unexpected address. Map the unexpected to the intended one. This also makes the author of those commits (e.g. 32b4f1a4f07f ("pwm: jz4740: Another few conversions to regmap_{set,clear}_bits()")) match the address used in the sign-off line. Link: https://lkml.kernel.org/r/20250424060250.3085683-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 6f0a4e965d6a4e..4f661e434eaa0a 100644 --- a/.mailmap +++ b/.mailmap @@ -751,6 +751,7 @@ Tvrtko Ursulin Tycho Andersen Tzung-Bi Shih Uwe Kleine-König +Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König From c0fb83088f0cc4ee4706e0495ee8b06f49daa716 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 Apr 2025 15:45:11 +0200 Subject: [PATCH 1591/2924] ocfs2: switch osb->disable_recovery to enum Patch series "ocfs2: Fix deadlocks in quota recovery", v3. This implements another approach to fixing quota recovery deadlocks. We avoid grabbing sb->s_umount semaphore from ocfs2_finish_quota_recovery() and instead stop quota recovery early in ocfs2_dismount_volume(). This patch (of 3): We will need more recovery states than just pure enable / disable to fix deadlocks with quota recovery. Switch osb->disable_recovery to enum. Link: https://lkml.kernel.org/r/20250424134301.1392-1-jack@suse.cz Link: https://lkml.kernel.org/r/20250424134515.18933-4-jack@suse.cz Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") Signed-off-by: Jan Kara Reviewed-by: Heming Zhao Tested-by: Heming Zhao Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Murad Masimov Cc: Shichangkuo Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/journal.c | 14 ++++++++------ fs/ocfs2/ocfs2.h | 7 ++++++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index c7a9729dc9d087..e79f10cc43d91e 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) struct ocfs2_recovery_map *rm; mutex_init(&osb->recovery_lock); - osb->disable_recovery = 0; + osb->recovery_state = OCFS2_REC_ENABLED; osb->recovery_thread_task = NULL; init_waitqueue_head(&osb->recovery_event); @@ -206,7 +206,7 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb) /* disable any new recovery threads and wait for any currently * running ones to exit. Do this before setting the vol_state. */ mutex_lock(&osb->recovery_lock); - osb->disable_recovery = 1; + osb->recovery_state = OCFS2_REC_DISABLED; mutex_unlock(&osb->recovery_lock); wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); @@ -1582,14 +1582,16 @@ static int __ocfs2_recovery_thread(void *arg) void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) { + int was_set = -1; + mutex_lock(&osb->recovery_lock); + if (osb->recovery_state < OCFS2_REC_DISABLED) + was_set = ocfs2_recovery_map_set(osb, node_num); trace_ocfs2_recovery_thread(node_num, osb->node_num, - osb->disable_recovery, osb->recovery_thread_task, - osb->disable_recovery ? - -1 : ocfs2_recovery_map_set(osb, node_num)); + osb->recovery_state, osb->recovery_thread_task, was_set); - if (osb->disable_recovery) + if (osb->recovery_state == OCFS2_REC_DISABLED) goto out; if (osb->recovery_thread_task) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 51c52768132d70..e713361939f00a 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -308,6 +308,11 @@ enum ocfs2_journal_trigger_type { void ocfs2_initialize_journal_triggers(struct super_block *sb, struct ocfs2_triggers triggers[]); +enum ocfs2_recovery_state { + OCFS2_REC_ENABLED = 0, + OCFS2_REC_DISABLED, +}; + struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; @@ -370,7 +375,7 @@ struct ocfs2_super struct ocfs2_recovery_map *recovery_map; struct ocfs2_replay_map *replay_map; struct task_struct *recovery_thread_task; - int disable_recovery; + enum ocfs2_recovery_state recovery_state; wait_queue_head_t checkpoint_event; struct ocfs2_journal *journal; unsigned long osb_commit_interval; From 8f947e0fd595951460f5a6e1ac29baa82fa02eab Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 Apr 2025 15:45:12 +0200 Subject: [PATCH 1592/2924] ocfs2: implement handshaking with ocfs2 recovery thread We will need ocfs2 recovery thread to acknowledge transitions of recovery_state when disabling particular types of recovery. This is similar to what currently happens when disabling recovery completely, just more general. Implement the handshake and use it for exit from recovery. Link: https://lkml.kernel.org/r/20250424134515.18933-5-jack@suse.cz Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") Signed-off-by: Jan Kara Reviewed-by: Heming Zhao Tested-by: Heming Zhao Acked-by: Joseph Qi Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Cc: Murad Masimov Cc: Shichangkuo Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/journal.c | 52 +++++++++++++++++++++++++++++++--------------- fs/ocfs2/ocfs2.h | 4 ++++ 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index e79f10cc43d91e..2987df396ad8d6 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -190,31 +190,48 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) return 0; } -/* we can't grab the goofy sem lock from inside wait_event, so we use - * memory barriers to make sure that we'll see the null task before - * being woken up */ static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) { - mb(); return osb->recovery_thread_task != NULL; } -void ocfs2_recovery_exit(struct ocfs2_super *osb) +static void ocfs2_recovery_disable(struct ocfs2_super *osb, + enum ocfs2_recovery_state state) { - struct ocfs2_recovery_map *rm; - - /* disable any new recovery threads and wait for any currently - * running ones to exit. Do this before setting the vol_state. */ mutex_lock(&osb->recovery_lock); - osb->recovery_state = OCFS2_REC_DISABLED; + /* + * If recovery thread is not running, we can directly transition to + * final state. + */ + if (!ocfs2_recovery_thread_running(osb)) { + osb->recovery_state = state + 1; + goto out_lock; + } + osb->recovery_state = state; + /* Wait for recovery thread to acknowledge state transition */ + wait_event_cmd(osb->recovery_event, + !ocfs2_recovery_thread_running(osb) || + osb->recovery_state >= state + 1, + mutex_unlock(&osb->recovery_lock), + mutex_lock(&osb->recovery_lock)); +out_lock: mutex_unlock(&osb->recovery_lock); - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); - /* At this point, we know that no more recovery threads can be - * launched, so wait for any recovery completion work to - * complete. */ + /* + * At this point we know that no more recovery work can be queued so + * wait for any recovery completion work to complete. + */ if (osb->ocfs2_wq) flush_workqueue(osb->ocfs2_wq); +} + +void ocfs2_recovery_exit(struct ocfs2_super *osb) +{ + struct ocfs2_recovery_map *rm; + + /* disable any new recovery threads and wait for any currently + * running ones to exit. Do this before setting the vol_state. */ + ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE); /* * Now that recovery is shut down, and the osb is about to be @@ -1569,7 +1586,8 @@ static int __ocfs2_recovery_thread(void *arg) ocfs2_free_replay_slots(osb); osb->recovery_thread_task = NULL; - mb(); /* sync with ocfs2_recovery_thread_running */ + if (osb->recovery_state == OCFS2_REC_WANT_DISABLE) + osb->recovery_state = OCFS2_REC_DISABLED; wake_up(&osb->recovery_event); mutex_unlock(&osb->recovery_lock); @@ -1585,13 +1603,13 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) int was_set = -1; mutex_lock(&osb->recovery_lock); - if (osb->recovery_state < OCFS2_REC_DISABLED) + if (osb->recovery_state < OCFS2_REC_WANT_DISABLE) was_set = ocfs2_recovery_map_set(osb, node_num); trace_ocfs2_recovery_thread(node_num, osb->node_num, osb->recovery_state, osb->recovery_thread_task, was_set); - if (osb->recovery_state == OCFS2_REC_DISABLED) + if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE) goto out; if (osb->recovery_thread_task) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index e713361939f00a..70b2d5c8c22820 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -310,6 +310,10 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb, enum ocfs2_recovery_state { OCFS2_REC_ENABLED = 0, + OCFS2_REC_WANT_DISABLE, + /* + * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work + */ OCFS2_REC_DISABLED, }; From fcaf3b2683b05a9684acdebda706a12025a6927a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 24 Apr 2025 15:45:13 +0200 Subject: [PATCH 1593/2924] ocfs2: stop quota recovery before disabling quotas Currently quota recovery is synchronized with unmount using sb->s_umount semaphore. That is however prone to deadlocks because flush_workqueue(osb->ocfs2_wq) called from umount code can wait for quota recovery to complete while ocfs2_finish_quota_recovery() waits for sb->s_umount semaphore. Grabbing of sb->s_umount semaphore in ocfs2_finish_quota_recovery() is only needed to protect that function from disabling of quotas from ocfs2_dismount_volume(). Handle this problem by disabling quota recovery early during unmount in ocfs2_dismount_volume() instead so that we can drop acquisition of sb->s_umount from ocfs2_finish_quota_recovery(). Link: https://lkml.kernel.org/r/20250424134515.18933-6-jack@suse.cz Fixes: 5f530de63cfc ("ocfs2: Use s_umount for quota recovery protection") Signed-off-by: Jan Kara Reported-by: Shichangkuo Reported-by: Murad Masimov Reviewed-by: Heming Zhao Tested-by: Heming Zhao Acked-by: Joseph Qi Cc: Changwei Ge Cc: Joel Becker Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/journal.c | 20 ++++++++++++++++++-- fs/ocfs2/journal.h | 1 + fs/ocfs2/ocfs2.h | 6 ++++++ fs/ocfs2/quota_local.c | 9 ++------- fs/ocfs2/super.c | 3 +++ 5 files changed, 30 insertions(+), 9 deletions(-) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 2987df396ad8d6..e5f58ff2175f41 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -225,6 +225,11 @@ static void ocfs2_recovery_disable(struct ocfs2_super *osb, flush_workqueue(osb->ocfs2_wq); } +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb) +{ + ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE); +} + void ocfs2_recovery_exit(struct ocfs2_super *osb) { struct ocfs2_recovery_map *rm; @@ -1489,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg) } } restart: + if (quota_enabled) { + mutex_lock(&osb->recovery_lock); + /* Confirm that recovery thread will no longer recover quotas */ + if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) { + osb->recovery_state = OCFS2_REC_QUOTA_DISABLED; + wake_up(&osb->recovery_event); + } + if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED) + quota_enabled = 0; + mutex_unlock(&osb->recovery_lock); + } + status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); @@ -1592,8 +1609,7 @@ static int __ocfs2_recovery_thread(void *arg) mutex_unlock(&osb->recovery_lock); - if (quota_enabled) - kfree(rm_quota); + kfree(rm_quota); return status; } diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index e3c3a35dc5e0e7..6397170f302f22 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb); int ocfs2_recovery_init(struct ocfs2_super *osb); void ocfs2_recovery_exit(struct ocfs2_super *osb); +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb); int ocfs2_compute_replay_slots(struct ocfs2_super *osb); void ocfs2_free_replay_slots(struct ocfs2_super *osb); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 70b2d5c8c22820..6aaa94c554c12a 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -310,6 +310,12 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb, enum ocfs2_recovery_state { OCFS2_REC_ENABLED = 0, + OCFS2_REC_QUOTA_WANT_DISABLE, + /* + * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for + * ocfs2_recovery_disable_quota() to work. + */ + OCFS2_REC_QUOTA_DISABLED, OCFS2_REC_WANT_DISABLE, /* * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2956d888c13145..e272429da3db34 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -453,8 +453,7 @@ struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( /* Sync changes in local quota file into global quota file and * reinitialize local quota file. - * The function expects local quota file to be already locked and - * s_umount locked in shared mode. */ + * The function expects local quota file to be already locked. */ static int ocfs2_recover_local_quota_file(struct inode *lqinode, int type, struct ocfs2_quota_recovery *rec) @@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, { unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, LOCAL_GROUP_QUOTA_SYSTEM_INODE }; - struct super_block *sb = osb->sb; struct ocfs2_local_disk_dqinfo *ldinfo; struct buffer_head *bh; handle_t *handle; @@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " "slot %u\n", osb->dev_str, slot_num); - down_read(&sb->s_umount); for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (list_empty(&(rec->r_list[type]))) continue; @@ -677,7 +674,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, break; } out: - up_read(&sb->s_umount); kfree(rec); return status; } @@ -843,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); /* - * s_umount held in exclusive mode protects us against racing with - * recovery thread... + * ocfs2_dismount_volume() has already aborted quota recovery... */ if (oinfo->dqi_rec) { ocfs2_free_quota_recovery(oinfo->dqi_rec); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8bb5022f30824b..3d2533950bae20 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1812,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); + /* Stop quota recovery so that we can disable quotas */ + ocfs2_recovery_disable_quota(osb); + ocfs2_disable_quotas(osb); /* All dquots should be freed by now */ From a8efadda8649506e80d256cc09656acc0783df2b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 25 Apr 2025 17:24:36 +0100 Subject: [PATCH 1594/2924] tools/testing/selftests: fix guard region test tmpfs assumption The current implementation of the guard region tests assume that /tmp is mounted as tmpfs, that is shmem. This isn't always the case, and at least one instance of a spurious test failure has been reported as a result. This assumption is unsafe, rushed and silly - and easily remedied by simply using memfd, so do so. We also have to fixup the readonly_file test to explicitly only be applicable to file-backed cases. Link: https://lkml.kernel.org/r/20250425162436.564002-1-lorenzo.stoakes@oracle.com Fixes: 272f37d3e99a ("tools/selftests: expand all guard region tests to file-backed") Signed-off-by: Lorenzo Stoakes Reported-by: Ryan Roberts Closes: https://lore.kernel.org/linux-mm/a2d2766b-0ab4-437b-951a-8595a7506fe9@arm.com/ Reviewed-by: Ryan Roberts Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/guard-regions.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index b3d0e277109616..eba43ead13ae82 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -271,12 +271,16 @@ FIXTURE_SETUP(guard_regions) self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); setup_sighandler(); - if (variant->backing == ANON_BACKED) + switch (variant->backing) { + case ANON_BACKED: return; - - self->fd = open_file( - variant->backing == SHMEM_BACKED ? "/tmp/" : "", - self->path); + case LOCAL_FILE_BACKED: + self->fd = open_file("", self->path); + break; + case SHMEM_BACKED: + self->fd = memfd_create(self->path, 0); + break; + } /* We truncate file to at least 100 pages, tests can modify as needed. */ ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0); @@ -1696,7 +1700,7 @@ TEST_F(guard_regions, readonly_file) char *ptr; int i; - if (variant->backing == ANON_BACKED) + if (variant->backing != LOCAL_FILE_BACKED) SKIP(return, "Read-only test specific to file-backed"); /* Map shared so we can populate with pattern, populate it, unmap. */ From a0309faf1cb0622cac7c820150b7abf2024acff5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 17:11:07 -0700 Subject: [PATCH 1595/2924] mm: vmalloc: support more granular vrealloc() sizing Introduce struct vm_struct::requested_size so that the requested (re)allocation size is retained separately from the allocated area size. This means that KASAN will correctly poison the correct spans of requested bytes. This also means we can support growing the usable portion of an allocation that can already be supported by the existing area's existing allocation. Link: https://lkml.kernel.org/r/20250426001105.it.679-kees@kernel.org Fixes: 3ddc2fefe6f3 ("mm: vmalloc: implement vrealloc()") Signed-off-by: Kees Cook Reported-by: Erhard Furtner Closes: https://lore.kernel.org/all/20250408192503.6149a816@outsider.home/ Reviewed-by: Danilo Krummrich Cc: Michal Hocko Cc: "Uladzislau Rezki (Sony)" Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/vmalloc.h | 1 + mm/vmalloc.c | 31 ++++++++++++++++++++++++------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 31e9ffd936e393..5ca8d4dd149d4e 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -61,6 +61,7 @@ struct vm_struct { unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; + unsigned long requested_size; }; struct vmap_area { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 3ed720a787ecd8..2d7511654831e9 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1940,7 +1940,7 @@ static inline void setup_vmalloc_vm(struct vm_struct *vm, { vm->flags = flags; vm->addr = (void *)va->va_start; - vm->size = va_size(va); + vm->size = vm->requested_size = va_size(va); vm->caller = caller; va->vm = vm; } @@ -3133,6 +3133,7 @@ struct vm_struct *__get_vm_area_node(unsigned long size, area->flags = flags; area->caller = caller; + area->requested_size = requested_size; va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area); if (IS_ERR(va)) { @@ -4063,6 +4064,8 @@ EXPORT_SYMBOL(vzalloc_node_noprof); */ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) { + struct vm_struct *vm = NULL; + size_t alloced_size = 0; size_t old_size = 0; void *n; @@ -4072,15 +4075,17 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) } if (p) { - struct vm_struct *vm; - vm = find_vm_area(p); if (unlikely(!vm)) { WARN(1, "Trying to vrealloc() nonexistent vm area (%p)\n", p); return NULL; } - old_size = get_vm_area_size(vm); + alloced_size = get_vm_area_size(vm); + old_size = vm->requested_size; + if (WARN(alloced_size < old_size, + "vrealloc() has mismatched area vs requested sizes (%p)\n", p)) + return NULL; } /* @@ -4088,14 +4093,26 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) * would be a good heuristic for when to shrink the vm_area? */ if (size <= old_size) { - /* Zero out spare memory. */ - if (want_init_on_alloc(flags)) + /* Zero out "freed" memory. */ + if (want_init_on_free()) memset((void *)p + size, 0, old_size - size); + vm->requested_size = size; kasan_poison_vmalloc(p + size, old_size - size); - kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL); return (void *)p; } + /* + * We already have the bytes available in the allocation; use them. + */ + if (size <= alloced_size) { + kasan_unpoison_vmalloc(p + old_size, size - old_size, + KASAN_VMALLOC_PROT_NORMAL); + /* Zero out "alloced" memory. */ + if (want_init_on_alloc(flags)) + memset((void *)p + old_size, 0, size - old_size); + vm->requested_size = size; + } + /* TODO: Grow the vm_area, i.e. allocate and map additional pages. */ n = __vmalloc_noprof(size, flags); if (!n) From 22adb528621ddc92f887882a658507fbf88a5214 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 28 Apr 2025 18:49:34 +0530 Subject: [PATCH 1596/2924] selftests/mm: fix build break when compiling pkey_util.c Commit 50910acd6f615 ("selftests/mm: use sys_pkey helpers consistently") added a pkey_util.c to refactor some of the protection_keys functions accessible by other tests. But this broken the build in powerpc in two ways, pkey-powerpc.h: In function `arch_is_powervm': pkey-powerpc.h:73:21: error: storage size of `buf' isn't known 73 | struct stat buf; | ^~~ pkey-powerpc.h:75:14: error: implicit declaration of function `stat'; did you mean `strcat'? [-Wimplicit-function-declaration] 75 | if ((stat("/sys/firmware/devicetree/base/ibm,partition-name", &buf) == 0) && | ^~~~ | strcat Since pkey_util.c includes pkeys-helper.h, which in turn includes pkeys-powerpc.h, stat.h including is missing for "struct stat". This is fixed by adding "sys/stat.h" in pkeys-powerpc.h Secondly, pkey-powerpc.h:55:18: warning: format `%llx' expects argument of type `long long unsigned int', but argument 3 has type `u64' {aka `long unsigned int'} [-Wformat=] 55 | dprintf4("%s() changing %016llx to %016llx\n", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 56 | __func__, __read_pkey_reg(), pkey_reg); | ~~~~~~~~~~~~~~~~~ | | | u64 {aka long unsigned int} pkey-helpers.h:63:32: note: in definition of macro `dprintf_level' 63 | sigsafe_printf(args); \ | ^~~~ These format specifier related warning are removed by adding "__SANE_USERSPACE_TYPES__" to pkeys_utils.c. Link: https://lkml.kernel.org/r/20250428131937.641989-1-nysal@linux.ibm.com Fixes: 50910acd6f61 ("selftests/mm: use sys_pkey helpers consistently") Signed-off-by: Madhavan Srinivasan Signed-off-by: Nysal Jan K.A. Tested-by: Venkat Rao Bagalkote Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/pkey-powerpc.h | 2 ++ tools/testing/selftests/mm/pkey_util.c | 1 + 2 files changed, 3 insertions(+) diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index 1bad310d282ad6..d8ec906b812092 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -3,6 +3,8 @@ #ifndef _PKEYS_POWERPC_H #define _PKEYS_POWERPC_H +#include + #ifndef SYS_pkey_alloc # define SYS_pkey_alloc 384 # define SYS_pkey_free 385 diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c index ca4ad0d44ab2e9..255b332f7a08b2 100644 --- a/tools/testing/selftests/mm/pkey_util.c +++ b/tools/testing/selftests/mm/pkey_util.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#define __SANE_USERSPACE_TYPES__ #include #include From 8cf6ecb18baac867585fe1cba5dde6dbf3b6d29a Mon Sep 17 00:00:00 2001 From: "Nysal Jan K.A." Date: Mon, 28 Apr 2025 18:49:35 +0530 Subject: [PATCH 1597/2924] selftests/mm: fix a build failure on powerpc The compiler is unaware of the size of code generated by the ".rept" assembler directive. This results in the compiler emitting branch instructions where the offset to branch to exceeds the maximum allowed value, resulting in build failures like the following: CC protection_keys /tmp/ccypKWAE.s: Assembler messages: /tmp/ccypKWAE.s:2073: Error: operand out of range (0x0000000000020158 is not between 0xffffffffffff8000 and 0x0000000000007ffc) /tmp/ccypKWAE.s:2509: Error: operand out of range (0x0000000000020130 is not between 0xffffffffffff8000 and 0x0000000000007ffc) Fix the issue by manually adding nop instructions using the preprocessor. Link: https://lkml.kernel.org/r/20250428131937.641989-2-nysal@linux.ibm.com Fixes: 46036188ea1f ("selftests/mm: build with -O2") Reported-by: Madhavan Srinivasan Signed-off-by: Nysal Jan K.A. Tested-by: Venkat Rao Bagalkote Reviewed-by: Donet Tom Tested-by: Donet Tom Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/pkey-powerpc.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index d8ec906b812092..17bf2d1b0192e0 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -104,8 +104,18 @@ static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) return; } +#define REPEAT_8(s) s s s s s s s s +#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \ + REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) +#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \ + REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) +#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \ + REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) +#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \ + REPEAT_4096(s) REPEAT_4096(s) + /* 4-byte instructions * 16384 = 64K page */ -#define __page_o_noops() asm(".rept 16384 ; nop; .endr") +#define __page_o_noops() asm(REPEAT_16384("nop\n")) static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { From 9a9794a81a8a1be8670aa673ec0f0fbfbddeccae Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 29 Apr 2025 17:48:03 +0800 Subject: [PATCH 1598/2924] mm, swap: fix false warning for large allocation with !THP_SWAP The !CONFIG_THP_SWAP check existed before just fine because slot cache would reject high order allocation and let the caller split all folios and try again. But slot cache is gone, so large allocation will directly go to the allocator, and the allocator should just fail silently to inform caller to do the folio split, this is totally fine and expected. Remove this meaningless warning. Link: https://lkml.kernel.org/r/20250429094803.85518-1-ryncsn@gmail.com Fixes: 0ff67f990bd4 ("mm, swap: remove swap slot cache") Signed-off-by: Kairui Song Reported-by: Heiko Carstens Closes: https://lore.kernel.org/linux-mm/20250428135252.25453B17-hca@linux.ibm.com/ Tested-by: Heiko Carstens Cc: Baoquan He Signed-off-by: Andrew Morton --- mm/swapfile.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 2eff8b51a9455f..f214843612dc32 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1272,13 +1272,22 @@ int folio_alloc_swap(struct folio *folio, gfp_t gfp) VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio); - /* - * Should not even be attempting large allocations when huge - * page swap is disabled. Warn and fail the allocation. - */ - if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) { - VM_WARN_ON_ONCE(1); - return -EINVAL; + if (order) { + /* + * Reject large allocation when THP_SWAP is disabled, + * the caller should split the folio and try again. + */ + if (!IS_ENABLED(CONFIG_THP_SWAP)) + return -EAGAIN; + + /* + * Allocation size should never exceed cluster size + * (HPAGE_PMD_SIZE). + */ + if (size > SWAPFILE_CLUSTER) { + VM_WARN_ON_ONCE(1); + return -EINVAL; + } } local_lock(&percpu_swap_cluster.lock); From dac2a4f663c4bcd75add07aedf9d30c9f8e5ead5 Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Thu, 1 May 2025 04:43:24 +0000 Subject: [PATCH 1599/2924] mm/hugetlb: copy the CMA flag when demoting Since commit d2d786714080 ("mm/hugetlb: enable bootmem allocation from CMA areas"), a flag is used to mark hugetlb folios as allocated from CMA. This flag is also used to decide if it should be freed to CMA. However, the flag isn't copied to the smaller folios when a hugetlb folio is broken up for demotion, which would cause it to be freed incorrectly. Fix this by copying the flag to the smaller order hugetlb pages created from the original one. Link: https://lkml.kernel.org/r/20250501044325.20365-1-fvdl@google.com Fixes: d2d786714080 ("mm/hugetlb: enable bootmem allocation from CMA areas") Signed-off-by: Frank van der Linden Reviewed-by: Anshuman Khandual Reviewed-by: Oscar Salvador Reviewed-by: Jane Chu Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/hugetlb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e3e6ac991b9c6b..6ea1be71aa429e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4034,10 +4034,13 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst, list_for_each_entry_safe(folio, next, src_list, lru) { int i; + bool cma; if (folio_test_hugetlb_vmemmap_optimized(folio)) continue; + cma = folio_test_hugetlb_cma(folio); + list_del(&folio->lru); split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst)); @@ -4053,6 +4056,9 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst, new_folio->mapping = NULL; init_new_hugetlb_folio(dst, new_folio); + /* Copy the CMA flag so that it is freed correctly */ + if (cma) + folio_set_hugetlb_cma(new_folio); list_add(&new_folio->lru, &dst_list); } } From fb881cd7604536b17a1927fb0533f9a6982ffcc5 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 3 May 2025 14:33:14 +0900 Subject: [PATCH 1600/2924] nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs() After commit c0e473a0d226 ("block: fix race between set_blocksize and read paths") was merged, set_blocksize() called by sb_set_blocksize() now locks the inode of the backing device file. As a result of this change, syzbot started reporting deadlock warnings due to a circular dependency involving the semaphore "ns_sem" of the nilfs object, the inode lock of the backing device file, and the locks that this inode lock is transitively dependent on. This is caused by a new lock dependency added by the above change, since init_nilfs() calls sb_set_blocksize() in the lock section of "ns_sem". However, these warnings are false positives because init_nilfs() is called in the early stage of the mount operation and the filesystem has not yet started. The reason why "ns_sem" is locked in init_nilfs() was to avoid a race condition in nilfs_fill_super() caused by sharing a nilfs object among multiple filesystem instances (super block structures) in the early implementation. However, nilfs objects and super block structures have long ago become one-to-one, and there is no longer any need to use the semaphore there. So, fix this issue by removing the use of the semaphore "ns_sem" in init_nilfs(). Link: https://lkml.kernel.org/r/20250503053327.12294-1-konishi.ryusuke@gmail.com Fixes: c0e473a0d226 ("block: fix race between set_blocksize and read paths") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=00f7f5b884b117ee6773 Tested-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com Reported-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f30591e72bfc24d4715b Tested-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com> Signed-off-by: Andrew Morton --- fs/nilfs2/the_nilfs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index cb01ea81724dca..d0bcf744c553a0 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -705,8 +705,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) int blocksize; int err; - down_write(&nilfs->ns_sem); - blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { nilfs_err(sb, "unable to set blocksize"); @@ -779,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) set_nilfs_init(nilfs); err = 0; out: - up_write(&nilfs->ns_sem); return err; failed_sbh: From 7b08b74f3d99f6b801250683c751d391128799ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Fri, 2 May 2025 23:50:19 +0200 Subject: [PATCH 1601/2924] mm: fix folio_pte_batch() on XEN PV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On XEN PV, folio_pte_batch() can incorrectly batch beyond the end of a folio due to a corner case in pte_advance_pfn(). Specifically, when the PFN following the folio maps to an invalidated MFN, expected_pte = pte_advance_pfn(expected_pte, nr); produces a pte_none(). If the actual next PTE in memory is also pte_none(), the pte_same() succeeds, if (!pte_same(pte, expected_pte)) break; the loop is not broken, and batching continues into unrelated memory. For example, with a 4-page folio, the PTE layout might look like this: [ 53.465673] [ T2552] folio_pte_batch: printing PTE values at addr=0x7f1ac9dc5000 [ 53.465674] [ T2552] PTE[453] = 000000010085c125 [ 53.465679] [ T2552] PTE[454] = 000000010085d125 [ 53.465682] [ T2552] PTE[455] = 000000010085e125 [ 53.465684] [ T2552] PTE[456] = 000000010085f125 [ 53.465686] [ T2552] PTE[457] = 0000000000000000 <-- not present [ 53.465689] [ T2552] PTE[458] = 0000000101da7125 pte_advance_pfn(PTE[456]) returns a pte_none() due to invalid PFN->MFN mapping. The next actual PTE (PTE[457]) is also pte_none(), so the loop continues and includes PTE[457] in the batch, resulting in 5 batched entries for a 4-page folio. This triggers the following warning: [ 53.465751] [ T2552] page: refcount:85 mapcount:20 mapping:ffff88813ff4f6a8 index:0x110 pfn:0x10085c [ 53.465754] [ T2552] head: order:2 mapcount:80 entire_mapcount:0 nr_pages_mapped:4 pincount:0 [ 53.465756] [ T2552] memcg:ffff888003573000 [ 53.465758] [ T2552] aops:0xffffffff8226fd20 ino:82467c dentry name(?):"libc.so.6" [ 53.465761] [ T2552] flags: 0x2000000000416c(referenced|uptodate|lru|active|private|head|node=0|zone=2) [ 53.465764] [ T2552] raw: 002000000000416c ffffea0004021f08 ffffea0004021908 ffff88813ff4f6a8 [ 53.465767] [ T2552] raw: 0000000000000110 ffff888133d8bd40 0000005500000013 ffff888003573000 [ 53.465768] [ T2552] head: 002000000000416c ffffea0004021f08 ffffea0004021908 ffff88813ff4f6a8 [ 53.465770] [ T2552] head: 0000000000000110 ffff888133d8bd40 0000005500000013 ffff888003573000 [ 53.465772] [ T2552] head: 0020000000000202 ffffea0004021701 000000040000004f 00000000ffffffff [ 53.465774] [ T2552] head: 0000000300000003 8000000300000002 0000000000000013 0000000000000004 [ 53.465775] [ T2552] page dumped because: VM_WARN_ON_FOLIO((_Generic((page + nr_pages - 1), const struct page *: (const struct folio *)_compound_head(page + nr_pages - 1), struct page *: (struct folio *)_compound_head(page + nr_pages - 1))) != folio) Original code works as expected everywhere, except on XEN PV, where pte_advance_pfn() can yield a pte_none() after balloon inflation due to MFNs invalidation. In XEN, pte_advance_pfn() ends up calling __pte()->xen_make_pte()->pte_pfn_to_mfn(), which returns pte_none() when mfn == INVALID_P2M_ENTRY. The pte_pfn_to_mfn() documents that nastiness: If there's no mfn for the pfn, then just create an empty non-present pte. Unfortunately this loses information about the original pfn, so pte_mfn_to_pfn is asymmetric. While such hacks should certainly be removed, we can do better in folio_pte_batch() and simply check ahead of time how many PTEs we can possibly batch in our folio. This way, we can not only fix the issue but cleanup the code: removing the pte_pfn() check inside the loop body and avoiding end_ptr comparison + arithmetic. Link: https://lkml.kernel.org/r/20250502215019.822-2-arkamar@atlas.cz Fixes: f8d937761d65 ("mm/memory: optimize fork() with PTE-mapped THP") Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Signed-off-by: Petr Vaněk Cc: Ryan Roberts Cc: Signed-off-by: Andrew Morton --- mm/internal.h | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index e9695baa592266..25a29872c634ba 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -248,11 +248,9 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags, bool *any_writable, bool *any_young, bool *any_dirty) { - unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio); - const pte_t *end_ptep = start_ptep + max_nr; pte_t expected_pte, *ptep; bool writable, young, dirty; - int nr; + int nr, cur_nr; if (any_writable) *any_writable = false; @@ -265,11 +263,15 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio); VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio); + /* Limit max_nr to the actual remaining PFNs in the folio we could batch. */ + max_nr = min_t(unsigned long, max_nr, + folio_pfn(folio) + folio_nr_pages(folio) - pte_pfn(pte)); + nr = pte_batch_hint(start_ptep, pte); expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags); ptep = start_ptep + nr; - while (ptep < end_ptep) { + while (nr < max_nr) { pte = ptep_get(ptep); if (any_writable) writable = !!pte_write(pte); @@ -282,14 +284,6 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, if (!pte_same(pte, expected_pte)) break; - /* - * Stop immediately once we reached the end of the folio. In - * corner cases the next PFN might fall into a different - * folio. - */ - if (pte_pfn(pte) >= folio_end_pfn) - break; - if (any_writable) *any_writable |= writable; if (any_young) @@ -297,12 +291,13 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, if (any_dirty) *any_dirty |= dirty; - nr = pte_batch_hint(ptep, pte); - expected_pte = pte_advance_pfn(expected_pte, nr); - ptep += nr; + cur_nr = pte_batch_hint(ptep, pte); + expected_pte = pte_advance_pfn(expected_pte, cur_nr); + ptep += cur_nr; + nr += cur_nr; } - return min(ptep - start_ptep, max_nr); + return min(nr, max_nr); } /** From f34343cc11afc7bb1f881c3492bee3484016bf71 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:39 -0700 Subject: [PATCH 1602/2924] fbnic: Fix initialization of mailbox descriptor rings Address to issues with the FW mailbox descriptor initialization. We need to reverse the order of accesses when we invalidate an entry versus writing an entry. When writing an entry we write upper and then lower as the lower 32b contain the valid bit that makes the entire address valid. However for invalidation we should write it in the reverse order so that the upper is marked invalid before we update it. Without this change we may see FW attempt to access pages with the upper 32b of the address set to 0 which will likely result in DMAR faults due to write access failures on mailbox shutdown. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654717972.499179.8083789731819297034.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 32 ++++++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 88db3dacb94059..c4956f0a741e8b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -17,11 +17,29 @@ static void __fbnic_mbx_wr_desc(struct fbnic_dev *fbd, int mbx_idx, { u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); + /* Write the upper 32b and then the lower 32b. Doing this the + * FW can then read lower, upper, lower to verify that the state + * of the descriptor wasn't changed mid-transaction. + */ fw_wr32(fbd, desc_offset + 1, upper_32_bits(desc)); fw_wrfl(fbd); fw_wr32(fbd, desc_offset, lower_32_bits(desc)); } +static void __fbnic_mbx_invalidate_desc(struct fbnic_dev *fbd, int mbx_idx, + int desc_idx, u32 desc) +{ + u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); + + /* For initialization we write the lower 32b of the descriptor first. + * This way we can set the state to mark it invalid before we clear the + * upper 32b. + */ + fw_wr32(fbd, desc_offset, desc); + fw_wrfl(fbd); + fw_wr32(fbd, desc_offset + 1, 0); +} + static u64 __fbnic_mbx_rd_desc(struct fbnic_dev *fbd, int mbx_idx, int desc_idx) { u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); @@ -41,21 +59,17 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) * solid stop for the firmware to hit when it is done looping * through the ring. */ - __fbnic_mbx_wr_desc(fbd, mbx_idx, 0, 0); - - fw_wrfl(fbd); + __fbnic_mbx_invalidate_desc(fbd, mbx_idx, 0, 0); /* We then fill the rest of the ring starting at the end and moving * back toward descriptor 0 with skip descriptors that have no * length nor address, and tell the firmware that they can skip * them and just move past them to the one we initialized to 0. */ - for (desc_idx = FBNIC_IPC_MBX_DESC_LEN; --desc_idx;) { - __fbnic_mbx_wr_desc(fbd, mbx_idx, desc_idx, - FBNIC_IPC_MBX_DESC_FW_CMPL | - FBNIC_IPC_MBX_DESC_HOST_CMPL); - fw_wrfl(fbd); - } + for (desc_idx = FBNIC_IPC_MBX_DESC_LEN; --desc_idx;) + __fbnic_mbx_invalidate_desc(fbd, mbx_idx, desc_idx, + FBNIC_IPC_MBX_DESC_FW_CMPL | + FBNIC_IPC_MBX_DESC_HOST_CMPL); } void fbnic_mbx_init(struct fbnic_dev *fbd) From 3b12f00ddd08e888273b2ac0488d396d90a836fc Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:46 -0700 Subject: [PATCH 1603/2924] fbnic: Gate AXI read/write enabling on FW mailbox In order to prevent the device from throwing spurious writes and/or reads at us we need to gate the AXI fabric interface to the PCIe until such time as we know the FW is in a known good state. To accomplish this we use the mailbox as a mechanism for us to recognize that the FW has acknowledged our presence and is no longer sending any stale message data to us. We start in fbnic_mbx_init by calling fbnic_mbx_reset_desc_ring function, disabling the DMA in both directions, and then invalidating all the descriptors in each ring. We then poll the mailbox in fbnic_mbx_poll_tx_ready and when the interrupt is set by the FW we pick it up and mark the mailboxes as ready, while also enabling the DMA. Once we have completed all the transactions and need to shut down we call into fbnic_mbx_clean which will in turn call fbnic_mbx_reset_desc_ring for each ring and shut down the DMA and once again invalidate the descriptors. Fixes: 3646153161f1 ("eth: fbnic: Add register init to set PCIe/Ethernet device config") Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654718623.499179.7445197308109347982.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 2 ++ drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 38 +++++++++++++++++---- drivers/net/ethernet/meta/fbnic/fbnic_mac.c | 6 ---- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index 3b12a0ab59065e..51bee8072420b7 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -796,8 +796,10 @@ enum { /* PUL User Registers */ #define FBNIC_CSR_START_PUL_USER 0x31000 /* CSR section delimiter */ #define FBNIC_PUL_OB_TLP_HDR_AW_CFG 0x3103d /* 0xc40f4 */ +#define FBNIC_PUL_OB_TLP_HDR_AW_CFG_FLUSH CSR_BIT(19) #define FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME CSR_BIT(18) #define FBNIC_PUL_OB_TLP_HDR_AR_CFG 0x3103e /* 0xc40f8 */ +#define FBNIC_PUL_OB_TLP_HDR_AR_CFG_FLUSH CSR_BIT(19) #define FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME CSR_BIT(18) #define FBNIC_PUL_USER_OB_RD_TLP_CNT_31_0 \ 0x3106e /* 0xc41b8 */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index c4956f0a741e8b..f4749bfd840c52 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -51,10 +51,26 @@ static u64 __fbnic_mbx_rd_desc(struct fbnic_dev *fbd, int mbx_idx, int desc_idx) return desc; } -static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) +static void fbnic_mbx_reset_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { int desc_idx; + /* Disable DMA transactions from the device, + * and flush any transactions triggered during cleaning + */ + switch (mbx_idx) { + case FBNIC_IPC_MBX_RX_IDX: + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, + FBNIC_PUL_OB_TLP_HDR_AW_CFG_FLUSH); + break; + case FBNIC_IPC_MBX_TX_IDX: + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, + FBNIC_PUL_OB_TLP_HDR_AR_CFG_FLUSH); + break; + } + + wrfl(fbd); + /* Initialize first descriptor to all 0s. Doing this gives us a * solid stop for the firmware to hit when it is done looping * through the ring. @@ -90,7 +106,7 @@ void fbnic_mbx_init(struct fbnic_dev *fbd) wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_init_desc_ring(fbd, i); + fbnic_mbx_reset_desc_ring(fbd, i); } static int fbnic_mbx_map_msg(struct fbnic_dev *fbd, int mbx_idx, @@ -155,7 +171,7 @@ static void fbnic_mbx_clean_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { int i; - fbnic_mbx_init_desc_ring(fbd, mbx_idx); + fbnic_mbx_reset_desc_ring(fbd, mbx_idx); for (i = FBNIC_IPC_MBX_DESC_LEN; i--;) fbnic_mbx_unmap_and_free_msg(fbd, mbx_idx, i); @@ -354,7 +370,7 @@ static int fbnic_fw_xmit_cap_msg(struct fbnic_dev *fbd) return (err == -EOPNOTSUPP) ? 0 : err; } -static void fbnic_mbx_postinit_desc_ring(struct fbnic_dev *fbd, int mbx_idx) +static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; @@ -366,10 +382,18 @@ static void fbnic_mbx_postinit_desc_ring(struct fbnic_dev *fbd, int mbx_idx) switch (mbx_idx) { case FBNIC_IPC_MBX_RX_IDX: + /* Enable DMA writes from the device */ + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, + FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME); + /* Make sure we have a page for the FW to write to */ fbnic_mbx_alloc_rx_msgs(fbd); break; case FBNIC_IPC_MBX_TX_IDX: + /* Enable DMA reads from the device */ + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, + FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); + /* Force version to 1 if we successfully requested an update * from the firmware. This should be overwritten once we get * the actual version from the firmware in the capabilities @@ -386,7 +410,7 @@ static void fbnic_mbx_postinit(struct fbnic_dev *fbd) { int i; - /* We only need to do this on the first interrupt following init. + /* We only need to do this on the first interrupt following reset. * this primes the mailbox so that we will have cleared all the * skip descriptors. */ @@ -396,7 +420,7 @@ static void fbnic_mbx_postinit(struct fbnic_dev *fbd) wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_postinit_desc_ring(fbd, i); + fbnic_mbx_init_desc_ring(fbd, i); } /** @@ -894,7 +918,7 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) * avoid the mailbox getting stuck closed if the interrupt * is reset. */ - fbnic_mbx_init_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); + fbnic_mbx_reset_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); msleep(200); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index 14291401f46321..dde4a37116e20e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -79,12 +79,6 @@ static void fbnic_mac_init_axi(struct fbnic_dev *fbd) fbnic_init_readrq(fbd, FBNIC_QM_RNI_RBP_CTL, cls, readrq); fbnic_init_mps(fbd, FBNIC_QM_RNI_RDE_CTL, cls, mps); fbnic_init_mps(fbd, FBNIC_QM_RNI_RCM_CTL, cls, mps); - - /* Enable XALI AR/AW outbound */ - wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, - FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME); - wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, - FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); } static void fbnic_mac_init_qm(struct fbnic_dev *fbd) From 682a61281d1036962b68d651994cc407371daef5 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:52 -0700 Subject: [PATCH 1604/2924] fbnic: Add additional handling of IRQs We have two issues that need to be addressed in our IRQ handling. One is the fact that we can end up double-freeing IRQs in the event of an exception handling error such as a PCIe reset/recovery that fails. To prevent that from becoming an issue we can use the msix_vector values to indicate that we have successfully requested/freed the IRQ by only setting or clearing them when we have completed the given action. The other issue is that we have several potential races in our IRQ path due to us manipulating the mask before the vector has been truly disabled. In order to handle that in the case of the FW mailbox we need to not auto-enable the IRQ and instead will be enabling/disabling it separately. In the case of the PCS vector we can mitigate this by unmapping it and synchronizing the IRQ before we clear the mask. The general order of operations after this change is now to request the interrupt, poll the FW mailbox to ready, and then enable the interrupt. For the shutdown we do the reverse where we disable the interrupt, flush any pending Tx, and then free the IRQ. I am renaming the enable/disable to request/free to be equivilent with the IRQ calls being used. We may see additions in the future to enable/disable the IRQs versus request/free them for certain use cases. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Fixes: 69684376eed5 ("eth: fbnic: Add link detection") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654719271.499179.3634535105127848325.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic.h | 8 +- drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 142 ++++++++++++------ .../net/ethernet/meta/fbnic/fbnic_netdev.c | 5 +- drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 14 +- 4 files changed, 110 insertions(+), 59 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 4ca7b99ef13153..de6b1a340f5580 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -154,14 +154,14 @@ struct fbnic_dev *fbnic_devlink_alloc(struct pci_dev *pdev); void fbnic_devlink_register(struct fbnic_dev *fbd); void fbnic_devlink_unregister(struct fbnic_dev *fbd); -int fbnic_fw_enable_mbx(struct fbnic_dev *fbd); -void fbnic_fw_disable_mbx(struct fbnic_dev *fbd); +int fbnic_fw_request_mbx(struct fbnic_dev *fbd); +void fbnic_fw_free_mbx(struct fbnic_dev *fbd); void fbnic_hwmon_register(struct fbnic_dev *fbd); void fbnic_hwmon_unregister(struct fbnic_dev *fbd); -int fbnic_pcs_irq_enable(struct fbnic_dev *fbd); -void fbnic_pcs_irq_disable(struct fbnic_dev *fbd); +int fbnic_pcs_request_irq(struct fbnic_dev *fbd); +void fbnic_pcs_free_irq(struct fbnic_dev *fbd); void fbnic_napi_name_irqs(struct fbnic_dev *fbd); int fbnic_napi_request_irq(struct fbnic_dev *fbd, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 1bbc0e56f3a039..1c88a2bf3a7a78 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -19,69 +19,105 @@ static irqreturn_t fbnic_fw_msix_intr(int __always_unused irq, void *data) return IRQ_HANDLED; } +static int __fbnic_fw_enable_mbx(struct fbnic_dev *fbd, int vector) +{ + int err; + + /* Initialize mailbox and attempt to poll it into ready state */ + fbnic_mbx_init(fbd); + err = fbnic_mbx_poll_tx_ready(fbd); + if (err) { + dev_warn(fbd->dev, "FW mailbox did not enter ready state\n"); + return err; + } + + /* Enable interrupt and unmask the vector */ + enable_irq(vector); + fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); + + return 0; +} + /** - * fbnic_fw_enable_mbx - Configure and initialize Firmware Mailbox + * fbnic_fw_request_mbx - Configure and initialize Firmware Mailbox * @fbd: Pointer to device to initialize * - * This function will initialize the firmware mailbox rings, enable the IRQ - * and initialize the communication between the Firmware and the host. The - * firmware is expected to respond to the initialization by sending an - * interrupt essentially notifying the host that it has seen the - * initialization and is now synced up. + * This function will allocate the IRQ and then reinitialize the mailbox + * starting communication between the host and firmware. * * Return: non-zero on failure. **/ -int fbnic_fw_enable_mbx(struct fbnic_dev *fbd) +int fbnic_fw_request_mbx(struct fbnic_dev *fbd) { - u32 vector = fbd->fw_msix_vector; - int err; + struct pci_dev *pdev = to_pci_dev(fbd->dev); + int vector, err; + + WARN_ON(fbd->fw_msix_vector); + + vector = pci_irq_vector(pdev, FBNIC_FW_MSIX_ENTRY); + if (vector < 0) + return vector; /* Request the IRQ for FW Mailbox vector. */ err = request_threaded_irq(vector, NULL, &fbnic_fw_msix_intr, - IRQF_ONESHOT, dev_name(fbd->dev), fbd); + IRQF_ONESHOT | IRQF_NO_AUTOEN, + dev_name(fbd->dev), fbd); if (err) return err; /* Initialize mailbox and attempt to poll it into ready state */ - fbnic_mbx_init(fbd); - err = fbnic_mbx_poll_tx_ready(fbd); - if (err) { - dev_warn(fbd->dev, "FW mailbox did not enter ready state\n"); + err = __fbnic_fw_enable_mbx(fbd, vector); + if (err) free_irq(vector, fbd); - return err; - } - /* Enable interrupts */ - fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); + fbd->fw_msix_vector = vector; - return 0; + return err; } /** - * fbnic_fw_disable_mbx - Disable mailbox and place it in standby state - * @fbd: Pointer to device to disable + * fbnic_fw_disable_mbx - Temporarily place mailbox in standby state + * @fbd: Pointer to device * - * This function will disable the mailbox interrupt, free any messages still - * in the mailbox and place it into a standby state. The firmware is - * expected to see the update and assume that the host is in the reset state. + * Shutdown the mailbox by notifying the firmware to stop sending us logs, mask + * and synchronize the IRQ, and then clean up the rings. **/ -void fbnic_fw_disable_mbx(struct fbnic_dev *fbd) +static void fbnic_fw_disable_mbx(struct fbnic_dev *fbd) { - /* Disable interrupt and free vector */ - fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_FW_MSIX_ENTRY); + /* Disable interrupt and synchronize the IRQ */ + disable_irq(fbd->fw_msix_vector); - /* Free the vector */ - free_irq(fbd->fw_msix_vector, fbd); + /* Mask the vector */ + fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_FW_MSIX_ENTRY); /* Make sure disabling logs message is sent, must be done here to * avoid risk of completing without a running interrupt. */ fbnic_mbx_flush_tx(fbd); - - /* Reset the mailboxes to the initialized state */ fbnic_mbx_clean(fbd); } +/** + * fbnic_fw_free_mbx - Disable mailbox and place it in standby state + * @fbd: Pointer to device to disable + * + * This function will disable the mailbox interrupt, free any messages still + * in the mailbox and place it into a disabled state. The firmware is + * expected to see the update and assume that the host is in the reset state. + **/ +void fbnic_fw_free_mbx(struct fbnic_dev *fbd) +{ + /* Vector has already been freed */ + if (!fbd->fw_msix_vector) + return; + + fbnic_fw_disable_mbx(fbd); + + /* Free the vector */ + free_irq(fbd->fw_msix_vector, fbd); + fbd->fw_msix_vector = 0; +} + static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) { struct fbnic_dev *fbd = data; @@ -101,7 +137,7 @@ static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) } /** - * fbnic_pcs_irq_enable - Configure the MAC to enable it to advertise link + * fbnic_pcs_request_irq - Configure the PCS to enable it to advertise link * @fbd: Pointer to device to initialize * * This function provides basic bringup for the MAC/PCS IRQ. For now the IRQ @@ -109,41 +145,61 @@ static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) * * Return: non-zero on failure. **/ -int fbnic_pcs_irq_enable(struct fbnic_dev *fbd) +int fbnic_pcs_request_irq(struct fbnic_dev *fbd) { - u32 vector = fbd->pcs_msix_vector; - int err; + struct pci_dev *pdev = to_pci_dev(fbd->dev); + int vector, err; - /* Request the IRQ for MAC link vector. - * Map MAC cause to it, and unmask it + WARN_ON(fbd->pcs_msix_vector); + + vector = pci_irq_vector(pdev, FBNIC_PCS_MSIX_ENTRY); + if (vector < 0) + return vector; + + /* Request the IRQ for PCS link vector. + * Map PCS cause to it, and unmask it */ err = request_irq(vector, &fbnic_pcs_msix_intr, 0, fbd->netdev->name, fbd); if (err) return err; + /* Map and enable interrupt, unmask vector after link is configured */ fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_PCS_IDX), FBNIC_PCS_MSIX_ENTRY | FBNIC_INTR_MSIX_CTRL_ENABLE); + fbd->pcs_msix_vector = vector; + return 0; } /** - * fbnic_pcs_irq_disable - Teardown the MAC IRQ to prepare for stopping + * fbnic_pcs_free_irq - Teardown the PCS IRQ to prepare for stopping * @fbd: Pointer to device that is stopping * - * This function undoes the work done in fbnic_pcs_irq_enable and prepares + * This function undoes the work done in fbnic_pcs_request_irq and prepares * the device to no longer receive traffic on the host interface. **/ -void fbnic_pcs_irq_disable(struct fbnic_dev *fbd) +void fbnic_pcs_free_irq(struct fbnic_dev *fbd) { + /* Vector has already been freed */ + if (!fbd->pcs_msix_vector) + return; + /* Disable interrupt */ fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_PCS_IDX), FBNIC_PCS_MSIX_ENTRY); + fbnic_wrfl(fbd); + + /* Synchronize IRQ to prevent race that would unmask vector */ + synchronize_irq(fbd->pcs_msix_vector); + + /* Mask the vector */ fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_PCS_MSIX_ENTRY); /* Free the vector */ free_irq(fbd->pcs_msix_vector, fbd); + fbd->pcs_msix_vector = 0; } void fbnic_synchronize_irq(struct fbnic_dev *fbd, int nr) @@ -226,9 +282,6 @@ void fbnic_free_irqs(struct fbnic_dev *fbd) { struct pci_dev *pdev = to_pci_dev(fbd->dev); - fbd->pcs_msix_vector = 0; - fbd->fw_msix_vector = 0; - fbd->num_irqs = 0; pci_free_irq_vectors(pdev); @@ -254,8 +307,5 @@ int fbnic_alloc_irqs(struct fbnic_dev *fbd) fbd->num_irqs = num_irqs; - fbd->pcs_msix_vector = pci_irq_vector(pdev, FBNIC_PCS_MSIX_ENTRY); - fbd->fw_msix_vector = pci_irq_vector(pdev, FBNIC_FW_MSIX_ENTRY); - return 0; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 79a01fdd1dd168..2524d9b88d591f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -44,9 +44,10 @@ int __fbnic_open(struct fbnic_net *fbn) if (err) goto time_stop; - err = fbnic_pcs_irq_enable(fbd); + err = fbnic_pcs_request_irq(fbd); if (err) goto time_stop; + /* Pull the BMC config and initialize the RPC */ fbnic_bmc_rpc_init(fbd); fbnic_rss_reinit(fbd, fbn); @@ -82,7 +83,7 @@ static int fbnic_stop(struct net_device *netdev) struct fbnic_net *fbn = netdev_priv(netdev); fbnic_down(fbn); - fbnic_pcs_irq_disable(fbn->fbd); + fbnic_pcs_free_irq(fbn->fbd); fbnic_time_stop(fbn); fbnic_fw_xmit_ownership_msg(fbn->fbd, false); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 6cbbc2ee3e1f98..4e8595239c0faa 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -283,7 +283,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto free_irqs; } - err = fbnic_fw_enable_mbx(fbd); + err = fbnic_fw_request_mbx(fbd); if (err) { dev_err(&pdev->dev, "Firmware mailbox initialization failure\n"); @@ -363,7 +363,7 @@ static void fbnic_remove(struct pci_dev *pdev) fbnic_hwmon_unregister(fbd); fbnic_dbg_fbd_exit(fbd); fbnic_devlink_unregister(fbd); - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); fbnic_free_irqs(fbd); fbnic_devlink_free(fbd); @@ -387,7 +387,7 @@ static int fbnic_pm_suspend(struct device *dev) rtnl_unlock(); null_uc_addr: - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); /* Free the IRQs so they aren't trying to occupy sleeping CPUs */ fbnic_free_irqs(fbd); @@ -420,7 +420,7 @@ static int __fbnic_pm_resume(struct device *dev) fbd->mac->init_regs(fbd); /* Re-enable mailbox */ - err = fbnic_fw_enable_mbx(fbd); + err = fbnic_fw_request_mbx(fbd); if (err) goto err_free_irqs; @@ -438,15 +438,15 @@ static int __fbnic_pm_resume(struct device *dev) if (netif_running(netdev)) { err = __fbnic_open(fbn); if (err) - goto err_disable_mbx; + goto err_free_mbx; } rtnl_unlock(); return 0; -err_disable_mbx: +err_free_mbx: rtnl_unlock(); - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); err_free_irqs: fbnic_free_irqs(fbd); err_invalidate_uc_addr: From 0f9a959a0addd9bbc47e5d16c36b3a7f97981915 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 08:59:59 -0700 Subject: [PATCH 1605/2924] fbnic: Actually flush_tx instead of stalling out The fbnic_mbx_flush_tx function had a number of issues. First, we were waiting 200ms for the firmware to process the packets. We can drop this to 20ms and in almost all cases this should be more than enough time. So by changing this we can significantly reduce shutdown time. Second, we were not making sure that the Tx path was actually shut off. As such we could still have packets added while we were flushing the mailbox. To prevent that we can now clear the ready flag for the Tx side and it should stay down since the interrupt is disabled. Third, we kept re-reading the tail due to the second issue. The tail should not move after we have started the flush so we can just read it once while we are holding the mailbox Tx lock. By doing that we are guaranteed that the value should be consistent. Fourth, we were keeping a count of descriptors cleaned due to the second and third issues called out. That count is not a valid reason to be exiting the cleanup, and with the tail only being read once we shouldn't see any cases where the tail moves after the disable so the tracking of count can be dropped. Fifth, we were using attempts * sleep time to determine how long we would wait in our polling loop to flush out the Tx. This can be very imprecise. In order to tighten up the timing we are shifting over to using a jiffies value of jiffies + 10 * HZ + 1 to determine the jiffies value we should stop polling at as this should be accurate within once sleep cycle for the total amount of time spent polling. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654719929.499179.16406653096197423749.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 31 +++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index f4749bfd840c52..d019191d6ae97e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -930,35 +930,36 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) { + unsigned long timeout = jiffies + 10 * HZ + 1; struct fbnic_fw_mbx *tx_mbx; - int attempts = 50; - u8 count = 0; - - /* Nothing to do if there is no mailbox */ - if (!fbnic_fw_present(fbd)) - return; + u8 tail; /* Record current Rx stats */ tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - /* Nothing to do if mailbox never got to ready */ - if (!tx_mbx->ready) - return; + spin_lock_irq(&fbd->fw_tx_lock); + + /* Clear ready to prevent any further attempts to transmit */ + tx_mbx->ready = false; + + /* Read tail to determine the last tail state for the ring */ + tail = tx_mbx->tail; + + spin_unlock_irq(&fbd->fw_tx_lock); /* Give firmware time to process packet, - * we will wait up to 10 seconds which is 50 waits of 200ms. + * we will wait up to 10 seconds which is 500 waits of 20ms. */ do { u8 head = tx_mbx->head; - if (head == tx_mbx->tail) + /* Tx ring is empty once head == tail */ + if (head == tail) break; - msleep(200); + msleep(20); fbnic_mbx_process_tx_msgs(fbd); - - count += (tx_mbx->head - head) % FBNIC_IPC_MBX_DESC_LEN; - } while (count < FBNIC_IPC_MBX_DESC_LEN && --attempts); + } while (time_is_after_jiffies(timeout)); } void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version, From cdbb2dc3996a60ed3d7431c1239a8ca98c778e04 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:05 -0700 Subject: [PATCH 1606/2924] fbnic: Cleanup handling of completions There was an issue in that if we were to shutdown we could be left with a completion in flight as the mailbox went away. To address that I have added an fbnic_mbx_evict_all_cmpl function that is meant to essentially create a "broken pipe" type response so that all callers will receive an error indicating that the connection has been broken as a result of us shutting down the mailbox. Fixes: 378e5cc1c6c6 ("eth: fbnic: hwmon: Add completion infrastructure for firmware requests") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654720578.499179.380252598204530873.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index d019191d6ae97e..732875aae46cd6 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -928,6 +928,20 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) return attempts ? 0 : -ETIMEDOUT; } +static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) +{ + cmpl_data->result = -EPIPE; + complete(&cmpl_data->done); +} + +static void fbnic_mbx_evict_all_cmpl(struct fbnic_dev *fbd) +{ + if (fbd->cmpl_data) { + __fbnic_fw_evict_cmpl(fbd->cmpl_data); + fbd->cmpl_data = NULL; + } +} + void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) { unsigned long timeout = jiffies + 10 * HZ + 1; @@ -945,6 +959,9 @@ void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) /* Read tail to determine the last tail state for the ring */ tail = tx_mbx->tail; + /* Flush any completions as we are no longer processing Rx */ + fbnic_mbx_evict_all_cmpl(fbd); + spin_unlock_irq(&fbd->fw_tx_lock); /* Give firmware time to process packet, From ab064f6005973d456f95ae99cd9ea0d8ab676cce Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:12 -0700 Subject: [PATCH 1607/2924] fbnic: Improve responsiveness of fbnic_mbx_poll_tx_ready There were a couple different issues found in fbnic_mbx_poll_tx_ready. Among them were the fact that we were sleeping much longer than we actually needed to as the actual FW could respond in under 20ms. The other issue was that we would just keep polling the mailbox even if the device itself had gone away. To address the responsiveness issues we can decrease the sleeps to 20ms and use a jiffies based timeout value rather than just counting the number of times we slept and then polled. To address the hardware going away we can move the check for the firmware BAR being present from where it was and place it inside the loop after the mailbox descriptor ring is initialized and before we sleep so that we just abort and return an error if the device went away during initialization. With these two changes we see a significant improvement in boot times for the driver. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654721224.499179.2698616208976624755.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 732875aae46cd6..d344b454f28ba7 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -905,27 +905,30 @@ void fbnic_mbx_poll(struct fbnic_dev *fbd) int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) { + unsigned long timeout = jiffies + 10 * HZ + 1; struct fbnic_fw_mbx *tx_mbx; - int attempts = 50; - - /* Immediate fail if BAR4 isn't there */ - if (!fbnic_fw_present(fbd)) - return -ENODEV; tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - while (!tx_mbx->ready && --attempts) { + while (!tx_mbx->ready) { + if (!time_is_after_jiffies(timeout)) + return -ETIMEDOUT; + /* Force the firmware to trigger an interrupt response to * avoid the mailbox getting stuck closed if the interrupt * is reset. */ fbnic_mbx_reset_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); - msleep(200); + /* Immediate fail if BAR4 went away */ + if (!fbnic_fw_present(fbd)) + return -ENODEV; + + msleep(20); fbnic_mbx_poll(fbd); } - return attempts ? 0 : -ETIMEDOUT; + return 0; } static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) From 1b34d1c1dc8384884febd83140c9afbc7c4b9eb8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:18 -0700 Subject: [PATCH 1608/2924] fbnic: Pull fbnic_fw_xmit_cap_msg use out of interrupt context This change pulls the call to fbnic_fw_xmit_cap_msg out of fbnic_mbx_init_desc_ring and instead places it in the polling function for getting the Tx ready. Doing that we can avoid the potential issue with an interrupt coming in later from the firmware that causes it to get fired in interrupt context. Fixes: 20d2e88cc746 ("eth: fbnic: Add initial messaging to notify FW of our presence") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654721876.499179.9839651602256668493.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 43 ++++++++-------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index d344b454f28ba7..90a45c7015436f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -352,24 +352,6 @@ static int fbnic_fw_xmit_simple_msg(struct fbnic_dev *fbd, u32 msg_type) return err; } -/** - * fbnic_fw_xmit_cap_msg - Allocate and populate a FW capabilities message - * @fbd: FBNIC device structure - * - * Return: NULL on failure to allocate, error pointer on error, or pointer - * to new TLV test message. - * - * Sends a single TLV header indicating the host wants the firmware to - * confirm the capabilities and version. - **/ -static int fbnic_fw_xmit_cap_msg(struct fbnic_dev *fbd) -{ - int err = fbnic_fw_xmit_simple_msg(fbd, FBNIC_TLV_MSG_ID_HOST_CAP_REQ); - - /* Return 0 if we are not calling this on ASIC */ - return (err == -EOPNOTSUPP) ? 0 : err; -} - static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; @@ -393,15 +375,6 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) /* Enable DMA reads from the device */ wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); - - /* Force version to 1 if we successfully requested an update - * from the firmware. This should be overwritten once we get - * the actual version from the firmware in the capabilities - * request message. - */ - if (!fbnic_fw_xmit_cap_msg(fbd) && - !fbd->fw_cap.running.mgmt.version) - fbd->fw_cap.running.mgmt.version = 1; break; } } @@ -907,6 +880,7 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) { unsigned long timeout = jiffies + 10 * HZ + 1; struct fbnic_fw_mbx *tx_mbx; + int err; tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; while (!tx_mbx->ready) { @@ -928,7 +902,22 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) fbnic_mbx_poll(fbd); } + /* Request an update from the firmware. This should overwrite + * mgmt.version once we get the actual version from the firmware + * in the capabilities request message. + */ + err = fbnic_fw_xmit_simple_msg(fbd, FBNIC_TLV_MSG_ID_HOST_CAP_REQ); + if (err) + goto clean_mbx; + + /* Use "1" to indicate we entered the state waiting for a response */ + fbd->fw_cap.running.mgmt.version = 1; + return 0; +clean_mbx: + /* Cleanup Rx buffers and disable mailbox */ + fbnic_mbx_clean(fbd); + return err; } static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) From ce2fa1dba204c761582674cf2eb9cbe0b949b5c7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 6 May 2025 09:00:25 -0700 Subject: [PATCH 1609/2924] fbnic: Do not allow mailbox to toggle to ready outside fbnic_mbx_poll_tx_ready We had originally thought to have the mailbox go to ready in the background while we were doing other things. One issue with this though is that we can't disable it by clearing the ready state without also blocking interrupts or calls to mbx_poll as it will just pop back to life during an interrupt. In order to prevent that from happening we can pull the code for toggling to ready out of the interrupt path and instead place it in the fbnic_mbx_poll_tx_ready path so that it becomes the only spot where the Rx/Tx can toggle to the ready state. By doing this we can prevent races where we disable the DMA and/or free buffers only to have an interrupt fire and undo what we have done. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Signed-off-by: Alexander Duyck Reviewed-by: Jacob Keller Link: https://patch.msgid.link/174654722518.499179.11612865740376848478.stgit@ahduyck-xeon-server.home.arpa Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 27 ++++++++-------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 90a45c7015436f..3d9636a6c968ec 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -356,10 +356,6 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; - /* This is a one time init, so just exit if it is completed */ - if (mbx->ready) - return; - mbx->ready = true; switch (mbx_idx) { @@ -379,21 +375,18 @@ static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) } } -static void fbnic_mbx_postinit(struct fbnic_dev *fbd) +static bool fbnic_mbx_event(struct fbnic_dev *fbd) { - int i; - /* We only need to do this on the first interrupt following reset. * this primes the mailbox so that we will have cleared all the * skip descriptors. */ if (!(rd32(fbd, FBNIC_INTR_STATUS(0)) & (1u << FBNIC_FW_MSIX_ENTRY))) - return; + return false; wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); - for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_init_desc_ring(fbd, i); + return true; } /** @@ -870,7 +863,7 @@ static void fbnic_mbx_process_rx_msgs(struct fbnic_dev *fbd) void fbnic_mbx_poll(struct fbnic_dev *fbd) { - fbnic_mbx_postinit(fbd); + fbnic_mbx_event(fbd); fbnic_mbx_process_tx_msgs(fbd); fbnic_mbx_process_rx_msgs(fbd); @@ -879,11 +872,9 @@ void fbnic_mbx_poll(struct fbnic_dev *fbd) int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) { unsigned long timeout = jiffies + 10 * HZ + 1; - struct fbnic_fw_mbx *tx_mbx; - int err; + int err, i; - tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - while (!tx_mbx->ready) { + do { if (!time_is_after_jiffies(timeout)) return -ETIMEDOUT; @@ -898,9 +889,11 @@ int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) return -ENODEV; msleep(20); + } while (!fbnic_mbx_event(fbd)); - fbnic_mbx_poll(fbd); - } + /* FW has shown signs of life. Enable DMA and start Tx/Rx */ + for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) + fbnic_mbx_init_desc_ring(fbd, i); /* Request an update from the firmware. This should overwrite * mgmt.version once we get the actual version from the firmware From 23fa6a23d97182d36ca3c71e43c804fa91e46a03 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 May 2025 17:32:20 -0700 Subject: [PATCH 1610/2924] net: export a helper for adding up queue stats Older drivers and drivers with lower queue counts often have a static array of queues, rather than allocating structs for each queue on demand. Add a helper for adding up qstats from a queue range. Expectation is that driver will pass a queue range [netdev->real_num_*x_queues, MAX). It was tempting to always use num_*x_queues as the end, but virtio seems to clamp its queue count after allocating the netdev. And this way we can trivaly reuse the helper for [0, real_..). Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250507003221.823267-2-kuba@kernel.org Signed-off-by: Paolo Abeni --- include/net/netdev_queues.h | 6 ++++ net/core/netdev-genl.c | 69 +++++++++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 825141d675e583..d01c82983b4d6d 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -103,6 +103,12 @@ struct netdev_stat_ops { struct netdev_queue_stats_tx *tx); }; +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum); + /** * struct netdev_queue_mgmt_ops - netdev ops for queue management * diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 230743bdbb1407..dae9f0d432fb34 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -708,25 +708,66 @@ netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, return 0; } +/** + * netdev_stat_queue_sum() - add up queue stats from range of queues + * @netdev: net_device + * @rx_start: index of the first Rx queue to query + * @rx_end: index after the last Rx queue (first *not* to query) + * @rx_sum: output Rx stats, should be already initialized + * @tx_start: index of the first Tx queue to query + * @tx_end: index after the last Tx queue (first *not* to query) + * @tx_sum: output Tx stats, should be already initialized + * + * Add stats from [start, end) range of queue IDs to *x_sum structs. + * The sum structs must be already initialized. Usually this + * helper is invoked from the .get_base_stats callbacks of drivers + * to account for stats of disabled queues. In that case the ranges + * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues). + */ +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum) +{ + const struct netdev_stat_ops *ops; + struct netdev_queue_stats_rx rx; + struct netdev_queue_stats_tx tx; + int i; + + ops = netdev->stat_ops; + + for (i = rx_start; i < rx_end; i++) { + memset(&rx, 0xff, sizeof(rx)); + if (ops->get_queue_stats_rx) + ops->get_queue_stats_rx(netdev, i, &rx); + netdev_nl_stats_add(rx_sum, &rx, sizeof(rx)); + } + for (i = tx_start; i < tx_end; i++) { + memset(&tx, 0xff, sizeof(tx)); + if (ops->get_queue_stats_tx) + ops->get_queue_stats_tx(netdev, i, &tx); + netdev_nl_stats_add(tx_sum, &tx, sizeof(tx)); + } +} +EXPORT_SYMBOL(netdev_stat_queue_sum); + static int netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { - struct netdev_queue_stats_rx rx_sum, rx; - struct netdev_queue_stats_tx tx_sum, tx; - const struct netdev_stat_ops *ops; + struct netdev_queue_stats_rx rx_sum; + struct netdev_queue_stats_tx tx_sum; void *hdr; - int i; - ops = netdev->stat_ops; /* Netdev can't guarantee any complete counters */ - if (!ops->get_base_stats) + if (!netdev->stat_ops->get_base_stats) return 0; memset(&rx_sum, 0xff, sizeof(rx_sum)); memset(&tx_sum, 0xff, sizeof(tx_sum)); - ops->get_base_stats(netdev, &rx_sum, &tx_sum); + netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum); /* The op was there, but nothing reported, don't bother */ if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && @@ -739,18 +780,8 @@ netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) goto nla_put_failure; - for (i = 0; i < netdev->real_num_rx_queues; i++) { - memset(&rx, 0xff, sizeof(rx)); - if (ops->get_queue_stats_rx) - ops->get_queue_stats_rx(netdev, i, &rx); - netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx)); - } - for (i = 0; i < netdev->real_num_tx_queues; i++) { - memset(&tx, 0xff, sizeof(tx)); - if (ops->get_queue_stats_tx) - ops->get_queue_stats_tx(netdev, i, &tx); - netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx)); - } + netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum, + 0, netdev->real_num_tx_queues, &tx_sum); if (netdev_nl_stats_write_rx(rsp, &rx_sum) || netdev_nl_stats_write_tx(rsp, &tx_sum)) From 001160ec8c59115efc39e197d40829bdafd4d7f5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 May 2025 17:32:21 -0700 Subject: [PATCH 1611/2924] virtio-net: fix total qstat values NIPA tests report that the interface statistics reported via qstat are lower than those reported via ip link. Looks like this is because some tests flip the queue count up and down, and we end up with some of the traffic accounted on disabled queues. Add up counters from disabled queues. Fixes: d888f04c09bb ("virtio-net: support queue stat") Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250507003221.823267-3-kuba@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f9e3e628ec4df9..e53ba600605a5d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -5678,6 +5678,10 @@ static void virtnet_get_base_stats(struct net_device *dev, if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) tx->hw_drop_ratelimits = 0; + + netdev_stat_queue_sum(dev, + dev->real_num_rx_queues, vi->max_queue_pairs, rx, + dev->real_num_tx_queues, vi->max_queue_pairs, tx); } static const struct netdev_stat_ops virtnet_stat_ops = { From 10aba126bc86904e2f5afeaa26354e877a593c95 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Tue, 6 May 2025 01:15:11 +0200 Subject: [PATCH 1612/2924] MAINTAINERS: Remove entry for Seth Heasley Seth's mails bounce back, remove his maintainership. Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250505231511.3175151-1-andi.shyti@kernel.org --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 69511c3b2b76fb..70229ed0c55ad8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11235,7 +11235,6 @@ S: Maintained F: drivers/i2c/busses/i2c-cht-wc.c I2C/SMBUS ISMT DRIVER -M: Seth Heasley M: Neil Horman L: linux-i2c@vger.kernel.org F: Documentation/i2c/busses/i2c-ismt.rst From 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 May 2025 09:29:23 +0300 Subject: [PATCH 1613/2924] pmdomain: core: Fix error checking in genpd_dev_pm_attach_by_id() The error checking for of_count_phandle_with_args() does not handle negative error codes correctly. The problem is that "index" is a u32 so in the condition "if (index >= num_domains)" negative error codes stored in "num_domains" are type promoted to very high positive values and "index" is always going to be valid. Test for negative error codes first and then test if "index" is valid. Fixes: 3ccf3f0cd197 ("PM / Domains: Enable genpd_dev_pm_attach_by_id|name() for single PM domain") Signed-off-by: Dan Carpenter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/aBxPQ8AI8N5v-7rL@stanley.mountain Signed-off-by: Ulf Hansson --- drivers/pmdomain/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index 9b2f28b34bb51a..d6c1ddb807b208 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -3126,7 +3126,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, /* Verify that the index is within a valid range. */ num_domains = of_count_phandle_with_args(dev->of_node, "power-domains", "#power-domain-cells"); - if (index >= num_domains) + if (num_domains < 0 || index >= num_domains) return NULL; /* Allocate and register device on the genpd bus. */ From fd94de9f9e7aac11ec659e386b9db1203d502023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 22 Apr 2025 18:23:08 +0200 Subject: [PATCH 1614/2924] riscv: misaligned: factorize trap handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since both load/store and user/kernel should use almost the same path and that we are going to add some code around that, factorize it. Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250422162324.956065-2-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/traps.c | 66 +++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 8ff8e8b36524b7..b1d991c78a2337 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -198,47 +198,53 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); -asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) +enum misaligned_access_type { + MISALIGNED_STORE, + MISALIGNED_LOAD, +}; +static const struct { + const char *type_str; + int (*handler)(struct pt_regs *regs); +} misaligned_handler[] = { + [MISALIGNED_STORE] = { + .type_str = "Oops - store (or AMO) address misaligned", + .handler = handle_misaligned_store, + }, + [MISALIGNED_LOAD] = { + .type_str = "Oops - load address misaligned", + .handler = handle_misaligned_load, + }, +}; + +static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type type) { - if (user_mode(regs)) { + irqentry_state_t state; + + if (user_mode(regs)) irqentry_enter_from_user_mode(regs); + else + state = irqentry_nmi_enter(regs); - if (handle_misaligned_load(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - load address misaligned"); + if (misaligned_handler[type].handler(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + misaligned_handler[type].type_str); + if (user_mode(regs)) irqentry_exit_to_user_mode(regs); - } else { - irqentry_state_t state = irqentry_nmi_enter(regs); - - if (handle_misaligned_load(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - load address misaligned"); - + else irqentry_nmi_exit(regs, state); - } } -asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs) +asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) { - if (user_mode(regs)) { - irqentry_enter_from_user_mode(regs); - - if (handle_misaligned_store(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - store (or AMO) address misaligned"); - - irqentry_exit_to_user_mode(regs); - } else { - irqentry_state_t state = irqentry_nmi_enter(regs); - - if (handle_misaligned_store(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - store (or AMO) address misaligned"); + do_trap_misaligned(regs, MISALIGNED_LOAD); +} - irqentry_nmi_exit(regs, state); - } +asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs) +{ + do_trap_misaligned(regs, MISALIGNED_STORE); } + DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); DO_ERROR_INFO(do_trap_ecall_s, From 453805f0a28fc5091e46145e6560c776f7c7a611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 22 Apr 2025 18:23:09 +0200 Subject: [PATCH 1615/2924] riscv: misaligned: enable IRQs while handling misaligned accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can safely reenable IRQs if coming from userspace. This allows to access user memory that could potentially trigger a page fault. Fixes: b686ecdeacf6 ("riscv: misaligned: Restrict user access to kernel memory") Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250422162324.956065-3-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/traps.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index b1d991c78a2337..9c83848797a78b 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -220,19 +220,23 @@ static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type { irqentry_state_t state; - if (user_mode(regs)) + if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); - else + local_irq_enable(); + } else { state = irqentry_nmi_enter(regs); + } if (misaligned_handler[type].handler(regs)) do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, misaligned_handler[type].type_str); - if (user_mode(regs)) + if (user_mode(regs)) { + local_irq_disable(); irqentry_exit_to_user_mode(regs); - else + } else { irqentry_nmi_exit(regs, state); + } } asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) From 897e8aece3c8a1a66b3eae58df1832a524d45319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Tue, 22 Apr 2025 18:23:10 +0200 Subject: [PATCH 1616/2924] riscv: misaligned: use get_user() instead of __get_user() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we can safely handle user memory accesses while in the misaligned access handlers, use get_user() instead of __get_user() to have user memory access checks. Signed-off-by: Clément Léger Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20250422162324.956065-4-cleger@rivosinc.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/traps_misaligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index dde5d11dc1b50d..77c788660223b3 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -275,7 +275,7 @@ static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, int __ret; \ \ if (user_mode(regs)) { \ - __ret = __get_user(insn, (type __user *) insn_addr); \ + __ret = get_user(insn, (type __user *) insn_addr); \ } else { \ insn = *(type *)insn_addr; \ __ret = 0; \ From ae08d55807c099357c047dba17624b09414635dd Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Sun, 4 May 2025 12:19:20 +0200 Subject: [PATCH 1617/2924] riscv: Fix kernel crash due to PR_SET_TAGGED_ADDR_CTRL When userspace does PR_SET_TAGGED_ADDR_CTRL, but Supm extension is not available, the kernel crashes: Oops - illegal instruction [#1] [snip] epc : set_tagged_addr_ctrl+0x112/0x15a ra : set_tagged_addr_ctrl+0x74/0x15a epc : ffffffff80011ace ra : ffffffff80011a30 sp : ffffffc60039be10 [snip] status: 0000000200000120 badaddr: 0000000010a79073 cause: 0000000000000002 set_tagged_addr_ctrl+0x112/0x15a __riscv_sys_prctl+0x352/0x73c do_trap_ecall_u+0x17c/0x20c andle_exception+0x150/0x15c Fix it by checking if Supm is available. Fixes: 09d6775f503b ("riscv: Add support for userspace pointer masking") Signed-off-by: Nam Cao Cc: stable@vger.kernel.org Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20250504101920.3393053-1-namcao@linutronix.de Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 7c244de7718008..3db2c0c07acd07 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -275,6 +275,9 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) unsigned long pmm; u8 pmlen; + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return -EINVAL; + if (is_compat_thread(ti)) return -EINVAL; From e9d86b8e17e725ee6088970981ab99f29abd1997 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 8 Apr 2025 09:28:51 +0200 Subject: [PATCH 1618/2924] scripts: Do not strip .rela.dyn section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .rela.dyn section contains runtime relocations and is only emitted for a relocatable kernel. riscv uses this section to relocate the kernel at runtime but that section is stripped from vmlinux. That prevents kexec to successfully load vmlinux since it does not contain the relocations info needed. Fixes: 559d1e45a16d ("riscv: Use --emit-relocs in order to move .rela.dyn in init") Tested-by: Björn Töpel Reviewed-by: Björn Töpel Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250408072851.90275-1-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti --- scripts/Makefile.vmlinux | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index b0a6cd5b818c9f..85d60d98640169 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -13,7 +13,7 @@ ifdef CONFIG_ARCH_VMLINUX_NEEDS_RELOCS vmlinux-final := vmlinux.unstripped quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' $< $@ + cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' --remove-section=!'.rel*.dyn' $< $@ vmlinux: $(vmlinux-final) FORCE $(call if_changed,strip_relocs) From 7f1c3de1370bc6a8ad5157336b258067dac0ae9c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 7 May 2025 07:52:18 -0700 Subject: [PATCH 1619/2924] riscv: Disallow PR_GET_TAGGED_ADDR_CTRL without Supm When the prctl() interface for pointer masking was added, it did not check that the pointer masking ISA extension was supported, only the individual submodes. Userspace could still attempt to disable pointer masking and query the pointer masking state. commit 81de1afb2dd1 ("riscv: Fix kernel crash due to PR_SET_TAGGED_ADDR_CTRL") disallowed the former, as the senvcfg write could crash on older systems. PR_GET_TAGGED_ADDR_CTRL state does not crash, because it reads only kernel-internal state and not senvcfg, but it should still be disallowed for consistency. Fixes: 09d6775f503b ("riscv: Add support for userspace pointer masking") Signed-off-by: Samuel Holland Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20250507145230.2272871-1-samuel.holland@sifive.com Signed-off-by: Alexandre Ghiti --- arch/riscv/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 3db2c0c07acd07..15d8f75902f858 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -333,6 +333,9 @@ long get_tagged_addr_ctrl(struct task_struct *task) struct thread_info *ti = task_thread_info(task); long ret = 0; + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return -EINVAL; + if (is_compat_thread(ti)) return -EINVAL; From b7e3ec4e17e27420ebe976c7714881b64c28d63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 7 May 2025 16:14:14 -0400 Subject: [PATCH 1620/2924] ASoC: mediatek: mt8188-mt6359: Depend on MT6359_ACCDET set or disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0116a7d84b32 ("ASoC: mediatek: mt6359: Add stub for mt6359_accdet_enable_jack_detect") added a stub for mt6359_accdet_enable_jack_detect() in order to allow the mt8188-mt6359 driver to be enabled without requiring the mt6359-accdet to also be enabled, since it is not always needed. However, in the case that CONFIG_SND_SOC_MT8188_MT6359=y and CONFIG_SND_SOC_MT6359_ACCDET=m, a link error will happen, which commit b19fa45715ce ("ASoC: mediatek: mt8188-mt6359: select CONFIG_SND_SOC_MT6359_ACCDET") solved by selecting CONFIG_SND_SOC_MT6359_ACCDET. In order to not require CONFIG_SND_SOC_MT6359_ACCDET as originally intended, but also prevent the link error, depend on ACCDET being enabled or disabled (which will force MT8188_MT6359=m if MT6359_ACCDET=m). Fixes: f35d834d67ad ("ASoC: mediatek: mt8188-mt6359: Add accdet headset jack detect support") Fixes: b19fa45715ce ("ASoC: mediatek: mt8188-mt6359: select CONFIG_SND_SOC_MT6359_ACCDET") Suggested-by: Arnd Bergmann Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: AngeloGioacchino Del Regno Acked-by: Arnd Bergmann Link: https://patch.msgid.link/20250507-mt8188-mt6359-accdet-depend-v1-1-aad70ce62964@collabora.com Signed-off-by: Mark Brown --- sound/soc/mediatek/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig index e148d4d9416011..90e367586493d7 100644 --- a/sound/soc/mediatek/Kconfig +++ b/sound/soc/mediatek/Kconfig @@ -228,8 +228,8 @@ config SND_SOC_MT8188 config SND_SOC_MT8188_MT6359 tristate "ASoC Audio driver for MT8188 with MT6359 and I2S codecs" depends on SND_SOC_MT8188 && MTK_PMIC_WRAP + depends on SND_SOC_MT6359_ACCDET || !SND_SOC_MT6359_ACCDET depends on I2C - select SND_SOC_MT6359_ACCDET select SND_SOC_MT6359 select SND_SOC_HDMI_CODEC select SND_SOC_DMIC From e3417ab75ab2e7dca6372a1bfa26b1be3ac5889e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 5 May 2025 11:03:00 -0700 Subject: [PATCH 1621/2924] KVM: SVM: Set/clear SRSO's BP_SPEC_REDUCE on 0 <=> 1 VM count transitions Set the magic BP_SPEC_REDUCE bit to mitigate SRSO when running VMs if and only if KVM has at least one active VM. Leaving the bit set at all times unfortunately degrades performance by a wee bit more than expected. Use a dedicated spinlock and counter instead of hooking virtualization enablement, as changing the behavior of kvm.enable_virt_at_load based on SRSO_BP_SPEC_REDUCE is painful, and has its own drawbacks, e.g. could result in performance issues for flows that are sensitive to VM creation latency. Defer setting BP_SPEC_REDUCE until VMRUN is imminent to avoid impacting performance on CPUs that aren't running VMs, e.g. if a setup is using housekeeping CPUs. Setting BP_SPEC_REDUCE in task context, i.e. without blasting IPIs to all CPUs, also helps avoid serializing 1<=>N transitions without incurring a gross amount of complexity (see the Link for details on how ugly coordinating via IPIs gets). Link: https://lore.kernel.org/all/aBOnzNCngyS_pQIW@google.com Fixes: 8442df2b49ed ("x86/bugs: KVM: Add support for SRSO_MSR_FIX") Reported-by: Michael Larabel Closes: https://www.phoronix.com/review/linux-615-amd-regression Cc: Borislav Petkov Tested-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250505180300.973137-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 71 ++++++++++++++++++++++++++++++++++++++---- arch/x86/kvm/svm/svm.h | 2 ++ 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c5470d842aed43..a89c271a1951f4 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -607,9 +607,6 @@ static void svm_disable_virtualization_cpu(void) kvm_cpu_svm_disable(); amd_pmu_disable_virt(); - - if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) - msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); } static int svm_enable_virtualization_cpu(void) @@ -687,9 +684,6 @@ static int svm_enable_virtualization_cpu(void) rdmsr(MSR_TSC_AUX, sev_es_host_save_area(sd)->tsc_aux, msr_hi); } - if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) - msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); - return 0; } @@ -1518,6 +1512,63 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE)); } +#ifdef CONFIG_CPU_MITIGATIONS +static DEFINE_SPINLOCK(srso_lock); +static atomic_t srso_nr_vms; + +static void svm_srso_clear_bp_spec_reduce(void *ign) +{ + struct svm_cpu_data *sd = this_cpu_ptr(&svm_data); + + if (!sd->bp_spec_reduce_set) + return; + + msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); + sd->bp_spec_reduce_set = false; +} + +static void svm_srso_vm_destroy(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) + return; + + if (atomic_dec_return(&srso_nr_vms)) + return; + + guard(spinlock)(&srso_lock); + + /* + * Verify a new VM didn't come along, acquire the lock, and increment + * the count before this task acquired the lock. + */ + if (atomic_read(&srso_nr_vms)) + return; + + on_each_cpu(svm_srso_clear_bp_spec_reduce, NULL, 1); +} + +static void svm_srso_vm_init(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) + return; + + /* + * Acquire the lock on 0 => 1 transitions to ensure a potential 1 => 0 + * transition, i.e. destroying the last VM, is fully complete, e.g. so + * that a delayed IPI doesn't clear BP_SPEC_REDUCE after a vCPU runs. + */ + if (atomic_inc_not_zero(&srso_nr_vms)) + return; + + guard(spinlock)(&srso_lock); + + atomic_inc(&srso_nr_vms); +} +#else +static void svm_srso_vm_init(void) { } +static void svm_srso_vm_destroy(void) { } +#endif + static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1550,6 +1601,11 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm))) kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); + if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE) && + !sd->bp_spec_reduce_set) { + sd->bp_spec_reduce_set = true; + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); + } svm->guest_state_loaded = true; } @@ -5040,6 +5096,8 @@ static void svm_vm_destroy(struct kvm *kvm) { avic_vm_destroy(kvm); sev_vm_destroy(kvm); + + svm_srso_vm_destroy(); } static int svm_vm_init(struct kvm *kvm) @@ -5065,6 +5123,7 @@ static int svm_vm_init(struct kvm *kvm) return ret; } + svm_srso_vm_init(); return 0; } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index d4490eaed55dd4..f16b068c4228b8 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -335,6 +335,8 @@ struct svm_cpu_data { u32 next_asid; u32 min_asid; + bool bp_spec_reduce_set; + struct vmcb *save_area; unsigned long save_area_pa; From c82b6357a5465a3222780ac5d3edcdfb02208cc3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 30 Apr 2025 15:07:03 -0400 Subject: [PATCH 1622/2924] Bluetooth: hci_event: Fix not using key encryption size when its known This fixes the regression introduced by 50c1241e6a8a ("Bluetooth: l2cap: Check encryption key size on incoming connection") introduced a check for l2cap_check_enc_key_size which checks for hcon->enc_key_size which may not be initialized if HCI_OP_READ_ENC_KEY_SIZE is still pending. If the key encryption size is known, due previously reading it using HCI_OP_READ_ENC_KEY_SIZE, then store it as part of link_key/smp_ltk structures so the next time the encryption is changed their values are used as conn->enc_key_size thus avoiding the racing against HCI_OP_READ_ENC_KEY_SIZE. Now that the enc_size is stored as part of key the information the code then attempts to check that there is no downgrade of security if HCI_OP_READ_ENC_KEY_SIZE returns a value smaller than what has been previously stored. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220061 Link: https://bugzilla.kernel.org/show_bug.cgi?id=220063 Fixes: 522e9ed157e3 ("Bluetooth: l2cap: Check encryption key size on incoming connection") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 24 +++++++++++ net/bluetooth/hci_event.c | 73 ++++++++++++++++++-------------- 3 files changed, 67 insertions(+), 31 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 522d837a23fa46..54bfeeaa09959b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1798,6 +1798,7 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, void hci_uuids_clear(struct hci_dev *hdev); void hci_link_keys_clear(struct hci_dev *hdev); +u8 *hci_conn_key_enc_size(struct hci_conn *conn); struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, bdaddr_t *bdaddr, u8 *val, u8 type, diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6533e281ada333..946d2ae551f86c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -3023,3 +3023,27 @@ void hci_conn_tx_dequeue(struct hci_conn *conn) kfree_skb(skb); } + +u8 *hci_conn_key_enc_size(struct hci_conn *conn) +{ + if (conn->type == ACL_LINK) { + struct link_key *key; + + key = hci_find_link_key(conn->hdev, &conn->dst); + if (!key) + return NULL; + + return &key->pin_len; + } else if (conn->type == LE_LINK) { + struct smp_ltk *ltk; + + ltk = hci_find_ltk(conn->hdev, &conn->dst, conn->dst_type, + conn->role); + if (!ltk) + return NULL; + + return <k->enc_size; + } + + return NULL; +} diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6d6061111ac567..c38ada69c3d7f2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -739,10 +739,17 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data, handle); conn->enc_key_size = 0; } else { + u8 *key_enc_size = hci_conn_key_enc_size(conn); + conn->enc_key_size = rp->key_size; status = 0; - if (conn->enc_key_size < hdev->min_enc_key_size) { + /* Attempt to check if the key size is too small or if it has + * been downgraded from the last time it was stored as part of + * the link_key. + */ + if (conn->enc_key_size < hdev->min_enc_key_size || + (key_enc_size && conn->enc_key_size < *key_enc_size)) { /* As slave role, the conn->state has been set to * BT_CONNECTED and l2cap conn req might not be received * yet, at this moment the l2cap layer almost does @@ -755,6 +762,10 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data, clear_bit(HCI_CONN_ENCRYPT, &conn->flags); clear_bit(HCI_CONN_AES_CCM, &conn->flags); } + + /* Update the key encryption size with the connection one */ + if (key_enc_size && *key_enc_size != conn->enc_key_size) + *key_enc_size = conn->enc_key_size; } hci_encrypt_cfm(conn, status); @@ -3065,6 +3076,34 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, void *edata, hci_dev_unlock(hdev); } +static int hci_read_enc_key_size(struct hci_dev *hdev, struct hci_conn *conn) +{ + struct hci_cp_read_enc_key_size cp; + u8 *key_enc_size = hci_conn_key_enc_size(conn); + + if (!read_key_size_capable(hdev)) { + conn->enc_key_size = HCI_LINK_KEY_SIZE; + return -EOPNOTSUPP; + } + + bt_dev_dbg(hdev, "hcon %p", conn); + + memset(&cp, 0, sizeof(cp)); + cp.handle = cpu_to_le16(conn->handle); + + /* If the key enc_size is already known, use it as conn->enc_key_size, + * otherwise use hdev->min_enc_key_size so the likes of + * l2cap_check_enc_key_size don't fail while waiting for + * HCI_OP_READ_ENC_KEY_SIZE response. + */ + if (key_enc_size && *key_enc_size) + conn->enc_key_size = *key_enc_size; + else + conn->enc_key_size = hdev->min_enc_key_size; + + return hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp); +} + static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -3157,23 +3196,11 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && ev->link_type == ACL_LINK) { struct link_key *key; - struct hci_cp_read_enc_key_size cp; key = hci_find_link_key(hdev, &ev->bdaddr); if (key) { set_bit(HCI_CONN_ENCRYPT, &conn->flags); - - if (!read_key_size_capable(hdev)) { - conn->enc_key_size = HCI_LINK_KEY_SIZE; - } else { - cp.handle = cpu_to_le16(conn->handle); - if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, - sizeof(cp), &cp)) { - bt_dev_err(hdev, "sending read key size failed"); - conn->enc_key_size = HCI_LINK_KEY_SIZE; - } - } - + hci_read_enc_key_size(hdev, conn); hci_encrypt_cfm(conn, ev->status); } } @@ -3612,24 +3639,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, /* Try reading the encryption key size for encrypted ACL links */ if (!ev->status && ev->encrypt && conn->type == ACL_LINK) { - struct hci_cp_read_enc_key_size cp; - - /* Only send HCI_Read_Encryption_Key_Size if the - * controller really supports it. If it doesn't, assume - * the default size (16). - */ - if (!read_key_size_capable(hdev)) { - conn->enc_key_size = HCI_LINK_KEY_SIZE; - goto notify; - } - - cp.handle = cpu_to_le16(conn->handle); - if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, - sizeof(cp), &cp)) { - bt_dev_err(hdev, "sending read key size failed"); - conn->enc_key_size = HCI_LINK_KEY_SIZE; + if (hci_read_enc_key_size(hdev, conn)) goto notify; - } goto unlock; } From 63de8abd97ddb9b758bd8f915ecbd18e1f1a87a0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 9 Dec 2021 15:12:19 +0000 Subject: [PATCH 1623/2924] arm64: insn: Add support for encoding DSB To generate code in the eBPF epilogue that uses the DSB instruction, insn.c needs a heler to encode the type and domain. Re-use the crm encoding logic from the DMB instruction. Signed-off-by: James Morse Reviewed-by: Catalin Marinas --- arch/arm64/include/asm/insn.h | 1 + arch/arm64/lib/insn.c | 60 +++++++++++++++++++++-------------- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 39577f1d079a98..18c7811774d301 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -706,6 +706,7 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, } #endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type); u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, enum aarch64_insn_system_register sysreg); diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 9bef696e2230be..4e298baddc2e56 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -5,6 +5,7 @@ * * Copyright (C) 2014-2016 Zi Shen Lim */ +#include #include #include #include @@ -1500,43 +1501,41 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); } -u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) +static u32 __get_barrier_crm_val(enum aarch64_insn_mb_type type) { - u32 opt; - u32 insn; - switch (type) { case AARCH64_INSN_MB_SY: - opt = 0xf; - break; + return 0xf; case AARCH64_INSN_MB_ST: - opt = 0xe; - break; + return 0xe; case AARCH64_INSN_MB_LD: - opt = 0xd; - break; + return 0xd; case AARCH64_INSN_MB_ISH: - opt = 0xb; - break; + return 0xb; case AARCH64_INSN_MB_ISHST: - opt = 0xa; - break; + return 0xa; case AARCH64_INSN_MB_ISHLD: - opt = 0x9; - break; + return 0x9; case AARCH64_INSN_MB_NSH: - opt = 0x7; - break; + return 0x7; case AARCH64_INSN_MB_NSHST: - opt = 0x6; - break; + return 0x6; case AARCH64_INSN_MB_NSHLD: - opt = 0x5; - break; + return 0x5; default: - pr_err("%s: unknown dmb type %d\n", __func__, type); + pr_err("%s: unknown barrier type %d\n", __func__, type); return AARCH64_BREAK_FAULT; } +} + +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) +{ + u32 opt; + u32 insn; + + opt = __get_barrier_crm_val(type); + if (opt == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; insn = aarch64_insn_get_dmb_value(); insn &= ~GENMASK(11, 8); @@ -1545,6 +1544,21 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) return insn; } +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type) +{ + u32 opt, insn; + + opt = __get_barrier_crm_val(type); + if (opt == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; + + insn = aarch64_insn_get_dsb_base_value(); + insn &= ~GENMASK(11, 8); + insn |= (opt << 8); + + return insn; +} + u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, enum aarch64_insn_system_register sysreg) { From e7956c92f396a44eeeb6eaf7a5b5e1ad24db6748 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 19 Aug 2024 14:15:53 +0100 Subject: [PATCH 1624/2924] arm64: proton-pack: Expose whether the platform is mitigated by firmware is_spectre_bhb_fw_affected() allows the caller to determine if the CPU is known to need a firmware mitigation. CPUs are either on the list of CPUs we know about, or firmware has been queried and reported that the platform is affected - and mitigated by firmware. This helper is not useful to determine if the platform is mitigated by firmware. A CPU could be on the know list, but the firmware may not be implemented. Its affected but not mitigated. spectre_bhb_enable_mitigation() handles this distinction by checking the firmware state before enabling the mitigation. Add a helper to expose this state. This will be used by the BPF JIT to determine if calling firmware for a mitigation is necessary and supported. Signed-off-by: James Morse Reviewed-by: Catalin Marinas --- arch/arm64/include/asm/spectre.h | 1 + arch/arm64/kernel/proton-pack.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index f1524cdeacf1c4..78fb720987fb45 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,6 +97,7 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +bool is_spectre_bhb_fw_mitigated(void); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index b607f6dfc5e6f6..355f1f3098a24d 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -1094,6 +1094,11 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) update_mitigation_state(&spectre_bhb_state, state); } +bool is_spectre_bhb_fw_mitigated(void) +{ + return test_bit(BHB_FW, &system_bhb_mitigations); +} + /* Patched to NOP when enabled */ void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr, From a1152be30a043d2d4dcb1683415f328bf3c51978 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Apr 2025 13:55:17 +0100 Subject: [PATCH 1625/2924] arm64: proton-pack: Expose whether the branchy loop k value Add a helper to expose the k value of the branchy loop. This is needed by the BPF JIT to generate the mitigation sequence in BPF programs. Signed-off-by: James Morse Reviewed-by: Catalin Marinas --- arch/arm64/include/asm/spectre.h | 1 + arch/arm64/kernel/proton-pack.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 78fb720987fb45..bca12134245c6a 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,6 +97,7 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +u8 get_spectre_bhb_loop_value(void); bool is_spectre_bhb_fw_mitigated(void); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 355f1f3098a24d..3154094a9e33d4 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -999,6 +999,11 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, return true; } +u8 get_spectre_bhb_loop_value(void) +{ + return max_bhb_k; +} + static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) { const char *v = arm64_get_bp_hardening_vector(slot); From 0dfefc2ea2f29ced2416017d7e5b1253a54c2735 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 9 Dec 2021 15:13:24 +0000 Subject: [PATCH 1626/2924] arm64: bpf: Add BHB mitigation to the epilogue for cBPF programs A malicious BPF program may manipulate the branch history to influence what the hardware speculates will happen next. On exit from a BPF program, emit the BHB mititgation sequence. This is only applied for 'classic' cBPF programs that are loaded by seccomp. Signed-off-by: James Morse Reviewed-by: Catalin Marinas Acked-by: Daniel Borkmann --- arch/arm64/include/asm/spectre.h | 1 + arch/arm64/kernel/proton-pack.c | 2 +- arch/arm64/net/bpf_jit_comp.c | 54 +++++++++++++++++++++++++++++--- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index bca12134245c6a..8fef1262609011 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,6 +97,7 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +extern bool __nospectre_bhb; u8 get_spectre_bhb_loop_value(void); bool is_spectre_bhb_fw_mitigated(void); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 3154094a9e33d4..4459b613077e93 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -1021,7 +1021,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) isb(); } -static bool __read_mostly __nospectre_bhb; +bool __read_mostly __nospectre_bhb; static int __init parse_spectre_bhb_param(char *str) { __nospectre_bhb = true; diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 70d7c89d3ac907..0ab8e47062d9a1 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "bpf_jit: " fmt +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -939,7 +941,48 @@ static void build_plt(struct jit_ctx *ctx) plt->target = (u64)&dummy_tramp; } -static void build_epilogue(struct jit_ctx *ctx) +/* Clobbers BPF registers 1-4, aka x0-x3 */ +static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx) +{ + const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */ + u8 k = get_spectre_bhb_loop_value(); + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) || + cpu_mitigations_off() || __nospectre_bhb || + arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) + return; + + if (supports_clearbhb(SCOPE_SYSTEM)) { + emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx); + return; + } + + if (k) { + emit_a64_mov_i64(r1, k, ctx); + emit(A64_B(1), ctx); + emit(A64_SUBS_I(true, r1, r1, 1), ctx); + emit(A64_B_(A64_COND_NE, -2), ctx); + emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx); + emit(aarch64_insn_get_isb_value(), ctx); + } + + if (is_spectre_bhb_fw_mitigated()) { + emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR, + ARM_SMCCC_ARCH_WORKAROUND_3), ctx); + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + emit(aarch64_insn_get_hvc_value(), ctx); + break; + case SMCCC_CONDUIT_SMC: + emit(aarch64_insn_get_smc_value(), ctx); + break; + default: + pr_err_once("Firmware mitigation enabled with unknown conduit\n"); + } + } +} + +static void build_epilogue(struct jit_ctx *ctx, bool was_classic) { const u8 r0 = bpf2a64[BPF_REG_0]; const u8 ptr = bpf2a64[TCCNT_PTR]; @@ -952,10 +995,13 @@ static void build_epilogue(struct jit_ctx *ctx) emit(A64_POP(A64_ZR, ptr, A64_SP), ctx); + if (was_classic) + build_bhb_mitigation(ctx); + /* Restore FP/LR registers */ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); - /* Set return value */ + /* Move the return value from bpf:r0 (aka x7) to x0 */ emit(A64_MOV(1, A64_R(0), r0), ctx); /* Authenticate lr */ @@ -1898,7 +1944,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } ctx.epilogue_offset = ctx.idx; - build_epilogue(&ctx); + build_epilogue(&ctx, was_classic); build_plt(&ctx); extable_align = __alignof__(struct exception_table_entry); @@ -1961,7 +2007,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_free_hdr; } - build_epilogue(&ctx); + build_epilogue(&ctx, was_classic); build_plt(&ctx); /* Extra pass to validate JITed code. */ From f300769ead032513a68e4a02e806393402e626f8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 29 Apr 2025 16:03:38 +0100 Subject: [PATCH 1627/2924] arm64: bpf: Only mitigate cBPF programs loaded by unprivileged users Support for eBPF programs loaded by unprivileged users is typically disabled. This means only cBPF programs need to be mitigated for BHB. In addition, only mitigate cBPF programs that were loaded by an unprivileged user. Privileged users can also load the same program via eBPF, making the mitigation pointless. Signed-off-by: James Morse Reviewed-by: Catalin Marinas Acked-by: Daniel Borkmann --- arch/arm64/net/bpf_jit_comp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 0ab8e47062d9a1..634d78422adb27 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -952,6 +952,9 @@ static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx) arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) return; + if (capable(CAP_SYS_ADMIN)) + return; + if (supports_clearbhb(SCOPE_SYSTEM)) { emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx); return; From efe676a1a7554219eae0b0dcfe1e0cdcc9ef9aef Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 12 Aug 2024 17:50:22 +0100 Subject: [PATCH 1628/2924] arm64: proton-pack: Add new CPUs 'k' values for branch mitigation Update the list of 'k' values for the branch mitigation from arm's website. Add the values for Cortex-X1C. The MIDR_EL1 value can be found here: https://developer.arm.com/documentation/101968/0002/Register-descriptions/AArch> Link: https://developer.arm.com/documentation/110280/2-0/?lang=en Signed-off-by: James Morse Reviewed-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/proton-pack.c | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index d1cc0571798bfa..dffff6763812f9 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -81,6 +81,7 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D @@ -168,6 +169,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 4459b613077e93..edf1783ffc8174 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -891,6 +891,7 @@ static u8 spectre_bhb_loop_affected(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), From c0d0a9ff6d5b5b23ddabde8bcbafb28fa454ae00 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 8 May 2025 16:30:36 +0800 Subject: [PATCH 1629/2924] block: remove test of incorrect io priority level Ever since commit eca2040972b4("scsi: block: ioprio: Clean up interface definition"), the macro IOPRIO_PRIO_LEVEL() will mask the level value to something between 0 and 7 so necessarily, level will always be lower than IOPRIO_NR_LEVELS(8). Remove this obsolete check. Reported-by: Kexin Wei Cc: Damien Le Moal Signed-off-by: Aaron Lu Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250508083018.GA769554@bytedance Signed-off-by: Jens Axboe --- block/ioprio.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/block/ioprio.c b/block/ioprio.c index 73301a261429ff..f0ee2798539c01 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -46,12 +46,8 @@ int ioprio_check_cap(int ioprio) */ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) return -EPERM; - fallthrough; - /* rt has prio field too */ - case IOPRIO_CLASS_BE: - if (level >= IOPRIO_NR_LEVELS) - return -EINVAL; break; + case IOPRIO_CLASS_BE: case IOPRIO_CLASS_IDLE: break; case IOPRIO_CLASS_NONE: From 0e33e0f339b91eecd9558311449a3d1e728722d4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:46:56 -0400 Subject: [PATCH 1630/2924] drm/amdgpu/hdp5: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: cf424020e040 ("drm/amdgpu/hdp5.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit a5cb344033c7598762e89255e8ff52827abb57a4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 43195c07974808..086a647308df07 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -32,7 +32,12 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From dbc988c689333faeeed44d5561f372ff20395304 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:47:37 -0400 Subject: [PATCH 1631/2924] drm/amdgpu/hdp5.2: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: f756dbac1ce1 ("drm/amdgpu/hdp5.2: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 4a89b7698e771914b4d5b571600c76e2fdcbe2a9) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index fcb8dd2876bcc2..40940b4ab4007b 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -33,7 +33,17 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, if (!ring || !ring->funcs->emit_wreg) { WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + if (amdgpu_sriov_vf(adev)) { + /* this is fine because SR_IOV doesn't remap the register */ + RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, From ca28e80abe4219c8f1a2961ae05102d70af6dc87 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:48:51 -0400 Subject: [PATCH 1632/2924] drm/amdgpu/hdp6: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: abe1cbaec6cf ("drm/amdgpu/hdp6.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 84141ff615951359c9a99696fd79a36c465ed847) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index a88d25a06c29b5..6ccd31c8bc6928 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -35,7 +35,12 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 5a11a2767731139bf87e667331aa2209e33a1d19 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:50:02 -0400 Subject: [PATCH 1633/2924] drm/amdgpu/hdp7: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: 689275140cb8 ("drm/amdgpu/hdp7.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit dbc064adfcf9095e7d895bea87b2f75c1ab23236) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 49f7eb4fbd117d..2c9239a22f3986 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -32,7 +32,12 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From adfab6b39202481bb43286fff94def4953793fdb Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 7 May 2025 21:30:25 -0500 Subject: [PATCH 1634/2924] ACPI: PPTT: Fix processor subtable walk The original PPTT code had a bug where the processor subtable length was not correctly validated when encountering a truncated acpi_pptt_processor node. Commit 7ab4f0e37a0f4 ("ACPI PPTT: Fix coding mistakes in a couple of sizeof() calls") attempted to fix this by validating the size is as large as the acpi_pptt_processor node structure. This introduced a regression where the last processor node in the PPTT table is ignored if it doesn't contain any private resources. That results errors like: ACPI PPTT: PPTT table found, but unable to locate core XX (XX) ACPI: SPE must be homogeneous Furthermore, it fails in a common case where the node length isn't equal to the acpi_pptt_processor structure size, leaving the original bug in a modified form. Correct the regression by adjusting the loop termination conditions as suggested by the bug reporters. An additional check performed after the subtable node type is detected, validates the acpi_pptt_processor node is fully contained in the PPTT table. Repeating the check in acpi_pptt_leaf_node() is largely redundant as the node is already known to be fully contained in the table. The case where a final truncated node's parent property is accepted, but the node itself is rejected should not be considered a bug. Fixes: 7ab4f0e37a0f4 ("ACPI PPTT: Fix coding mistakes in a couple of sizeof() calls") Reported-by: Maximilian Heyne Closes: https://lore.kernel.org/linux-acpi/20250506-draco-taped-15f475cd@mheyne-amazon/ Reported-by: Yicong Yang Closes: https://lore.kernel.org/linux-acpi/20250507035124.28071-1-yangyicong@huawei.com/ Signed-off-by: Jeremy Linton Tested-by: Yicong Yang Reviewed-by: Sudeep Holla Tested-by: Maximilian Heyne Cc: All applicable # 7ab4f0e37a0f4: ACPI PPTT: Fix coding mistakes ... Link: https://patch.msgid.link/20250508023025.1301030-1-jeremy.linton@arm.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pptt.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index f73ce6e13065dd..54676e3d82dd59 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -231,16 +231,18 @@ static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr, sizeof(struct acpi_table_pptt)); proc_sz = sizeof(struct acpi_pptt_processor); - while ((unsigned long)entry + proc_sz < table_end) { + /* ignore subtable types that are smaller than a processor node */ + while ((unsigned long)entry + proc_sz <= table_end) { cpu_node = (struct acpi_pptt_processor *)entry; + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && cpu_node->parent == node_entry) return 0; if (entry->length == 0) return 0; + entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, entry->length); - } return 1; } @@ -273,15 +275,18 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he proc_sz = sizeof(struct acpi_pptt_processor); /* find the processor structure associated with this cpuid */ - while ((unsigned long)entry + proc_sz < table_end) { + while ((unsigned long)entry + proc_sz <= table_end) { cpu_node = (struct acpi_pptt_processor *)entry; if (entry->length == 0) { pr_warn("Invalid zero length subtable\n"); break; } + /* entry->length may not equal proc_sz, revalidate the processor structure length */ if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && acpi_cpu_id == cpu_node->acpi_processor_id && + (unsigned long)entry + entry->length <= table_end && + entry->length == proc_sz + cpu_node->number_of_priv_resources * sizeof(u32) && acpi_pptt_leaf_node(table_hdr, cpu_node)) { return (struct acpi_pptt_processor *)entry; } From 391008f34e711253c5983b0bf52277cc43723127 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 8 Apr 2025 08:59:15 -0700 Subject: [PATCH 1635/2924] drm/xe: Add page queue multiplier For an unknown reason the math to determine the PF queue size does is not correct - compute UMD applications are overflowing the PF queue which is fatal. A multippier of 8 fixes the problem. Fixes: 3338e4f90c14 ("drm/xe: Use topology to determine page fault queue size") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Jagmeet Randhawa Link: https://lore.kernel.org/r/20250408155915.78770-1-matthew.brost@intel.com (cherry picked from commit 29582e0ea75c95668d168b12406e3c56cf5a73c4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index c5ad9a0a89c2b3..0c22b3a3665500 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -435,9 +435,16 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, XE_MAX_EU_FUSE_BITS) * num_dss; - /* user can issue separate page faults per EU and per CS */ + /* + * user can issue separate page faults per EU and per CS + * + * XXX: Multiplier required as compute UMD are getting PF queue errors + * without it. Follow on why this multiplier is required. + */ +#define PF_MULTIPLIER 8 pf_queue->num_dw = - (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; +#undef PF_MULTIPLIER pf_queue->gt = gt; pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, From 51c0ee84e4dc339287b2d7335f2b54d747794c83 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 28 Apr 2025 13:53:57 +0530 Subject: [PATCH 1636/2924] drm/xe/tests/mocs: Hold XE_FORCEWAKE_ALL for LNCF regs LNCF registers report wrong values when XE_FORCEWAKE_GT only is held. Holding XE_FORCEWAKE_ALL ensures correct operations on LNCF regs. V2(Himal): - Use xe_force_wake_ref_has_domain Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1999 Fixes: a6a4ea6d7d37 ("drm/xe: Add mocs kunit") Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250428082357.1730068-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit 70a2585e582058e94fe4381a337be42dec800337) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/xe_mocs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index ef1e5256c56a8a..0e502feaca8186 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -46,8 +46,11 @@ static void read_l3cc_table(struct xe_gt *gt, unsigned int fw_ref, i; u32 reg_val; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); + KUNIT_ASSERT_TRUE_MSG(test, true, "Forcewake Failed.\n"); + } for (i = 0; i < info->num_mocs_regs; i++) { if (!(i & 1)) { From 03552d8ac0afcc080c339faa0b726e2c0e9361cb Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 2 May 2025 08:51:04 -0700 Subject: [PATCH 1637/2924] drm/xe/gsc: do not flush the GSC worker from the reset path The workqueue used for the reset worker is marked as WQ_MEM_RECLAIM, while the GSC one isn't (and can't be as we need to do memory allocations in the gsc worker). Therefore, we can't flush the latter from the former. The reason why we had such a flush was to avoid interrupting either the GSC FW load or in progress GSC proxy operations. GSC proxy operations fall into 2 categories: 1) GSC proxy init: this only happens once immediately after GSC FW load and does not support being interrupted. The only way to recover from an interruption of the proxy init is to do an FLR and re-load the GSC. 2) GSC proxy request: this can happen in response to a request that the driver sends to the GSC. If this is interrupted, the GSC FW will timeout and the driver request will be failed, but overall the GSC will keep working fine. Flushing the work allowed us to avoid interruption in both cases (unless the hang came from the GSC engine itself, in which case we're toast anyway). However, a failure on a proxy request is tolerable if we're in a scenario where we're triggering a GT reset (i.e., something is already gone pretty wrong), so what we really need to avoid is interrupting the init flow, which we can do by polling on the register that reports when the proxy init is complete (as that ensure us that all the load and init operations have been completed). Note that during suspend we still want to do a flush of the worker to make sure it completes any operations involving the HW before the power is cut. v2: fix spelling in commit msg, rename waiter function (Julia) Fixes: dd0e89e5edc2 ("drm/xe/gsc: GSC FW load") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4830 Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Alan Previn Cc: # v6.8+ Reviewed-by: Julia Filipchuk Link: https://lore.kernel.org/r/20250502155104.2201469-1-daniele.ceraolospurio@intel.com (cherry picked from commit 12370bfcc4f0bdf70279ec5b570eb298963422b5) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gsc.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gsc.h | 1 + drivers/gpu/drm/xe/xe_gsc_proxy.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_gsc_proxy.h | 1 + drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_uc.c | 8 +++++++- drivers/gpu/drm/xe/xe_uc.h | 1 + 7 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index fd41113f857251..0bcf97063ff61a 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -555,6 +555,28 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) flush_work(&gsc->work); } +void xe_gsc_stop_prepare(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int ret; + + if (!xe_uc_fw_is_loadable(&gsc->fw) || xe_uc_fw_is_in_error_state(&gsc->fw)) + return; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GSC); + + /* + * If the GSC FW load or the proxy init are interrupted, the only way + * to recover it is to do an FLR and reload the GSC from scratch. + * Therefore, let's wait for the init to complete before stopping + * operations. The proxy init is the last step, so we can just wait on + * that + */ + ret = xe_gsc_wait_for_proxy_init_done(gsc); + if (ret) + xe_gt_err(gt, "failed to wait for GSC init completion before uc stop\n"); +} + /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index d99f66c38075c4..b8b8e0810ad94a 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -16,6 +16,7 @@ struct xe_hw_engine; int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); +void xe_gsc_stop_prepare(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 8cf70b228ff3b6..d0519cd6704a11 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -71,6 +71,17 @@ bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) HECI1_FWSTS1_PROXY_STATE_NORMAL; } +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + /* Proxy init can take up to 500ms, so wait double that for safety */ + return xe_mmio_wait32(>->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + HECI1_FWSTS1_CURRENT_STATE, + HECI1_FWSTS1_PROXY_STATE_NORMAL, + USEC_PER_SEC, NULL, false); +} + static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set) { struct xe_gt *gt = gsc_to_gt(gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index fdef56995cd43e..765602221dbcf3 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -12,6 +12,7 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 10a9e3c72b3604..66198cf2662c57 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -857,7 +857,7 @@ void xe_gt_suspend_prepare(struct xe_gt *gt) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_stop_prepare(>->uc); + xe_uc_suspend_prepare(>->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index c14bd22820441c..3a8751a8b92dde 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -244,7 +244,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc) void xe_uc_stop_prepare(struct xe_uc *uc) { - xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_gsc_stop_prepare(&uc->gsc); xe_guc_stop_prepare(&uc->guc); } @@ -278,6 +278,12 @@ static void uc_reset_wait(struct xe_uc *uc) goto again; } +void xe_uc_suspend_prepare(struct xe_uc *uc) +{ + xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_guc_stop_prepare(&uc->guc); +} + int xe_uc_suspend(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 3813c1ede450ee..c23e6f5e251417 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -18,6 +18,7 @@ int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); +void xe_uc_suspend_prepare(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); void xe_uc_declare_wedged(struct xe_uc *uc); From 9d271a4f5ba52520e448ab223b1a91c6e35f17c7 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 7 May 2025 02:23:02 +0000 Subject: [PATCH 1638/2924] drm/xe: Release force wake first then runtime power xe_force_wake_get() is dependent on xe_pm_runtime_get(), so for the release path, xe_force_wake_put() should be called first then xe_pm_runtime_put(). Combine the error path and normal path together with goto. Fixes: 85d547608ef5 ("drm/xe/xe_gt_debugfs: Update handling of xe_force_wake_get return") Cc: Himal Prasad Ghimiray Cc: Rodrigo Vivi Signed-off-by: Shuicheng Lin Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20250507022302.2187527-1-shuicheng.lin@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 432cd94efdca06296cc5e76d673546f58aa90ee1) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 2d63a69cbfa38e..f7005a3643e627 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -92,22 +92,23 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) struct xe_hw_engine *hwe; enum xe_hw_engine_id id; unsigned int fw_ref; + int ret = 0; xe_pm_runtime_get(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_pm_runtime_put(xe); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto fw_put; } for_each_hw_engine(hwe, gt, id) xe_hw_engine_print(hwe, p); +fw_put: xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); - return 0; + return ret; } static int powergate_info(struct xe_gt *gt, struct drm_printer *p) From 564467e9d06c6352fac9100e8957d40a1a50234c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 2 May 2025 17:00:52 +0000 Subject: [PATCH 1639/2924] drm/xe: Add config control for svm flush work Without CONFIG_DRM_XE_GPUSVM set, GPU SVM is not initialized thus below warning pops. Refine the flush work code to be controlled by the config to avoid below warning: " [ 453.132028] ------------[ cut here ]------------ [ 453.132527] WARNING: CPU: 9 PID: 4491 at kernel/workqueue.c:4205 __flush_work+0x379/0x3a0 [ 453.133355] Modules linked in: xe drm_ttm_helper ttm gpu_sched drm_buddy drm_suballoc_helper drm_gpuvm drm_exec [ 453.134352] CPU: 9 UID: 0 PID: 4491 Comm: xe_exec_mix_mod Tainted: G U W 6.15.0-rc3+ #7 PREEMPT(full) [ 453.135405] Tainted: [U]=USER, [W]=WARN ... [ 453.136921] RIP: 0010:__flush_work+0x379/0x3a0 [ 453.137417] Code: 8b 45 00 48 8b 55 08 89 c7 48 c1 e8 04 83 e7 08 83 e0 0f 83 cf 02 89 c6 48 0f ba 6d 00 03 e9 d5 fe ff ff 0f 0b e9 db fd ff ff <0f> 0b 45 31 e4 e9 d1 fd ff ff 0f 0b e9 03 ff ff ff 0f 0b e9 d6 fe [ 453.139250] RSP: 0018:ffffc90000c67b18 EFLAGS: 00010246 [ 453.139782] RAX: 0000000000000000 RBX: ffff888108a24000 RCX: 0000000000002000 [ 453.140521] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8881016d61c8 [ 453.141253] RBP: ffff8881016d61c8 R08: 0000000000000000 R09: 0000000000000000 [ 453.141985] R10: 0000000000000000 R11: 0000000008a24000 R12: 0000000000000001 [ 453.142709] R13: 0000000000000002 R14: 0000000000000000 R15: ffff888107db8c00 [ 453.143450] FS: 00007f44853d4c80(0000) GS:ffff8882f469b000(0000) knlGS:0000000000000000 [ 453.144276] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 453.144853] CR2: 00007f4487629228 CR3: 00000001016aa000 CR4: 00000000000406f0 [ 453.145594] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 453.146320] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 453.147061] Call Trace: [ 453.147336] [ 453.147579] ? tick_nohz_tick_stopped+0xd/0x30 [ 453.148067] ? xas_load+0x9/0xb0 [ 453.148435] ? xa_load+0x6f/0xb0 [ 453.148781] __xe_vm_bind_ioctl+0xbd5/0x1500 [xe] [ 453.149338] ? dev_printk_emit+0x48/0x70 [ 453.149762] ? _dev_printk+0x57/0x80 [ 453.150148] ? drm_ioctl+0x17c/0x440 [ 453.150544] ? __drm_dev_vprintk+0x36/0x90 [ 453.150983] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 453.151575] ? drm_ioctl_kernel+0x9f/0xf0 [ 453.151998] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 453.152560] drm_ioctl_kernel+0x9f/0xf0 [ 453.152968] drm_ioctl+0x20f/0x440 [ 453.153332] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 453.153893] ? ioctl_has_perm.constprop.0.isra.0+0xae/0x100 [ 453.154489] ? memory_bm_test_bit+0x5/0x60 [ 453.154935] xe_drm_ioctl+0x47/0x70 [xe] [ 453.155419] __x64_sys_ioctl+0x8d/0xc0 [ 453.155824] do_syscall_64+0x47/0x110 [ 453.156228] entry_SYSCALL_64_after_hwframe+0x76/0x7e " v2 (Matt): refine commit message to have more details add Fixes tag move the code to xe_svm.h which already have the config remove a blank line per codestyle suggestion Fixes: 63f6e480d115 ("drm/xe: Add SVM garbage collector") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250502170052.1787973-1-shuicheng.lin@intel.com (cherry picked from commit 9d80698bcd97a5ad1088bcbb055e73fd068895e2) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_svm.h | 8 ++++++++ drivers/gpu/drm/xe/xe_vm.c | 3 +-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 0b6547c06961a0..24c578e1170e40 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -947,3 +947,15 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #endif + +/** + * xe_svm_flush() - SVM flush + * @vm: The VM. + * + * Flush all SVM actions. + */ +void xe_svm_flush(struct xe_vm *vm) +{ + if (xe_vm_in_fault_mode(vm)) + flush_work(&vm->svm.garbage_collector.work); +} diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index e059590e5076e7..be306fe7aaa4b3 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -72,6 +72,9 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end); int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); + +void xe_svm_flush(struct xe_vm *vm); + #else static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range) { @@ -124,6 +127,11 @@ static inline void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } + +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} + #endif /** diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 60303998bd612f..367c84b90e9ef7 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3312,8 +3312,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } /* Ensure all UNMAPs visible */ - if (xe_vm_in_fault_mode(vm)) - flush_work(&vm->svm.garbage_collector.work); + xe_svm_flush(vm); err = down_write_killable(&vm->lock); if (err) From 3e14c7207a975eefcda1929b2134a9f4119dde45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 26 Mar 2025 12:08:00 +0000 Subject: [PATCH 1640/2924] clk: s2mps11: initialise clk_hw_onecell_data::num before accessing ::hws[] in probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With UBSAN enabled, we're getting the following trace: UBSAN: array-index-out-of-bounds in .../drivers/clk/clk-s2mps11.c:186:3 index 0 is out of range for type 'struct clk_hw *[] __counted_by(num)' (aka 'struct clk_hw *[]') This is because commit f316cdff8d67 ("clk: Annotate struct clk_hw_onecell_data with __counted_by") annotated the hws member of that struct with __counted_by, which informs the bounds sanitizer about the number of elements in hws, so that it can warn when hws is accessed out of bounds. As noted in that change, the __counted_by member must be initialised with the number of elements before the first array access happens, otherwise there will be a warning from each access prior to the initialisation because the number of elements is zero. This occurs in s2mps11_clk_probe() due to ::num being assigned after ::hws access. Move the assignment to satisfy the requirement of assign-before-access. Cc: stable@vger.kernel.org Fixes: f316cdff8d67 ("clk: Annotate struct clk_hw_onecell_data with __counted_by") Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250326-s2mps11-ubsan-v1-1-fcc6fce5c8a9@linaro.org Reviewed-by: Krzysztof Kozlowski Signed-off-by: Stephen Boyd --- drivers/clk/clk-s2mps11.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c index 014db638662407..8ddf3a9a53dfd5 100644 --- a/drivers/clk/clk-s2mps11.c +++ b/drivers/clk/clk-s2mps11.c @@ -137,6 +137,8 @@ static int s2mps11_clk_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; + clk_data->num = S2MPS11_CLKS_NUM; + switch (hwid) { case S2MPS11X: s2mps11_reg = S2MPS11_REG_RTC_CTRL; @@ -186,7 +188,6 @@ static int s2mps11_clk_probe(struct platform_device *pdev) clk_data->hws[i] = &s2mps11_clks[i].hw; } - clk_data->num = S2MPS11_CLKS_NUM; of_clk_add_hw_provider(s2mps11_clks->clk_np, of_clk_hw_onecell_get, clk_data); From 6b3ab7f2cbfaeb6580709cd8ef4d72cfd01bfde4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 7 May 2025 21:47:45 +0100 Subject: [PATCH 1641/2924] net: qede: Initialize qede_ll_ops with designated initializer After a recent change [1] in clang's randstruct implementation to randomize structures that only contain function pointers, there is an error because qede_ll_ops get randomized but does not use a designated initializer for the first member: drivers/net/ethernet/qlogic/qede/qede_main.c:206:2: error: a randomized struct can only be initialized with a designated initializer 206 | { | ^ Explicitly initialize the common member using a designated initializer to fix the build. Cc: stable@vger.kernel.org Fixes: 035f7f87b729 ("randstruct: Enable Clang support") Link: https://github.com/llvm/llvm-project/commit/04364fb888eea6db9811510607bed4b200bcb082 [1] Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20250507-qede-fix-clang-randstruct-v1-1-5ccc15626fba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 99df00c30b8c6c..b5d744d2586f72 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -203,7 +203,7 @@ static struct pci_driver qede_pci_driver = { }; static struct qed_eth_cb_ops qede_ll_ops = { - { + .common = { #ifdef CONFIG_RFS_ACCEL .arfs_filter_op = qede_arfs_filter_op, #endif From da8bf5daa5e55a6af2b285ecda460d6454712ff4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 8 May 2025 12:24:10 -0500 Subject: [PATCH 1642/2924] memblock: Accept allocated memory before use in memblock_double_array() When increasing the array size in memblock_double_array() and the slab is not yet available, a call to memblock_find_in_range() is used to reserve/allocate memory. However, the range returned may not have been accepted, which can result in a crash when booting an SNP guest: RIP: 0010:memcpy_orig+0x68/0x130 Code: ... RSP: 0000:ffffffff9cc03ce8 EFLAGS: 00010006 RAX: ff11001ff83e5000 RBX: 0000000000000000 RCX: fffffffffffff000 RDX: 0000000000000bc0 RSI: ffffffff9dba8860 RDI: ff11001ff83e5c00 RBP: 0000000000002000 R08: 0000000000000000 R09: 0000000000002000 R10: 000000207fffe000 R11: 0000040000000000 R12: ffffffff9d06ef78 R13: ff11001ff83e5000 R14: ffffffff9dba7c60 R15: 0000000000000c00 memblock_double_array+0xff/0x310 memblock_add_range+0x1fb/0x2f0 memblock_reserve+0x4f/0xa0 memblock_alloc_range_nid+0xac/0x130 memblock_alloc_internal+0x53/0xc0 memblock_alloc_try_nid+0x3d/0xa0 swiotlb_init_remap+0x149/0x2f0 mem_init+0xb/0xb0 mm_core_init+0x8f/0x350 start_kernel+0x17e/0x5d0 x86_64_start_reservations+0x14/0x30 x86_64_start_kernel+0x92/0xa0 secondary_startup_64_no_verify+0x194/0x19b Mitigate this by calling accept_memory() on the memory range returned before the slab is available. Prior to v6.12, the accept_memory() interface used a 'start' and 'end' parameter instead of 'start' and 'size', therefore the accept_memory() call must be adjusted to specify 'start + size' for 'end' when applying to kernels prior to v6.12. Cc: stable@vger.kernel.org # see patch description, needs adjustments for <= 6.11 Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Signed-off-by: Tom Lendacky Link: https://lore.kernel.org/r/da1ac73bf4ded761e21b4e4bb5178382a580cd73.1746725050.git.thomas.lendacky@amd.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index d3509414b8c3a1..0e9ebb8aa7fe54 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -457,7 +457,14 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, min(new_area_start, memblock.current_limit), new_alloc_size, PAGE_SIZE); - new_array = addr ? __va(addr) : NULL; + if (addr) { + /* The memory may not have been accepted, yet. */ + accept_memory(addr, new_alloc_size); + + new_array = __va(addr); + } else { + new_array = NULL; + } } if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", From 732b87a409667a370b87955c518e5d004de740b5 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 7 May 2025 18:19:53 +0300 Subject: [PATCH 1643/2924] drm/i915/dp: Fix determining SST/MST mode during MTP TU state computation Determining the SST/MST mode during state computation must be done based on the output type stored in the CRTC state, which in turn is set once based on the modeset connector's SST vs. MST type and will not change as long as the connector is using the CRTC. OTOH the MST mode indicated by the given connector's intel_dp::is_mst flag can change independently of the above output type, based on what sink is at any moment plugged to the connector. Fix the state computation accordingly. Cc: Jani Nikula Fixes: f6971d7427c2 ("drm/i915/mst: adapt intel_dp_mtp_tu_compute_config() for 128b/132b SST") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4607 Reviewed-by: Jani Nikula Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250507151953.251846-1-imre.deak@intel.com (cherry picked from commit 0f45696ddb2b901fbf15cb8d2e89767be481d59f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 02f95108c63799..6dc2d31ccb5a53 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -242,7 +242,7 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - bool is_mst = intel_dp->is_mst; + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); int bpp_x16, slots = -EINVAL; int dsc_slice_count = 0; int max_dpt_bpp_x16; From 4d14b1069e9e672dbe1adab52594076da6f4a62d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 9 May 2025 11:56:33 +0300 Subject: [PATCH 1644/2924] ASoC: SOF: ipc4-control: Use SOF_CTRL_CMD_BINARY as numid for bytes_ext The header.numid is set to scontrol->comp_id in bytes_ext_get and it is ignored during bytes_ext_put. The use of comp_id is not quite great as it is kernel internal identification number. Set the header.numid to SOF_CTRL_CMD_BINARY during get and validate the numid during put to provide consistent and compatible identification number as IPC3. For IPC4 existing tooling also ignored the numid but with the use of SOF_CTRL_CMD_BINARY the different handling of the blobs can be dropped, providing better user experience. Reported-by: Seppo Ingalsuo Closes: https://github.com/thesofproject/linux/issues/5282 Fixes: a062c8899fed ("ASoC: SOF: ipc4-control: Add support for bytes control get and put") Cc: stable@vger.kernel.org Signed-off-by: Peter Ujfalusi Reviewed-by: Seppo Ingalsuo Reviewed-by: Ranjani Sridharan Reviewed-by: Liam Girdwood Link: https://patch.msgid.link/20250509085633.14930-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-control.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc4-control.c b/sound/soc/sof/ipc4-control.c index 576f407cd456af..976a4794d61000 100644 --- a/sound/soc/sof/ipc4-control.c +++ b/sound/soc/sof/ipc4-control.c @@ -531,6 +531,14 @@ static int sof_ipc4_bytes_ext_put(struct snd_sof_control *scontrol, return -EINVAL; } + /* Check header id */ + if (header.numid != SOF_CTRL_CMD_BINARY) { + dev_err_ratelimited(scomp->dev, + "Incorrect numid for bytes put %d\n", + header.numid); + return -EINVAL; + } + /* Verify the ABI header first */ if (copy_from_user(&abi_hdr, tlvd->tlv, sizeof(abi_hdr))) return -EFAULT; @@ -613,7 +621,8 @@ static int _sof_ipc4_bytes_ext_get(struct snd_sof_control *scontrol, if (data_size > size) return -ENOSPC; - header.numid = scontrol->comp_id; + /* Set header id and length */ + header.numid = SOF_CTRL_CMD_BINARY; header.length = data_size; if (copy_to_user(tlvd, &header, sizeof(struct snd_ctl_tlv))) From 98db16f314b3a0d6e5acd94708ea69751436467f Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 9 May 2025 11:59:51 +0300 Subject: [PATCH 1645/2924] ASoC: SOF: ipc4-pcm: Delay reporting is only supported for playback direction The firmware does not provide any information for capture streams via the shared pipeline registers. To avoid reporting invalid delay value for capture streams to user space we need to disable it. Fixes: af74dbd0dbcf ("ASoC: SOF: ipc4-pcm: allocate time info for pcm delay feature") Cc: stable@vger.kernel.org Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Liam Girdwood Link: https://patch.msgid.link/20250509085951.15696-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-pcm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index 1a2841899ff5a9..c09b424ab863d7 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -798,7 +798,8 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm spcm->stream[stream].private = stream_priv; - if (!support_info) + /* Delay reporting is only supported on playback */ + if (!support_info || stream == SNDRV_PCM_STREAM_CAPTURE) continue; time_info = kzalloc(sizeof(*time_info), GFP_KERNEL); From 4e7010826e96702d7fad13dbe85de4e94052f833 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 9 May 2025 11:13:08 +0300 Subject: [PATCH 1646/2924] ASoC: SOF: Intel: hda-bus: Use PIO mode on ACE2+ platforms Keep using the PIO mode for commands on ACE2+ platforms, similarly how the legacy stack is configured. Fixes: 05cf17f1bf6d ("ASoC: SOF: Intel: hda-bus: Use PIO mode for Lunar Lake") Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Ranjani Sridharan Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250509081308.13784-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/hda-bus.c b/sound/soc/sof/intel/hda-bus.c index b1be03011d7e15..6492e1cefbfb60 100644 --- a/sound/soc/sof/intel/hda-bus.c +++ b/sound/soc/sof/intel/hda-bus.c @@ -76,7 +76,7 @@ void sof_hda_bus_init(struct snd_sof_dev *sdev, struct device *dev) snd_hdac_ext_bus_init(bus, dev, &bus_core_ops, sof_hda_ext_ops); - if (chip && chip->hw_ip_version == SOF_INTEL_ACE_2_0) + if (chip && chip->hw_ip_version >= SOF_INTEL_ACE_2_0) bus->use_pio_for_commands = true; #else snd_hdac_ext_bus_init(bus, dev, NULL, NULL); From 6052f05254b4fe7b16bbd8224779af52fba98b71 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 9 May 2025 11:53:18 +0300 Subject: [PATCH 1647/2924] ASoc: SOF: topology: connect DAI to a single DAI link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The partial matching of DAI widget to link names, can cause problems if one of the widget names is a substring of another. E.g. with names "Foo1" and Foo10", it's not possible to correctly link up "Foo1". Modify the logic so that if multiple DAI links match the widget stream name, prioritize a full match if one is found. Fixes: fe88788779fc ("ASoC: SOF: topology: Use partial match for connecting DAI link and DAI widget") Link: https://github.com/thesofproject/linux/issues/5308 Signed-off-by: Kai Vehmanen Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Cc: stable@vger.kernel.org Signed-off-by: Peter Ujfalusi Link: https://patch.msgid.link/20250509085318.13936-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/topology.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index dc9cb832406783..14aa8ecc4bc426 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -1063,7 +1063,7 @@ static int sof_connect_dai_widget(struct snd_soc_component *scomp, struct snd_sof_dai *dai) { struct snd_soc_card *card = scomp->card; - struct snd_soc_pcm_runtime *rtd; + struct snd_soc_pcm_runtime *rtd, *full, *partial; struct snd_soc_dai *cpu_dai; int stream; int i; @@ -1080,12 +1080,22 @@ static int sof_connect_dai_widget(struct snd_soc_component *scomp, else goto end; + full = NULL; + partial = NULL; list_for_each_entry(rtd, &card->rtd_list, list) { /* does stream match DAI link ? */ - if (!rtd->dai_link->stream_name || - !strstr(rtd->dai_link->stream_name, w->sname)) - continue; + if (rtd->dai_link->stream_name) { + if (!strcmp(rtd->dai_link->stream_name, w->sname)) { + full = rtd; + break; + } else if (strstr(rtd->dai_link->stream_name, w->sname)) { + partial = rtd; + } + } + } + rtd = full ? full : partial; + if (rtd) { for_each_rtd_cpu_dais(rtd, i, cpu_dai) { /* * Please create DAI widget in the right order From 9379508f0661a5042ab7e0234e604038c2d63f83 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Tue, 22 Apr 2025 09:12:35 +0200 Subject: [PATCH 1648/2924] arm64: dts: imx8mp: use 800MHz NoC OPP for nominal drive mode When running in nominal drive mode, the maximum allowed frequency for the NoC is 800MHz, but the OPP table for the i.MX8MP interconnect device listed the 1GHz operating point for the NoC, regardless of the active mode. The newly introduced imx8mp-nominal.dtsi header reconfigures the clock controller to observe nominal drive mode limits, so have it modify the maximum NoC OPP as well. Fixes: 255fbd9eabe7 ("arm64: dts: imx8mp: Add optional nominal drive mode DTSI") Signed-off-by: Ahmad Fatoum Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi | 2 ++ arch/arm64/boot/dts/freescale/imx8mp.dtsi | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi index dc0ccd723c6d92..2ce1860b244d5e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi @@ -88,3 +88,5 @@ <0>, <0>, <400000000>, <1039500000>; }; + +/delete-node/ &{noc_opp_table/opp-1000000000}; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index ce6793b2d57eef..7c1c87eab54cc6 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1645,6 +1645,12 @@ opp-hz = /bits/ 64 <200000000>; }; + /* Nominal drive mode maximum */ + opp-800000000 { + opp-hz = /bits/ 64 <800000000>; + }; + + /* Overdrive mode maximum */ opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; }; From 04679f3c27e132c1a2d3881de2f0c5d7128de7c1 Mon Sep 17 00:00:00 2001 From: Jeremy Bongio Date: Wed, 7 May 2025 12:30:10 +0000 Subject: [PATCH 1649/2924] fs: Remove redundant errseq_set call in mark_buffer_write_io_error. mark_buffer_write_io_error sets sb->s_wb_err to -EIO twice. Once in mapping_set_error and once in errseq_set. Only mapping_set_error checks if bh->b_assoc_map->host is NULL. Discovered during null pointer dereference during writeback to a failing device: [] ? mark_buffer_write_io_error+0x98/0xc0 [] ? mark_buffer_write_io_error+0x8e/0xc0 [] end_buffer_async_write+0x90/0xd0 [] end_bio_bh_io_sync+0x2b/0x40 [] blk_update_request+0x1b6/0x480 [] blk_mq_end_request+0x18/0x30 [] blk_mq_dispatch_rq_list+0x4da/0x8e0 [] __blk_mq_sched_dispatch_requests+0x218/0x6a0 [] blk_mq_sched_dispatch_requests+0x3a/0x80 [] blk_mq_run_hw_queue+0x108/0x330 [] blk_mq_flush_plug_list+0x178/0x5f0 [] __blk_flush_plug+0x41/0x120 [] blk_finish_plug+0x22/0x40 [] wb_writeback+0x150/0x280 [] ? set_worker_desc+0x9f/0xc0 [] wb_workfn+0x24e/0x4a0 Fixes: 485e9605c0573 ("fs/buffer.c: record blockdev write errors in super_block that it backs") Signed-off-by: Jeremy Bongio Link: https://lore.kernel.org/20250507123010.1228243-1-jbongio@google.com Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/buffer.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 7be23ff20b2733..7ba1807145aa8b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1220,10 +1220,8 @@ void mark_buffer_write_io_error(struct buffer_head *bh) /* FIXME: do we need to set this in both places? */ if (bh->b_folio && bh->b_folio->mapping) mapping_set_error(bh->b_folio->mapping, -EIO); - if (bh->b_assoc_map) { + if (bh->b_assoc_map) mapping_set_error(bh->b_assoc_map, -EIO); - errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO); - } } EXPORT_SYMBOL(mark_buffer_write_io_error); From 2d3cbfd6d54a2c39ce3244f33f85c595844bd7b8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 May 2025 21:35:58 -0700 Subject: [PATCH 1650/2924] net_sched: Flush gso_skb list too during ->change() Previously, when reducing a qdisc's limit via the ->change() operation, only the main skb queue was trimmed, potentially leaving packets in the gso_skb list. This could result in NULL pointer dereference when we only check sch->limit against sch->q.qlen. This patch introduces a new helper, qdisc_dequeue_internal(), which ensures both the gso_skb list and the main queue are properly flushed when trimming excess packets. All relevant qdiscs (codel, fq, fq_codel, fq_pie, hhf, pie) are updated to use this helper in their ->change() routines. Fixes: 76e3cc126bb2 ("codel: Controlled Delay AQM") Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") Fixes: 10239edf86f1 ("net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc") Fixes: d4b36210c2e6 ("net: pkt_sched: PIE AQM scheme") Reported-by: Will Reported-by: Savy Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 15 +++++++++++++++ net/sched/sch_codel.c | 2 +- net/sched/sch_fq.c | 2 +- net/sched/sch_fq_codel.c | 2 +- net/sched/sch_fq_pie.c | 2 +- net/sched/sch_hhf.c | 2 +- net/sched/sch_pie.c | 2 +- 7 files changed, 21 insertions(+), 6 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d48c657191cd01..1c05fed05f2bc2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1031,6 +1031,21 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) return skb; } +static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool direct) +{ + struct sk_buff *skb; + + skb = __skb_dequeue(&sch->gso_skb); + if (skb) { + sch->q.qlen--; + return skb; + } + if (direct) + return __qdisc_dequeue_head(&sch->q); + else + return sch->dequeue(sch); +} + static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) { struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 12dd71139da396..c93761040c6e77 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -144,7 +144,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt, qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); + struct sk_buff *skb = qdisc_dequeue_internal(sch, true); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 2ca5332cfcc5c5..902ff54706072b 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1136,7 +1136,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); } while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = fq_dequeue(sch); + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); if (!skb) break; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 6c9029f71e88d3..2a0f3a513bfaa1 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -441,7 +441,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, while (sch->q.qlen > sch->limit || q->memory_usage > q->memory_limit) { - struct sk_buff *skb = fq_codel_dequeue(sch); + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); q->cstats.drop_len += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index f3b8203d3e855b..df7fac95ab1513 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -366,7 +366,7 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, /* Drop excess packets if new limit is lower */ while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = fq_pie_qdisc_dequeue(sch); + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); len_dropped += qdisc_pkt_len(skb); num_dropped += 1; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 44d9efe1a96a89..5aa434b4670738 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -564,7 +564,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, qlen = sch->q.qlen; prev_backlog = sch->qstats.backlog; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = hhf_dequeue(sch); + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); rtnl_kfree_skbs(skb, skb); } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 3771d000b30d08..ff49a6c97033aa 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -195,7 +195,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, /* Drop excess packets if new limit is lower */ qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); + struct sk_buff *skb = qdisc_dequeue_internal(sch, true); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); From 16ce349b15069334710faa10f2e09866f8391f26 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 May 2025 21:35:59 -0700 Subject: [PATCH 1651/2924] selftests/tc-testing: Add qdisc limit trimming tests Added new test cases for FQ, FQ_CODEL, FQ_PIE, and HHF qdiscs to verify queue trimming behavior when the qdisc limit is dynamically reduced. Each test injects packets, reduces the qdisc limit, and checks that the new limit is enforced. This is still best effort since timing qdisc backlog is not easy. Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/qdiscs/codel.json | 24 +++++++++++++++++++ .../tc-testing/tc-tests/qdiscs/fq.json | 22 +++++++++++++++++ .../tc-testing/tc-tests/qdiscs/fq_codel.json | 22 +++++++++++++++++ .../tc-testing/tc-tests/qdiscs/fq_pie.json | 22 +++++++++++++++++ .../tc-testing/tc-tests/qdiscs/hhf.json | 22 +++++++++++++++++ .../tc-testing/tc-tests/qdiscs/pie.json | 24 +++++++++++++++++++ 6 files changed, 136 insertions(+) create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json index e9469ee71e6fc7..6d515d0e5ed696 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json @@ -189,5 +189,29 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "deb1", + "name": "CODEL test qdisc limit trimming", + "category": ["qdisc", "codel"], + "plugins": { + "requires": ["nsPlugin", "scapyPlugin"] + }, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root codel limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root codel limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1p target 5ms interval 100ms", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json index 3a537b2ec4c972..24faf4e12dfa05 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json @@ -377,5 +377,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "9479", + "name": "FQ test qdisc limit trimming", + "category": ["qdisc", "fq"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json index 9774b1e8801bba..4ce62b857fd7ab 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json @@ -294,5 +294,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "0436", + "name": "FQ_CODEL test qdisc limit trimming", + "category": ["qdisc", "fq_codel"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq_codel limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_codel limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json index d012d88d67fee6..229fe1bf4a9062 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json @@ -18,5 +18,27 @@ "matchCount": "1", "teardown": [ ] + }, + { + "id": "83bf", + "name": "FQ_PIE test qdisc limit trimming", + "category": ["qdisc", "fq_pie"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq_pie limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_pie limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq_pie 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json index dbef5474b26bdc..0ca19fac54a57d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json @@ -188,5 +188,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "385f", + "name": "HHF test qdisc limit trimming", + "category": ["qdisc", "hhf"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root hhf limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root hhf limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json new file mode 100644 index 00000000000000..1a98b66e803071 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json @@ -0,0 +1,24 @@ +[ + { + "id": "6158", + "name": "PIE test qdisc limit trimming", + "category": ["qdisc", "pie"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root pie limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root pie limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc pie 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] + } +] From 92835cebab120f8a5f023a26a792a2ac3f816c4f Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 8 May 2025 14:12:03 -0400 Subject: [PATCH 1652/2924] io_uring/sqpoll: Increase task_work submission batch size Our QA team reported a 10%-23%, throughput reduction on an io_uring sqpoll testcase doing IO to a null_blk, that I traced back to a reduction of the device submission queue depth utilization. It turns out that, after commit af5d68f8892f ("io_uring/sqpoll: manage task_work privately"), we capped the number of task_work entries that can be completed from a single spin of sqpoll to only 8 entries, before the sqpoll goes around to (potentially) sleep. While this cap doesn't drive the submission side directly, it impacts the completion behavior, which affects the number of IO queued by fio per sqpoll cycle on the submission side, and io_uring ends up seeing less ios per sqpoll cycle. As a result, block layer plugging is less effective, and we see more time spent inside the block layer in profilings charts, and increased submission latency measured by fio. There are other places that have increased overhead once sqpoll sleeps more often, such as the sqpoll utilization calculation. But, in this microbenchmark, those were not representative enough in perf charts, and their removal didn't yield measurable changes in throughput. The major overhead comes from the fact we plug less, and less often, when submitting to the block layer. My benchmark is: fio --ioengine=io_uring --direct=1 --iodepth=128 --runtime=300 --bs=4k \ --invalidate=1 --time_based --ramp_time=10 --group_reporting=1 \ --filename=/dev/nullb0 --name=RandomReads-direct-nullb-sqpoll-4k-1 \ --rw=randread --numjobs=1 --sqthread_poll In one machine, tested on top of Linux 6.15-rc1, we have the following baseline: READ: bw=4994MiB/s (5236MB/s), 4994MiB/s-4994MiB/s (5236MB/s-5236MB/s), io=439GiB (471GB), run=90001-90001msec With this patch: READ: bw=5762MiB/s (6042MB/s), 5762MiB/s-5762MiB/s (6042MB/s-6042MB/s), io=506GiB (544GB), run=90001-90001msec which is a 15% improvement in measured bandwidth. The average submission latency is noticeably lowered too. As measured by fio: Baseline: lat (usec): min=20, max=241, avg=99.81, stdev=3.38 Patched: lat (usec): min=26, max=226, avg=86.48, stdev=4.82 If we look at blktrace, we can also see the plugging behavior is improved. In the baseline, we end up limited to plugging 8 requests in the block layer regardless of the device queue depth size, while after patching we can drive more io, and we manage to utilize the full device queue. In the baseline, after a stabilization phase, an ordinary submission looks like: 254,0 1 49942 0.016028795 5977 U N [iou-sqp-5976] 7 After patching, I see consistently more requests per unplug. 254,0 1 4996 0.001432872 3145 U N [iou-sqp-3144] 32 Ideally, the cap size would at least be the deep enough to fill the device queue, but we can't predict that behavior, or assume all IO goes to a single device, and thus can't guess the ideal batch size. We also don't want to let the tw run unbounded, though I'm not sure it would really be a problem. Instead, let's just give it a more sensible value that will allow for more efficient batching. I've tested with different cap values, and initially proposed to increase the cap to 1024. Jens argued it is too big of a bump and I observed that, with 32, I'm no longer able to observe this bottleneck in any of my machines. Fixes: af5d68f8892f ("io_uring/sqpoll: manage task_work privately") Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20250508181203.3785544-1-krisman@suse.de Signed-off-by: Jens Axboe --- io_uring/sqpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index d037cc68e9d3ea..03c699493b5ab6 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -20,7 +20,7 @@ #include "sqpoll.h" #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8 -#define IORING_TW_CAP_ENTRIES_VALUE 8 +#define IORING_TW_CAP_ENTRIES_VALUE 32 enum { IO_SQ_THREAD_SHOULD_STOP = 0, From c6888983134e2ccc2db8ffd2720b0d4826d952e4 Mon Sep 17 00:00:00 2001 From: Himanshu Bhavani Date: Mon, 5 May 2025 11:28:27 +0530 Subject: [PATCH 1653/2924] arm64: dts: imx8mp-var-som: Fix LDO5 shutdown causing SD card timeout Fix SD card timeout issue caused by LDO5 regulator getting disabled after boot. The kernel log shows LDO5 being disabled, which leads to a timeout on USDHC2: [ 33.760561] LDO5: disabling [ 81.119861] mmc1: Timeout waiting for hardware interrupt. To prevent this, set regulator-boot-on and regulator-always-on for LDO5. Also add the vqmmc regulator to properly support 1.8V/3.3V signaling for USDHC2 using a GPIO-controlled regulator. Fixes: 6c2a1f4f71258 ("arm64: dts: imx8mp-var-som-symphony: Add Variscite Symphony board and VAR-SOM-MX8MP SoM") Signed-off-by: Himanshu Bhavani Acked-by: Tarang Raval Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi index b2ac2583a59292..b59da91fdd041f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi @@ -35,7 +35,6 @@ <0x1 0x00000000 0 0xc0000000>; }; - reg_usdhc2_vmmc: regulator-usdhc2-vmmc { compatible = "regulator-fixed"; regulator-name = "VSD_3V3"; @@ -46,6 +45,16 @@ startup-delay-us = <100>; off-on-delay-us = <12000>; }; + + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + regulator-name = "VSD_VSEL"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + gpios = <&gpio2 12 GPIO_ACTIVE_HIGH>; + states = <3300000 0x0 1800000 0x1>; + vin-supply = <&ldo5>; + }; }; &A53_0 { @@ -205,6 +214,7 @@ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>; cd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; vmmc-supply = <®_usdhc2_vmmc>; + vqmmc-supply = <®_usdhc2_vqmmc>; bus-width = <4>; status = "okay"; }; From fea4e317f9e7e1f449ce90dedc27a2d2a95bee5a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 8 May 2025 15:41:32 -0700 Subject: [PATCH 1654/2924] x86/mm: Eliminate window where TLB flushes may be inadvertently skipped tl;dr: There is a window in the mm switching code where the new CR3 is set and the CPU should be getting TLB flushes for the new mm. But should_flush_tlb() has a bug and suppresses the flush. Fix it by widening the window where should_flush_tlb() sends an IPI. Long Version: === History === There were a few things leading up to this. First, updating mm_cpumask() was observed to be too expensive, so it was made lazier. But being lazy caused too many unnecessary IPIs to CPUs due to the now-lazy mm_cpumask(). So code was added to cull mm_cpumask() periodically[2]. But that culling was a bit too aggressive and skipped sending TLB flushes to CPUs that need them. So here we are again. === Problem === The too-aggressive code in should_flush_tlb() strikes in this window: // Turn on IPIs for this CPU/mm combination, but only // if should_flush_tlb() agrees: cpumask_set_cpu(cpu, mm_cpumask(next)); next_tlb_gen = atomic64_read(&next->context.tlb_gen); choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); load_new_mm_cr3(need_flush); // ^ After 'need_flush' is set to false, IPIs *MUST* // be sent to this CPU and not be ignored. this_cpu_write(cpu_tlbstate.loaded_mm, next); // ^ Not until this point does should_flush_tlb() // become true! should_flush_tlb() will suppress TLB flushes between load_new_mm_cr3() and writing to 'loaded_mm', which is a window where they should not be suppressed. Whoops. === Solution === Thankfully, the fuzzy "just about to write CR3" window is already marked with loaded_mm==LOADED_MM_SWITCHING. Simply checking for that state in should_flush_tlb() is sufficient to ensure that the CPU is targeted with an IPI. This will cause more TLB flush IPIs. But the window is relatively small and I do not expect this to cause any kind of measurable performance impact. Update the comment where LOADED_MM_SWITCHING is written since it grew yet another user. Peter Z also raised a concern that should_flush_tlb() might not observe 'loaded_mm' and 'is_lazy' in the same order that switch_mm_irqs_off() writes them. Add a barrier to ensure that they are observed in the order they are written. Signed-off-by: Dave Hansen Acked-by: Rik van Riel Link: https://lore.kernel.org/oe-lkp/202411282207.6bd28eae-lkp@intel.com/ [1] Fixes: 6db2526c1d69 ("x86/mm/tlb: Only trim the mm_cpumask once a second") [2] Reported-by: Stephen Dolan Cc: stable@vger.kernel.org Acked-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- arch/x86/mm/tlb.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index eb83348f930512..b6d6750e4bd121 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -899,8 +899,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, cond_mitigation(tsk); /* - * Let nmi_uaccess_okay() and finish_asid_transition() - * know that CR3 is changing. + * Indicate that CR3 is about to change. nmi_uaccess_okay() + * and others are sensitive to the window where mm_cpumask(), + * CR3 and cpu_tlbstate.loaded_mm are not all in sync. */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); barrier(); @@ -1204,8 +1205,16 @@ static void flush_tlb_func(void *info) static bool should_flush_tlb(int cpu, void *data) { + struct mm_struct *loaded_mm = per_cpu(cpu_tlbstate.loaded_mm, cpu); struct flush_tlb_info *info = data; + /* + * Order the 'loaded_mm' and 'is_lazy' against their + * write ordering in switch_mm_irqs_off(). Ensure + * 'is_lazy' is at least as new as 'loaded_mm'. + */ + smp_rmb(); + /* Lazy TLB will get flushed at the next context switch. */ if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) return false; @@ -1214,8 +1223,15 @@ static bool should_flush_tlb(int cpu, void *data) if (!info->mm) return true; + /* + * While switching, the remote CPU could have state from + * either the prev or next mm. Assume the worst and flush. + */ + if (loaded_mm == LOADED_MM_SWITCHING) + return true; + /* The target mm is loaded, and the CPU is not lazy. */ - if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm) + if (loaded_mm == info->mm) return true; /* In cpumask, but not the loaded mm? Periodically remove by flushing. */ From 39b5ef791d109dd54c7c2e6e87933edfcc0ad1ac Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 8 May 2025 15:24:13 -0400 Subject: [PATCH 1655/2924] cgroup/cpuset: Extend kthread_is_per_cpu() check to all PF_NO_SETAFFINITY tasks Commit ec5fbdfb99d1 ("cgroup/cpuset: Enable update_tasks_cpumask() on top_cpuset") enabled us to pull CPUs dedicated to child partitions from tasks in top_cpuset by ignoring per cpu kthreads. However, there can be other kthreads that are not per cpu but have PF_NO_SETAFFINITY flag set to indicate that we shouldn't mess with their CPU affinity. For other kthreads, their affinity will be changed to skip CPUs dedicated to child partitions whether it is an isolating or a scheduling one. As all the per cpu kthreads have PF_NO_SETAFFINITY set, the PF_NO_SETAFFINITY tasks are essentially a superset of per cpu kthreads. Fix this issue by dropping the kthread_is_per_cpu() check and checking the PF_NO_SETAFFINITY flag instead. Fixes: ec5fbdfb99d1 ("cgroup/cpuset: Enable update_tasks_cpumask() on top_cpuset") Signed-off-by: Waiman Long Acked-by: Frederic Weisbecker Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 306b6043009145..24b70ea3e6ce97 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1116,9 +1116,11 @@ void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) if (top_cs) { /* - * Percpu kthreads in top_cpuset are ignored + * PF_NO_SETAFFINITY tasks are ignored. + * All per cpu kthreads should have PF_NO_SETAFFINITY + * flag set, see kthread_set_per_cpu(). */ - if (kthread_is_per_cpu(task)) + if (task->flags & PF_NO_SETAFFINITY) continue; cpumask_andnot(new_cpus, possible_mask, subpartitions_cpus); } else { From eb0851e14432f3b87c77b704c835ac376deda03a Mon Sep 17 00:00:00 2001 From: I Hsin Cheng Date: Tue, 6 May 2025 02:43:38 +0800 Subject: [PATCH 1656/2924] drm/meson: Use 1000ULL when operating with mode->clock Coverity scan reported the usage of "mode->clock * 1000" may lead to integer overflow. Use "1000ULL" instead of "1000" when utilizing it to avoid potential integer overflow issue. Link: https://scan5.scan.coverity.com/#/project-view/10074/10063?selectedIssue=1646759 Signed-off-by: I Hsin Cheng Reviewed-by: Martin Blumenstingl Fixes: 1017560164b6 ("drm/meson: use unsigned long long / Hz for frequency types") Link: https://lore.kernel.org/r/20250505184338.678540-1-richard120310@gmail.com Signed-off-by: Neil Armstrong --- drivers/gpu/drm/meson/meson_encoder_hdmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 7752d8ac85f0e5..c08fa93e50a30e 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -75,7 +75,7 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, unsigned long long venc_freq; unsigned long long hdmi_freq; - vclk_freq = mode->clock * 1000; + vclk_freq = mode->clock * 1000ULL; /* For 420, pixel clock is half unlike venc clock */ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) @@ -123,7 +123,7 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge); struct meson_drm *priv = encoder_hdmi->priv; bool is_hdmi2_sink = display_info->hdmi.scdc.supported; - unsigned long long clock = mode->clock * 1000; + unsigned long long clock = mode->clock * 1000ULL; unsigned long long phy_freq; unsigned long long vclk_freq; unsigned long long venc_freq; From 1ac116ce6468670eeda39345a5585df308243dca Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 11 Apr 2025 15:36:38 -0700 Subject: [PATCH 1657/2924] Documentation: x86/bugs/its: Add ITS documentation Add the admin-guide for Indirect Target Selection (ITS). Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre --- Documentation/admin-guide/hw-vuln/index.rst | 1 + .../hw-vuln/indirect-target-selection.rst | 168 ++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/indirect-target-selection.rst diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 451874b8135d8e..ce296b8430fc98 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -23,3 +23,4 @@ are configurable at compile, boot or run time. gather_data_sampling reg-file-data-sampling rsb + indirect-target-selection diff --git a/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst new file mode 100644 index 00000000000000..d9ca64108d2332 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst @@ -0,0 +1,168 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Indirect Target Selection (ITS) +=============================== + +ITS is a vulnerability in some Intel CPUs that support Enhanced IBRS and were +released before Alder Lake. ITS may allow an attacker to control the prediction +of indirect branches and RETs located in the lower half of a cacheline. + +ITS is assigned CVE-2024-28956 with a CVSS score of 4.7 (Medium). + +Scope of Impact +--------------- +- **eIBRS Guest/Host Isolation**: Indirect branches in KVM/kernel may still be + predicted with unintended target corresponding to a branch in the guest. + +- **Intra-Mode BTI**: In-kernel training such as through cBPF or other native + gadgets. + +- **Indirect Branch Prediction Barrier (IBPB)**: After an IBPB, indirect + branches may still be predicted with targets corresponding to direct branches + executed prior to the IBPB. This is fixed by the IPU 2025.1 microcode, which + should be available via distro updates. Alternatively microcode can be + obtained from Intel's github repository [#f1]_. + +Affected CPUs +------------- +Below is the list of ITS affected CPUs [#f2]_ [#f3]_: + + ======================== ============ ==================== =============== + Common name Family_Model eIBRS Intra-mode BTI + Guest/Host Isolation + ======================== ============ ==================== =============== + SKYLAKE_X (step >= 6) 06_55H Affected Affected + ICELAKE_X 06_6AH Not affected Affected + ICELAKE_D 06_6CH Not affected Affected + ICELAKE_L 06_7EH Not affected Affected + TIGERLAKE_L 06_8CH Not affected Affected + TIGERLAKE 06_8DH Not affected Affected + KABYLAKE_L (step >= 12) 06_8EH Affected Affected + KABYLAKE (step >= 13) 06_9EH Affected Affected + COMETLAKE 06_A5H Affected Affected + COMETLAKE_L 06_A6H Affected Affected + ROCKETLAKE 06_A7H Not affected Affected + ======================== ============ ==================== =============== + +- All affected CPUs enumerate Enhanced IBRS feature. +- IBPB isolation is affected on all ITS affected CPUs, and need a microcode + update for mitigation. +- None of the affected CPUs enumerate BHI_CTRL which was introduced in Golden + Cove (Alder Lake and Sapphire Rapids). This can help guests to determine the + host's affected status. +- Intel Atom CPUs are not affected by ITS. + +Mitigation +---------- +As only the indirect branches and RETs that have their last byte of instruction +in the lower half of the cacheline are vulnerable to ITS, the basic idea behind +the mitigation is to not allow indirect branches in the lower half. + +This is achieved by relying on existing retpoline support in the kernel, and in +compilers. ITS-vulnerable retpoline sites are runtime patched to point to newly +added ITS-safe thunks. These safe thunks consists of indirect branch in the +second half of the cacheline. Not all retpoline sites are patched to thunks, if +a retpoline site is evaluated to be ITS-safe, it is replaced with an inline +indirect branch. + +Dynamic thunks +~~~~~~~~~~~~~~ +From a dynamically allocated pool of safe-thunks, each vulnerable site is +replaced with a new thunk, such that they get a unique address. This could +improve the branch prediction accuracy. Also, it is a defense-in-depth measure +against aliasing. + +Note, for simplicity, indirect branches in eBPF programs are always replaced +with a jump to a static thunk in __x86_indirect_its_thunk_array. If required, +in future this can be changed to use dynamic thunks. + +All vulnerable RETs are replaced with a static thunk, they do not use dynamic +thunks. This is because RETs get their prediction from RSB mostly that does not +depend on source address. RETs that underflow RSB may benefit from dynamic +thunks. But, RETs significantly outnumber indirect branches, and any benefit +from a unique source address could be outweighed by the increased icache +footprint and iTLB pressure. + +Retpoline +~~~~~~~~~ +Retpoline sequence also mitigates ITS-unsafe indirect branches. For this +reason, when retpoline is enabled, ITS mitigation only relocates the RETs to +safe thunks. Unless user requested the RSB-stuffing mitigation. + +RSB Stuffing +~~~~~~~~~~~~ +RSB-stuffing via Call Depth Tracking is a mitigation for Retbleed RSB-underflow +attacks. And it also mitigates RETs that are vulnerable to ITS. + +Mitigation in guests +^^^^^^^^^^^^^^^^^^^^ +All guests deploy ITS mitigation by default, irrespective of eIBRS enumeration +and Family/Model of the guest. This is because eIBRS feature could be hidden +from a guest. One exception to this is when a guest enumerates BHI_DIS_S, which +indicates that the guest is running on an unaffected host. + +To prevent guests from unnecessarily deploying the mitigation on unaffected +platforms, Intel has defined ITS_NO bit(62) in MSR IA32_ARCH_CAPABILITIES. When +a guest sees this bit set, it should not enumerate the ITS bug. Note, this bit +is not set by any hardware, but is **intended for VMMs to synthesize** it for +guests as per the host's affected status. + +Mitigation options +^^^^^^^^^^^^^^^^^^ +The ITS mitigation can be controlled using the "indirect_target_selection" +kernel parameter. The available options are: + + ======== =================================================================== + on (default) Deploy the "Aligned branch/return thunks" mitigation. + If spectre_v2 mitigation enables retpoline, aligned-thunks are only + deployed for the affected RET instructions. Retpoline mitigates + indirect branches. + + off Disable ITS mitigation. + + vmexit Equivalent to "=on" if the CPU is affected by guest/host isolation + part of ITS. Otherwise, mitigation is not deployed. This option is + useful when host userspace is not in the threat model, and only + attacks from guest to host are considered. + + stuff Deploy RSB-fill mitigation when retpoline is also deployed. + Otherwise, deploy the default mitigation. When retpoline mitigation + is enabled, RSB-stuffing via Call-Depth-Tracking also mitigates + ITS. + + force Force the ITS bug and deploy the default mitigation. + ======== =================================================================== + +Sysfs reporting +--------------- + +The sysfs file showing ITS mitigation status is: + + /sys/devices/system/cpu/vulnerabilities/indirect_target_selection + +Note, microcode mitigation status is not reported in this file. + +The possible values in this file are: + +.. list-table:: + + * - Not affected + - The processor is not vulnerable. + * - Vulnerable + - System is vulnerable and no mitigation has been applied. + * - Vulnerable, KVM: Not affected + - System is vulnerable to intra-mode BTI, but not affected by eIBRS + guest/host isolation. + * - Mitigation: Aligned branch/return thunks + - The mitigation is enabled, affected indirect branches and RETs are + relocated to safe thunks. + * - Mitigation: Retpolines, Stuffing RSB + - The mitigation is enabled using retpoline and RSB stuffing. + +References +---------- +.. [#f1] Microcode repository - https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files + +.. [#f2] Affected Processors list - https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html + +.. [#f3] Affected Processors list (machine readable) - https://github.com/intel/Intel-affected-processor-list From 159013a7ca18c271ff64192deb62a689b622d860 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 21 Jun 2024 17:40:41 -0700 Subject: [PATCH 1658/2924] x86/its: Enumerate Indirect Target Selection (ITS) bug ITS bug in some pre-Alderlake Intel CPUs may allow indirect branches in the first half of a cache line get predicted to a target of a branch located in the second half of the cache line. Set X86_BUG_ITS on affected CPUs. Mitigation to follow in later commits. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 8 +++++ arch/x86/kernel/cpu/common.c | 58 +++++++++++++++++++++++------- arch/x86/kvm/x86.c | 4 ++- 4 files changed, 58 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6c2c152d8a67b9..2ff978fa414b5a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -533,4 +533,5 @@ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ +#define X86_BUG_ITS X86_BUG(1*32 + 6) /* "its" CPU is affected by Indirect Target Selection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e6134ef2263d50..e7d2f460fcc699 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -211,6 +211,14 @@ * VERW clears CPU Register * File. */ +#define ARCH_CAP_ITS_NO BIT_ULL(62) /* + * Not susceptible to + * Indirect Target Selection. + * This bit is not set by + * HW, but is synthesized by + * VMMs for guests to know + * their affected status. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 12126adbc3a9a7..accdee7ab1f70f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1227,6 +1227,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define GDS BIT(6) /* CPU is affected by Register File Data Sampling */ #define RFDS BIT(7) +/* CPU is affected by Indirect Target Selection */ +#define ITS BIT(8) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1238,22 +1240,25 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS), VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO), VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS), VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS), - VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS), VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS), VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS), VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS), VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS), @@ -1318,6 +1323,32 @@ static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) return cpu_matches(cpu_vuln_blacklist, RFDS); } +static bool __init vulnerable_to_its(u64 x86_arch_cap_msr) +{ + /* The "immunity" bit trumps everything else: */ + if (x86_arch_cap_msr & ARCH_CAP_ITS_NO) + return false; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + /* None of the affected CPUs have BHI_CTRL */ + if (boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + /* + * If a VMM did not expose ITS_NO, assume that a guest could + * be running on a vulnerable hardware or may migrate to such + * hardware. + */ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return true; + + if (cpu_matches(cpu_vuln_blacklist, ITS)) + return true; + + return false; +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); @@ -1449,6 +1480,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET)) setup_force_cpu_bug(X86_BUG_IBPB_NO_RET); + if (vulnerable_to_its(x86_arch_cap_msr)) + setup_force_cpu_bug(X86_BUG_ITS); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index df5b99ea1f1812..2626567c35df61 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1584,7 +1584,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc); ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO | ARCH_CAP_ITS_NO) static u64 kvm_get_arch_capabilities(void) { @@ -1618,6 +1618,8 @@ static u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_MDS_NO; if (!boot_cpu_has_bug(X86_BUG_RFDS)) data |= ARCH_CAP_RFDS_NO; + if (!boot_cpu_has_bug(X86_BUG_ITS)) + data |= ARCH_CAP_ITS_NO; if (!boot_cpu_has(X86_FEATURE_RTM)) { /* From 8754e67ad4ac692c67ff1f99c0d07156f04ae40c Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 21 Jun 2024 21:17:21 -0700 Subject: [PATCH 1659/2924] x86/its: Add support for ITS-safe indirect thunk Due to ITS, indirect branches in the lower half of a cacheline may be vulnerable to branch target injection attack. Introduce ITS-safe thunks to patch indirect branches in the lower half of cacheline with the thunk. Also thunk any eBPF generated indirect branches in emit_indirect_jump(). Below category of indirect branches are not mitigated: - Indirect branches in the .init section are not mitigated because they are discarded after boot. - Indirect branches that are explicitly marked retpoline-safe. Note that retpoline also mitigates the indirect branches against ITS. This is because the retpoline sequence fills an RSB entry before RET, and it does not suffer from RSB-underflow part of the ITS. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre --- arch/x86/Kconfig | 11 +++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/nospec-branch.h | 4 +++ arch/x86/kernel/alternative.c | 45 ++++++++++++++++++++++++++-- arch/x86/kernel/vmlinux.lds.S | 6 ++++ arch/x86/lib/retpoline.S | 28 +++++++++++++++++ arch/x86/net/bpf_jit_comp.c | 5 +++- 7 files changed, 96 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4b9f378e05f6be..a6bd0590c4371b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2710,6 +2710,17 @@ config MITIGATION_SSB of speculative execution in a similar way to the Meltdown and Spectre security vulnerabilities. +config MITIGATION_ITS + bool "Enable Indirect Target Selection mitigation" + depends on CPU_SUP_INTEL && X86_64 + depends on MITIGATION_RETPOLINE && MITIGATION_RETHUNK + default y + help + Enable Indirect Target Selection (ITS) mitigation. ITS is a bug in + BPU on some Intel CPUs that may allow Spectre V2 style attacks. If + disabled, mitigation cannot be enabled via cmdline. + See + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2ff978fa414b5a..03af8191073a21 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -481,6 +481,7 @@ #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ #define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ +#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32 + 9) /* Use thunk for indirect branches in lower half of cacheline */ /* * BUG word(s) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 5c43f145454ddd..a5eb0cd93c9273 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -336,10 +336,14 @@ #else /* __ASSEMBLER__ */ +#define ITS_THUNK_SIZE 64 + typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +typedef u8 its_thunk_t[ITS_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; +extern its_thunk_t __x86_indirect_its_thunk_array[]; #ifdef CONFIG_MITIGATION_RETHUNK extern void __x86_return_thunk(void); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index bf82c6f7d69055..333c0295b0a522 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -581,7 +581,8 @@ static int emit_indirect(int op, int reg, u8 *bytes) return i; } -static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) +static int __emit_trampoline(void *addr, struct insn *insn, u8 *bytes, + void *call_dest, void *jmp_dest) { u8 op = insn->opcode.bytes[0]; int i = 0; @@ -602,7 +603,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 switch (op) { case CALL_INSN_OPCODE: __text_gen_insn(bytes+i, op, addr+i, - __x86_indirect_call_thunk_array[reg], + call_dest, CALL_INSN_SIZE); i += CALL_INSN_SIZE; break; @@ -610,7 +611,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 case JMP32_INSN_OPCODE: clang_jcc: __text_gen_insn(bytes+i, op, addr+i, - __x86_indirect_jump_thunk_array[reg], + jmp_dest, JMP32_INSN_SIZE); i += JMP32_INSN_SIZE; break; @@ -625,6 +626,35 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 return i; } +static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + return __emit_trampoline(addr, insn, bytes, + __x86_indirect_call_thunk_array[reg], + __x86_indirect_jump_thunk_array[reg]); +} + +#ifdef CONFIG_MITIGATION_ITS +static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + return __emit_trampoline(addr, insn, bytes, + __x86_indirect_its_thunk_array[reg], + __x86_indirect_its_thunk_array[reg]); +} + +/* Check if an indirect branch is at ITS-unsafe address */ +static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return false; + + /* Indirect branch opcode is 2 or 3 bytes depending on reg */ + addr += 1 + reg / 8; + + /* Lower-half of the cacheline? */ + return !(addr & 0x20); +} +#endif + /* * Rewrite the compiler generated retpoline thunk calls. * @@ -699,6 +729,15 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) bytes[i++] = 0xe8; /* LFENCE */ } +#ifdef CONFIG_MITIGATION_ITS + /* + * Check if the address of last byte of emitted-indirect is in + * lower-half of the cacheline. Such branches need ITS mitigation. + */ + if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + i, reg)) + return emit_its_trampoline(addr, insn, reg, bytes); +#endif + ret = emit_indirect(op, reg, bytes + i); if (ret < 0) return ret; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index ccdc45e5b75961..c3e048e7e161aa 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -497,6 +497,12 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard); "SRSO function pair won't alias"); #endif +#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B) +. = ASSERT(__x86_indirect_its_thunk_rax & 0x20, "__x86_indirect_thunk_rax not in second half of cacheline"); +. = ASSERT(((__x86_indirect_its_thunk_rcx - __x86_indirect_its_thunk_rax) % 64) == 0, "Indirect thunks are not cacheline apart"); +. = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array"); +#endif + #endif /* CONFIG_X86_64 */ /* diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index a26c43abd47d8d..a06891892853f9 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -367,6 +367,34 @@ SYM_FUNC_END(call_depth_return_thunk) #endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ +#ifdef CONFIG_MITIGATION_ITS + +.macro ITS_THUNK reg + +SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + ANNOTATE_RETPOLINE_SAFE + jmp *%\reg + int3 + .align 32, 0xcc /* fill to the end of the line */ + .skip 32, 0xcc /* skip to the next upper half */ +.endm + +/* ITS mitigation requires thunks be aligned to upper half of cacheline */ +.align 64, 0xcc +.skip 32, 0xcc +SYM_CODE_START(__x86_indirect_its_thunk_array) + +#define GEN(reg) ITS_THUNK reg +#include +#undef GEN + + .align 64, 0xcc +SYM_CODE_END(__x86_indirect_its_thunk_array) + +#endif + /* * This function name is magical and is used by -mfunction-return=thunk-extern * for the compiler to generate JMPs to it. diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9e5fe2ba858f08..01e8119db5dbaf 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -661,7 +661,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) { u8 *prog = *pprog; - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) { + OPTIMIZER_HIDE_VAR(reg); + emit_jump(&prog, &__x86_indirect_its_thunk_array[reg], ip); + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { From a75bf27fe41abe658c53276a0c486c4bf9adecfc Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 21 Jun 2024 21:17:21 -0700 Subject: [PATCH 1660/2924] x86/its: Add support for ITS-safe return thunk RETs in the lower half of cacheline may be affected by ITS bug, specifically when the RSB-underflows. Use ITS-safe return thunk for such RETs. RETs that are not patched: - RET in retpoline sequence does not need to be patched, because the sequence itself fills an RSB before RET. - RET in Call Depth Tracking (CDT) thunks __x86_indirect_{call|jump}_thunk and call_depth_return_thunk are not patched because CDT by design prevents RSB-underflow. - RETs in .init section are not reachable after init. - RETs that are explicitly marked safe with ANNOTATE_UNRET_SAFE. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre --- arch/x86/include/asm/alternative.h | 14 ++++++++++++++ arch/x86/include/asm/nospec-branch.h | 6 ++++++ arch/x86/kernel/alternative.c | 19 +++++++++++++++++-- arch/x86/kernel/ftrace.c | 2 +- arch/x86/kernel/static_call.c | 4 ++-- arch/x86/kernel/vmlinux.lds.S | 4 ++++ arch/x86/lib/retpoline.S | 13 ++++++++++++- arch/x86/net/bpf_jit_comp.c | 2 +- 8 files changed, 57 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 4a37a8bd87fdfa..4037c611537c85 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -124,6 +124,20 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, } #endif +#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL) +extern bool cpu_wants_rethunk(void); +extern bool cpu_wants_rethunk_at(void *addr); +#else +static __always_inline bool cpu_wants_rethunk(void) +{ + return false; +} +static __always_inline bool cpu_wants_rethunk_at(void *addr) +{ + return false; +} +#endif + #ifdef CONFIG_SMP extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index a5eb0cd93c9273..7d04ade3354115 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -367,6 +367,12 @@ static inline void srso_return_thunk(void) {} static inline void srso_alias_return_thunk(void) {} #endif +#ifdef CONFIG_MITIGATION_ITS +extern void its_return_thunk(void); +#else +static inline void its_return_thunk(void) {} +#endif + extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 333c0295b0a522..3e67e72bffdc98 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -814,6 +814,21 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) #ifdef CONFIG_MITIGATION_RETHUNK +bool cpu_wants_rethunk(void) +{ + return cpu_feature_enabled(X86_FEATURE_RETHUNK); +} + +bool cpu_wants_rethunk_at(void *addr) +{ + if (!cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return false; + if (x86_return_thunk != its_return_thunk) + return true; + + return !((unsigned long)addr & 0x20); +} + /* * Rewrite the compiler generated return thunk tail-calls. * @@ -830,7 +845,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) int i = 0; /* Patch the custom return thunks... */ - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (cpu_wants_rethunk_at(addr)) { i = JMP32_INSN_SIZE; __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); } else { @@ -847,7 +862,7 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk()) static_call_force_reinit(); for (s = start; s < end; s++) { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index cace6e8d7cc77a..5eb1514af5593e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -354,7 +354,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk_at(ip)) __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else memcpy(ip, retq, sizeof(retq)); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index a59c72e7764522..c3d7ff44b29adc 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -81,7 +81,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, break; case RET: - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk_at(insn)) code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; @@ -90,7 +90,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, case JCC: if (!func) { func = __static_call_return; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk()) func = x86_return_thunk; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index c3e048e7e161aa..e97f5773c8df24 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -503,6 +503,10 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard); . = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array"); #endif +#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B) +. = ASSERT(its_return_thunk & 0x20, "its_return_thunk not in second half of cacheline"); +#endif + #endif /* CONFIG_X86_64 */ /* diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index a06891892853f9..ebca28fe7e3133 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -393,7 +393,18 @@ SYM_CODE_START(__x86_indirect_its_thunk_array) .align 64, 0xcc SYM_CODE_END(__x86_indirect_its_thunk_array) -#endif +.align 64, 0xcc +.skip 32, 0xcc +SYM_CODE_START(its_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_CODE_END(its_return_thunk) +EXPORT_SYMBOL(its_return_thunk) + +#endif /* CONFIG_MITIGATION_ITS */ /* * This function name is magical and is used by -mfunction-return=thunk-extern diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 01e8119db5dbaf..a5b65c09910bab 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -686,7 +686,7 @@ static void emit_return(u8 **pprog, u8 *ip) { u8 *prog = *pprog; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (cpu_wants_rethunk()) { emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ From f4818881c47fd91fcb6d62373c57c7844e3de1c0 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 21 Jun 2024 20:23:23 -0700 Subject: [PATCH 1661/2924] x86/its: Enable Indirect Target Selection mitigation Indirect Target Selection (ITS) is a bug in some pre-ADL Intel CPUs with eIBRS. It affects prediction of indirect branch and RETs in the lower half of cacheline. Due to ITS such branches may get wrongly predicted to a target of (direct or indirect) branch that is located in the upper half of the cacheline. Scope of impact =============== Guest/host isolation -------------------- When eIBRS is used for guest/host isolation, the indirect branches in the VMM may still be predicted with targets corresponding to branches in the guest. Intra-mode ---------- cBPF or other native gadgets can be used for intra-mode training and disclosure using ITS. User/kernel isolation --------------------- When eIBRS is enabled user/kernel isolation is not impacted. Indirect Branch Prediction Barrier (IBPB) ----------------------------------------- After an IBPB, indirect branches may be predicted with targets corresponding to direct branches which were executed prior to IBPB. This is mitigated by a microcode update. Add cmdline parameter indirect_target_selection=off|on|force to control the mitigation to relocate the affected branches to an ITS-safe thunk i.e. located in the upper half of cacheline. Also add the sysfs reporting. When retpoline mitigation is deployed, ITS safe-thunks are not needed, because retpoline sequence is already ITS-safe. Similarly, when call depth tracking (CDT) mitigation is deployed (retbleed=stuff), ITS safe return thunk is not used, as CDT prevents RSB-underflow. To not overcomplicate things, ITS mitigation is not supported with spectre-v2 lfence;jmp mitigation. Moreover, it is less practical to deploy lfence;jmp mitigation on ITS affected parts anyways. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre --- .../ABI/testing/sysfs-devices-system-cpu | 1 + .../admin-guide/kernel-parameters.txt | 13 ++ arch/x86/kernel/cpu/bugs.c | 140 +++++++++++++++++- drivers/base/cpu.c | 3 + include/linux/cpu.h | 2 + 5 files changed, 155 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 206079d3bd5b12..6a1acabb29d85f 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -511,6 +511,7 @@ Description: information about CPUs heterogeneity. What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/gather_data_sampling + /sys/devices/system/cpu/vulnerabilities/indirect_target_selection /sys/devices/system/cpu/vulnerabilities/itlb_multihit /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d9fd26b95b3409..d3e78e795e0d6f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2202,6 +2202,18 @@ different crypto accelerators. This option can be used to achieve best performance for particular HW. + indirect_target_selection= [X86,Intel] Mitigation control for Indirect + Target Selection(ITS) bug in Intel CPUs. Updated + microcode is also required for a fix in IBPB. + + on: Enable mitigation (default). + off: Disable mitigation. + force: Force the ITS bug and deploy default + mitigation. + + For details see: + Documentation/admin-guide/hw-vuln/indirect-target-selection.rst + init= [KNL] Format: Run specified binary instead of /sbin/init as init @@ -3693,6 +3705,7 @@ expose users to several CPU vulnerabilities. Equivalent to: if nokaslr then kpti=0 [ARM64] gather_data_sampling=off [X86] + indirect_target_selection=off [X86] kvm.nx_huge_pages=off [X86] l1tf=off [X86] mds=off [X86] diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 362602b705cc43..417596ce5352b5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -49,6 +49,7 @@ static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); static void __init srso_select_mitigation(void); static void __init gds_select_mitigation(void); +static void __init its_select_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -66,6 +67,14 @@ static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; +static void __init set_return_thunk(void *thunk) +{ + if (x86_return_thunk != __x86_return_thunk) + pr_warn("x86/bugs: return thunk changed\n"); + + x86_return_thunk = thunk; +} + /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) { @@ -178,6 +187,7 @@ void __init cpu_select_mitigations(void) */ srso_select_mitigation(); gds_select_mitigation(); + its_select_mitigation(); } /* @@ -1118,7 +1128,7 @@ static void __init retbleed_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); - x86_return_thunk = retbleed_return_thunk; + set_return_thunk(retbleed_return_thunk); if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) @@ -1153,7 +1163,7 @@ static void __init retbleed_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); - x86_return_thunk = call_depth_return_thunk; + set_return_thunk(call_depth_return_thunk); break; default: @@ -1187,6 +1197,115 @@ static void __init retbleed_select_mitigation(void) pr_info("%s\n", retbleed_strings[retbleed_mitigation]); } +#undef pr_fmt +#define pr_fmt(fmt) "ITS: " fmt + +enum its_mitigation_cmd { + ITS_CMD_OFF, + ITS_CMD_ON, +}; + +enum its_mitigation { + ITS_MITIGATION_OFF, + ITS_MITIGATION_ALIGNED_THUNKS, + ITS_MITIGATION_RETPOLINE_STUFF, +}; + +static const char * const its_strings[] = { + [ITS_MITIGATION_OFF] = "Vulnerable", + [ITS_MITIGATION_ALIGNED_THUNKS] = "Mitigation: Aligned branch/return thunks", + [ITS_MITIGATION_RETPOLINE_STUFF] = "Mitigation: Retpolines, Stuffing RSB", +}; + +static enum its_mitigation its_mitigation __ro_after_init = ITS_MITIGATION_ALIGNED_THUNKS; + +static enum its_mitigation_cmd its_cmd __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_ITS) ? ITS_CMD_ON : ITS_CMD_OFF; + +static int __init its_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!IS_ENABLED(CONFIG_MITIGATION_ITS)) { + pr_err("Mitigation disabled at compile time, ignoring option (%s)", str); + return 0; + } + + if (!strcmp(str, "off")) { + its_cmd = ITS_CMD_OFF; + } else if (!strcmp(str, "on")) { + its_cmd = ITS_CMD_ON; + } else if (!strcmp(str, "force")) { + its_cmd = ITS_CMD_ON; + setup_force_cpu_bug(X86_BUG_ITS); + } else { + pr_err("Ignoring unknown indirect_target_selection option (%s).", str); + } + + return 0; +} +early_param("indirect_target_selection", its_parse_cmdline); + +static void __init its_select_mitigation(void) +{ + enum its_mitigation_cmd cmd = its_cmd; + + if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) { + its_mitigation = ITS_MITIGATION_OFF; + return; + } + + /* Retpoline+CDT mitigates ITS, bail out */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + boot_cpu_has(X86_FEATURE_CALL_DEPTH)) { + its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF; + goto out; + } + + /* Exit early to avoid irrelevant warnings */ + if (cmd == ITS_CMD_OFF) { + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (spectre_v2_enabled == SPECTRE_V2_NONE) { + pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || + !IS_ENABLED(CONFIG_MITIGATION_RETHUNK)) { + pr_err("WARNING: ITS mitigation depends on retpoline and rethunk support\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (IS_ENABLED(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)) { + pr_err("WARNING: ITS mitigation is not compatible with CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + pr_err("WARNING: ITS mitigation is not compatible with lfence mitigation\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + + switch (cmd) { + case ITS_CMD_OFF: + its_mitigation = ITS_MITIGATION_OFF; + break; + case ITS_CMD_ON: + its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; + if (!boot_cpu_has(X86_FEATURE_RETPOLINE)) + setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS); + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + set_return_thunk(its_return_thunk); + break; + } +out: + pr_info("%s\n", its_strings[its_mitigation]); +} + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -2607,10 +2726,10 @@ static void __init srso_select_mitigation(void) if (boot_cpu_data.x86 == 0x19) { setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); - x86_return_thunk = srso_alias_return_thunk; + set_return_thunk(srso_alias_return_thunk); } else { setup_force_cpu_cap(X86_FEATURE_SRSO); - x86_return_thunk = srso_return_thunk; + set_return_thunk(srso_return_thunk); } if (has_microcode) srso_mitigation = SRSO_MITIGATION_SAFE_RET; @@ -2800,6 +2919,11 @@ static ssize_t rfds_show_state(char *buf) return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); } +static ssize_t its_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", its_strings[its_mitigation]); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && @@ -2982,6 +3106,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_RFDS: return rfds_show_state(buf); + case X86_BUG_ITS: + return its_show_state(buf); + default: break; } @@ -3061,6 +3188,11 @@ ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attrib { return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); } + +ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_ITS); +} #endif void __warn_thunk(void) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index a7e5118498758e..50651435577c8f 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -600,6 +600,7 @@ CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); CPU_SHOW_VULN_FALLBACK(gds); CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); CPU_SHOW_VULN_FALLBACK(ghostwrite); +CPU_SHOW_VULN_FALLBACK(indirect_target_selection); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -616,6 +617,7 @@ static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NU static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); +static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -633,6 +635,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_gather_data_sampling.attr, &dev_attr_reg_file_data_sampling.attr, &dev_attr_ghostwrite.attr, + &dev_attr_indirect_target_selection.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e3049543008b9c..3aa955102b349a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -78,6 +78,8 @@ extern ssize_t cpu_show_gds(struct device *dev, extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_indirect_target_selection(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, From 2665281a07e19550944e8354a2024635a7b2714a Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 18 Nov 2024 09:53:12 -0800 Subject: [PATCH 1662/2924] x86/its: Add "vmexit" option to skip mitigation on some CPUs Ice Lake generation CPUs are not affected by guest/host isolation part of ITS. If a user is only concerned about KVM guests, they can now choose a new cmdline option "vmexit" that will not deploy the ITS mitigation when CPU is not affected by guest/host isolation. This saves the performance overhead of ITS mitigation on Ice Lake gen CPUs. When "vmexit" option selected, if the CPU is affected by ITS guest/host isolation, the default ITS mitigation is deployed. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre --- .../admin-guide/kernel-parameters.txt | 2 ++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 11 +++++++++++ arch/x86/kernel/cpu/common.c | 19 ++++++++++++------- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d3e78e795e0d6f..e70d15095f28b4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2210,6 +2210,8 @@ off: Disable mitigation. force: Force the ITS bug and deploy default mitigation. + vmexit: Only deploy mitigation if CPU is affected by + guest/host isolation part of ITS. For details see: Documentation/admin-guide/hw-vuln/indirect-target-selection.rst diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 03af8191073a21..39e61212ac9a91 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -535,4 +535,5 @@ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #define X86_BUG_ITS X86_BUG(1*32 + 6) /* "its" CPU is affected by Indirect Target Selection */ +#define X86_BUG_ITS_NATIVE_ONLY X86_BUG(1*32 + 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 417596ce5352b5..e919d645d83031 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1203,16 +1203,19 @@ static void __init retbleed_select_mitigation(void) enum its_mitigation_cmd { ITS_CMD_OFF, ITS_CMD_ON, + ITS_CMD_VMEXIT, }; enum its_mitigation { ITS_MITIGATION_OFF, + ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF, }; static const char * const its_strings[] = { [ITS_MITIGATION_OFF] = "Vulnerable", + [ITS_MITIGATION_VMEXIT_ONLY] = "Mitigation: Vulnerable, KVM: Not affected", [ITS_MITIGATION_ALIGNED_THUNKS] = "Mitigation: Aligned branch/return thunks", [ITS_MITIGATION_RETPOLINE_STUFF] = "Mitigation: Retpolines, Stuffing RSB", }; @@ -1239,6 +1242,8 @@ static int __init its_parse_cmdline(char *str) } else if (!strcmp(str, "force")) { its_cmd = ITS_CMD_ON; setup_force_cpu_bug(X86_BUG_ITS); + } else if (!strcmp(str, "vmexit")) { + its_cmd = ITS_CMD_VMEXIT; } else { pr_err("Ignoring unknown indirect_target_selection option (%s).", str); } @@ -1294,6 +1299,12 @@ static void __init its_select_mitigation(void) case ITS_CMD_OFF: its_mitigation = ITS_MITIGATION_OFF; break; + case ITS_CMD_VMEXIT: + if (boot_cpu_has_bug(X86_BUG_ITS_NATIVE_ONLY)) { + its_mitigation = ITS_MITIGATION_VMEXIT_ONLY; + goto out; + } + fallthrough; case ITS_CMD_ON: its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; if (!boot_cpu_has(X86_FEATURE_RETPOLINE)) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index accdee7ab1f70f..6a6c3be5af6a86 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1229,6 +1229,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define RFDS BIT(7) /* CPU is affected by Indirect Target Selection */ #define ITS BIT(8) +/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ +#define ITS_NATIVE_ONLY BIT(9) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1249,16 +1251,16 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, MMIO | RETBLEED | GDS | SRBDS), VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS), VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), - VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS), - VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS), VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS), VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS), @@ -1480,8 +1482,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET)) setup_force_cpu_bug(X86_BUG_IBPB_NO_RET); - if (vulnerable_to_its(x86_arch_cap_msr)) + if (vulnerable_to_its(x86_arch_cap_msr)) { setup_force_cpu_bug(X86_BUG_ITS); + if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY)) + setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); + } if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; From facd226f7e0c8ca936ac114aba43cb3e8b94e41e Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Mon, 2 Dec 2024 12:07:08 -0800 Subject: [PATCH 1663/2924] x86/its: Add support for RSB stuffing mitigation When retpoline mitigation is enabled for spectre-v2, enabling call-depth-tracking and RSB stuffing also mitigates ITS. Add cmdline option indirect_target_selection=stuff to allow enabling RSB stuffing mitigation. When retpoline mitigation is not enabled, =stuff option is ignored, and default mitigation for ITS is deployed. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Josh Poimboeuf Reviewed-by: Alexandre Chartre --- .../admin-guide/kernel-parameters.txt | 3 +++ arch/x86/kernel/cpu/bugs.c | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e70d15095f28b4..8f75ec17739944 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2212,6 +2212,9 @@ mitigation. vmexit: Only deploy mitigation if CPU is affected by guest/host isolation part of ITS. + stuff: Deploy RSB-fill mitigation when retpoline is + also deployed. Otherwise, deploy the default + mitigation. For details see: Documentation/admin-guide/hw-vuln/indirect-target-selection.rst diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e919d645d83031..c6dcc03a43da69 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1204,6 +1204,7 @@ enum its_mitigation_cmd { ITS_CMD_OFF, ITS_CMD_ON, ITS_CMD_VMEXIT, + ITS_CMD_RSB_STUFF, }; enum its_mitigation { @@ -1244,6 +1245,8 @@ static int __init its_parse_cmdline(char *str) setup_force_cpu_bug(X86_BUG_ITS); } else if (!strcmp(str, "vmexit")) { its_cmd = ITS_CMD_VMEXIT; + } else if (!strcmp(str, "stuff")) { + its_cmd = ITS_CMD_RSB_STUFF; } else { pr_err("Ignoring unknown indirect_target_selection option (%s).", str); } @@ -1295,6 +1298,12 @@ static void __init its_select_mitigation(void) goto out; } + if (cmd == ITS_CMD_RSB_STUFF && + (!boot_cpu_has(X86_FEATURE_RETPOLINE) || !IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING))) { + pr_err("RSB stuff mitigation not supported, using default\n"); + cmd = ITS_CMD_ON; + } + switch (cmd) { case ITS_CMD_OFF: its_mitigation = ITS_MITIGATION_OFF; @@ -1312,6 +1321,16 @@ static void __init its_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RETHUNK); set_return_thunk(its_return_thunk); break; + case ITS_CMD_RSB_STUFF: + its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF; + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); + set_return_thunk(call_depth_return_thunk); + if (retbleed_mitigation == RETBLEED_MITIGATION_NONE) { + retbleed_mitigation = RETBLEED_MITIGATION_STUFF; + pr_info("Retbleed mitigation updated to stuffing\n"); + } + break; } out: pr_info("%s\n", its_strings[its_mitigation]); From f0cd7091cc5a032c8870b4285305d9172569d126 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 2 May 2025 06:25:19 -0700 Subject: [PATCH 1664/2924] x86/its: Align RETs in BHB clear sequence to avoid thunking The software mitigation for BHI is to execute BHB clear sequence at syscall entry, and possibly after a cBPF program. ITS mitigation thunks RETs in the lower half of the cacheline. This causes the RETs in the BHB clear sequence to be thunked as well, adding unnecessary branches to the BHB clear sequence. Since the sequence is in hot path, align the RET instructions in the sequence to avoid thunking. This is how disassembly clear_bhb_loop() looks like after this change: 0x44 <+4>: mov $0x5,%ecx 0x49 <+9>: call 0xffffffff81001d9b 0x4e <+14>: jmp 0xffffffff81001de5 0x53 <+19>: int3 ... 0x9b <+91>: call 0xffffffff81001dce 0xa0 <+96>: ret 0xa1 <+97>: int3 ... 0xce <+142>: mov $0x5,%eax 0xd3 <+147>: jmp 0xffffffff81001dd6 0xd5 <+149>: nop 0xd6 <+150>: sub $0x1,%eax 0xd9 <+153>: jne 0xffffffff81001dd3 0xdb <+155>: sub $0x1,%ecx 0xde <+158>: jne 0xffffffff81001d9b 0xe0 <+160>: ret 0xe1 <+161>: int3 0xe2 <+162>: int3 0xe3 <+163>: int3 0xe4 <+164>: int3 0xe5 <+165>: lfence 0xe8 <+168>: pop %rbp 0xe9 <+169>: ret Suggested-by: Andrew Cooper Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre --- arch/x86/entry/entry_64.S | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f40bdf97d390a7..ed04a968cc7d00 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1525,7 +1525,9 @@ SYM_CODE_END(rewind_stack_and_make_dead) * ORC to unwind properly. * * The alignment is for performance and not for safety, and may be safely - * refactored in the future if needed. + * refactored in the future if needed. The .skips are for safety, to ensure + * that all RETs are in the second half of a cacheline to mitigate Indirect + * Target Selection, rather than taking the slowpath via its_return_thunk. */ SYM_FUNC_START(clear_bhb_loop) ANNOTATE_NOENDBR @@ -1536,10 +1538,22 @@ SYM_FUNC_START(clear_bhb_loop) call 1f jmp 5f .align 64, 0xcc + /* + * Shift instructions so that the RET is in the upper half of the + * cacheline and don't take the slowpath to its_return_thunk. + */ + .skip 32 - (.Lret1 - 1f), 0xcc ANNOTATE_INTRA_FUNCTION_CALL 1: call 2f - RET +.Lret1: RET .align 64, 0xcc + /* + * As above shift instructions for RET at .Lret2 as well. + * + * This should be ideally be: .skip 32 - (.Lret2 - 2f), 0xcc + * but some Clang versions (e.g. 18) don't like this. + */ + .skip 32 - 18, 0xcc 2: movl $5, %eax 3: jmp 4f nop @@ -1547,7 +1561,7 @@ SYM_FUNC_START(clear_bhb_loop) jnz 3b sub $1, %ecx jnz 1b - RET +.Lret2: RET 5: lfence pop %rbp RET From 3238532ba55c9bf9b63ae17ebfc5deb320c737c3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 31 Mar 2025 21:06:42 +0200 Subject: [PATCH 1665/2924] MAINTAINERS: delete email for Shiraz Hashim The email address bounced. I couldn't find a newer one in recent git history (last activity 9 years ago), so delete this email entry. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250331190731.5094-2-wsa+renesas@sang-engineering.com Signed-off-by: Arnd Bergmann --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 53ea11c7faf062..786f12b623c7df 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22826,7 +22826,6 @@ F: drivers/accessibility/speakup/ SPEAR PLATFORM/CLOCK/PINCTRL SUPPORT M: Viresh Kumar -M: Shiraz Hashim L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: soc@lists.linux.dev S: Maintained From 09acc3266cdd8e32a62dd6c5052f44724111120b Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 9 Apr 2025 16:02:54 -0500 Subject: [PATCH 1666/2924] arm64: dts: amazon: Fix simple-bus node name schema warnings Fix a couple of node name warnings from the schema checks: arch/arm64/boot/dts/amazon/alpine-v2-evp.dt.yaml: io-fabric: $nodename:0: 'io-fabric' does not match '^(bus|soc|axi|ahb|apb)(@[0-9a-f]+)?$' arch/arm64/boot/dts/amazon/alpine-v3-evp.dt.yaml: io-fabric: $nodename:0: 'io-fabric' does not match '^(bus|soc|axi|ahb|apb)(@[0-9a-f]+)?$' Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250409210255.1541298-1-robh@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/amazon/alpine-v2.dtsi | 2 +- arch/arm64/boot/dts/amazon/alpine-v3.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi index da9de4986660f2..5a72f0b64247d5 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi @@ -151,7 +151,7 @@ al,msi-num-spis = <160>; }; - io-fabric@fc000000 { + io-bus@fc000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi index 8b6156b5af659f..dea60d136c2e3d 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi @@ -361,7 +361,7 @@ interrupt-parent = <&gic>; }; - io-fabric@fc000000 { + io-bus@fc000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; From d6d1e3e6580ca35071ad474381f053cbf1fb6414 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Apr 2025 16:30:11 +0200 Subject: [PATCH 1667/2924] mm/execmem: Unify early execmem_cache behaviour Early kernel memory is RWX, only at the end of early boot (before SMP) do we mark things ROX. Have execmem_cache mirror this behaviour for early users. This avoids having to remember what code is execmem and what is not -- we can poke everything with impunity ;-) Also performance for not having to do endless text_poke_mm switches. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre --- arch/x86/mm/init_32.c | 3 +++ arch/x86/mm/init_64.c | 3 +++ include/linux/execmem.h | 8 +++++++- mm/execmem.c | 40 +++++++++++++++++++++++++++++++++++++--- 4 files changed, 50 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ad662cc4605c69..7fa3965dcef158 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -755,6 +756,8 @@ void mark_rodata_ro(void) pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); + execmem_cache_make_ro(); + kernel_set_to_readonly = 1; #ifdef CONFIG_CPA_DEBUG diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7c4f6f591f2b24..949a447f75ec7e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1391,6 +1392,8 @@ void mark_rodata_ro(void) (end - start) >> 10); set_memory_ro(start, (end - start) >> PAGE_SHIFT); + execmem_cache_make_ro(); + kernel_set_to_readonly = 1; /* diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 65655a5d1be283..089c71cc8ddf17 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -53,7 +53,7 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; -#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) /** * execmem_fill_trapping_insns - set memory to contain instructions that * will trap @@ -93,9 +93,15 @@ int execmem_make_temp_rw(void *ptr, size_t size); * Return: 0 on success or negative error code on failure. */ int execmem_restore_rox(void *ptr, size_t size); + +/* + * Called from mark_readonly(), where the system transitions to ROX. + */ +void execmem_cache_make_ro(void); #else static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } +static inline void execmem_cache_make_ro(void) { } #endif /** diff --git a/mm/execmem.c b/mm/execmem.c index e6c4f5076ca8d8..6f7a2653b280ed 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -254,6 +254,34 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size) return ptr; } +static bool execmem_cache_rox = false; + +void execmem_cache_make_ro(void) +{ + struct maple_tree *free_areas = &execmem_cache.free_areas; + struct maple_tree *busy_areas = &execmem_cache.busy_areas; + MA_STATE(mas_free, free_areas, 0, ULONG_MAX); + MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX); + struct mutex *mutex = &execmem_cache.mutex; + void *area; + + execmem_cache_rox = true; + + mutex_lock(mutex); + + mas_for_each(&mas_free, area, ULONG_MAX) { + unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT; + set_memory_ro(mas_free.index, pages); + } + + mas_for_each(&mas_busy, area, ULONG_MAX) { + unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT; + set_memory_ro(mas_busy.index, pages); + } + + mutex_unlock(mutex); +} + static int execmem_cache_populate(struct execmem_range *range, size_t size) { unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; @@ -274,9 +302,15 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) /* fill memory with instructions that will trap */ execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); - err = set_memory_rox((unsigned long)p, vm->nr_pages); - if (err) - goto err_free_mem; + if (execmem_cache_rox) { + err = set_memory_rox((unsigned long)p, vm->nr_pages); + if (err) + goto err_free_mem; + } else { + err = set_memory_x((unsigned long)p, vm->nr_pages); + if (err) + goto err_free_mem; + } err = execmem_cache_add(p, alloc_size); if (err) From ebebe30794d38c51f71fe4951ba6af4159d9837d Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Sat, 3 May 2025 09:46:31 -0700 Subject: [PATCH 1668/2924] x86/ibt: Keep IBT disabled during alternative patching cfi_rewrite_callers() updates the fineIBT hash matching at the caller side, but except for paranoid-mode it relies on apply_retpoline() and friends for any ENDBR relocation. This could temporarily cause an indirect branch to land on a poisoned ENDBR. For instance, with para-virtualization enabled, a simple wrmsrl() could have an indirect branch pointing to native_write_msr() who's ENDBR has been relocated due to fineIBT: : push %rbp mov %rsp,%rbp mov %esi,%eax mov %rsi,%rdx shr $0x20,%rdx mov %edi,%edi mov %rax,%rsi call *0x21e65d0(%rip) # ^^^^^^^^^^^^^^^^^^^^^^^ Such an indirect call during the alternative patching could #CP if the caller is not *yet* adjusted for the new target ENDBR. To prevent a false #CP, keep CET-IBT disabled until all callers are patched. Patching during the module load does not need to be guarded by IBT-disable because the module code is not executed until the patching is complete. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre --- arch/x86/kernel/alternative.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 3e67e72bffdc98..6e0b09b2c97201 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -31,6 +31,7 @@ #include #include #include +#include int __read_mostly alternatives_patched; @@ -2085,6 +2086,8 @@ static noinline void __init alt_reloc_selftest(void) void __init alternative_instructions(void) { + u64 ibt; + int3_selftest(); /* @@ -2111,6 +2114,9 @@ void __init alternative_instructions(void) */ paravirt_set_cap(); + /* Keep CET-IBT disabled until caller/callee are patched */ + ibt = ibt_save(/*disable*/ true); + __apply_fineibt(__retpoline_sites, __retpoline_sites_end, __cfi_sites, __cfi_sites_end, true); @@ -2134,6 +2140,8 @@ void __init alternative_instructions(void) */ apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + ibt_restore(ibt); + #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { From 872df34d7c51a79523820ea6a14860398c639b87 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Oct 2024 10:05:48 -0700 Subject: [PATCH 1669/2924] x86/its: Use dynamic thunks for indirect branches ITS mitigation moves the unsafe indirect branches to a safe thunk. This could degrade the prediction accuracy as the source address of indirect branches becomes same for different execution paths. To improve the predictions, and hence the performance, assign a separate thunk for each indirect callsite. This is also a defense-in-depth measure to avoid indirect branches aliasing with each other. As an example, 5000 dynamic thunks would utilize around 16 bits of the address space, thereby gaining entropy. For a BTB that uses 32 bits for indexing, dynamic thunks could provide better prediction accuracy over fixed thunks. Have ITS thunks be variable sized and use EXECMEM_MODULE_TEXT such that they are both more flexible (got to extend them later) and live in 2M TLBs, just like kernel code, avoiding undue TLB pressure. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre --- arch/x86/Kconfig | 1 + arch/x86/include/asm/alternative.h | 10 +++ arch/x86/kernel/alternative.c | 127 ++++++++++++++++++++++++++++- arch/x86/kernel/module.c | 6 ++ include/linux/execmem.h | 3 + include/linux/module.h | 5 ++ 6 files changed, 149 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a6bd0590c4371b..a1156a554f6d5a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2714,6 +2714,7 @@ config MITIGATION_ITS bool "Enable Indirect Target Selection mitigation" depends on CPU_SUP_INTEL && X86_64 depends on MITIGATION_RETPOLINE && MITIGATION_RETHUNK + select EXECMEM default y help Enable Indirect Target Selection (ITS) mitigation. ITS is a bug in diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 4037c611537c85..47948ebbb40969 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -124,6 +124,16 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, } #endif +#ifdef CONFIG_MITIGATION_ITS +extern void its_init_mod(struct module *mod); +extern void its_fini_mod(struct module *mod); +extern void its_free_mod(struct module *mod); +#else /* CONFIG_MITIGATION_ITS */ +static inline void its_init_mod(struct module *mod) { } +static inline void its_fini_mod(struct module *mod) { } +static inline void its_free_mod(struct module *mod) { } +#endif + #if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL) extern bool cpu_wants_rethunk(void); extern bool cpu_wants_rethunk_at(void *addr); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6e0b09b2c97201..7a10e3ed5d0b5b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include int __read_mostly alternatives_patched; @@ -125,6 +127,121 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] = #endif }; +#ifdef CONFIG_MITIGATION_ITS + +static struct module *its_mod; +static void *its_page; +static unsigned int its_offset; + +/* Initialize a thunk with the "jmp *reg; int3" instructions. */ +static void *its_init_thunk(void *thunk, int reg) +{ + u8 *bytes = thunk; + int i = 0; + + if (reg >= 8) { + bytes[i++] = 0x41; /* REX.B prefix */ + reg -= 8; + } + bytes[i++] = 0xff; + bytes[i++] = 0xe0 + reg; /* jmp *reg */ + bytes[i++] = 0xcc; + + return thunk; +} + +void its_init_mod(struct module *mod) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return; + + mutex_lock(&text_mutex); + its_mod = mod; + its_page = NULL; +} + +void its_fini_mod(struct module *mod) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return; + + WARN_ON_ONCE(its_mod != mod); + + its_mod = NULL; + its_page = NULL; + mutex_unlock(&text_mutex); + + for (int i = 0; i < mod->its_num_pages; i++) { + void *page = mod->its_page_array[i]; + execmem_restore_rox(page, PAGE_SIZE); + } +} + +void its_free_mod(struct module *mod) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return; + + for (int i = 0; i < mod->its_num_pages; i++) { + void *page = mod->its_page_array[i]; + execmem_free(page); + } + kfree(mod->its_page_array); +} + +static void *its_alloc(void) +{ + void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); + + if (!page) + return NULL; + + if (its_mod) { + void *tmp = krealloc(its_mod->its_page_array, + (its_mod->its_num_pages+1) * sizeof(void *), + GFP_KERNEL); + if (!tmp) + return NULL; + + its_mod->its_page_array = tmp; + its_mod->its_page_array[its_mod->its_num_pages++] = page; + + execmem_make_temp_rw(page, PAGE_SIZE); + } + + return no_free_ptr(page); +} + +static void *its_allocate_thunk(int reg) +{ + int size = 3 + (reg / 8); + void *thunk; + + if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) { + its_page = its_alloc(); + if (!its_page) { + pr_err("ITS page allocation failed\n"); + return NULL; + } + memset(its_page, INT3_INSN_OPCODE, PAGE_SIZE); + its_offset = 32; + } + + /* + * If the indirect branch instruction will be in the lower half + * of a cacheline, then update the offset to reach the upper half. + */ + if ((its_offset + size - 1) % 64 < 32) + its_offset = ((its_offset - 1) | 0x3F) + 33; + + thunk = its_page + its_offset; + its_offset += size; + + return its_init_thunk(thunk, reg); +} + +#endif + /* * Nomenclature for variable names to simplify and clarify this code and ease * any potential staring at it: @@ -637,9 +754,13 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 #ifdef CONFIG_MITIGATION_ITS static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes) { - return __emit_trampoline(addr, insn, bytes, - __x86_indirect_its_thunk_array[reg], - __x86_indirect_its_thunk_array[reg]); + u8 *thunk = __x86_indirect_its_thunk_array[reg]; + u8 *tmp = its_allocate_thunk(reg); + + if (tmp) + thunk = tmp; + + return __emit_trampoline(addr, insn, bytes, thunk, thunk); } /* Check if an indirect branch is at ITS-unsafe address */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index a7998f35170175..ff07558b7ebc65 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -266,6 +266,8 @@ int module_finalize(const Elf_Ehdr *hdr, ibt_endbr = s; } + its_init_mod(me); + if (retpolines || cfi) { void *rseg = NULL, *cseg = NULL; unsigned int rsize = 0, csize = 0; @@ -286,6 +288,9 @@ int module_finalize(const Elf_Ehdr *hdr, void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); } + + its_fini_mod(me); + if (returns) { void *rseg = (void *)returns->sh_addr; apply_returns(rseg, rseg + returns->sh_size); @@ -326,4 +331,5 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); + its_free_mod(mod); } diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 089c71cc8ddf17..ca42d5e46ccc6b 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -4,6 +4,7 @@ #include #include +#include #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) @@ -176,6 +177,8 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); + #ifdef CONFIG_MMU /** * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory diff --git a/include/linux/module.h b/include/linux/module.h index b3329110d6686f..8050f77c3b64f8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -586,6 +586,11 @@ struct module { atomic_t refcnt; #endif +#ifdef CONFIG_MITIGATION_ITS + int its_num_pages; + void **its_page_array; +#endif + #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; From e52c1dc7455d32c8a55f9949d300e5e87d011fa6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Apr 2025 09:57:31 +0200 Subject: [PATCH 1670/2924] x86/its: FineIBT-paranoid vs ITS FineIBT-paranoid was using the retpoline bytes for the paranoid check, disabling retpolines, because all parts that have IBT also have eIBRS and thus don't need no stinking retpolines. Except... ITS needs the retpolines for indirect calls must not be in the first half of a cacheline :-/ So what was the paranoid call sequence: : 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d a: 4d 8d 5b lea -0x10(%r11), %r11 e: 75 fd jne d 10: 41 ff d3 call *%r11 13: 90 nop Now becomes: : 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d a: 4d 8d 5b f0 lea -0x10(%r11), %r11 e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11 Where the paranoid_thunk looks like: 1d: (bad) __x86_indirect_paranoid_thunk_r11: 1e: 75 fd jne 1d __x86_indirect_its_thunk_r11: 20: 41 ff eb jmp *%r11 23: cc int3 [ dhansen: remove initialization to false ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Alexandre Chartre --- arch/x86/include/asm/alternative.h | 8 ++ arch/x86/kernel/alternative.c | 145 +++++++++++++++++++++++++---- arch/x86/lib/retpoline.S | 15 ++- arch/x86/net/bpf_jit_comp.c | 2 +- tools/objtool/arch/x86/decode.c | 9 ++ 5 files changed, 159 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 47948ebbb40969..f2294784babc30 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -6,6 +6,7 @@ #include #include #include +#include #define ALT_FLAGS_SHIFT 16 @@ -128,10 +129,17 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, extern void its_init_mod(struct module *mod); extern void its_fini_mod(struct module *mod); extern void its_free_mod(struct module *mod); +extern u8 *its_static_thunk(int reg); #else /* CONFIG_MITIGATION_ITS */ static inline void its_init_mod(struct module *mod) { } static inline void its_fini_mod(struct module *mod) { } static inline void its_free_mod(struct module *mod) { } +static inline u8 *its_static_thunk(int reg) +{ + WARN_ONCE(1, "ITS not compiled in"); + + return NULL; +} #endif #if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 7a10e3ed5d0b5b..48fd04e9011483 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -127,6 +127,10 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] = #endif }; +#ifdef CONFIG_FINEIBT +static bool cfi_paranoid __ro_after_init; +#endif + #ifdef CONFIG_MITIGATION_ITS static struct module *its_mod; @@ -137,8 +141,25 @@ static unsigned int its_offset; static void *its_init_thunk(void *thunk, int reg) { u8 *bytes = thunk; + int offset = 0; int i = 0; +#ifdef CONFIG_FINEIBT + if (cfi_paranoid) { + /* + * When ITS uses indirect branch thunk the fineibt_paranoid + * caller sequence doesn't fit in the caller site. So put the + * remaining part of the sequence ( + JNE) into the ITS + * thunk. + */ + bytes[i++] = 0xea; /* invalid instruction */ + bytes[i++] = 0x75; /* JNE */ + bytes[i++] = 0xfd; + + offset = 1; + } +#endif + if (reg >= 8) { bytes[i++] = 0x41; /* REX.B prefix */ reg -= 8; @@ -147,7 +168,7 @@ static void *its_init_thunk(void *thunk, int reg) bytes[i++] = 0xe0 + reg; /* jmp *reg */ bytes[i++] = 0xcc; - return thunk; + return thunk + offset; } void its_init_mod(struct module *mod) @@ -217,6 +238,17 @@ static void *its_allocate_thunk(int reg) int size = 3 + (reg / 8); void *thunk; +#ifdef CONFIG_FINEIBT + /* + * The ITS thunk contains an indirect jump and an int3 instruction so + * its size is 3 or 4 bytes depending on the register used. If CFI + * paranoid is used then 3 extra bytes are added in the ITS thunk to + * complete the fineibt_paranoid caller sequence. + */ + if (cfi_paranoid) + size += 3; +#endif + if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) { its_page = its_alloc(); if (!its_page) { @@ -240,6 +272,18 @@ static void *its_allocate_thunk(int reg) return its_init_thunk(thunk, reg); } +u8 *its_static_thunk(int reg) +{ + u8 *thunk = __x86_indirect_its_thunk_array[reg]; + +#ifdef CONFIG_FINEIBT + /* Paranoid thunk starts 2 bytes before */ + if (cfi_paranoid) + return thunk - 2; +#endif + return thunk; +} + #endif /* @@ -775,8 +819,17 @@ static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg) /* Lower-half of the cacheline? */ return !(addr & 0x20); } +#else /* CONFIG_MITIGATION_ITS */ + +#ifdef CONFIG_FINEIBT +static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg) +{ + return false; +} #endif +#endif /* CONFIG_MITIGATION_ITS */ + /* * Rewrite the compiler generated retpoline thunk calls. * @@ -893,6 +946,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) int len, ret; u8 bytes[16]; u8 op1, op2; + u8 *dest; ret = insn_decode_kernel(&insn, addr); if (WARN_ON_ONCE(ret < 0)) @@ -909,6 +963,12 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) case CALL_INSN_OPCODE: case JMP32_INSN_OPCODE: + /* Check for cfi_paranoid + ITS */ + dest = addr + insn.length + insn.immediate.value; + if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) { + WARN_ON_ONCE(cfi_mode != CFI_FINEIBT); + continue; + } break; case 0x0f: /* escape */ @@ -1198,8 +1258,6 @@ int cfi_get_func_arity(void *func) static bool cfi_rand __ro_after_init = true; static u32 cfi_seed __ro_after_init; -static bool cfi_paranoid __ro_after_init = false; - /* * Re-hash the CFI hash with a boot-time seed while making sure the result is * not a valid ENDBR instruction. @@ -1612,6 +1670,19 @@ static int cfi_rand_callers(s32 *start, s32 *end) return 0; } +static int emit_paranoid_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + u8 *thunk = (void *)__x86_indirect_its_thunk_array[reg] - 2; + +#ifdef CONFIG_MITIGATION_ITS + u8 *tmp = its_allocate_thunk(reg); + if (tmp) + thunk = tmp; +#endif + + return __emit_trampoline(addr, insn, bytes, thunk, thunk); +} + static int cfi_rewrite_callers(s32 *start, s32 *end) { s32 *s; @@ -1653,9 +1724,14 @@ static int cfi_rewrite_callers(s32 *start, s32 *end) memcpy(bytes, fineibt_paranoid_start, fineibt_paranoid_size); memcpy(bytes + fineibt_caller_hash, &hash, 4); - ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind); - if (WARN_ON_ONCE(ret != 3)) - continue; + if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + fineibt_paranoid_ind, 11)) { + emit_paranoid_trampoline(addr + fineibt_caller_size, + &insn, 11, bytes + fineibt_caller_size); + } else { + ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind); + if (WARN_ON_ONCE(ret != 3)) + continue; + } text_poke_early(addr, bytes, fineibt_paranoid_size); } @@ -1882,29 +1958,66 @@ static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32 return false; } +static bool is_paranoid_thunk(unsigned long addr) +{ + u32 thunk; + + __get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault); + return (thunk & 0x00FFFFFF) == 0xfd75ea; + +Efault: + return false; +} + /* * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[] - * sequence. + * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS + * thunk. */ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type) { unsigned long addr = regs->ip - fineibt_paranoid_ud; - u32 hash; - if (!cfi_paranoid || !is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) + if (!cfi_paranoid) return false; - __get_kernel_nofault(&hash, addr + fineibt_caller_hash, u32, Efault); - *target = regs->r11 + fineibt_preamble_size; - *type = regs->r10; + if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) { + *target = regs->r11 + fineibt_preamble_size; + *type = regs->r10; + + /* + * Since the trapping instruction is the exact, but LOCK prefixed, + * Jcc.d8 that got us here, the normal fixup will work. + */ + return true; + } /* - * Since the trapping instruction is the exact, but LOCK prefixed, - * Jcc.d8 that got us here, the normal fixup will work. + * The cfi_paranoid + ITS thunk combination results in: + * + * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d + * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d + * a: 4d 8d 5b f0 lea -0x10(%r11), %r11 + * e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11 + * + * Where the paranoid_thunk looks like: + * + * 1d: (bad) + * __x86_indirect_paranoid_thunk_r11: + * 1e: 75 fd jne 1d + * __x86_indirect_its_thunk_r11: + * 20: 41 ff eb jmp *%r11 + * 23: cc int3 + * */ - return true; + if (is_paranoid_thunk(regs->ip)) { + *target = regs->r11 + fineibt_preamble_size; + *type = regs->r10; + + regs->ip = *target; + return true; + } -Efault: return false; } diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index ebca28fe7e3133..39374949daa2f6 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -371,6 +371,15 @@ SYM_FUNC_END(call_depth_return_thunk) .macro ITS_THUNK reg +/* + * If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b) + * that complete the fineibt_paranoid caller sequence. + */ +1: .byte 0xea +SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + jne 1b SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR @@ -378,19 +387,19 @@ SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL) jmp *%\reg int3 .align 32, 0xcc /* fill to the end of the line */ - .skip 32, 0xcc /* skip to the next upper half */ + .skip 32 - (__x86_indirect_its_thunk_\reg - 1b), 0xcc /* skip to the next upper half */ .endm /* ITS mitigation requires thunks be aligned to upper half of cacheline */ .align 64, 0xcc -.skip 32, 0xcc -SYM_CODE_START(__x86_indirect_its_thunk_array) +.skip 29, 0xcc #define GEN(reg) ITS_THUNK reg #include #undef GEN .align 64, 0xcc +SYM_FUNC_ALIAS(__x86_indirect_its_thunk_array, __x86_indirect_its_thunk_rax) SYM_CODE_END(__x86_indirect_its_thunk_array) .align 64, 0xcc diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a5b65c09910bab..a31e58c6d89e02 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -663,7 +663,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) { OPTIMIZER_HIDE_VAR(reg); - emit_jump(&prog, &__x86_indirect_its_thunk_array[reg], ip); + emit_jump(&prog, its_static_thunk(reg), ip); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 3ce7b54003c229..687c5eafb49a7e 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -189,6 +189,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec op2 = ins.opcode.bytes[1]; op3 = ins.opcode.bytes[2]; + /* + * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long. + */ + if (op1 == 0xea) { + insn->len = 1; + insn->type = INSN_BUG; + return 0; + } + if (ins.rex_prefix.nbytes) { rex = ins.rex_prefix.bytes[0]; rex_w = X86_REX_W(rex) >> 3; From 7a9b709e7cc5ce1ffb84ce07bf6d157e1de758df Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 24 Dec 2024 16:09:28 -0800 Subject: [PATCH 1671/2924] selftest/x86/bugs: Add selftests for ITS Below are the tests added for Indirect Target Selection (ITS): - its_sysfs.py - Check if sysfs reflects the correct mitigation status for the mitigation selected via the kernel cmdline. - its_permutations.py - tests mitigation selection with cmdline permutations with other bugs like spectre_v2 and retbleed. - its_indirect_alignment.py - verifies that for addresses in .retpoline_sites section that belong to lower half of cacheline are patched to ITS-safe thunk. Typical output looks like below: Site 49: function symbol: __x64_sys_restart_syscall+0x1f <0xffffffffbb1509af> # vmlinux: 0xffffffff813509af: jmp 0xffffffff81f5a8e0 # kcore: 0xffffffffbb1509af: jmpq *%rax # ITS thunk NOT expected for site 49 # PASSED: Found *%rax # Site 50: function symbol: __resched_curr+0xb0 <0xffffffffbb181910> # vmlinux: 0xffffffff81381910: jmp 0xffffffff81f5a8e0 # kcore: 0xffffffffbb181910: jmp 0xffffffffc02000fc # ITS thunk expected for site 50 # PASSED: Found 0xffffffffc02000fc -> jmpq *%rax - its_ret_alignment.py - verifies that for addresses in .return_sites section that belong to lower half of cacheline are patched to its_return_thunk. Typical output looks like below: Site 97: function symbol: collect_event+0x48 <0xffffffffbb007f18> # vmlinux: 0xffffffff81207f18: jmp 0xffffffff81f5b500 # kcore: 0xffffffffbb007f18: jmp 0xffffffffbbd5b560 # PASSED: Found jmp 0xffffffffbbd5b560 # Site 98: function symbol: collect_event+0xa4 <0xffffffffbb007f74> # vmlinux: 0xffffffff81207f74: jmp 0xffffffff81f5b500 # kcore: 0xffffffffbb007f74: retq # PASSED: Found retq Some of these tests have dependency on tools like virtme-ng[1] and drgn[2]. When the dependencies are not met, the test will be skipped. [1] https://github.com/arighi/virtme-ng [2] https://github.com/osandov/drgn Co-developed-by: Tao Zhang Signed-off-by: Tao Zhang Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/x86/bugs/Makefile | 3 + tools/testing/selftests/x86/bugs/common.py | 164 ++++++++++++++++++ .../x86/bugs/its_indirect_alignment.py | 150 ++++++++++++++++ .../selftests/x86/bugs/its_permutations.py | 109 ++++++++++++ .../selftests/x86/bugs/its_ret_alignment.py | 139 +++++++++++++++ tools/testing/selftests/x86/bugs/its_sysfs.py | 65 +++++++ 7 files changed, 631 insertions(+) create mode 100644 tools/testing/selftests/x86/bugs/Makefile create mode 100755 tools/testing/selftests/x86/bugs/common.py create mode 100755 tools/testing/selftests/x86/bugs/its_indirect_alignment.py create mode 100755 tools/testing/selftests/x86/bugs/its_permutations.py create mode 100755 tools/testing/selftests/x86/bugs/its_ret_alignment.py create mode 100755 tools/testing/selftests/x86/bugs/its_sysfs.py diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c77c8c8e3d9bdd..80fb84fa3cfcbd 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -121,6 +121,7 @@ TARGETS += user_events TARGETS += vDSO TARGETS += mm TARGETS += x86 +TARGETS += x86/bugs TARGETS += zram #Please keep the TARGETS list alphabetically sorted # Run "make quicktest=1 run_tests" or diff --git a/tools/testing/selftests/x86/bugs/Makefile b/tools/testing/selftests/x86/bugs/Makefile new file mode 100644 index 00000000000000..8ff2d7226c7f3f --- /dev/null +++ b/tools/testing/selftests/x86/bugs/Makefile @@ -0,0 +1,3 @@ +TEST_PROGS := its_sysfs.py its_permutations.py its_indirect_alignment.py its_ret_alignment.py +TEST_FILES := common.py +include ../../lib.mk diff --git a/tools/testing/selftests/x86/bugs/common.py b/tools/testing/selftests/x86/bugs/common.py new file mode 100755 index 00000000000000..2f9664a80617a6 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/common.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# This contains kselftest framework adapted common functions for testing +# mitigation for x86 bugs. + +import os, sys, re, shutil + +sys.path.insert(0, '../../kselftest') +import ksft + +def read_file(path): + if not os.path.exists(path): + return None + with open(path, 'r') as file: + return file.read().strip() + +def cpuinfo_has(arg): + cpuinfo = read_file('/proc/cpuinfo') + if arg in cpuinfo: + return True + return False + +def cmdline_has(arg): + cmdline = read_file('/proc/cmdline') + if arg in cmdline: + return True + return False + +def cmdline_has_either(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg in cmdline: + return True + return False + +def cmdline_has_none(args): + return not cmdline_has_either(args) + +def cmdline_has_all(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg not in cmdline: + return False + return True + +def get_sysfs(bug): + return read_file("/sys/devices/system/cpu/vulnerabilities/" + bug) + +def sysfs_has(bug, mitigation): + status = get_sysfs(bug) + if mitigation in status: + return True + return False + +def sysfs_has_either(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if sysfs_has(bug, mitigation): + return True + return False + +def sysfs_has_none(bugs, mitigations): + return not sysfs_has_either(bugs, mitigations) + +def sysfs_has_all(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if not sysfs_has(bug, mitigation): + return False + return True + +def bug_check_pass(bug, found): + ksft.print_msg(f"\nFound: {found}") + # ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_pass(f'{bug}: {found}') + +def bug_check_fail(bug, found, expected): + ksft.print_msg(f'\nFound:\t {found}') + ksft.print_msg(f'Expected:\t {expected}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def bug_status_unknown(bug, found): + ksft.print_msg(f'\nUnknown status: {found}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def basic_checks_sufficient(bug, mitigation): + if not mitigation: + bug_status_unknown(bug, "None") + return True + elif mitigation == "Not affected": + ksft.test_result_pass(bug) + return True + elif mitigation == "Vulnerable": + if cmdline_has_either([f'{bug}=off', 'mitigations=off']): + bug_check_pass(bug, mitigation) + return True + return False + +def get_section_info(vmlinux, section_name): + from elftools.elf.elffile import ELFFile + with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + section = elffile.get_section_by_name(section_name) + if section is None: + ksft.print_msg("Available sections in vmlinux:") + for sec in elffile.iter_sections(): + ksft.print_msg(sec.name) + raise ValueError(f"Section {section_name} not found in {vmlinux}") + return section['sh_addr'], section['sh_offset'], section['sh_size'] + +def get_patch_sites(vmlinux, offset, size): + import struct + output = [] + with open(vmlinux, 'rb') as f: + f.seek(offset) + i = 0 + while i < size: + data = f.read(4) # s32 + if not data: + break + sym_offset = struct.unpack(' 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at argument path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +retpolines_start_vmlinux, retpolines_sec_offset, size = c.get_section_info(vmlinux, '.retpoline_sites') +ksft.print_msg(f"vmlinux: Section .retpoline_sites (0x{retpolines_start_vmlinux:x}) found at 0x{retpolines_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, retpolines_sec_offset, size) +total_retpoline_tests = len(sites_offset) +ksft.print_msg(f"Found {total_retpoline_tests} retpoline sites") + +prog = c.get_runtime_kernel() +retpolines_start_kcore = prog.symbol('__retpoline_sites').address +ksft.print_msg(f'kcore: __retpoline_sites: 0x{retpolines_start_kcore:x}') + +x86_indirect_its_thunk_r15 = prog.symbol('__x86_indirect_its_thunk_r15').address +ksft.print_msg(f'kcore: __x86_indirect_its_thunk_r15: 0x{x86_indirect_its_thunk_r15:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(0, len(sites_offset)): + site = retpolines_start_kcore + sites_offset[i] + vmlinux_site = retpolines_start_vmlinux + sites_offset[i] + passed = unknown = failed = False + try: + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + operand = kcore_insn.op_str + insn_end = site + kcore_insn.size - 1 # TODO handle Jcc.32 __x86_indirect_thunk_\reg + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {identify_address(prog, site)} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if (site & 0x20) ^ (insn_end & 0x20): + ksft.print_msg(f"\tSite at safe/unsafe boundary: {str(kcore_insn.bytes)} {kcore_insn.mnemonic} {operand}") + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if operand.startswith('0xffffffff'): + thunk = int(operand, 16) + if thunk > x86_indirect_its_thunk_r15: + insn_at_thunk = list(cap.disasm(prog.read(thunk, 16), thunk))[0] + operand += ' -> ' + insn_at_thunk.mnemonic + ' ' + insn_at_thunk.op_str + ' ' + if 'jmp' in insn_at_thunk.mnemonic and thunk & 0x20: + ksft.print_msg(f"\tPASSED: Found {operand} at safe address") + passed = True + if not passed: + if kcore_insn.operands[0].type == capstone.CS_OP_IMM: + operand += ' <' + prog.symbol(int(operand, 16)) + '>' + if '__x86_indirect_its_thunk_' in operand: + ksft.print_msg(f"\tPASSED: Found {operand}") + else: + ksft.print_msg(f"\tPASSED: Found direct branch: {kcore_insn}, ITS thunk not required.") + passed = True + else: + unknown = True + if passed: + tests_passed += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: unexpected operand: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.bytes} {kcore_insn.mnemonic} {operand}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASS: \t{tests_passed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"FAIL: \t{tests_failed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_retpoline_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed") +else: + ksft.test_result_fail(f"{tests_failed} ITS return thunk sites failed") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_permutations.py b/tools/testing/selftests/x86/bugs/its_permutations.py new file mode 100755 index 00000000000000..3204f4728c62cc --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_permutations.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) cmdline permutations with other bugs +# like spectre_v2 and retbleed. + +import os, sys, subprocess, itertools, re, shutil + +test_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, test_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) + +if not mitigation or "Not affected" in mitigation: + ksft.test_result_skip("Skipping its_permutations.py: not applicable") + ksft.finished() + +if shutil.which('vng') is None: + ksft.test_result_skip("Skipping its_permutations.py: virtme-ng ('vng') not found in PATH.") + ksft.finished() + +TEST = f"{test_dir}/its_sysfs.py" +default_kparam = ['clearcpuid=hypervisor', 'panic=5', 'panic_on_warn=1', 'oops=panic', 'nmi_watchdog=1', 'hung_task_panic=1'] + +DEBUG = " -v " + +# Install dependencies +# https://github.com/arighi/virtme-ng +# apt install virtme-ng +BOOT_CMD = f"vng --run {test_dir}/../../../../../arch/x86/boot/bzImage " +#BOOT_CMD += DEBUG + +bug = "indirect_target_selection" + +input_options = { + 'indirect_target_selection' : ['off', 'on', 'stuff', 'vmexit'], + 'retbleed' : ['off', 'stuff', 'auto'], + 'spectre_v2' : ['off', 'on', 'eibrs', 'retpoline', 'ibrs', 'eibrs,retpoline'], +} + +def pretty_print(output): + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + + # Define patterns and their corresponding colors + patterns = { + r"^ok \d+": OKGREEN, + r"^not ok \d+": FAIL, + r"^# Testing .*": OKBLUE, + r"^# Found: .*": WARNING, + r"^# Totals: .*": BOLD, + r"pass:([1-9]\d*)": OKGREEN, + r"fail:([1-9]\d*)": FAIL, + r"skip:([1-9]\d*)": WARNING, + } + + # Apply colors based on patterns + for pattern, color in patterns.items(): + output = re.sub(pattern, lambda match: f"{color}{match.group(0)}{ENDC}", output, flags=re.MULTILINE) + + print(output) + +combinations = list(itertools.product(*input_options.values())) +ksft.print_header() +ksft.set_plan(len(combinations)) + +logs = "" + +for combination in combinations: + append = "" + log = "" + for p in default_kparam: + append += f' --append={p}' + command = BOOT_CMD + append + test_params = "" + for i, key in enumerate(input_options.keys()): + param = f'{key}={combination[i]}' + test_params += f' {param}' + command += f" --append={param}" + command += f" -- {TEST}" + test_name = f"{bug} {test_params}" + pretty_print(f'# Testing {test_name}') + t = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + t.wait() + output, _ = t.communicate() + if t.returncode == 0: + ksft.test_result_pass(test_name) + else: + ksft.test_result_fail(test_name) + output = output.decode() + log += f" {output}" + pretty_print(log) + logs += output + "\n" + +# Optionally use tappy to parse the output +# apt install python3-tappy +with open("logs.txt", "w") as f: + f.write(logs) + +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_ret_alignment.py b/tools/testing/selftests/x86/bugs/its_ret_alignment.py new file mode 100755 index 00000000000000..f40078d9f6ffc1 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_ret_alignment.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Tests if the RETs are correctly patched by evaluating the +# vmlinux .return_sites in /proc/kcore. +# +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Run on target machine +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_ret_alignment.py + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_ret_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_ret_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at user-supplied path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +rethunks_start_vmlinux, rethunks_sec_offset, size = c.get_section_info(vmlinux, '.return_sites') +ksft.print_msg(f"vmlinux: Section .return_sites (0x{rethunks_start_vmlinux:x}) found at 0x{rethunks_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, rethunks_sec_offset, size) +total_rethunk_tests = len(sites_offset) +ksft.print_msg(f"Found {total_rethunk_tests} rethunk sites") + +prog = c.get_runtime_kernel() +rethunks_start_kcore = prog.symbol('__return_sites').address +ksft.print_msg(f'kcore: __rethunk_sites: 0x{rethunks_start_kcore:x}') + +its_return_thunk = prog.symbol('its_return_thunk').address +ksft.print_msg(f'kcore: its_return_thunk: 0x{its_return_thunk:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 +tests_skipped = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(len(sites_offset)): + site = rethunks_start_kcore + sites_offset[i] + vmlinux_site = rethunks_start_vmlinux + sites_offset[i] + try: + passed = unknown = failed = skipped = False + + symbol = identify_address(prog, site) + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + + insn_end = site + kcore_insn.size - 1 + + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {symbol} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if "jmp" in kcore_insn.mnemonic: + passed = True + elif "ret" not in kcore_insn.mnemonic: + skipped = True + + if passed: + ksft.print_msg(f"\tPASSED: Found {kcore_insn.mnemonic} {kcore_insn.op_str}") + tests_passed += 1 + elif skipped: + ksft.print_msg(f"\tSKIPPED: Found '{kcore_insn.mnemonic}'") + tests_skipped += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: An unknown instruction: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.mnemonic} {kcore_insn.op_str}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASSED: \t{tests_passed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"FAILED: \t{tests_failed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"SKIPPED: \t{tests_skipped} \t/ {total_rethunk_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_rethunk_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed.") +else: + ksft.test_result_fail(f"{tests_failed} failed sites need ITS return thunks.") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_sysfs.py b/tools/testing/selftests/x86/bugs/its_sysfs.py new file mode 100755 index 00000000000000..7bca81f2f6065b --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_sysfs.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for Indirect Target Selection(ITS) mitigation sysfs status. + +import sys, os, re +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft + +from common import * + +bug = "indirect_target_selection" +mitigation = get_sysfs(bug) + +ITS_MITIGATION_ALIGNED_THUNKS = "Mitigation: Aligned branch/return thunks" +ITS_MITIGATION_RETPOLINE_STUFF = "Mitigation: Retpolines, Stuffing RSB" +ITS_MITIGATION_VMEXIT_ONLY = "Mitigation: Vulnerable, KVM: Not affected" +ITS_MITIGATION_VULNERABLE = "Vulnerable" + +def check_mitigation(): + if mitigation == ITS_MITIGATION_ALIGNED_THUNKS: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF) + return + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_pass(bug, ITS_MITIGATION_ALIGNED_THUNKS) + return + + if mitigation == ITS_MITIGATION_RETPOLINE_STUFF: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + if sysfs_has('retbleed', 'Stuffing'): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + bug_check_fail(bug, ITS_MITIGATION_RETPOLINE_STUFF, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VMEXIT_ONLY: + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_pass(bug, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_fail(bug, ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VULNERABLE: + if sysfs_has("spectre_v2", "Vulnerable"): + bug_check_pass(bug, ITS_MITIGATION_VULNERABLE) + else: + bug_check_fail(bug, "Mitigation", ITS_MITIGATION_VULNERABLE) + + bug_status_unknown(bug, mitigation) + return + +ksft.print_header() +ksft.set_plan(1) +ksft.print_msg(f'{bug}: {mitigation} ...') + +if not basic_checks_sufficient(bug, mitigation): + check_mitigation() + +ksft.finished() From 250cf3693060a5f803c5f1ddc082bb06b16112a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 27 Apr 2025 15:41:51 -0400 Subject: [PATCH 1672/2924] __legitimize_mnt(): check for MNT_SYNC_UMOUNT should be under mount_lock ... or we risk stealing final mntput from sync umount - raising mnt_count after umount(2) has verified that victim is not busy, but before it has set MNT_SYNC_UMOUNT; in that case __legitimize_mnt() doesn't see that it's safe to quietly undo mnt_count increment and leaves dropping the reference to caller, where it'll be a full-blown mntput(). Check under mount_lock is needed; leaving the current one done before taking that makes no sense - it's nowhere near common enough to bother with. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namespace.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 98a5cd756e9ae4..eba4748388b1d0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -790,12 +790,8 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) smp_mb(); // see mntput_no_expire() if (likely(!read_seqretry(&mount_lock, seq))) return 0; - if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { - mnt_add_count(mnt, -1); - return 1; - } lock_mount_hash(); - if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { + if (unlikely(bastard->mnt_flags & (MNT_SYNC_UMOUNT | MNT_DOOMED))) { mnt_add_count(mnt, -1); unlock_mount_hash(); return 1; From 45375814eb3f4245956c0c85092a4eee4441d167 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 8 May 2025 03:54:14 +0000 Subject: [PATCH 1673/2924] tools/net/ynl: ethtool: fix crash when Hardware Clock info is missing Fix a crash in the ethtool YNL implementation when Hardware Clock information is not present in the response. This ensures graceful handling of devices or drivers that do not provide this optional field. e.g. Traceback (most recent call last): File "/net/tools/net/ynl/pyynl/./ethtool.py", line 438, in main() ~~~~^^ File "/net/tools/net/ynl/pyynl/./ethtool.py", line 341, in main print(f'PTP Hardware Clock: {tsinfo["phc-index"]}') ~~~~~~^^^^^^^^^^^^^ KeyError: 'phc-index' Fixes: f3d07b02b2b8 ("tools: ynl: ethtool testing tool") Signed-off-by: Hangbin Liu Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250508035414.82974-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ethtool.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py index af7fddd7b085bf..cab6b576c8762e 100755 --- a/tools/net/ynl/pyynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -338,16 +338,24 @@ def main(): print('Capabilities:') [print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])] - print(f'PTP Hardware Clock: {tsinfo["phc-index"]}') + print(f'PTP Hardware Clock: {tsinfo.get("phc-index", "none")}') - print('Hardware Transmit Timestamp Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + if 'tx-types' in tsinfo: + print('Hardware Transmit Timestamp Modes:') + [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + else: + print('Hardware Transmit Timestamp Modes: none') + + if 'rx-filters' in tsinfo: + print('Hardware Receive Filter Modes:') + [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + else: + print('Hardware Receive Filter Modes: none') - print('Hardware Receive Filter Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + if 'stats' in tsinfo and tsinfo['stats']: + print('Statistics:') + [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] - print('Statistics:') - [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] return print(f'Settings for {args.device}:') From f11cf946c0a92c560a890d68e4775723353599e1 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 8 May 2025 13:18:32 +0800 Subject: [PATCH 1674/2924] net: mctp: Don't access ifa_index when missing In mctp_dump_addrinfo, ifa_index can be used to filter interfaces, but only when the struct ifaddrmsg is provided. Otherwise it will be comparing to uninitialised memory - reproducible in the syzkaller case from dhcpd, or busybox "ip addr show". The kernel MCTP implementation has always filtered by ifa_index, so existing userspace programs expecting to dump MCTP addresses must already be passing a valid ifa_index value (either 0 or a real index). BUG: KMSAN: uninit-value in mctp_dump_addrinfo+0x208/0xac0 net/mctp/device.c:128 mctp_dump_addrinfo+0x208/0xac0 net/mctp/device.c:128 rtnl_dump_all+0x3ec/0x5b0 net/core/rtnetlink.c:4380 rtnl_dumpit+0xd5/0x2f0 net/core/rtnetlink.c:6824 netlink_dump+0x97b/0x1690 net/netlink/af_netlink.c:2309 Fixes: 583be982d934 ("mctp: Add device handling and netlink interface") Reported-by: syzbot+e76d52dadc089b9d197f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68135815.050a0220.3a872c.000e.GAE@google.com/ Reported-by: syzbot+1065a199625a388fce60@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/681357d6.050a0220.14dd7d.000d.GAE@google.com/ Signed-off-by: Matt Johnston Link: https://patch.msgid.link/20250508-mctp-addr-dump-v2-1-c8a53fd2dd66@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/device.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/mctp/device.c b/net/mctp/device.c index 8e0724c56723de..7c0dcf3df31962 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -117,11 +117,18 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct ifaddrmsg *hdr; struct mctp_dev *mdev; - int ifindex, rc; - - hdr = nlmsg_data(cb->nlh); - // filter by ifindex if requested - ifindex = hdr->ifa_index; + int ifindex = 0, rc; + + /* Filter by ifindex if a header is provided */ + if (cb->nlh->nlmsg_len >= nlmsg_msg_size(sizeof(*hdr))) { + hdr = nlmsg_data(cb->nlh); + ifindex = hdr->ifa_index; + } else { + if (cb->strict_check) { + NL_SET_ERR_MSG(cb->extack, "mctp: Invalid header for addr dump request"); + return -EINVAL; + } + } rcu_read_lock(); for_each_netdev_dump(net, dev, mcb->ifindex) { From 97c4e094a4b2edbb4fffeda718f8e806f825a18f Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Thu, 8 May 2025 11:44:34 +0300 Subject: [PATCH 1675/2924] tests/ncdevmem: Fix double-free of queue array netdev_bind_rx takes ownership of the queue array passed as parameter and frees it, so a queue array buffer cannot be reused across multiple netdev_bind_rx calls. This commit fixes that by always passing in a newly created queue array to all netdev_bind_rx calls in ncdevmem. Fixes: 85585b4bc8d8 ("selftests: add ncdevmem, netcat for devmem TCP") Signed-off-by: Cosmin Ratiu Acked-by: Stanislav Fomichev Reviewed-by: Joe Damato Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250508084434.1933069-1-cratiu@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/ncdevmem.c | 55 ++++++++----------- 1 file changed, 22 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 2bf14ac2b8c624..9d48004ff1a178 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -431,6 +431,22 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) return 0; } +static struct netdev_queue_id *create_queues(void) +{ + struct netdev_queue_id *queues; + size_t i = 0; + + queues = calloc(num_queues, sizeof(*queues)); + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + return queues; +} + int do_server(struct memory_buffer *mem) { char ctrl_data[sizeof(int) * 20000]; @@ -448,7 +464,6 @@ int do_server(struct memory_buffer *mem) char buffer[256]; int socket_fd; int client_fd; - size_t i = 0; int ret; ret = parse_address(server_ip, atoi(port), &server_sin); @@ -471,16 +486,7 @@ int do_server(struct memory_buffer *mem) sleep(1); - queues = malloc(sizeof(*queues) * num_queues); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); tmp_mem = malloc(mem->size); @@ -545,7 +551,6 @@ int do_server(struct memory_buffer *mem) goto cleanup; } - i++; for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_SOCKET || (cm->cmsg_type != SCM_DEVMEM_DMABUF && @@ -630,10 +635,8 @@ int do_server(struct memory_buffer *mem) void run_devmem_tests(void) { - struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; - size_t i = 0; mem = provider->alloc(getpagesize() * NUM_PAGES); @@ -641,38 +644,24 @@ void run_devmem_tests(void) if (configure_rss()) error(1, 0, "rss error\n"); - queues = calloc(num_queues, sizeof(*queues)); - if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, + calloc(num_queues, sizeof(struct netdev_queue_id)), + num_queues, &ys)) error(1, 0, "Binding empty queues array should have failed\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - if (configure_headersplit(0)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Configure dmabuf with header split off should have failed\n"); if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); /* Deactivating a bound queue should not be legal */ From 65781e19dcfcb4aed1167d87a3ffcc2a0c071d47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 28 Apr 2025 23:56:14 -0400 Subject: [PATCH 1676/2924] do_umount(): add missing barrier before refcount checks in sync case do_umount() analogue of the race fixed in 119e1ef80ecf "fix __legitimize_mnt()/mntput() race". Here we want to make sure that if __legitimize_mnt() doesn't notice our lock_mount_hash(), we will notice their refcount increment. Harder to hit than mntput_no_expire() one, fortunately, and consequences are milder (sync umount acting like umount -l on a rare race with RCU pathwalk hitting at just the wrong time instead of use-after-free galore mntput_no_expire() counterpart used to be hit). Still a bug... Fixes: 48a066e72d97 ("RCU'd vfsmounts") Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index eba4748388b1d0..d8a344d0a80aab 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -787,7 +787,7 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); - smp_mb(); // see mntput_no_expire() + smp_mb(); // see mntput_no_expire() and do_umount() if (likely(!read_seqretry(&mount_lock, seq))) return 0; lock_mount_hash(); @@ -2044,6 +2044,7 @@ static int do_umount(struct mount *mnt, int flags) umount_tree(mnt, UMOUNT_PROPAGATE); retval = 0; } else { + smp_mb(); // paired with __legitimize_mnt() shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { From 267fc3a06a37bec30cc5b4d97fb8409102bc7a9d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 28 Apr 2025 21:43:23 -0400 Subject: [PATCH 1677/2924] do_move_mount(): don't leak MNTNS_PROPAGATING on failures as it is, a failed move_mount(2) from anon namespace breaks all further propagation into that namespace, including normal mounts in non-anon namespaces that would otherwise propagate there. Fixes: 064fe6e233e8 ("mount: handle mount propagation for detached mount trees") Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namespace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index d8a344d0a80aab..04a9bb9f31fad1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3715,15 +3715,14 @@ static int do_move_mount(struct path *old_path, if (err) goto out; - if (is_anon_ns(ns)) - ns->mntns_flags &= ~MNTNS_PROPAGATING; - /* if the mount is moved, it should no longer be expire * automatically */ list_del_init(&old->mnt_expire); if (attached) put_mountpoint(old_mp); out: + if (is_anon_ns(ns)) + ns->mntns_flags &= ~MNTNS_PROPAGATING; unlock_mount(mp); if (!err) { if (attached) { From d1ddc6f1d9f0cf887834eb54a5a68bbfeec1bb77 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2025 15:35:51 -0400 Subject: [PATCH 1678/2924] fix IS_MNT_PROPAGATING uses propagate_mnt() does not attach anything to mounts created during propagate_mnt() itself. What's more, anything on ->mnt_slave_list of such new mount must also be new, so we don't need to even look there. When move_mount() had been introduced, we've got an additional class of mounts to skip - if we are moving from anon namespace, we do not want to propagate to mounts we are moving (i.e. all mounts in that anon namespace). Unfortunately, the part about "everything on their ->mnt_slave_list will also be ignorable" is not true - if we have propagation graph A -> B -> C and do OPEN_TREE_CLONE open_tree() of B, we get A -> [B <-> B'] -> C as propagation graph, where B' is a clone of B in our detached tree. Making B private will result in A -> B' -> C C still gets propagation from A, as it would after making B private if we hadn't done that open_tree(), but now the propagation goes through B'. Trying to move_mount() our detached tree on subdirectory in A should have * moved B' on that subdirectory in A * skipped the corresponding subdirectory in B' itself * copied B' on the corresponding subdirectory in C. As it is, the logics in propagation_next() and friends ends up skipping propagation into C, since it doesn't consider anything downstream of B'. IOW, walking the propagation graph should only skip the ->mnt_slave_list of new mounts; the only places where the check for "in that one anon namespace" are applicable are propagate_one() (where we should treat that as the same kind of thing as "mountpoint we are looking at is not visible in the mount we are looking at") and propagation_would_overmount(). The latter is better dealt with in the caller (can_move_mount_beneath()); on the first call of propagation_would_overmount() the test is always false, on the second it is always true in "move from anon namespace" case and always false in "move within our namespace" one, so it's easier to just use check_mnt() before bothering with the second call and be done with that. Fixes: 064fe6e233e8 ("mount: handle mount propagation for detached mount trees") Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namespace.c | 3 ++- fs/pnode.c | 17 +++++++++-------- fs/pnode.h | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 04a9bb9f31fad1..1b466c54a357d1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3557,7 +3557,8 @@ static int can_move_mount_beneath(const struct path *from, * @mnt_from itself. This defeats the whole purpose of mounting * @mnt_from beneath @mnt_to. */ - if (propagation_would_overmount(parent_mnt_to, mnt_from, mp)) + if (check_mnt(mnt_from) && + propagation_would_overmount(parent_mnt_to, mnt_from, mp)) return -EINVAL; return 0; diff --git a/fs/pnode.c b/fs/pnode.c index 7a062a5de10e36..fb77427df39e2e 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -150,7 +150,7 @@ static struct mount *propagation_next(struct mount *m, struct mount *origin) { /* are there any slaves of this mount? */ - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); while (1) { @@ -174,7 +174,7 @@ static struct mount *skip_propagation_subtree(struct mount *m, * Advance m such that propagation_next will not return * the slaves of m. */ - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) m = last_slave(m); return m; @@ -185,7 +185,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin) while (1) { while (1) { struct mount *next; - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); next = next_peer(m); if (m->mnt_group_id == origin->mnt_group_id) { @@ -226,11 +226,15 @@ static int propagate_one(struct mount *m, struct mountpoint *dest_mp) struct mount *child; int type; /* skip ones added by this propagate_mnt() */ - if (IS_MNT_PROPAGATED(m)) + if (IS_MNT_NEW(m)) return 0; - /* skip if mountpoint isn't covered by it */ + /* skip if mountpoint isn't visible in m */ if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) return 0; + /* skip if m is in the anon_ns we are emptying */ + if (m->mnt_ns->mntns_flags & MNTNS_PROPAGATING) + return 0; + if (peers(m, last_dest)) { type = CL_MAKE_SHARED; } else { @@ -380,9 +384,6 @@ bool propagation_would_overmount(const struct mount *from, if (!IS_MNT_SHARED(from)) return false; - if (IS_MNT_PROPAGATED(to)) - return false; - if (to->mnt.mnt_root != mp->m_dentry) return false; diff --git a/fs/pnode.h b/fs/pnode.h index ddafe0d087ca0a..34b6247af01d92 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -12,7 +12,7 @@ #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED) #define IS_MNT_SLAVE(m) ((m)->mnt_master) -#define IS_MNT_PROPAGATED(m) (!(m)->mnt_ns || ((m)->mnt_ns->mntns_flags & MNTNS_PROPAGATING)) +#define IS_MNT_NEW(m) (!(m)->mnt_ns) #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) From e4f349bd6e58051df698b82f94721f18a02a293d Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 8 May 2025 14:16:00 +0930 Subject: [PATCH 1679/2924] net: mctp: Ensure keys maintain only one ref to corresponding dev mctp_flow_prepare_output() is called in mctp_route_output(), which places outbound packets onto a given interface. The packet may represent a message fragment, in which case we provoke an unbalanced reference count to the underlying device. This causes trouble if we ever attempt to remove the interface: [ 48.702195] usb 1-1: USB disconnect, device number 2 [ 58.883056] unregister_netdevice: waiting for mctpusb0 to become free. Usage count = 2 [ 69.022548] unregister_netdevice: waiting for mctpusb0 to become free. Usage count = 2 [ 79.172568] unregister_netdevice: waiting for mctpusb0 to become free. Usage count = 2 ... Predicate the invocation of mctp_dev_set_key() in mctp_flow_prepare_output() on not already having associated the device with the key. It's not yet realistic to uphold the property that the key maintains only one device reference earlier in the transmission sequence as the route (and therefore the device) may not be known at the time the key is associated with the socket. Fixes: 67737c457281 ("mctp: Pass flow data & flow release events to drivers") Acked-by: Jeremy Kerr Signed-off-by: Andrew Jeffery Link: https://patch.msgid.link/20250508-mctp-dev-refcount-v1-1-d4f965c67bb5@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 4c460160914f01..d9c8e5a5f9ce9a 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -313,8 +313,10 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) key = flow->key; - if (WARN_ON(key->dev && key->dev != dev)) + if (key->dev) { + WARN_ON(key->dev != dev); return; + } mctp_dev_set_key(dev, key); } From 9dda18a32b4a6693fccd3f7c0738af646147b1cf Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 10 Apr 2025 05:22:21 -0700 Subject: [PATCH 1680/2924] tracing: fprobe: Fix RCU warning message in list traversal When CONFIG_PROVE_RCU_LIST is enabled, fprobe triggers the following warning: WARNING: suspicious RCU usage kernel/trace/fprobe.c:457 RCU-list traversed in non-reader section!! other info that might help us debug this: #1: ffffffff863c4e08 (fprobe_mutex){+.+.}-{4:4}, at: fprobe_module_callback+0x7b/0x8c0 Call Trace: fprobe_module_callback notifier_call_chain blocking_notifier_call_chain This warning occurs because fprobe_remove_node_in_module() traverses an RCU list using RCU primitives without holding an RCU read lock. However, the function is only called from fprobe_module_callback(), which holds the fprobe_mutex lock that provides sufficient protection for safely traversing the list. Fix the warning by specifying the locking design to the CONFIG_PROVE_RCU_LIST mechanism. Add the lockdep_is_held() argument to hlist_for_each_entry_rcu() to inform the RCU checker that fprobe_mutex provides the required protection. Link: https://lore.kernel.org/all/20250410-fprobe-v1-1-068ef5f41436@debian.org/ Fixes: a3dc2983ca7b90 ("tracing: fprobe: Cleanup fprobe hash when module unloading") Signed-off-by: Breno Leitao Tested-by: Antonio Quartulli Tested-by: Matthieu Baerts (NGI0) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/fprobe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 95c6e3473a76b5..ba7ff14f5339b5 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -454,7 +454,8 @@ static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head * struct fprobe_hlist_node *node; int ret = 0; - hlist_for_each_entry_rcu(node, head, hlist) { + hlist_for_each_entry_rcu(node, head, hlist, + lockdep_is_held(&fprobe_mutex)) { if (!within_module(node->addr, mod)) continue; if (delete_fprobe_node(node)) From e41b5af4519f90f9a751805ede2102ae36caf5d0 Mon Sep 17 00:00:00 2001 From: Paul Cacheux Date: Sun, 4 May 2025 20:27:52 +0200 Subject: [PATCH 1681/2924] tracing: add missing trace_probe_log_clear for eprobes Make sure trace_probe_log_clear is called in the tracing eprobe code path, matching the trace_probe_log_init call. Link: https://lore.kernel.org/all/20250504-fix-trace-probe-log-race-v3-1-9e99fec7eddc@gmail.com/ Signed-off-by: Paul Cacheux Acked-by: Steven Rostedt (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_eprobe.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index c08355c3ef32b4..916555f0de811f 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -969,10 +969,13 @@ static int __trace_eprobe_create(int argc, const char *argv[]) goto error; } } + trace_probe_log_clear(); return ret; + parse_error: ret = -EINVAL; error: + trace_probe_log_clear(); trace_event_probe_cleanup(ep); return ret; } From e98960bc4df931a2f4fa88f7e55370699ca4dd82 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 6 Mar 2025 12:50:21 +0100 Subject: [PATCH 1682/2924] Input: hisi_powerkey - enable system-wakeup for s2idle To wake up the system from s2idle when pressing the power-button, let's convert from using pm_wakeup_event() to pm_wakeup_dev_event(), as it allows us to specify the "hard" in-parameter, which needs to be set for s2idle. Signed-off-by: Ulf Hansson Link: https://lore.kernel.org/r/20250306115021.797426-1-ulf.hansson@linaro.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/hisi_powerkey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/hisi_powerkey.c b/drivers/input/misc/hisi_powerkey.c index d3c293a95d322e..d315017324d93c 100644 --- a/drivers/input/misc/hisi_powerkey.c +++ b/drivers/input/misc/hisi_powerkey.c @@ -30,7 +30,7 @@ static irqreturn_t hi65xx_power_press_isr(int irq, void *q) { struct input_dev *input = q; - pm_wakeup_event(input->dev.parent, MAX_HELD_TIME); + pm_wakeup_dev_event(input->dev.parent, MAX_HELD_TIME, true); input_report_key(input, KEY_POWER, 1); input_sync(input); From d05a424bea9aa3435009d5c462055008cc1545d8 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Fri, 28 Mar 2025 16:43:36 -0700 Subject: [PATCH 1683/2924] Input: xpad - fix two controller table values Two controllers -- Mad Catz JOYTECH NEO SE Advanced and PDP Mirror's Edge Official -- were missing the value of the mapping field, and thus wouldn't detect properly. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250328234345.989761-1-vi@endrift.com Fixes: 540602a43ae5 ("Input: xpad - add a few new VID/PID combinations") Fixes: 3492321e2e60 ("Input: xpad - add multiple supported devices") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index c933e47173bd1e..2c935cf79ddcc9 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -205,7 +205,7 @@ static const struct xpad_device { { 0x0738, 0x9871, "Mad Catz Portable Drum", 0, XTYPE_XBOX360 }, { 0x0738, 0xb726, "Mad Catz Xbox controller - MW2", 0, XTYPE_XBOX360 }, { 0x0738, 0xb738, "Mad Catz MVC2TE Stick 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, - { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", XTYPE_XBOX360 }, + { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb02, "Saitek Cyborg Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb03, "Saitek P3200 Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, @@ -240,7 +240,7 @@ static const struct xpad_device { { 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0147, "PDP Marvel Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x015c, "PDP Xbox One Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, - { 0x0e6f, 0x015d, "PDP Mirror's Edge Official Wired Controller for Xbox One", XTYPE_XBOXONE }, + { 0x0e6f, 0x015d, "PDP Mirror's Edge Official Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0161, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0162, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0163, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, From 4ef46367073b107ec22f46fe5f12176e87c238e8 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sat, 10 May 2025 22:59:25 -0700 Subject: [PATCH 1684/2924] Input: xpad - fix Share button on Xbox One controllers The Share button, if present, is always one of two offsets from the end of the file, depending on the presence of a specific interface. As we lack parsing for the identify packet we can't automatically determine the presence of that interface, but we can hardcode which of these offsets is correct for a given controller. More controllers are probably fixable by adding the MAP_SHARE_BUTTON in the future, but for now I only added the ones that I have the ability to test directly. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250328234345.989761-2-vi@endrift.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 2c935cf79ddcc9..8ee7d8e5d1c733 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -77,12 +77,13 @@ * xbox d-pads should map to buttons, as is required for DDR pads * but we map them to axes when possible to simplify things */ -#define MAP_DPAD_TO_BUTTONS (1 << 0) -#define MAP_TRIGGERS_TO_BUTTONS (1 << 1) -#define MAP_STICKS_TO_NULL (1 << 2) -#define MAP_SELECT_BUTTON (1 << 3) -#define MAP_PADDLES (1 << 4) -#define MAP_PROFILE_BUTTON (1 << 5) +#define MAP_DPAD_TO_BUTTONS BIT(0) +#define MAP_TRIGGERS_TO_BUTTONS BIT(1) +#define MAP_STICKS_TO_NULL BIT(2) +#define MAP_SHARE_BUTTON BIT(3) +#define MAP_PADDLES BIT(4) +#define MAP_PROFILE_BUTTON BIT(5) +#define MAP_SHARE_OFFSET BIT(6) #define DANCEPAD_MAP_CONFIG (MAP_DPAD_TO_BUTTONS | \ MAP_TRIGGERS_TO_BUTTONS | MAP_STICKS_TO_NULL) @@ -135,7 +136,7 @@ static const struct xpad_device { { 0x03f0, 0x048D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wireless */ { 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, { 0x03f0, 0x07A0, "HyperX Clutch Gladiate RGB", 0, XTYPE_XBOXONE }, - { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, /* v2 */ + { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, /* v2 */ { 0x03f0, 0x09B4, "HyperX Clutch Tanto", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, @@ -159,7 +160,7 @@ static const struct xpad_device { { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x0b00, "Microsoft X-Box One Elite 2 pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x0b0a, "Microsoft X-Box Adaptive Controller", MAP_PROFILE_BUTTON, XTYPE_XBOXONE }, - { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, + { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SHARE_BUTTON | MAP_SHARE_OFFSET, XTYPE_XBOXONE }, { 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21e, "Logitech Gamepad F510", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21f, "Logitech Gamepad F710", 0, XTYPE_XBOX360 }, @@ -211,7 +212,7 @@ static const struct xpad_device { { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, { 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 }, { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, - { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE }, + { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x0b05, 0x1abb, "ASUS ROG RAIKIRI PRO", 0, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, @@ -391,7 +392,7 @@ static const struct xpad_device { { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, - { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, + { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, @@ -1028,7 +1029,7 @@ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned cha * The report format was gleaned from * https://github.com/kylelemons/xbox/blob/master/xbox.go */ -static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) +static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data, u32 len) { struct input_dev *dev = xpad->dev; bool do_sync = false; @@ -1069,8 +1070,12 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char /* menu/view buttons */ input_report_key(dev, BTN_START, data[4] & BIT(2)); input_report_key(dev, BTN_SELECT, data[4] & BIT(3)); - if (xpad->mapping & MAP_SELECT_BUTTON) - input_report_key(dev, KEY_RECORD, data[22] & BIT(0)); + if (xpad->mapping & MAP_SHARE_BUTTON) { + if (xpad->mapping & MAP_SHARE_OFFSET) + input_report_key(dev, KEY_RECORD, data[len - 26] & BIT(0)); + else + input_report_key(dev, KEY_RECORD, data[len - 18] & BIT(0)); + } /* buttons A,B,X,Y */ input_report_key(dev, BTN_A, data[4] & BIT(4)); @@ -1218,7 +1223,7 @@ static void xpad_irq_in(struct urb *urb) xpad360w_process_packet(xpad, 0, xpad->idata); break; case XTYPE_XBOXONE: - xpadone_process_packet(xpad, 0, xpad->idata); + xpadone_process_packet(xpad, 0, xpad->idata, urb->actual_length); break; default: xpad_process_packet(xpad, 0, xpad->idata); @@ -1945,7 +1950,7 @@ static int xpad_init_input(struct usb_xpad *xpad) xpad->xtype == XTYPE_XBOXONE) { for (i = 0; xpad360_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad360_btn[i]); - if (xpad->mapping & MAP_SELECT_BUTTON) + if (xpad->mapping & MAP_SHARE_BUTTON) input_set_capability(input_dev, EV_KEY, KEY_RECORD); } else { for (i = 0; xpad_btn[i] >= 0; i++) From 7026d23cb383120712f3a214b0b33ca349cd21a0 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sat, 10 May 2025 23:00:10 -0700 Subject: [PATCH 1685/2924] Input: xpad - add support for several more controllers This adds support for several new controllers, all of which include Share buttons: - HORI Drum controller - PowerA Fusion Pro 4 - 8BitDo Ultimate 3-mode Controller - Hyperkin DuchesS Xbox One controller - PowerA MOGA XP-Ultra controller Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250328234345.989761-4-vi@endrift.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 8ee7d8e5d1c733..418553cadc031c 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -282,6 +282,7 @@ static const struct xpad_device { { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0151, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE }, { 0x0f0d, 0x0152, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE }, + { 0x0f0d, 0x01b2, "HORI Taiko No Tatsujin Drum Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX }, { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, @@ -354,6 +355,8 @@ static const struct xpad_device { { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, + { 0x20d6, 0x400b, "PowerA FUSION Pro 4 Wired Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, + { 0x20d6, 0x890b, "PowerA MOGA XP-Ultra Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x2345, 0xe00b, "Machenike G5 Pro Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 }, @@ -385,11 +388,13 @@ static const struct xpad_device { { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2993, 0x2001, "TECNO Pocket Go", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, + { 0x2dc8, 0x200f, "8BitDo Ultimate 3-mode Controller for Xbox", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310b, "8BitDo Ultimate 2 Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, + { 0x2e24, 0x0423, "Hyperkin DuchesS Xbox One pad", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, @@ -716,8 +721,10 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x045e, 0x0b00, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, extra_input_packet_init), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_led_on), + XBOXONE_INIT_PKT(0x0f0d, 0x01b2, xboxone_pdp_led_on), XBOXONE_INIT_PKT(0x20d6, 0xa01a, xboxone_pdp_led_on), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_auth), + XBOXONE_INIT_PKT(0x0f0d, 0x01b2, xboxone_pdp_auth), XBOXONE_INIT_PKT(0x20d6, 0xa01a, xboxone_pdp_auth), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), From 93406e9d024058b3bf487656ddd0ac552e5a366e Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Sat, 10 May 2025 23:06:34 -0700 Subject: [PATCH 1686/2924] Input: xpad - fix xpad_device sorting A recent commit put one entry in the wrong place. This just moves it to the right place. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250328234345.989761-5-vi@endrift.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 418553cadc031c..57a5ff3d1992c5 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -141,9 +141,9 @@ static const struct xpad_device { { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, - { 0x044f, 0xd01e, "ThrustMaster, Inc. ESWAP X 2 ELDEN RING EDITION", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f10, "Thrustmaster Modena GT Wheel", 0, XTYPE_XBOX }, { 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 }, + { 0x044f, 0xd01e, "ThrustMaster, Inc. ESWAP X 2 ELDEN RING EDITION", 0, XTYPE_XBOXONE }, { 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x0285, "Microsoft X-Box pad (Japan)", 0, XTYPE_XBOX }, { 0x045e, 0x0287, "Microsoft Xbox Controller S", 0, XTYPE_XBOX }, From f7387eff4bad33d12719c66c43541c095556ae4e Mon Sep 17 00:00:00 2001 From: Seongman Lee Date: Sun, 11 May 2025 18:23:28 +0900 Subject: [PATCH 1687/2924] x86/sev: Fix operator precedence in GHCB_MSR_VMPL_REQ_LEVEL macro The GHCB_MSR_VMPL_REQ_LEVEL macro lacked parentheses around the bitmask expression, causing the shift operation to bind too early. As a result, when requesting VMPL1 (e.g., GHCB_MSR_VMPL_REQ_LEVEL(1)), incorrect values such as 0x000000016 were generated instead of the intended 0x100000016 (the requested VMPL level is specified in GHCBData[39:32]). Fix the precedence issue by grouping the masked value before applying the shift. [ bp: Massage commit message. ] Fixes: 34ff65901735 ("x86/sev: Use kernel provided SVSM Calling Areas") Signed-off-by: Seongman Lee Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250511092329.12680-1-cloudlee1719@gmail.com --- arch/x86/include/asm/sev-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index acb85b9346d84b..0020d77a080001 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -116,7 +116,7 @@ enum psc_op { #define GHCB_MSR_VMPL_REQ 0x016 #define GHCB_MSR_VMPL_REQ_LEVEL(v) \ /* GHCBData[39:32] */ \ - (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \ + ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \ /* GHCBDdata[11:0] */ \ GHCB_MSR_VMPL_REQ) From ff7b190aef6cccdb6f14d20c5753081fe6420e0b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 11 May 2025 15:45:27 +0200 Subject: [PATCH 1688/2924] ALSA: seq: Fix delivery of UMP events to group ports When an event with UMP message is sent to a UMP client, the EP port receives always no matter where the event is sent to, as it's a catch-all port. OTOH, if an event is sent to EP port, and if the event has a certain UMP Group, it should have been delivered to the associated UMP Group port, too, but this was ignored, so far. This patch addresses the behavior. Now a UMP event sent to the Endpoint port will be delivered to the subscribers of the UMP group port the event is associated with. The patch also does a bit of refactoring to simplify the code about __deliver_to_subscribers(). Fixes: 177ccf811df4 ("ALSA: seq: Support MIDI 2.0 UMP Endpoint port") Link: https://patch.msgid.link/20250511134528.6314-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 52 ++++++++++++++++++++------------ sound/core/seq/seq_ump_convert.c | 18 +++++++++++ sound/core/seq/seq_ump_convert.h | 1 + 3 files changed, 52 insertions(+), 19 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 198c598a539398..880240924bfd05 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -732,15 +732,21 @@ static int snd_seq_deliver_single_event(struct snd_seq_client *client, */ static int __deliver_to_subscribers(struct snd_seq_client *client, struct snd_seq_event *event, - struct snd_seq_client_port *src_port, - int atomic, int hop) + int port, int atomic, int hop) { + struct snd_seq_client_port *src_port; struct snd_seq_subscribers *subs; int err, result = 0, num_ev = 0; union __snd_seq_event event_saved; size_t saved_size; struct snd_seq_port_subs_info *grp; + if (port < 0) + return 0; + src_port = snd_seq_port_use_ptr(client, port); + if (!src_port) + return 0; + /* save original event record */ saved_size = snd_seq_event_packet_size(event); memcpy(&event_saved, event, saved_size); @@ -775,6 +781,7 @@ static int __deliver_to_subscribers(struct snd_seq_client *client, read_unlock(&grp->list_lock); else up_read(&grp->list_mutex); + snd_seq_port_unlock(src_port); memcpy(event, &event_saved, saved_size); return (result < 0) ? result : num_ev; } @@ -783,25 +790,32 @@ static int deliver_to_subscribers(struct snd_seq_client *client, struct snd_seq_event *event, int atomic, int hop) { - struct snd_seq_client_port *src_port; - int ret = 0, ret2; - - src_port = snd_seq_port_use_ptr(client, event->source.port); - if (src_port) { - ret = __deliver_to_subscribers(client, event, src_port, atomic, hop); - snd_seq_port_unlock(src_port); - } - - if (client->ump_endpoint_port < 0 || - event->source.port == client->ump_endpoint_port) - return ret; + int ret; +#if IS_ENABLED(CONFIG_SND_SEQ_UMP) + int ret2; +#endif - src_port = snd_seq_port_use_ptr(client, client->ump_endpoint_port); - if (!src_port) + ret = __deliver_to_subscribers(client, event, + event->source.port, atomic, hop); +#if IS_ENABLED(CONFIG_SND_SEQ_UMP) + if (!snd_seq_client_is_ump(client) || client->ump_endpoint_port < 0) return ret; - ret2 = __deliver_to_subscribers(client, event, src_port, atomic, hop); - snd_seq_port_unlock(src_port); - return ret2 < 0 ? ret2 : ret; + /* If it's an event from EP port (and with a UMP group), + * deliver to subscribers of the corresponding UMP group port, too. + * Or, if it's from non-EP port, deliver to subscribers of EP port, too. + */ + if (event->source.port == client->ump_endpoint_port) + ret2 = __deliver_to_subscribers(client, event, + snd_seq_ump_group_port(event), + atomic, hop); + else + ret2 = __deliver_to_subscribers(client, event, + client->ump_endpoint_port, + atomic, hop); + if (ret2 < 0) + return ret2; +#endif + return ret; } /* deliver an event to the destination port(s). diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index ff7e558b4d51d0..db2f169cae11ea 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -1285,3 +1285,21 @@ int snd_seq_deliver_to_ump(struct snd_seq_client *source, else return cvt_to_ump_midi1(dest, dest_port, event, atomic, hop); } + +/* return the UMP group-port number of the event; + * return -1 if groupless or non-UMP event + */ +int snd_seq_ump_group_port(const struct snd_seq_event *event) +{ + const struct snd_seq_ump_event *ump_ev = + (const struct snd_seq_ump_event *)event; + unsigned char type; + + if (!snd_seq_ev_is_ump(event)) + return -1; + type = ump_message_type(ump_ev->ump[0]); + if (ump_is_groupless_msg(type)) + return -1; + /* group-port number starts from 1 */ + return ump_message_group(ump_ev->ump[0]) + 1; +} diff --git a/sound/core/seq/seq_ump_convert.h b/sound/core/seq/seq_ump_convert.h index 6c146d8032804f..4abf0a7637d701 100644 --- a/sound/core/seq/seq_ump_convert.h +++ b/sound/core/seq/seq_ump_convert.h @@ -18,5 +18,6 @@ int snd_seq_deliver_to_ump(struct snd_seq_client *source, struct snd_seq_client_port *dest_port, struct snd_seq_event *event, int atomic, int hop); +int snd_seq_ump_group_port(const struct snd_seq_event *event); #endif /* __SEQ_UMP_CONVERT_H */ From 1f93d877f09d987f08baedd50597aaaa72a37be4 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 8 May 2025 21:12:07 +0300 Subject: [PATCH 1689/2924] ALSA/hda: intel-sdw-acpi: Correct sdw_intel_acpi_scan() function parameter The acpi_handle should be just a handle and not a pointer in sdw_intel_acpi_scan() parameter list. It is called with 'acpi_handle handle' as parameter and it is passing it to acpi_walk_namespace, which also expects acpi_handle and not acpi_handle* Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Link: https://patch.msgid.link/20250508181207.22113-1-peter.ujfalusi@linux.intel.com Signed-off-by: Takashi Iwai --- include/linux/soundwire/sdw_intel.h | 2 +- sound/hda/intel-sdw-acpi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 493d9de4e47241..dc6ebaee3d18b2 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -365,7 +365,7 @@ struct sdw_intel_res { * on e.g. which machine driver to select (I2S mode, HDaudio or * SoundWire). */ -int sdw_intel_acpi_scan(acpi_handle *parent_handle, +int sdw_intel_acpi_scan(acpi_handle parent_handle, struct sdw_intel_acpi_info *info); void sdw_intel_process_wakeen_event(struct sdw_intel_ctx *ctx); diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index 8686adaf453103..d3511135f7d39f 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -177,7 +177,7 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, * sdw_intel_startup() is required for creation of devices and bus * startup */ -int sdw_intel_acpi_scan(acpi_handle *parent_handle, +int sdw_intel_acpi_scan(acpi_handle parent_handle, struct sdw_intel_acpi_info *info) { acpi_status status; From dd33993a9721ab1dae38bd37c9f665987d554239 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 11 May 2025 16:11:45 +0200 Subject: [PATCH 1690/2924] ALSA: ump: Fix a typo of snd_ump_stream_msg_device_info s/devince/device/ It's used only internally, so no any behavior changes. Fixes: 37e0e14128e0 ("ALSA: ump: Support UMP Endpoint and Function Block parsing") Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20250511141147.10246-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- drivers/usb/gadget/function/f_midi2.c | 2 +- include/sound/ump_msg.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c index 12e866fb311d63..0a800ba53816a8 100644 --- a/drivers/usb/gadget/function/f_midi2.c +++ b/drivers/usb/gadget/function/f_midi2.c @@ -475,7 +475,7 @@ static void reply_ump_stream_ep_info(struct f_midi2_ep *ep) /* reply a UMP EP device info */ static void reply_ump_stream_ep_device(struct f_midi2_ep *ep) { - struct snd_ump_stream_msg_devince_info rep = { + struct snd_ump_stream_msg_device_info rep = { .type = UMP_MSG_TYPE_STREAM, .status = UMP_STREAM_MSG_STATUS_DEVICE_INFO, .manufacture_id = ep->info.manufacturer, diff --git a/include/sound/ump_msg.h b/include/sound/ump_msg.h index 72f60ddfea7535..9556b4755a1ed8 100644 --- a/include/sound/ump_msg.h +++ b/include/sound/ump_msg.h @@ -604,7 +604,7 @@ struct snd_ump_stream_msg_ep_info { } __packed; /* UMP Stream Message: Device Info Notification (128bit) */ -struct snd_ump_stream_msg_devince_info { +struct snd_ump_stream_msg_device_info { #ifdef __BIG_ENDIAN_BITFIELD /* 0 */ u32 type:4; @@ -754,7 +754,7 @@ struct snd_ump_stream_msg_fb_name { union snd_ump_stream_msg { struct snd_ump_stream_msg_ep_discovery ep_discovery; struct snd_ump_stream_msg_ep_info ep_info; - struct snd_ump_stream_msg_devince_info device_info; + struct snd_ump_stream_msg_device_info device_info; struct snd_ump_stream_msg_stream_cfg stream_cfg; struct snd_ump_stream_msg_fb_discovery fb_discovery; struct snd_ump_stream_msg_fb_info fb_info; From 82f2b0b97b36ee3fcddf0f0780a9a0825d52fec3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 May 2025 14:54:11 -0700 Subject: [PATCH 1691/2924] Linux 6.15-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b29cc321ffd9c6..64c514f4bc1939 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* From a833a693a490ecff8ba377654c6d4d333718b6b1 Mon Sep 17 00:00:00 2001 From: Wupeng Ma Date: Thu, 10 Apr 2025 14:26:33 +0800 Subject: [PATCH 1692/2924] mm: hugetlb: fix incorrect fallback for subpool During our testing with hugetlb subpool enabled, we observe that hstate->resv_huge_pages may underflow into negative values. Root cause analysis reveals a race condition in subpool reservation fallback handling as follow: hugetlb_reserve_pages() /* Attempt subpool reservation */ gbl_reserve = hugepage_subpool_get_pages(spool, chg); /* Global reservation may fail after subpool allocation */ if (hugetlb_acct_memory(h, gbl_reserve) < 0) goto out_put_pages; out_put_pages: /* This incorrectly restores reservation to subpool */ hugepage_subpool_put_pages(spool, chg); When hugetlb_acct_memory() fails after subpool allocation, the current implementation over-commits subpool reservations by returning the full 'chg' value instead of the actual allocated 'gbl_reserve' amount. This discrepancy propagates to global reservations during subsequent releases, eventually causing resv_huge_pages underflow. This problem can be trigger easily with the following steps: 1. reverse hugepage for hugeltb allocation 2. mount hugetlbfs with min_size to enable hugetlb subpool 3. alloc hugepages with two task(make sure the second will fail due to insufficient amount of hugepages) 4. with for a few seconds and repeat step 3 which will make hstate->resv_huge_pages to go below zero. To fix this problem, return corrent amount of pages to subpool during the fallback after hugepage_subpool_get_pages is called. Link: https://lkml.kernel.org/r/20250410062633.3102457-1-mawupeng1@huawei.com Fixes: 1c5ecae3a93f ("hugetlbfs: add minimum size accounting to subpools") Signed-off-by: Wupeng Ma Tested-by: Joshua Hahn Reviewed-by: Oscar Salvador Cc: David Hildenbrand Cc: Ma Wupeng Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 6ea1be71aa429e..7ae38bfb9096c4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3010,7 +3010,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, struct hugepage_subpool *spool = subpool_vma(vma); struct hstate *h = hstate_vma(vma); struct folio *folio; - long retval, gbl_chg; + long retval, gbl_chg, gbl_reserve; map_chg_state map_chg; int ret, idx; struct hugetlb_cgroup *h_cg = NULL; @@ -3163,8 +3163,16 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h), h_cg); out_subpool_put: - if (map_chg) - hugepage_subpool_put_pages(spool, 1); + /* + * put page to subpool iff the quota of subpool's rsv_hpages is used + * during hugepage_subpool_get_pages. + */ + if (map_chg && !gbl_chg) { + gbl_reserve = hugepage_subpool_put_pages(spool, 1); + hugetlb_acct_memory(h, -gbl_reserve); + } + + out_end_reservation: if (map_chg != MAP_CHG_ENFORCED) vma_end_reservation(h, vma, addr); @@ -7239,7 +7247,7 @@ bool hugetlb_reserve_pages(struct inode *inode, struct vm_area_struct *vma, vm_flags_t vm_flags) { - long chg = -1, add = -1; + long chg = -1, add = -1, spool_resv, gbl_resv; struct hstate *h = hstate_inode(inode); struct hugepage_subpool *spool = subpool_inode(inode); struct resv_map *resv_map; @@ -7374,8 +7382,16 @@ bool hugetlb_reserve_pages(struct inode *inode, return true; out_put_pages: - /* put back original number of pages, chg */ - (void)hugepage_subpool_put_pages(spool, chg); + spool_resv = chg - gbl_reserve; + if (spool_resv) { + /* put sub pool's reservation back, chg - gbl_reserve */ + gbl_resv = hugepage_subpool_put_pages(spool, spool_resv); + /* + * subpool's reserved pages can not be put back due to race, + * return to hstate. + */ + hugetlb_acct_memory(h, -gbl_resv); + } out_uncharge_cgroup: hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h), chg * pages_per_huge_page(h), h_cg); From e9f180d7cfde23b9f8eebd60272465176373ab2c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 22 Apr 2025 16:49:42 +0200 Subject: [PATCH 1693/2924] kernel/fork: only call untrack_pfn_clear() on VMAs duplicated for fork() Not intuitive, but vm_area_dup() located in kernel/fork.c is not only used for duplicating VMAs during fork(), but also for duplicating VMAs when splitting VMAs or when mremap()'ing them. VM_PFNMAP mappings can at least get ordinarily mremap()'ed (no change in size) and apparently also shrunk during mremap(), which implies duplicating the VMA in __split_vma() first. In case of ordinary mremap() (no change in size), we first duplicate the VMA in copy_vma_and_data()->copy_vma() to then call untrack_pfn_clear() on the old VMA: we effectively move the VM_PAT reservation. So the untrack_pfn_clear() call on the new VMA duplicating is wrong in that context. Splitting of VMAs seems problematic, because we don't duplicate/adjust the reservation when splitting the VMA. Instead, in memtype_erase() -- called during zapping/munmap -- we shrink a reservation in case only the end address matches: Assume we split a VMA into A and B, both would share a reservation until B is unmapped. So when unmapping B, the reservation would be updated to cover only A. When unmapping A, we would properly remove the now-shrunk reservation. That scenario describes the mremap() shrinking (old_size > new_size), where we split + unmap B, and the untrack_pfn_clear() on the new VMA when is wrong. What if we manage to split a VM_PFNMAP VMA into A and B and unmap A first? It would be broken because we would never free the reservation. Likely, there are ways to trigger such a VMA split outside of mremap(). Affecting other VMA duplication was not intended, vm_area_dup() being used outside of kernel/fork.c was an oversight. So let's fix that for; how to handle VMA splits better should be investigated separately. With a simple reproducer that uses mprotect() to split such a VMA I can trigger x86/PAT: pat_mremap:26448 freeing invalid memtype [mem 0x00000000-0x00000fff] Link: https://lkml.kernel.org/r/20250422144942.2871395-1-david@redhat.com Fixes: dc84bc2aba85 ("x86/mm/pat: Fix VM_PAT handling when fork() fails in copy_page_range()") Signed-off-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Cc: Ingo Molnar Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: Rik van Riel Cc: "H. Peter Anvin" Cc: Linus Torvalds Signed-off-by: Andrew Morton --- kernel/fork.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index c4b26cd8998b8e..168681fc4b25a9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -498,10 +498,6 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) vma_numab_state_init(new); dup_anon_vma_name(orig, new); - /* track_pfn_copy() will later take care of copying internal state. */ - if (unlikely(new->vm_flags & VM_PFNMAP)) - untrack_pfn_clear(new); - return new; } @@ -672,6 +668,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, tmp = vm_area_dup(mpnt); if (!tmp) goto fail_nomem; + + /* track_pfn_copy() will later take care of copying internal state. */ + if (unlikely(tmp->vm_flags & VM_PFNMAP)) + untrack_pfn_clear(tmp); + retval = vma_dup_policy(mpnt, tmp); if (retval) goto fail_nomem_policy; From 4b7c0857f87a4c56e6e0a774939c2504b7afdc00 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 25 Apr 2025 15:43:25 +0800 Subject: [PATCH 1694/2924] mm/memory: fix mapcount / refcount sanity check for mTHP reuse The following WARNING was triggered during swap stress test with mTHP enabled: [ 6609.335758] ------------[ cut here ]------------ [ 6609.337758] WARNING: CPU: 82 PID: 755116 at mm/memory.c:3794 do_wp_page+0x1084/0x10e0 [ 6609.340922] Modules linked in: zram virtiofs [ 6609.342699] CPU: 82 UID: 0 PID: 755116 Comm: sh Kdump: loaded Not tainted 6.15.0-rc1+ #1429 PREEMPT(voluntary) [ 6609.347620] Hardware name: Red Hat KVM/RHEL-AV, BIOS 0.0.0 02/06/2015 [ 6609.349909] RIP: 0010:do_wp_page+0x1084/0x10e0 [ 6609.351532] Code: ff ff 48 c7 c6 80 ba 49 82 4c 89 ef e8 95 fd fe ff 0f 0b bd f5 ff ff ff e9 43 fb ff ff 41 83 a9 bc 12 00 00 01 e9 5c fb ff ff <0f> 0b e9 a6 fc ff ff 65 ff 00 f0 48 0f b a 6d 00 1f 0f 83 82 fc ff [ 6609.357959] RSP: 0000:ffffc90002273d40 EFLAGS: 00010287 [ 6609.359915] RAX: 000000000000000f RBX: 0000000000000000 RCX: 000fffffffe00000 [ 6609.362606] RDX: 0000000000000010 RSI: 000055a119ac1000 RDI: ffffea000ae6ec00 [ 6609.365143] RBP: ffffea000ae6ec68 R08: 84000002b9bb1025 R09: 000055a119ab6000 [ 6609.367569] R10: ffff8881caa2ad80 R11: 0000000000000000 R12: ffff8881caa2ad80 [ 6609.370255] R13: ffffea000ae6ec00 R14: 000055a119ac1c9c R15: ffffc90002273dd8 [ 6609.373007] FS: 00007f08e467f740(0000) GS:ffff88a07c214000(0000) knlGS:0000000000000000 [ 6609.375999] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6609.377946] CR2: 000055a119ac1c9c CR3: 00000001adfd6005 CR4: 0000000000770eb0 [ 6609.380376] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 6609.382853] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 6609.385216] PKRU: 55555554 [ 6609.386141] Call Trace: [ 6609.387017] [ 6609.387718] ? ___pte_offset_map+0x1b/0x110 [ 6609.389056] __handle_mm_fault+0xa51/0xf00 [ 6609.390363] ? exc_page_fault+0x6a/0x140 [ 6609.391629] handle_mm_fault+0x13d/0x360 [ 6609.392856] do_user_addr_fault+0x2f2/0x7f0 [ 6609.394160] ? sigprocmask+0x77/0xa0 [ 6609.395375] exc_page_fault+0x6a/0x140 [ 6609.396735] asm_exc_page_fault+0x26/0x30 [ 6609.398224] RIP: 0033:0x55a1050bc18b [ 6609.399567] Code: 8b 3f 4d 85 ff 74 40 41 39 5f 18 75 f2 49 8b 7f 08 44 38 27 75 e9 4c 89 c6 4c 89 45 c8 e8 bd 83 fa ff 4c 8b 45 c8 85 c0 75 d5 <41> 83 47 1c 01 48 83 c4 28 4c 89 f8 5b 4 1 5c 41 5d 41 5e 41 5f 5d [ 6609.405971] RSP: 002b:00007ffcf5f37d90 EFLAGS: 00010246 [ 6609.407737] RAX: 0000000000000000 RBX: 00000000182768fa RCX: 0000000000000000 [ 6609.410151] RDX: 00000000000000fa RSI: 000055a105175c7b RDI: 000055a119ac1c60 [ 6609.412606] RBP: 00007ffcf5f37de0 R08: 000055a105175c7b R09: 0000000000000000 [ 6609.414998] R10: 000000004d2dfb5a R11: 0000000000000246 R12: 0000000000000050 [ 6609.417193] R13: 00000000000000fa R14: 000055a119abaf60 R15: 000055a119ac1c80 [ 6609.419268] [ 6609.419928] ---[ end trace 0000000000000000 ]--- The WARN_ON here is simply incorrect. The refcount here must be at least the mapcount, not the opposite. Each mapcount must have a corresponding refcount, but the refcount may increase if other components grab the folio, which is acceptable. Meanwhile, having a mapcount larger than refcount is a real problem. So fix the WARN_ON condition. Link: https://lkml.kernel.org/r/20250425074325.61833-1-ryncsn@gmail.com Fixes: 1da190f4d0a6 ("mm: Copy-on-Write (COW) reuse support for PTE-mapped THP") Signed-off-by: Kairui Song Reported-by: Kairui Song Closes: https://lore.kernel.org/all/CAMgjq7D+ea3eg9gRCVvRnto3Sv3_H3WVhupX4e=k8T5QAfBHbw@mail.gmail.com/ Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Reviewed-by: Anshuman Khandual Reviewed-by: Oscar Salvador Signed-off-by: Andrew Morton --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index ba3ea0a82f7f77..49199410805cd0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3751,7 +3751,7 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio, /* Stabilize the mapcount vs. refcount and recheck. */ folio_lock_large_mapcount(folio); - VM_WARN_ON_ONCE(folio_large_mapcount(folio) < folio_ref_count(folio)); + VM_WARN_ON_ONCE_FOLIO(folio_large_mapcount(folio) > folio_ref_count(folio), folio); if (folio_test_large_maybe_mapped_shared(folio)) goto unlock; From 0ae0227fa31dda5bfc6b5a0145952d46fe57408b Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Tue, 6 May 2025 03:30:34 +0800 Subject: [PATCH 1695/2924] mm/codetag: move tag retrieval back upfront in __free_pages() Commit 51ff4d7486f0 ("mm: avoid extra mem_alloc_profiling_enabled() checks") introduces a possible use-after-free scenario, when page is non-compound, page[0] could be released by other thread right after put_page_testzero failed in current thread, pgalloc_tag_sub_pages afterwards would manipulate an invalid page for accounting remaining pages: [timeline] [thread1] [thread2] | alloc_page non-compound V | get_page, rf counter inc V | in ___free_pages | put_page_testzero fails V | put_page, page released V | in ___free_pages, | pgalloc_tag_sub_pages | manipulate an invalid page V Restore __free_pages() to its state before, retrieve alloc tag beforehand. Link: https://lkml.kernel.org/r/20250505193034.91682-1-00107082@163.com Fixes: 51ff4d7486f0 ("mm: avoid extra mem_alloc_profiling_enabled() checks") Signed-off-by: David Wang <00107082@163.com> Acked-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Cc: Brendan Jackman Cc: Johannes Weiner Cc: Michal Hocko Cc: Shakeel Butt Cc: Vlastimil Babka Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 8 ++++++++ mm/page_alloc.c | 15 ++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index c740779778304a..8a7f4f802c5748 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -188,6 +188,13 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page) return tag; } +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) +{ + if (mem_alloc_profiling_enabled()) + return __pgalloc_tag_get(page); + return NULL; +} + void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); void pgalloc_tag_swap(struct folio *new, struct folio *old); @@ -199,6 +206,7 @@ static inline void clear_page_tag_ref(struct page *page) {} static inline void alloc_tag_sec_init(void) {} static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } #endif /* CONFIG_MEM_ALLOC_PROFILING */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5669baf2a6fea7..1b00e14a97803f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1151,14 +1151,9 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) __pgalloc_tag_sub(page, nr); } -static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) +/* When tag is not NULL, assuming mem_alloc_profiling_enabled */ +static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) { - struct alloc_tag *tag; - - if (!mem_alloc_profiling_enabled()) - return; - - tag = __pgalloc_tag_get(page); if (tag) this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); } @@ -1168,7 +1163,7 @@ static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {} +static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} #endif /* CONFIG_MEM_ALLOC_PROFILING */ @@ -5065,11 +5060,13 @@ static void ___free_pages(struct page *page, unsigned int order, { /* get PageHead before we drop reference */ int head = PageHead(page); + /* get alloc tag in case the page is released by others */ + struct alloc_tag *tag = pgalloc_tag_get(page); if (put_page_testzero(page)) __free_frozen_pages(page, order, fpi_flags); else if (!head) { - pgalloc_tag_sub_pages(page, (1 << order) - 1); + pgalloc_tag_sub_pages(tag, (1 << order) - 1); while (order-- > 0) __free_frozen_pages(page + (1 << order), order, fpi_flags); From d55582d6c947a195ed48a1893ba23d2f077bf224 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Tue, 6 May 2025 18:36:01 +0100 Subject: [PATCH 1696/2924] MAINTAINERS: add mm GUP section As part of the ongoing efforts to sub-divide memory management maintainership and reviewership, establish a section for GUP (Get User Pages) support and add appropriate maintainers and reviewers. Link: https://lkml.kernel.org/r/20250506173601.97562-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: John Hubbard Acked-by: David Hildenbrand Cc: Jason Gunthorpe Cc: Peter Xu Signed-off-by: Andrew Morton --- MAINTAINERS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index f21f1dabb5fe1d..88102bb64dfec5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15542,6 +15542,18 @@ S: Maintained F: include/linux/execmem.h F: mm/execmem.c +MEMORY MANAGEMENT - GUP (GET USER PAGES) +M: Andrew Morton +M: David Hildenbrand +R: Jason Gunthorpe +R: John Hubbard +R: Peter Xu +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: mm/gup.c + MEMORY MANAGEMENT - NUMA MEMBLOCKS AND NUMA EMULATION M: Andrew Morton M: Mike Rapoport From 23fa022a07555a9cd2dcfe827c769a89c4c2e21e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 6 May 2025 14:25:08 +0300 Subject: [PATCH 1697/2924] mm/page_alloc: ensure try_alloc_pages() plays well with unaccepted memory try_alloc_pages() will not attempt to allocate memory if the system has *any* unaccepted memory. Memory is accepted as needed and can remain in the system indefinitely, causing the interface to always fail. Rather than immediately giving up, attempt to use already accepted memory on free lists. Pass 'alloc_flags' to cond_accept_memory() and do not accept new memory for ALLOC_TRYLOCK requests. Found via code inspection - only BPF uses this at present and the runtime effects are unclear. Link: https://lkml.kernel.org/r/20250506112509.905147-2-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Fixes: 97769a53f117 ("mm, bpf: Introduce try_alloc_pages() for opportunistic page allocation") Cc: Alexei Starovoitov Cc: Vlastimil Babka Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: Brendan Jackman Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton --- mm/page_alloc.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1b00e14a97803f..7248e300d36efb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -290,7 +290,8 @@ EXPORT_SYMBOL(nr_online_nodes); #endif static bool page_contains_unaccepted(struct page *page, unsigned int order); -static bool cond_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order, + int alloc_flags); static bool __free_unaccepted(struct page *page); int page_group_by_mobility_disabled __read_mostly; @@ -3611,7 +3612,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, } } - cond_accept_memory(zone, order); + cond_accept_memory(zone, order, alloc_flags); /* * Detect whether the number of free pages is below high @@ -3638,7 +3639,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, gfp_mask)) { int ret; - if (cond_accept_memory(zone, order)) + if (cond_accept_memory(zone, order, alloc_flags)) goto try_this_zone; /* @@ -3691,7 +3692,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, return page; } else { - if (cond_accept_memory(zone, order)) + if (cond_accept_memory(zone, order, alloc_flags)) goto try_this_zone; /* Try again if zone has deferred pages */ @@ -4844,7 +4845,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, goto failed; } - cond_accept_memory(zone, 0); + cond_accept_memory(zone, 0, alloc_flags); retry_this_zone: mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages; if (zone_watermark_fast(zone, 0, mark, @@ -4853,7 +4854,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, break; } - if (cond_accept_memory(zone, 0)) + if (cond_accept_memory(zone, 0, alloc_flags)) goto retry_this_zone; /* Try again if zone has deferred pages */ @@ -7281,7 +7282,8 @@ static inline bool has_unaccepted_memory(void) return static_branch_unlikely(&zones_with_unaccepted_pages); } -static bool cond_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order, + int alloc_flags) { long to_accept, wmark; bool ret = false; @@ -7292,6 +7294,10 @@ static bool cond_accept_memory(struct zone *zone, unsigned int order) if (list_empty(&zone->unaccepted_pages)) return false; + /* Bailout, since try_to_accept_memory_one() needs to take a lock */ + if (alloc_flags & ALLOC_TRYLOCK) + return false; + wmark = promo_wmark_pages(zone); /* @@ -7348,7 +7354,8 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order) return false; } -static bool cond_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order, + int alloc_flags) { return false; } @@ -7419,11 +7426,6 @@ struct page *try_alloc_pages_noprof(int nid, unsigned int order) if (!pcp_allowed_order(order)) return NULL; -#ifdef CONFIG_UNACCEPTED_MEMORY - /* Bailout, since try_to_accept_memory_one() needs to take a lock */ - if (has_unaccepted_memory()) - return NULL; -#endif /* Bailout, since _deferred_grow_zone() needs to take a lock */ if (deferred_pages_enabled()) return NULL; From fefc075182275057ce607effaa3daa9e6e3bdc73 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 6 May 2025 16:32:07 +0300 Subject: [PATCH 1698/2924] mm/page_alloc: fix race condition in unaccepted memory handling The page allocator tracks the number of zones that have unaccepted memory using static_branch_enc/dec() and uses that static branch in hot paths to determine if it needs to deal with unaccepted memory. Borislav and Thomas pointed out that the tracking is racy: operations on static_branch are not serialized against adding/removing unaccepted pages to/from the zone. Sanity checks inside static_branch machinery detects it: WARNING: CPU: 0 PID: 10 at kernel/jump_label.c:276 __static_key_slow_dec_cpuslocked+0x8e/0xa0 The comment around the WARN() explains the problem: /* * Warn about the '-1' case though; since that means a * decrement is concurrent with a first (0->1) increment. IOW * people are trying to disable something that wasn't yet fully * enabled. This suggests an ordering problem on the user side. */ The effect of this static_branch optimization is only visible on microbenchmark. Instead of adding more complexity around it, remove it altogether. Link: https://lkml.kernel.org/r/20250506133207.1009676-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Link: https://lore.kernel.org/all/20250506092445.GBaBnVXXyvnazly6iF@fat_crate.local Reported-by: Borislav Petkov Tested-by: Borislav Petkov (AMD) Reported-by: Thomas Gleixner Cc: Vlastimil Babka Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: Brendan Jackman Cc: Johannes Weiner Cc: [6.5+] Signed-off-by: Andrew Morton --- mm/internal.h | 1 - mm/mm_init.c | 1 - mm/page_alloc.c | 47 ----------------------------------------------- 3 files changed, 49 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 25a29872c634ba..5c7a2b43ad7620 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1590,7 +1590,6 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc); #ifdef CONFIG_UNACCEPTED_MEMORY void accept_page(struct page *page); -void unaccepted_cleanup_work(struct work_struct *work); #else /* CONFIG_UNACCEPTED_MEMORY */ static inline void accept_page(struct page *page) { diff --git a/mm/mm_init.c b/mm/mm_init.c index 327764ca0ee4e2..eedce9321e13dc 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1441,7 +1441,6 @@ static void __meminit zone_init_free_lists(struct zone *zone) #ifdef CONFIG_UNACCEPTED_MEMORY INIT_LIST_HEAD(&zone->unaccepted_pages); - INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work); #endif } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7248e300d36efb..8258349e49acb7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7172,16 +7172,8 @@ bool has_managed_dma(void) #ifdef CONFIG_UNACCEPTED_MEMORY -/* Counts number of zones with unaccepted pages. */ -static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages); - static bool lazy_accept = true; -void unaccepted_cleanup_work(struct work_struct *work) -{ - static_branch_dec(&zones_with_unaccepted_pages); -} - static int __init accept_memory_parse(char *p) { if (!strcmp(p, "lazy")) { @@ -7206,11 +7198,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order) static void __accept_page(struct zone *zone, unsigned long *flags, struct page *page) { - bool last; - list_del(&page->lru); - last = list_empty(&zone->unaccepted_pages); - account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); __ClearPageUnaccepted(page); @@ -7219,28 +7207,6 @@ static void __accept_page(struct zone *zone, unsigned long *flags, accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER); __free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL); - - if (last) { - /* - * There are two corner cases: - * - * - If allocation occurs during the CPU bring up, - * static_branch_dec() cannot be used directly as - * it causes a deadlock on cpu_hotplug_lock. - * - * Instead, use schedule_work() to prevent deadlock. - * - * - If allocation occurs before workqueues are initialized, - * static_branch_dec() should be called directly. - * - * Workqueues are initialized before CPU bring up, so this - * will not conflict with the first scenario. - */ - if (system_wq) - schedule_work(&zone->unaccepted_cleanup); - else - unaccepted_cleanup_work(&zone->unaccepted_cleanup); - } } void accept_page(struct page *page) @@ -7277,20 +7243,12 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static inline bool has_unaccepted_memory(void) -{ - return static_branch_unlikely(&zones_with_unaccepted_pages); -} - static bool cond_accept_memory(struct zone *zone, unsigned int order, int alloc_flags) { long to_accept, wmark; bool ret = false; - if (!has_unaccepted_memory()) - return false; - if (list_empty(&zone->unaccepted_pages)) return false; @@ -7328,22 +7286,17 @@ static bool __free_unaccepted(struct page *page) { struct zone *zone = page_zone(page); unsigned long flags; - bool first = false; if (!lazy_accept) return false; spin_lock_irqsave(&zone->lock, flags); - first = list_empty(&zone->unaccepted_pages); list_add_tail(&page->lru, &zone->unaccepted_pages); account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES); __SetPageUnaccepted(page); spin_unlock_irqrestore(&zone->lock, flags); - if (first) - static_branch_inc(&zones_with_unaccepted_pages); - return true; } From 02f5bf89f0b0a50f821425932a3590eeb9f193ac Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 7 May 2025 14:42:24 +0900 Subject: [PATCH 1699/2924] zsmalloc: don't underflow size calculation in zs_obj_write() Do not mix class->size and object size during offsets/sizes calculation in zs_obj_write(). Size classes can merge into clusters, based on objects-per-zspage and pages-per-zspage characteristics, so some size classes can store objects smaller than class->size. This becomes problematic when object size is much smaller than class->size. zsmalloc can falsely decide that object spans two physical pages, because a larger class->size value is used for that check, while the actual object is much smaller and fits the free space of the first physical page, so there is nothing to write to the second page and memcpy() size calculation underflows. Unable to handle kernel paging request at virtual address ffffc00081ff4000 pc : __memcpy+0x10/0x24 lr : zs_obj_write+0x1b0/0x1d0 [zsmalloc] Call trace: __memcpy+0x10/0x24 (P) zram_write_page+0x150/0x4fc [zram] zram_submit_bio+0x5e0/0x6a4 [zram] __submit_bio+0x168/0x220 submit_bio_noacct_nocheck+0x128/0x2c8 submit_bio_noacct+0x19c/0x2f8 This is mostly seen on system with larger page-sizes, because size class cluters of such systems hold wider size ranges than on 4K PAGE_SIZE systems. Assume a 16K PAGE_SIZE system, a write of 820 bytes object to a 864-bytes size class at offset 15560. 15560 + 864 is more than 16384 so zsmalloc attempts to memcpy() it to two physical pages. However, 16384 - 15560 = 824 which is more than 820, so the object in fact doesn't span two physical pages, and there is no data to write to the second physical page. We always know the exact size in bytes of the object that we are about to write (store), so use it instead of class->size. Link: https://lkml.kernel.org/r/20250507054312.4135983-1-senozhatsky@chromium.org Fixes: 44f76413496e ("zsmalloc: introduce new object mapping API") Signed-off-by: Sergey Senozhatsky Reported-by: Igor Belousov Tested-by: Igor Belousov Acked-by: Johannes Weiner Cc: Minchan Kim Signed-off-by: Andrew Morton --- mm/zsmalloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 961b270f023c23..d14a7e317ac8bf 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1243,19 +1243,19 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle, class = zspage_class(pool, zspage); off = offset_in_page(class->size * obj_idx); - if (off + class->size <= PAGE_SIZE) { + if (!ZsHugePage(zspage)) + off += ZS_HANDLE_SIZE; + + if (off + mem_len <= PAGE_SIZE) { /* this object is contained entirely within a page */ void *dst = kmap_local_zpdesc(zpdesc); - if (!ZsHugePage(zspage)) - off += ZS_HANDLE_SIZE; memcpy(dst + off, handle_mem, mem_len); kunmap_local(dst); } else { /* this object spans two pages */ size_t sizes[2]; - off += ZS_HANDLE_SIZE; sizes[0] = PAGE_SIZE - off; sizes[1] = mem_len - sizes[0]; From 75cb1cca2c880179a11c7dd9380b6f14e41a06a4 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Fri, 9 May 2025 10:09:12 +1200 Subject: [PATCH 1700/2924] mm: userfaultfd: correct dirty flags set for both present and swap pte MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As David pointed out, what truly matters for mremap and userfaultfd move operations is the soft dirty bit. The current comment and implementation—which always sets the dirty bit for present PTEs and fails to set the soft dirty bit for swap PTEs—are incorrect. This could break features like Checkpoint-Restore in Userspace (CRIU). This patch updates the behavior to correctly set the soft dirty bit for both present and swap PTEs in accordance with mremap. Link: https://lkml.kernel.org/r/20250508220912.7275-1-21cnbao@gmail.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Barry Song Reported-by: David Hildenbrand Closes: https://lore.kernel.org/linux-mm/02f14ee1-923f-47e3-a994-4950afb9afcc@redhat.com/ Acked-by: Peter Xu Reviewed-by: Suren Baghdasaryan Cc: Lokesh Gidra Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 7d5d709cc838ed..e0db855c89b41a 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1064,8 +1064,13 @@ static int move_present_pte(struct mm_struct *mm, src_folio->index = linear_page_index(dst_vma, dst_addr); orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot); - /* Follow mremap() behavior and treat the entry dirty after the move */ - orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma); + /* Set soft dirty bit so userspace can notice the pte was moved */ +#ifdef CONFIG_MEM_SOFT_DIRTY + orig_dst_pte = pte_mksoft_dirty(orig_dst_pte); +#endif + if (pte_dirty(orig_src_pte)) + orig_dst_pte = pte_mkdirty(orig_dst_pte); + orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma); set_pte_at(mm, dst_addr, dst_pte, orig_dst_pte); out: @@ -1100,6 +1105,9 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, } orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); +#ifdef CONFIG_MEM_SOFT_DIRTY + orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte); +#endif set_pte_at(mm, dst_addr, dst_pte, orig_src_pte); double_pt_unlock(dst_ptl, src_ptl); From 9520a2b3f0b5e182f73410e45b9b92ea51d9b828 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 7 Apr 2025 23:08:44 +0000 Subject: [PATCH 1701/2924] kbuild: Require pahole v1.29 with GENDWARFKSYMS on X86 With CONFIG_GENDWARFKSYMS, __gendwarfksyms_ptr variables are added to the kernel in EXPORT_SYMBOL() to ensure DWARF type information is available for exported symbols in the TUs where they're actually exported. These symbols are dropped when linking vmlinux, but dangling references to them remain in DWARF. With CONFIG_DEBUG_INFO_BTF enabled on X86, pahole versions after commit 47dcb534e253 ("btf_encoder: Stop indexing symbols for VARs") and before commit 9810758003ce ("btf_encoder: Verify 0 address DWARF variables are in ELF section") place these symbols in the .data..percpu section, which results in an "Invalid offset" error in btf_datasec_check_meta() during boot, as all the variables are at zero offset and have non-zero size. If CONFIG_DEBUG_INFO_BTF_MODULES is enabled, this also results in a failure to load modules with: failed to validate module [$module] BTF: -22 As the issue occurs in pahole v1.28 and the fix was merged after v1.29 was released, require pahole v1.29 when GENDWARFKSYMS is enabled with DEBUG_INFO_BTF on X86. Reported-by: Paolo Pisati Signed-off-by: Sami Tolvanen Signed-off-by: Masahiro Yamada --- kernel/module/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index d7762ef5949a2c..39278737bb68fd 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -192,6 +192,11 @@ config GENDWARFKSYMS depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT # Requires ELF object files. depends on !LTO + # To avoid conflicts with the discarded __gendwarfksyms_ptr symbols on + # X86, requires pahole before commit 47dcb534e253 ("btf_encoder: Stop + # indexing symbols for VARs") or after commit 9810758003ce ("btf_encoder: + # Verify 0 address DWARF variables are in ELF section"). + depends on !X86 || !DEBUG_INFO_BTF || PAHOLE_VERSION < 128 || PAHOLE_VERSION > 129 help Calculate symbol versions from DWARF debugging information using gendwarfksyms. Requires DEBUG_INFO to be enabled. From 56045757accf5a51c9146585acd5f76fa4fbdb97 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 25 Apr 2025 20:08:15 -0700 Subject: [PATCH 1702/2924] usr/include: openrisc: don't HDRTEST bpf_perf_event.h Since openrisc does not support PERF_EVENTS, omit the HDRTEST of bpf_perf_event.h for arch/openrisc/. Fixes a build error: usr/include/linux/bpf_perf_event.h:14:28: error: field 'regs' has incomplete type Signed-off-by: Randy Dunlap Acked-by: Stafford Horne Signed-off-by: Masahiro Yamada --- usr/include/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/usr/include/Makefile b/usr/include/Makefile index e3d6b03527fecc..f02f41941b60c8 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -59,6 +59,10 @@ ifeq ($(SRCARCH),arc) no-header-test += linux/bpf_perf_event.h endif +ifeq ($(SRCARCH),openrisc) +no-header-test += linux/bpf_perf_event.h +endif + ifeq ($(SRCARCH),powerpc) no-header-test += linux/bpf_perf_event.h endif From 657f96cb7c06a2ef85d166f1b055baeb6511c324 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Apr 2025 18:54:01 +0800 Subject: [PATCH 1703/2924] kbuild: deb-pkg: Add libdw-dev:native to Build-Depends-Arch The dwarf.h header, which is included by scripts/gendwarfksyms/gendwarfksyms.h, resides within the libdw-dev package. This portion of the code is compiled under the condition that CONFIG_GENDWARFKSYMS is enabled. Consequently, add libdw-dev to Build-Depends-Arch to prevent unforeseen compilation failures. Fix follow possible error: In file included from scripts/gendwarfksyms/symbols.c:6: scripts/gendwarfksyms/gendwarfksyms.h:6:10: fatal error: 'dwarf.h' file not found 6 | #include | ^~~~~~~~~ Fixes: f28568841ae0 ("tools: Add gendwarfksyms") Reviewed-by: Sami Tolvanen Signed-off-by: WangYuli Reviewed-by: Nicolas Schier Tested-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- scripts/package/mkdebian | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index 744ddba01d93f9..d4b007b38a4759 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -210,7 +210,7 @@ Rules-Requires-Root: no Build-Depends: debhelper-compat (= 12) Build-Depends-Arch: bc, bison, flex, gcc-${host_gnu} , - kmod, libelf-dev:native, + kmod, libdw-dev:native, libelf-dev:native, libssl-dev:native, libssl-dev , python3:native, rsync Homepage: https://www.kernel.org/ From 5bd6bdd0f76e257c029ff34100f2959cd4992486 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 22 Apr 2025 18:54:02 +0800 Subject: [PATCH 1704/2924] kbuild: rpm-pkg: Add (elfutils-devel or libdw-devel) to BuildRequires The dwarf.h header, which is included by scripts/gendwarfksyms/gendwarfksyms.h, resides within elfutils-devel or libdw-devel package. This portion of the code is compiled under the condition that CONFIG_GENDWARFKSYMS is enabled. Consequently, add (elfutils-devel or libdw-devel) to BuildRequires to prevent unforeseen compilation failures. Fix follow possible error: In file included from scripts/gendwarfksyms/cache.c:6: scripts/gendwarfksyms/gendwarfksyms.h:6:10: fatal error: 'dwarf.h' file not found 6 | #include | ^~~~~~~~~ Link: https://lore.kernel.org/all/3e52d80d-0c60-4df5-8cb5-21d4b1fce7b7@suse.com/ Fixes: f28568841ae0 ("tools: Add gendwarfksyms") Suggested-by: Petr Pavlu Signed-off-by: WangYuli Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- scripts/package/kernel.spec | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index 726f34e1196018..98f206cb7c6079 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -16,6 +16,7 @@ Source1: config Source2: diff.patch Provides: kernel-%{KERNELRELEASE} BuildRequires: bc binutils bison dwarves +BuildRequires: (elfutils-devel or libdw-devel) BuildRequires: (elfutils-libelf-devel or libelf-devel) flex BuildRequires: gcc make openssl openssl-devel perl python3 rsync From d0afcfeb9e3810ec89d1ffde1a0e36621bb75dca Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 6 May 2025 14:02:01 -0700 Subject: [PATCH 1705/2924] kbuild: Disable -Wdefault-const-init-unsafe A new on by default warning in clang [1] aims to flags instances where const variables without static or thread local storage or const members in aggregate types are not initialized because it can lead to an indeterminate value. This is quite noisy for the kernel due to instances originating from header files such as: drivers/gpu/drm/i915/gt/intel_ring.h:62:2: error: default initialization of an object of type 'typeof (ring->size)' (aka 'const unsigned int') leaves the object uninitialized [-Werror,-Wdefault-const-init-var-unsafe] 62 | typecheck(typeof(ring->size), next); | ^ include/linux/typecheck.h:10:9: note: expanded from macro 'typecheck' 10 | ({ type __dummy; \ | ^ include/net/ip.h:478:14: error: default initialization of an object of type 'typeof (rt->dst.expires)' (aka 'const unsigned long') leaves the object uninitialized [-Werror,-Wdefault-const-init-var-unsafe] 478 | if (mtu && time_before(jiffies, rt->dst.expires)) | ^ include/linux/jiffies.h:138:26: note: expanded from macro 'time_before' 138 | #define time_before(a,b) time_after(b,a) | ^ include/linux/jiffies.h:128:3: note: expanded from macro 'time_after' 128 | (typecheck(unsigned long, a) && \ | ^ include/linux/typecheck.h:11:12: note: expanded from macro 'typecheck' 11 | typeof(x) __dummy2; \ | ^ include/linux/list.h:409:27: warning: default initialization of an object of type 'union (unnamed union at include/linux/list.h:409:27)' with const member leaves the object uninitialized [-Wdefault-const-init-field-unsafe] 409 | struct list_head *next = smp_load_acquire(&head->next); | ^ include/asm-generic/barrier.h:176:29: note: expanded from macro 'smp_load_acquire' 176 | #define smp_load_acquire(p) __smp_load_acquire(p) | ^ arch/arm64/include/asm/barrier.h:164:59: note: expanded from macro '__smp_load_acquire' 164 | union { __unqual_scalar_typeof(*p) __val; char __c[1]; } __u; \ | ^ include/linux/list.h:409:27: note: member '__val' declared 'const' here crypto/scatterwalk.c:66:22: error: default initialization of an object of type 'struct scatter_walk' with const member leaves the object uninitialized [-Werror,-Wdefault-const-init-field-unsafe] 66 | struct scatter_walk walk; | ^ include/crypto/algapi.h:112:15: note: member 'addr' declared 'const' here 112 | void *const addr; | ^ fs/hugetlbfs/inode.c:733:24: error: default initialization of an object of type 'struct vm_area_struct' with const member leaves the object uninitialized [-Werror,-Wdefault-const-init-field-unsafe] 733 | struct vm_area_struct pseudo_vma; | ^ include/linux/mm_types.h:803:20: note: member 'vm_flags' declared 'const' here 803 | const vm_flags_t vm_flags; | ^ Silencing the instances from typecheck.h is difficult because '= {}' is not available in older but supported compilers and '= {0}' would cause warnings about a literal 0 being treated as NULL. While it might be possible to come up with a local hack to silence the warning for clang-21+, it may not be worth it since -Wuninitialized will still trigger if an uninitialized const variable is actually used. In all audited cases of the "field" variant of the warning, the members are either not used in the particular call path, modified through other means such as memset() / memcpy() because the containing object is not const, or are within a union with other non-const members. Since this warning does not appear to have a high signal to noise ratio, just disable it. Cc: stable@vger.kernel.org Link: https://github.com/llvm/llvm-project/commit/576161cb6069e2c7656a8ef530727a0f4aefff30 [1] Reported-by: Linux Kernel Functional Testing Closes: https://lore.kernel.org/CA+G9fYuNjKcxFKS_MKPRuga32XbndkLGcY-PVuoSwzv6VWbY=w@mail.gmail.com/ Reported-by: Marcus Seyfarth Closes: https://github.com/ClangBuiltLinux/linux/issues/2088 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/Makefile.extrawarn | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index d88acdf4085524..fd649c68e198ba 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -37,6 +37,18 @@ KBUILD_CFLAGS += -Wno-gnu # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf) + +# Clang may emit a warning when a const variable, such as the dummy variables +# in typecheck(), or const member of an aggregate type are not initialized, +# which can result in unexpected behavior. However, in many audited cases of +# the "field" variant of the warning, this is intentional because the field is +# never used within a particular call path, the field is within a union with +# other non-const members, or the containing object is not const so the field +# can be modified via memcpy() / memset(). While the variable warning also gets +# disabled with this same switch, there should not be too much coverage lost +# because -Wuninitialized will still flag when an uninitialized const variable +# is used. +KBUILD_CFLAGS += $(call cc-disable-warning, default-const-init-unsafe) else # gcc inanely warns about local variables called 'main' From ab09da75700e9d25c7dfbc7f7934920beb5e39b9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 7 May 2025 16:49:33 +0900 Subject: [PATCH 1706/2924] um: let 'make clean' properly clean underlying SUBARCH as well Building the kernel with O= is affected by stale in-tree build artifacts. So, if the source tree is not clean, Kbuild displays the following: $ make ARCH=um O=build defconfig make[1]: Entering directory '/.../linux/build' *** *** The source tree is not clean, please run 'make ARCH=um mrproper' *** in /.../linux *** make[2]: *** [/.../linux/Makefile:673: outputmakefile] Error 1 make[1]: *** [/.../linux/Makefile:248: __sub-make] Error 2 make[1]: Leaving directory '/.../linux/build' make: *** [Makefile:248: __sub-make] Error 2 Usually, running 'make mrproper' is sufficient for cleaning the source tree for out-of-tree builds. However, building UML generates build artifacts not only in arch/um/, but also in the SUBARCH directory (i.e., arch/x86/). If in-tree stale files remain under arch/x86/, Kbuild will reuse them instead of creating new ones under the specified build directory. This commit makes 'make ARCH=um clean' recurse into the SUBARCH directory. Reported-by: Shuah Khan Closes: https://lore.kernel.org/lkml/20250502172459.14175-1-skhan@linuxfoundation.org/ Signed-off-by: Masahiro Yamada Acked-by: Johannes Berg Reviewed-by: David Gow Reviewed-by: Shuah Khan --- arch/um/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/um/Makefile b/arch/um/Makefile index 1d36a613aad83d..9ed792e565c917 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -154,5 +154,6 @@ MRPROPER_FILES += $(HOST_DIR)/include/generated archclean: @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -o -name '*.gcov' \) -type f -print | xargs rm -f + $(Q)$(MAKE) -f $(srctree)/Makefile ARCH=$(HEADER_ARCH) clean export HEADER_ARCH SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING DEV_NULL_PATH From d1b99cdf22e0416440265166824ebabfcb5f1afa Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 9 May 2025 22:23:59 +0900 Subject: [PATCH 1707/2924] init: remove unused CONFIG_CC_CAN_LINK_STATIC This is a leftover from commit 98e20e5e13d2 ("bpfilter: remove bpfilter"). Signed-off-by: Masahiro Yamada --- init/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 4cdd1049283c15..bf3a920064bec1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -87,11 +87,6 @@ config CC_CAN_LINK default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag)) -config CC_CAN_LINK_STATIC - bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static) - # Fixed in GCC 14, 13.3, 12.4 and 11.5 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 config GCC_ASM_GOTO_OUTPUT_BROKEN From f0e4b333cf67b3d5da56bd01a125f45c102f7d27 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 11 May 2025 12:55:19 +0900 Subject: [PATCH 1708/2924] kbuild: fix dependency on sorttable Commit ac4f06789b4f ("kbuild: Create intermediate vmlinux build with relocations preserved") missed replacing one occurrence of "vmlinux" that was added during the same development cycle. Fixes: ac4f06789b4f ("kbuild: Create intermediate vmlinux build with relocations preserved") Signed-off-by: Masahiro Yamada Acked-by: Ard Biesheuvel --- scripts/Makefile.vmlinux | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 85d60d98640169..0c4bb1274a08fb 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -94,7 +94,7 @@ $(vmlinux-final): $(RESOLVE_BTFIDS) endif ifdef CONFIG_BUILDTIME_TABLE_SORT -vmlinux: scripts/sorttable +$(vmlinux-final): scripts/sorttable endif # module.builtin.ranges From 020d7f14489b8bc38c6bd4c5b3c25262e8b6de63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 11 May 2025 08:02:27 +0200 Subject: [PATCH 1709/2924] Revert "kbuild: make all file references relative to source root" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit cacd22ce69585a91c386243cd662ada962431e63. -ffile-prefix-map breaks the ability of debuggers to find the source file corresponding to object files. As there is no simple or uniform way to specify the source directory explicitly, this breaks developers workflows. Revert the unconditional usage of -ffile-prefix-map. Reported-by: Matthieu Baerts Closes: https://lore.kernel.org/lkml/edc50aa7-0740-4942-8c15-96f12f2acc7e@kernel.org/ Reported-by: Ville Syrjälä Closes: https://lore.kernel.org/lkml/aBEttQH4kimHFScx@intel.com/ Fixes: cacd22ce6958 ("kbuild: make all file references relative to source root") Signed-off-by: Thomas Weißschuh Signed-off-by: Masahiro Yamada --- Documentation/kbuild/reproducible-builds.rst | 17 +++++++++++++++++ Makefile | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst index a7762486c93fcd..f2dcc39044e66d 100644 --- a/Documentation/kbuild/reproducible-builds.rst +++ b/Documentation/kbuild/reproducible-builds.rst @@ -46,6 +46,21 @@ The kernel embeds the building user and host names in `KBUILD_BUILD_USER and KBUILD_BUILD_HOST`_ variables. If you are building from a git commit, you could use its committer address. +Absolute filenames +------------------ + +When the kernel is built out-of-tree, debug information may include +absolute filenames for the source files. This must be overridden by +including the ``-fdebug-prefix-map`` option in the `KCFLAGS`_ variable. + +Depending on the compiler used, the ``__FILE__`` macro may also expand +to an absolute filename in an out-of-tree build. Kbuild automatically +uses the ``-fmacro-prefix-map`` option to prevent this, if it is +supported. + +The Reproducible Builds web site has more information about these +`prefix-map options`_. + Generated files in source packages ---------------------------------- @@ -116,5 +131,7 @@ See ``scripts/setlocalversion`` for details. .. _KBUILD_BUILD_TIMESTAMP: kbuild.html#kbuild-build-timestamp .. _KBUILD_BUILD_USER and KBUILD_BUILD_HOST: kbuild.html#kbuild-build-user-kbuild-build-host +.. _KCFLAGS: kbuild.html#kcflags +.. _prefix-map options: https://reproducible-builds.org/docs/build-path/ .. _Reproducible Builds project: https://reproducible-builds.org/ .. _SOURCE_DATE_EPOCH: https://reproducible-builds.org/docs/source-date-epoch/ diff --git a/Makefile b/Makefile index 64c514f4bc1939..52f84aaf102525 100644 --- a/Makefile +++ b/Makefile @@ -1068,7 +1068,7 @@ KBUILD_CFLAGS += -fno-builtin-wcslen # change __FILE__ to the relative path to the source directory ifdef building_out_of_srctree -KBUILD_CPPFLAGS += $(call cc-option,-ffile-prefix-map=$(srcroot)/=) +KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=) KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/= endif From 8cf5b3f836147d8d4e7c6eb4c01945b97dab8297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 11 May 2025 08:02:28 +0200 Subject: [PATCH 1710/2924] Revert "kbuild, rust: use -fremap-path-prefix to make paths relative" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit dbdffaf50ff9cee3259a7cef8a7bd9e0f0ba9f13. --remap-path-prefix breaks the ability of debuggers to find the source file corresponding to object files. As there is no simple or uniform way to specify the source directory explicitly, this breaks developers workflows. Revert the unconditional usage of --remap-path-prefix, equivalent to the same change for -ffile-prefix-map in KBUILD_CPPFLAGS. Fixes: dbdffaf50ff9 ("kbuild, rust: use -fremap-path-prefix to make paths relative") Signed-off-by: Thomas Weißschuh Acked-by: Miguel Ojeda Signed-off-by: Masahiro Yamada --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index 52f84aaf102525..0eb9e6c49b32aa 100644 --- a/Makefile +++ b/Makefile @@ -1069,7 +1069,6 @@ KBUILD_CFLAGS += -fno-builtin-wcslen # change __FILE__ to the relative path to the source directory ifdef building_out_of_srctree KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=) -KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/= endif # include additional Makefiles when needed From e0cd396d899805d56df91b989f8efad3a36df0da Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 12 May 2025 14:36:58 +0900 Subject: [PATCH 1711/2924] kbuild: fix typos "module.builtin" to "modules.builtin" The filenames in the comments do not match the actual generated files. Signed-off-by: Masahiro Yamada --- scripts/Makefile.vmlinux | 2 +- scripts/Makefile.vmlinux_o | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 0c4bb1274a08fb..b64862dc6f08d4 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -97,7 +97,7 @@ ifdef CONFIG_BUILDTIME_TABLE_SORT $(vmlinux-final): scripts/sorttable endif -# module.builtin.ranges +# modules.builtin.ranges # --------------------------------------------------------------------------- ifdef CONFIG_BUILTIN_MODULE_RANGES __default: modules.builtin.ranges diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 938c7457717ea0..b024ffb3e20187 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -73,7 +73,7 @@ vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE targets += vmlinux.o -# module.builtin.modinfo +# modules.builtin.modinfo # --------------------------------------------------------------------------- OBJCOPYFLAGS_modules.builtin.modinfo := -j .modinfo -O binary @@ -82,7 +82,7 @@ targets += modules.builtin.modinfo modules.builtin.modinfo: vmlinux.o FORCE $(call if_changed,objcopy) -# module.builtin +# modules.builtin # --------------------------------------------------------------------------- # The second line aids cases where multiple modules share the same object. From 9c1798259b9420f38f1fa1b83e3d864c3eb1a83e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 17 Apr 2025 07:34:58 -0300 Subject: [PATCH 1712/2924] drm/tiny: panel-mipi-dbi: Use drm_client_setup_with_fourcc() Since commit 559358282e5b ("drm/fb-helper: Don't use the preferred depth for the BPP default"), RGB565 displays such as the CFAF240320X no longer render correctly: colors are distorted and the content is shown twice horizontally. This regression is due to the fbdev emulation layer defaulting to 32 bits per pixel, whereas the display expects 16 bpp (RGB565). As a result, the framebuffer data is incorrectly interpreted by the panel. Fix the issue by calling drm_client_setup_with_fourcc() with a format explicitly selected based on the display's bits-per-pixel value. For 16 bpp, use DRM_FORMAT_RGB565; for other values, fall back to the previous behavior. This ensures that the allocated framebuffer format matches the hardware expectations, avoiding color and layout corruption. Tested on a CFAF240320X display with an RGB565 configuration, confirming correct colors and layout after applying this patch. Cc: stable@vger.kernel.org Fixes: 559358282e5b ("drm/fb-helper: Don't use the preferred depth for the BPP default") Signed-off-by: Fabio Estevam Reviewed-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250417103458.2496790-1-festevam@gmail.com --- drivers/gpu/drm/tiny/panel-mipi-dbi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tiny/panel-mipi-dbi.c b/drivers/gpu/drm/tiny/panel-mipi-dbi.c index 0460ecaef4bd98..23914a9f7fd376 100644 --- a/drivers/gpu/drm/tiny/panel-mipi-dbi.c +++ b/drivers/gpu/drm/tiny/panel-mipi-dbi.c @@ -390,7 +390,10 @@ static int panel_mipi_dbi_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, drm); - drm_client_setup(drm, NULL); + if (bpp == 16) + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB565); + else + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB888); return 0; } From 3039ed432745f8fdf5cbb43fdc60b2e1aad624c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Mon, 12 May 2025 11:37:30 +0200 Subject: [PATCH 1713/2924] landlock: Improve bit operations in audit code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the BIT() and BIT_ULL() macros in the new audit code instead of explicit shifts to improve readability. Use bitmask instead of modulo operation to simplify code. Add test_range1_rand15() and test_range2_rand15() KUnit tests to improve get_id_range() coverage. Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20250512093732.1408485-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- security/landlock/audit.c | 2 +- security/landlock/id.c | 33 +++++++++++++++++++++++++++++++-- security/landlock/syscalls.c | 3 ++- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/security/landlock/audit.c b/security/landlock/audit.c index 58d5c40d4d0e1d..c52d079cdb77ba 100644 --- a/security/landlock/audit.c +++ b/security/landlock/audit.c @@ -437,7 +437,7 @@ void landlock_log_denial(const struct landlock_cred_security *const subject, return; /* Checks if the current exec was restricting itself. */ - if (subject->domain_exec & (1 << youngest_layer)) { + if (subject->domain_exec & BIT(youngest_layer)) { /* Ignores denials for the same execution. */ if (!youngest_denied->log_same_exec) return; diff --git a/security/landlock/id.c b/security/landlock/id.c index 11fab9259c157f..56f7cc0fc7440f 100644 --- a/security/landlock/id.c +++ b/security/landlock/id.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -25,7 +26,7 @@ static void __init init_id(atomic64_t *const counter, const u32 random_32bits) * Ensures sure 64-bit values are always used by user space (or may * fail with -EOVERFLOW), and makes this testable. */ - init = 1ULL << 32; + init = BIT_ULL(32); /* * Makes a large (2^32) boot-time value to limit ID collision in logs @@ -105,7 +106,7 @@ static u64 get_id_range(size_t number_of_ids, atomic64_t *const counter, * to get a new ID (e.g. a full landlock_restrict_self() call), and the * cost of draining all available IDs during the system's uptime. */ - random_4bits = random_4bits % (1 << 4); + random_4bits &= 0b1111; step = number_of_ids + random_4bits; /* It is safe to cast a signed atomic to an unsigned value. */ @@ -144,6 +145,19 @@ static void test_range1_rand1(struct kunit *const test) init + 2); } +static void test_range1_rand15(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(1, &counter, 15), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 16); +} + static void test_range1_rand16(struct kunit *const test) { atomic64_t counter; @@ -196,6 +210,19 @@ static void test_range2_rand2(struct kunit *const test) init + 4); } +static void test_range2_rand15(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 15), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 17); +} + static void test_range2_rand16(struct kunit *const test) { atomic64_t counter; @@ -232,10 +259,12 @@ static struct kunit_case __refdata test_cases[] = { KUNIT_CASE(test_init_once), KUNIT_CASE(test_range1_rand0), KUNIT_CASE(test_range1_rand1), + KUNIT_CASE(test_range1_rand15), KUNIT_CASE(test_range1_rand16), KUNIT_CASE(test_range2_rand0), KUNIT_CASE(test_range2_rand1), KUNIT_CASE(test_range2_rand2), + KUNIT_CASE(test_range2_rand15), KUNIT_CASE(test_range2_rand16), {} /* clang-format on */ diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index b9561e3417aecb..33eafb71e4f31b 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -563,7 +564,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, new_llcred->domain = new_dom; #ifdef CONFIG_AUDIT - new_llcred->domain_exec |= 1 << (new_dom->num_layers - 1); + new_llcred->domain_exec |= BIT(new_dom->num_layers - 1); #endif /* CONFIG_AUDIT */ return commit_creds(new_cred); From 386cd3dcfd63491619b4034b818737fc0219e128 Mon Sep 17 00:00:00 2001 From: Alexey Makhalov Date: Tue, 18 Mar 2025 00:40:31 +0000 Subject: [PATCH 1714/2924] MAINTAINERS: Update Alexey Makhalov's email address Fix a typo in an email address. Closes: https://lore.kernel.org/all/20240925-rational-succinct-vulture-cca9fb@lemur/T/ Reported-by: Konstantin Ryabitsev Reported-by: Juergen Gross Signed-off-by: Alexey Makhalov Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250318004031.2703923-1-alexey.makhalov@broadcom.com --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 69511c3b2b76fb..0d3a252db2b6c0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18373,7 +18373,7 @@ F: include/uapi/linux/ppdev.h PARAVIRT_OPS INTERFACE M: Juergen Gross R: Ajay Kaher -R: Alexey Makhalov +R: Alexey Makhalov R: Broadcom internal kernel review list L: virtualization@lists.linux.dev L: x86@kernel.org @@ -25859,7 +25859,7 @@ F: drivers/misc/vmw_balloon.c VMWARE HYPERVISOR INTERFACE M: Ajay Kaher -M: Alexey Makhalov +M: Alexey Makhalov R: Broadcom internal kernel review list L: virtualization@lists.linux.dev L: x86@kernel.org @@ -25887,7 +25887,7 @@ F: drivers/scsi/vmw_pvscsi.h VMWARE VIRTUAL PTP CLOCK DRIVER M: Nick Shi R: Ajay Kaher -R: Alexey Makhalov +R: Alexey Makhalov R: Broadcom internal kernel review list L: netdev@vger.kernel.org S: Supported From 2f0c6fceae0df59a9c0edb5d41529b90ee9f2fcb Mon Sep 17 00:00:00 2001 From: Markus Burri Date: Thu, 8 May 2025 15:06:08 +0200 Subject: [PATCH 1715/2924] accel/ivpu: Use effective buffer size for zero terminator Use the effective written size instead of original size as index for zero termination. If the input from user-space is to larger and the input is truncated, the original size is out-of-bound. Since there is an upfront size check here, the change is for consistency. Signed-off-by: Markus Burri Reviewed-by: Jacek Lawrynowicz Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250508130612.82270-3-markus.burri@mt.com --- drivers/accel/ivpu/ivpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index f0dad0c9ce3381..cd24ccd20ba6cc 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -455,7 +455,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t if (ret < 0) return ret; - buf[size] = '\0'; + buf[ret] = '\0'; ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period, &process_quantum); if (ret != 4) From 8098514bd5ca98beca6ec725751d82d0d5b492d8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 9 May 2025 08:38:02 -0700 Subject: [PATCH 1716/2924] block: always allocate integrity buffer when required Many nvme metadata formats can not strip or generate the metadata on the controller side. For these formats, a host provided integrity buffer is mandatory even if it isn't checked. The block integrity read_verify and write_generate attributes prevent allocating the metadata buffer, but we need it when the format requires it, otherwise reads and writes will be rejected by the driver with IO errors. Assume the integrity buffer can be offloaded to the controller if the metadata size is the same as the protection information size. Otherwise provide an unchecked host buffer when the read verify or write generation attributes are disabled. This fixes the following nvme warning: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 371 at drivers/nvme/host/core.c:1036 nvme_setup_rw+0x122/0x210 ... RIP: 0010:nvme_setup_rw+0x122/0x210 ... Call Trace: nvme_setup_cmd+0x1b4/0x280 nvme_queue_rqs+0xc4/0x1f0 [nvme] blk_mq_dispatch_queue_requests+0x24a/0x430 blk_mq_flush_plug_list+0x50/0x140 __blk_flush_plug+0xc1/0x100 __submit_bio+0x1c1/0x360 ? submit_bio_noacct_nocheck+0x2d6/0x3c0 submit_bio_noacct_nocheck+0x2d6/0x3c0 ? submit_bio_noacct+0x47/0x4c0 submit_bio_wait+0x48/0xa0 __blkdev_direct_IO_simple+0xee/0x210 ? current_time+0x1d/0x100 ? current_time+0x1d/0x100 ? __bio_clone+0xb0/0xb0 blkdev_read_iter+0xbb/0x140 vfs_read+0x239/0x310 ksys_read+0x58/0xc0 do_syscall_64+0x6c/0x180 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Signed-off-by: Keith Busch Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250509153802.3482493-1-kbusch@meta.com Signed-off-by: Jens Axboe --- block/bio-integrity-auto.c | 62 +++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c index e524c609be5066..9c665766479201 100644 --- a/block/bio-integrity-auto.c +++ b/block/bio-integrity-auto.c @@ -9,6 +9,7 @@ * not aware of PI. */ #include +#include #include #include "blk.h" @@ -43,6 +44,29 @@ static void bio_integrity_verify_fn(struct work_struct *work) bio_endio(bio); } +#define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG) +static bool bip_should_check(struct bio_integrity_payload *bip) +{ + return bip->bip_flags & BIP_CHECK_FLAGS; +} + +static bool bi_offload_capable(struct blk_integrity *bi) +{ + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + return bi->tuple_size == sizeof(struct crc64_pi_tuple); + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + return bi->tuple_size == sizeof(struct t10_pi_tuple); + default: + pr_warn_once("%s: unknown integrity checksum type:%d\n", + __func__, bi->csum_type); + fallthrough; + case BLK_INTEGRITY_CSUM_NONE: + return false; + } +} + /** * __bio_integrity_endio - Integrity I/O completion function * @bio: Protected bio @@ -54,12 +78,12 @@ static void bio_integrity_verify_fn(struct work_struct *work) */ bool __bio_integrity_endio(struct bio *bio) { - struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_integrity_data *bid = container_of(bip, struct bio_integrity_data, bip); - if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) { + if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && + bip_should_check(bip)) { INIT_WORK(&bid->work, bio_integrity_verify_fn); queue_work(kintegrityd_wq, &bid->work); return false; @@ -84,6 +108,7 @@ bool bio_integrity_prep(struct bio *bio) { struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); struct bio_integrity_data *bid; + bool set_flags = true; gfp_t gfp = GFP_NOIO; unsigned int len; void *buf; @@ -100,19 +125,24 @@ bool bio_integrity_prep(struct bio *bio) switch (bio_op(bio)) { case REQ_OP_READ: - if (bi->flags & BLK_INTEGRITY_NOVERIFY) - return true; + if (bi->flags & BLK_INTEGRITY_NOVERIFY) { + if (bi_offload_capable(bi)) + return true; + set_flags = false; + } break; case REQ_OP_WRITE: - if (bi->flags & BLK_INTEGRITY_NOGENERATE) - return true; - /* * Zero the memory allocated to not leak uninitialized kernel * memory to disk for non-integrity metadata where nothing else * initializes the memory. */ - if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE) + if (bi->flags & BLK_INTEGRITY_NOGENERATE) { + if (bi_offload_capable(bi)) + return true; + set_flags = false; + gfp |= __GFP_ZERO; + } else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE) gfp |= __GFP_ZERO; break; default: @@ -137,19 +167,21 @@ bool bio_integrity_prep(struct bio *bio) bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY; bip_set_seed(&bid->bip, bio->bi_iter.bi_sector); - if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) - bid->bip.bip_flags |= BIP_IP_CHECKSUM; - if (bi->csum_type) - bid->bip.bip_flags |= BIP_CHECK_GUARD; - if (bi->flags & BLK_INTEGRITY_REF_TAG) - bid->bip.bip_flags |= BIP_CHECK_REFTAG; + if (set_flags) { + if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) + bid->bip.bip_flags |= BIP_IP_CHECKSUM; + if (bi->csum_type) + bid->bip.bip_flags |= BIP_CHECK_GUARD; + if (bi->flags & BLK_INTEGRITY_REF_TAG) + bid->bip.bip_flags |= BIP_CHECK_REFTAG; + } if (bio_integrity_add_page(bio, virt_to_page(buf), len, offset_in_page(buf)) < len) goto err_end_io; /* Auto-generate integrity metadata if this is a write */ - if (bio_data_dir(bio) == WRITE) + if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip)) blk_integrity_generate(bio); else bid->saved_bio_iter = bio->bi_iter; From 63166b815dc163b2e46426cecf707dc5923d6d13 Mon Sep 17 00:00:00 2001 From: hexue Date: Mon, 12 May 2025 13:20:25 +0800 Subject: [PATCH 1717/2924] io_uring/uring_cmd: fix hybrid polling initialization issue Modify the check for whether the timer is initialized during IO transfer when passthrough is used with hybrid polling, to ensure that it's always setup correctly. Cc: stable@vger.kernel.org Fixes: 01ee194d1aba ("io_uring: add support for hybrid IOPOLL") Signed-off-by: hexue Link: https://lore.kernel.org/r/20250512052025.293031-1-xue01.he@samsung.com Signed-off-by: Jens Axboe --- io_uring/uring_cmd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index a9ea7d29cdd978..430ed620ddfe01 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -254,6 +254,11 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) return -EOPNOTSUPP; issue_flags |= IO_URING_F_IOPOLL; req->iopoll_completed = 0; + if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) { + /* make sure every req only blocks once */ + req->flags &= ~REQ_F_IOPOLL_STATE; + req->iopoll_start = ktime_get_ns(); + } } ret = file->f_op->uring_cmd(ioucmd, issue_flags); From f446c6311e86618a1f81eb576b56a6266307238f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 12 May 2025 09:06:06 -0600 Subject: [PATCH 1718/2924] io_uring/memmap: don't use page_address() on a highmem page For older/32-bit systems with highmem, don't assume that the pages in a mapped region are always going to be mapped. If io_region_init_ptr() finds that the pages are coalescable, also check if the first page is a HighMem page or not. If it is, fall through to the usual vmap() mapping rather than attempt to get the unmapped page address. Cc: stable@vger.kernel.org Fixes: c4d0ac1c1567 ("io_uring/memmap: optimise single folio regions") Link: https://lore.kernel.org/all/681fe2fb.050a0220.f2294.001a.GAE@google.com/ Reported-by: syzbot+5b8c4abafcb1d791ccfc@syzkaller.appspotmail.com Link: https://lore.kernel.org/all/681fed0a.050a0220.f2294.001c.GAE@google.com/ Reported-by: syzbot+6456a99dfdc2e78c4feb@syzkaller.appspotmail.com Tested-by: syzbot+6456a99dfdc2e78c4feb@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- io_uring/memmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/memmap.c b/io_uring/memmap.c index 76fcc79656b002..07f8a5cbd37ec7 100644 --- a/io_uring/memmap.c +++ b/io_uring/memmap.c @@ -116,7 +116,7 @@ static int io_region_init_ptr(struct io_mapped_region *mr) void *ptr; if (io_check_coalesce_buffer(mr->pages, mr->nr_pages, &ifd)) { - if (ifd.nr_folios == 1) { + if (ifd.nr_folios == 1 && !PageHighMem(mr->pages[0])) { mr->ptr = page_address(mr->pages[0]); return 0; } From 579cb52ecd46ce0351fc3d40882ebdb60332a0bc Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 24 Apr 2025 23:22:23 +0200 Subject: [PATCH 1719/2924] MAINTAINERS: add me as maintainer for the gpio sloppy logic analyzer This was forgotten when the analyzer went upstream. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250424212234.5313-2-wsa+renesas@sang-engineering.com Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index f21f1dabb5fe1d..a634c1c7188c3b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10147,6 +10147,13 @@ F: drivers/gpio/gpio-regmap.c F: include/linux/gpio/regmap.h K: (devm_)?gpio_regmap_(un)?register +GPIO SLOPPY LOGIC ANALYZER +M: Wolfram Sang +S: Supported +F: Documentation/dev-tools/gpio-sloppy-logic-analyzer.rst +F: drivers/gpio/gpio-sloppy-logic-analyzer.c +F: tools/gpio/gpio-sloppy-logic-analyzer.sh + GPIO SUBSYSTEM M: Linus Walleij M: Bartosz Golaszewski From 1b0c192c92ea1fe2dcb178f84adf15fe37c3e7c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 9 May 2025 15:26:57 -0400 Subject: [PATCH 1720/2924] tracing: samples: Initialize trace_array_printk() with the correct function When using trace_array_printk() on a created instance, the correct function to use to initialize it is: trace_array_init_printk() Not trace_printk_init_buffer() The former is a proper function to use, the latter is for initializing trace_printk() and causes the NOTICE banner to be displayed. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Divya Indi Link: https://lore.kernel.org/20250509152657.0f6744d9@gandalf.local.home Fixes: 89ed42495ef4a ("tracing: Sample module to demonstrate kernel access to Ftrace instances.") Fixes: 38ce2a9e33db6 ("tracing: Add trace_array_init_printk() to initialize instance trace_printk() buffers") Signed-off-by: Steven Rostedt (Google) --- samples/ftrace/sample-trace-array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c index dac67c3674576d..4147616102f906 100644 --- a/samples/ftrace/sample-trace-array.c +++ b/samples/ftrace/sample-trace-array.c @@ -112,7 +112,7 @@ static int __init sample_trace_array_init(void) /* * If context specific per-cpu buffers havent already been allocated. */ - trace_printk_init_buffers(); + trace_array_init_printk(tr); simple_tsk = kthread_run(simple_thread, NULL, "sample-instance"); if (IS_ERR(simple_tsk)) { From 54db6d1bdd71fa90172a2a6aca3308bbf7fa7eb5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 5 May 2025 16:03:16 +0100 Subject: [PATCH 1721/2924] btrfs: fix discard worker infinite loop after disabling discard If the discard worker is running and there's currently only one block group, that block group is a data block group, it's in the unused block groups discard list and is being used (it got an extent allocated from it after becoming unused), the worker can end up in an infinite loop if a transaction abort happens or the async discard is disabled (during remount or unmount for example). This happens like this: 1) Task A, the discard worker, is at peek_discard_list() and find_next_block_group() returns block group X; 2) Block group X is in the unused block groups discard list (its discard index is BTRFS_DISCARD_INDEX_UNUSED) since at some point in the past it become an unused block group and was added to that list, but then later it got an extent allocated from it, so its ->used counter is not zero anymore; 3) The current transaction is aborted by task B and we end up at __btrfs_handle_fs_error() in the transaction abort path, where we call btrfs_discard_stop(), which clears BTRFS_FS_DISCARD_RUNNING from fs_info, and then at __btrfs_handle_fs_error() we set the fs to RO mode (setting SB_RDONLY in the super block's s_flags field); 4) Task A calls __add_to_discard_list() with the goal of moving the block group from the unused block groups discard list into another discard list, but at __add_to_discard_list() we end up doing nothing because btrfs_run_discard_work() returns false, since the super block has SB_RDONLY set in its flags and BTRFS_FS_DISCARD_RUNNING is not set anymore in fs_info->flags. So block group X remains in the unused block groups discard list; 5) Task A then does a goto into the 'again' label, calls find_next_block_group() again we gets block group X again. Then it repeats the previous steps over and over since there are not other block groups in the discard lists and block group X is never moved out of the unused block groups discard list since btrfs_run_discard_work() keeps returning false and therefore __add_to_discard_list() doesn't move block group X out of that discard list. When this happens we can get a soft lockup report like this: [71.957] watchdog: BUG: soft lockup - CPU#0 stuck for 27s! [kworker/u4:3:97] [71.957] Modules linked in: xfs af_packet rfkill (...) [71.957] CPU: 0 UID: 0 PID: 97 Comm: kworker/u4:3 Tainted: G W 6.14.2-1-default #1 openSUSE Tumbleweed 968795ef2b1407352128b466fe887416c33af6fa [71.957] Tainted: [W]=WARN [71.957] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 [71.957] Workqueue: btrfs_discard btrfs_discard_workfn [btrfs] [71.957] RIP: 0010:btrfs_discard_workfn+0xc4/0x400 [btrfs] [71.957] Code: c1 01 48 83 (...) [71.957] RSP: 0018:ffffafaec03efe08 EFLAGS: 00000246 [71.957] RAX: ffff897045500000 RBX: ffff8970413ed8d0 RCX: 0000000000000000 [71.957] RDX: 0000000000000001 RSI: ffff8970413ed8d0 RDI: 0000000a8f1272ad [71.957] RBP: 0000000a9d61c60e R08: ffff897045500140 R09: 8080808080808080 [71.957] R10: ffff897040276800 R11: fefefefefefefeff R12: ffff8970413ed860 [71.957] R13: ffff897045500000 R14: ffff8970413ed868 R15: 0000000000000000 [71.957] FS: 0000000000000000(0000) GS:ffff89707bc00000(0000) knlGS:0000000000000000 [71.957] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [71.957] CR2: 00005605bcc8d2f0 CR3: 000000010376a001 CR4: 0000000000770ef0 [71.957] PKRU: 55555554 [71.957] Call Trace: [71.957] [71.957] process_one_work+0x17e/0x330 [71.957] worker_thread+0x2ce/0x3f0 [71.957] ? __pfx_worker_thread+0x10/0x10 [71.957] kthread+0xef/0x220 [71.957] ? __pfx_kthread+0x10/0x10 [71.957] ret_from_fork+0x34/0x50 [71.957] ? __pfx_kthread+0x10/0x10 [71.957] ret_from_fork_asm+0x1a/0x30 [71.957] [71.957] Kernel panic - not syncing: softlockup: hung tasks [71.987] CPU: 0 UID: 0 PID: 97 Comm: kworker/u4:3 Tainted: G W L 6.14.2-1-default #1 openSUSE Tumbleweed 968795ef2b1407352128b466fe887416c33af6fa [71.989] Tainted: [W]=WARN, [L]=SOFTLOCKUP [71.989] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 [71.991] Workqueue: btrfs_discard btrfs_discard_workfn [btrfs] [71.992] Call Trace: [71.993] [71.994] dump_stack_lvl+0x5a/0x80 [71.994] panic+0x10b/0x2da [71.995] watchdog_timer_fn.cold+0x9a/0xa1 [71.996] ? __pfx_watchdog_timer_fn+0x10/0x10 [71.997] __hrtimer_run_queues+0x132/0x2a0 [71.997] hrtimer_interrupt+0xff/0x230 [71.998] __sysvec_apic_timer_interrupt+0x55/0x100 [71.999] sysvec_apic_timer_interrupt+0x6c/0x90 [72.000] [72.000] [72.001] asm_sysvec_apic_timer_interrupt+0x1a/0x20 [72.002] RIP: 0010:btrfs_discard_workfn+0xc4/0x400 [btrfs] [72.002] Code: c1 01 48 83 (...) [72.005] RSP: 0018:ffffafaec03efe08 EFLAGS: 00000246 [72.006] RAX: ffff897045500000 RBX: ffff8970413ed8d0 RCX: 0000000000000000 [72.006] RDX: 0000000000000001 RSI: ffff8970413ed8d0 RDI: 0000000a8f1272ad [72.007] RBP: 0000000a9d61c60e R08: ffff897045500140 R09: 8080808080808080 [72.008] R10: ffff897040276800 R11: fefefefefefefeff R12: ffff8970413ed860 [72.009] R13: ffff897045500000 R14: ffff8970413ed868 R15: 0000000000000000 [72.010] ? btrfs_discard_workfn+0x51/0x400 [btrfs 23b01089228eb964071fb7ca156eee8cd3bf996f] [72.011] process_one_work+0x17e/0x330 [72.012] worker_thread+0x2ce/0x3f0 [72.013] ? __pfx_worker_thread+0x10/0x10 [72.014] kthread+0xef/0x220 [72.014] ? __pfx_kthread+0x10/0x10 [72.015] ret_from_fork+0x34/0x50 [72.015] ? __pfx_kthread+0x10/0x10 [72.016] ret_from_fork_asm+0x1a/0x30 [72.017] [72.017] Kernel Offset: 0x15000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [72.019] Rebooting in 90 seconds.. So fix this by making sure we move a block group out of the unused block groups discard list when calling __add_to_discard_list(). Fixes: 2bee7eb8bb81 ("btrfs: discard one region at a time in async discard") Link: https://bugzilla.suse.com/show_bug.cgi?id=1242012 CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Boris Burkov Reviewed-by: Daniel Vacek Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/discard.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index d6eef4bd9e9d45..de23c4b3515e58 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -94,8 +94,6 @@ static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, struct btrfs_block_group *block_group) { lockdep_assert_held(&discard_ctl->lock); - if (!btrfs_run_discard_work(discard_ctl)) - return; if (list_empty(&block_group->discard_list) || block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) { @@ -118,6 +116,9 @@ static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, if (!btrfs_is_block_group_data_only(block_group)) return; + if (!btrfs_run_discard_work(discard_ctl)) + return; + spin_lock(&discard_ctl->lock); __add_to_discard_list(discard_ctl, block_group); spin_unlock(&discard_ctl->lock); @@ -244,6 +245,18 @@ static struct btrfs_block_group *peek_discard_list( block_group->used != 0) { if (btrfs_is_block_group_data_only(block_group)) { __add_to_discard_list(discard_ctl, block_group); + /* + * The block group must have been moved to other + * discard list even if discard was disabled in + * the meantime or a transaction abort happened, + * otherwise we can end up in an infinite loop, + * always jumping into the 'again' label and + * keep getting this block group over and over + * in case there are no other block groups in + * the discard lists. + */ + ASSERT(block_group->discard_index != + BTRFS_DISCARD_INDEX_UNUSED); } else { list_del_init(&block_group->discard_list); btrfs_put_block_group(block_group); From a0fd1c6098633f9a95fc2f636383546c82b704c3 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 7 May 2025 12:42:24 -0700 Subject: [PATCH 1722/2924] btrfs: fix folio leak in submit_one_async_extent() If btrfs_reserve_extent() fails while submitting an async_extent for a compressed write, then we fail to call free_async_extent_pages() on the async_extent and leak its folios. A likely cause for such a failure would be btrfs_reserve_extent() failing to find a large enough contiguous free extent for the compressed extent. I was able to reproduce this by: 1. mount with compress-force=zstd:3 2. fallocating most of a filesystem to a big file 3. fragmenting the remaining free space 4. trying to copy in a file which zstd would generate large compressed extents for (vmlinux worked well for this) Step 4. hits the memory leak and can be repeated ad nauseam to eventually exhaust the system memory. Fix this by detecting the case where we fallback to uncompressed submission for a compressed async_extent and ensuring that we call free_async_extent_pages(). Fixes: 131a821a243f ("btrfs: fallback if compressed IO fails for ENOSPC") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Filipe Manana Co-developed-by: Josef Bacik Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d295a37fa0491f..c1bd17915f8165 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1109,6 +1109,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, struct extent_state *cached = NULL; struct extent_map *em; int ret = 0; + bool free_pages = false; u64 start = async_extent->start; u64 end = async_extent->start + async_extent->ram_size - 1; @@ -1129,7 +1130,10 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, } if (async_extent->compress_type == BTRFS_COMPRESS_NONE) { + ASSERT(!async_extent->folios); + ASSERT(async_extent->nr_folios == 0); submit_uncompressed_range(inode, async_extent, locked_folio); + free_pages = true; goto done; } @@ -1145,6 +1149,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, * fall back to uncompressed. */ submit_uncompressed_range(inode, async_extent, locked_folio); + free_pages = true; goto done; } @@ -1186,6 +1191,8 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, done: if (async_chunk->blkcg_css) kthread_associate_blkcg(NULL); + if (free_pages) + free_async_extent_pages(async_extent); kfree(async_extent); return; From 4ce2affc6ef9f84b4aebbf18bd5c57397b6024eb Mon Sep 17 00:00:00 2001 From: Kyoji Ogasawara Date: Fri, 9 May 2025 19:26:31 +0900 Subject: [PATCH 1723/2924] btrfs: add back warning for mount option commit values exceeding 300 The Btrfs documentation states that if the commit value is greater than 300 a warning should be issued. The warning was accidentally lost in the new mount API update. Fixes: 6941823cc878 ("btrfs: remove old mount API code") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Qu Wenruo Reviewed-by: Anand Jain Signed-off-by: Kyoji Ogasawara Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/fs.h | 1 + fs/btrfs/super.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index bcca43046064b8..7baa2ed45198f9 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -300,6 +300,7 @@ enum { #define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) +#define BTRFS_WARNING_COMMIT_INTERVAL (300) #define BTRFS_DEFAULT_MAX_INLINE (2048) struct btrfs_dev_replace { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7121d8c7a318ec..7310e2fa852621 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -569,6 +569,10 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param) break; case Opt_commit_interval: ctx->commit_interval = result.uint_32; + if (ctx->commit_interval > BTRFS_WARNING_COMMIT_INTERVAL) { + btrfs_warn(NULL, "excessive commit interval %u, use with care", + ctx->commit_interval); + } if (ctx->commit_interval == 0) ctx->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; break; From c92d6089d8ad7d4d815ebcedee3f3907b539ff1f Mon Sep 17 00:00:00 2001 From: Mathieu Othacehe Date: Fri, 9 May 2025 14:19:35 +0200 Subject: [PATCH 1724/2924] net: cadence: macb: Fix a possible deadlock in macb_halt_tx. There is a situation where after THALT is set high, TGO stays high as well. Because jiffies are never updated, as we are in a context with interrupts disabled, we never exit that loop and have a deadlock. That deadlock was noticed on a sama5d4 device that stayed locked for days. Use retries instead of jiffies so that the timeout really works and we do not have a deadlock anymore. Fixes: e86cd53afc590 ("net/macb: better manage tx errors") Signed-off-by: Mathieu Othacehe Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250509121935.16282-1-othacehe@gnu.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 1fe8ec37491b19..e1e8bd2ec155b8 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -997,22 +997,15 @@ static void macb_update_stats(struct macb *bp) static int macb_halt_tx(struct macb *bp) { - unsigned long halt_time, timeout; - u32 status; + u32 status; macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(THALT)); - timeout = jiffies + usecs_to_jiffies(MACB_HALT_TIMEOUT); - do { - halt_time = jiffies; - status = macb_readl(bp, TSR); - if (!(status & MACB_BIT(TGO))) - return 0; - - udelay(250); - } while (time_before(halt_time, timeout)); - - return -ETIMEDOUT; + /* Poll TSR until TGO is cleared or timeout. */ + return read_poll_timeout_atomic(macb_readl, status, + !(status & MACB_BIT(TGO)), + 250, MACB_HALT_TIMEOUT, false, + bp, TSR); } static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budget) From af5f54b0ef9ef72d5fc7d57f0406372e8f317958 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 9 May 2025 10:28:50 +0300 Subject: [PATCH 1725/2924] net: Lock lower level devices when updating features __netdev_update_features() expects the netdevice to be ops-locked, but it gets called recursively on the lower level netdevices to sync their features, and nothing locks those. This commit fixes that, with the assumption that it shouldn't be possible for both higher-level and lover-level netdevices to require the instance lock, because that would lead to lock dependency warnings. Without this, playing with higher level (e.g. vxlan) netdevices on top of netdevices with instance locking enabled can run into issues: WARNING: CPU: 59 PID: 206496 at ./include/net/netdev_lock.h:17 netif_napi_add_weight_locked+0x753/0xa60 [...] Call Trace: mlx5e_open_channel+0xc09/0x3740 [mlx5_core] mlx5e_open_channels+0x1f0/0x770 [mlx5_core] mlx5e_safe_switch_params+0x1b5/0x2e0 [mlx5_core] set_feature_lro+0x1c2/0x330 [mlx5_core] mlx5e_handle_feature+0xc8/0x140 [mlx5_core] mlx5e_set_features+0x233/0x2e0 [mlx5_core] __netdev_update_features+0x5be/0x1670 __netdev_update_features+0x71f/0x1670 dev_ethtool+0x21c5/0x4aa0 dev_ioctl+0x438/0xae0 sock_ioctl+0x2ba/0x690 __x64_sys_ioctl+0xa78/0x1700 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: 7e4d784f5810 ("net: hold netdev instance lock during rtnetlink operations") Signed-off-by: Cosmin Ratiu Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250509072850.2002821-1-cratiu@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 11da1e272ec205..0d891634c69270 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10441,6 +10441,7 @@ static void netdev_sync_lower_features(struct net_device *upper, if (!(features & feature) && (lower->features & feature)) { netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", &feature, lower->name); + netdev_lock_ops(lower); lower->wanted_features &= ~feature; __netdev_update_features(lower); @@ -10449,6 +10450,7 @@ static void netdev_sync_lower_features(struct net_device *upper, &feature, lower->name); else netdev_features_change(lower); + netdev_unlock_ops(lower); } } } From 498625a8ab2c8e1c9ab5105744310e8d6952cc01 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 9 May 2025 14:38:16 +0300 Subject: [PATCH 1726/2924] net: dsa: sja1105: discard incoming frames in BR_STATE_LISTENING It has been reported that when under a bridge with stp_state=1, the logs get spammed with this message: [ 251.734607] fsl_dpaa2_eth dpni.5 eth0: Couldn't decode source port Further debugging shows the following info associated with packets: source_port=-1, switch_id=-1, vid=-1, vbid=1 In other words, they are data plane packets which are supposed to be decoded by dsa_tag_8021q_find_port_by_vbid(), but the latter (correctly) refuses to do so, because no switch port is currently in BR_STATE_LEARNING or BR_STATE_FORWARDING - so the packet is effectively unexpected. The error goes away after the port progresses to BR_STATE_LEARNING in 15 seconds (the default forward_time of the bridge), because then, dsa_tag_8021q_find_port_by_vbid() can correctly associate the data plane packets with a plausible bridge port in a plausible STP state. Re-reading IEEE 802.1D-1990, I see the following: "4.4.2 Learning: (...) The Forwarding Process shall discard received frames." IEEE 802.1D-2004 further clarifies: "DISABLED, BLOCKING, LISTENING, and BROKEN all correspond to the DISCARDING port state. While those dot1dStpPortStates serve to distinguish reasons for discarding frames, the operation of the Forwarding and Learning processes is the same for all of them. (...) LISTENING represents a port that the spanning tree algorithm has selected to be part of the active topology (computing a Root Port or Designated Port role) but is temporarily discarding frames to guard against loops or incorrect learning." Well, this is not what the driver does - instead it sets mac[port].ingress = true. To get rid of the log spam, prevent unexpected data plane packets to be received by software by discarding them on ingress in the LISTENING state. In terms of blame attribution: the prints only date back to commit d7f9787a763f ("net: dsa: tag_8021q: add support for imprecise RX based on the VBID"). However, the settings would permit a LISTENING port to forward to a FORWARDING port, and the standard suggests that's not OK. Fixes: 640f763f98c2 ("net: dsa: sja1105: Add support for Spanning Tree Protocol") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250509113816.2221992-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index f8454f3b6f9c5d..f674c400f05b29 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2081,6 +2081,7 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, switch (state) { case BR_STATE_DISABLED: case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: /* From UM10944 description of DRPDTAG (why put this there?): * "Management traffic flows to the port regardless of the state * of the INGRESS flag". So BPDUs are still be allowed to pass. @@ -2090,11 +2091,6 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, mac[port].egress = false; mac[port].dyn_learn = false; break; - case BR_STATE_LISTENING: - mac[port].ingress = true; - mac[port].egress = false; - mac[port].dyn_learn = false; - break; case BR_STATE_LEARNING: mac[port].ingress = true; mac[port].egress = false; From e8007fad5457ea547ca63bb011fdb03213571c7e Mon Sep 17 00:00:00 2001 From: Steve Siwinski Date: Thu, 8 May 2025 16:01:22 -0400 Subject: [PATCH 1727/2924] scsi: sd_zbc: block: Respect bio vector limits for REPORT ZONES buffer The REPORT ZONES buffer size is currently limited by the HBA's maximum segment count to ensure the buffer can be mapped. However, the block layer further limits the number of iovec entries to 1024 when allocating a bio. To avoid allocation of buffers too large to be mapped, further restrict the maximum buffer size to BIO_MAX_INLINE_VECS. Replace the UIO_MAXIOV symbolic name with the more contextually appropriate BIO_MAX_INLINE_VECS. Fixes: b091ac616846 ("sd_zbc: Fix report zones buffer allocation") Cc: stable@vger.kernel.org Signed-off-by: Steve Siwinski Link: https://lore.kernel.org/r/20250508200122.243129-1-ssiwinski@atto.com Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- block/bio.c | 2 +- drivers/scsi/sd_zbc.c | 6 +++++- include/linux/bio.h | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/block/bio.c b/block/bio.c index 4e6c85a33d74db..4be592d37fb666 100644 --- a/block/bio.c +++ b/block/bio.c @@ -611,7 +611,7 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask) { struct bio *bio; - if (nr_vecs > UIO_MAXIOV) + if (nr_vecs > BIO_MAX_INLINE_VECS) return NULL; return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask); } diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 7a447ff600d276..a8db66428f80d4 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -169,6 +169,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, unsigned int nr_zones, size_t *buflen) { struct request_queue *q = sdkp->disk->queue; + unsigned int max_segments; size_t bufsize; void *buf; @@ -180,12 +181,15 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, * Furthermore, since the report zone command cannot be split, make * sure that the allocated buffer can always be mapped by limiting the * number of pages allocated to the HBA max segments limit. + * Since max segments can be larger than the max inline bio vectors, + * further limit the allocated buffer to BIO_MAX_INLINE_VECS. */ nr_zones = min(nr_zones, sdkp->zone_info.nr_zones); bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE); bufsize = min_t(size_t, bufsize, queue_max_hw_sectors(q) << SECTOR_SHIFT); - bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); + max_segments = min(BIO_MAX_INLINE_VECS, queue_max_segments(q)); + bufsize = min_t(size_t, bufsize, max_segments << PAGE_SHIFT); while (bufsize >= SECTOR_SIZE) { buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY); diff --git a/include/linux/bio.h b/include/linux/bio.h index cafc7c215de8be..b786ec5bcc81d0 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -11,6 +11,7 @@ #include #define BIO_MAX_VECS 256U +#define BIO_MAX_INLINE_VECS UIO_MAXIOV struct queue_limits; From 40696426b8c8c4f13cf6ac52f0470eed144be4b2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 May 2025 20:35:40 -0700 Subject: [PATCH 1728/2924] nvme-pci: make nvme_pci_npages_prp() __always_inline The only reason nvme_pci_npages_prp() could be used as a compile-time known result in BUILD_BUG_ON() is because the compiler was always choosing to inline the function. Under special circumstances (sanitizer coverage functions disabled for __init functions on ARCH=um), the compiler decided to stop inlining it: drivers/nvme/host/pci.c: In function 'nvme_init': include/linux/compiler_types.h:557:45: error: call to '__compiletime_assert_678' declared with attribute error: BUILD_BUG_ON failed: nvme_pci_npages_prp() > NVME_MAX_NR_ALLOCATIONS 557 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^ include/linux/compiler_types.h:538:25: note: in definition of macro '__compiletime_assert' 538 | prefix ## suffix(); \ | ^~~~~~ include/linux/compiler_types.h:557:9: note: in expansion of macro '_compiletime_assert' 557 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert' 39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) | ^~~~~~~~~~~~~~~~~~ include/linux/build_bug.h:50:9: note: in expansion of macro 'BUILD_BUG_ON_MSG' 50 | BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) | ^~~~~~~~~~~~~~~~ drivers/nvme/host/pci.c:3804:9: note: in expansion of macro 'BUILD_BUG_ON' 3804 | BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_ALLOCATIONS); | ^~~~~~~~~~~~ Force it to be __always_inline to make sure it is always available for use with BUILD_BUG_ON(). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505061846.12FMyRjj-lkp@intel.com/ Fixes: c372cdd1efdf ("nvme-pci: iod npages fits in s8") Signed-off-by: Kees Cook Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2e30e9be7408cb..de084ece213611 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -390,7 +390,7 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, * as it only leads to a small amount of wasted memory for the lifetime of * the I/O. */ -static int nvme_pci_npages_prp(void) +static __always_inline int nvme_pci_npages_prp(void) { unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE; unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE); From 3d8932133dcecbd9bef1559533c1089601006f45 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 8 May 2025 16:57:06 +0200 Subject: [PATCH 1729/2924] nvme-pci: acquire cq_poll_lock in nvme_poll_irqdisable We need to lock this queue for that condition because the timeout work executes per-namespace and can poll the poll CQ. Reported-by: Hannes Reinecke Closes: https://lore.kernel.org/all/20240902130728.1999-1-hare@kernel.org/ Fixes: a0fa9647a54e ("NVMe: add blk polling support") Signed-off-by: Keith Busch Signed-off-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index de084ece213611..a9390ac7211ea7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1202,7 +1202,9 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + spin_lock(&nvmeq->cq_poll_lock); nvme_poll_cq(nvmeq, NULL); + spin_unlock(&nvmeq->cq_poll_lock); enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } From 85adf2094abb9084770dc4ab302aaa9c5d26dd2d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 9 May 2025 08:25:00 +0900 Subject: [PATCH 1730/2924] nvmet: pci-epf: clear completion queue IRQ flag on delete The function nvmet_pci_epf_delete_cq() unconditionally calls nvmet_pci_epf_remove_irq_vector() even for completion queues that do not have interrupts enabled. Furthermore, for completion queues that do have IRQ enabled, deleting and re-creating the completion queue leaves the flag NVMET_PCI_EPF_Q_IRQ_ENABLED set, even if the completion queue is being re-created with IRQ disabled. Fix these issues by calling nvmet_pci_epf_remove_irq_vector() only if NVMET_PCI_EPF_Q_IRQ_ENABLED is set and make sure to always clear that flag. Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Christoph Hellwig --- drivers/nvme/target/pci-epf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index 7fab7f3d79b743..d5442991f2fbd4 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1344,7 +1344,8 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid) cancel_delayed_work_sync(&cq->work); nvmet_pci_epf_drain_queue(cq); - nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); + if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) + nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map); return NVME_SC_SUCCESS; From 2c3a6f6a28051f323baf19b48af86e48b812831d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 9 May 2025 08:25:01 +0900 Subject: [PATCH 1731/2924] nvmet: pci-epf: do not fall back to using INTX if not supported Some endpoint PCIe controllers do not support raising legacy INTX interrupts. This support is indicated by the intx_capable field of struct pci_epc_features. Modify nvmet_pci_epf_raise_irq() to not automatically fallback to trying raising an INTX interrupt after an MSI or MSI-X error if the controller does not support INTX. Signed-off-by: Damien Le Moal Signed-off-by: Christoph Hellwig --- drivers/nvme/target/pci-epf.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index d5442991f2fbd4..bde7818c673d8c 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -636,14 +636,16 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, switch (nvme_epf->irq_type) { case PCI_IRQ_MSIX: case PCI_IRQ_MSI: + /* + * If we fail to raise an MSI or MSI-X interrupt, it is likely + * because the host is using legacy INTX IRQs (e.g. BIOS, + * grub), but we can fallback to the INTX type only if the + * endpoint controller supports this type. + */ ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no, nvme_epf->irq_type, cq->vector + 1); - if (!ret) + if (!ret || !nvme_epf->epc_features->intx_capable) break; - /* - * If we got an error, it is likely because the host is using - * legacy IRQs (e.g. BIOS, grub). - */ fallthrough; case PCI_IRQ_INTX: ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no, From 4236e600bf902202214aa6277e84c4738c56f762 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 9 May 2025 08:25:02 +0900 Subject: [PATCH 1732/2924] nvmet: pci-epf: cleanup nvmet_pci_epf_raise_irq() There is no point in taking the controller irq_lock and calling nvmet_pci_epf_should_raise_irq() for a completion queue which does not have IRQ enabled (NVMET_PCI_EPF_Q_IRQ_ENABLED flag is not set). Move the test for the NVMET_PCI_EPF_Q_IRQ_ENABLED flag out of nvmet_pci_epf_should_raise_irq() to the top of nvmet_pci_epf_raise_irq() to return early when no IRQ should be raised. Also, use dev_err_ratelimited() to avoid a message storm under load when raising IRQs is failing. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Christoph Hellwig --- drivers/nvme/target/pci-epf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index bde7818c673d8c..a6ccf3fcccc239 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -596,9 +596,6 @@ static bool nvmet_pci_epf_should_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, struct nvmet_pci_epf_irq_vector *iv = cq->iv; bool ret; - if (!test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) - return false; - /* IRQ coalescing for the admin queue is not allowed. */ if (!cq->qid) return true; @@ -625,7 +622,8 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, struct pci_epf *epf = nvme_epf->epf; int ret = 0; - if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags)) + if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) || + !test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) return; mutex_lock(&ctrl->irq_lock); @@ -658,7 +656,9 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, } if (ret) - dev_err(ctrl->dev, "Failed to raise IRQ (err=%d)\n", ret); + dev_err_ratelimited(ctrl->dev, + "CQ[%u]: Failed to raise IRQ (err=%d)\n", + cq->qid, ret); unlock: mutex_unlock(&ctrl->irq_lock); From 4f6f3f4fe31695cd69874457573a2236c05fef02 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 9 May 2025 08:25:03 +0900 Subject: [PATCH 1733/2924] nvmet: pci-epf: improve debug message Improve the debug message of nvmet_pci_epf_create_cq() to indicate if a completion queue IRQ is disabled. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Christoph Hellwig --- drivers/nvme/target/pci-epf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index a6ccf3fcccc239..94a908b2340ee7 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1321,8 +1321,14 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags); - dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", - cqid, qsize, cq->qes, cq->vector); + if (test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) + dev_dbg(ctrl->dev, + "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", + cqid, qsize, cq->qes, cq->vector); + else + dev_dbg(ctrl->dev, + "CQ[%u]: %u entries of %zu B, IRQ disabled\n", + cqid, qsize, cq->qes); return NVME_SC_SUCCESS; From 8113d610a79885db5d53b5e01138ed4159e05ce9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 9 May 2025 08:25:04 +0900 Subject: [PATCH 1734/2924] nvmet: pci-epf: remove NVMET_PCI_EPF_Q_IS_SQ The flag NVMET_PCI_EPF_Q_IS_SQ is set but never used. Remove it. Signed-off-by: Damien Le Moal Signed-off-by: Christoph Hellwig --- drivers/nvme/target/pci-epf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index 94a908b2340ee7..7123c855b5a67c 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -62,8 +62,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex); #define NVMET_PCI_EPF_CQ_RETRY_INTERVAL msecs_to_jiffies(1) enum nvmet_pci_epf_queue_flags { - NVMET_PCI_EPF_Q_IS_SQ = 0, /* The queue is a submission queue */ - NVMET_PCI_EPF_Q_LIVE, /* The queue is live */ + NVMET_PCI_EPF_Q_LIVE = 0, /* The queue is live */ NVMET_PCI_EPF_Q_IRQ_ENABLED, /* IRQ is enabled for this queue */ }; @@ -1542,7 +1541,6 @@ static void nvmet_pci_epf_init_queue(struct nvmet_pci_epf_ctrl *ctrl, if (sq) { queue = &ctrl->sq[qid]; - set_bit(NVMET_PCI_EPF_Q_IS_SQ, &queue->flags); } else { queue = &ctrl->cq[qid]; INIT_DELAYED_WORK(&queue->work, nvmet_pci_epf_cq_work); From 76ca05e0abe31a4f47a5b5a85041b5a22c03baf8 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sun, 4 May 2025 10:14:33 +0200 Subject: [PATCH 1735/2924] net: dsa: microchip: let phylink manage PHY EEE configuration on KSZ switches Phylink expects MAC drivers to provide LPI callbacks to properly manage Energy Efficient Ethernet (EEE) configuration. On KSZ switches with integrated PHYs, LPI is internally handled by hardware, while ports without integrated PHYs have no documented MAC-level LPI support. Provide dummy mac_disable_tx_lpi() and mac_enable_tx_lpi() callbacks to satisfy phylink requirements. Also, set default EEE capabilities during phylink initialization where applicable. Since phylink can now gracefully handle optional EEE configuration, remove the need for the MICREL_NO_EEE PHY flag. This change addresses issues caused by incomplete EEE refactoring introduced in commit fe0d4fd9285e ("net: phy: Keep track of EEE configuration"). It is not easily possible to fix all older kernels, but this patch ensures proper behavior on latest kernels and can be considered for backporting to stable kernels starting from v6.14. Fixes: fe0d4fd9285e ("net: phy: Keep track of EEE configuration") Signed-off-by: Oleksij Rempel Cc: stable@vger.kernel.org # v6.14+ Link: https://patch.msgid.link/20250504081434.424489-2-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni --- drivers/net/dsa/microchip/ksz_common.c | 135 ++++++++++++++++++++----- 1 file changed, 107 insertions(+), 28 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 89f0796894af66..f95a9aac56ee1b 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -265,16 +265,70 @@ static void ksz_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface); +/** + * ksz_phylink_mac_disable_tx_lpi() - Callback to signal LPI support (Dummy) + * @config: phylink config structure + * + * This function is a dummy handler. See ksz_phylink_mac_enable_tx_lpi() for + * a detailed explanation of EEE/LPI handling in KSZ switches. + */ +static void ksz_phylink_mac_disable_tx_lpi(struct phylink_config *config) +{ +} + +/** + * ksz_phylink_mac_enable_tx_lpi() - Callback to signal LPI support (Dummy) + * @config: phylink config structure + * @timer: timer value before entering LPI (unused) + * @tx_clock_stop: whether to stop the TX clock in LPI mode (unused) + * + * This function signals to phylink that the driver architecture supports + * LPI management, enabling phylink to control EEE advertisement during + * negotiation according to IEEE Std 802.3 (Clause 78). + * + * Hardware Management of EEE/LPI State: + * For KSZ switch ports with integrated PHYs (e.g., KSZ9893R ports 1-2), + * observation and testing suggest that the actual EEE / Low Power Idle (LPI) + * state transitions are managed autonomously by the hardware based on + * the auto-negotiation results. (Note: While the datasheet describes EEE + * operation based on negotiation, it doesn't explicitly detail the internal + * MAC/PHY interaction, so autonomous hardware management of the MAC state + * for LPI is inferred from observed behavior). + * This hardware control, consistent with the switch's ability to operate + * autonomously via strapping, means MAC-level software intervention is not + * required or exposed for managing the LPI state once EEE is negotiated. + * (Ref: KSZ9893R Data Sheet DS00002420D, primarily Section 4.7.5 explaining + * EEE, also Sections 4.1.7 on Auto-Negotiation and 3.2.1 on Configuration + * Straps). + * + * Additionally, ports configured as MAC interfaces (e.g., KSZ9893R port 3) + * lack documented MAC-level LPI control. + * + * Therefore, this callback performs no action and serves primarily to inform + * phylink of LPI awareness and to document the inferred hardware behavior. + * + * Returns: 0 (Always success) + */ +static int ksz_phylink_mac_enable_tx_lpi(struct phylink_config *config, + u32 timer, bool tx_clock_stop) +{ + return 0; +} + static const struct phylink_mac_ops ksz88x3_phylink_mac_ops = { .mac_config = ksz88x3_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz8_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct phylink_mac_ops ksz8_phylink_mac_ops = { .mac_config = ksz_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz8_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct ksz_dev_ops ksz88xx_dev_ops = { @@ -358,6 +412,8 @@ static const struct phylink_mac_ops ksz9477_phylink_mac_ops = { .mac_config = ksz_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz9477_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct ksz_dev_ops ksz9477_dev_ops = { @@ -401,6 +457,8 @@ static const struct phylink_mac_ops lan937x_phylink_mac_ops = { .mac_config = ksz_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz9477_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct ksz_dev_ops lan937x_dev_ops = { @@ -2016,6 +2074,18 @@ static void ksz_phylink_get_caps(struct dsa_switch *ds, int port, if (dev->dev_ops->get_caps) dev->dev_ops->get_caps(dev, port, config); + + if (ds->ops->support_eee && ds->ops->support_eee(ds, port)) { + memcpy(config->lpi_interfaces, config->supported_interfaces, + sizeof(config->lpi_interfaces)); + + config->lpi_capabilities = MAC_100FD; + if (dev->info->gbit_capable[port]) + config->lpi_capabilities |= MAC_1000FD; + + /* EEE is fully operational */ + config->eee_enabled_default = true; + } } void ksz_r_mib_stats64(struct ksz_device *dev, int port) @@ -3008,31 +3078,6 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) if (!port) return MICREL_KSZ8_P1_ERRATA; break; - case KSZ8567_CHIP_ID: - /* KSZ8567R Errata DS80000752C Module 4 */ - case KSZ8765_CHIP_ID: - case KSZ8794_CHIP_ID: - case KSZ8795_CHIP_ID: - /* KSZ879x/KSZ877x/KSZ876x Errata DS80000687C Module 2 */ - case KSZ9477_CHIP_ID: - /* KSZ9477S Errata DS80000754A Module 4 */ - case KSZ9567_CHIP_ID: - /* KSZ9567S Errata DS80000756A Module 4 */ - case KSZ9896_CHIP_ID: - /* KSZ9896C Errata DS80000757A Module 3 */ - case KSZ9897_CHIP_ID: - case LAN9646_CHIP_ID: - /* KSZ9897R Errata DS80000758C Module 4 */ - /* Energy Efficient Ethernet (EEE) feature select must be manually disabled - * The EEE feature is enabled by default, but it is not fully - * operational. It must be manually disabled through register - * controls. If not disabled, the PHY ports can auto-negotiate - * to enable EEE, and this feature can cause link drops when - * linked to another device supporting EEE. - * - * The same item appears in the errata for all switches above. - */ - return MICREL_NO_EEE; } return 0; @@ -3466,6 +3511,20 @@ static int ksz_max_mtu(struct dsa_switch *ds, int port) return -EOPNOTSUPP; } +/** + * ksz_support_eee - Determine Energy Efficient Ethernet (EEE) support for a + * port + * @ds: Pointer to the DSA switch structure + * @port: Port number to check + * + * This function also documents devices where EEE was initially advertised but + * later withdrawn due to reliability issues, as described in official errata + * documents. These devices are explicitly listed to record known limitations, + * even if there is no technical necessity for runtime checks. + * + * Returns: true if the internal PHY on the given port supports fully + * operational EEE, false otherwise. + */ static bool ksz_support_eee(struct dsa_switch *ds, int port) { struct ksz_device *dev = ds->priv; @@ -3475,15 +3534,35 @@ static bool ksz_support_eee(struct dsa_switch *ds, int port) switch (dev->chip_id) { case KSZ8563_CHIP_ID: + case KSZ9563_CHIP_ID: + case KSZ9893_CHIP_ID: + return true; case KSZ8567_CHIP_ID: + /* KSZ8567R Errata DS80000752C Module 4 */ + case KSZ8765_CHIP_ID: + case KSZ8794_CHIP_ID: + case KSZ8795_CHIP_ID: + /* KSZ879x/KSZ877x/KSZ876x Errata DS80000687C Module 2 */ case KSZ9477_CHIP_ID: - case KSZ9563_CHIP_ID: + /* KSZ9477S Errata DS80000754A Module 4 */ case KSZ9567_CHIP_ID: - case KSZ9893_CHIP_ID: + /* KSZ9567S Errata DS80000756A Module 4 */ case KSZ9896_CHIP_ID: + /* KSZ9896C Errata DS80000757A Module 3 */ case KSZ9897_CHIP_ID: case LAN9646_CHIP_ID: - return true; + /* KSZ9897R Errata DS80000758C Module 4 */ + /* Energy Efficient Ethernet (EEE) feature select must be + * manually disabled + * The EEE feature is enabled by default, but it is not fully + * operational. It must be manually disabled through register + * controls. If not disabled, the PHY ports can auto-negotiate + * to enable EEE, and this feature can cause link drops when + * linked to another device supporting EEE. + * + * The same item appears in the errata for all switches above. + */ + break; } return false; From 8c619eb21b8e87ae95877e9cca9fcb0e3115776e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sun, 4 May 2025 10:14:34 +0200 Subject: [PATCH 1736/2924] net: phy: micrel: remove KSZ9477 EEE quirks now handled by phylink The KSZ9477 PHY driver contained workarounds for broken EEE capability advertisements by manually masking supported EEE modes and forcibly disabling EEE if MICREL_NO_EEE was set. With proper MAC-side EEE handling implemented via phylink, these quirks are no longer necessary. Remove MICREL_NO_EEE handling and the use of ksz9477_get_features(). This simplifies the PHY driver and avoids duplicated EEE management logic. Signed-off-by: Oleksij Rempel Cc: stable@vger.kernel.org # v6.14+ Link: https://patch.msgid.link/20250504081434.424489-3-o.rempel@pengutronix.de Signed-off-by: Paolo Abeni --- drivers/net/phy/micrel.c | 7 ------- include/linux/micrel_phy.h | 1 - 2 files changed, 8 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 24882d30f68589..e2c6569d8c45ca 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2027,12 +2027,6 @@ static int ksz9477_config_init(struct phy_device *phydev) return err; } - /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes - * in this switch shall be regarded as broken. - */ - if (phydev->dev_flags & MICREL_NO_EEE) - phy_disable_eee(phydev); - return kszphy_config_init(phydev); } @@ -5705,7 +5699,6 @@ static struct phy_driver ksphy_driver[] = { .handle_interrupt = kszphy_handle_interrupt, .suspend = genphy_suspend, .resume = ksz9477_resume, - .get_features = ksz9477_get_features, } }; module_phy_driver(ksphy_driver); diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 591bf5b5e8dc22..9af01bdd86d26d 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -44,7 +44,6 @@ #define MICREL_PHY_50MHZ_CLK BIT(0) #define MICREL_PHY_FXEN BIT(1) #define MICREL_KSZ8_P1_ERRATA BIT(2) -#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC From 2b24eb060c2bb9ef79e1d3bcf633ba1bc95215d6 Mon Sep 17 00:00:00 2001 From: Christian Heusel Date: Mon, 12 May 2025 22:23:37 +0200 Subject: [PATCH 1737/2924] ALSA: usb-audio: Add sample rate quirk for Audioengine D1 A user reported on the Arch Linux Forums that their device is emitting the following message in the kernel journal, which is fixed by adding the quirk as submitted in this patch: > kernel: usb 1-2: current rate 8436480 is different from the runtime rate 48000 There also is an entry for this product line added long time ago. Their specific device has the following ID: $ lsusb | grep Audio Bus 001 Device 002: ID 1101:0003 EasyPass Industrial Co., Ltd Audioengine D1 Link: https://bbs.archlinux.org/viewtopic.php?id=305494 Fixes: 93f9d1a4ac593 ("ALSA: usb-audio: Apply sample rate quirk for Audioengine D1") Cc: stable@vger.kernel.org Signed-off-by: Christian Heusel Link: https://patch.msgid.link/20250512-audioengine-quirk-addition-v1-1-4c370af6eff7@heusel.eu Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 9112313a9dbc00..eb192834db68ca 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2250,6 +2250,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x1101, 0x0003, /* Audioengine D1 */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_MIC_RES_16), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ From 66e48ef6ef506c89ec1b3851c6f9f5f80b5835ff Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 13 May 2025 09:31:04 +0200 Subject: [PATCH 1738/2924] ALSA: sh: SND_AICA should depend on SH_DMA_API If CONFIG_SH_DMA_API=n: WARNING: unmet direct dependencies detected for G2_DMA Depends on [n]: SH_DREAMCAST [=y] && SH_DMA_API [=n] Selected by [y]: - SND_AICA [=y] && SOUND [=y] && SND [=y] && SND_SUPERH [=y] && SH_DREAMCAST [=y] SND_AICA selects G2_DMA. As the latter depends on SH_DMA_API, the former should depend on SH_DMA_API, too. Fixes: f477a538c14d07f8 ("sh: dma: fix kconfig dependency for G2_DMA") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505131320.PzgTtl9H-lkp@intel.com/ Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/b90625f8a9078d0d304bafe862cbe3a3fab40082.1747121335.git.geert+renesas@glider.be Signed-off-by: Takashi Iwai --- sound/sh/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/sh/Kconfig b/sound/sh/Kconfig index b75fbb3236a7b9..f5fa09d740b4c9 100644 --- a/sound/sh/Kconfig +++ b/sound/sh/Kconfig @@ -14,7 +14,7 @@ if SND_SUPERH config SND_AICA tristate "Dreamcast Yamaha AICA sound" - depends on SH_DREAMCAST + depends on SH_DREAMCAST && SH_DMA_API select SND_PCM select G2_DMA help From 3e38f946062b4845961ab86b726651b4457b2af8 Mon Sep 17 00:00:00 2001 From: Emanuele Ghidoli Date: Mon, 12 May 2025 11:54:41 +0200 Subject: [PATCH 1739/2924] gpio: pca953x: fix IRQ storm on system wake up If an input changes state during wake-up and is used as an interrupt source, the IRQ handler reads the volatile input register to clear the interrupt mask and deassert the IRQ line. However, the IRQ handler is triggered before access to the register is granted, causing the read operation to fail. As a result, the IRQ handler enters a loop, repeatedly printing the "failed reading register" message, until `pca953x_resume()` is eventually called, which restores the driver context and enables access to registers. Fix by disabling the IRQ line before entering suspend mode, and re-enabling it after the driver context is restored in `pca953x_resume()`. An IRQ can be disabled with disable_irq() and still wake the system as long as the IRQ has wake enabled, so the wake-up functionality is preserved. Fixes: b76574300504 ("gpio: pca953x: Restore registers after suspend/resume cycle") Cc: stable@vger.kernel.org Signed-off-by: Emanuele Ghidoli Signed-off-by: Francesco Dolcini Reviewed-by: Andy Shevchenko Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250512095441.31645-1-francesco@dolcini.it Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 442435ded020ac..13cc120cf11f14 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -1204,6 +1204,8 @@ static int pca953x_restore_context(struct pca953x_chip *chip) guard(mutex)(&chip->i2c_lock); + if (chip->client->irq > 0) + enable_irq(chip->client->irq); regcache_cache_only(chip->regmap, false); regcache_mark_dirty(chip->regmap); ret = pca953x_regcache_sync(chip); @@ -1216,6 +1218,10 @@ static int pca953x_restore_context(struct pca953x_chip *chip) static void pca953x_save_context(struct pca953x_chip *chip) { guard(mutex)(&chip->i2c_lock); + + /* Disable IRQ to prevent early triggering while regmap "cache only" is on */ + if (chip->client->irq > 0) + disable_irq(chip->client->irq); regcache_cache_only(chip->regmap, true); } From 7118be7c6072f40391923543fdd1563b8d56377c Mon Sep 17 00:00:00 2001 From: Markus Burri Date: Fri, 9 May 2025 17:04:59 +0200 Subject: [PATCH 1740/2924] gpio: virtuser: fix potential out-of-bound write If the caller wrote more characters, count is truncated to the max available space in "simple_write_to_buffer". Check that the input size does not exceed the buffer size. Write a zero termination afterwards. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505091754.285hHbr2-lkp@intel.com/ Signed-off-by: Markus Burri Link: https://lore.kernel.org/r/20250509150459.115489-1-markus.burri@mt.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-virtuser.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index 13407fd4f0ebe8..eab6726953b411 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -401,10 +401,15 @@ static ssize_t gpio_virtuser_direction_do_write(struct file *file, char buf[32], *trimmed; int ret, dir, val = 0; - ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (count >= sizeof(buf)) + return -EINVAL; + + ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count); if (ret < 0) return ret; + buf[ret] = '\0'; + trimmed = strim(buf); if (strcmp(trimmed, "input") == 0) { @@ -623,12 +628,15 @@ static ssize_t gpio_virtuser_consumer_write(struct file *file, char buf[GPIO_VIRTUSER_NAME_BUF_LEN + 2]; int ret; + if (count >= sizeof(buf)) + return -EINVAL; + ret = simple_write_to_buffer(buf, GPIO_VIRTUSER_NAME_BUF_LEN, ppos, user_buf, count); if (ret < 0) return ret; - buf[strlen(buf) - 1] = '\0'; + buf[ret] = '\0'; ret = gpiod_set_consumer_name(data->ad.desc, buf); if (ret) From fd837de3c9cb1a162c69bc1fb1f438467fe7f2f5 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 10 May 2025 12:44:41 +0900 Subject: [PATCH 1741/2924] tracing: probes: Fix a possible race in trace_probe_log APIs Since the shared trace_probe_log variable can be accessed and modified via probe event create operation of kprobe_events, uprobe_events, and dynamic_events, it should be protected. In the dynamic_events, all operations are serialized by `dyn_event_ops_mutex`. But kprobe_events and uprobe_events interfaces are not serialized. To solve this issue, introduces dyn_event_create(), which runs create() operation under the mutex, for kprobe_events and uprobe_events. This also uses lockdep to check the mutex is held when using trace_probe_log* APIs. Link: https://lore.kernel.org/all/174684868120.551552.3068655787654268804.stgit@devnote2/ Reported-by: Paul Cacheux Closes: https://lore.kernel.org/all/20250510074456.805a16872b591e2971a4d221@kernel.org/ Fixes: ab105a4fb894 ("tracing: Use tracing error_log with probe events") Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_dynevent.c | 16 +++++++++++++++- kernel/trace/trace_dynevent.h | 1 + kernel/trace/trace_kprobe.c | 2 +- kernel/trace/trace_probe.c | 9 +++++++++ kernel/trace/trace_uprobe.c | 2 +- 5 files changed, 27 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index a322e4f249a503..5d64a18cacacc6 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -16,7 +16,7 @@ #include "trace_output.h" /* for trace_event_sem */ #include "trace_dynevent.h" -static DEFINE_MUTEX(dyn_event_ops_mutex); +DEFINE_MUTEX(dyn_event_ops_mutex); static LIST_HEAD(dyn_event_ops_list); bool trace_event_dyn_try_get_ref(struct trace_event_call *dyn_call) @@ -116,6 +116,20 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type return ret; } +/* + * Locked version of event creation. The event creation must be protected by + * dyn_event_ops_mutex because of protecting trace_probe_log. + */ +int dyn_event_create(const char *raw_command, struct dyn_event_operations *type) +{ + int ret; + + mutex_lock(&dyn_event_ops_mutex); + ret = type->create(raw_command); + mutex_unlock(&dyn_event_ops_mutex); + return ret; +} + static int create_dyn_event(const char *raw_command) { struct dyn_event_operations *ops; diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index 936477a111d3e7..beee3f8d754444 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -100,6 +100,7 @@ void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos); void dyn_event_seq_stop(struct seq_file *m, void *v); int dyn_events_release_all(struct dyn_event_operations *type); int dyn_event_release(const char *raw_command, struct dyn_event_operations *type); +int dyn_event_create(const char *raw_command, struct dyn_event_operations *type); /* * for_each_dyn_event - iterate over the dyn_event list diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 2703b96d899064..3e5c47b6d7b290 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1089,7 +1089,7 @@ static int create_or_delete_trace_kprobe(const char *raw_command) if (raw_command[0] == '-') return dyn_event_release(raw_command, &trace_kprobe_ops); - ret = trace_kprobe_create(raw_command); + ret = dyn_event_create(raw_command, &trace_kprobe_ops); return ret == -ECANCELED ? -EINVAL : ret; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 2eeecb6c95eea5..424751cdf31f9f 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -154,9 +154,12 @@ static const struct fetch_type *find_fetch_type(const char *type, unsigned long } static struct trace_probe_log trace_probe_log; +extern struct mutex dyn_event_ops_mutex; void trace_probe_log_init(const char *subsystem, int argc, const char **argv) { + lockdep_assert_held(&dyn_event_ops_mutex); + trace_probe_log.subsystem = subsystem; trace_probe_log.argc = argc; trace_probe_log.argv = argv; @@ -165,11 +168,15 @@ void trace_probe_log_init(const char *subsystem, int argc, const char **argv) void trace_probe_log_clear(void) { + lockdep_assert_held(&dyn_event_ops_mutex); + memset(&trace_probe_log, 0, sizeof(trace_probe_log)); } void trace_probe_log_set_index(int index) { + lockdep_assert_held(&dyn_event_ops_mutex); + trace_probe_log.index = index; } @@ -178,6 +185,8 @@ void __trace_probe_log_err(int offset, int err_type) char *command, *p; int i, len = 0, pos = 0; + lockdep_assert_held(&dyn_event_ops_mutex); + if (!trace_probe_log.argv) return; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3386439ec9f674..35cf76c75dd766 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -741,7 +741,7 @@ static int create_or_delete_trace_uprobe(const char *raw_command) if (raw_command[0] == '-') return dyn_event_release(raw_command, &trace_uprobe_ops); - ret = trace_uprobe_create(raw_command); + ret = dyn_event_create(raw_command, &trace_uprobe_ops); return ret == -ECANCELED ? -EINVAL : ret; } From 4227ea91e2657f7965e34313448e9d0a2b67712e Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 8 May 2025 11:14:24 +0200 Subject: [PATCH 1742/2924] net: dsa: b53: prevent standalone from trying to forward to other ports When bridged ports and standalone ports share a VLAN, e.g. via VLAN uppers, or untagged traffic with a vlan unaware bridge, the ASIC will still try to forward traffic to known FDB entries on standalone ports. But since the port VLAN masks prevent forwarding to bridged ports, this traffic will be dropped. This e.g. can be observed in the bridge_vlan_unaware ping tests, where this breaks pinging with learning on. Work around this by enabling the simplified EAP mode on switches supporting it for standalone ports, which causes the ASIC to redirect traffic of unknown source MAC addresses to the CPU port. Since standalone ports do not learn, there are no known source MAC addresses, so effectively this redirects all incoming traffic to the CPU port. Fixes: ff39c2d68679 ("net: dsa: b53: Add bridge support") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250508091424.26870-1-jonas.gorski@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/dsa/b53/b53_common.c | 33 ++++++++++++++++++++++++++++++++ drivers/net/dsa/b53/b53_regs.h | 14 ++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 9eb39cfa5fb275..7216eb8f949367 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -326,6 +326,26 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid, } } +static void b53_set_eap_mode(struct b53_device *dev, int port, int mode) +{ + u64 eap_conf; + + if (is5325(dev) || is5365(dev) || dev->chip_id == BCM5389_DEVICE_ID) + return; + + b53_read64(dev, B53_EAP_PAGE, B53_PORT_EAP_CONF(port), &eap_conf); + + if (is63xx(dev)) { + eap_conf &= ~EAP_MODE_MASK_63XX; + eap_conf |= (u64)mode << EAP_MODE_SHIFT_63XX; + } else { + eap_conf &= ~EAP_MODE_MASK; + eap_conf |= (u64)mode << EAP_MODE_SHIFT; + } + + b53_write64(dev, B53_EAP_PAGE, B53_PORT_EAP_CONF(port), eap_conf); +} + static void b53_set_forwarding(struct b53_device *dev, int enable) { u8 mgmt; @@ -586,6 +606,13 @@ int b53_setup_port(struct dsa_switch *ds, int port) b53_port_set_mcast_flood(dev, port, true); b53_port_set_learning(dev, port, false); + /* Force all traffic to go to the CPU port to prevent the ASIC from + * trying to forward to bridged ports on matching FDB entries, then + * dropping frames because it isn't allowed to forward there. + */ + if (dsa_is_user_port(ds, port)) + b53_set_eap_mode(dev, port, EAP_MODE_SIMPLIFIED); + return 0; } EXPORT_SYMBOL(b53_setup_port); @@ -2042,6 +2069,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, pvlan |= BIT(i); } + /* Disable redirection of unknown SA to the CPU port */ + b53_set_eap_mode(dev, port, EAP_MODE_BASIC); + /* Configure the local port VLAN control membership to include * remote ports and update the local port bitmask */ @@ -2077,6 +2107,9 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) pvlan &= ~BIT(i); } + /* Enable redirection of unknown SA to the CPU port */ + b53_set_eap_mode(dev, port, EAP_MODE_SIMPLIFIED); + b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan); dev->ports[port].vlan_ctl_mask = pvlan; diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index bfbcb66bef6626..5f7a0e5c5709d3 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -50,6 +50,9 @@ /* Jumbo Frame Registers */ #define B53_JUMBO_PAGE 0x40 +/* EAP Registers */ +#define B53_EAP_PAGE 0x42 + /* EEE Control Registers Page */ #define B53_EEE_PAGE 0x92 @@ -480,6 +483,17 @@ #define JMS_MIN_SIZE 1518 #define JMS_MAX_SIZE 9724 +/************************************************************************* + * EAP Page Registers + *************************************************************************/ +#define B53_PORT_EAP_CONF(i) (0x20 + 8 * (i)) +#define EAP_MODE_SHIFT 51 +#define EAP_MODE_SHIFT_63XX 50 +#define EAP_MODE_MASK (0x3ull << EAP_MODE_SHIFT) +#define EAP_MODE_MASK_63XX (0x3ull << EAP_MODE_SHIFT_63XX) +#define EAP_MODE_BASIC 0 +#define EAP_MODE_SIMPLIFIED 3 + /************************************************************************* * EEE Configuration Page Registers *************************************************************************/ From 578e1b96fad7402ff7e9c7648c8f1ad0225147c8 Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Mon, 12 May 2025 23:24:19 +0800 Subject: [PATCH 1743/2924] HID: bpf: abort dispatch if device destroyed The current HID bpf implementation assumes no output report/request will go through it after hid_bpf_destroy_device() has been called. This leads to a bug that unplugging certain types of HID devices causes a cleaned- up SRCU to be accessed. The bug was previously a hidden failure until a recent x86 percpu change [1] made it access not-present pages. The bug will be triggered if the conditions below are met: A) a device under the driver has some LEDs on B) hid_ll_driver->request() is uninplemented (e.g., logitech-djreceiver) If condition A is met, hidinput_led_worker() is always scheduled *after* hid_bpf_destroy_device(). hid_destroy_device ` hid_bpf_destroy_device ` cleanup_srcu_struct(&hdev->bpf.srcu) ` hid_remove_device ` ... ` led_classdev_unregister ` led_trigger_set(led_cdev, NULL) ` led_set_brightness(led_cdev, LED_OFF) ` ... ` input_inject_event ` input_event_dispose ` hidinput_input_event ` schedule_work(&hid->led_work) [hidinput_led_worker] This is fine when condition B is not met, where hidinput_led_worker() calls hid_ll_driver->request(). This is the case for most HID drivers, which implement it or use the generic one from usbhid. The driver itself or an underlying driver will then abort processing the request. Otherwise, hidinput_led_worker() tries hid_hw_output_report() and leads to the bug. hidinput_led_worker ` hid_hw_output_report ` dispatch_hid_bpf_output_report ` srcu_read_lock(&hdev->bpf.srcu) ` srcu_read_unlock(&hdev->bpf.srcu, idx) The bug has existed since the introduction [2] of dispatch_hid_bpf_output_report(). However, the same bug also exists in dispatch_hid_bpf_raw_requests(), and I've reproduced (no visible effect because of the lack of [1], but confirmed bpf.destroyed == 1) the bug against the commit (i.e., the Fixes:) introducing the function. This is because hidinput_led_worker() falls back to hid_hw_raw_request() when hid_ll_driver->output_report() is uninplemented (e.g., logitech- djreceiver). hidinput_led_worker ` hid_hw_output_report: -ENOSYS ` hid_hw_raw_request ` dispatch_hid_bpf_raw_requests ` srcu_read_lock(&hdev->bpf.srcu) ` srcu_read_unlock(&hdev->bpf.srcu, idx) Fix the issue by returning early in the two mentioned functions if hid_bpf has been marked as destroyed. Though dispatch_hid_bpf_device_event() handles input events, and there is no evidence that it may be called after the destruction, the same check, as a safety net, is also added to it to maintain the consistency among all dispatch functions. The impact of the bug on other architectures is unclear. Even if it acts as a hidden failure, this is still dangerous because it corrupts whatever is on the address calculated by SRCU. Thus, CC'ing the stable list. [1]: commit 9d7de2aa8b41 ("x86/percpu/64: Use relative percpu offsets") [2]: commit 9286675a2aed ("HID: bpf: add HID-BPF hooks for hid_hw_output_report") Closes: https://lore.kernel.org/all/20250506145548.GGaBoi9Jzp3aeJizTR@fat_crate.local/ Fixes: 8bd0488b5ea5 ("HID: bpf: add HID-BPF hooks for hid_hw_raw_requests") Cc: stable@vger.kernel.org Signed-off-by: Rong Zhang Tested-by: Petr Tesarik Link: https://patch.msgid.link/20250512152420.87441-1-i@rong.moe Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 2e96ec6a3073da..9a06f9b0e4ef33 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -38,6 +38,9 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type struct hid_bpf_ops *e; int ret; + if (unlikely(hdev->bpf.destroyed)) + return ERR_PTR(-ENODEV); + if (type >= HID_REPORT_TYPES) return ERR_PTR(-EINVAL); @@ -93,6 +96,9 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, struct hid_bpf_ops *e; int ret, idx; + if (unlikely(hdev->bpf.destroyed)) + return -ENODEV; + if (rtype >= HID_REPORT_TYPES) return -EINVAL; @@ -130,6 +136,9 @@ int dispatch_hid_bpf_output_report(struct hid_device *hdev, struct hid_bpf_ops *e; int ret, idx; + if (unlikely(hdev->bpf.destroyed)) + return -ENODEV; + idx = srcu_read_lock(&hdev->bpf.srcu); list_for_each_entry_srcu(e, &hdev->bpf.prog_list, list, srcu_read_lock_held(&hdev->bpf.srcu)) { From 4ca45af0a56d00b86285d6fdd720dca3215059a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matti=20Lehtim=C3=A4ki?= Date: Mon, 12 May 2025 02:40:15 +0300 Subject: [PATCH 1744/2924] remoteproc: qcom_wcnss: Fix on platforms without fallback regulators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent change to handle platforms with only single power domain broke pronto-v3 which requires power domains and doesn't have fallback voltage regulators in case power domains are missing. Add a check to verify the number of fallback voltage regulators before using the code which handles single power domain situation. Fixes: 65991ea8a6d1 ("remoteproc: qcom_wcnss: Handle platforms with only single power domain") Signed-off-by: Matti Lehtimäki Tested-by: Luca Weiss # sdm632-fairphone-fp3 Link: https://lore.kernel.org/r/20250511234026.94735-1-matti.lehtimaki@gmail.com Signed-off-by: Bjorn Andersson --- drivers/remoteproc/qcom_wcnss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index 775b056d795a8b..2c7e519a2254ba 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -456,7 +456,8 @@ static int wcnss_init_regulators(struct qcom_wcnss *wcnss, if (wcnss->num_pds) { info += wcnss->num_pds; /* Handle single power domain case */ - num_vregs += num_pd_vregs - wcnss->num_pds; + if (wcnss->num_pds < num_pd_vregs) + num_vregs += num_pd_vregs - wcnss->num_pds; } else { num_vregs += num_pd_vregs; } From 98e6da673cc6dd46ca9a599802bd2c8f83606710 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 1 May 2025 13:06:31 +0100 Subject: [PATCH 1745/2924] clk: sunxi-ng: d1: Add missing divider for MMC mod clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The D1/R528/T113 SoCs have a hidden divider of 2 in the MMC mod clocks, just as other recent SoCs. So far we did not describe that, which led to the resulting MMC clock rate to be only half of its intended value. Use a macro that allows to describe a fixed post-divider, to compensate for that divisor. This brings the MMC performance on those SoCs to its expected level, so about 23 MB/s for SD cards, instead of the 11 MB/s measured so far. Fixes: 35b97bb94111 ("clk: sunxi-ng: Add support for the D1 SoC clocks") Reported-by: Kuba Szczodrzyński Signed-off-by: Andre Przywara Link: https://patch.msgid.link/20250501120631.837186-1-andre.przywara@arm.com Signed-off-by: Chen-Yu Tsai --- drivers/clk/sunxi-ng/ccu-sun20i-d1.c | 44 ++++++++++++++++------------ drivers/clk/sunxi-ng/ccu_mp.h | 22 ++++++++++++++ 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c index bb66c906ebbb62..e83d4fd40240fa 100644 --- a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c +++ b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c @@ -412,19 +412,23 @@ static const struct clk_parent_data mmc0_mmc1_parents[] = { { .hw = &pll_periph0_2x_clk.common.hw }, { .hw = &pll_audio1_div2_clk.common.hw }, }; -static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc0_clk, "mmc0", mmc0_mmc1_parents, 0x830, - 0, 4, /* M */ - 8, 2, /* P */ - 24, 3, /* mux */ - BIT(31), /* gate */ - 0); - -static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc1_clk, "mmc1", mmc0_mmc1_parents, 0x834, - 0, 4, /* M */ - 8, 2, /* P */ - 24, 3, /* mux */ - BIT(31), /* gate */ - 0); +static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc0_clk, "mmc0", + mmc0_mmc1_parents, 0x830, + 0, 4, /* M */ + 8, 2, /* P */ + 24, 3, /* mux */ + BIT(31), /* gate */ + 2, /* post-div */ + 0); + +static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc1_clk, "mmc1", + mmc0_mmc1_parents, 0x834, + 0, 4, /* M */ + 8, 2, /* P */ + 24, 3, /* mux */ + BIT(31), /* gate */ + 2, /* post-div */ + 0); static const struct clk_parent_data mmc2_parents[] = { { .fw_name = "hosc" }, @@ -433,12 +437,14 @@ static const struct clk_parent_data mmc2_parents[] = { { .hw = &pll_periph0_800M_clk.common.hw }, { .hw = &pll_audio1_div2_clk.common.hw }, }; -static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc2_clk, "mmc2", mmc2_parents, 0x838, - 0, 4, /* M */ - 8, 2, /* P */ - 24, 3, /* mux */ - BIT(31), /* gate */ - 0); +static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc2_clk, "mmc2", mmc2_parents, + 0x838, + 0, 4, /* M */ + 8, 2, /* P */ + 24, 3, /* mux */ + BIT(31), /* gate */ + 2, /* post-div */ + 0); static SUNXI_CCU_GATE_HWS(bus_mmc0_clk, "bus-mmc0", psi_ahb_hws, 0x84c, BIT(0), 0); diff --git a/drivers/clk/sunxi-ng/ccu_mp.h b/drivers/clk/sunxi-ng/ccu_mp.h index 8fc7fdb7ef4941..bb09c649bfa356 100644 --- a/drivers/clk/sunxi-ng/ccu_mp.h +++ b/drivers/clk/sunxi-ng/ccu_mp.h @@ -52,6 +52,28 @@ struct ccu_mp { } \ } +#define SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(_struct, _name, _parents, \ + _reg, \ + _mshift, _mwidth, \ + _pshift, _pwidth, \ + _muxshift, _muxwidth, \ + _gate, _postdiv, _flags)\ + struct ccu_mp _struct = { \ + .enable = _gate, \ + .m = _SUNXI_CCU_DIV(_mshift, _mwidth), \ + .p = _SUNXI_CCU_DIV(_pshift, _pwidth), \ + .mux = _SUNXI_CCU_MUX(_muxshift, _muxwidth), \ + .fixed_post_div = _postdiv, \ + .common = { \ + .reg = _reg, \ + .features = CCU_FEATURE_FIXED_POSTDIV, \ + .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, \ + _parents, \ + &ccu_mp_ops, \ + _flags), \ + } \ + } + #define SUNXI_CCU_MP_WITH_MUX_GATE(_struct, _name, _parents, _reg, \ _mshift, _mwidth, \ _pshift, _pwidth, \ From d2062cc1b1c367d5d019f595ef860159e1301351 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 28 Apr 2025 21:41:51 +0000 Subject: [PATCH 1746/2924] x86/sev: Do not touch VMSA pages during SNP guest memory kdump When kdump is running makedumpfile to generate vmcore and dump SNP guest memory it touches the VMSA page of the vCPU executing kdump. It then results in unrecoverable #NPF/RMP faults as the VMSA page is marked busy/in-use when the vCPU is running and subsequently a causes guest softlockup/hang. Additionally, other APs may be halted in guest mode and their VMSA pages are marked busy and touching these VMSA pages during guest memory dump will also cause #NPF. Issue AP_DESTROY GHCB calls on other APs to ensure they are kicked out of guest mode and then clear the VMSA bit on their VMSA pages. If the vCPU running kdump is an AP, mark it's VMSA page as offline to ensure that makedumpfile excludes that page while dumping guest memory. Fixes: 3074152e56c9 ("x86/sev: Convert shared memory back to private on kexec") Signed-off-by: Ashish Kalra Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Pankaj Gupta Reviewed-by: Tom Lendacky Tested-by: Srikanth Aithal Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250428214151.155464-1-Ashish.Kalra@amd.com --- arch/x86/coco/sev/core.c | 244 +++++++++++++++++++++++++-------------- 1 file changed, 158 insertions(+), 86 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index b0c1a7a574974a..41060ba41b5c43 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -959,6 +959,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end) set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); } +static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id) +{ + bool create = event != SVM_VMGEXIT_AP_DESTROY; + struct ghcb_state state; + unsigned long flags; + struct ghcb *ghcb; + int ret = 0; + + local_irq_save(flags); + + ghcb = __sev_get_ghcb(&state); + + vc_ghcb_invalidate(ghcb); + + if (create) + ghcb_set_rax(ghcb, vmsa->sev_features); + + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); + ghcb_set_sw_exit_info_1(ghcb, + ((u64)apic_id << 32) | + ((u64)snp_vmpl << 16) | + event); + ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + VMGEXIT(); + + if (!ghcb_sw_exit_info_1_is_valid(ghcb) || + lower_32_bits(ghcb->save.sw_exit_info_1)) { + pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY")); + ret = -EINVAL; + } + + __sev_put_ghcb(&state); + + local_irq_restore(flags); + + return ret; +} + +static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) +{ + int ret; + + if (snp_vmpl) { + struct svsm_call call = {}; + unsigned long flags; + + local_irq_save(flags); + + call.caa = this_cpu_read(svsm_caa); + call.rcx = __pa(va); + + if (make_vmsa) { + /* Protocol 0, Call ID 2 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); + call.rdx = __pa(caa); + call.r8 = apic_id; + } else { + /* Protocol 0, Call ID 3 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); + } + + ret = svsm_perform_call_protocol(&call); + + local_irq_restore(flags); + } else { + /* + * If the kernel runs at VMPL0, it can change the VMSA + * bit for a page using the RMPADJUST instruction. + * However, for the instruction to succeed it must + * target the permissions of a lesser privileged (higher + * numbered) VMPL level, so use VMPL1. + */ + u64 attrs = 1; + + if (make_vmsa) + attrs |= RMPADJUST_VMSA_PAGE_BIT; + + ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); + } + + return ret; +} + +static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) +{ + int err; + + err = snp_set_vmsa(vmsa, NULL, apic_id, false); + if (err) + pr_err("clear VMSA page failed (%u), leaking page\n", err); + else + free_page((unsigned long)vmsa); +} + static void set_pte_enc(pte_t *kpte, int level, void *va) { struct pte_enc_desc d = { @@ -1055,6 +1151,65 @@ void snp_kexec_begin(void) pr_warn("Failed to stop shared<->private conversions\n"); } +/* + * Shutdown all APs except the one handling kexec/kdump and clearing + * the VMSA tag on AP's VMSA pages as they are not being used as + * VMSA page anymore. + */ +static void shutdown_all_aps(void) +{ + struct sev_es_save_area *vmsa; + int apic_id, this_cpu, cpu; + + this_cpu = get_cpu(); + + /* + * APs are already in HLT loop when enc_kexec_finish() callback + * is invoked. + */ + for_each_present_cpu(cpu) { + vmsa = per_cpu(sev_vmsa, cpu); + + /* + * The BSP or offlined APs do not have guest allocated VMSA + * and there is no need to clear the VMSA tag for this page. + */ + if (!vmsa) + continue; + + /* + * Cannot clear the VMSA tag for the currently running vCPU. + */ + if (this_cpu == cpu) { + unsigned long pa; + struct page *p; + + pa = __pa(vmsa); + /* + * Mark the VMSA page of the running vCPU as offline + * so that is excluded and not touched by makedumpfile + * while generating vmcore during kdump. + */ + p = pfn_to_online_page(pa >> PAGE_SHIFT); + if (p) + __SetPageOffline(p); + continue; + } + + apic_id = cpuid_to_apicid[cpu]; + + /* + * Issue AP destroy to ensure AP gets kicked out of guest mode + * to allow using RMPADJUST to remove the VMSA tag on it's + * VMSA page. + */ + vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id); + snp_cleanup_vmsa(vmsa, apic_id); + } + + put_cpu(); +} + void snp_kexec_finish(void) { struct sev_es_runtime_data *data; @@ -1069,6 +1224,8 @@ void snp_kexec_finish(void) if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; + shutdown_all_aps(); + unshare_all_memory(); /* @@ -1090,51 +1247,6 @@ void snp_kexec_finish(void) } } -static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) -{ - int ret; - - if (snp_vmpl) { - struct svsm_call call = {}; - unsigned long flags; - - local_irq_save(flags); - - call.caa = this_cpu_read(svsm_caa); - call.rcx = __pa(va); - - if (make_vmsa) { - /* Protocol 0, Call ID 2 */ - call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); - call.rdx = __pa(caa); - call.r8 = apic_id; - } else { - /* Protocol 0, Call ID 3 */ - call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); - } - - ret = svsm_perform_call_protocol(&call); - - local_irq_restore(flags); - } else { - /* - * If the kernel runs at VMPL0, it can change the VMSA - * bit for a page using the RMPADJUST instruction. - * However, for the instruction to succeed it must - * target the permissions of a lesser privileged (higher - * numbered) VMPL level, so use VMPL1. - */ - u64 attrs = 1; - - if (make_vmsa) - attrs |= RMPADJUST_VMSA_PAGE_BIT; - - ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); - } - - return ret; -} - #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK) #define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK) #define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK) @@ -1166,24 +1278,10 @@ static void *snp_alloc_vmsa_page(int cpu) return page_address(p + 1); } -static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) -{ - int err; - - err = snp_set_vmsa(vmsa, NULL, apic_id, false); - if (err) - pr_err("clear VMSA page failed (%u), leaking page\n", err); - else - free_page((unsigned long)vmsa); -} - static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) { struct sev_es_save_area *cur_vmsa, *vmsa; - struct ghcb_state state; struct svsm_ca *caa; - unsigned long flags; - struct ghcb *ghcb; u8 sipi_vector; int cpu, ret; u64 cr4; @@ -1297,33 +1395,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) } /* Issue VMGEXIT AP Creation NAE event */ - local_irq_save(flags); - - ghcb = __sev_get_ghcb(&state); - - vc_ghcb_invalidate(ghcb); - ghcb_set_rax(ghcb, vmsa->sev_features); - ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); - ghcb_set_sw_exit_info_1(ghcb, - ((u64)apic_id << 32) | - ((u64)snp_vmpl << 16) | - SVM_VMGEXIT_AP_CREATE); - ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); - - sev_es_wr_ghcb_msr(__pa(ghcb)); - VMGEXIT(); - - if (!ghcb_sw_exit_info_1_is_valid(ghcb) || - lower_32_bits(ghcb->save.sw_exit_info_1)) { - pr_err("SNP AP Creation error\n"); - ret = -EINVAL; - } - - __sev_put_ghcb(&state); - - local_irq_restore(flags); - - /* Perform cleanup if there was an error */ + ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id); if (ret) { snp_cleanup_vmsa(vmsa, apic_id); vmsa = NULL; From 82b7f88f2316c5442708daeb0b5ec5aa54c8ff7f Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Tue, 6 May 2025 18:35:29 +0000 Subject: [PATCH 1747/2924] x86/sev: Make sure pages are not skipped during kdump When shared pages are being converted to private during kdump, additional checks are performed. They include handling the case of a GHCB page being contained within a huge page. Currently, this check incorrectly skips a page just below the GHCB page from being transitioned back to private during kdump preparation. This skipped page causes a 0x404 #VC exception when it is accessed later while dumping guest memory for vmcore generation. Correct the range to be checked for GHCB contained in a huge page. Also, ensure that the skipped huge page containing the GHCB page is transitioned back to private by applying the correct address mask later when changing GHCBs to private at end of kdump preparation. [ bp: Massage commit message. ] Fixes: 3074152e56c9 ("x86/sev: Convert shared memory back to private on kexec") Signed-off-by: Ashish Kalra Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Tested-by: Srikanth Aithal Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250506183529.289549-1-Ashish.Kalra@amd.com --- arch/x86/coco/sev/core.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 41060ba41b5c43..36beaac713c128 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -1101,7 +1101,8 @@ static void unshare_all_memory(void) data = per_cpu(runtime_data, cpu); ghcb = (unsigned long)&data->ghcb_page; - if (addr <= ghcb && ghcb <= addr + size) { + /* Handle the case of a huge page containing the GHCB page */ + if (addr <= ghcb && ghcb < addr + size) { skipped_addr = true; break; } @@ -1213,8 +1214,8 @@ static void shutdown_all_aps(void) void snp_kexec_finish(void) { struct sev_es_runtime_data *data; + unsigned long size, addr; unsigned int level, cpu; - unsigned long size; struct ghcb *ghcb; pte_t *pte; @@ -1242,8 +1243,10 @@ void snp_kexec_finish(void) ghcb = &data->ghcb_page; pte = lookup_address((unsigned long)ghcb, &level); size = page_level_size(level); - set_pte_enc(pte, level, (void *)ghcb); - snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE)); + /* Handle the case of a huge page containing the GHCB page */ + addr = (unsigned long)ghcb & page_level_mask(level); + set_pte_enc(pte, level, (void *)addr); + snp_set_memory_private(addr, (size / PAGE_SIZE)); } } From a0fa7873f2f869087b1e7793f7fac3713a1e3afe Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 7 May 2025 11:04:32 -0400 Subject: [PATCH 1748/2924] drm/amdgpu: csa unmap use uninterruptible lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After process exit to unmap csa and free GPU vm, if signal is accepted and then waiting to take vm lock is interrupted and return, it causes memory leaking and below warning backtrace. Change to use uninterruptible wait lock fix the issue. WARNING: CPU: 69 PID: 167800 at amd/amdgpu/amdgpu_kms.c:1525 amdgpu_driver_postclose_kms+0x294/0x2a0 [amdgpu] Call Trace: drm_file_free.part.0+0x1da/0x230 [drm] drm_close_helper.isra.0+0x65/0x70 [drm] drm_release+0x6a/0x120 [drm] amdgpu_drm_release+0x51/0x60 [amdgpu] __fput+0x9f/0x280 ____fput+0xe/0x20 task_work_run+0x67/0xa0 do_exit+0x217/0x3c0 do_group_exit+0x3b/0xb0 get_signal+0x14a/0x8d0 arch_do_signal_or_restart+0xde/0x100 exit_to_user_mode_loop+0xc1/0x1a0 exit_to_user_mode_prepare+0xf4/0x100 syscall_exit_to_user_mode+0x17/0x40 do_syscall_64+0x69/0xc0 Signed-off-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 7dbbfb3c171a6f63b01165958629c9c26abf38ab) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index cfdf558b48b648..02138aa557935e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -109,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) From 2d73b0845ab3963856e857b810600e5594bc29f4 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 8 May 2025 13:37:35 +0800 Subject: [PATCH 1749/2924] drm/amdgpu: fix incorrect MALL size for GFX1151 On GFX1151, the reported MALL cache size reflects only half of its actual size; this adjustment corrects the discrepancy. Signed-off-by: Tim Huang Acked-by: Alex Deucher Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher (cherry picked from commit 0a5c060b593ad152318f89e5564bfdfcff8a6ac0) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index e74e26b6a4f23c..fec9a007533acc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -752,6 +752,18 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gmc.vram_type = vram_type; adev->gmc.vram_vendor = vram_vendor; + /* The mall_size is already calculated as mall_size_per_umc * num_umc. + * However, for gfx1151, which features a 2-to-1 UMC mapping, + * the result must be multiplied by 2 to determine the actual mall size. + */ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 5, 1): + adev->gmc.mall_size *= 2; + break; + default: + break; + } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): From d433981385c62c72080e26f1c00a961d18b233be Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 25 Apr 2025 14:44:02 +0800 Subject: [PATCH 1750/2924] drm/amd/display: Correct the reply value when AUX write incomplete [Why] Now forcing aux->transfer to return 0 when incomplete AUX write is inappropriate. It should return bytes have been transferred. [How] aux->transfer is asked not to change original msg except reply field of drm_dp_aux_msg structure. Copy the msg->buffer when it's write request, and overwrite the first byte when sink reply 1 byte indicating partially written byte number. Then we can return the correct value without changing the original msg. Fixes: 3637e457eb00 ("drm/amd/display: Fix wrong handling for AUX_DEFER case") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 7ac37f0dcd2e0b729fa7b5513908dc8ab802b540) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- .../drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 64df8ca448b3b8..1525c408d45203 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12763,7 +12763,8 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( /* The reply is stored in the top nibble of the command. */ payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - if (!payload->write && p_notify->aux_reply.length) + /*write req may receive a byte indicating partially written number as well*/ + if (p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 074b79fd5822e7..c93eef6f7a6b98 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -62,6 +62,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, enum aux_return_code_type operation_result; struct amdgpu_device *adev; struct ddc_service *ddc; + uint8_t copy[16]; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -77,6 +78,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, (msg->request & DP_AUX_I2C_WRITE_STATUS_UPDATE) != 0; payload.defer_delay = 0; + if (payload.write) { + memcpy(copy, msg->buffer, msg->size); + payload.data = copy; + } + result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); @@ -100,9 +106,9 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, */ if (payload.write && result >= 0) { if (result) { - /*one byte indicating partially written bytes. Force 0 to retry*/ + /*one byte indicating partially written bytes*/ drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n"); - result = 0; + result = payload.data[0]; } else if (!payload.reply[0]) /*I2C_ACK|AUX_ACK*/ result = msg->size; From 190818d1b653fbf16138286345e67c8c1a83f29f Mon Sep 17 00:00:00 2001 From: Gabe Teeger Date: Fri, 25 Apr 2025 10:31:39 -0400 Subject: [PATCH 1751/2924] Revert: "drm/amd/display: Enable urgent latency adjustment on DCN35" This reverts commit 756c85e4d0dd ("drm/amd/display: Enable urgent latency adjustment on DCN35") Reason for revert: Negative power impact. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Gabe Teeger Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 9334c491cd8f388232b9a187bf0ddb728482bd6f) --- drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index d9159ca5541249..92f0a099d089ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dcn_downspread_percent = 0.5, .gpuvm_min_page_size_bytes = 4096, .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = 1, + .do_urgent_latency_adjustment = 0, .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr) From 874697e127931bf50a37ce9d96ee80f3a08a0c38 Mon Sep 17 00:00:00 2001 From: John Olender Date: Wed, 16 Apr 2025 02:54:26 -0400 Subject: [PATCH 1752/2924] drm/amd/display: Defer BW-optimization-blocked DRR adjustments [Why & How] Instead of dropping DRR updates, defer them. This fixes issues where monitor continues to see incorrect refresh rate after VRR was turned off by userspace. Fixes: 32953485c558 ("drm/amd/display: Do not update DRR while BW optimizations pending") Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3546 Reviewed-by: Sun peng Li Signed-off-by: John Olender Signed-off-by: Aurabindo Pillai Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 53761b7ecd83e6fbb9f2206f8c980a6aa308c844) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ drivers/gpu/drm/amd/display/dc/core/dc.c | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1525c408d45203..cc01b9c68b479e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -372,6 +372,8 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state, struct dm_crtc_state *new_state) { + if (new_state->stream->adjust.timing_adjust_pending) + return true; if (new_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED) return true; else if (amdgpu_dm_crtc_vrr_active(old_state) != amdgpu_dm_crtc_vrr_active(new_state)) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 28d1353f403dfb..ba4ce8a63158bb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -439,9 +439,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * Don't adjust DRR while there's bandwidth optimizations pending to * avoid conflicting with firmware updates. */ - if (dc->ctx->dce_version > DCE_VERSION_MAX) - if (dc->optimized_required || dc->wm_optimized_required) + if (dc->ctx->dce_version > DCE_VERSION_MAX) { + if (dc->optimized_required || dc->wm_optimized_required) { + stream->adjust.timing_adjust_pending = true; return false; + } + } dc_exit_ips_for_hw_access(dc); @@ -3168,7 +3171,8 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->crtc_timing_adjust) { if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min || - stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max) + stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max || + stream->adjust.timing_adjust_pending) update->crtc_timing_adjust->timing_adjust_pending = true; stream->adjust = *update->crtc_timing_adjust; update->crtc_timing_adjust->timing_adjust_pending = false; From 3c1a467372e0c356b1d3c59f6d199ed5a6612dd1 Mon Sep 17 00:00:00 2001 From: George Shen Date: Thu, 24 Apr 2025 10:02:59 -0400 Subject: [PATCH 1753/2924] drm/amd/display: fix link_set_dpms_off multi-display MST corner case [Why & How] When MST config is unplugged/replugged too quickly, it can potentially result in a scenario where previous DC state has not been reset before the HPD link detection sequence begins. In this case, driver will disable the streams/link prior to re-enabling the link for link training. There is a bug in the current logic that does not account for the fact that current_state can be released and cleared prior to swapping to a new state (resulting in the pipe_ctx stream pointers to be cleared) in between disabling streams. To resolve this, cache the original streams prior to committing any stream updates. Reviewed-by: Wenjing Liu Signed-off-by: George Shen Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 1561782686ccc36af844d55d31b44c938dd412dc) --- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 268626e73c543d..53c961f86d43c0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -148,6 +148,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init) void link_set_all_streams_dpms_off_for_link(struct dc_link *link) { struct pipe_ctx *pipes[MAX_PIPES]; + struct dc_stream_state *streams[MAX_PIPES]; struct dc_state *state = link->dc->current_state; uint8_t count; int i; @@ -160,10 +161,18 @@ void link_set_all_streams_dpms_off_for_link(struct dc_link *link) link_get_master_pipes_with_dpms_on(link, state, &count, pipes); + /* The subsequent call to dc_commit_updates_for_stream for a full update + * will release the current state and swap to a new state. Releasing the + * current state results in the stream pointers in the pipe_ctx structs + * to be zero'd. Hence, cache all streams prior to dc_commit_updates_for_stream. + */ + for (i = 0; i < count; i++) + streams[i] = pipes[i]->stream; + for (i = 0; i < count; i++) { - stream_update.stream = pipes[i]->stream; + stream_update.stream = streams[i]; dc_commit_updates_for_stream(link->ctx->dc, NULL, 0, - pipes[i]->stream, &stream_update, + streams[i], &stream_update, state); } From 2ddac70fed50485aa4ae49cdb7478ce41d8d4715 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 29 Apr 2025 05:03:30 +0800 Subject: [PATCH 1754/2924] drm/amd/display: check stream id dml21 wrapper to get plane_id [Why & How] Fix a false positive warning which occurs due to lack of correct checks when querying plane_id in DML21. This fixes the warning when performing a mode1 reset (cat /sys/kernel/debug/dri/1/amdgpu_gpu_recover): [ 35.751250] WARNING: CPU: 11 PID: 326 at /tmp/amd.PHpyAl7v/amd/amdgpu/../display/dc/dml2/dml2_dc_resource_mgmt.c:91 dml2_map_dc_pipes+0x243d/0x3f40 [amdgpu] [ 35.751434] Modules linked in: amdgpu(OE) amddrm_ttm_helper(OE) amdttm(OE) amddrm_buddy(OE) amdxcp(OE) amddrm_exec(OE) amd_sched(OE) amdkcl(OE) drm_suballoc_helper drm_ttm_helper ttm drm_display_helper cec rc_core i2c_algo_bit rfcomm qrtr cmac algif_hash algif_skcipher af_alg bnep amd_atl intel_rapl_msr intel_rapl_common snd_hda_codec_hdmi snd_hda_intel edac_mce_amd snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec kvm_amd snd_hda_core snd_hwdep snd_pcm kvm snd_seq_midi snd_seq_midi_event snd_rawmidi crct10dif_pclmul polyval_clmulni polyval_generic btusb ghash_clmulni_intel sha256_ssse3 btrtl sha1_ssse3 snd_seq btintel aesni_intel btbcm btmtk snd_seq_device crypto_simd sunrpc cryptd bluetooth snd_timer ccp binfmt_misc rapl snd i2c_piix4 wmi_bmof gigabyte_wmi k10temp i2c_smbus soundcore gpio_amdpt mac_hid sch_fq_codel msr parport_pc ppdev lp parport efi_pstore nfnetlink dmi_sysfs ip_tables x_tables autofs4 hid_generic usbhid hid crc32_pclmul igc ahci xhci_pci libahci xhci_pci_renesas video wmi [ 35.751501] CPU: 11 UID: 0 PID: 326 Comm: kworker/u64:9 Tainted: G OE 6.11.0-21-generic #21~24.04.1-Ubuntu [ 35.751504] Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE [ 35.751505] Hardware name: Gigabyte Technology Co., Ltd. X670E AORUS PRO X/X670E AORUS PRO X, BIOS F30 05/22/2024 [ 35.751506] Workqueue: amdgpu-reset-dev amdgpu_debugfs_reset_work [amdgpu] [ 35.751638] RIP: 0010:dml2_map_dc_pipes+0x243d/0x3f40 [amdgpu] [ 35.751794] Code: 6d 0c 00 00 8b 84 24 88 00 00 00 41 3b 44 9c 20 0f 84 fc 07 00 00 48 83 c3 01 48 83 fb 06 75 b3 4c 8b 64 24 68 4c 8b 6c 24 40 <0f> 0b b8 06 00 00 00 49 8b 94 24 a0 49 00 00 89 c3 83 f8 07 0f 87 [ 35.751796] RSP: 0018:ffffbfa3805d7680 EFLAGS: 00010246 [ 35.751798] RAX: 0000000000010000 RBX: 0000000000000006 RCX: 0000000000000000 [ 35.751799] RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000000 [ 35.751800] RBP: ffffbfa3805d78f0 R08: 0000000000000000 R09: 0000000000000000 [ 35.751801] R10: 0000000000000000 R11: 0000000000000000 R12: ffffbfa383249000 [ 35.751802] R13: ffffa0e68f280000 R14: ffffbfa383249658 R15: 0000000000000000 [ 35.751803] FS: 0000000000000000(0000) GS:ffffa0edbe580000(0000) knlGS:0000000000000000 [ 35.751804] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 35.751805] CR2: 00005d847ef96c58 CR3: 000000041de3e000 CR4: 0000000000f50ef0 [ 35.751806] PKRU: 55555554 [ 35.751807] Call Trace: [ 35.751810] [ 35.751816] ? show_regs+0x6c/0x80 [ 35.751820] ? __warn+0x88/0x140 [ 35.751822] ? dml2_map_dc_pipes+0x243d/0x3f40 [amdgpu] [ 35.751964] ? report_bug+0x182/0x1b0 [ 35.751969] ? handle_bug+0x6e/0xb0 [ 35.751972] ? exc_invalid_op+0x18/0x80 [ 35.751974] ? asm_exc_invalid_op+0x1b/0x20 [ 35.751978] ? dml2_map_dc_pipes+0x243d/0x3f40 [amdgpu] [ 35.752117] ? math_pow+0x48/0xa0 [amdgpu] [ 35.752256] ? srso_alias_return_thunk+0x5/0xfbef5 [ 35.752260] ? math_pow+0x48/0xa0 [amdgpu] [ 35.752400] ? srso_alias_return_thunk+0x5/0xfbef5 [ 35.752403] ? math_pow+0x11/0xa0 [amdgpu] [ 35.752524] ? srso_alias_return_thunk+0x5/0xfbef5 [ 35.752526] ? core_dcn4_mode_programming+0xe4d/0x20d0 [amdgpu] [ 35.752663] ? srso_alias_return_thunk+0x5/0xfbef5 [ 35.752669] dml21_validate+0x3d4/0x980 [amdgpu] Reviewed-by: Austin Zheng Signed-off-by: Aurabindo Pillai Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit f8ad62c0a93e5dd94243e10f1b742232e4d6411e) --- .../dc/dml2/dml21/dml21_translation_helper.c | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 0c8ec30ea67268..731fbd4bc600b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -910,7 +910,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm } //TODO : Could be possibly moved to a common helper layer. -static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, unsigned int *plane_id) +static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id) { int i, j; @@ -918,10 +918,12 @@ static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const str return false; for (i = 0; i < context->stream_count; i++) { - for (j = 0; j < context->stream_status[i].plane_count; j++) { - if (context->stream_status[i].plane_states[j] == plane) { - *plane_id = (i << 16) | j; - return true; + if (context->streams[i]->stream_id == stream_id) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } } } } @@ -944,14 +946,14 @@ static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *d return location; } -static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, +static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context) { unsigned int plane_id; int i = 0; int location = -1; - if (!dml21_wrapper_get_plane_id(context, plane, &plane_id)) { + if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) { ASSERT(false); return -1; } @@ -1037,7 +1039,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; } else { for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { - disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->stream_status[stream_index].plane_states[plane_index], context); + disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context); if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_planes++; @@ -1048,7 +1050,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; - if (dml21_wrapper_get_plane_id(context, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) + if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; /* apply forced pstate policy */ From a3b7e65b6be59e686e163fa1ceb0922f996897c2 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 30 Apr 2025 11:11:47 -0300 Subject: [PATCH 1755/2924] drm/amd/display: Fix null check of pipe_ctx->plane_state for update_dchubp_dpp Similar to commit 6a057072ddd1 ("drm/amd/display: Fix null check for pipe_ctx->plane_state in dcn20_program_pipe") that addresses a null pointer dereference on dcn20_update_dchubp_dpp. This is the same function hooked for update_dchubp_dpp in dcn401, with the same issue. Fix possible null pointer deference on dcn401_program_pipe too. Fixes: 63ab80d9ac0a ("drm/amd/display: DML2.1 Post-Si Cleanup") Signed-off-by: Melissa Wen Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit d8d47f739752227957d8efc0cb894761bfe1d879) --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 5489f3d431f64d..3af6a3402b8949 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1980,9 +1980,9 @@ void dcn401_program_pipe( dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); } - if (pipe_ctx->update_flags.raw || - (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || - pipe_ctx->stream->update_flags.raw) + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw || + pipe_ctx->plane_state->update_flags.raw || + pipe_ctx->stream->update_flags.raw)) dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context); if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || From d33724ffb743d3d2698bd969e29253ae0cff9739 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 13 May 2025 11:20:24 +0800 Subject: [PATCH 1756/2924] drm/amd/display: Avoid flooding unnecessary info messages It's expected that we'll encounter temporary exceptions during aux transactions. Adjust logging from drm_info to drm_dbg_dp to prevent flooding with unnecessary log messages. Fixes: 3637e457eb00 ("drm/amd/display: Fix wrong handling for AUX_DEFER case") Cc: Mario Limonciello Cc: Alex Deucher Signed-off-by: Wayne Lin Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20250513032026.838036-1-Wayne.Lin@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 9a9c3e1fe5256da14a0a307dff0478f90c55fc8c) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index c93eef6f7a6b98..5cdbc86ef8f5a9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -107,7 +107,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, if (payload.write && result >= 0) { if (result) { /*one byte indicating partially written bytes*/ - drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX partially written\n"); result = payload.data[0]; } else if (!payload.reply[0]) /*I2C_ACK|AUX_ACK*/ @@ -133,11 +133,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } - drm_info(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + drm_dbg_dp(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); } if (payload.reply[0]) - drm_info(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", payload.reply[0]); return result; From 2d2f82e1a8a118d26c2489674b6f6cbb41d04376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 8 May 2025 13:29:31 +0200 Subject: [PATCH 1757/2924] drm/xe: Fix the gem shrinker name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xe buffer object shrinker name is visible in the /shrinker directory and most if not all other shinkers follow a naming convention that looks like -_: Follow the same convention for xe, changing the name to drm-xe_gem:. Other shrinkers typically use the device node for but since drm drivers typically don't have a single unique device- node, instead use the unique name in the drm device. Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos") Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Francois Dugast Link: https://lore.kernel.org/r/20250508112931.3347-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 243bf99e2fe75edf8df1711c1377b6fc020b806c) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_shrinker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 8184390f9c7b9c..86d47aaf035892 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -227,7 +227,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) if (!shrinker) return ERR_PTR(-ENOMEM); - shrinker->shrink = shrinker_alloc(0, "xe system shrinker"); + shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique); if (!shrinker->shrink) { kfree(shrinker); return ERR_PTR(-ENOMEM); From 24ee8d9432b5744fce090af3d829a39aa4abf63f Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 13 May 2025 20:48:57 +0000 Subject: [PATCH 1758/2924] x86/CPU/AMD: Add X86_FEATURE_ZEN6 Add a synthetic feature flag for Zen6. [ bp: Move the feature flag to a free slot and avoid future merge conflicts from incoming stuff. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250513204857.3376577-1-yazen.ghannam@amd.com --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/amd.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6c2c152d8a67b9..9bb17c714f4ae7 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -75,7 +75,7 @@ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ #define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ #define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ -/* Free ( 3*32+ 6) */ +#define X86_FEATURE_ZEN6 ( 3*32+ 6) /* CPU based on Zen6 microarchitecture */ /* Free ( 3*32+ 7) */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ #define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2b36379ff675dc..4e06baab40bb3f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -472,6 +472,11 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) case 0x60 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); break; + case 0x50 ... 0x5f: + case 0x90 ... 0xaf: + case 0xc0 ... 0xcf: + setup_force_cpu_cap(X86_FEATURE_ZEN6); + break; default: goto warn; } From 9f35e33144ae5377d6a8de86dd3bd4d995c6ac65 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 12 May 2025 19:58:39 -0700 Subject: [PATCH 1759/2924] x86/its: Fix build errors when CONFIG_MODULES=n Fix several build errors when CONFIG_MODULES=n, including the following: ../arch/x86/kernel/alternative.c:195:25: error: incomplete definition of type 'struct module' 195 | for (int i = 0; i < mod->its_num_pages; i++) { Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Acked-by: Dave Hansen Tested-by: Steven Rostedt (Google) Reviewed-by: Alexandre Chartre Signed-off-by: Linus Torvalds --- arch/x86/kernel/alternative.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 48fd04e9011483..45bcff181cbae9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -133,7 +133,9 @@ static bool cfi_paranoid __ro_after_init; #ifdef CONFIG_MITIGATION_ITS +#ifdef CONFIG_MODULES static struct module *its_mod; +#endif static void *its_page; static unsigned int its_offset; @@ -171,6 +173,7 @@ static void *its_init_thunk(void *thunk, int reg) return thunk + offset; } +#ifdef CONFIG_MODULES void its_init_mod(struct module *mod) { if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) @@ -209,6 +212,7 @@ void its_free_mod(struct module *mod) } kfree(mod->its_page_array); } +#endif /* CONFIG_MODULES */ static void *its_alloc(void) { @@ -217,6 +221,7 @@ static void *its_alloc(void) if (!page) return NULL; +#ifdef CONFIG_MODULES if (its_mod) { void *tmp = krealloc(its_mod->its_page_array, (its_mod->its_num_pages+1) * sizeof(void *), @@ -229,6 +234,7 @@ static void *its_alloc(void) execmem_make_temp_rw(page, PAGE_SIZE); } +#endif /* CONFIG_MODULES */ return no_free_ptr(page); } From 396786af1cea5ca1c041f113a6002d62f277ccb0 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 11 May 2025 07:12:00 +0200 Subject: [PATCH 1760/2924] tools: ynl-gen: Allow multi-attr without nested-attributes again Since commit ce6cb8113c84 ("tools: ynl-gen: individually free previous values on double set"), specifying the "multi-attr" property raises an error unless the "nested-attributes" property is specified as well: File "tools/net/ynl/./pyynl/ynl_gen_c.py", line 1147, in _load_nested_sets child = self.pure_nested_structs.get(nested) ^^^^^^ UnboundLocalError: cannot access local variable 'nested' where it is not associated with a value This appears to be a bug since there are existing specs which omit "nested-attributes" on "multi-attr" attributes. Also, according to Documentation/userspace-api/netlink/specs.rst, multi-attr "is the recommended way of implementing arrays (no extra nesting)", suggesting that nesting should even be avoided in favor of multi-attr. Fix the indentation of the if-block introduced by the commit to avoid the error. Fixes: ce6cb8113c84 ("tools: ynl-gen: individually free previous values on double set") Signed-off-by: Lukas Wunner Link: https://patch.msgid.link/d6b58684b7e5bfb628f7313e6893d0097904e1d1.1746940107.git.lukas@wunner.de Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 30c0a34b2784ec..a7f08edbc23598 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -1143,10 +1143,9 @@ def _load_nested_sets(self): self.pure_nested_structs[nested].request = True if attr in rs_members['reply']: self.pure_nested_structs[nested].reply = True - - if spec.is_multi_val(): - child = self.pure_nested_structs.get(nested) - child.in_multi_val = True + if spec.is_multi_val(): + child = self.pure_nested_structs.get(nested) + child.in_multi_val = True self._sort_pure_types() From 7fd7ad6f36af36f30a06d165eff3780cb139fa79 Mon Sep 17 00:00:00 2001 From: Konstantin Shkolnyy Date: Wed, 7 May 2025 10:14:56 -0500 Subject: [PATCH 1761/2924] vsock/test: Fix occasional failure in SIOCOUTQ tests These tests: "SOCK_STREAM ioctl(SIOCOUTQ) 0 unsent bytes" "SOCK_SEQPACKET ioctl(SIOCOUTQ) 0 unsent bytes" output: "Unexpected 'SIOCOUTQ' value, expected 0, got 64 (CLIENT)". They test that the SIOCOUTQ ioctl reports 0 unsent bytes after the data have been received by the other side. However, sometimes there is a delay in updating this "unsent bytes" counter, and the test fails even though the counter properly goes to 0 several milliseconds later. The delay occurs in the kernel because the used buffer notification callback virtio_vsock_tx_done(), called upon receipt of the data by the other side, doesn't update the counter itself. It delegates that to a kernel thread (via vsock->tx_work). Sometimes that thread is delayed more than the test expects. Change the test to poll SIOCOUTQ until it returns 0 or a timeout occurs. Signed-off-by: Konstantin Shkolnyy Reviewed-by: Stefano Garzarella Fixes: 18ee44ce97c1 ("test/vsock: add ioctl unsent bytes test") Link: https://patch.msgid.link/20250507151456.2577061-1-kshk@linux.ibm.com Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index d0f6d253ac72d0..613551132a9663 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1264,21 +1264,25 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type) send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); control_expectln("RECEIVED"); - ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); - if (ret < 0) { - if (errno == EOPNOTSUPP) { - fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); - } else { + /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though + * the "RECEIVED" message means that the other side has received the + * data, there can be a delay in our kernel before updating the "unsent + * bytes" counter. Repeat SIOCOUTQ until it returns 0. + */ + timeout_begin(TIMEOUT); + do { + ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + break; + } perror("ioctl"); exit(EXIT_FAILURE); } - } else if (ret == 0 && sock_bytes_unsent != 0) { - fprintf(stderr, - "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n", - sock_bytes_unsent); - exit(EXIT_FAILURE); - } - + timeout_check("SIOCOUTQ"); + } while (sock_bytes_unsent != 0); + timeout_end(); close(fd); } From 588431474eb7572e57a927fa8558c9ba2f8af143 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Sun, 11 May 2025 13:15:52 +0300 Subject: [PATCH 1762/2924] net/mlx5e: Disable MACsec offload for uplink representor profile MACsec offload is not supported in switchdev mode for uplink representors. When switching to the uplink representor profile, the MACsec offload feature must be cleared from the netdevice's features. If left enabled, attempts to add offloads result in a null pointer dereference, as the uplink representor does not support MACsec offload even though the feature bit remains set. Clear NETIF_F_HW_MACSEC in mlx5e_fix_uplink_rep_features(). Kernel log: Oops: general protection fault, probably for non-canonical address 0xdffffc000000000f: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000078-0x000000000000007f] CPU: 29 UID: 0 PID: 4714 Comm: ip Not tainted 6.14.0-rc4_for_upstream_debug_2025_03_02_17_35 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__mutex_lock+0x128/0x1dd0 Code: d0 7c 08 84 d2 0f 85 ad 15 00 00 8b 35 91 5c fe 03 85 f6 75 29 49 8d 7e 60 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 a6 15 00 00 4d 3b 76 60 0f 85 fd 0b 00 00 65 ff RSP: 0018:ffff888147a4f160 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000001 RDX: 000000000000000f RSI: 0000000000000000 RDI: 0000000000000078 RBP: ffff888147a4f2e0 R08: ffffffffa05d2c19 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 R13: dffffc0000000000 R14: 0000000000000018 R15: ffff888152de0000 FS: 00007f855e27d800(0000) GS:ffff88881ee80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004e5768 CR3: 000000013ae7c005 CR4: 0000000000372eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 Call Trace: ? die_addr+0x3d/0xa0 ? exc_general_protection+0x144/0x220 ? asm_exc_general_protection+0x22/0x30 ? mlx5e_macsec_add_secy+0xf9/0x700 [mlx5_core] ? __mutex_lock+0x128/0x1dd0 ? lockdep_set_lock_cmp_fn+0x190/0x190 ? mlx5e_macsec_add_secy+0xf9/0x700 [mlx5_core] ? mutex_lock_io_nested+0x1ae0/0x1ae0 ? lock_acquire+0x1c2/0x530 ? macsec_upd_offload+0x145/0x380 ? lockdep_hardirqs_on_prepare+0x400/0x400 ? kasan_save_stack+0x30/0x40 ? kasan_save_stack+0x20/0x40 ? kasan_save_track+0x10/0x30 ? __kasan_kmalloc+0x77/0x90 ? __kmalloc_noprof+0x249/0x6b0 ? genl_family_rcv_msg_attrs_parse.constprop.0+0xb5/0x240 ? mlx5e_macsec_add_secy+0xf9/0x700 [mlx5_core] mlx5e_macsec_add_secy+0xf9/0x700 [mlx5_core] ? mlx5e_macsec_add_rxsa+0x11a0/0x11a0 [mlx5_core] macsec_update_offload+0x26c/0x820 ? macsec_set_mac_address+0x4b0/0x4b0 ? lockdep_hardirqs_on_prepare+0x284/0x400 ? _raw_spin_unlock_irqrestore+0x47/0x50 macsec_upd_offload+0x2c8/0x380 ? macsec_update_offload+0x820/0x820 ? __nla_parse+0x22/0x30 ? genl_family_rcv_msg_attrs_parse.constprop.0+0x15e/0x240 genl_family_rcv_msg_doit+0x1cc/0x2a0 ? genl_family_rcv_msg_attrs_parse.constprop.0+0x240/0x240 ? cap_capable+0xd4/0x330 genl_rcv_msg+0x3ea/0x670 ? genl_family_rcv_msg_dumpit+0x2a0/0x2a0 ? lockdep_set_lock_cmp_fn+0x190/0x190 ? macsec_update_offload+0x820/0x820 netlink_rcv_skb+0x12b/0x390 ? genl_family_rcv_msg_dumpit+0x2a0/0x2a0 ? netlink_ack+0xd80/0xd80 ? rwsem_down_read_slowpath+0xf90/0xf90 ? netlink_deliver_tap+0xcd/0xac0 ? netlink_deliver_tap+0x155/0xac0 ? _copy_from_iter+0x1bb/0x12c0 genl_rcv+0x24/0x40 netlink_unicast+0x440/0x700 ? netlink_attachskb+0x760/0x760 ? lock_acquire+0x1c2/0x530 ? __might_fault+0xbb/0x170 netlink_sendmsg+0x749/0xc10 ? netlink_unicast+0x700/0x700 ? __might_fault+0xbb/0x170 ? netlink_unicast+0x700/0x700 __sock_sendmsg+0xc5/0x190 ____sys_sendmsg+0x53f/0x760 ? import_iovec+0x7/0x10 ? kernel_sendmsg+0x30/0x30 ? __copy_msghdr+0x3c0/0x3c0 ? filter_irq_stacks+0x90/0x90 ? stack_depot_save_flags+0x28/0xa30 ___sys_sendmsg+0xeb/0x170 ? kasan_save_stack+0x30/0x40 ? copy_msghdr_from_user+0x110/0x110 ? do_syscall_64+0x6d/0x140 ? lock_acquire+0x1c2/0x530 ? __virt_addr_valid+0x116/0x3b0 ? __virt_addr_valid+0x1da/0x3b0 ? lock_downgrade+0x680/0x680 ? __delete_object+0x21/0x50 __sys_sendmsg+0xf7/0x180 ? __sys_sendmsg_sock+0x20/0x20 ? kmem_cache_free+0x14c/0x4e0 ? __x64_sys_close+0x78/0xd0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7f855e113367 Code: 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 RSP: 002b:00007ffd15e90c88 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f855e113367 RDX: 0000000000000000 RSI: 00007ffd15e90cf0 RDI: 0000000000000004 RBP: 00007ffd15e90dbc R08: 0000000000000028 R09: 000000000045d100 R10: 00007f855e011dd8 R11: 0000000000000246 R12: 0000000000000019 R13: 0000000067c6b785 R14: 00000000004a1e80 R15: 0000000000000000 Modules linked in: 8021q garp mrp sch_ingress openvswitch nsh mlx5_ib mlx5_fwctl mlx5_dpll mlx5_core rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay zram zsmalloc fuse [last unloaded: mlx5_core] ---[ end trace 0000000000000000 ]--- Fixes: 8ff0ac5be144 ("net/mlx5: Add MACsec offload Tx command support") Signed-off-by: Carolina Jubran Reviewed-by: Shahar Shitrit Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1746958552-561295-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3506024c245391..9bd166f489e7ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4349,6 +4349,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n"); + features &= ~NETIF_F_HW_MACSEC; + if (netdev->features & NETIF_F_HW_MACSEC) + netdev_warn(netdev, "Disabling HW MACsec offload, not supported in switchdev mode\n"); + return features; } From d5c17e36549cb417f249ecbd20c9ab008aaf45d3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 12 May 2025 16:17:51 +0300 Subject: [PATCH 1763/2924] docs: networking: timestamping: improve stacked PHC sentence The first paragraph makes no grammatical sense. I suppose a portion of the intended sentece is missing: "[The challenge with ] stacked PHCs (...) is that they uncover bugs". Rephrase, and at the same time simplify the structure of the sentence a little bit, it is not easy to follow. Fixes: 94d9f78f4d64 ("docs: networking: timestamping: add section for stacked PHC devices") Signed-off-by: Vladimir Oltean Acked-by: Richard Cochran Link: https://patch.msgid.link/20250512131751.320283-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- Documentation/networking/timestamping.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index b8fef8101176dc..7aabead906487e 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -811,11 +811,9 @@ Documentation/devicetree/bindings/ptp/timestamper.txt for more details. 3.2.4 Other caveats for MAC drivers ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Stacked PHCs, especially DSA (but not only) - since that doesn't require any -modification to MAC drivers, so it is more difficult to ensure correctness of -all possible code paths - is that they uncover bugs which were impossible to -trigger before the existence of stacked PTP clocks. One example has to do with -this line of code, already presented earlier:: +The use of stacked PHCs may uncover MAC driver bugs which were impossible to +trigger without them. One example has to do with this line of code, already +presented earlier:: skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; From 9d8a99c5a7c7f4f7eca2c168a4ec254409670035 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Mon, 12 May 2025 10:18:27 +0530 Subject: [PATCH 1764/2924] qlcnic: fix memory leak in qlcnic_sriov_channel_cfg_cmd() In one of the error paths in qlcnic_sriov_channel_cfg_cmd(), the memory allocated in qlcnic_sriov_alloc_bc_mbx_args() for mailbox arguments is not freed. Fix that by jumping to the error path that frees them, by calling qlcnic_free_mbx_args(). This was found using static analysis. Fixes: f197a7aa6288 ("qlcnic: VF-PF communication channel implementation") Signed-off-by: Abdun Nihaal Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250512044829.36400-1-abdun.nihaal@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 28d24d59efb84f..d57b976b904095 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -1484,8 +1484,11 @@ static int qlcnic_sriov_channel_cfg_cmd(struct qlcnic_adapter *adapter, u8 cmd_o } cmd_op = (cmd.rsp.arg[0] & 0xff); - if (cmd.rsp.arg[0] >> 25 == 2) - return 2; + if (cmd.rsp.arg[0] >> 25 == 2) { + ret = 2; + goto out; + } + if (cmd_op == QLCNIC_BC_CMD_CHANNEL_INIT) set_bit(QLC_BC_VF_STATE, &vf->state); else From 0b91fda3a1f044141e1e615456ff62508c32b202 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Wed, 7 May 2025 13:31:58 +0200 Subject: [PATCH 1765/2924] xfrm: Sanitize marks before insert Prior to this patch, the mark is sanitized (applying the state's mask to the state's value) only on inserts when checking if a conflicting XFRM state or policy exists. We discovered in Cilium that this same sanitization does not occur in the hot-path __xfrm_state_lookup. In the hot-path, the sk_buff's mark is simply compared to the state's value: if ((mark & x->mark.m) != x->mark.v) continue; Therefore, users can define unsanitized marks (ex. 0xf42/0xf00) which will never match any packet. This commit updates __xfrm_state_insert and xfrm_policy_insert to store the sanitized marks, thus removing this footgun. This has the side effect of changing the ip output, as the returned mark will have the mask applied to it when printed. Fixes: 3d6acfa7641f ("xfrm: SA lookups with mark") Signed-off-by: Paul Chaignon Signed-off-by: Louis DeLosSantos Co-developed-by: Louis DeLosSantos Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 3 +++ net/xfrm/xfrm_state.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 143ac3aa753746..f4bad8c895d668 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1581,6 +1581,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct xfrm_policy *delpol; struct hlist_head *chain; + /* Sanitize mark before store */ + policy->mark.v &= policy->mark.m; + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); if (chain) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2f57dabcc70b65..07fe8e5daa32b0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1718,6 +1718,9 @@ static void __xfrm_state_insert(struct xfrm_state *x) list_add(&x->km.all, &net->xfrm.state_all); + /* Sanitize mark before store */ + x->mark.v &= x->mark.m; + h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, From a21675ee3b1ba094e229ae4cd8bddf7d215ab1b9 Mon Sep 17 00:00:00 2001 From: Alan Adamson Date: Thu, 8 May 2025 15:38:00 -0700 Subject: [PATCH 1766/2924] nvme: multipath: enable BLK_FEAT_ATOMIC_WRITES for multipathing A change to QEMU resulted in all nvme controllers (single and multi-controller subsystems) to have its CMIC.MCTRS bit set which indicates the subsystem supports multiple controllers and it is possible a namespace can be shared between those multiple controllers in a multipath configuration. When a namespace of a CMIC.MCTRS enabled subsystem is allocated, a multipath node is created. The queue limits for this node are inherited from the namespace being allocated. When inheriting queue limits, the features being inherited need to be specified. The atomic write feature (BLK_FEAT_ATOMIC_WRITES) was not specified so the atomic queue limits were not inherited by the multipath disk node which resulted in the sysfs atomic write attributes being zeroed. The fix is to include BLK_FEAT_ATOMIC_WRITES in the list of features to be inherited. Signed-off-by: Alan Adamson Reviewed-by: John Garry Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 61b1d267ffdaa9..537a6271a06e24 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -638,7 +638,8 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) blk_set_stacking_limits(&lim); lim.dma_alignment = 3; - lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL; + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES; if (head->ids.csi == NVME_CSI_ZNS) lim.features |= BLK_FEAT_ZONED; From 3f097adb9b6c804636bcf8d01e0e7bc037bee0d3 Mon Sep 17 00:00:00 2001 From: Hal Feng Date: Tue, 22 Apr 2025 18:12:44 +0800 Subject: [PATCH 1767/2924] phy: starfive: jh7110-usb: Fix USB 2.0 host occasional detection failure JH7110 USB 2.0 host fails to detect USB 2.0 devices occasionally. With a long time of debugging and testing, we found that setting Rx clock gating control signal to normal power consumption mode can solve this problem. Signed-off-by: Hal Feng Link: https://lore.kernel.org/r/20250422101244.51686-1-hal.feng@starfivetech.com Signed-off-by: Vinod Koul --- drivers/phy/starfive/phy-jh7110-usb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/phy/starfive/phy-jh7110-usb.c b/drivers/phy/starfive/phy-jh7110-usb.c index cb5454fbe2c8fa..b505d89860b439 100644 --- a/drivers/phy/starfive/phy-jh7110-usb.c +++ b/drivers/phy/starfive/phy-jh7110-usb.c @@ -18,6 +18,8 @@ #include #define USB_125M_CLK_RATE 125000000 +#define USB_CLK_MODE_OFF 0x0 +#define USB_CLK_MODE_RX_NORMAL_PWR BIT(1) #define USB_LS_KEEPALIVE_OFF 0x4 #define USB_LS_KEEPALIVE_ENABLE BIT(4) @@ -78,6 +80,7 @@ static int jh7110_usb2_phy_init(struct phy *_phy) { struct jh7110_usb2_phy *phy = phy_get_drvdata(_phy); int ret; + unsigned int val; ret = clk_set_rate(phy->usb_125m_clk, USB_125M_CLK_RATE); if (ret) @@ -87,6 +90,10 @@ static int jh7110_usb2_phy_init(struct phy *_phy) if (ret) return ret; + val = readl(phy->regs + USB_CLK_MODE_OFF); + val |= USB_CLK_MODE_RX_NORMAL_PWR; + writel(val, phy->regs + USB_CLK_MODE_OFF); + return 0; } From f9475055b11c0c70979bd1667a76b2ebae638eb7 Mon Sep 17 00:00:00 2001 From: Algea Cao Date: Sun, 27 Apr 2025 17:51:24 +0800 Subject: [PATCH 1768/2924] phy: phy-rockchip-samsung-hdptx: Fix PHY PLL output 50.25MHz error When using HDMI PLL frequency division coefficient at 50.25MHz that is calculated by rk_hdptx_phy_clk_pll_calc(), it fails to get PHY LANE lock. Although the calculated values are within the allowable range of PHY PLL configuration. In order to fix the PHY LANE lock error and provide the expected 50.25MHz output, manually compute the required PHY PLL frequency division coefficient and add it to ropll_tmds_cfg configuration table. Signed-off-by: Algea Cao Reviewed-by: Cristian Ciocaltea Acked-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250427095124.3354439-1-algea.cao@rock-chips.com Signed-off-by: Vinod Koul --- drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index fe7c0574835636..77236f012a1f75 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -476,6 +476,8 @@ static const struct ropll_config ropll_tmds_cfg[] = { 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, + { 502500, 84, 84, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 11, 1, 4, 5, + 4, 11, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5, 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 400000, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, From fb98bd0a13de2c9d96cb5c00c81b5ca118ac9d71 Mon Sep 17 00:00:00 2001 From: Alessandro Grassi Date: Fri, 2 May 2025 11:55:20 +0200 Subject: [PATCH 1769/2924] spi: spi-sun4i: fix early activation The SPI interface is activated before the CPOL setting is applied. In that moment, the clock idles high and CS goes low. After a short delay, CPOL and other settings are applied, which may cause the clock to change state and idle low. This transition is not part of a clock cycle, and it can confuse the receiving device. To prevent this unexpected transition, activate the interface while CPOL and the other settings are being applied. Signed-off-by: Alessandro Grassi Link: https://patch.msgid.link/20250502095520.13825-1-alessandro.grassi@mailbox.org Signed-off-by: Mark Brown --- drivers/spi/spi-sun4i.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c index f89826d7dc4964..aa92fd5a35a98f 100644 --- a/drivers/spi/spi-sun4i.c +++ b/drivers/spi/spi-sun4i.c @@ -264,6 +264,9 @@ static int sun4i_spi_transfer_one(struct spi_controller *host, else reg |= SUN4I_CTL_DHB; + /* Now that the settings are correct, enable the interface */ + reg |= SUN4I_CTL_ENABLE; + sun4i_spi_write(sspi, SUN4I_CTL_REG, reg); /* Ensure that we have a parent clock fast enough */ @@ -404,7 +407,7 @@ static int sun4i_spi_runtime_resume(struct device *dev) } sun4i_spi_write(sspi, SUN4I_CTL_REG, - SUN4I_CTL_ENABLE | SUN4I_CTL_MASTER | SUN4I_CTL_TP); + SUN4I_CTL_MASTER | SUN4I_CTL_TP); return 0; From 6b0cd72757c69bc2d45da42b41023e288d02e772 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Thu, 8 May 2025 09:49:43 +0300 Subject: [PATCH 1770/2924] regulator: max20086: fix invalid memory access max20086_parse_regulators_dt() calls of_regulator_match() using an array of struct of_regulator_match allocated on the stack for the matches argument. of_regulator_match() calls devm_of_regulator_put_matches(), which calls devres_alloc() to allocate a struct devm_of_regulator_matches which will be de-allocated using devm_of_regulator_put_matches(). struct devm_of_regulator_matches is populated with the stack allocated matches array. If the device fails to probe, devm_of_regulator_put_matches() will be called and will try to call of_node_put() on that stack pointer, generating the following dmesg entries: max20086 6-0028: Failed to read DEVICE_ID reg: -121 kobject: '\xc0$\xa5\x03' (000000002cebcb7a): is not initialized, yet kobject_put() is being called. Followed by a stack trace matching the call flow described above. Switch to allocating the matches array using devm_kcalloc() to avoid accessing the stack pointer long after it's out of scope. This also has the advantage of allowing multiple max20086 to probe without overriding the data stored inside the global of_regulator_match. Fixes: bfff546aae50 ("regulator: Add MAX20086-MAX20089 driver") Signed-off-by: Cosmin Tanislav Link: https://patch.msgid.link/20250508064947.2567255-1-demonsingur@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/max20086-regulator.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c index 59eb23d467ec05..198d45f8e88493 100644 --- a/drivers/regulator/max20086-regulator.c +++ b/drivers/regulator/max20086-regulator.c @@ -132,7 +132,7 @@ static int max20086_regulators_register(struct max20086 *chip) static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) { - struct of_regulator_match matches[MAX20086_MAX_REGULATORS] = { }; + struct of_regulator_match *matches; struct device_node *node; unsigned int i; int ret; @@ -143,6 +143,11 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) return -ENODEV; } + matches = devm_kcalloc(chip->dev, chip->info->num_outputs, + sizeof(*matches), GFP_KERNEL); + if (!matches) + return -ENOMEM; + for (i = 0; i < chip->info->num_outputs; ++i) matches[i].name = max20086_output_names[i]; From 314007549d89adebdd1e214a743d7e26edbd075e Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Mon, 12 May 2025 11:59:01 +0530 Subject: [PATCH 1771/2924] octeontx2-pf: Fix ethtool support for SDP representors The hardware supports multiple MAC types, including RPM, SDP, and LBK. However, features such as link settings and pause frames are only available on RPM MAC, and not supported on SDP or LBK. This patch updates the ethtool operations logic accordingly to reflect this behavior. Fixes: 2f7f33a09516 ("octeontx2-pf: Add representors for sdp MAC") Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 010385b2998869..45b8c9230184d8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -315,7 +315,7 @@ static void otx2_get_pauseparam(struct net_device *netdev, struct otx2_nic *pfvf = netdev_priv(netdev); struct cgx_pause_frm_cfg *req, *rsp; - if (is_otx2_lbkvf(pfvf->pdev)) + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) return; mutex_lock(&pfvf->mbox.lock); @@ -347,7 +347,7 @@ static int otx2_set_pauseparam(struct net_device *netdev, if (pause->autoneg) return -EOPNOTSUPP; - if (is_otx2_lbkvf(pfvf->pdev)) + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) return -EOPNOTSUPP; if (pause->rx_pause) @@ -941,8 +941,8 @@ static u32 otx2_get_link(struct net_device *netdev) { struct otx2_nic *pfvf = netdev_priv(netdev); - /* LBK link is internal and always UP */ - if (is_otx2_lbkvf(pfvf->pdev)) + /* LBK and SDP links are internal and always UP */ + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) return 1; return pfvf->linfo.link_up; } @@ -1413,7 +1413,7 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev, { struct otx2_nic *pfvf = netdev_priv(netdev); - if (is_otx2_lbkvf(pfvf->pdev)) { + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) { cmd->base.duplex = DUPLEX_FULL; cmd->base.speed = SPEED_100000; } else { From 9e000f1b7f31684cc5927e034360b87ac7919593 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Wed, 14 May 2025 17:24:44 +0800 Subject: [PATCH 1772/2924] ALSA: es1968: Add error handling for snd_pcm_hw_constraint_pow2() The function snd_es1968_capture_open() calls the function snd_pcm_hw_constraint_pow2(), but does not check its return value. A proper implementation can be found in snd_cx25821_pcm_open(). Add error handling for snd_pcm_hw_constraint_pow2() and propagate its error code. Fixes: b942cf815b57 ("[ALSA] es1968 - Fix stuttering capture") Cc: stable@vger.kernel.org # v2.6.22 Signed-off-by: Wentao Liang Link: https://patch.msgid.link/20250514092444.331-1-vulab@iscas.ac.cn Signed-off-by: Takashi Iwai --- sound/pci/es1968.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c index c6c018b40c69f9..4e0693f0ab0f89 100644 --- a/sound/pci/es1968.c +++ b/sound/pci/es1968.c @@ -1561,7 +1561,7 @@ static int snd_es1968_capture_open(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; struct es1968 *chip = snd_pcm_substream_chip(substream); struct esschan *es; - int apu1, apu2; + int err, apu1, apu2; apu1 = snd_es1968_alloc_apu_pair(chip, ESM_APU_PCM_CAPTURE); if (apu1 < 0) @@ -1605,7 +1605,9 @@ static int snd_es1968_capture_open(struct snd_pcm_substream *substream) runtime->hw = snd_es1968_capture; runtime->hw.buffer_bytes_max = runtime->hw.period_bytes_max = calc_available_memory_size(chip) - 1024; /* keep MIXBUF size */ - snd_pcm_hw_constraint_pow2(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES); + err = snd_pcm_hw_constraint_pow2(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES); + if (err < 0) + return err; spin_lock_irq(&chip->substream_lock); list_add(&es->list, &chip->substream_list); From b04f0d89e880bc2cca6a5c73cf287082c91878da Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Fri, 9 May 2025 15:48:52 +0200 Subject: [PATCH 1773/2924] arm64: dts: marvell: uDPU: define pinctrl state for alarm LEDs The two alarm LEDs of on the uDPU board are stopped working since commit 78efa53e715e ("leds: Init leds class earlier"). The LEDs are driven by the GPIO{15,16} pins of the North Bridge GPIO controller. These pins are part of the 'spi_quad' pin group for which the 'spi' function is selected via the default pinctrl state of the 'spi' node. This is wrong however, since in order to allow controlling the LEDs, the pins should use the 'gpio' function. Before the commit mentined above, the 'spi' function is selected first by the pinctrl core before probing the spi driver, but then it gets overridden to 'gpio' implicitly via the devm_gpiod_get_index_optional() call from the 'leds-gpio' driver. After the commit, the LED subsystem gets initialized before the SPI subsystem, so the function of the pin group remains 'spi' which in turn prevents controlling of the LEDs. Despite the change of the initialization order, the root cause is that the pinctrl state definition is wrong since its initial commit 0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board"), To fix the problem, override the function in the 'spi_quad_pins' node to 'gpio' and move the pinctrl state definition from the 'spi' node into the 'leds' node. Cc: stable@vger.kernel.org # needs adjustment for < 6.1 Fixes: 0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board") Signed-off-by: Gabor Juhos Signed-off-by: Imre Kaloz Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi index 3a9b6907185d03..24282084570787 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi @@ -26,6 +26,8 @@ leds { compatible = "gpio-leds"; + pinctrl-names = "default"; + pinctrl-0 = <&spi_quad_pins>; led-power1 { label = "udpu:green:power"; @@ -82,8 +84,6 @@ &spi0 { status = "okay"; - pinctrl-names = "default"; - pinctrl-0 = <&spi_quad_pins>; flash@0 { compatible = "jedec,spi-nor"; @@ -108,6 +108,10 @@ }; }; +&spi_quad_pins { + function = "gpio"; +}; + &pinctrl_nb { i2c2_recovery_pins: i2c2-recovery-pins { groups = "i2c2"; From 83c178470e0bf690d34c8c08440f2421b82e881c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Apr 2025 16:08:23 +0300 Subject: [PATCH 1774/2924] phy: tegra: xusb: remove a stray unlock We used to take a lock in tegra186_utmi_bias_pad_power_on() but now we have moved the lock into the caller. Unfortunately, when we moved the lock this unlock was left behind and it results in a double unlock. Delete it now. Fixes: b47158fb4295 ("phy: tegra: xusb: Use a bitmask for UTMI pad power state tracking") Signed-off-by: Dan Carpenter Reviewed-by: Jon Hunter Link: https://lore.kernel.org/r/aAjmR6To4EnvRl4G@stanley.mountain Signed-off-by: Vinod Koul --- drivers/phy/tegra/xusb-tegra186.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index cc7b8a6a999f74..23a23f2d64e586 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -656,8 +656,6 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl) } else { clk_disable_unprepare(priv->usb2_trk_clk); } - - mutex_unlock(&padctl->lock); } static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) From 54c4c58713aaff76c2422ff5750e557ab3b100d7 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 7 May 2025 15:50:28 +0300 Subject: [PATCH 1775/2924] phy: renesas: rcar-gen3-usb2: Fix role detection on unbind/bind It has been observed on the Renesas RZ/G3S SoC that unbinding and binding the PHY driver leads to role autodetection failures. This issue occurs when PHY 3 is the first initialized PHY. PHY 3 does not have an interrupt associated with the USB2_INT_ENABLE register (as rcar_gen3_int_enable[3] = 0). As a result, rcar_gen3_init_otg() is called to initialize OTG without enabling PHY interrupts. To resolve this, add rcar_gen3_is_any_otg_rphy_initialized() and call it in role_store(), role_show(), and rcar_gen3_init_otg(). At the same time, rcar_gen3_init_otg() is only called when initialization for a PHY with interrupt bits is in progress. As a result, the struct rcar_gen3_phy::otg_initialized is no longer needed. Fixes: 549b6b55b005 ("phy: renesas: rcar-gen3-usb2: enable/disable independent irqs") Cc: stable@vger.kernel.org Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Reviewed-by: Lad Prabhakar Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250507125032.565017-2-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Vinod Koul --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 33 ++++++++++-------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 775f4f973a6cc2..46afba2fe0dc69 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -107,7 +107,6 @@ struct rcar_gen3_phy { struct rcar_gen3_chan *ch; u32 int_enable_bits; bool initialized; - bool otg_initialized; bool powered; }; @@ -320,16 +319,15 @@ static bool rcar_gen3_is_any_rphy_initialized(struct rcar_gen3_chan *ch) return false; } -static bool rcar_gen3_needs_init_otg(struct rcar_gen3_chan *ch) +static bool rcar_gen3_is_any_otg_rphy_initialized(struct rcar_gen3_chan *ch) { - int i; - - for (i = 0; i < NUM_OF_PHYS; i++) { - if (ch->rphys[i].otg_initialized) - return false; + for (enum rcar_gen3_phy_index i = PHY_INDEX_BOTH_HC; i <= PHY_INDEX_EHCI; + i++) { + if (ch->rphys[i].initialized) + return true; } - return true; + return false; } static bool rcar_gen3_are_all_rphys_power_off(struct rcar_gen3_chan *ch) @@ -351,7 +349,7 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr, bool is_b_device; enum phy_mode cur_mode, new_mode; - if (!ch->is_otg_channel || !rcar_gen3_is_any_rphy_initialized(ch)) + if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch)) return -EIO; if (sysfs_streq(buf, "host")) @@ -389,7 +387,7 @@ static ssize_t role_show(struct device *dev, struct device_attribute *attr, { struct rcar_gen3_chan *ch = dev_get_drvdata(dev); - if (!ch->is_otg_channel || !rcar_gen3_is_any_rphy_initialized(ch)) + if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch)) return -EIO; return sprintf(buf, "%s\n", rcar_gen3_is_host(ch) ? "host" : @@ -402,6 +400,9 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch) void __iomem *usb2_base = ch->base; u32 val; + if (!ch->is_otg_channel || rcar_gen3_is_any_otg_rphy_initialized(ch)) + return; + /* Should not use functions of read-modify-write a register */ val = readl(usb2_base + USB2_LINECTRL1); val = (val & ~USB2_LINECTRL1_DP_RPD) | USB2_LINECTRL1_DPRPD_EN | @@ -465,12 +466,9 @@ static int rcar_gen3_phy_usb2_init(struct phy *p) writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET); writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET); - /* Initialize otg part */ - if (channel->is_otg_channel) { - if (rcar_gen3_needs_init_otg(channel)) - rcar_gen3_init_otg(channel); - rphy->otg_initialized = true; - } + /* Initialize otg part (only if we initialize a PHY with IRQs). */ + if (rphy->int_enable_bits) + rcar_gen3_init_otg(channel); rphy->initialized = true; @@ -486,9 +484,6 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p) rphy->initialized = false; - if (channel->is_otg_channel) - rphy->otg_initialized = false; - val = readl(usb2_base + USB2_INT_ENABLE); val &= ~rphy->int_enable_bits; if (!rcar_gen3_is_any_rphy_initialized(channel)) From de76809f60cc938d3580bbbd5b04b7d12af6ce3a Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 7 May 2025 15:50:29 +0300 Subject: [PATCH 1776/2924] phy: renesas: rcar-gen3-usb2: Move IRQ request in probe Commit 08b0ad375ca6 ("phy: renesas: rcar-gen3-usb2: move IRQ registration to init") moved the IRQ request operation from probe to struct phy_ops::phy_init API to avoid triggering interrupts (which lead to register accesses) while the PHY clocks (enabled through runtime PM APIs) are not active. If this happens, it results in a synchronous abort. One way to reproduce this issue is by enabling CONFIG_DEBUG_SHIRQ, which calls free_irq() on driver removal. Move the IRQ request and free operations back to probe, and take the runtime PM state into account in IRQ handler. This commit is preparatory for the subsequent fixes in this series. Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Reviewed-by: Lad Prabhakar Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250507125032.565017-3-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Vinod Koul --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 46 +++++++++++++----------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 46afba2fe0dc69..bb05fd26eb7f29 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -120,7 +120,6 @@ struct rcar_gen3_chan { struct work_struct work; struct mutex lock; /* protects rphys[...].powered */ enum usb_dr_mode dr_mode; - int irq; u32 obint_enable_bits; bool extcon_host; bool is_otg_channel; @@ -428,16 +427,25 @@ static irqreturn_t rcar_gen3_phy_usb2_irq(int irq, void *_ch) { struct rcar_gen3_chan *ch = _ch; void __iomem *usb2_base = ch->base; - u32 status = readl(usb2_base + USB2_OBINTSTA); + struct device *dev = ch->dev; irqreturn_t ret = IRQ_NONE; + u32 status; + pm_runtime_get_noresume(dev); + + if (pm_runtime_suspended(dev)) + goto rpm_put; + + status = readl(usb2_base + USB2_OBINTSTA); if (status & ch->obint_enable_bits) { - dev_vdbg(ch->dev, "%s: %08x\n", __func__, status); + dev_vdbg(dev, "%s: %08x\n", __func__, status); writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA); rcar_gen3_device_recognition(ch); ret = IRQ_HANDLED; } +rpm_put: + pm_runtime_put_noidle(dev); return ret; } @@ -447,17 +455,6 @@ static int rcar_gen3_phy_usb2_init(struct phy *p) struct rcar_gen3_chan *channel = rphy->ch; void __iomem *usb2_base = channel->base; u32 val; - int ret; - - if (!rcar_gen3_is_any_rphy_initialized(channel) && channel->irq >= 0) { - INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work); - ret = request_irq(channel->irq, rcar_gen3_phy_usb2_irq, - IRQF_SHARED, dev_name(channel->dev), channel); - if (ret < 0) { - dev_err(channel->dev, "No irq handler (%d)\n", channel->irq); - return ret; - } - } /* Initialize USB2 part */ val = readl(usb2_base + USB2_INT_ENABLE); @@ -490,9 +487,6 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p) val &= ~USB2_INT_ENABLE_UCOM_INTEN; writel(val, usb2_base + USB2_INT_ENABLE); - if (channel->irq >= 0 && !rcar_gen3_is_any_rphy_initialized(channel)) - free_irq(channel->irq, channel); - return 0; } @@ -698,7 +692,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct rcar_gen3_chan *channel; struct phy_provider *provider; - int ret = 0, i; + int ret = 0, i, irq; if (!dev->of_node) { dev_err(dev, "This driver needs device tree\n"); @@ -714,8 +708,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) return PTR_ERR(channel->base); channel->obint_enable_bits = USB2_OBINT_BITS; - /* get irq number here and request_irq for OTG in phy_init */ - channel->irq = platform_get_irq_optional(pdev, 0); channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node); if (channel->dr_mode != USB_DR_MODE_UNKNOWN) { channel->is_otg_channel = true; @@ -784,6 +776,20 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) channel->vbus = NULL; } + irq = platform_get_irq_optional(pdev, 0); + if (irq < 0 && irq != -ENXIO) { + ret = irq; + goto error; + } else if (irq > 0) { + INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work); + ret = devm_request_irq(dev, irq, rcar_gen3_phy_usb2_irq, + IRQF_SHARED, dev_name(dev), channel); + if (ret < 0) { + dev_err(dev, "Failed to request irq (%d)\n", irq); + goto error; + } + } + provider = devm_of_phy_provider_register(dev, rcar_gen3_phy_usb2_xlate); if (IS_ERR(provider)) { dev_err(dev, "Failed to register PHY provider\n"); From 55a387ebb9219cbe4edfa8ba9996ccb0e7ad4932 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 7 May 2025 15:50:30 +0300 Subject: [PATCH 1777/2924] phy: renesas: rcar-gen3-usb2: Lock around hardware registers and driver data The phy-rcar-gen3-usb2 driver exposes four individual PHYs that are requested and configured by PHY users. The struct phy_ops APIs access the same set of registers to configure all PHYs. Additionally, PHY settings can be modified through sysfs or an IRQ handler. While some struct phy_ops APIs are protected by a driver-wide mutex, others rely on individual PHY-specific mutexes. This approach can lead to various issues, including: 1/ the IRQ handler may interrupt PHY settings in progress, racing with hardware configuration protected by a mutex lock 2/ due to msleep(20) in rcar_gen3_init_otg(), while a configuration thread suspends to wait for the delay, another thread may try to configure another PHY (with phy_init() + phy_power_on()); re-running the phy_init() goes to the exact same configuration code, re-running the same hardware configuration on the same set of registers (and bits) which might impact the result of the msleep for the 1st configuring thread 3/ sysfs can configure the hardware (though role_store()) and it can still race with the phy_init()/phy_power_on() APIs calling into the drivers struct phy_ops To address these issues, add a spinlock to protect hardware register access and driver private data structures (e.g., calls to rcar_gen3_is_any_rphy_initialized()). Checking driver-specific data remains necessary as all PHY instances share common settings. With this change, the existing mutex protection is removed and the cleanup.h helpers are used. While at it, to keep the code simpler, do not skip regulator_enable()/regulator_disable() APIs in rcar_gen3_phy_usb2_power_on()/rcar_gen3_phy_usb2_power_off() as the regulators enable/disable operations are reference counted anyway. Fixes: f3b5a8d9b50d ("phy: rcar-gen3-usb2: Add R-Car Gen3 USB2 PHY driver") Cc: stable@vger.kernel.org Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Reviewed-by: Lad Prabhakar Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250507125032.565017-4-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Vinod Koul --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 49 +++++++++++++----------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index bb05fd26eb7f29..00ce564463de88 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -9,6 +9,7 @@ * Copyright (C) 2014 Cogent Embedded, Inc. */ +#include #include #include #include @@ -118,7 +119,7 @@ struct rcar_gen3_chan { struct regulator *vbus; struct reset_control *rstc; struct work_struct work; - struct mutex lock; /* protects rphys[...].powered */ + spinlock_t lock; /* protects access to hardware and driver data structure. */ enum usb_dr_mode dr_mode; u32 obint_enable_bits; bool extcon_host; @@ -348,6 +349,8 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr, bool is_b_device; enum phy_mode cur_mode, new_mode; + guard(spinlock_irqsave)(&ch->lock); + if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch)) return -EIO; @@ -415,7 +418,7 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch) val = readl(usb2_base + USB2_ADPCTRL); writel(val | USB2_ADPCTRL_IDPULLUP, usb2_base + USB2_ADPCTRL); } - msleep(20); + mdelay(20); writel(0xffffffff, usb2_base + USB2_OBINTSTA); writel(ch->obint_enable_bits, usb2_base + USB2_OBINTEN); @@ -436,12 +439,14 @@ static irqreturn_t rcar_gen3_phy_usb2_irq(int irq, void *_ch) if (pm_runtime_suspended(dev)) goto rpm_put; - status = readl(usb2_base + USB2_OBINTSTA); - if (status & ch->obint_enable_bits) { - dev_vdbg(dev, "%s: %08x\n", __func__, status); - writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA); - rcar_gen3_device_recognition(ch); - ret = IRQ_HANDLED; + scoped_guard(spinlock, &ch->lock) { + status = readl(usb2_base + USB2_OBINTSTA); + if (status & ch->obint_enable_bits) { + dev_vdbg(dev, "%s: %08x\n", __func__, status); + writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA); + rcar_gen3_device_recognition(ch); + ret = IRQ_HANDLED; + } } rpm_put: @@ -456,6 +461,8 @@ static int rcar_gen3_phy_usb2_init(struct phy *p) void __iomem *usb2_base = channel->base; u32 val; + guard(spinlock_irqsave)(&channel->lock); + /* Initialize USB2 part */ val = readl(usb2_base + USB2_INT_ENABLE); val |= USB2_INT_ENABLE_UCOM_INTEN | rphy->int_enable_bits; @@ -479,6 +486,8 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p) void __iomem *usb2_base = channel->base; u32 val; + guard(spinlock_irqsave)(&channel->lock); + rphy->initialized = false; val = readl(usb2_base + USB2_INT_ENABLE); @@ -498,16 +507,17 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p) u32 val; int ret = 0; - mutex_lock(&channel->lock); - if (!rcar_gen3_are_all_rphys_power_off(channel)) - goto out; - if (channel->vbus) { ret = regulator_enable(channel->vbus); if (ret) - goto out; + return ret; } + guard(spinlock_irqsave)(&channel->lock); + + if (!rcar_gen3_are_all_rphys_power_off(channel)) + goto out; + val = readl(usb2_base + USB2_USBCTR); val |= USB2_USBCTR_PLL_RST; writel(val, usb2_base + USB2_USBCTR); @@ -517,7 +527,6 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p) out: /* The powered flag should be set for any other phys anyway */ rphy->powered = true; - mutex_unlock(&channel->lock); return 0; } @@ -528,18 +537,12 @@ static int rcar_gen3_phy_usb2_power_off(struct phy *p) struct rcar_gen3_chan *channel = rphy->ch; int ret = 0; - mutex_lock(&channel->lock); - rphy->powered = false; - - if (!rcar_gen3_are_all_rphys_power_off(channel)) - goto out; + scoped_guard(spinlock_irqsave, &channel->lock) + rphy->powered = false; if (channel->vbus) ret = regulator_disable(channel->vbus); -out: - mutex_unlock(&channel->lock); - return ret; } @@ -750,7 +753,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) if (phy_data->no_adp_ctrl) channel->obint_enable_bits = USB2_OBINT_IDCHG_EN; - mutex_init(&channel->lock); + spin_lock_init(&channel->lock); for (i = 0; i < NUM_OF_PHYS; i++) { channel->rphys[i].phy = devm_phy_create(dev, NULL, phy_data->phy_usb2_ops); From 9ce71e85b29eb63e48e294479742e670513f03a0 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 7 May 2025 15:50:31 +0300 Subject: [PATCH 1778/2924] phy: renesas: rcar-gen3-usb2: Assert PLL reset on PHY power off Assert PLL reset on PHY power off. This saves power. Fixes: f3b5a8d9b50d ("phy: rcar-gen3-usb2: Add R-Car Gen3 USB2 PHY driver") Cc: stable@vger.kernel.org Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Reviewed-by: Lad Prabhakar Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250507125032.565017-5-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Vinod Koul --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 00ce564463de88..118899efda70b3 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -537,9 +537,17 @@ static int rcar_gen3_phy_usb2_power_off(struct phy *p) struct rcar_gen3_chan *channel = rphy->ch; int ret = 0; - scoped_guard(spinlock_irqsave, &channel->lock) + scoped_guard(spinlock_irqsave, &channel->lock) { rphy->powered = false; + if (rcar_gen3_are_all_rphys_power_off(channel)) { + u32 val = readl(channel->base + USB2_USBCTR); + + val |= USB2_USBCTR_PLL_RST; + writel(val, channel->base + USB2_USBCTR); + } + } + if (channel->vbus) ret = regulator_disable(channel->vbus); From 86e70849f4b2b4597ac9f7c7931f2a363774be25 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 7 May 2025 15:50:32 +0300 Subject: [PATCH 1779/2924] phy: renesas: rcar-gen3-usb2: Set timing registers only once phy-rcar-gen3-usb2 driver exports 4 PHYs. The timing registers are common to all PHYs. There is no need to set them every time a PHY is initialized. Set timing register only when the 1st PHY is initialized. Fixes: f3b5a8d9b50d ("phy: rcar-gen3-usb2: Add R-Car Gen3 USB2 PHY driver") Cc: stable@vger.kernel.org Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Reviewed-by: Lad Prabhakar Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20250507125032.565017-6-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Vinod Koul --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 118899efda70b3..9fdf17e0848a28 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -467,8 +467,11 @@ static int rcar_gen3_phy_usb2_init(struct phy *p) val = readl(usb2_base + USB2_INT_ENABLE); val |= USB2_INT_ENABLE_UCOM_INTEN | rphy->int_enable_bits; writel(val, usb2_base + USB2_INT_ENABLE); - writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET); - writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET); + + if (!rcar_gen3_is_any_rphy_initialized(channel)) { + writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET); + writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET); + } /* Initialize otg part (only if we initialize a PHY with IRQs). */ if (rphy->int_enable_bits) From b2ea5f49580c0762d17d80d8083cb89bc3acf74f Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Mon, 3 Mar 2025 15:27:39 +0800 Subject: [PATCH 1780/2924] phy: Fix error handling in tegra_xusb_port_init If device_add() fails, do not use device_unregister() for error handling. device_unregister() consists two functions: device_del() and put_device(). device_unregister() should only be called after device_add() succeeded because device_del() undoes what device_add() does if successful. Change device_unregister() to put_device() call before returning from the function. As comment of device_add() says, 'if device_add() succeeds, you should call device_del() when you want to get rid of it. If device_add() has not succeeded, use only put_device() to drop the reference count'. Found by code review. Cc: stable@vger.kernel.org Fixes: 53d2a715c240 ("phy: Add Tegra XUSB pad controller support") Signed-off-by: Ma Ke Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20250303072739.3874987-1-make24@iscas.ac.cn Signed-off-by: Vinod Koul --- drivers/phy/tegra/xusb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 79d4814d758d5e..c89df95aa6ca98 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -548,16 +548,16 @@ static int tegra_xusb_port_init(struct tegra_xusb_port *port, err = dev_set_name(&port->dev, "%s-%u", name, index); if (err < 0) - goto unregister; + goto put_device; err = device_add(&port->dev); if (err < 0) - goto unregister; + goto put_device; return 0; -unregister: - device_unregister(&port->dev); +put_device: + put_device(&port->dev); return err; } From d871198ee431d90f5308d53998c1ba1d5db5619a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 13 May 2025 15:02:23 -0600 Subject: [PATCH 1781/2924] io_uring/fdinfo: grab ctx->uring_lock around io_uring_show_fdinfo() Not everything requires locking in there, which is why the 'has_lock' variable exists. But enough does that it's a bit unwieldy to manage. Wrap the whole thing in a ->uring_lock trylock, and just return with no output if we fail to grab it. The existing trylock() will already have greatly diminished utility/output for the failure case. This fixes an issue with reading the SQE fields, if the ring is being actively resized at the same time. Reported-by: Jann Horn Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS") Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 48 ++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 9414ca6d101c0e..e0d6a59a89fa1b 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -86,13 +86,8 @@ static inline void napi_show_fdinfo(struct io_ring_ctx *ctx, } #endif -/* - * Caller holds a reference to the file already, we don't need to do - * anything else to get an extra reference. - */ -__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) +static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { - struct io_ring_ctx *ctx = file->private_data; struct io_overflow_cqe *ocqe; struct io_rings *r = ctx->rings; struct rusage sq_usage; @@ -106,7 +101,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) unsigned int sq_entries, cq_entries; int sq_pid = -1, sq_cpu = -1; u64 sq_total_time = 0, sq_work_time = 0; - bool has_lock; unsigned int i; if (ctx->flags & IORING_SETUP_CQE32) @@ -176,15 +170,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) seq_printf(m, "\n"); } - /* - * Avoid ABBA deadlock between the seq lock and the io_uring mutex, - * since fdinfo case grabs it in the opposite direction of normal use - * cases. If we fail to get the lock, we just don't iterate any - * structures that could be going away outside the io_uring mutex. - */ - has_lock = mutex_trylock(&ctx->uring_lock); - - if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { + if (ctx->flags & IORING_SETUP_SQPOLL) { struct io_sq_data *sq = ctx->sq_data; /* @@ -206,7 +192,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time); seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time); seq_printf(m, "UserFiles:\t%u\n", ctx->file_table.data.nr); - for (i = 0; has_lock && i < ctx->file_table.data.nr; i++) { + for (i = 0; i < ctx->file_table.data.nr; i++) { struct file *f = NULL; if (ctx->file_table.data.nodes[i]) @@ -218,7 +204,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) } } seq_printf(m, "UserBufs:\t%u\n", ctx->buf_table.nr); - for (i = 0; has_lock && i < ctx->buf_table.nr; i++) { + for (i = 0; i < ctx->buf_table.nr; i++) { struct io_mapped_ubuf *buf = NULL; if (ctx->buf_table.nodes[i]) @@ -228,7 +214,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) else seq_printf(m, "%5u: \n", i); } - if (has_lock && !xa_empty(&ctx->personalities)) { + if (!xa_empty(&ctx->personalities)) { unsigned long index; const struct cred *cred; @@ -238,7 +224,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) } seq_puts(m, "PollList:\n"); - for (i = 0; has_lock && i < (1U << ctx->cancel_table.hash_bits); i++) { + for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) { struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i]; struct io_kiocb *req; @@ -247,9 +233,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) task_work_pending(req->tctx->task)); } - if (has_lock) - mutex_unlock(&ctx->uring_lock); - seq_puts(m, "CqOverflowList:\n"); spin_lock(&ctx->completion_lock); list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { @@ -262,4 +245,23 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) spin_unlock(&ctx->completion_lock); napi_show_fdinfo(ctx, m); } + +/* + * Caller holds a reference to the file already, we don't need to do + * anything else to get an extra reference. + */ +__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) +{ + struct io_ring_ctx *ctx = file->private_data; + + /* + * Avoid ABBA deadlock between the seq lock and the io_uring mutex, + * since fdinfo case grabs it in the opposite direction of normal use + * cases. + */ + if (mutex_trylock(&ctx->uring_lock)) { + __io_uring_show_fdinfo(ctx, m); + mutex_unlock(&ctx->uring_lock); + } +} #endif From 8695f060a02953b33ac6240895dcb9c7ce16c91c Mon Sep 17 00:00:00 2001 From: Alan Adamson Date: Thu, 8 May 2025 15:38:01 -0700 Subject: [PATCH 1782/2924] nvme: all namespaces in a subsystem must adhere to a common atomic write size The first namespace configured in a subsystem sets the subsystem's atomic write size based on its AWUPF or NAWUPF. Subsequent namespaces must have an atomic write size (per their AWUPF or NAWUPF) less than or equal to the subsystem's atomic write size, or their probing will be rejected. Signed-off-by: Alan Adamson [hch: fold in review comments from John Garry] Signed-off-by: Christoph Hellwig Reviewed-by: John Garry --- drivers/nvme/host/core.c | 30 +++++++++++++++++++++++++++--- drivers/nvme/host/nvme.h | 3 ++- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ac53629fce68d4..6b04473c0ab73c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2059,7 +2059,21 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; + atomic_bs = (1 + ns->ctrl->awupf) * bs; + + /* + * Set subsystem atomic bs. + */ + if (ns->ctrl->subsys->atomic_bs) { + if (atomic_bs != ns->ctrl->subsys->atomic_bs) { + dev_err_ratelimited(ns->ctrl->device, + "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", + ns->disk ? ns->disk->disk_name : "?", + ns->ctrl->subsys->atomic_bs, + atomic_bs); + } + } else + ns->ctrl->subsys->atomic_bs = atomic_bs; nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs); } @@ -2201,6 +2215,17 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, nvme_set_chunk_sectors(ns, id, &lim); if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; + + /* + * Validate the max atomic write size fits within the subsystem's + * atomic write capabilities. + */ + if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) { + blk_mq_unfreeze_queue(ns->disk->queue, memflags); + ret = -ENXIO; + goto out; + } + nvme_config_discard(ns, &lim); if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && ns->head->ids.csi == NVME_CSI_ZNS) @@ -3031,7 +3056,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) kfree(subsys); return -EINVAL; } - subsys->awupf = le16_to_cpu(id->awupf); nvme_mpath_default_iopolicy(subsys); subsys->dev.class = &nvme_subsys_class; @@ -3441,7 +3465,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); - + ctrl->awupf = le16_to_cpu(id->awupf); out_free: kfree(id); return ret; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 51e07864212710..8fc4683418a3a6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -410,6 +410,7 @@ struct nvme_ctrl { enum nvme_ctrl_type cntrltype; enum nvme_dctype dctype; + u16 awupf; /* 0's based value. */ }; static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) @@ -442,11 +443,11 @@ struct nvme_subsystem { u8 cmic; enum nvme_subsys_type subtype; u16 vendor_id; - u16 awupf; /* 0's based awupf value. */ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy; #endif + u32 atomic_bs; }; /* From 72c7d62583ebce7baeb61acce6057c361f73be4a Mon Sep 17 00:00:00 2001 From: Hyejeong Choi Date: Mon, 12 May 2025 21:06:38 -0500 Subject: [PATCH 1783/2924] dma-buf: insert memory barrier before updating num_fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit smp_store_mb() inserts memory barrier after storing operation. It is different with what the comment is originally aiming so Null pointer dereference can be happened if memory update is reordered. Signed-off-by: Hyejeong Choi Fixes: a590d0fdbaa5 ("dma-buf: Update reservation shared_count after adding the new fence") CC: stable@vger.kernel.org Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250513020638.GA2329653@au1-maretx-p37.eng.sarc.samsung.com Signed-off-by: Christian König --- drivers/dma-buf/dma-resv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 5f8d010516f07f..b1ef4546346d44 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -320,8 +320,9 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, count++; dma_resv_list_set(fobj, i, fence, usage); - /* pointer update must be visible before we extend the num_fences */ - smp_store_mb(fobj->num_fences, count); + /* fence update must be visible before we extend the num_fences */ + smp_wmb(); + fobj->num_fences = count; } EXPORT_SYMBOL(dma_resv_add_fence); From 09dab6ce0243bc1939bd1f77a066ccdd72d44efe Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Mon, 5 May 2025 16:35:49 -0700 Subject: [PATCH 1784/2924] xfs: free up mp->m_free[0].count in error case In xfs_init_percpu_counters(), memory for mp->m_free[0].count wasn't freed in error case. Free it up in this patch. Signed-off-by: Wengang Wang Fixes: 712bae96631852 ("xfs: generalize the freespace and reserved blocks handling") Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index b2dd0c0bf50979..3be041647ec17e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1149,7 +1149,7 @@ xfs_init_percpu_counters( return 0; free_freecounters: - while (--i > 0) + while (--i >= 0) percpu_counter_destroy(&mp->m_free[i].count); percpu_counter_destroy(&mp->m_delalloc_rtextents); free_delalloc: From fbecd731de05707a73a3d86c6cbe6b9441cdbedc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 May 2025 16:43:05 +0200 Subject: [PATCH 1785/2924] xfs: fix zoned GC data corruption due to wrong bv_offset xfs_zone_gc_write_chunk writes out the data buffer read in earlier using the same bio, and currenly looks at bv_offset for the offset into the scratch folio for that. But commit 26064d3e2b4d ("block: fix adding folio to bio") changed how bv_page and bv_offset are calculated for adding larger folios, breaking this fragile logic. Switch to extracting the full physical address from the old bio_vec, and calculate the offset into the folio from that instead. This fixes data corruption during garbage collection with heavy rockdsb workloads. Thanks to Hans for tracking down the culprit commit during long bisection sessions. Fixes: 26064d3e2b4d ("block: fix adding folio to bio") Fixes: 080d01c41d44 ("xfs: implement zoned garbage collection") Reported-by: Hans Holmberg Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Tested-by: Hans Holmberg Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 81c94dd1d5966b..d613a4094db65c 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -807,7 +807,8 @@ xfs_zone_gc_write_chunk( { struct xfs_zone_gc_data *data = chunk->data; struct xfs_mount *mp = chunk->ip->i_mount; - unsigned int folio_offset = chunk->bio.bi_io_vec->bv_offset; + phys_addr_t bvec_paddr = + bvec_phys(bio_first_bvec_all(&chunk->bio)); struct xfs_gc_bio *split_chunk; if (chunk->bio.bi_status) @@ -822,7 +823,7 @@ xfs_zone_gc_write_chunk( bio_reset(&chunk->bio, mp->m_rtdev_targp->bt_bdev, REQ_OP_WRITE); bio_add_folio_nofail(&chunk->bio, chunk->scratch->folio, chunk->len, - folio_offset); + offset_in_folio(chunk->scratch->folio, bvec_paddr)); while ((split_chunk = xfs_zone_gc_split_write(data, chunk))) xfs_zone_gc_submit_write(data, split_chunk); From 95b613339c0e5fe651a3ef7605708478bc34a5af Mon Sep 17 00:00:00 2001 From: "Nirjhar Roy (IBM)" Date: Mon, 12 May 2025 22:00:32 +0530 Subject: [PATCH 1786/2924] xfs: Fail remount with noattr2 on a v5 with v4 enabled Bug: When we compile the kernel with CONFIG_XFS_SUPPORT_V4=y, remount with "-o remount,noattr2" on a v5 XFS does not fail explicitly. Reproduction: mkfs.xfs -f /dev/loop0 mount /dev/loop0 /mnt/scratch mount -o remount,noattr2 /dev/loop0 /mnt/scratch However, with CONFIG_XFS_SUPPORT_V4=n, the remount correctly fails explicitly. This is because the way the following 2 functions are defined: static inline bool xfs_has_attr2 (struct xfs_mount *mp) { return !IS_ENABLED(CONFIG_XFS_SUPPORT_V4) || (mp->m_features & XFS_FEAT_ATTR2); } static inline bool xfs_has_noattr2 (const struct xfs_mount *mp) { return mp->m_features & XFS_FEAT_NOATTR2; } xfs_has_attr2() returns true when CONFIG_XFS_SUPPORT_V4=n and hence, the following if condition in xfs_fs_validate_params() succeeds and returns -EINVAL: /* * We have not read the superblock at this point, so only the attr2 * mount option can set the attr2 feature by this stage. */ if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) { xfs_warn(mp, "attr2 and noattr2 cannot both be specified."); return -EINVAL; } With CONFIG_XFS_SUPPORT_V4=y, xfs_has_attr2() always return false and hence no error is returned. Fix: Check if the existing mount has crc enabled(i.e, of type v5 and has attr2 enabled) and the remount has noattr2, if yes, return -EINVAL. I have tested xfs/{189,539} in fstests with v4 and v5 XFS with both CONFIG_XFS_SUPPORT_V4=y/n and they both behave as expected. This patch also fixes remount from noattr2 -> attr2 (on a v4 xfs). Related discussion in [1] [1] https://lore.kernel.org/all/Z65o6nWxT00MaUrW@dread.disaster.area/ Signed-off-by: Nirjhar Roy (IBM) Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_super.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3be041647ec17e..4a11ddccc563ad 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2114,6 +2114,21 @@ xfs_fs_reconfigure( if (error) return error; + /* attr2 -> noattr2 */ + if (xfs_has_noattr2(new_mp)) { + if (xfs_has_crc(mp)) { + xfs_warn(mp, + "attr2 is always enabled for a V5 filesystem - can't be changed."); + return -EINVAL; + } + mp->m_features &= ~XFS_FEAT_ATTR2; + mp->m_features |= XFS_FEAT_NOATTR2; + } else if (xfs_has_attr2(new_mp)) { + /* noattr2 -> attr2 */ + mp->m_features &= ~XFS_FEAT_NOATTR2; + mp->m_features |= XFS_FEAT_ATTR2; + } + /* inode32 -> inode64 */ if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { mp->m_features &= ~XFS_FEAT_SMALL_INUMS; @@ -2126,6 +2141,17 @@ xfs_fs_reconfigure( mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); } + /* + * Now that mp has been modified according to the remount options, we + * do a final option validation with xfs_finish_flags() just like it is + * just like it is done during mount. We cannot use + * done during mount. We cannot use xfs_finish_flags() on new_mp as it + * contains only the user given options. + */ + error = xfs_finish_flags(mp); + if (error) + return error; + /* ro -> rw */ if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { error = xfs_remount_rw(mp); From fa8deae92f473a2ebc0e1c7cfa316f5c083e1880 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Mon, 12 May 2025 13:42:55 +0200 Subject: [PATCH 1787/2924] xfs: Fix a comment on xfs_ail_delete It doesn't return anything. Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino Reviewed-by: Chandan Babu R Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_trans_ail.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 85a649fec6acf6..7d327a3e5a736a 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -315,7 +315,7 @@ xfs_ail_splice( } /* - * Delete the given item from the AIL. Return a pointer to the item. + * Delete the given item from the AIL. */ static void xfs_ail_delete( From 08c73a4b2e3cd2ff9db0ecb3c5c0dd7e23edc770 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Mon, 12 May 2025 13:42:56 +0200 Subject: [PATCH 1788/2924] xfs: Fix comment on xfs_trans_ail_update_bulk() This function doesn't take the AIL lock, but should be called with AIL lock held. Also (hopefuly) simplify the comment. Signed-off-by: Carlos Maiolino Reviewed-by: Christoph Hellwig Reviewed-by: Chandan Babu R Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_trans_ail.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 7d327a3e5a736a..67c328d23e4ae9 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -777,26 +777,28 @@ xfs_ail_update_finish( } /* - * xfs_trans_ail_update - bulk AIL insertion operation. + * xfs_trans_ail_update_bulk - bulk AIL insertion operation. * - * @xfs_trans_ail_update takes an array of log items that all need to be + * @xfs_trans_ail_update_bulk takes an array of log items that all need to be * positioned at the same LSN in the AIL. If an item is not in the AIL, it will - * be added. Otherwise, it will be repositioned by removing it and re-adding - * it to the AIL. If we move the first item in the AIL, update the log tail to - * match the new minimum LSN in the AIL. + * be added. Otherwise, it will be repositioned by removing it and re-adding + * it to the AIL. * - * This function takes the AIL lock once to execute the update operations on - * all the items in the array, and as such should not be called with the AIL - * lock held. As a result, once we have the AIL lock, we need to check each log - * item LSN to confirm it needs to be moved forward in the AIL. + * If we move the first item in the AIL, update the log tail to match the new + * minimum LSN in the AIL. * - * To optimise the insert operation, we delete all the items from the AIL in - * the first pass, moving them into a temporary list, then splice the temporary - * list into the correct position in the AIL. This avoids needing to do an - * insert operation on every item. + * This function should be called with the AIL lock held. * - * This function must be called with the AIL lock held. The lock is dropped - * before returning. + * To optimise the insert operation, we add all items to a temporary list, then + * splice this list into the correct position in the AIL. + * + * Items that are already in the AIL are first deleted from their current + * location before being added to the temporary list. + * + * This avoids needing to do an insert operation on every item. + * + * The AIL lock is dropped by xfs_ail_update_finish() before returning to + * the caller. */ void xfs_trans_ail_update_bulk( From 3fd2f4bc010cdfbc07dd21018dc65bd9370eb7a4 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Fri, 4 Apr 2025 20:02:09 +0800 Subject: [PATCH 1789/2924] dmaengine: idxd: fix memory leak in error handling path of idxd_setup_wqs Memory allocated for wqs is not freed if an error occurs during idxd_setup_wqs(). To fix it, free the allocated memory in the reverse order of allocation before exiting the function in case of an error. Fixes: 7c5dd23e57c1 ("dmaengine: idxd: fix wq conf_dev 'struct device' lifetime") Fixes: 700af3a0a26c ("dmaengine: idxd: add 'struct idxd_dev' as wrapper for conf_dev") Fixes: de5819b99489 ("dmaengine: idxd: track enabled workqueues in bitmap") Fixes: b0325aefd398 ("dmaengine: idxd: add WQ operation cap restriction support") Cc: stable@vger.kernel.org Signed-off-by: Shuai Xue Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20250404120217.48772-2-xueshuai@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index fca1d29249998d..80fb189c9624f6 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -169,8 +169,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev)); if (!idxd->wq_enable_map) { - kfree(idxd->wqs); - return -ENOMEM; + rc = -ENOMEM; + goto err_bitmap; } for (i = 0; i < idxd->max_wqs; i++) { @@ -189,10 +189,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) conf_dev->bus = &dsa_bus_type; conf_dev->type = &idxd_wq_device_type; rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id); - if (rc < 0) { - put_device(conf_dev); + if (rc < 0) goto err; - } mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); @@ -203,7 +201,6 @@ static int idxd_setup_wqs(struct idxd_device *idxd) wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { - put_device(conf_dev); rc = -ENOMEM; goto err; } @@ -211,9 +208,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) if (idxd->hw.wq_cap.op_config) { wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); if (!wq->opcap_bmap) { - put_device(conf_dev); rc = -ENOMEM; - goto err; + goto err_opcap_bmap; } bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } @@ -224,12 +220,28 @@ static int idxd_setup_wqs(struct idxd_device *idxd) return 0; - err: +err_opcap_bmap: + kfree(wq->wqcfg); + +err: + put_device(conf_dev); + kfree(wq); + while (--i >= 0) { wq = idxd->wqs[i]; + if (idxd->hw.wq_cap.op_config) + bitmap_free(wq->opcap_bmap); + kfree(wq->wqcfg); conf_dev = wq_confdev(wq); put_device(conf_dev); + kfree(wq); + } + bitmap_free(idxd->wq_enable_map); + +err_bitmap: + kfree(idxd->wqs); + return rc; } From 817bced19d1dbdd0b473580d026dc0983e30e17b Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Fri, 4 Apr 2025 20:02:10 +0800 Subject: [PATCH 1790/2924] dmaengine: idxd: fix memory leak in error handling path of idxd_setup_engines Memory allocated for engines is not freed if an error occurs during idxd_setup_engines(). To fix it, free the allocated memory in the reverse order of allocation before exiting the function in case of an error. Fixes: 75b911309060 ("dmaengine: idxd: fix engine conf_dev lifetime") Cc: stable@vger.kernel.org Signed-off-by: Shuai Xue Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20250404120217.48772-3-xueshuai@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 80fb189c9624f6..ff6ec3c0f60456 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -275,6 +275,7 @@ static int idxd_setup_engines(struct idxd_device *idxd) rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id); if (rc < 0) { put_device(conf_dev); + kfree(engine); goto err; } @@ -288,7 +289,10 @@ static int idxd_setup_engines(struct idxd_device *idxd) engine = idxd->engines[i]; conf_dev = engine_confdev(engine); put_device(conf_dev); + kfree(engine); } + kfree(idxd->engines); + return rc; } From aa6f4f945b10eac57aed46154ae7d6fada7fccc7 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Fri, 4 Apr 2025 20:02:11 +0800 Subject: [PATCH 1791/2924] dmaengine: idxd: fix memory leak in error handling path of idxd_setup_groups Memory allocated for groups is not freed if an error occurs during idxd_setup_groups(). To fix it, free the allocated memory in the reverse order of allocation before exiting the function in case of an error. Fixes: defe49f96012 ("dmaengine: idxd: fix group conf_dev lifetime") Cc: stable@vger.kernel.org Signed-off-by: Shuai Xue Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20250404120217.48772-4-xueshuai@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index ff6ec3c0f60456..7f0a26e2e0a5ce 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -326,6 +326,7 @@ static int idxd_setup_groups(struct idxd_device *idxd) rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id); if (rc < 0) { put_device(conf_dev); + kfree(group); goto err; } @@ -350,7 +351,10 @@ static int idxd_setup_groups(struct idxd_device *idxd) while (--i >= 0) { group = idxd->groups[i]; put_device(group_confdev(group)); + kfree(group); } + kfree(idxd->groups); + return rc; } From 61259fb96e023f7299c442c48b13e72c441fc0f2 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Fri, 4 Apr 2025 20:02:12 +0800 Subject: [PATCH 1792/2924] dmaengine: idxd: Add missing cleanup for early error out in idxd_setup_internals The idxd_setup_internals() is missing some cleanup when things fail in the middle. Add the appropriate cleanup routines: - cleanup groups - cleanup enginces - cleanup wqs to make sure it exits gracefully. Fixes: defe49f96012 ("dmaengine: idxd: fix group conf_dev lifetime") Cc: stable@vger.kernel.org Suggested-by: Fenghua Yu Signed-off-by: Shuai Xue Reviewed-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20250404120217.48772-5-xueshuai@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 58 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 7f0a26e2e0a5ce..a40fb2fd500618 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -155,6 +155,25 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd) pci_free_irq_vectors(pdev); } +static void idxd_clean_wqs(struct idxd_device *idxd) +{ + struct idxd_wq *wq; + struct device *conf_dev; + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + if (idxd->hw.wq_cap.op_config) + bitmap_free(wq->opcap_bmap); + kfree(wq->wqcfg); + conf_dev = wq_confdev(wq); + put_device(conf_dev); + kfree(wq); + } + bitmap_free(idxd->wq_enable_map); + kfree(idxd->wqs); +} + static int idxd_setup_wqs(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -245,6 +264,21 @@ static int idxd_setup_wqs(struct idxd_device *idxd) return rc; } +static void idxd_clean_engines(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + struct device *conf_dev; + int i; + + for (i = 0; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + conf_dev = engine_confdev(engine); + put_device(conf_dev); + kfree(engine); + } + kfree(idxd->engines); +} + static int idxd_setup_engines(struct idxd_device *idxd) { struct idxd_engine *engine; @@ -296,6 +330,19 @@ static int idxd_setup_engines(struct idxd_device *idxd) return rc; } +static void idxd_clean_groups(struct idxd_device *idxd) +{ + struct idxd_group *group; + int i; + + for (i = 0; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + put_device(group_confdev(group)); + kfree(group); + } + kfree(idxd->groups); +} + static int idxd_setup_groups(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -410,7 +457,7 @@ static int idxd_init_evl(struct idxd_device *idxd) static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; - int rc, i; + int rc; init_waitqueue_head(&idxd->cmd_waitq); @@ -441,14 +488,11 @@ static int idxd_setup_internals(struct idxd_device *idxd) err_evl: destroy_workqueue(idxd->wq); err_wkq_create: - for (i = 0; i < idxd->max_groups; i++) - put_device(group_confdev(idxd->groups[i])); + idxd_clean_groups(idxd); err_group: - for (i = 0; i < idxd->max_engines; i++) - put_device(engine_confdev(idxd->engines[i])); + idxd_clean_engines(idxd); err_engine: - for (i = 0; i < idxd->max_wqs; i++) - put_device(wq_confdev(idxd->wqs[i])); + idxd_clean_wqs(idxd); err_wqs: return rc; } From 61d651572b6c4fe50c7b39a390760f3a910c7ccf Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Fri, 4 Apr 2025 20:02:13 +0800 Subject: [PATCH 1793/2924] dmaengine: idxd: Add missing cleanups in cleanup internals The idxd_cleanup_internals() function only decreases the reference count of groups, engines, and wqs but is missing the step to release memory resources. To fix this, use the cleanup helper to properly release the memory resources. Fixes: ddf742d4f3f1 ("dmaengine: idxd: Add missing cleanup for early error out in probe call") Cc: stable@vger.kernel.org Signed-off-by: Shuai Xue Reviewed-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20250404120217.48772-6-xueshuai@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index a40fb2fd500618..f8129d2d53f1f1 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -407,14 +407,9 @@ static int idxd_setup_groups(struct idxd_device *idxd) static void idxd_cleanup_internals(struct idxd_device *idxd) { - int i; - - for (i = 0; i < idxd->max_groups; i++) - put_device(group_confdev(idxd->groups[i])); - for (i = 0; i < idxd->max_engines; i++) - put_device(engine_confdev(idxd->engines[i])); - for (i = 0; i < idxd->max_wqs; i++) - put_device(wq_confdev(idxd->wqs[i])); + idxd_clean_groups(idxd); + idxd_clean_engines(idxd); + idxd_clean_wqs(idxd); destroy_workqueue(idxd->wq); } From 46a5cca76c76c86063000a12936f8e7875295838 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Fri, 4 Apr 2025 20:02:14 +0800 Subject: [PATCH 1794/2924] dmaengine: idxd: fix memory leak in error handling path of idxd_alloc Memory allocated for idxd is not freed if an error occurs during idxd_alloc(). To fix it, free the allocated memory in the reverse order of allocation before exiting the function in case of an error. Fixes: a8563a33a5e2 ("dmanegine: idxd: reformat opcap output to match bitmap_parse() input") Cc: stable@vger.kernel.org Signed-off-by: Shuai Xue Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20250404120217.48772-7-xueshuai@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index f8129d2d53f1f1..302d8983ed8c7b 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -604,28 +604,34 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type); idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL); if (idxd->id < 0) - return NULL; + goto err_ida; idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev)); - if (!idxd->opcap_bmap) { - ida_free(&idxd_ida, idxd->id); - return NULL; - } + if (!idxd->opcap_bmap) + goto err_opcap; device_initialize(conf_dev); conf_dev->parent = dev; conf_dev->bus = &dsa_bus_type; conf_dev->type = idxd->data->dev_type; rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id); - if (rc < 0) { - put_device(conf_dev); - return NULL; - } + if (rc < 0) + goto err_name; spin_lock_init(&idxd->dev_lock); spin_lock_init(&idxd->cmd_lock); return idxd; + +err_name: + put_device(conf_dev); + bitmap_free(idxd->opcap_bmap); +err_opcap: + ida_free(&idxd_ida, idxd->id); +err_ida: + kfree(idxd); + + return NULL; } static int idxd_enable_system_pasid(struct idxd_device *idxd) From 90022b3a6981ec234902be5dbf0f983a12c759fc Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Fri, 4 Apr 2025 20:02:15 +0800 Subject: [PATCH 1795/2924] dmaengine: idxd: fix memory leak in error handling path of idxd_pci_probe Memory allocated for idxd is not freed if an error occurs during idxd_pci_probe(). To fix it, free the allocated memory in the reverse order of allocation before exiting the function in case of an error. Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Cc: stable@vger.kernel.org Signed-off-by: Shuai Xue Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Link: https://lore.kernel.org/r/20250404120217.48772-8-xueshuai@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 302d8983ed8c7b..f2b5b17538c069 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -587,6 +587,17 @@ static void idxd_read_caps(struct idxd_device *idxd) idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET); } +static void idxd_free(struct idxd_device *idxd) +{ + if (!idxd) + return; + + put_device(idxd_confdev(idxd)); + bitmap_free(idxd->opcap_bmap); + ida_free(&idxd_ida, idxd->id); + kfree(idxd); +} + static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) { struct device *dev = &pdev->dev; @@ -1255,7 +1266,7 @@ int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev, err: pci_iounmap(pdev, idxd->reg_base); err_iomap: - put_device(idxd_confdev(idxd)); + idxd_free(idxd); err_idxd_alloc: pci_disable_device(pdev); return rc; From d5449ff1b04dfe9ed8e455769aa01e4c2ccf6805 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Fri, 4 Apr 2025 20:02:16 +0800 Subject: [PATCH 1796/2924] dmaengine: idxd: Add missing idxd cleanup to fix memory leak in remove call The remove call stack is missing idxd cleanup to free bitmap, ida and the idxd_device. Call idxd_free() helper routines to make sure we exit gracefully. Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Cc: stable@vger.kernel.org Suggested-by: Vinicius Costa Gomes Signed-off-by: Shuai Xue Reviewed-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20250404120217.48772-9-xueshuai@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index f2b5b17538c069..974b926bd93009 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -1335,6 +1335,7 @@ static void idxd_remove(struct pci_dev *pdev) destroy_workqueue(idxd->wq); perfmon_pmu_remove(idxd); put_device(idxd_confdev(idxd)); + idxd_free(idxd); } static struct pci_driver idxd_pci_driver = { From a409e919ca321cc0e28f8abf96fde299f0072a81 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Fri, 4 Apr 2025 20:02:17 +0800 Subject: [PATCH 1797/2924] dmaengine: idxd: Refactor remove call with idxd_cleanup() helper The idxd_cleanup() helper cleans up perfmon, interrupts, internals and so on. Refactor remove call with the idxd_cleanup() helper to avoid code duplication. Note, this also fixes the missing put_device() for idxd groups, enginces and wqs. Fixes: bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") Cc: stable@vger.kernel.org Suggested-by: Vinicius Costa Gomes Signed-off-by: Shuai Xue Reviewed-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20250404120217.48772-10-xueshuai@linux.alibaba.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 974b926bd93009..760b7d81fcd846 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -1308,7 +1308,6 @@ static void idxd_shutdown(struct pci_dev *pdev) static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); - struct idxd_irq_entry *irq_entry; idxd_unregister_devices(idxd); /* @@ -1321,21 +1320,12 @@ static void idxd_remove(struct pci_dev *pdev) get_device(idxd_confdev(idxd)); device_unregister(idxd_confdev(idxd)); idxd_shutdown(pdev); - if (device_pasid_enabled(idxd)) - idxd_disable_system_pasid(idxd); idxd_device_remove_debugfs(idxd); - - irq_entry = idxd_get_ie(idxd, 0); - free_irq(irq_entry->vector, irq_entry); - pci_free_irq_vectors(pdev); + idxd_cleanup(idxd); pci_iounmap(pdev, idxd->reg_base); - if (device_user_pasid_enabled(idxd)) - idxd_disable_sva(pdev); - pci_disable_device(pdev); - destroy_workqueue(idxd->wq); - perfmon_pmu_remove(idxd); put_device(idxd_confdev(idxd)); idxd_free(idxd); + pci_disable_device(pdev); } static struct pci_driver idxd_pci_driver = { From ae74cd15ade833adc289279b5c6f12e78f64d4d7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 8 May 2025 10:05:48 -0700 Subject: [PATCH 1798/2924] dmaengine: idxd: Fix ->poll() return value The fix to block access from different address space did not return a correct value for ->poll() change. kernel test bot reported that a return value of type __poll_t is expected rather than int. Fix to return POLLNVAL to indicate invalid request. Fixes: 8dfa57aabff6 ("dmaengine: idxd: Fix allowing write() from different address spaces") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505081851.rwD7jVxg-lkp@intel.com/ Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/20250508170548.2747425-1-dave.jiang@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index a2fb2b84775263..6d12033649f817 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -500,7 +500,7 @@ static __poll_t idxd_cdev_poll(struct file *filp, __poll_t out = 0; if (current->mm != ctx->mm) - return -EPERM; + return POLLNVAL; poll_wait(filp, &wq->err_queue, wait); spin_lock(&idxd->dev_lock); From 157ae5ffd76a2857ccb4b7ce40bc5a344ca00395 Mon Sep 17 00:00:00 2001 From: Qiu-ji Chen Date: Thu, 8 May 2025 15:36:33 +0800 Subject: [PATCH 1799/2924] dmaengine: mediatek: Fix a possible deadlock error in mtk_cqdma_tx_status() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a potential deadlock bug. Observe that in the mtk-cqdma.c file, functions like mtk_cqdma_issue_pending() and mtk_cqdma_free_active_desc() properly acquire the pc lock before the vc lock when handling pc and vc fields. However, mtk_cqdma_tx_status() violates this order by first acquiring the vc lock before invoking mtk_cqdma_find_active_desc(), which subsequently takes the pc lock. This reversed locking sequence (vc → pc) contradicts the established pc → vc order and creates deadlock risks. Fix the issue by moving the vc lock acquisition code from mtk_cqdma_find_active_desc() to mtk_cqdma_tx_status(). Ensure the pc lock is acquired before the vc lock in the calling function to maintain correct locking hierarchy. Note that since mtk_cqdma_find_active_desc() is a static function with only one caller (mtk_cqdma_tx_status()), this modification safely eliminates the deadlock possibility without affecting other components. This possible bug is found by an experimental static analysis tool developed by our team. This tool analyzes the locking APIs to extract function pairs that can be concurrently executed, and then analyzes the instructions in the paired functions to identify possible concurrency bugs including deadlocks, data races and atomicity violations. Fixes: b1f01e48df5a ("dmaengine: mediatek: Add MediaTek Command-Queue DMA controller for MT6765 SoC") Cc: stable@vger.kernel.org Signed-off-by: Qiu-ji Chen Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20250508073634.3719-1-chenqiuji666@gmail.com Signed-off-by: Vinod Koul --- drivers/dma/mediatek/mtk-cqdma.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index d5ddb4e30e7150..e35271ac1eed2a 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -422,13 +422,10 @@ static struct virt_dma_desc *mtk_cqdma_find_active_desc(struct dma_chan *c, struct virt_dma_desc *vd; unsigned long flags; - spin_lock_irqsave(&cvc->pc->lock, flags); list_for_each_entry(vd, &cvc->pc->queue, node) if (vd->tx.cookie == cookie) { - spin_unlock_irqrestore(&cvc->pc->lock, flags); return vd; } - spin_unlock_irqrestore(&cvc->pc->lock, flags); list_for_each_entry(vd, &cvc->vc.desc_issued, node) if (vd->tx.cookie == cookie) @@ -452,9 +449,11 @@ static enum dma_status mtk_cqdma_tx_status(struct dma_chan *c, if (ret == DMA_COMPLETE || !txstate) return ret; + spin_lock_irqsave(&cvc->pc->lock, flags); spin_lock_irqsave(&cvc->vc.lock, flags); vd = mtk_cqdma_find_active_desc(c, cookie); spin_unlock_irqrestore(&cvc->vc.lock, flags); + spin_unlock_irqrestore(&cvc->pc->lock, flags); if (vd) { cvd = to_cqdma_vdesc(vd); From 5e27af0514e2249a9ccc9a762abd3b74e03a1f90 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 24 Apr 2025 13:48:29 +0200 Subject: [PATCH 1800/2924] dmaengine: fsl-edma: Fix return code for unhandled interrupts For fsl,imx93-edma4 two DMA channels share the same interrupt. So in case fsl_edma3_tx_handler is called for the "wrong" channel, the return code must be IRQ_NONE. This signalize that the interrupt wasn't handled. Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Stefan Wahren Reviewed-by: Joy Zou Link: https://lore.kernel.org/r/20250424114829.9055-1-wahrenst@gmx.net Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 756d67325db526..66bfa28d984e1b 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -57,7 +57,7 @@ static irqreturn_t fsl_edma3_tx_handler(int irq, void *dev_id) intr = edma_readl_chreg(fsl_chan, ch_int); if (!intr) - return IRQ_HANDLED; + return IRQ_NONE; edma_writel_chreg(fsl_chan, 1, ch_int); From 2468b0e3d5659dfde77f081f266e1111a981efb8 Mon Sep 17 00:00:00 2001 From: Tianyang Zhang Date: Wed, 14 May 2025 22:17:43 +0800 Subject: [PATCH 1801/2924] LoongArch: Prevent cond_resched() occurring within kernel-fpu When CONFIG_PREEMPT_COUNT is not configured (i.e. CONFIG_PREEMPT_NONE/ CONFIG_PREEMPT_VOLUNTARY), preempt_disable() / preempt_enable() merely acts as a barrier(). However, in these cases cond_resched() can still trigger a context switch and modify the CSR.EUEN, resulting in do_fpu() exception being activated within the kernel-fpu critical sections, as demonstrated in the following path: dcn32_calculate_wm_and_dlg() DC_FP_START() dcn32_calculate_wm_and_dlg_fpu() dcn32_find_dummy_latency_index_for_fw_based_mclk_switch() dcn32_internal_validate_bw() dcn32_enable_phantom_stream() dc_create_stream_for_sink() kzalloc(GFP_KERNEL) __kmem_cache_alloc_node() __cond_resched() DC_FP_END() This patch is similar to commit d02198550423a0b (x86/fpu: Improve crypto performance by making kernel-mode FPU reliably usable in softirqs). It uses local_bh_disable() instead of preempt_disable() for non-RT kernels so it can avoid the cond_resched() issue, and also extend the kernel-fpu application scenarios to the softirq context. Cc: stable@vger.kernel.org Signed-off-by: Tianyang Zhang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/kfpu.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c index ec5b28e570c963..4c476904227f95 100644 --- a/arch/loongarch/kernel/kfpu.c +++ b/arch/loongarch/kernel/kfpu.c @@ -18,11 +18,28 @@ static unsigned int euen_mask = CSR_EUEN_FPEN; static DEFINE_PER_CPU(bool, in_kernel_fpu); static DEFINE_PER_CPU(unsigned int, euen_current); +static inline void fpregs_lock(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); + else + local_bh_disable(); +} + +static inline void fpregs_unlock(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); + else + local_bh_enable(); +} + void kernel_fpu_begin(void) { unsigned int *euen_curr; - preempt_disable(); + if (!irqs_disabled()) + fpregs_lock(); WARN_ON(this_cpu_read(in_kernel_fpu)); @@ -73,7 +90,8 @@ void kernel_fpu_end(void) this_cpu_write(in_kernel_fpu, false); - preempt_enable(); + if (!irqs_disabled()) + fpregs_unlock(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); From 90436d234230e9a950ccd87831108b688b27a234 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 14 May 2025 22:17:43 +0800 Subject: [PATCH 1802/2924] LoongArch: Fix MAX_REG_OFFSET calculation Fix MAX_REG_OFFSET calculation, make it point to the last register in 'struct pt_regs' and not to the marker itself, which could allow regs_get_register() to return an invalid offset. Cc: stable@vger.kernel.org Fixes: 803b0fc5c3f2baa6e5 ("LoongArch: Add process management") Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index a5b63c84f8541a..e5d21e836d993c 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -55,7 +55,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long v /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); -#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last)) +#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last) - sizeof(unsigned long)) /** * regs_get_register() - get register value from its offset From 3e245b7b74c3a2ead5fa4bad27cc275284c75189 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 14 May 2025 22:17:52 +0800 Subject: [PATCH 1803/2924] LoongArch: Move __arch_cpu_idle() to .cpuidle.text section Now arch_cpu_idle() is annotated with __cpuidle which means it is in the .cpuidle.text section, but __arch_cpu_idle() isn't. Thus, fix the missing .cpuidle.text section assignment for __arch_cpu_idle() in order to correct backtracing with nmi_backtrace(). The principle is similar to the commit 97c8580e85cf81c ("MIPS: Annotate cpu_wait implementations with __cpuidle") Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/kernel/genex.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 4f09121417818d..733a7665e434dc 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -16,6 +16,7 @@ #include #include + .section .cpuidle.text, "ax" .align 5 SYM_FUNC_START(__arch_cpu_idle) /* start of idle interrupt region */ @@ -31,14 +32,16 @@ SYM_FUNC_START(__arch_cpu_idle) */ idle 0 /* end of idle interrupt region */ -1: jr ra +idle_exit: + jr ra SYM_FUNC_END(__arch_cpu_idle) + .previous SYM_CODE_START(handle_vint) UNWIND_HINT_UNDEFINED BACKUP_T0T1 SAVE_ALL - la_abs t1, 1b + la_abs t1, idle_exit LONG_L t0, sp, PT_ERA /* 3 instructions idle interrupt region */ ori t0, t0, 0b1100 From ceb9155d058a11242aa0572875c44e9713b1a2be Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 14 May 2025 22:17:52 +0800 Subject: [PATCH 1804/2924] LoongArch: Save and restore CSR.CNTC for hibernation Save and restore CSR.CNTC for hibernation which is similar to suspend. For host this is unnecessary because sched clock is ensured continuous, but for kvm guest sched clock isn't enough because rdtime.d should also be continuous. Host::rdtime.d = Host::CSR.CNTC + counter Guest::rdtime.d = Host::CSR.CNTC + Host::CSR.GCNTC + Guest::CSR.CNTC + counter so, Guest::rdtime.d = Host::rdtime.d + Host::CSR.GCNTC + Guest::CSR.CNTC To ensure Guest::rdtime.d continuous, Host::rdtime.d should be at first continuous, while Host::CSR.GCNTC / Guest::CSR.CNTC is maintained by KVM. Cc: stable@vger.kernel.org Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/kernel/time.c | 2 +- arch/loongarch/power/hibernate.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index e2d3bfeb636643..bc75a3a69fc8d5 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -111,7 +111,7 @@ static unsigned long __init get_loops_per_jiffy(void) return lpj; } -static long init_offset __nosavedata; +static long init_offset; void save_counter(void) { diff --git a/arch/loongarch/power/hibernate.c b/arch/loongarch/power/hibernate.c index 1e0590542f987c..e7b7346592cb2a 100644 --- a/arch/loongarch/power/hibernate.c +++ b/arch/loongarch/power/hibernate.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -14,6 +15,7 @@ struct pt_regs saved_regs; void save_processor_state(void) { + save_counter(); saved_crmd = csr_read32(LOONGARCH_CSR_CRMD); saved_prmd = csr_read32(LOONGARCH_CSR_PRMD); saved_euen = csr_read32(LOONGARCH_CSR_EUEN); @@ -26,6 +28,7 @@ void save_processor_state(void) void restore_processor_state(void) { + sync_counter(); csr_write32(saved_crmd, LOONGARCH_CSR_CRMD); csr_write32(saved_prmd, LOONGARCH_CSR_PRMD); csr_write32(saved_euen, LOONGARCH_CSR_EUEN); From 0b326b2371f94e798137cc1a3c5c2eef2bc69061 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 14 May 2025 22:18:10 +0800 Subject: [PATCH 1805/2924] LoongArch: uprobes: Remove user_{en,dis}able_single_step() When executing the "perf probe" and "perf stat" test cases about some cryptographic algorithm, the output shows that "Trace/breakpoint trap". This is because it uses the software singlestep breakpoint for uprobes on LoongArch, and no need to use the hardware singlestep. So just remove the related function call to user_{en,dis}able_single_step() for uprobes on LoongArch. How to reproduce: Please make sure CONFIG_UPROBE_EVENTS is set and openssl supports sm2 algorithm, then execute the following command. cd tools/perf && make ./perf probe -x /usr/lib64/libcrypto.so BN_mod_mul_montgomery ./perf stat -e probe_libcrypto:BN_mod_mul_montgomery openssl speed sm2 Cc: stable@vger.kernel.org Fixes: 19bc6cb64092 ("LoongArch: Add uprobes support") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/uprobes.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/loongarch/kernel/uprobes.c b/arch/loongarch/kernel/uprobes.c index 87abc7137b738e..0ab9d8d631c413 100644 --- a/arch/loongarch/kernel/uprobes.c +++ b/arch/loongarch/kernel/uprobes.c @@ -42,7 +42,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; instruction_pointer_set(regs, utask->xol_vaddr); - user_enable_single_step(current); return 0; } @@ -59,8 +58,6 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) else instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE); - user_disable_single_step(current); - return 0; } @@ -70,7 +67,6 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) current->thread.trap_nr = utask->autask.saved_trap_nr; instruction_pointer_set(regs, utask->vaddr); - user_disable_single_step(current); } bool arch_uprobe_xol_was_trapped(struct task_struct *t) From 12614f794274f63fbdfe76771b2b332077d63848 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 14 May 2025 22:18:10 +0800 Subject: [PATCH 1806/2924] LoongArch: uprobes: Remove redundant code about resume_era arch_uprobe_skip_sstep() returns true if instruction was emulated, that is to say, there is no need to single step for the emulated instructions. regs->csr_era will point to the destination address directly after the exception, so the resume_era related code is redundant, just remove them. Cc: stable@vger.kernel.org Fixes: 19bc6cb64092 ("LoongArch: Add uprobes support") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/uprobes.h | 1 - arch/loongarch/kernel/uprobes.c | 7 +------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/loongarch/include/asm/uprobes.h b/arch/loongarch/include/asm/uprobes.h index 99a0d198927f8b..025fc3f0a1028d 100644 --- a/arch/loongarch/include/asm/uprobes.h +++ b/arch/loongarch/include/asm/uprobes.h @@ -15,7 +15,6 @@ typedef u32 uprobe_opcode_t; #define UPROBE_XOLBP_INSN __emit_break(BRK_UPROBE_XOLBP) struct arch_uprobe { - unsigned long resume_era; u32 insn[2]; u32 ixol[2]; bool simulate; diff --git a/arch/loongarch/kernel/uprobes.c b/arch/loongarch/kernel/uprobes.c index 0ab9d8d631c413..6022eb0f71dbce 100644 --- a/arch/loongarch/kernel/uprobes.c +++ b/arch/loongarch/kernel/uprobes.c @@ -52,11 +52,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); current->thread.trap_nr = utask->autask.saved_trap_nr; - - if (auprobe->simulate) - instruction_pointer_set(regs, auprobe->resume_era); - else - instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE); + instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE); return 0; } @@ -86,7 +82,6 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) insn.word = auprobe->insn[0]; arch_simulate_insn(insn, regs); - auprobe->resume_era = regs->csr_era; return true; } From fb0ea6e4878a45b1ac81972027907fc424a792e6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 May 2025 18:28:15 +0100 Subject: [PATCH 1807/2924] irqchip: Drop MSI_CHIP_FLAG_SET_ACK from unsuspecting MSI drivers Commit 1c000dcaad2be ("irqchip/irq-msi-lib: Optionally set default irq_eoi()/irq_ack()") added blanket MSI_CHIP_FLAG_SET_ACK flags, irrespective of whether the underlying irqchip required it or not. Drop it from a number of drivers that do not require it. Fixes: 1c000dcaad2be ("irqchip/irq-msi-lib: Optionally set default irq_eoi()/irq_ack()") Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250513172819.2216709-6-maz@kernel.org --- drivers/irqchip/irq-gic-v2m.c | 2 +- drivers/irqchip/irq-gic-v3-its-msi-parent.c | 2 +- drivers/irqchip/irq-gic-v3-mbi.c | 2 +- drivers/irqchip/irq-mvebu-gicp.c | 2 +- drivers/irqchip/irq-mvebu-odmi.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index dc98c39d2b2034..cc6a6c1585d20b 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -252,7 +252,7 @@ static void __init gicv2m_teardown(void) static struct msi_parent_ops gicv2m_msi_parent_ops = { .supported_flags = GICV2M_MSI_FLAGS_SUPPORTED, .required_flags = GICV2M_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "GICv2m-", diff --git a/drivers/irqchip/irq-gic-v3-its-msi-parent.c b/drivers/irqchip/irq-gic-v3-its-msi-parent.c index bdb04c8081480d..c5a7eb1c041959 100644 --- a/drivers/irqchip/irq-gic-v3-its-msi-parent.c +++ b/drivers/irqchip/irq-gic-v3-its-msi-parent.c @@ -203,7 +203,7 @@ static bool its_init_dev_msi_info(struct device *dev, struct irq_domain *domain, const struct msi_parent_ops gic_v3_its_msi_parent_ops = { .supported_flags = ITS_MSI_FLAGS_SUPPORTED, .required_flags = ITS_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "ITS-", diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index 34e9ca77a8c368..647b18e24e0c2d 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -197,7 +197,7 @@ static bool mbi_init_dev_msi_info(struct device *dev, struct irq_domain *domain, static const struct msi_parent_ops gic_v3_mbi_msi_parent_ops = { .supported_flags = MBI_MSI_FLAGS_SUPPORTED, .required_flags = MBI_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "MBI-", diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index d67f93f6d75056..60b976286636f2 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -161,7 +161,7 @@ static const struct irq_domain_ops gicp_domain_ops = { static const struct msi_parent_ops gicp_msi_parent_ops = { .supported_flags = GICP_MSI_FLAGS_SUPPORTED, .required_flags = GICP_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "GICP-", diff --git a/drivers/irqchip/irq-mvebu-odmi.c b/drivers/irqchip/irq-mvebu-odmi.c index 28f7e81df94f0a..54f6f081157339 100644 --- a/drivers/irqchip/irq-mvebu-odmi.c +++ b/drivers/irqchip/irq-mvebu-odmi.c @@ -157,7 +157,7 @@ static const struct irq_domain_ops odmi_domain_ops = { static const struct msi_parent_ops odmi_msi_parent_ops = { .supported_flags = ODMI_MSI_FLAGS_SUPPORTED, .required_flags = ODMI_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "ODMI-", From 7dd7f39fce0022b386ef1ea5ffef92ecc7dfc6af Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Wed, 14 May 2025 09:37:49 -0400 Subject: [PATCH 1808/2924] ASoC: SOF: Intel: hda: Fix UAF when reloading module hda_generic_machine_select() appends -idisp to the tplg filename by allocating a new string with devm_kasprintf(), then stores the string right back into the global variable snd_soc_acpi_intel_hda_machines. When the module is unloaded, this memory is freed, resulting in a global variable pointing to freed memory. Reloading the module then triggers a use-after-free: BUG: KFENCE: use-after-free read in string+0x48/0xe0 Use-after-free read at 0x00000000967e0109 (in kfence-#99): string+0x48/0xe0 vsnprintf+0x329/0x6e0 devm_kvasprintf+0x54/0xb0 devm_kasprintf+0x58/0x80 hda_machine_select.cold+0x198/0x17a2 [snd_sof_intel_hda_generic] sof_probe_work+0x7f/0x600 [snd_sof] process_one_work+0x17b/0x330 worker_thread+0x2ce/0x3f0 kthread+0xcf/0x100 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1a/0x30 kfence-#99: 0x00000000198a940f-0x00000000ace47d9d, size=64, cache=kmalloc-64 allocated by task 333 on cpu 8 at 17.798069s (130.453553s ago): devm_kmalloc+0x52/0x120 devm_kvasprintf+0x66/0xb0 devm_kasprintf+0x58/0x80 hda_machine_select.cold+0x198/0x17a2 [snd_sof_intel_hda_generic] sof_probe_work+0x7f/0x600 [snd_sof] process_one_work+0x17b/0x330 worker_thread+0x2ce/0x3f0 kthread+0xcf/0x100 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1a/0x30 freed by task 1543 on cpu 4 at 141.586686s (6.665010s ago): release_nodes+0x43/0xb0 devres_release_all+0x90/0xf0 device_unbind_cleanup+0xe/0x70 device_release_driver_internal+0x1c1/0x200 driver_detach+0x48/0x90 bus_remove_driver+0x6d/0xf0 pci_unregister_driver+0x42/0xb0 __do_sys_delete_module+0x1d1/0x310 do_syscall_64+0x82/0x190 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fix it by copying the match array with devm_kmemdup_array() before we modify it. Fixes: 5458411d7594 ("ASoC: SOF: Intel: hda: refactoring topology name fixup for HDA mach") Suggested-by: Peter Ujfalusi Acked-by: Peter Ujfalusi Signed-off-by: Tavian Barnes Link: https://patch.msgid.link/570b15570b274520a0d9052f4e0f064a29c950ef.1747229716.git.tavianator@tavianator.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index b34e5fdf10f16f..6a3932d90b43a9 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1049,7 +1049,21 @@ static void hda_generic_machine_select(struct snd_sof_dev *sdev, if (!*mach && codec_num <= 2) { bool tplg_fixup = false; - hda_mach = snd_soc_acpi_intel_hda_machines; + /* + * make a local copy of the match array since we might + * be modifying it + */ + hda_mach = devm_kmemdup_array(sdev->dev, + snd_soc_acpi_intel_hda_machines, + 2, /* we have one entry + sentinel in the array */ + sizeof(snd_soc_acpi_intel_hda_machines[0]), + GFP_KERNEL); + if (!hda_mach) { + dev_err(bus->dev, + "%s: failed to duplicate the HDA match table\n", + __func__); + return; + } dev_info(bus->dev, "using HDA machine driver %s now\n", hda_mach->drv_name); From 08f959759e1e6e9c4b898c51a7d387ac3480630b Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Wed, 23 Apr 2025 09:53:32 +0200 Subject: [PATCH 1809/2924] mmc: sdhci-of-dwcmshc: add PD workaround on RK3576 RK3576's power domains have a peculiar design where the PD_NVM power domain, of which the sdhci controller is a part, seemingly does not have idempotent runtime disable/enable. The end effect is that if PD_NVM gets turned off by the generic power domain logic because all the devices depending on it are suspended, then the next time the sdhci device is unsuspended, it'll hang the SoC as soon as it tries accessing the CQHCI registers. RK3576's UFS support needed a new dev_pm_genpd_rpm_always_on function added to the generic power domains API to handle what appears to be a similar hardware design. Use this new function to ask for the same treatment in the sdhci controller by giving rk3576 its own platform data with its own postinit function. The benefit of doing this instead of marking the power domains always on in the power domain core is that we only do this if we know the platform we're running on actually uses the sdhci controller. For others, keeping PD_NVM always on would be a waste, as they won't run into this specific issue. The only other IP in PD_NVM that could be affected is FSPI0. If it gets a mainline driver, it will probably want to do the same thing. Acked-by: Adrian Hunter Signed-off-by: Nicolas Frattaroli Reviewed-by: Shawn Lin Fixes: cfee1b507758 ("pmdomain: rockchip: Add support for RK3576 SoC") Cc: # v6.15+ Link: https://lore.kernel.org/r/20250423-rk3576-emmc-fix-v3-1-0bf80e29967f@collabora.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-dwcmshc.c | 40 +++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index 09b9ab15e4995f..a20d03fdd6a93e 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -745,6 +746,29 @@ static void dwcmshc_rk35xx_postinit(struct sdhci_host *host, struct dwcmshc_priv } } +static void dwcmshc_rk3576_postinit(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv) +{ + struct device *dev = mmc_dev(host->mmc); + int ret; + + /* + * This works around the design of the RK3576's power domains, which + * makes the PD_NVM power domain, which the sdhci controller on the + * RK3576 is in, never come back the same way once it's run-time + * suspended once. This can happen during early kernel boot if no driver + * is using either PD_NVM or its child power domain PD_SDGMAC for a + * short moment, leading to it being turned off to save power. By + * keeping it on, sdhci suspending won't lead to PD_NVM becoming a + * candidate for getting turned off. + */ + ret = dev_pm_genpd_rpm_always_on(dev, true); + if (ret && ret != -EOPNOTSUPP) + dev_warn(dev, "failed to set PD rpm always on, SoC may hang later: %pe\n", + ERR_PTR(ret)); + + dwcmshc_rk35xx_postinit(host, dwc_priv); +} + static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -1176,6 +1200,18 @@ static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk35xx_pdata = { .postinit = dwcmshc_rk35xx_postinit, }; +static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk3576_pdata = { + .pdata = { + .ops = &sdhci_dwcmshc_rk35xx_ops, + .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN | + SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN, + }, + .init = dwcmshc_rk35xx_init, + .postinit = dwcmshc_rk3576_postinit, +}; + static const struct dwcmshc_pltfm_data sdhci_dwcmshc_th1520_pdata = { .pdata = { .ops = &sdhci_dwcmshc_th1520_ops, @@ -1274,6 +1310,10 @@ static const struct of_device_id sdhci_dwcmshc_dt_ids[] = { .compatible = "rockchip,rk3588-dwcmshc", .data = &sdhci_dwcmshc_rk35xx_pdata, }, + { + .compatible = "rockchip,rk3576-dwcmshc", + .data = &sdhci_dwcmshc_rk3576_pdata, + }, { .compatible = "rockchip,rk3568-dwcmshc", .data = &sdhci_dwcmshc_rk35xx_pdata, From e765bf89f42b5c82132a556b630affeb82b2a21f Mon Sep 17 00:00:00 2001 From: Ilya Guterman Date: Sat, 10 May 2025 19:21:30 +0900 Subject: [PATCH 1810/2924] nvme-pci: add NVME_QUIRK_NO_DEEPEST_PS quirk for SOLIDIGM P44 Pro This commit adds the NVME_QUIRK_NO_DEEPEST_PS quirk for device [126f:2262], which belongs to device SOLIDIGM P44 Pro SSDPFKKW020X7 The device frequently have trouble exiting the deepest power state (5), resulting in the entire disk being unresponsive. Verified by setting nvme_core.default_ps_max_latency_us=10000 and observing the expected behavior. Signed-off-by: Ilya Guterman Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a9390ac7211ea7..f1dd804151b1c9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3739,6 +3739,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, + { PCI_DEVICE(0x025e, 0xf1ac), /* SOLIDIGM P44 pro SSDPFKKW020X7 */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */ From 1cfe51ef07ca3286581d612debfb0430eeccbb65 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 13 May 2025 19:56:41 +0200 Subject: [PATCH 1811/2924] i2c: designware: Fix an error handling path in i2c_dw_pci_probe() If navi_amd_register_client() fails, the previous i2c_dw_probe() call should be undone by a corresponding i2c_del_adapter() call, as already done in the remove function. Fixes: 17631e8ca2d3 ("i2c: designware: Add driver support for AMD NAVI GPU") Signed-off-by: Christophe JAILLET Cc: # v5.13+ Acked-by: Jarkko Nikula Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/fcd9651835a32979df8802b2db9504c523a8ebbb.1747158983.git.christophe.jaillet@wanadoo.fr --- drivers/i2c/busses/i2c-designware-pcidrv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 8e0267c7cc294e..f21f9877c04047 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -278,9 +278,11 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev, if ((dev->flags & MODEL_MASK) == MODEL_AMD_NAVI_GPU) { dev->slave = i2c_new_ccgx_ucsi(&dev->adapter, dev->irq, &dgpu_node); - if (IS_ERR(dev->slave)) + if (IS_ERR(dev->slave)) { + i2c_del_adapter(&dev->adapter); return dev_err_probe(device, PTR_ERR(dev->slave), "register UCSI failed\n"); + } } pm_runtime_set_autosuspend_delay(device, 1000); From fe14c0f096f58d2569e587e9f4b05d772272bbb4 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Tue, 22 Apr 2025 11:58:11 -0300 Subject: [PATCH 1812/2924] Revert "drm/amd/display: Hardware cursor changes color when switched to software cursor" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 272e6aab14bbf98d7a06b2b1cd6308a02d4a10a1. Applying degamma curve to the cursor by default breaks Linux userspace expectation. On Linux, AMD display manager enables cursor degamma ROM just for implict sRGB on HW versions where degamma is split into two blocks: degamma ROM for pre-defined TFs and `gamma correction` for user/custom curves, and degamma ROM settings doesn't apply to cursor plane. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1513 Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2803 Reported-by: Michel Dänzer Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4144 Signed-off-by: Melissa Wen Reviewed-by: Alex Hung Signed-off-by: Alex Deucher (cherry picked from commit f6a305d4748801a6c799ae9375b2ecff3aed094b) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 1236e0f9a2560c..712aff7e17f7a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,10 +120,11 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 0; - // DCN4 should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - cur_rom_en = 1; + if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { + cur_rom_en = 1; + } } REG_UPDATE_3(CURSOR0_CONTROL, From ee7360fc27d6045510f8fe459b5649b2af27811a Mon Sep 17 00:00:00 2001 From: "David (Ming Qiang) Wu" Date: Mon, 12 May 2025 15:14:43 -0400 Subject: [PATCH 1813/2924] drm/amdgpu: read back register after written for VCN v4.0.5 On VCN v4.0.5 there is a race condition where the WPTR is not updated after starting from idle when doorbell is used. Adding register read-back after written at function end is to ensure all register writes are done before they can be used. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12528 Signed-off-by: David (Ming Qiang) Wu Reviewed-by: Mario Limonciello Tested-by: Mario Limonciello Reviewed-by: Alex Deucher Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit 07c9db090b86e5211188e1b351303fbc673378cf) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index a1171e6152ed2d..f11df9c2ec1318 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1023,6 +1023,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL); + return 0; } @@ -1205,6 +1209,10 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + return 0; } From b1f704107cf27906a9cea542b626b96019104663 Mon Sep 17 00:00:00 2001 From: Aradhya Bhatia Date: Mon, 12 May 2025 06:50:04 +0000 Subject: [PATCH 1814/2924] drm/xe/xe2hpg: Add Wa_22021007897 Add Wa_22021007897 for the Xe2_HPG (graphics version: 20.01) IP. It is a permanent workaround, and applicable on all the steppings. Reviewed-by: Gustavo Sousa Reviewed-by: Tejas Upadhyay Signed-off-by: Aradhya Bhatia Link: https://lore.kernel.org/r/20250512065004.2576-1-aradhya.bhatia@intel.com Signed-off-by: Matt Roper (cherry picked from commit e5c13e2c505b73a8667ef9a0fd5cbd4227e483e6) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index da1f198ac107cc..181913967ac9b7 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -157,6 +157,7 @@ #define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) +#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) #define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 24f644c0a67365..2f833f0d575f24 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -815,6 +815,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, + { XE_RTP_NAME("22021007897"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), From 7bd68ce21d39150d80806233965326359f517b78 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Mon, 12 May 2025 06:54:55 -0700 Subject: [PATCH 1815/2924] drm/gpusvm: Introduce devmem_only flag for allocation This commit adds a new flag, devmem_only, to the drm_gpusvm structure. The purpose of this flag is to ensure that the get_pages function allocates memory exclusively from the device's memory. If the allocation from device memory fails, the function will return an -EFAULT error. Required for shared CPU and GPU atomics on certain devices. v3: - s/vram_only/devmem_only/ Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Signed-off-by: Himal Prasad Ghimiray Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250512135500.1405019-2-matthew.brost@intel.com (cherry picked from commit 8a9b978ebd47df9e0694c34748c2d6fa0c31eb4d) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/drm_gpusvm.c | 5 +++++ include/drm/drm_gpusvm.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index de424e670995cf..a58d03e6cac277 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1454,6 +1454,11 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, goto err_unmap; } + if (ctx->devmem_only) { + err = -EFAULT; + goto err_unmap; + } + addr = dma_map_page(gpusvm->drm->dev, page, 0, PAGE_SIZE << order, diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index df120b4d1f836c..9fd25fc880a492 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -286,6 +286,7 @@ struct drm_gpusvm { * @in_notifier: entering from a MMU notifier * @read_only: operating on read-only memory * @devmem_possible: possible to use device memory + * @devmem_only: use only device memory * * Context that is DRM GPUSVM is operating in (i.e. user arguments). */ @@ -294,6 +295,7 @@ struct drm_gpusvm_ctx { unsigned int in_notifier :1; unsigned int read_only :1; unsigned int devmem_possible :1; + unsigned int devmem_only :1; }; int drm_gpusvm_init(struct drm_gpusvm *gpusvm, From 794f5493f518916380578f14d999de92b930b609 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 12 May 2025 06:54:56 -0700 Subject: [PATCH 1816/2924] drm/xe: Strict migration policy for atomic SVM faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mixing GPU and CPU atomics does not work unless a strict migration policy of GPU atomics must be device memory. Enforce a policy of must be in VRAM with a retry loop of 3 attempts, if retry loop fails abort fault. Removing always_migrate_to_vram modparam as we now have real migration policy. v2: - Only retry migration on atomics - Drop alway migrate modparam v3: - Only set vram_only on DGFX (Himal) - Bail on get_pages failure if vram_only and retry count exceeded (Himal) - s/vram_only/devmem_only - Update xe_svm_range_is_valid to accept devmem_only argument v4: - Fix logic bug get_pages failure v5: - Fix commit message (Himal) - Mention removing always_migrate_to_vram in commit message (Lucas) - Fix xe_svm_range_is_valid to check for devmem pages - Bail on devmem_only && !migrate_devmem (Thomas) v6: - Add READ_ONCE barriers for opportunistic checks (Thomas) - Pair READ_ONCE with WRITE_ONCE (Thomas) v7: - Adjust comments (Thomas) Fixes: 2f118c949160 ("drm/xe: Add SVM VRAM migration") Cc: stable@vger.kernel.org Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Matthew Brost Acked-by: Himal Prasad Ghimiray Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20250512135500.1405019-3-matthew.brost@intel.com (cherry picked from commit a9ac0fa455b050d03e3032501368048fb284d318) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/drm_gpusvm.c | 23 +++++-- drivers/gpu/drm/xe/xe_module.c | 3 - drivers/gpu/drm/xe/xe_module.h | 1 - drivers/gpu/drm/xe/xe_pt.c | 14 ++++- drivers/gpu/drm/xe/xe_svm.c | 111 +++++++++++++++++++++++++-------- drivers/gpu/drm/xe/xe_svm.h | 5 -- include/drm/drm_gpusvm.h | 40 +++++++----- 7 files changed, 140 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index a58d03e6cac277..41f6616bcf76fa 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1118,6 +1118,10 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, lockdep_assert_held(&gpusvm->notifier_lock); if (range->flags.has_dma_mapping) { + struct drm_gpusvm_range_flags flags = { + .__flags = range->flags.__flags, + }; + for (i = 0, j = 0; i < npages; j++) { struct drm_pagemap_device_addr *addr = &range->dma_addr[j]; @@ -1131,8 +1135,12 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, dev, *addr); i += 1 << addr->order; } - range->flags.has_devmem_pages = false; - range->flags.has_dma_mapping = false; + + /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ + flags.has_devmem_pages = false; + flags.has_dma_mapping = false; + WRITE_ONCE(range->flags.__flags, flags.__flags); + range->dpagemap = NULL; } } @@ -1334,6 +1342,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, int err = 0; struct dev_pagemap *pagemap; struct drm_pagemap *dpagemap; + struct drm_gpusvm_range_flags flags; retry: hmm_range.notifier_seq = mmu_interval_read_begin(notifier); @@ -1378,7 +1387,8 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, */ drm_gpusvm_notifier_lock(gpusvm); - if (range->flags.unmapped) { + flags.__flags = range->flags.__flags; + if (flags.unmapped) { drm_gpusvm_notifier_unlock(gpusvm); err = -EFAULT; goto err_free; @@ -1474,14 +1484,17 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, } i += 1 << order; num_dma_mapped = i; - range->flags.has_dma_mapping = true; + flags.has_dma_mapping = true; } if (zdd) { - range->flags.has_devmem_pages = true; + flags.has_devmem_pages = true; range->dpagemap = dpagemap; } + /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ + WRITE_ONCE(range->flags.__flags, flags.__flags); + drm_gpusvm_notifier_unlock(gpusvm); kvfree(pfns); set_seqno: diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 9f4632e39a1ad5..e861c694f3361b 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -29,9 +29,6 @@ struct xe_modparam xe_modparam = { module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); -module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444); -MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault"); - module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 84339e509c80d6..5a3bfea8b7b4c4 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -12,7 +12,6 @@ struct xe_modparam { bool force_execlist; bool probe_display; - bool always_migrate_to_vram; u32 force_vram_bar_size; int guc_log_level; char *guc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ffaf0d02dc7de9..856038553b812a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2232,11 +2232,19 @@ static void op_commit(struct xe_vm *vm, } case DRM_GPUVA_OP_DRIVER: { + /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */ + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { - op->map_range.range->tile_present |= BIT(tile->id); - op->map_range.range->tile_invalidated &= ~BIT(tile->id); + WRITE_ONCE(op->map_range.range->tile_present, + op->map_range.range->tile_present | + BIT(tile->id)); + WRITE_ONCE(op->map_range.range->tile_invalidated, + op->map_range.range->tile_invalidated & + ~BIT(tile->id)); } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) { - op->unmap_range.range->tile_present &= ~BIT(tile->id); + WRITE_ONCE(op->unmap_range.range->tile_present, + op->unmap_range.range->tile_present & + ~BIT(tile->id)); } break; } diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 24c578e1170e40..e65d3b820ee024 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -15,8 +15,17 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range) { - /* Not reliable without notifier lock */ - return range->base.flags.has_devmem_pages; + /* + * Advisory only check whether the range is currently backed by VRAM + * memory. + */ + + struct drm_gpusvm_range_flags flags = { + /* Pairs with WRITE_ONCE in drm_gpusvm.c */ + .__flags = READ_ONCE(range->base.flags.__flags), + }; + + return flags.has_devmem_pages; } static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range) @@ -645,9 +654,16 @@ void xe_svm_fini(struct xe_vm *vm) } static bool xe_svm_range_is_valid(struct xe_svm_range *range, - struct xe_tile *tile) + struct xe_tile *tile, + bool devmem_only) { - return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id); + /* + * Advisory only check whether the range currently has a valid mapping, + * READ_ONCE pairs with WRITE_ONCE in xe_pt.c + */ + return ((READ_ONCE(range->tile_present) & + ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) && + (!devmem_only || xe_svm_range_in_vram(range)); } static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) @@ -712,6 +728,36 @@ static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, return err; } +static bool supports_4K_migration(struct xe_device *xe) +{ + if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + return false; + + return true; +} + +static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, + struct xe_vma *vma) +{ + struct xe_vm *vm = range_to_vm(&range->base); + u64 range_size = xe_svm_range_size(range); + + if (!range->base.flags.migrate_devmem) + return false; + + if (xe_svm_range_in_vram(range)) { + drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); + return false; + } + + if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); + return false; + } + + return true; +} + /** * xe_svm_handle_pagefault() - SVM handle page fault * @vm: The VM. @@ -735,11 +781,14 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), .check_pages_threshold = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0, + .devmem_only = atomic && IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), }; struct xe_svm_range *range; struct drm_gpusvm_range *r; struct drm_exec exec; struct dma_fence *fence; + int migrate_try_count = ctx.devmem_only ? 3 : 1; ktime_t end = 0; int err; @@ -758,24 +807,30 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, if (IS_ERR(r)) return PTR_ERR(r); + if (ctx.devmem_only && !r->flags.migrate_devmem) + return -EACCES; + range = to_xe_range(r); - if (xe_svm_range_is_valid(range, tile)) + if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) return 0; range_debug(range, "PAGE FAULT"); - /* XXX: Add migration policy, for now migrate range once */ - if (!range->skip_migrate && range->base.flags.migrate_devmem && - xe_svm_range_size(range) >= SZ_64K) { - range->skip_migrate = true; - + if (--migrate_try_count >= 0 && + xe_svm_range_needs_migrate_to_vram(range, vma)) { err = xe_svm_alloc_vram(vm, tile, range, &ctx); if (err) { - drm_dbg(&vm->xe->drm, - "VRAM allocation failed, falling back to " - "retrying fault, asid=%u, errno=%pe\n", - vm->usm.asid, ERR_PTR(err)); - goto retry; + if (migrate_try_count || !ctx.devmem_only) { + drm_dbg(&vm->xe->drm, + "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", + vm->usm.asid, ERR_PTR(err)); + goto retry; + } else { + drm_err(&vm->xe->drm, + "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", + vm->usm.asid, ERR_PTR(err)); + return err; + } } } @@ -783,15 +838,22 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx); /* Corner where CPU mappings have changed */ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { - if (err == -EOPNOTSUPP) { - range_debug(range, "PAGE FAULT - EVICT PAGES"); - drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); + if (migrate_try_count > 0 || !ctx.devmem_only) { + if (err == -EOPNOTSUPP) { + range_debug(range, "PAGE FAULT - EVICT PAGES"); + drm_gpusvm_range_evict(&vm->svm.gpusvm, + &range->base); + } + drm_dbg(&vm->xe->drm, + "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + range_debug(range, "PAGE FAULT - RETRY PAGES"); + goto retry; + } else { + drm_err(&vm->xe->drm, + "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); } - drm_dbg(&vm->xe->drm, - "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", - vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); - range_debug(range, "PAGE FAULT - RETRY PAGES"); - goto retry; } if (err) { range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); @@ -825,9 +887,6 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, } drm_exec_fini(&exec); - if (xe_modparam.always_migrate_to_vram) - range->skip_migrate = false; - dma_fence_wait(fence, false); dma_fence_put(fence); diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index be306fe7aaa4b3..fe58ac2f4baaca 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -36,11 +36,6 @@ struct xe_svm_range { * range. Protected by GPU SVM notifier lock. */ u8 tile_invalidated; - /** - * @skip_migrate: Skip migration to VRAM, protected by GPU fault handler - * locking. - */ - u8 skip_migrate :1; }; #if IS_ENABLED(CONFIG_DRM_GPUSVM) diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 9fd25fc880a492..653d48dbe1c11a 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -185,6 +185,31 @@ struct drm_gpusvm_notifier { } flags; }; +/** + * struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags + * + * @migrate_devmem: Flag indicating whether the range can be migrated to device memory + * @unmapped: Flag indicating if the range has been unmapped + * @partial_unmap: Flag indicating if the range has been partially unmapped + * @has_devmem_pages: Flag indicating if the range has devmem pages + * @has_dma_mapping: Flag indicating if the range has a DMA mapping + * @__flags: Flags for range in u16 form (used for READ_ONCE) + */ +struct drm_gpusvm_range_flags { + union { + struct { + /* All flags below must be set upon creation */ + u16 migrate_devmem : 1; + /* All flags below must be set / cleared under notifier lock */ + u16 unmapped : 1; + u16 partial_unmap : 1; + u16 has_devmem_pages : 1; + u16 has_dma_mapping : 1; + }; + u16 __flags; + }; +}; + /** * struct drm_gpusvm_range - Structure representing a GPU SVM range * @@ -198,11 +223,6 @@ struct drm_gpusvm_notifier { * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping. * Note this is assuming only one drm_pagemap per range is allowed. * @flags: Flags for range - * @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory - * @flags.unmapped: Flag indicating if the range has been unmapped - * @flags.partial_unmap: Flag indicating if the range has been partially unmapped - * @flags.has_devmem_pages: Flag indicating if the range has devmem pages - * @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping * * This structure represents a GPU SVM range used for tracking memory ranges * mapped in a DRM device. @@ -216,15 +236,7 @@ struct drm_gpusvm_range { unsigned long notifier_seq; struct drm_pagemap_device_addr *dma_addr; struct drm_pagemap *dpagemap; - struct { - /* All flags below must be set upon creation */ - u16 migrate_devmem : 1; - /* All flags below must be set / cleared under notifier lock */ - u16 unmapped : 1; - u16 partial_unmap : 1; - u16 has_devmem_pages : 1; - u16 has_dma_mapping : 1; - } flags; + struct drm_gpusvm_range_flags flags; }; /** From df8c37810b5a4a5c8bff1f880f59163c800a4985 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 12 May 2025 06:54:57 -0700 Subject: [PATCH 1817/2924] drm/gpusvm: Add timeslicing support to GPU SVM Add timeslicing support to GPU SVM which will guarantee the GPU a minimum execution time on piece of physical memory before migration back to CPU. Intended to implement strict migration policies which require memory to be in a certain placement for correct execution. Required for shared CPU and GPU atomics on certain devices. Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20250512135500.1405019-4-matthew.brost@intel.com (cherry picked from commit 8dc1812b5b3a42311d28eb385eed88e2053ad3cb) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/drm_gpusvm.c | 9 +++++++++ include/drm/drm_gpusvm.h | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 41f6616bcf76fa..4b2f32889f00f8 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1783,6 +1783,8 @@ int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm, goto err_finalize; /* Upon success bind devmem allocation to range and zdd */ + devmem_allocation->timeslice_expiration = get_jiffies_64() + + msecs_to_jiffies(ctx->timeslice_ms); zdd->devmem_allocation = devmem_allocation; /* Owns ref */ err_finalize: @@ -2003,6 +2005,13 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas, void *buf; int i, err = 0; + if (page) { + zdd = page->zone_device_data; + if (time_before64(get_jiffies_64(), + zdd->devmem_allocation->timeslice_expiration)) + return 0; + } + start = ALIGN_DOWN(fault_addr, size); end = ALIGN(fault_addr + 1, size); diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 653d48dbe1c11a..eaf704d3d05e8b 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -89,6 +89,7 @@ struct drm_gpusvm_devmem_ops { * @ops: Pointer to the operations structure for GPU SVM device memory * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to. * @size: Size of device memory allocation + * @timeslice_expiration: Timeslice expiration in jiffies */ struct drm_gpusvm_devmem { struct device *dev; @@ -97,6 +98,7 @@ struct drm_gpusvm_devmem { const struct drm_gpusvm_devmem_ops *ops; struct drm_pagemap *dpagemap; size_t size; + u64 timeslice_expiration; }; /** @@ -295,6 +297,8 @@ struct drm_gpusvm { * @check_pages_threshold: Check CPU pages for present if chunk is less than or * equal to threshold. If not present, reduce chunk * size. + * @timeslice_ms: The timeslice MS which in minimum time a piece of memory + * remains with either exclusive GPU or CPU access. * @in_notifier: entering from a MMU notifier * @read_only: operating on read-only memory * @devmem_possible: possible to use device memory @@ -304,6 +308,7 @@ struct drm_gpusvm { */ struct drm_gpusvm_ctx { unsigned long check_pages_threshold; + unsigned long timeslice_ms; unsigned int in_notifier :1; unsigned int read_only :1; unsigned int devmem_possible :1; From 1b36ea2fc6879fed02b675e86867bc422b76f50e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 12 May 2025 06:54:58 -0700 Subject: [PATCH 1818/2924] drm/xe: Timeslice GPU on atomic SVM fault Ensure GPU can make forward progress on an atomic SVM GPU fault by giving the GPU a timeslice of 5ms v2: - Reduce timeslice to 5ms - Double timeslice on retry - Split out GPU SVM changes into independent patch v5: - Double timeslice in a few more places Fixes: 2f118c949160 ("drm/xe: Add SVM VRAM migration") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20250512135500.1405019-5-matthew.brost@intel.com (cherry picked from commit a5d8d3be1dea8154edbbea481081469627665659) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index e65d3b820ee024..975094c1a58279 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -783,6 +783,8 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0, .devmem_only = atomic && IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + .timeslice_ms = atomic && IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0, }; struct xe_svm_range *range; struct drm_gpusvm_range *r; @@ -819,6 +821,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, if (--migrate_try_count >= 0 && xe_svm_range_needs_migrate_to_vram(range, vma)) { err = xe_svm_alloc_vram(vm, tile, range, &ctx); + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { if (migrate_try_count || !ctx.devmem_only) { drm_dbg(&vm->xe->drm, @@ -838,6 +841,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx); /* Corner where CPU mappings have changed */ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (migrate_try_count > 0 || !ctx.devmem_only) { if (err == -EOPNOTSUPP) { range_debug(range, "PAGE FAULT - EVICT PAGES"); @@ -877,6 +881,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, drm_exec_fini(&exec); err = PTR_ERR(fence); if (err == -EAGAIN) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ range_debug(range, "PAGE FAULT - RETRY BIND"); goto retry; } From 66c8f7b435bddb7d8577ac8a57e175a6cb147227 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 9 May 2025 09:12:01 -0700 Subject: [PATCH 1819/2924] drm/xe: Save CTX_TIMESTAMP mmio value instead of LRC value For determining actual job execution time, save the current value of the CTX_TIMESTAMP register rather than the value saved in LRC since the current register value is the closest to the start time of the job. v2: Define MI_STORE_REGISTER_MEM to fix compile error v3: Place MI_STORE_REGISTER_MEM sorted by MI_INSTR (Lucas) Fixes: 65921374c48f ("drm/xe: Emit ctx timestamp copy in ring ops") Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250509161159.2173069-6-umesh.nerlige.ramappa@intel.com (cherry picked from commit 38b14233e5deff51db8faec287b4acd227152246) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 4 ++++ drivers/gpu/drm/xe/xe_lrc.c | 2 +- drivers/gpu/drm/xe/xe_ring_ops.c | 7 ++----- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 167fb0f742de7b..5a47991b4b81fc 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -47,6 +47,10 @@ #define MI_LRI_FORCE_POSTED REG_BIT(12) #define MI_LRI_LEN(x) (((x) & 0xff) + 1) +#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4)) +#define MI_SRM_USE_GGTT REG_BIT(22) +#define MI_SRM_ADD_CS_OFFSET REG_BIT(19) + #define MI_FLUSH_DW __MI_INSTR(0x26) #define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index df3ceddede070a..48e8fd85eabe35 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -684,7 +684,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) { - /* The start seqno is stored in the driver-defined portion of PPHWSP */ + /* This is stored in the driver-defined portion of PPHWSP */ return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; } diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index a7582b097ae678..bc1689db4cd716 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -234,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) { - dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | - MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + dw[i++] = RING_CTX_TIMESTAMP(0).addr; dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); dw[i++] = 0; - dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); - dw[i++] = 0; - dw[i++] = MI_NOOP; return i; } From ce15563e49fb0b5c802564433ff8468acd1339eb Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 9 May 2025 09:12:02 -0700 Subject: [PATCH 1820/2924] drm/xe: Save the gt pointer in lrc and drop the tile Save the gt pointer in the lrc so that it can used for gt based helpers. Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250509161159.2173069-7-umesh.nerlige.ramappa@intel.com (cherry picked from commit 741d3ef8b8b88fab2729ca89de1180e49bc9cef0) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 4 ++-- drivers/gpu/drm/xe/xe_lrc_types.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 48e8fd85eabe35..79b43fec4f79fc 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -864,7 +864,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); + u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); @@ -896,6 +896,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, int err; kref_init(&lrc->refcount); + lrc->gt = gt; lrc->flags = 0; lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); if (xe_gt_has_indirect_ring_state(gt)) @@ -914,7 +915,6 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, return PTR_ERR(lrc->bo); lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; lrc->ctx_timestamp = 0; diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 71ecb453f811a4..cd38586ae98932 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -25,8 +25,8 @@ struct xe_lrc { /** @size: size of lrc including any indirect ring state page */ u32 size; - /** @tile: tile which this LRC belongs to */ - struct xe_tile *tile; + /** @gt: gt which this LRC belongs to */ + struct xe_gt *gt; /** @flags: LRC flags */ #define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 From 617d824c5323b8474b3665ae6c410c98b839e0b0 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Fri, 9 May 2025 09:12:03 -0700 Subject: [PATCH 1821/2924] drm/xe: Add WA BB to capture active context utilization Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the context, but only gets updated when the context switches out. In order to check how long a context has been active before it switches out, two things are required: (1) Determine if the context is running: To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in the LRC. The value chosen is 1 since 0 is the initial value when the LRC is initialized. During a query, we just check for this value to determine if the context is active. If the context switched out, it would overwrite this location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as the last part of the context restore, so reusing this LRC location will not clobber anything. (2) Calculate the time that the context has been active for: The CTX_TIMESTAMP ticks only when the context is active. If a context is active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization. While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific engine instance. Since we do not know which instance the context is running on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and store it in the PPHSWP. Using the above 2 instructions in a WA BB, capture active context utilization. v2: (Matt Brost) - This breaks TDR, fix it by saving the CTX_TIMESTAMP register "drm/xe: Save CTX_TIMESTAMP mmio value instead of LRC value" - Drop tile from LRC if using gt "drm/xe: Save the gt pointer in LRC and drop the tile" v3: - Remove helpers for bb_per_ctx_ptr (Matt) - Add define for context active value (Matt) - Use 64 bit CTX TIMESTAMP for platforms that support it. For platforms that don't, live with the rare race. (Matt, Lucas) - Convert engine id to hwe and get the MMIO value (Lucas) - Correct commit message on when WA BB runs (Lucas) v4: - s/GRAPHICS_VER(...)/xe->info.has_64bit_timestamp/ (Matt) - Drop support for active utilization on a VF (CI failure) - In xe_lrc_init ensure the lrc value is 0 to begin with (CI regression) v5: - Minor checkpatch fix - Squash into previous commit and make TDR use 32-bit time - Update code comment to match commit msg Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4532 Cc: # v6.13+ Suggested-by: Lucas De Marchi Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250509161159.2173069-8-umesh.nerlige.ramappa@intel.com (cherry picked from commit 82b98cadb01f63cdb159e596ec06866d00f8e8c7) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_engine_regs.h | 5 + drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 2 + drivers/gpu/drm/xe/xe_device_types.h | 2 + drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- drivers/gpu/drm/xe/xe_guc_submit.c | 2 +- drivers/gpu/drm/xe/xe_lrc.c | 193 +++++++++++++++++++++-- drivers/gpu/drm/xe/xe_lrc.h | 5 +- drivers/gpu/drm/xe/xe_lrc_types.h | 5 +- drivers/gpu/drm/xe/xe_pci.c | 2 + drivers/gpu/drm/xe/xe_pci_types.h | 1 + drivers/gpu/drm/xe/xe_trace_lrc.h | 8 +- 11 files changed, 208 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index fb8ec317b6ee6c..891f928d80ce82 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -43,6 +43,10 @@ #define XEHPC_BCS8_RING_BASE 0x3ee000 #define GSCCS_RING_BASE 0x11a000 +#define ENGINE_ID(base) XE_REG((base) + 0x8c) +#define ENGINE_INSTANCE_ID REG_GENMASK(9, 4) +#define ENGINE_CLASS_ID REG_GENMASK(2, 0) + #define RING_TAIL(base) XE_REG((base) + 0x30) #define TAIL_ADDR REG_GENMASK(20, 3) @@ -154,6 +158,7 @@ #define STOP_RING REG_BIT(8) #define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8) +#define RING_CTX_TIMESTAMP_UDW(base) XE_REG((base) + 0x3ac) #define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc) #define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 57944f90bbf6e9..994af591a2e85e 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -11,7 +11,9 @@ #define CTX_RING_TAIL (0x06 + 1) #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) +#define CTX_BB_PER_CTX_PTR (0x12 + 1) #define CTX_TIMESTAMP (0x22 + 1) +#define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 9f8667ebba853e..0482f26aa48033 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -330,6 +330,8 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; + /** @info.has_64bit_timestamp: Device supports 64-bit timestamps */ + u8 has_64bit_timestamp:1; /** @info.is_dgfx: is discrete device */ u8 is_dgfx:1; /** diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 606922d9dd7302..cd9b1c32f30f80 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -830,7 +830,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { struct xe_device *xe = gt_to_xe(q->gt); struct xe_lrc *lrc; - u32 old_ts, new_ts; + u64 old_ts, new_ts; int idx; /* diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 31bc2022bfc2d8..769781d577df60 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -941,7 +941,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) return xe_sched_invalidate_job(job, 2); } - ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); + ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0])); ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); /* diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 79b43fec4f79fc..03bfba696b3789 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -24,6 +24,7 @@ #include "xe_hw_fence.h" #include "xe_map.h" #include "xe_memirq.h" +#include "xe_mmio.h" #include "xe_sriov.h" #include "xe_trace_lrc.h" #include "xe_vm.h" @@ -650,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) #define LRC_PARALLEL_PPHWSP_OFFSET 2048 +#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096 #define LRC_PPHWSP_SIZE SZ_4K u32 xe_lrc_regs_offset(struct xe_lrc *lrc) @@ -694,11 +696,21 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; } +static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc) +{ + return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET; +} + static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) { return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); } +static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) +{ + return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32); +} + static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { /* Indirect ring state page is at the very end of LRC */ @@ -726,8 +738,10 @@ DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(start_seqno) DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) DECL_MAP_ADDR_HELPERS(ctx_timestamp) +DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw) DECL_MAP_ADDR_HELPERS(parallel) DECL_MAP_ADDR_HELPERS(indirect_ring) +DECL_MAP_ADDR_HELPERS(engine_id) #undef DECL_MAP_ADDR_HELPERS @@ -742,19 +756,38 @@ u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); } +/** + * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address + * @lrc: Pointer to the lrc. + * + * Returns: ctx timestamp udw GGTT address + */ +u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); +} + /** * xe_lrc_ctx_timestamp() - Read ctx timestamp value * @lrc: Pointer to the lrc. * * Returns: ctx timestamp value */ -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) +u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) { struct xe_device *xe = lrc_to_xe(lrc); struct iosys_map map; + u32 ldw, udw = 0; map = __xe_lrc_ctx_timestamp_map(lrc); - return xe_map_read32(xe, &map); + ldw = xe_map_read32(xe, &map); + + if (xe->info.has_64bit_timestamp) { + map = __xe_lrc_ctx_timestamp_udw_map(lrc); + udw = xe_map_read32(xe, &map); + } + + return (u64)udw << 32 | ldw; } /** @@ -877,6 +910,65 @@ static void xe_lrc_finish(struct xe_lrc *lrc) xe_bo_unpin(lrc->bo); xe_bo_unlock(lrc->bo); xe_bo_put(lrc->bo); + xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); +} + +/* + * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active + * context run ticks. + * @lrc: Pointer to the lrc. + * + * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the + * context, but only gets updated when the context switches out. In order to + * check how long a context has been active before it switches out, two things + * are required: + * + * (1) Determine if the context is running: + * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in + * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is + * initialized. During a query, we just check for this value to determine if the + * context is active. If the context switched out, it would overwrite this + * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as + * the last part of context restore, so reusing this LRC location will not + * clobber anything. + * + * (2) Calculate the time that the context has been active for: + * The CTX_TIMESTAMP ticks only when the context is active. If a context is + * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization. + * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific + * engine instance. Since we do not know which instance the context is running + * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and + * store it in the PPHSWP. + */ +#define CONTEXT_ACTIVE 1ULL +static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +{ + u32 *cmd; + + cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + + *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + *cmd++ = ENGINE_ID(0).addr; + *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc); + *cmd++ = 0; + + *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc); + *cmd++ = 0; + *cmd++ = lower_32_bits(CONTEXT_ACTIVE); + + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) { + *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); + *cmd++ = 0; + *cmd++ = upper_32_bits(CONTEXT_ACTIVE); + } + + *cmd++ = MI_BATCH_BUFFER_END; + + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, + xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + } #define PVC_CTX_ASID (0x2e + 1) @@ -893,6 +985,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, void *init_data = NULL; u32 arb_enable; u32 lrc_size; + u32 bo_flags; int err; kref_init(&lrc->refcount); @@ -902,22 +995,30 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; + bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE; + /* * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. */ lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); + lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, + ttm_bo_type_kernel, + bo_flags); + if (IS_ERR(lrc->bb_per_ctx_bo)) { + err = PTR_ERR(lrc->bb_per_ctx_bo); + goto err_lrc_finish; + } + lrc->size = lrc_size; lrc->ring.size = ring_size; lrc->ring.tail = 0; - lrc->ctx_timestamp = 0; xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); @@ -990,7 +1091,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) | _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE)); + lrc->ctx_timestamp = 0; xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) + xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); @@ -1019,6 +1123,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); + xe_lrc_setup_utilization(lrc); + return 0; err_lrc_finish: @@ -1238,6 +1344,21 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) return __xe_lrc_parallel_map(lrc); } +/** + * xe_lrc_engine_id() - Read engine id value + * @lrc: Pointer to the lrc. + * + * Returns: context id value + */ +static u32 xe_lrc_engine_id(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_engine_id_map(lrc); + return xe_map_read32(xe, &map); +} + static int instr_dw(u32 cmd_header) { /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ @@ -1684,7 +1805,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; snapshot->lrc_snapshot = NULL; - snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); return snapshot; } @@ -1784,22 +1905,74 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) kfree(snapshot); } +static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) +{ + u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id); + u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id); + struct xe_hw_engine *hwe; + u64 val; + + hwe = xe_gt_hw_engine(lrc->gt, class, instance, false); + if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe), + "Unexpected engine class:instance %d:%d for context utilization\n", + class, instance)) + return -1; + + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) + val = xe_mmio_read64_2x32(&hwe->gt->mmio, + RING_CTX_TIMESTAMP(hwe->mmio_base)); + else + val = xe_mmio_read32(&hwe->gt->mmio, + RING_CTX_TIMESTAMP(hwe->mmio_base)); + + *reg_ctx_ts = val; + + return 0; +} + /** * xe_lrc_update_timestamp() - Update ctx timestamp * @lrc: Pointer to the lrc. * @old_ts: Old timestamp value * * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and - * update saved value. + * update saved value. With support for active contexts, the calculation may be + * slightly racy, so follow a read-again logic to ensure that the context is + * still active before returning the right timestamp. * * Returns: New ctx timestamp value */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts) { + u64 lrc_ts, reg_ts; + u32 engine_id; + *old_ts = lrc->ctx_timestamp; - lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + lrc_ts = xe_lrc_ctx_timestamp(lrc); + /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */ + if (IS_SRIOV_VF(lrc_to_xe(lrc))) { + lrc->ctx_timestamp = lrc_ts; + goto done; + } + + if (lrc_ts == CONTEXT_ACTIVE) { + engine_id = xe_lrc_engine_id(lrc); + if (!get_ctx_timestamp(lrc, engine_id, ®_ts)) + lrc->ctx_timestamp = reg_ts; + + /* read lrc again to ensure context is still active */ + lrc_ts = xe_lrc_ctx_timestamp(lrc); + } + + /* + * If context switched out, just use the lrc_ts. Note that this needs to + * be a separate if condition. + */ + if (lrc_ts != CONTEXT_ACTIVE) + lrc->ctx_timestamp = lrc_ts; +done: trace_xe_lrc_update_timestamp(lrc, *old_ts); return lrc->ctx_timestamp; diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 0b40f349ab95d0..eb6e8de8c939e9 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -120,7 +120,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc); -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); +u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc); +u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); @@ -136,6 +137,6 @@ u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); * * Returns the current LRC timestamp */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts); +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts); #endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index cd38586ae98932..ae24cf6f8dd998 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -52,7 +52,10 @@ struct xe_lrc { struct xe_hw_fence_ctx fence_ctx; /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ - u32 ctx_timestamp; + u64 ctx_timestamp; + + /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ + struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 818f023166d5d3..f4d108dc49b1b1 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -140,6 +140,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ + .has_64bit_timestamp = 1, \ .va_bits = 48, \ .vm_max_level = 4, \ .hw_engine_mask = \ @@ -668,6 +669,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; + xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; for_each_remote_tile(tile, xe, id) { int err; diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index e9b9bbc138d377..ca6b10d3557349 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -21,6 +21,7 @@ struct xe_graphics_desc { u8 has_indirect_ring_state:1; u8 has_range_tlb_invalidation:1; u8 has_usm:1; + u8 has_64bit_timestamp:1; }; struct xe_media_desc { diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h index 5c669a0b21808e..d525cbee1e3411 100644 --- a/drivers/gpu/drm/xe/xe_trace_lrc.h +++ b/drivers/gpu/drm/xe/xe_trace_lrc.h @@ -19,12 +19,12 @@ #define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev) TRACE_EVENT(xe_lrc_update_timestamp, - TP_PROTO(struct xe_lrc *lrc, uint32_t old), + TP_PROTO(struct xe_lrc *lrc, uint64_t old), TP_ARGS(lrc, old), TP_STRUCT__entry( __field(struct xe_lrc *, lrc) - __field(u32, old) - __field(u32, new) + __field(u64, old) + __field(u64, new) __string(name, lrc->fence_ctx.name) __string(device_id, __dev_name_lrc(lrc)) ), @@ -36,7 +36,7 @@ TRACE_EVENT(xe_lrc_update_timestamp, __assign_str(name); __assign_str(device_id); ), - TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s", + TP_printk("lrc=:%p lrc->name=%s old=%llu new=%llu device_id:%s", __entry->lrc, __get_str(name), __entry->old, __entry->new, __get_str(device_id)) From e333332657f615ac2b55aa35565c4a882018bbe9 Mon Sep 17 00:00:00 2001 From: pengdonglin Date: Mon, 12 May 2025 17:42:45 +0800 Subject: [PATCH 1822/2924] ftrace: Fix preemption accounting for stacktrace trigger command When using the stacktrace trigger command to trace syscalls, the preemption count was consistently reported as 1 when the system call event itself had 0 ("."). For example: root@ubuntu22-vm:/sys/kernel/tracing/events/syscalls/sys_enter_read $ echo stacktrace > trigger $ echo 1 > enable sshd-416 [002] ..... 232.864910: sys_read(fd: a, buf: 556b1f3221d0, count: 8000) sshd-416 [002] ...1. 232.864913: => ftrace_syscall_enter => syscall_trace_enter => do_syscall_64 => entry_SYSCALL_64_after_hwframe The root cause is that the trace framework disables preemption in __DO_TRACE before invoking the trigger callback. Use the tracing_gen_ctx_dec() that will accommodate for the increase of the preemption count in __DO_TRACE when calling the callback. The result is the accurate reporting of: sshd-410 [004] ..... 210.117660: sys_read(fd: 4, buf: 559b725ba130, count: 40000) sshd-410 [004] ..... 210.117662: => ftrace_syscall_enter => syscall_trace_enter => do_syscall_64 => entry_SYSCALL_64_after_hwframe Cc: stable@vger.kernel.org Fixes: ce33c845b030c ("tracing: Dump stacktrace trigger to the corresponding instance") Link: https://lore.kernel.org/20250512094246.1167956-1-dolinux.peng@gmail.com Signed-off-by: pengdonglin Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index b66b6d235d9130..6e87ae2a1a66bf 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1560,7 +1560,7 @@ stacktrace_trigger(struct event_trigger_data *data, struct trace_event_file *file = data->private_data; if (file) - __trace_stack(file->tr, tracing_gen_ctx(), STACK_SKIP); + __trace_stack(file->tr, tracing_gen_ctx_dec(), STACK_SKIP); else trace_dump_stack(STACK_SKIP); } From 11aff32439df6ca5b3b891b43032faf88f4a6a29 Mon Sep 17 00:00:00 2001 From: pengdonglin Date: Mon, 12 May 2025 17:42:46 +0800 Subject: [PATCH 1823/2924] ftrace: Fix preemption accounting for stacktrace filter command The preemption count of the stacktrace filter command to trace ksys_read is consistently incorrect: $ echo ksys_read:stacktrace > set_ftrace_filter <...>-453 [004] ...1. 38.308956: => ksys_read => do_syscall_64 => entry_SYSCALL_64_after_hwframe The root cause is that the trace framework disables preemption when invoking the filter command callback in function_trace_probe_call: preempt_disable_notrace(); probe_ops->func(ip, parent_ip, probe_opsbe->tr, probe_ops, probe->data); preempt_enable_notrace(); Use tracing_gen_ctx_dec() to account for the preempt_disable_notrace(), which will output the correct preemption count: $ echo ksys_read:stacktrace > set_ftrace_filter <...>-410 [006] ..... 31.420396: => ksys_read => do_syscall_64 => entry_SYSCALL_64_after_hwframe Cc: stable@vger.kernel.org Fixes: 36590c50b2d07 ("tracing: Merge irqflags + preempt counter.") Link: https://lore.kernel.org/20250512094246.1167956-2-dolinux.peng@gmail.com Signed-off-by: pengdonglin Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 98ccf3f00c519d..4e37a0f6aaa384 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -633,11 +633,7 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, static __always_inline void trace_stack(struct trace_array *tr) { - unsigned int trace_ctx; - - trace_ctx = tracing_gen_ctx(); - - __trace_stack(tr, trace_ctx, FTRACE_STACK_SKIP); + __trace_stack(tr, tracing_gen_ctx_dec(), FTRACE_STACK_SKIP); } static void From 1d6c39c89f617c9fec6bbae166e25b16a014f7c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 13 May 2025 11:50:32 -0400 Subject: [PATCH 1824/2924] ring-buffer: Fix persistent buffer when commit page is the reader page The ring buffer is made up of sub buffers (sometimes called pages as they are by default PAGE_SIZE). It has the following "pages": "tail page" - this is the page that the next write will write to "head page" - this is the page that the reader will swap the reader page with. "reader page" - This belongs to the reader, where it will swap the head page from the ring buffer so that the reader does not race with the writer. The writer may end up on the "reader page" if the ring buffer hasn't written more than one page, where the "tail page" and the "head page" are the same. The persistent ring buffer has meta data that points to where these pages exist so on reboot it can re-create the pointers to the cpu_buffer descriptor. But when the commit page is on the reader page, the logic is incorrect. The check to see if the commit page is on the reader page checked if the head page was the reader page, which would never happen, as the head page is always in the ring buffer. The correct check would be to test if the commit page is on the reader page. If that's the case, then it can exit out early as the commit page is only on the reader page when there's only one page of data in the buffer. There's no reason to iterate the ring buffer pages to find the "commit page" as it is already found. To trigger this bug: # echo 1 > /sys/kernel/tracing/instances/boot_mapped/events/syscalls/sys_enter_fchownat/enable # touch /tmp/x # chown sshd /tmp/x # reboot On boot up, the dmesg will have: Ring buffer meta [0] is from previous boot! Ring buffer meta [1] is from previous boot! Ring buffer meta [2] is from previous boot! Ring buffer meta [3] is from previous boot! Ring buffer meta [4] commit page not found Ring buffer meta [5] is from previous boot! Ring buffer meta [6] is from previous boot! Ring buffer meta [7] is from previous boot! Where the buffer on CPU 4 had a "commit page not found" error and that buffer is cleared and reset causing the output to be empty and the data lost. When it works correctly, it has: # cat /sys/kernel/tracing/instances/boot_mapped/trace_pipe <...>-1137 [004] ..... 998.205323: sys_enter_fchownat: __syscall_nr=0x104 (260) dfd=0xffffff9c (4294967196) filename=(0xffffc90000a0002c) user=0x3e8 (1000) group=0xffffffff (4294967295) flag=0x0 (0 Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250513115032.3e0b97f7@gandalf.local.home Fixes: 5f3b6e839f3ce ("ring-buffer: Validate boot range memory events") Reported-by: Tasos Sahanidis Tested-by: Tasos Sahanidis Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c0f877d39a241d..3f9bf562beea23 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1887,10 +1887,12 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) head_page = cpu_buffer->head_page; - /* If both the head and commit are on the reader_page then we are done. */ - if (head_page == cpu_buffer->reader_page && - head_page == cpu_buffer->commit_page) + /* If the commit_buffer is the reader page, update the commit page */ + if (meta->commit_buffer == (unsigned long)cpu_buffer->reader_page->page) { + cpu_buffer->commit_page = cpu_buffer->reader_page; + /* Nothing more to do, the only page is the reader page */ goto done; + } /* Iterate until finding the commit page */ for (i = 0; i < meta->nr_subbufs + 1; i++, rb_inc_page(&head_page)) { From cd52cc3544e400e53e6d4b7bfc5263e7a867b5ab Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 May 2025 11:30:21 -0400 Subject: [PATCH 1825/2924] bcachefs: Don't strip rebalance_opts from indirect extents Fix bch2_bkey_clear_needs_rebalance(): indirect extents are never supposed to have bch_extent_rebalance stripped off, because that's how we get the IO path options when we don't have the original inode it belonged to. Signed-off-by: Kent Overstreet --- fs/bcachefs/rebalance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 4ccdfc1f34aa35..623273556aa978 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -309,7 +309,7 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { - if (!bch2_bkey_rebalance_opts(k)) + if (k.k->type == KEY_TYPE_reflink_v || !bch2_bkey_rebalance_opts(k)) return 0; struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0); From b1c71cb492bb26dc883b11b5bd085d2467508f84 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 May 2025 14:42:37 -0400 Subject: [PATCH 1826/2924] bcachefs: Fix broken btree_path lock invariants in next_node() This fixes btree locking assert pops users were seeing during evacuate: https://github.com/koverstreet/bcachefs/issues/878 May 09 22:45:02 sharon kernel: bcachefs (68116e25-fa2d-4c6f-86c7-e8b431d792ae): bch2_btree_insert_node(): node not locked at level 1 May 09 22:45:02 sharon kernel: bch2_btree_node_rewrite [bcachefs]: watermark=btree no_check_rw alloc l=0-1 mode=none nodes_written=0 cl.remaining=2 journal_seq=0 May 09 22:45:02 sharon kernel: path: idx 1 ref 1:0 S B btree=alloc level=0 pos 0:3699637:0 0:3698012:1-0:3699637:0 bch2_move_btree.isra.0+0x1db/0x490 [bcachefs] uptodate 0 locks_want 2 May 09 22:45:02 sharon kernel: l=0 locks intent seq 4 node ffff8bd700c93600 May 09 22:45:02 sharon kernel: l=1 locks unlocked seq 1712 node ffff8bd6fd5e7a00 May 09 22:45:02 sharon kernel: l=2 locks unlocked seq 2295 node ffff8bd6cc725400 May 09 22:45:02 sharon kernel: l=3 locks unlocked seq 0 node 0000000000000000 Evacuate walks btree nodes with bch2_btree_iter_next_node() and rewrites them, bch2_btree_update_start() upgrades the path to take intent locks as far as it needs to. But next_node() does low level unlock/relock calls on individual nodes, and didn't handle the case where a path is supposed to be holding multiple intent locks. If a path has locks_want > 1, it needs to be either holding locks on all the btree nodes (at each level) requested, or none of them. Fix this with a bch2_btree_path_downgrade(). Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 59fa527ac68548..9c9bc7b6c71279 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1971,6 +1971,12 @@ struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_ return NULL; } + /* + * We don't correctly handle nodes with extra intent locks here: + * downgrade so we don't violate locking invariants + */ + bch2_btree_path_downgrade(trans, path); + if (!bch2_btree_node_relock(trans, path, path->level + 1)) { __bch2_btree_path_unlock(trans, path); path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock); From 7b6759b1991d427cf9a562b2891b8c3e87a19c76 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 May 2025 12:55:44 -0400 Subject: [PATCH 1827/2924] bcachefs: Fix livelock in journal_entry_open() When the journal is low on space, we might do discards from journal_res_get() -> journal_entry_open(). Make sure we set j->can_discard correctly, so that if we're low on space but not because discards aren't keeping up we don't livelock. Fixes: 8e4d28036c29 ("bcachefs: Don't aggressively discard the journal") Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 976464d8a695ae..cc00b0fc40d8e3 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -17,6 +17,8 @@ #include #include +static bool __should_discard_bucket(struct journal *, struct journal_device *); + /* Free space calculations: */ static unsigned journal_space_from(struct journal_device *ja, @@ -203,8 +205,7 @@ void bch2_journal_space_available(struct journal *j) ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk) ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr; - if (ja->discard_idx != ja->dirty_idx_ondisk) - can_discard = true; + can_discard |= __should_discard_bucket(j, ja); max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size); nr_online++; @@ -264,13 +265,19 @@ void bch2_journal_space_available(struct journal *j) /* Discards - last part of journal reclaim: */ -static bool should_discard_bucket(struct journal *j, struct journal_device *ja) +static bool __should_discard_bucket(struct journal *j, struct journal_device *ja) { - spin_lock(&j->lock); unsigned min_free = max(4, ja->nr / 8); - bool ret = bch2_journal_dev_buckets_available(j, ja, journal_space_discarded) < min_free && + return bch2_journal_dev_buckets_available(j, ja, journal_space_discarded) < + min_free && ja->discard_idx != ja->dirty_idx_ondisk; +} + +static bool should_discard_bucket(struct journal *j, struct journal_device *ja) +{ + spin_lock(&j->lock); + bool ret = __should_discard_bucket(j, ja); spin_unlock(&j->lock); return ret; From 19b22d04cd44ded1ea5af7849aec9cbf4021c852 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 May 2025 14:27:01 -0400 Subject: [PATCH 1828/2924] bcachefs: Don't set btree nodes as accessed on fill Prevent jobs that do lots of scanning (i.e. evacuatee, scrub) from causing OOMs. The shrinker code seems to be having issues when it doesn't do any freeing because it's just flipping off the acccessed bit - and the accessed bit shouldn't be set on first use anyways. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_cache.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 9b80201c7982f5..89989129579728 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -852,7 +852,6 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea b->sib_u64s[1] = 0; b->whiteout_u64s = 0; bch2_btree_keys_init(b); - set_btree_node_accessed(b); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], start_time); @@ -1286,6 +1285,10 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, six_unlock_read(&b->c.lock); goto retry; } + + /* avoid atomic set bit if it's not needed: */ + if (!btree_node_accessed(b)) + set_btree_node_accessed(b); } /* XXX: waiting on IO with btree locks held: */ @@ -1301,10 +1304,6 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, prefetch(p + L1_CACHE_BYTES * 2); } - /* avoid atomic set bit if it's not needed: */ - if (!btree_node_accessed(b)) - set_btree_node_accessed(b); - if (unlikely(btree_node_read_error(b))) { six_unlock_read(&b->c.lock); b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached); From 61198e62878477926c9e986eaf6bc40aafb0bf64 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Tue, 13 May 2025 18:54:26 +0800 Subject: [PATCH 1829/2924] bcachefs: Fix self deadlock Before invoking bch2_accounting_mem_mod_locked in bch2_gc_accounting_done, we already write locked mark_lock, in bch2_accounting_mem_insert, we lock mark_lock again. Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 17 +++++++++++++++-- fs/bcachefs/disk_accounting.h | 16 +++++++++++----- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index b007319b72e91c..1f0422bfae359f 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -376,6 +376,19 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, return ret; } +int bch2_accounting_mem_insert_locked(struct bch_fs *c, struct bkey_s_c_accounting a, + enum bch_accounting_mode mode) +{ + struct bch_replicas_padded r; + + if (mode != BCH_ACCOUNTING_read && + accounting_to_replicas(&r.e, a.k->p) && + !bch2_replicas_marked_locked(c, &r.e)) + return -BCH_ERR_btree_insert_need_mark_replicas; + + return __bch2_accounting_mem_insert(c, a); +} + static bool accounting_mem_entry_is_zero(struct accounting_mem_entry *e) { for (unsigned i = 0; i < e->nr_counters; i++) @@ -583,7 +596,7 @@ int bch2_gc_accounting_done(struct bch_fs *c) accounting_key_init(&k_i.k, &acc_k, src_v, nr); bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), - BCH_ACCOUNTING_normal); + BCH_ACCOUNTING_normal, true); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -612,7 +625,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) percpu_down_read(&c->mark_lock); int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), - BCH_ACCOUNTING_read); + BCH_ACCOUNTING_read, false); percpu_up_read(&c->mark_lock); return ret; } diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index abb1f6206fe928..d557b99b3c0ae6 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -136,6 +136,7 @@ enum bch_accounting_mode { }; int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode); +int bch2_accounting_mem_insert_locked(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode); void bch2_accounting_mem_gc(struct bch_fs *); static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc) @@ -150,7 +151,8 @@ static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc) */ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, - enum bch_accounting_mode mode) + enum bch_accounting_mode mode, + bool write_locked) { struct bch_fs *c = trans->c; struct bch_accounting_mem *acc = &c->accounting; @@ -189,7 +191,11 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { - int ret = bch2_accounting_mem_insert(c, a, mode); + int ret = 0; + if (unlikely(write_locked)) + ret = bch2_accounting_mem_insert_locked(c, a, mode); + else + ret = bch2_accounting_mem_insert(c, a, mode); if (ret) return ret; } @@ -206,7 +212,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false); percpu_up_read(&trans->c->mark_lock); return ret; } @@ -259,7 +265,7 @@ static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans, EBUG_ON(bversion_zero(a->k.bversion)); return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply)) - ? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal) + ? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal, false) : 0; } @@ -271,7 +277,7 @@ static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans struct bkey_s_accounting a = accounting_i_to_s(a_i); bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); + bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal, false); bch2_accounting_neg(a); } } From 43b9fece2d9687cde58cc7eec4548dd1c35e2198 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 May 2025 21:14:17 -0400 Subject: [PATCH 1830/2924] bcachefs: Fix set_should_be_locked() call in peek_slot() set_should_be_locked() needs to be called before peek_key_cache(), which traverses other paths and may do a trans unlock/relock. This fixes an assertion pop in path_peek_slot(), when the path we're using is unexpectedly not uptodate. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 9c9bc7b6c71279..a873ec1baf581c 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2749,7 +2749,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre ret = trans_maybe_inject_restart(trans, _RET_IP_); if (unlikely(ret)) { k = bkey_s_c_err(ret); - goto out_no_locked; + goto out; } /* extents can't span inode numbers: */ @@ -2769,13 +2769,15 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { k = bkey_s_c_err(ret); - goto out_no_locked; + goto out; } struct btree_path *path = btree_iter_path(trans, iter); if (unlikely(!btree_path_node(path, path->level))) return bkey_s_c_null; + btree_path_set_should_be_locked(trans, path); + if ((iter->flags & BTREE_ITER_cached) || !(iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots))) { k = bkey_s_c_null; @@ -2796,12 +2798,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre if (!bkey_err(k)) iter->k = *k.k; /* We're not returning a key from iter->path: */ - goto out_no_locked; + goto out; } - k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k); + k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k); if (unlikely(!k.k)) - goto out_no_locked; + goto out; if (unlikely(k.k->type == KEY_TYPE_whiteout && (iter->flags & BTREE_ITER_filter_snapshots) && @@ -2839,7 +2841,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre } if (unlikely(bkey_err(k))) - goto out_no_locked; + goto out; next = k.k ? bkey_start_pos(k.k) : POS_MAX; @@ -2861,8 +2863,6 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre } } out: - btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter)); -out_no_locked: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(trans, iter); ret = bch2_btree_iter_verify_ret(trans, iter, k); From a12cb6f758177fcf9bdb7d18b4724eaa29023740 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 14 May 2025 13:40:47 -0400 Subject: [PATCH 1831/2924] bcachefs: Fix accidental O(n^2) in fiemap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since bch2_seek_pagecache_data() searches for dirty data, we only want to call it for holes in the extents btree - otherwise we have an accidental O(n^2), as we repeatedly search the same range. Reported-by: Marcin Mirosław Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b6801861c66f1c..4b742e62255bb7 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1429,7 +1429,9 @@ static int bch2_next_fiemap_extent(struct btree_trans *trans, if (ret) goto err; - ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, end, cur); + u64 pagecache_end = k.k ? max(start, bkey_start_offset(k.k)) : end; + + ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, pagecache_end, cur); if (ret) goto err; From d1041d8eab31f9b4c696ee060141d1b0d156f84d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 9 May 2025 15:05:19 -0400 Subject: [PATCH 1832/2924] bcachefs: Fix missing commit in backpointer to missing target Fsck wants to do transaction commits from an outer context; it may have other repair to do (i.e. duplicate backpointers). But when calling backpointer_not_found() from runtime code, i.e. runtime self healing, we should be doing the commit - the outer context expects to just be doing lookups. This fixes bugs where we get stuck spinning, reported as "RCU lock hold time warnings. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 117 +++++++++++++++++++++++++------------ 1 file changed, 79 insertions(+), 38 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index ff26bb51515004..5f195d2280a4ea 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -192,7 +192,8 @@ static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, static int backpointer_target_not_found(struct btree_trans *trans, struct bkey_s_c_backpointer bp, struct bkey_s_c target_k, - struct bkey_buf *last_flushed) + struct bkey_buf *last_flushed, + bool commit) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; @@ -228,18 +229,77 @@ static int backpointer_target_not_found(struct btree_trans *trans, } if (fsck_err(trans, backpointer_to_missing_ptr, - "%s", buf.buf)) + "%s", buf.buf)) { ret = bch2_backpointer_del(trans, bp.k->p); + if (ret || !commit) + goto out; + + /* + * Normally, on transaction commit from inside a transaction, + * we'll return -BCH_ERR_transaction_restart_nested, since a + * transaction commit invalidates pointers given out by peek(). + * + * However, since we're updating a write buffer btree, if we + * return a transaction restart and loop we won't see that the + * backpointer has been deleted without an additional write + * buffer flush - and those are expensive. + * + * So we're relying on the caller immediately advancing to the + * next backpointer and starting a new transaction immediately + * after backpointer_get_key() returns NULL: + */ + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + } +out: fsck_err: printbuf_exit(&buf); return ret; } -struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - struct bkey_s_c_backpointer bp, - struct btree_iter *iter, - unsigned iter_flags, - struct bkey_buf *last_flushed) +static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans, + struct bkey_s_c_backpointer bp, + struct btree_iter *iter, + struct bkey_buf *last_flushed, + bool commit) +{ + struct bch_fs *c = trans->c; + + BUG_ON(!bp.v->level); + + bch2_trans_node_iter_init(trans, iter, + bp.v->btree_id, + bp.v->pos, + 0, + bp.v->level - 1, + 0); + struct btree *b = bch2_btree_iter_peek_node(trans, iter); + if (IS_ERR_OR_NULL(b)) + goto err; + + BUG_ON(b->c.level != bp.v->level - 1); + + if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, + bkey_i_to_s_c(&b->key), bp)) + return b; + + if (btree_node_will_make_reachable(b)) { + b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); + } else { + int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), + last_flushed, commit); + b = ret ? ERR_PTR(ret) : NULL; + } +err: + bch2_trans_iter_exit(trans, iter); + return b; +} + +static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans, + struct bkey_s_c_backpointer bp, + struct btree_iter *iter, + unsigned iter_flags, + struct bkey_buf *last_flushed, + bool commit) { struct bch_fs *c = trans->c; @@ -277,10 +337,10 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, bch2_trans_iter_exit(trans, iter); if (!bp.v->level) { - int ret = backpointer_target_not_found(trans, bp, k, last_flushed); + int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit); return ret ? bkey_s_c_err(ret) : bkey_s_c_null; } else { - struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed); + struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit); if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node)) return bkey_s_c_null; if (IS_ERR_OR_NULL(b)) @@ -295,35 +355,16 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, struct btree_iter *iter, struct bkey_buf *last_flushed) { - struct bch_fs *c = trans->c; - - BUG_ON(!bp.v->level); - - bch2_trans_node_iter_init(trans, iter, - bp.v->btree_id, - bp.v->pos, - 0, - bp.v->level - 1, - 0); - struct btree *b = bch2_btree_iter_peek_node(trans, iter); - if (IS_ERR_OR_NULL(b)) - goto err; - - BUG_ON(b->c.level != bp.v->level - 1); - - if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, - bkey_i_to_s_c(&b->key), bp)) - return b; + return __bch2_backpointer_get_node(trans, bp, iter, last_flushed, true); +} - if (btree_node_will_make_reachable(b)) { - b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); - } else { - int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed); - b = ret ? ERR_PTR(ret) : NULL; - } -err: - bch2_trans_iter_exit(trans, iter); - return b; +struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, + struct bkey_s_c_backpointer bp, + struct btree_iter *iter, + unsigned iter_flags, + struct bkey_buf *last_flushed) +{ + return __bch2_backpointer_get_key(trans, bp, iter, iter_flags, last_flushed, true); } static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k, @@ -521,7 +562,7 @@ static int check_bp_exists(struct btree_trans *trans, struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); struct bkey_s_c other_extent = - bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL); + __bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL, false); ret = bkey_err(other_extent); if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) ret = 0; From 9c09e59cc55cdf7feb29971fd792fc1947010b79 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 14 May 2025 18:53:48 -0400 Subject: [PATCH 1833/2924] bcachefs: fix wrong arg to fsck_err() fsck_err() needs the btree transaction passed to it if there is one - so that it can unlock/relock around prompting userspace for fixing the error. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 7b25cedd3e40b5..71d428f376a5f8 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2446,7 +2446,7 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, u32 parent = le32_to_cpu(s.v->fs_path_parent); if (darray_u32_has(&subvol_path, parent)) { - if (fsck_err(c, subvol_loop, "subvolume loop")) + if (fsck_err(trans, subvol_loop, "subvolume loop")) ret = reattach_subvol(trans, s); break; } From f0d17942ea3edec191f1c0fc0d2cd7feca8de2f0 Mon Sep 17 00:00:00 2001 From: Vicki Pfau Date: Tue, 13 May 2025 15:59:48 -0700 Subject: [PATCH 1834/2924] Input: xpad - add more controllers Adds support for a revision of the Turtle Beach Recon Wired Controller, the Turtle Beach Stealth Ultra, and the PowerA Wired Controller. Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250513225950.2719387-1-vi@endrift.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 57a5ff3d1992c5..1008858f78e207 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -290,6 +290,8 @@ static const struct xpad_device { { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x10f5, 0x7005, "Turtle Beach Recon Controller", 0, XTYPE_XBOXONE }, + { 0x10f5, 0x7008, "Turtle Beach Recon Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, + { 0x10f5, 0x7073, "Turtle Beach Stealth Ultra Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 }, { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, @@ -354,6 +356,7 @@ static const struct xpad_device { { 0x1ee9, 0x1590, "ZOTAC Gaming Zone", 0, XTYPE_XBOX360 }, { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, + { 0x20d6, 0x2064, "PowerA Wired Controller for Xbox", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, { 0x20d6, 0x400b, "PowerA FUSION Pro 4 Wired Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x20d6, 0x890b, "PowerA MOGA XP-Ultra Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, From 1fe4a44b7fa3955bcb7b4067c07b778fe90d8ee7 Mon Sep 17 00:00:00 2001 From: Jethro Donaldson Date: Thu, 15 May 2025 01:23:23 +1200 Subject: [PATCH 1835/2924] smb: client: fix memory leak during error handling for POSIX mkdir The response buffer for the CREATE request handled by smb311_posix_mkdir() is leaked on the error path (goto err_free_rsp_buf) because the structure pointer *rsp passed to free_rsp_buf() is not assigned until *after* the error condition is checked. As *rsp is initialised to NULL, free_rsp_buf() becomes a no-op and the leak is instead reported by __kmem_cache_shutdown() upon subsequent rmmod of cifs.ko if (and only if) the error path has been hit. Pass rsp_iov.iov_base to free_rsp_buf() instead, similar to the code in other functions in smb2pdu.c for which *rsp is assigned late. Cc: stable@vger.kernel.org Signed-off-by: Jethro Donaldson Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 0b35816d551f7a..4e28632b5fd661 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2968,7 +2968,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, /* Eventually save off posix specific response info and timestamps */ err_free_rsp_buf: - free_rsp_buf(resp_buftype, rsp); + free_rsp_buf(resp_buftype, rsp_iov.iov_base); kfree(pc_buf); err_free_req: cifs_small_buf_release(req); From 3965c23773e81c476f6de30ccc5d201c59ff9714 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 12 May 2025 14:58:36 -0300 Subject: [PATCH 1836/2924] smb: client: fix zero rsize error messages cifs_prepare_read() might be called with a disconnected channel, where TCP_Server_Info::max_read is set to zero due to reconnect, so calling ->negotiate_rize() will set @rsize to default min IO size (64KiB) and then logging CIFS: VFS: SMB: Zero rsize calculated, using minimum value 65536 If the reconnect happens in cifsd thread, cifs_renegotiate_iosize() will end up being called and then @rsize set to the expected value. Since we can't rely on the value of @server->max_read by the time we call cifs_prepare_read(), try to ->negotiate_rize() only if @cifs_sb->ctx->rsize is zero. Reported-by: Steve French Fixes: c59f7c9661b9 ("smb: client: ensure aligned IO sizes") Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 851b74f557c10e..950aa4f912f5cd 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -160,8 +160,10 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; - cifs_negotiate_rsize(server, cifs_sb->ctx, - tlink_tcon(req->cfile->tlink)); + if (cifs_sb->ctx->rsize == 0) { + cifs_negotiate_rsize(server, cifs_sb->ctx, + tlink_tcon(req->cfile->tlink)); + } rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &size, &rdata->credits); From 539fbab37881e32ba6a708a100de6db19e1e7e7d Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 7 Apr 2025 15:28:05 +0300 Subject: [PATCH 1837/2924] tpm: Mask TPM RC in tpm2_start_auth_session() tpm2_start_auth_session() does not mask TPM RC correctly from the callers: [ 28.766528] tpm tpm0: A TPM error (2307) occurred start auth session Process TPM RCs inside tpm2_start_auth_session(), and map them to POSIX error codes. Cc: stable@vger.kernel.org # v6.10+ Fixes: 699e3efd6c64 ("tpm: Add HMAC session start and end functions") Reported-by: Herbert Xu Closes: https://lore.kernel.org/linux-integrity/Z_NgdRHuTKP6JK--@gondor.apana.org.au/ Reviewed-by: Stefano Garzarella Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm2-sessions.c | 20 ++++++-------------- include/linux/tpm.h | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c index 3f89635ba5e855..7b5049b3d476ef 100644 --- a/drivers/char/tpm/tpm2-sessions.c +++ b/drivers/char/tpm/tpm2-sessions.c @@ -40,11 +40,6 @@ * * These are the usage functions: * - * tpm2_start_auth_session() which allocates the opaque auth structure - * and gets a session from the TPM. This must be called before - * any of the following functions. The session is protected by a - * session_key which is derived from a random salt value - * encrypted to the NULL seed. * tpm2_end_auth_session() kills the session and frees the resources. * Under normal operation this function is done by * tpm_buf_check_hmac_response(), so this is only to be used on @@ -963,16 +958,13 @@ static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key) } /** - * tpm2_start_auth_session() - create a HMAC authentication session with the TPM - * @chip: the TPM chip structure to create the session with + * tpm2_start_auth_session() - Create an a HMAC authentication session + * @chip: A TPM chip * - * This function loads the NULL seed from its saved context and starts - * an authentication session on the null seed, fills in the - * @chip->auth structure to contain all the session details necessary - * for performing the HMAC, encrypt and decrypt operations and - * returns. The NULL seed is flushed before this function returns. + * Loads the ephemeral key (null seed), and starts an HMAC authenticated + * session. The null seed is flushed before the return. * - * Return: zero on success or actual error encountered. + * Returns zero on success, or a POSIX error code. */ int tpm2_start_auth_session(struct tpm_chip *chip) { @@ -1024,7 +1016,7 @@ int tpm2_start_auth_session(struct tpm_chip *chip) /* hash algorithm for session */ tpm_buf_append_u16(&buf, TPM_ALG_SHA256); - rc = tpm_transmit_cmd(chip, &buf, 0, "start auth session"); + rc = tpm_ret_to_err(tpm_transmit_cmd(chip, &buf, 0, "StartAuthSession")); tpm2_flush_context(chip, null_key); if (rc == TPM2_RC_SUCCESS) diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 6c3125300c009a..9ac9768cc8f7f9 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -257,6 +257,7 @@ enum tpm2_return_codes { TPM2_RC_TESTING = 0x090A, /* RC_WARN */ TPM2_RC_REFERENCE_H0 = 0x0910, TPM2_RC_RETRY = 0x0922, + TPM2_RC_SESSION_MEMORY = 0x0903, }; enum tpm2_command_codes { @@ -437,6 +438,24 @@ static inline u32 tpm2_rc_value(u32 rc) return (rc & BIT(7)) ? rc & 0xbf : rc; } +/* + * Convert a return value from tpm_transmit_cmd() to POSIX error code. + */ +static inline ssize_t tpm_ret_to_err(ssize_t ret) +{ + if (ret < 0) + return ret; + + switch (tpm2_rc_value(ret)) { + case TPM2_RC_SUCCESS: + return 0; + case TPM2_RC_SESSION_MEMORY: + return -ENOMEM; + default: + return -EFAULT; + } +} + #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); From 32d495b384a2db7d23c2295e03e6b6edb1c0db8d Mon Sep 17 00:00:00 2001 From: Purva Yeshi Date: Thu, 10 Apr 2025 16:04:42 +0530 Subject: [PATCH 1838/2924] char: tpm: tpm-buf: Add sanity check fallback in read helpers Fix Smatch-detected issue: drivers/char/tpm/tpm-buf.c:208 tpm_buf_read_u8() error: uninitialized symbol 'value'. drivers/char/tpm/tpm-buf.c:225 tpm_buf_read_u16() error: uninitialized symbol 'value'. drivers/char/tpm/tpm-buf.c:242 tpm_buf_read_u32() error: uninitialized symbol 'value'. Zero-initialize the return values in tpm_buf_read_u8(), tpm_buf_read_u16(), and tpm_buf_read_u32() to guard against uninitialized data in case of a boundary overflow. Add defensive initialization ensures the return values are always defined, preventing undefined behavior if the unexpected happens. Signed-off-by: Purva Yeshi Reviewed-by: Stefano Garzarella Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm-buf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/tpm/tpm-buf.c b/drivers/char/tpm/tpm-buf.c index e49a19fea3bdf6..dc882fc9fa9efc 100644 --- a/drivers/char/tpm/tpm-buf.c +++ b/drivers/char/tpm/tpm-buf.c @@ -201,7 +201,7 @@ static void tpm_buf_read(struct tpm_buf *buf, off_t *offset, size_t count, void */ u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset) { - u8 value; + u8 value = 0; tpm_buf_read(buf, offset, sizeof(value), &value); @@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(tpm_buf_read_u8); */ u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset) { - u16 value; + u16 value = 0; tpm_buf_read(buf, offset, sizeof(value), &value); @@ -235,7 +235,7 @@ EXPORT_SYMBOL_GPL(tpm_buf_read_u16); */ u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset) { - u32 value; + u32 value = 0; tpm_buf_read(buf, offset, sizeof(value), &value); From 2f661f71fda1fc0c42b7746ca5b7da529eb6b5be Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 4 Apr 2025 10:23:14 +0200 Subject: [PATCH 1839/2924] tpm: tis: Double the timeout B to 4s With some Infineon chips the timeouts in tpm_tis_send_data (both B and C) can reach up to about 2250 ms. Timeout C is retried since commit de9e33df7762 ("tpm, tpm_tis: Workaround failed command reception on Infineon devices") Timeout B still needs to be extended. The problem is most commonly encountered with context related operation such as load context/save context. These are issued directly by the kernel, and there is no retry logic for them. When a filesystem is set up to use the TPM for unlocking the boot fails, and restarting the userspace service is ineffective. This is likely because ignoring a load context/save context result puts the real TPM state and the TPM state expected by the kernel out of sync. Chips known to be affected: tpm_tis IFX1522:00: 2.0 TPM (device-id 0x1D, rev-id 54) Description: SLB9672 Firmware Revision: 15.22 tpm_tis MSFT0101:00: 2.0 TPM (device-id 0x1B, rev-id 22) Firmware Revision: 7.83 tpm_tis MSFT0101:00: 2.0 TPM (device-id 0x1A, rev-id 16) Firmware Revision: 5.63 Link: https://lore.kernel.org/linux-integrity/Z5pI07m0Muapyu9w@kitsune.suse.cz/ Signed-off-by: Michal Suchanek Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm_tis_core.h | 2 +- include/linux/tpm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h index 970d02c337c7f1..6c3aa480396b64 100644 --- a/drivers/char/tpm/tpm_tis_core.h +++ b/drivers/char/tpm/tpm_tis_core.h @@ -54,7 +54,7 @@ enum tis_int_flags { enum tis_defaults { TIS_MEM_LEN = 0x5000, TIS_SHORT_TIMEOUT = 750, /* ms */ - TIS_LONG_TIMEOUT = 2000, /* 2 sec */ + TIS_LONG_TIMEOUT = 4000, /* 4 secs */ TIS_TIMEOUT_MIN_ATML = 14700, /* usecs */ TIS_TIMEOUT_MAX_ATML = 15000, /* usecs */ }; diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 9ac9768cc8f7f9..a3d8305e88a51e 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -224,7 +224,7 @@ enum tpm2_const { enum tpm2_timeouts { TPM2_TIMEOUT_A = 750, - TPM2_TIMEOUT_B = 2000, + TPM2_TIMEOUT_B = 4000, TPM2_TIMEOUT_C = 200, TPM2_TIMEOUT_D = 30, TPM2_DURATION_SHORT = 20, From a9fb87b8b86918e34ef6bf3316311f41bc1a5b1f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 13 May 2025 15:13:16 -0700 Subject: [PATCH 1840/2924] netlink: specs: tc: fix a couple of attribute names Fix up spelling of two attribute names. These are clearly typoes and will prevent C codegen from working. Let's treat this as a fix to get the correction into users' hands ASAP, and prevent anyone depending on the wrong names. Fixes: a1bcfde83669 ("doc/netlink/specs: Add a spec for tc") Link: https://patch.msgid.link/20250513221316.841700-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/tc.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml index aacccea5dfe42a..5e1ff04f51f267 100644 --- a/Documentation/netlink/specs/tc.yaml +++ b/Documentation/netlink/specs/tc.yaml @@ -2745,7 +2745,7 @@ attribute-sets: type: u16 byte-order: big-endian - - name: key-l2-tpv3-sid + name: key-l2tpv3-sid type: u32 byte-order: big-endian - @@ -3504,7 +3504,7 @@ attribute-sets: name: rate64 type: u64 - - name: prate4 + name: prate64 type: u64 - name: burst From f3dd5fb2fa494dcbdb10f8d27f2deac8ef61a2fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 13 May 2025 15:16:38 -0700 Subject: [PATCH 1841/2924] netlink: specs: tc: all actions are indexed arrays Some TC filters have actions listed as indexed arrays of nests and some as just nests. They are all indexed arrays, the handling is common across filters. Fixes: 2267672a6190 ("doc/netlink/specs: Update the tc spec") Link: https://patch.msgid.link/20250513221638.842532-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/tc.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml index 5e1ff04f51f267..953aa837958b3f 100644 --- a/Documentation/netlink/specs/tc.yaml +++ b/Documentation/netlink/specs/tc.yaml @@ -2017,7 +2017,8 @@ attribute-sets: attributes: - name: act - type: nest + type: indexed-array + sub-type: nest nested-attributes: tc-act-attrs - name: police @@ -2250,7 +2251,8 @@ attribute-sets: attributes: - name: act - type: nest + type: indexed-array + sub-type: nest nested-attributes: tc-act-attrs - name: police From 865ab2461375e3a5a2526f91f9a9f17b8931bc9e Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 12 May 2025 18:12:36 +0530 Subject: [PATCH 1842/2924] octeontx2-pf: macsec: Fix incorrect max transmit size in TX secy MASCEC hardware block has a field called maximum transmit size for TX secy. Max packet size going out of MCS block has be programmed taking into account full packet size which has L2 header,SecTag and ICV. MACSEC offload driver is configuring max transmit size as macsec interface MTU which is incorrect. Say with 1500 MTU of real device, macsec interface created on top of real device will have MTU of 1468(1500 - (SecTag + ICV)). This is causing packets from macsec interface of size greater than or equal to 1468 are not getting transmitted out because driver programmed max transmit size as 1468 instead of 1514(1500 + ETH_HDR_LEN). Fixes: c54ffc73601c ("octeontx2-pf: mcs: Introduce MACSEC hardware offloading") Signed-off-by: Subbaraya Sundeep Reviewed-by: Simon Horman Link: https://patch.msgid.link/1747053756-4529-1-git-send-email-sbhatta@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index f3b9daffaec3c2..4c7e0f345cb5ba 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -531,7 +531,8 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf, if (sw_tx_sc->encrypt) sectag_tci |= (MCS_TCI_E | MCS_TCI_C); - policy = FIELD_PREP(MCS_TX_SECY_PLCY_MTU, secy->netdev->mtu); + policy = FIELD_PREP(MCS_TX_SECY_PLCY_MTU, + pfvf->netdev->mtu + OTX2_ETH_HLEN); /* Write SecTag excluding AN bits(1..0) */ policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_TCI, sectag_tci >> 2); policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_OFFSET, tag_offset); From 141a8dec88ba257429965f163dd16e005f1a9718 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 13 May 2025 10:10:07 +0800 Subject: [PATCH 1843/2924] net: txgbe: Fix to calculate EEPROM checksum for AML devices In the new firmware version, the shadow ram reserves some space to store I2C information, so the checksum calculation needs to skip this section. Otherwise, the driver will fail to probe because the invalid EEPROM checksum. Fixes: 2e5af6b2ae85 ("net: txgbe: Add basic support for new AML devices") Signed-off-by: Jiawen Wu Reviewed-by: Simon Horman Link: https://patch.msgid.link/1C6BF7A937237F5A+20250513021009.145708-2-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c | 8 +++++++- drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c index 4b9921b7bb1125..a054b259d435dd 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c @@ -99,9 +99,15 @@ static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum) } local_buffer = eeprom_ptrs; - for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++) + for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++) { + if (wx->mac.type == wx_mac_aml) { + if (i >= TXGBE_EEPROM_I2C_SRART_PTR && + i < TXGBE_EEPROM_I2C_END_PTR) + local_buffer[i] = 0xffff; + } if (i != wx->eeprom.sw_region_offset + TXGBE_EEPROM_CHECKSUM) *checksum += local_buffer[i]; + } kvfree(eeprom_ptrs); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 9c1c26234cad95..f423012dec2256 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -158,6 +158,8 @@ #define TXGBE_EEPROM_VERSION_L 0x1D #define TXGBE_EEPROM_VERSION_H 0x1E #define TXGBE_ISCSI_BOOT_CONFIG 0x07 +#define TXGBE_EEPROM_I2C_SRART_PTR 0x580 +#define TXGBE_EEPROM_I2C_END_PTR 0x800 #define TXGBE_MAX_MSIX_VECTORS 64 #define TXGBE_MAX_FDIR_INDICES 63 From 42efa358f033492097b214309eda464387d15318 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 13 May 2025 10:10:08 +0800 Subject: [PATCH 1844/2924] net: libwx: Fix FW mailbox reply timeout For the new SW-FW interaction, the timeout waiting for the firmware to return is too short. So that some mailbox commands cannot be completed. Use the 'timeout' parameter instead of fixed timeout value for flexible configuration. Fixes: 2e5af6b2ae85 ("net: txgbe: Add basic support for new AML devices") Signed-off-by: Jiawen Wu Reviewed-by: Simon Horman Link: https://patch.msgid.link/5D5BDE3EA501BDB8+20250513021009.145708-3-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index aed45abafb1b79..ccdc57e9b0d56c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -434,8 +434,8 @@ static int wx_host_interface_command_r(struct wx *wx, u32 *buffer, wr32m(wx, WX_SW2FW_MBOX_CMD, WX_SW2FW_MBOX_CMD_VLD, WX_SW2FW_MBOX_CMD_VLD); /* polling reply from FW */ - err = read_poll_timeout(wx_poll_fw_reply, reply, reply, 1000, 50000, - true, wx, buffer, send_cmd); + err = read_poll_timeout(wx_poll_fw_reply, reply, reply, 2000, + timeout * 1000, true, wx, buffer, send_cmd); if (err) { wx_err(wx, "Polling from FW messages timeout, cmd: 0x%x, index: %d\n", send_cmd, wx->swfw_index); From 09e76365baa1d3fb7617d48f00662d32dd9d4828 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 13 May 2025 10:10:09 +0800 Subject: [PATCH 1845/2924] net: libwx: Fix FW mailbox unknown command For the new SW-FW interaction, missing the error return if there is an unknown command. It causes the driver to mistakenly believe that the interaction is complete. This problem occurs when new driver is paired with old firmware, which does not support the new mailbox commands. Fixes: 2e5af6b2ae85 ("net: txgbe: Add basic support for new AML devices") Signed-off-by: Jiawen Wu Reviewed-by: Simon Horman Link: https://patch.msgid.link/64DBB705D35A0016+20250513021009.145708-4-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index ccdc57e9b0d56c..490d34233d38c5 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -442,6 +442,12 @@ static int wx_host_interface_command_r(struct wx *wx, u32 *buffer, goto rel_out; } + if (hdr->cmd_or_resp.ret_status == 0x80) { + wx_err(wx, "Unknown FW command: 0x%x\n", send_cmd); + err = -EINVAL; + goto rel_out; + } + /* expect no reply from FW then return */ if (!return_data) goto rel_out; From 1bdea6fad6fb985ff13828373c48e337c4e939f9 Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Tue, 13 May 2025 05:27:30 +0100 Subject: [PATCH 1846/2924] net: ethernet: mtk_eth_soc: fix typo for declaration MT7988 ESW capability Since MTK_ESW_BIT is a bit number rather than a bitmap, it causes MTK_HAS_CAPS to produce incorrect results. This leads to the ETH driver not declaring MAC capabilities correctly for the MT7988 ESW. Fixes: 445eb6448ed3 ("net: ethernet: mtk_eth_soc: add basic support for MT7988 SoC") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/b8b37f409d1280fad9c4d32521e6207f63cd3213.1747110258.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 22a532695fb0dd..6c92072b4c2808 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4748,7 +4748,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) } if (mtk_is_netsys_v3_or_greater(mac->hw) && - MTK_HAS_CAPS(mac->hw->soc->caps, MTK_ESW_BIT) && + MTK_HAS_CAPS(mac->hw->soc->caps, MTK_ESW) && id == MTK_GMAC1_ID) { mac->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | From bf449f35e77fd44017abf991fac1f9ab7705bbe0 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Tue, 13 May 2025 12:45:54 +0530 Subject: [PATCH 1847/2924] octeontx2-af: Fix CGX Receive counters Each CGX block supports 4 logical MACs (LMACS). Receive counters CGX_CMR_RX_STAT0-8 are per LMAC and CGX_CMR_RX_STAT9-12 are per CGX. Due a bug in previous patch, stale Per CGX counters values observed. Fixes: 66208910e57a ("octeontx2-af: Support to retrieve CGX LMAC stats") Signed-off-by: Hariprasad Kelam Link: https://patch.msgid.link/20250513071554.728922-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 0b27a695008bdb..971993586fb49d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -717,6 +717,11 @@ int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat) if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; + + /* pass lmac as 0 for CGX_CMR_RX_STAT9-12 */ + if (idx >= CGX_RX_STAT_GLOBAL_INDEX) + lmac_id = 0; + *rx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_STAT0 + (idx * 8)); return 0; } From 380b75d3078626aadd0817de61f3143f5db6e393 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 12 May 2025 17:06:00 -0700 Subject: [PATCH 1848/2924] Drivers: hv: Allow vmbus_sendpacket_mpb_desc() to create multiple ranges vmbus_sendpacket_mpb_desc() is currently used only by the storvsc driver and is hardcoded to create a single GPA range. To allow it to also be used by the netvsc driver to create multiple GPA ranges, no longer hardcode as having a single GPA range. Allow the calling driver to specify the rangecount in the supplied descriptor. Update the storvsc driver to reflect this new approach. Cc: # 6.1.x Signed-off-by: Michael Kelley Link: https://patch.msgid.link/20250513000604.1396-2-mhklinux@outlook.com Signed-off-by: Jakub Kicinski --- drivers/hv/channel.c | 6 +++--- drivers/scsi/storvsc_drv.c | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index fb8cd8469328ee..4ffd5eaa781725 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -1136,9 +1136,10 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); /* - * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet + * vmbus_sendpacket_mpb_desc - Send one or more multi-page buffer packets * using a GPADL Direct packet type. - * The buffer includes the vmbus descriptor. + * The desc argument must include space for the VMBus descriptor. The + * rangecount field must already be set. */ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *desc, @@ -1160,7 +1161,6 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, desc->length8 = (u16)(packetlen_aligned >> 3); desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ desc->reserved = 0; - desc->rangecount = 1; bufferlist[0].iov_base = desc; bufferlist[0].iov_len = desc_size; diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 35db061ae3ecde..2e6b2412d2c946 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1819,6 +1819,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) return SCSI_MLQUEUE_DEVICE_BUSY; } + payload->rangecount = 1; payload->range.len = length; payload->range.offset = offset_in_hvpg; From 4f98616b855cb0e3b5917918bb07b44728eb96ea Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 12 May 2025 17:06:01 -0700 Subject: [PATCH 1849/2924] hv_netvsc: Use vmbus_sendpacket_mpb_desc() to send VMBus messages netvsc currently uses vmbus_sendpacket_pagebuffer() to send VMBus messages. This function creates a series of GPA ranges, each of which contains a single PFN. However, if the rndis header in the VMBus message crosses a page boundary, the netvsc protocol with the host requires that both PFNs for the rndis header must be in a single "GPA range" data structure, which isn't possible with vmbus_sendpacket_pagebuffer(). As the first step in fixing this, add a new function netvsc_build_mpb_array() to build a VMBus message with multiple GPA ranges, each of which may contain multiple PFNs. Use vmbus_sendpacket_mpb_desc() to send this VMBus message to the host. There's no functional change since higher levels of netvsc don't maintain or propagate knowledge of contiguous PFNs. Based on its input, netvsc_build_mpb_array() still produces a separate GPA range for each PFN and the behavior is the same as with vmbus_sendpacket_pagebuffer(). But the groundwork is laid for a subsequent patch to provide the necessary grouping. Cc: # 6.1.x Signed-off-by: Michael Kelley Link: https://patch.msgid.link/20250513000604.1396-3-mhklinux@outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc.c | 50 +++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d6f5b9ea3109d2..5882d6670200ae 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1055,6 +1055,42 @@ static int netvsc_dma_map(struct hv_device *hv_dev, return 0; } +/* Build an "array" of mpb entries describing the data to be transferred + * over VMBus. After the desc header fields, each "array" entry is variable + * size, and each entry starts after the end of the previous entry. The + * "offset" and "len" fields for each entry imply the size of the entry. + * + * The pfns are in HV_HYP_PAGE_SIZE, because all communication with Hyper-V + * uses that granularity, even if the system page size of the guest is larger. + * Each entry in the input "pb" array must describe a contiguous range of + * guest physical memory so that the pfns are sequential if the range crosses + * a page boundary. The offset field must be < HV_HYP_PAGE_SIZE. + */ +static inline void netvsc_build_mpb_array(struct hv_page_buffer *pb, + u32 page_buffer_count, + struct vmbus_packet_mpb_array *desc, + u32 *desc_size) +{ + struct hv_mpb_array *mpb_entry = &desc->range; + int i, j; + + for (i = 0; i < page_buffer_count; i++) { + u32 offset = pb[i].offset; + u32 len = pb[i].len; + + mpb_entry->offset = offset; + mpb_entry->len = len; + + for (j = 0; j < HVPFN_UP(offset + len); j++) + mpb_entry->pfn_array[j] = pb[i].pfn + j; + + mpb_entry = (struct hv_mpb_array *)&mpb_entry->pfn_array[j]; + } + + desc->rangecount = page_buffer_count; + *desc_size = (char *)mpb_entry - (char *)desc; +} + static inline int netvsc_send_pkt( struct hv_device *device, struct hv_netvsc_packet *packet, @@ -1097,6 +1133,9 @@ static inline int netvsc_send_pkt( packet->dma_range = NULL; if (packet->page_buf_cnt) { + struct vmbus_channel_packet_page_buffer desc; + u32 desc_size; + if (packet->cp_partial) pb += packet->rmsg_pgcnt; @@ -1106,11 +1145,12 @@ static inline int netvsc_send_pkt( goto exit; } - ret = vmbus_sendpacket_pagebuffer(out_channel, - pb, packet->page_buf_cnt, - &nvmsg, sizeof(nvmsg), - req_id); - + netvsc_build_mpb_array(pb, packet->page_buf_cnt, + (struct vmbus_packet_mpb_array *)&desc, + &desc_size); + ret = vmbus_sendpacket_mpb_desc(out_channel, + (struct vmbus_packet_mpb_array *)&desc, + desc_size, &nvmsg, sizeof(nvmsg), req_id); if (ret) netvsc_dma_unmap(ndev_ctx->device_ctx, packet); } else { From 41a6328b2c55276f89ea3812069fd7521e348bbf Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 12 May 2025 17:06:02 -0700 Subject: [PATCH 1850/2924] hv_netvsc: Preserve contiguous PFN grouping in the page buffer array Starting with commit dca5161f9bd0 ("hv_netvsc: Check status in SEND_RNDIS_PKT completion message") in the 6.3 kernel, the Linux driver for Hyper-V synthetic networking (netvsc) occasionally reports "nvsp_rndis_pkt_complete error status: 2".[1] This error indicates that Hyper-V has rejected a network packet transmit request from the guest, and the outgoing network packet is dropped. Higher level network protocols presumably recover and resend the packet so there is no functional error, but performance is slightly impacted. Commit dca5161f9bd0 is not the cause of the error -- it only added reporting of an error that was already happening without any notice. The error has presumably been present since the netvsc driver was originally introduced into Linux. The root cause of the problem is that the netvsc driver in Linux may send an incorrectly formatted VMBus message to Hyper-V when transmitting the network packet. The incorrect formatting occurs when the rndis header of the VMBus message crosses a page boundary due to how the Linux skb head memory is aligned. In such a case, two PFNs are required to describe the location of the rndis header, even though they are contiguous in guest physical address (GPA) space. Hyper-V requires that two rndis header PFNs be in a single "GPA range" data struture, but current netvsc code puts each PFN in its own GPA range, which Hyper-V rejects as an error. The incorrect formatting occurs only for larger packets that netvsc must transmit via a VMBus "GPA Direct" message. There's no problem when netvsc transmits a smaller packet by copying it into a pre- allocated send buffer slot because the pre-allocated slots don't have page crossing issues. After commit 14ad6ed30a10 ("net: allow small head cache usage with large MAX_SKB_FRAGS values") in the 6.14-rc4 kernel, the error occurs much more frequently in VMs with 16 or more vCPUs. It may occur every few seconds, or even more frequently, in an ssh session that outputs a lot of text. Commit 14ad6ed30a10 subtly changes how skb head memory is allocated, making it much more likely that the rndis header will cross a page boundary when the vCPU count is 16 or more. The changes in commit 14ad6ed30a10 are perfectly valid -- they just had the side effect of making the netvsc bug more prominent. Current code in init_page_array() creates a separate page buffer array entry for each PFN required to identify the data to be transmitted. Contiguous PFNs get separate entries in the page buffer array, and any information about contiguity is lost. Fix the core issue by having init_page_array() construct the page buffer array to represent contiguous ranges rather than individual pages. When these ranges are subsequently passed to netvsc_build_mpb_array(), it can build GPA ranges that contain multiple PFNs, as required to avoid the error "nvsp_rndis_pkt_complete error status: 2". If instead the network packet is sent by copying into a pre-allocated send buffer slot, the copy proceeds using the contiguous ranges rather than individual pages, but the result of the copying is the same. Also fix rndis_filter_send_request() to construct a contiguous range, since it has its own page buffer array. This change has a side benefit in CoCo VMs in that netvsc_dma_map() calls dma_map_single() on each contiguous range instead of on each page. This results in fewer calls to dma_map_single() but on larger chunks of memory, which should reduce contention on the swiotlb. Since the page buffer array now contains one entry for each contiguous range instead of for each individual page, the number of entries in the array can be reduced, saving 208 bytes of stack space in netvsc_xmit() when MAX_SKG_FRAGS has the default value of 17. [1] https://bugzilla.kernel.org/show_bug.cgi?id=217503 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217503 Cc: # 6.1.x Signed-off-by: Michael Kelley Link: https://patch.msgid.link/20250513000604.1396-4-mhklinux@outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/hyperv_net.h | 12 ++++++ drivers/net/hyperv/netvsc_drv.c | 63 ++++++++----------------------- drivers/net/hyperv/rndis_filter.c | 24 +++--------- 3 files changed, 32 insertions(+), 67 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 70f7cb383228ea..76725f25abd52d 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -893,6 +893,18 @@ struct nvsp_message { sizeof(struct nvsp_message)) #define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor) +/* Maximum # of contiguous data ranges that can make up a trasmitted packet. + * Typically it's the max SKB fragments plus 2 for the rndis packet and the + * linear portion of the SKB. But if MAX_SKB_FRAGS is large, the value may + * need to be limited to MAX_PAGE_BUFFER_COUNT, which is the max # of entries + * in a GPA direct packet sent to netvsp over VMBus. + */ +#if MAX_SKB_FRAGS + 2 < MAX_PAGE_BUFFER_COUNT +#define MAX_DATA_RANGES (MAX_SKB_FRAGS + 2) +#else +#define MAX_DATA_RANGES MAX_PAGE_BUFFER_COUNT +#endif + /* Estimated requestor size: * out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c51b318b8a72e4..929f6b3de76837 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -326,43 +326,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, return txq; } -static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len, - struct hv_page_buffer *pb) -{ - int j = 0; - - hvpfn += offset >> HV_HYP_PAGE_SHIFT; - offset = offset & ~HV_HYP_PAGE_MASK; - - while (len > 0) { - unsigned long bytes; - - bytes = HV_HYP_PAGE_SIZE - offset; - if (bytes > len) - bytes = len; - pb[j].pfn = hvpfn; - pb[j].offset = offset; - pb[j].len = bytes; - - offset += bytes; - len -= bytes; - - if (offset == HV_HYP_PAGE_SIZE && len) { - hvpfn++; - offset = 0; - j++; - } - } - - return j + 1; -} - static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, struct hv_netvsc_packet *packet, struct hv_page_buffer *pb) { - u32 slots_used = 0; - char *data = skb->data; int frags = skb_shinfo(skb)->nr_frags; int i; @@ -371,28 +338,28 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, * 2. skb linear data * 3. skb fragment data */ - slots_used += fill_pg_buf(virt_to_hvpfn(hdr), - offset_in_hvpage(hdr), - len, - &pb[slots_used]); + pb[0].offset = offset_in_hvpage(hdr); + pb[0].len = len; + pb[0].pfn = virt_to_hvpfn(hdr); packet->rmsg_size = len; - packet->rmsg_pgcnt = slots_used; + packet->rmsg_pgcnt = 1; - slots_used += fill_pg_buf(virt_to_hvpfn(data), - offset_in_hvpage(data), - skb_headlen(skb), - &pb[slots_used]); + pb[1].offset = offset_in_hvpage(skb->data); + pb[1].len = skb_headlen(skb); + pb[1].pfn = virt_to_hvpfn(skb->data); for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; + struct hv_page_buffer *cur_pb = &pb[i + 2]; + u64 pfn = page_to_hvpfn(skb_frag_page(frag)); + u32 offset = skb_frag_off(frag); - slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)), - skb_frag_off(frag), - skb_frag_size(frag), - &pb[slots_used]); + cur_pb->offset = offset_in_hvpage(offset); + cur_pb->len = skb_frag_size(frag); + cur_pb->pfn = pfn + (offset >> HV_HYP_PAGE_SHIFT); } - return slots_used; + return frags + 2; } static int count_skb_frag_slots(struct sk_buff *skb) @@ -483,7 +450,7 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx) struct net_device *vf_netdev; u32 rndis_msg_size; u32 hash; - struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; + struct hv_page_buffer pb[MAX_DATA_RANGES]; /* If VF is present and up then redirect packets to it. * Skip the VF if it is marked down or has no carrier. diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 82747dfacd70f0..9e73959e61ee0b 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -225,8 +225,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, struct rndis_request *req) { struct hv_netvsc_packet *packet; - struct hv_page_buffer page_buf[2]; - struct hv_page_buffer *pb = page_buf; + struct hv_page_buffer pb; int ret; /* Setup the packet to send it */ @@ -235,27 +234,14 @@ static int rndis_filter_send_request(struct rndis_device *dev, packet->total_data_buflen = req->request_msg.msg_len; packet->page_buf_cnt = 1; - pb[0].pfn = virt_to_phys(&req->request_msg) >> - HV_HYP_PAGE_SHIFT; - pb[0].len = req->request_msg.msg_len; - pb[0].offset = offset_in_hvpage(&req->request_msg); - - /* Add one page_buf when request_msg crossing page boundary */ - if (pb[0].offset + pb[0].len > HV_HYP_PAGE_SIZE) { - packet->page_buf_cnt++; - pb[0].len = HV_HYP_PAGE_SIZE - - pb[0].offset; - pb[1].pfn = virt_to_phys((void *)&req->request_msg - + pb[0].len) >> HV_HYP_PAGE_SHIFT; - pb[1].offset = 0; - pb[1].len = req->request_msg.msg_len - - pb[0].len; - } + pb.pfn = virt_to_phys(&req->request_msg) >> HV_HYP_PAGE_SHIFT; + pb.len = req->request_msg.msg_len; + pb.offset = offset_in_hvpage(&req->request_msg); trace_rndis_send(dev->ndev, 0, &req->request_msg); rcu_read_lock_bh(); - ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL, false); + ret = netvsc_send(dev->ndev, packet, NULL, &pb, NULL, false); rcu_read_unlock_bh(); return ret; From 5bbc644bbf4e97a05bc0cb052189004588ff8a09 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 12 May 2025 17:06:03 -0700 Subject: [PATCH 1851/2924] hv_netvsc: Remove rmsg_pgcnt init_page_array() now always creates a single page buffer array entry for the rndis message, even if the rndis message crosses a page boundary. As such, the number of page buffer array entries used for the rndis message must no longer be tracked -- it is always just 1. Remove the rmsg_pgcnt field and use "1" where the value is needed. Cc: # 6.1.x Signed-off-by: Michael Kelley Link: https://patch.msgid.link/20250513000604.1396-5-mhklinux@outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/hyperv_net.h | 1 - drivers/net/hyperv/netvsc.c | 7 +++---- drivers/net/hyperv/netvsc_drv.c | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 76725f25abd52d..cb6f5482d203e1 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -158,7 +158,6 @@ struct hv_netvsc_packet { u8 cp_partial; /* partial copy into send buffer */ u8 rmsg_size; /* RNDIS header and PPI size */ - u8 rmsg_pgcnt; /* page count of RNDIS header and PPI */ u8 page_buf_cnt; u16 q_idx; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 5882d6670200ae..720104661d7f24 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -953,8 +953,7 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device, + pend_size; int i; u32 padding = 0; - u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : - packet->page_buf_cnt; + u32 page_count = packet->cp_partial ? 1 : packet->page_buf_cnt; u32 remain; /* Add padding */ @@ -1137,7 +1136,7 @@ static inline int netvsc_send_pkt( u32 desc_size; if (packet->cp_partial) - pb += packet->rmsg_pgcnt; + pb++; ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb); if (ret) { @@ -1299,7 +1298,7 @@ int netvsc_send(struct net_device *ndev, packet->send_buf_index = section_index; if (packet->cp_partial) { - packet->page_buf_cnt -= packet->rmsg_pgcnt; + packet->page_buf_cnt--; packet->total_data_buflen = msd_len + packet->rmsg_size; } else { packet->page_buf_cnt = 0; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 929f6b3de76837..d8b169ac0343c5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -343,7 +343,6 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, pb[0].len = len; pb[0].pfn = virt_to_hvpfn(hdr); packet->rmsg_size = len; - packet->rmsg_pgcnt = 1; pb[1].offset = offset_in_hvpage(skb->data); pb[1].len = skb_headlen(skb); From 45a442fe369e6c4e0b4aa9f63b31c3f2f9e2090e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 12 May 2025 17:06:04 -0700 Subject: [PATCH 1852/2924] Drivers: hv: vmbus: Remove vmbus_sendpacket_pagebuffer() With the netvsc driver changed to use vmbus_sendpacket_mpb_desc() instead of vmbus_sendpacket_pagebuffer(), the latter has no remaining callers. Remove it. Cc: # 6.1.x Signed-off-by: Michael Kelley Link: https://patch.msgid.link/20250513000604.1396-6-mhklinux@outlook.com Signed-off-by: Jakub Kicinski --- drivers/hv/channel.c | 59 ------------------------------------------ include/linux/hyperv.h | 7 ----- 2 files changed, 66 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 4ffd5eaa781725..35f26fa1ffe76e 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -1076,65 +1076,6 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, } EXPORT_SYMBOL(vmbus_sendpacket); -/* - * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer - * packets using a GPADL Direct packet type. This interface allows you - * to control notifying the host. This will be useful for sending - * batched data. Also the sender can control the send flags - * explicitly. - */ -int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, void *buffer, u32 bufferlen, - u64 requestid) -{ - int i; - struct vmbus_channel_packet_page_buffer desc; - u32 descsize; - u32 packetlen; - u32 packetlen_aligned; - struct kvec bufferlist[3]; - u64 aligned_data = 0; - - if (pagecount > MAX_PAGE_BUFFER_COUNT) - return -EINVAL; - - /* - * Adjust the size down since vmbus_channel_packet_page_buffer is the - * largest size we support - */ - descsize = sizeof(struct vmbus_channel_packet_page_buffer) - - ((MAX_PAGE_BUFFER_COUNT - pagecount) * - sizeof(struct hv_page_buffer)); - packetlen = descsize + bufferlen; - packetlen_aligned = ALIGN(packetlen, sizeof(u64)); - - /* Setup the descriptor */ - desc.type = VM_PKT_DATA_USING_GPA_DIRECT; - desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; - desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */ - desc.length8 = (u16)(packetlen_aligned >> 3); - desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ - desc.reserved = 0; - desc.rangecount = pagecount; - - for (i = 0; i < pagecount; i++) { - desc.range[i].len = pagebuffers[i].len; - desc.range[i].offset = pagebuffers[i].offset; - desc.range[i].pfn = pagebuffers[i].pfn; - } - - bufferlist[0].iov_base = &desc; - bufferlist[0].iov_len = descsize; - bufferlist[1].iov_base = buffer; - bufferlist[1].iov_len = bufferlen; - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - - return hv_ringbuffer_write(channel, bufferlist, 3, requestid, NULL); -} -EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); - /* * vmbus_sendpacket_mpb_desc - Send one or more multi-page buffer packets * using a GPADL Direct packet type. diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 675959fb97ba9d..ea806cef4a0d68 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1161,13 +1161,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); -extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, - void *buffer, - u32 bufferlen, - u64 requestid); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, From 78ab4be549533432d97ea8989d2f00b508fa68d8 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Tue, 6 May 2025 14:55:39 +0300 Subject: [PATCH 1853/2924] wifi: mt76: disable napi on driver removal A warning on driver removal started occurring after commit 9dd05df8403b ("net: warn if NAPI instance wasn't shut down"). Disable tx napi before deleting it in mt76_dma_cleanup(). WARNING: CPU: 4 PID: 18828 at net/core/dev.c:7288 __netif_napi_del_locked+0xf0/0x100 CPU: 4 UID: 0 PID: 18828 Comm: modprobe Not tainted 6.15.0-rc4 #4 PREEMPT(lazy) Hardware name: ASUS System Product Name/PRIME X670E-PRO WIFI, BIOS 3035 09/05/2024 RIP: 0010:__netif_napi_del_locked+0xf0/0x100 Call Trace: mt76_dma_cleanup+0x54/0x2f0 [mt76] mt7921_pci_remove+0xd5/0x190 [mt7921e] pci_device_remove+0x47/0xc0 device_release_driver_internal+0x19e/0x200 driver_detach+0x48/0x90 bus_remove_driver+0x6d/0xf0 pci_unregister_driver+0x2e/0xb0 __do_sys_delete_module.isra.0+0x197/0x2e0 do_syscall_64+0x7b/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e Tested with mt7921e but the same pattern can be actually applied to other mt76 drivers calling mt76_dma_cleanup() during removal. Tx napi is enabled in their *_dma_init() functions and only toggled off and on again inside their suspend/resume/reset paths. So it should be okay to disable tx napi in such a generic way. Found by Linux Verification Center (linuxtesting.org). Fixes: 2ac515a5d74f ("mt76: mt76x02: use napi polling for tx cleanup") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Tested-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250506115540.19045-1-pchelkin@ispras.ru Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 844af16ee55131..35b4ec91979e6a 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -1011,6 +1011,7 @@ void mt76_dma_cleanup(struct mt76_dev *dev) int i; mt76_worker_disable(&dev->tx_worker); + napi_disable(&dev->tx_napi); netif_napi_del(&dev->tx_napi); for (i = 0; i < ARRAY_SIZE(dev->phys); i++) { From 0aa8496adda570c2005410a30df963a16643a3dc Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Fri, 9 May 2025 09:04:20 +0800 Subject: [PATCH 1854/2924] wifi: mt76: mt7925: fix missing hdr_trans_tlv command for broadcast wtbl Ensure that the hdr_trans_tlv command is included in the broadcast wtbl to prevent the IPv6 and multicast packet from being dropped by the chip. Cc: stable@vger.kernel.org Fixes: cb1353ef3473 ("wifi: mt76: mt7925: integrate *mlo_sta_cmd and *sta_cmd") Reported-by: Benjamin Xiao Tested-by: Niklas Schnelle Signed-off-by: Ming Yen Hsieh Link: https://lore.kernel.org/lkml/EmWnO5b-acRH1TXbGnkx41eJw654vmCR-8_xMBaPMwexCnfkvKCdlU5u19CGbaapJ3KRu-l3B-tSUhf8CCQwL0odjo6Cd5YG5lvNeB-vfdg=@pm.me/ Link: https://patch.msgid.link/20250509010421.403022-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index e61da76b2097bf..14b1f603fb6224 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -1924,14 +1924,14 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, mt7925_mcu_sta_mld_tlv(skb, info->vif, info->link_sta->sta); mt7925_mcu_sta_eht_mld_tlv(skb, info->vif, info->link_sta->sta); } - - mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta); } if (!info->enable) { mt7925_mcu_sta_remove_tlv(skb); mt76_connac_mcu_add_tlv(skb, STA_REC_MLD_OFF, sizeof(struct tlv)); + } else { + mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta); } return mt76_mcu_skb_send_msg(dev, skb, info->cmd, true); From dcb479fde00be9a151c047d0a7c0626b64eb0019 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 12 May 2025 18:22:37 +0530 Subject: [PATCH 1855/2924] octeontx2-pf: Do not reallocate all ntuple filters If ntuple filters count is modified followed by unicast filters count using devlink then the ntuple count set by user is ignored and all the ntuple filters are being reallocated. Fix this by storing the ntuple count set by user. Without this patch, say if user tries to modify ntuple count as 8 followed by ucast filter count as 4 using devlink commands then ntuple count is being reverted to default value 16 i.e, not retaining user set value 8. Fixes: 39c469188b6d ("octeontx2-pf: Add ucast filter count configurability via devlink.") Signed-off-by: Subbaraya Sundeep Reviewed-by: Simon Horman Link: https://patch.msgid.link/1747054357-5850-1-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 1 + drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c | 1 + drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 1e88422825be7a..d6b4b74e4002b1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -356,6 +356,7 @@ struct otx2_flow_config { struct list_head flow_list_tc; u8 ucast_flt_cnt; bool ntuple; + u16 ntuple_cnt; }; struct dev_hw_ops { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c index 33ec9a7f7c0339..e13ae5484c19cb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c @@ -41,6 +41,7 @@ static int otx2_dl_mcam_count_set(struct devlink *devlink, u32 id, if (!pfvf->flow_cfg) return 0; + pfvf->flow_cfg->ntuple_cnt = ctx->val.vu16; otx2_alloc_mcam_entries(pfvf, ctx->val.vu16); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 47bfd1fb37d4bc..64c6d9162ef644 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -247,7 +247,7 @@ int otx2_mcam_entry_init(struct otx2_nic *pfvf) mutex_unlock(&pfvf->mbox.lock); /* Allocate entries for Ntuple filters */ - count = otx2_alloc_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT); + count = otx2_alloc_mcam_entries(pfvf, flow_cfg->ntuple_cnt); if (count <= 0) { otx2_clear_ntuple_flow_info(pfvf, flow_cfg); return 0; @@ -307,6 +307,7 @@ int otx2_mcam_flow_init(struct otx2_nic *pf) INIT_LIST_HEAD(&pf->flow_cfg->flow_list_tc); pf->flow_cfg->ucast_flt_cnt = OTX2_DEFAULT_UNICAST_FLOWS; + pf->flow_cfg->ntuple_cnt = OTX2_DEFAULT_FLOWCOUNT; /* Allocate bare minimum number of MCAM entries needed for * unicast and ntuple filters. From 811d6a923b40fc130f91abf49151f57cf9ac2a6f Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 15 May 2025 11:42:13 +0100 Subject: [PATCH 1856/2924] dmaengine: mediatek: drop unused variable Commit 157ae5ffd76a dmaengine: mediatek: Fix a possible deadlock error in mtk_cqdma_tx_status() fixed locks but kept unused varibale leading to warning and build failure (due to warning treated as errors) drivers/dma/mediatek/mtk-cqdma.c: In function 'mtk_cqdma_find_active_desc': drivers/dma/mediatek/mtk-cqdma.c:423:23: error: unused variable 'flags' [-Werror=unused-variable] 423 | unsigned long flags; | ^~~~~ Fix by dropping this unused flag Reported-by: Stephen Rothwell Fixes: 157ae5ffd76a ("dmaengine: mediatek: Fix a possible deadlock error in mtk_cqdma_tx_status()") Signed-off-by: Vinod Koul --- drivers/dma/mediatek/mtk-cqdma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index e35271ac1eed2a..47c8adfdc15504 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -420,7 +420,6 @@ static struct virt_dma_desc *mtk_cqdma_find_active_desc(struct dma_chan *c, { struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c); struct virt_dma_desc *vd; - unsigned long flags; list_for_each_entry(vd, &cvc->pc->queue, node) if (vd->tx.cookie == cookie) { From 7b9938a14460e8ec7649ca2e80ac0aae9815bf02 Mon Sep 17 00:00:00 2001 From: Nicolas Chauvet Date: Thu, 15 May 2025 12:21:32 +0200 Subject: [PATCH 1857/2924] ALSA: usb-audio: Add sample rate quirk for Microdia JP001 USB Camera Microdia JP001 does not support reading the sample rate which leads to many lines of "cannot get freq at ep 0x84". This patch adds the USB ID to quirks.c and avoids those error messages. usb 7-4: New USB device found, idVendor=0c45, idProduct=636b, bcdDevice= 1.00 usb 7-4: New USB device strings: Mfr=2, Product=1, SerialNumber=3 usb 7-4: Product: JP001 usb 7-4: Manufacturer: JP001 usb 7-4: SerialNumber: JP001 usb 7-4: 3:1: cannot get freq at ep 0x84 Cc: Signed-off-by: Nicolas Chauvet Link: https://patch.msgid.link/20250515102132.73062-1-kwizart@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index eb192834db68ca..dbbc9eb935a4b3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2242,6 +2242,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0c45, 0x6340, /* Sonix HD USB Camera */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x0c45, 0x636b, /* Microdia JP001 USB Camera */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x0d8c, 0x0014, /* USB Audio Device */ QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ From 82bbe02b2500ef0a62053fe2eb84773fe31c5a0a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 9 May 2025 11:46:45 -0700 Subject: [PATCH 1858/2924] wifi: mac80211: Set n_channels after allocating struct cfg80211_scan_request Make sure that n_channels is set after allocating the struct cfg80211_registered_device::int_scan_req member. Seen with syzkaller: UBSAN: array-index-out-of-bounds in net/mac80211/scan.c:1208:5 index 0 is out of range for type 'struct ieee80211_channel *[] __counted_by(n_channels)' (aka 'struct ieee80211_channel *[]') This was missed in the initial conversions because I failed to locate the allocation likely due to the "sizeof(void *)" not matching the "channels" array type. Reported-by: syzbot+4bcdddd48bb6f0be0da1@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/680fd171.050a0220.2b69d1.045e.GAE@google.com/ Fixes: e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by") Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://patch.msgid.link/20250509184641.work.542-kees@kernel.org Signed-off-by: Johannes Berg --- net/mac80211/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 741e6c7edcb7c7..6b6de43d9420ac 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1354,10 +1354,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_MONITOR); - local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) + - sizeof(void *) * channels, GFP_KERNEL); + local->int_scan_req = kzalloc(struct_size(local->int_scan_req, + channels, channels), + GFP_KERNEL); if (!local->int_scan_req) return -ENOMEM; + local->int_scan_req->n_channels = channels; eth_broadcast_addr(local->int_scan_req->bssid); From a7e255ff9fe4d9b8b902023aaf5b7a673786bb50 Mon Sep 17 00:00:00 2001 From: Valtteri Koskivuori Date: Fri, 9 May 2025 21:42:49 +0300 Subject: [PATCH 1859/2924] platform/x86: fujitsu-laptop: Support Lifebook S2110 hotkeys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The S2110 has an additional set of media playback control keys enabled by a hardware toggle button that switches the keys between "Application" and "Player" modes. Toggling "Player" mode just shifts the scancode of each hotkey up by 4. Add defines for new scancodes, and a keymap and dmi id for the S2110. Tested on a Fujitsu Lifebook S2110. Signed-off-by: Valtteri Koskivuori Acked-by: Jonathan Woithe Link: https://lore.kernel.org/r/20250509184251.713003-1-vkoskiv@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/fujitsu-laptop.c | 33 +++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index a0eae24ca9e608..162809140f68a2 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -17,13 +17,13 @@ /* * fujitsu-laptop.c - Fujitsu laptop support, providing access to additional * features made available on a range of Fujitsu laptops including the - * P2xxx/P5xxx/S6xxx/S7xxx series. + * P2xxx/P5xxx/S2xxx/S6xxx/S7xxx series. * * This driver implements a vendor-specific backlight control interface for * Fujitsu laptops and provides support for hotkeys present on certain Fujitsu * laptops. * - * This driver has been tested on a Fujitsu Lifebook S6410, S7020 and + * This driver has been tested on a Fujitsu Lifebook S2110, S6410, S7020 and * P8010. It should work on most P-series and S-series Lifebooks, but * YMMV. * @@ -107,7 +107,11 @@ #define KEY2_CODE 0x411 #define KEY3_CODE 0x412 #define KEY4_CODE 0x413 -#define KEY5_CODE 0x420 +#define KEY5_CODE 0x414 +#define KEY6_CODE 0x415 +#define KEY7_CODE 0x416 +#define KEY8_CODE 0x417 +#define KEY9_CODE 0x420 /* Hotkey ringbuffer limits */ #define MAX_HOTKEY_RINGBUFFER_SIZE 100 @@ -560,7 +564,7 @@ static const struct key_entry keymap_default[] = { { KE_KEY, KEY2_CODE, { KEY_PROG2 } }, { KE_KEY, KEY3_CODE, { KEY_PROG3 } }, { KE_KEY, KEY4_CODE, { KEY_PROG4 } }, - { KE_KEY, KEY5_CODE, { KEY_RFKILL } }, + { KE_KEY, KEY9_CODE, { KEY_RFKILL } }, /* Soft keys read from status flags */ { KE_KEY, FLAG_RFKILL, { KEY_RFKILL } }, { KE_KEY, FLAG_TOUCHPAD_TOGGLE, { KEY_TOUCHPAD_TOGGLE } }, @@ -584,6 +588,18 @@ static const struct key_entry keymap_p8010[] = { { KE_END, 0 } }; +static const struct key_entry keymap_s2110[] = { + { KE_KEY, KEY1_CODE, { KEY_PROG1 } }, /* "A" */ + { KE_KEY, KEY2_CODE, { KEY_PROG2 } }, /* "B" */ + { KE_KEY, KEY3_CODE, { KEY_WWW } }, /* "Internet" */ + { KE_KEY, KEY4_CODE, { KEY_EMAIL } }, /* "E-mail" */ + { KE_KEY, KEY5_CODE, { KEY_STOPCD } }, + { KE_KEY, KEY6_CODE, { KEY_PLAYPAUSE } }, + { KE_KEY, KEY7_CODE, { KEY_PREVIOUSSONG } }, + { KE_KEY, KEY8_CODE, { KEY_NEXTSONG } }, + { KE_END, 0 } +}; + static const struct key_entry *keymap = keymap_default; static int fujitsu_laptop_dmi_keymap_override(const struct dmi_system_id *id) @@ -621,6 +637,15 @@ static const struct dmi_system_id fujitsu_laptop_dmi_table[] = { }, .driver_data = (void *)keymap_p8010 }, + { + .callback = fujitsu_laptop_dmi_keymap_override, + .ident = "Fujitsu LifeBook S2110", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK S2110"), + }, + .driver_data = (void *)keymap_s2110 + }, {} }; From 4e89a4077490f52cde652d17e32519b666abf3a6 Mon Sep 17 00:00:00 2001 From: Vladimir Moskovkin Date: Wed, 14 May 2025 12:12:55 +0000 Subject: [PATCH 1860/2924] platform/x86: dell-wmi-sysman: Avoid buffer overflow in current_password_store() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the 'buf' array received from the user contains an empty string, the 'length' variable will be zero. Accessing the 'buf' array element with index 'length - 1' will result in a buffer overflow. Add a check for an empty string. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: e8a60aa7404b ("platform/x86: Introduce support for Systems Management Driver over WMI for Dell Systems") Cc: stable@vger.kernel.org Signed-off-by: Vladimir Moskovkin Link: https://lore.kernel.org/r/39973642a4f24295b4a8fad9109c5b08@kaspersky.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c index 230e6ee966366a..d8f1bf5e58a0f4 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c @@ -45,7 +45,7 @@ static ssize_t current_password_store(struct kobject *kobj, int length; length = strlen(buf); - if (buf[length-1] == '\n') + if (length && buf[length - 1] == '\n') length--; /* firmware does verifiation of min/max password length, From 08fb624802d8786253994d8ebdbbcdaa186f04f5 Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Wed, 14 May 2025 10:13:20 -0700 Subject: [PATCH 1861/2924] irqchip/riscv-imsic: Start local sync timer on correct CPU When starting the local sync timer to synchronize the state of a remote CPU it should be added on the CPU to be synchronized, not the initiating CPU. This results in interrupt delivery being delayed until the timer eventually runs (due to another mask/unmask/migrate operation) on the target CPU. Fixes: 0f67911e821c ("irqchip/riscv-imsic: Separate next and previous pointers in IMSIC vector") Signed-off-by: Andrew Bresticker Signed-off-by: Thomas Gleixner Reviewed-by: Anup Patel Link: https://lore.kernel.org/all/20250514171320.3494917-1-abrestic@rivosinc.com --- drivers/irqchip/irq-riscv-imsic-state.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c index bdf5cd2037f289..62f76950a113b5 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.c +++ b/drivers/irqchip/irq-riscv-imsic-state.c @@ -208,17 +208,17 @@ static bool __imsic_local_sync(struct imsic_local_priv *lpriv) } #ifdef CONFIG_SMP -static void __imsic_local_timer_start(struct imsic_local_priv *lpriv) +static void __imsic_local_timer_start(struct imsic_local_priv *lpriv, unsigned int cpu) { lockdep_assert_held(&lpriv->lock); if (!timer_pending(&lpriv->timer)) { lpriv->timer.expires = jiffies + 1; - add_timer_on(&lpriv->timer, smp_processor_id()); + add_timer_on(&lpriv->timer, cpu); } } #else -static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv) +static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv, unsigned int cpu) { } #endif @@ -233,7 +233,7 @@ void imsic_local_sync_all(bool force_all) if (force_all) bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1); if (!__imsic_local_sync(lpriv)) - __imsic_local_timer_start(lpriv); + __imsic_local_timer_start(lpriv, smp_processor_id()); raw_spin_unlock_irqrestore(&lpriv->lock, flags); } @@ -278,7 +278,7 @@ static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu return; } - __imsic_local_timer_start(lpriv); + __imsic_local_timer_start(lpriv, cpu); } } #else From 92ec4855034b2c4d13f117558dc73d20581fa9ff Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 14 May 2025 14:48:05 +0200 Subject: [PATCH 1862/2924] mlxsw: spectrum_router: Fix use-after-free when deleting GRE net devices The driver only offloads neighbors that are constructed on top of net devices registered by it or their uppers (which are all Ethernet). The device supports GRE encapsulation and decapsulation of forwarded traffic, but the driver will not offload dummy neighbors constructed on top of GRE net devices as they are not uppers of its net devices: # ip link add name gre1 up type gre tos inherit local 192.0.2.1 remote 198.51.100.1 # ip neigh add 0.0.0.0 lladdr 0.0.0.0 nud noarp dev gre1 $ ip neigh show dev gre1 nud noarp 0.0.0.0 lladdr 0.0.0.0 NOARP (Note that the neighbor is not marked with 'offload') When the driver is reloaded and the existing configuration is replayed, the driver does not perform the same check regarding existing neighbors and offloads the previously added one: # devlink dev reload pci/0000:01:00.0 $ ip neigh show dev gre1 nud noarp 0.0.0.0 lladdr 0.0.0.0 offload NOARP If the neighbor is later deleted, the driver will ignore the notification (given the GRE net device is not its upper) and will therefore keep referencing freed memory, resulting in a use-after-free [1] when the net device is deleted: # ip neigh del 0.0.0.0 lladdr 0.0.0.0 dev gre1 # ip link del dev gre1 Fix by skipping neighbor replay if the net device for which the replay is performed is not our upper. [1] BUG: KASAN: slab-use-after-free in mlxsw_sp_neigh_entry_update+0x1ea/0x200 Read of size 8 at addr ffff888155b0e420 by task ip/2282 [...] Call Trace: dump_stack_lvl+0x6f/0xa0 print_address_description.constprop.0+0x6f/0x350 print_report+0x108/0x205 kasan_report+0xdf/0x110 mlxsw_sp_neigh_entry_update+0x1ea/0x200 mlxsw_sp_router_rif_gone_sync+0x2a8/0x440 mlxsw_sp_rif_destroy+0x1e9/0x750 mlxsw_sp_netdevice_ipip_ol_event+0x3c9/0xdc0 mlxsw_sp_router_netdevice_event+0x3ac/0x15e0 notifier_call_chain+0xca/0x150 call_netdevice_notifiers_info+0x7f/0x100 unregister_netdevice_many_notify+0xc8c/0x1d90 rtnl_dellink+0x34e/0xa50 rtnetlink_rcv_msg+0x6fb/0xb70 netlink_rcv_skb+0x131/0x360 netlink_unicast+0x426/0x710 netlink_sendmsg+0x75a/0xc20 __sock_sendmsg+0xc1/0x150 ____sys_sendmsg+0x5aa/0x7b0 ___sys_sendmsg+0xfc/0x180 __sys_sendmsg+0x121/0x1b0 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: 8fdb09a7674c ("mlxsw: spectrum_router: Replay neighbours when RIF is made") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Link: https://patch.msgid.link/c53c02c904fde32dad484657be3b1477884e9ad6.1747225701.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 464821dd492dae..a2033837182e86 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3014,6 +3014,9 @@ static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp, .rif = rif, }; + if (!mlxsw_sp_dev_lower_is_port(mlxsw_sp_rif_dev(rif))) + return 0; + neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms); if (rms.err) goto err_arp; From 325eb217e41fa14f307c7cc702bd18d0bb38fe84 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 13 May 2025 23:29:08 -0700 Subject: [PATCH 1863/2924] bnxt_en: bring back rtnl_lock() in the bnxt_open() path Error recovery, PCIe AER, resume, and TX timeout will invoke bnxt_open() with netdev_lock only. This will cause RTNL assert failure in netif_set_real_num_tx_queues(), netif_set_real_num_tx_queues(), and netif_set_real_num_tx_queues(). Example error recovery assert: RTNL: assertion failed at net/core/dev.c (3178) WARNING: CPU: 3 PID: 3392 at net/core/dev.c:3178 netif_set_real_num_tx_queues+0x1fd/0x210 Call Trace: ? __pfx_bnxt_msix+0x10/0x10 [bnxt_en] __bnxt_open_nic+0x1ef/0xb20 [bnxt_en] bnxt_open+0xda/0x130 [bnxt_en] bnxt_fw_reset_task+0x21f/0x780 [bnxt_en] process_scheduled_works+0x9d/0x400 For now, bring back rtnl_lock() in all these code paths that can invoke bnxt_open(). In the bnxt_queue_start() error path, we don't have rtnl_lock held so we just change it to call netif_close() instead of bnxt_reset_task() for simplicity. This error path is unlikely so it should be fine. Fixes: 004b5008016a ("eth: bnxt: remove most dependencies on RTNL") Reviewed-by: Kalesh AP Reviewed-by: Pavan Chebbi Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250514062908.2766677-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 36 ++++++++++++++++++----- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 86a5de44b6f3fe..6afc2ab6fad228 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -14013,13 +14013,28 @@ static void bnxt_unlock_sp(struct bnxt *bp) netdev_unlock(bp->dev); } +/* Same as bnxt_lock_sp() with additional rtnl_lock */ +static void bnxt_rtnl_lock_sp(struct bnxt *bp) +{ + clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + rtnl_lock(); + netdev_lock(bp->dev); +} + +static void bnxt_rtnl_unlock_sp(struct bnxt *bp) +{ + set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + netdev_unlock(bp->dev); + rtnl_unlock(); +} + /* Only called from bnxt_sp_task() */ static void bnxt_reset(struct bnxt *bp, bool silent) { - bnxt_lock_sp(bp); + bnxt_rtnl_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state)) bnxt_reset_task(bp, silent); - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); } /* Only called from bnxt_sp_task() */ @@ -14027,9 +14042,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) { int i; - bnxt_lock_sp(bp); + bnxt_rtnl_lock_sp(bp); if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); return; } /* Disable and flush TPA before resetting the RX ring */ @@ -14068,7 +14083,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) } if (bp->flags & BNXT_FLAG_TPA) bnxt_set_tpa(bp, true); - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); } static void bnxt_fw_fatal_close(struct bnxt *bp) @@ -14960,15 +14975,17 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING; fallthrough; case BNXT_FW_RESET_STATE_OPENING: - while (!netdev_trylock(bp->dev)) { + while (!rtnl_trylock()) { bnxt_queue_fw_reset_work(bp, HZ / 10); return; } + netdev_lock(bp->dev); rc = bnxt_open(bp->dev); if (rc) { netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); bnxt_fw_reset_abort(bp, rc); netdev_unlock(bp->dev); + rtnl_unlock(); goto ulp_start; } @@ -14988,6 +15005,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_dl_health_fw_status_update(bp, true); } netdev_unlock(bp->dev); + rtnl_unlock(); bnxt_ulp_start(bp, 0); bnxt_reenable_sriov(bp); netdev_lock(bp->dev); @@ -15936,7 +15954,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) rc); napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); - bnxt_reset_task(bp, true); + netif_close(dev); return rc; } @@ -16752,6 +16770,7 @@ static int bnxt_resume(struct device *device) struct bnxt *bp = netdev_priv(dev); int rc = 0; + rtnl_lock(); netdev_lock(dev); rc = pci_enable_device(bp->pdev); if (rc) { @@ -16796,6 +16815,7 @@ static int bnxt_resume(struct device *device) resume_exit: netdev_unlock(bp->dev); + rtnl_unlock(); bnxt_ulp_start(bp, rc); if (!rc) bnxt_reenable_sriov(bp); @@ -16961,6 +16981,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) int err; netdev_info(bp->dev, "PCI Slot Resume\n"); + rtnl_lock(); netdev_lock(netdev); err = bnxt_hwrm_func_qcaps(bp); @@ -16978,6 +16999,7 @@ static void bnxt_io_resume(struct pci_dev *pdev) netif_device_attach(netdev); netdev_unlock(netdev); + rtnl_unlock(); bnxt_ulp_start(bp, err); if (!err) bnxt_reenable_sriov(bp); From 491deb9b8c4ad12fe51d554a69b8165b9ef9429f Mon Sep 17 00:00:00 2001 From: Pengtao He Date: Wed, 14 May 2025 21:20:13 +0800 Subject: [PATCH 1864/2924] net/tls: fix kernel panic when alloc_page failed We cannot set frag_list to NULL pointer when alloc_page failed. It will be used in tls_strp_check_queue_ok when the next time tls_strp_read_sock is called. This is because we don't reset full_len in tls_strp_flush_anchor_copy() so the recv path will try to continue handling the partial record on the next call but we dettached the rcvq from the frag list. Alternative fix would be to reset full_len. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000028 Call trace: tls_strp_check_rcv+0x128/0x27c tls_strp_data_ready+0x34/0x44 tls_data_ready+0x3c/0x1f0 tcp_data_ready+0x9c/0xe4 tcp_data_queue+0xf6c/0x12d0 tcp_rcv_established+0x52c/0x798 Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser") Signed-off-by: Pengtao He Link: https://patch.msgid.link/20250514132013.17274-1-hept.hept.hept@gmail.com Signed-off-by: Jakub Kicinski --- net/tls/tls_strp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 77e33e1e340e31..65b0da6fdf6a79 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -396,7 +396,6 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) return 0; shinfo = skb_shinfo(strp->anchor); - shinfo->frag_list = NULL; /* If we don't know the length go max plus page for cipher overhead */ need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE; @@ -412,6 +411,8 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) page, 0, 0); } + shinfo->frag_list = NULL; + strp->copy_mode = 1; strp->stm.offset = 0; From b3ca9eef6646576ad506a96d941d87a69f66732a Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Wed, 14 May 2025 21:56:57 +0200 Subject: [PATCH 1865/2924] tsnep: fix timestamping with a stacked DSA driver This driver is susceptible to a form of the bug explained in commit c26a2c2ddc01 ("gianfar: Fix TX timestamping with a stacked DSA driver") and in Documentation/networking/timestamping.rst section "Other caveats for MAC drivers", specifically it timestamps any skb which has SKBTX_HW_TSTAMP, and does not consider if timestamping has been enabled in adapter->hwtstamp_config.tx_type. Evaluate the proper TX timestamping condition only once on the TX path (in tsnep_xmit_frame_ring()) and store the result in an additional TX entry flag. Evaluate the new TX entry flag in the TX confirmation path (in tsnep_tx_poll()). This way SKBTX_IN_PROGRESS is set by the driver as required, but never evaluated. SKBTX_IN_PROGRESS shall not be evaluated as it can be set by a stacked DSA driver and evaluating it would lead to unwanted timestamps. Fixes: 403f69bbdbad ("tsnep: Add TSN endpoint Ethernet MAC driver") Suggested-by: Vladimir Oltean Signed-off-by: Gerhard Engleder Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250514195657.25874-1-gerhard@engleder-embedded.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/engleder/tsnep_main.c | 30 ++++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 625245b0845c7a..eba73246f98664 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -67,6 +67,8 @@ #define TSNEP_TX_TYPE_XDP_NDO_MAP_PAGE (TSNEP_TX_TYPE_XDP_NDO | TSNEP_TX_TYPE_MAP_PAGE) #define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO) #define TSNEP_TX_TYPE_XSK BIT(12) +#define TSNEP_TX_TYPE_TSTAMP BIT(13) +#define TSNEP_TX_TYPE_SKB_TSTAMP (TSNEP_TX_TYPE_SKB | TSNEP_TX_TYPE_TSTAMP) #define TSNEP_XDP_TX BIT(0) #define TSNEP_XDP_REDIRECT BIT(1) @@ -386,8 +388,7 @@ static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length, if (entry->skb) { entry->properties = length & TSNEP_DESC_LENGTH_MASK; entry->properties |= TSNEP_DESC_INTERRUPT_FLAG; - if ((entry->type & TSNEP_TX_TYPE_SKB) && - (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)) + if ((entry->type & TSNEP_TX_TYPE_SKB_TSTAMP) == TSNEP_TX_TYPE_SKB_TSTAMP) entry->properties |= TSNEP_DESC_EXTENDED_WRITEBACK_FLAG; /* toggle user flag to prevent false acknowledge @@ -479,7 +480,8 @@ static int tsnep_tx_map_frag(skb_frag_t *frag, struct tsnep_tx_entry *entry, return mapped; } -static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) +static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count, + bool do_tstamp) { struct device *dmadev = tx->adapter->dmadev; struct tsnep_tx_entry *entry; @@ -505,6 +507,9 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) entry->type = TSNEP_TX_TYPE_SKB_INLINE; mapped = 0; } + + if (do_tstamp) + entry->type |= TSNEP_TX_TYPE_TSTAMP; } else { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; @@ -558,11 +563,12 @@ static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count) static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, struct tsnep_tx *tx) { - int count = 1; struct tsnep_tx_entry *entry; + bool do_tstamp = false; + int count = 1; int length; - int i; int retval; + int i; if (skb_shinfo(skb)->nr_frags > 0) count += skb_shinfo(skb)->nr_frags; @@ -579,7 +585,13 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, entry = &tx->entry[tx->write]; entry->skb = skb; - retval = tsnep_tx_map(skb, tx, count); + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + tx->adapter->hwtstamp_config.tx_type == HWTSTAMP_TX_ON) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + do_tstamp = true; + } + + retval = tsnep_tx_map(skb, tx, count, do_tstamp); if (retval < 0) { tsnep_tx_unmap(tx, tx->write, count); dev_kfree_skb_any(entry->skb); @@ -591,9 +603,6 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, } length = retval; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - for (i = 0; i < count; i++) tsnep_tx_activate(tx, (tx->write + i) & TSNEP_RING_MASK, length, i == count - 1); @@ -844,8 +853,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) length = tsnep_tx_unmap(tx, tx->read, count); - if ((entry->type & TSNEP_TX_TYPE_SKB) && - (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) && + if (((entry->type & TSNEP_TX_TYPE_SKB_TSTAMP) == TSNEP_TX_TYPE_SKB_TSTAMP) && (__le32_to_cpu(entry->desc_wb->properties) & TSNEP_DESC_EXTENDED_WRITEBACK_FLAG)) { struct skb_shared_hwtstamps hwtstamps; From 0afc44d8cdf6029cce0a92873f0de5ac9416cec8 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 14 May 2025 15:40:28 +0000 Subject: [PATCH 1866/2924] net: devmem: fix kernel panic when netlink socket close after module unload Kernel panic occurs when a devmem TCP socket is closed after NIC module is unloaded. This is Devmem TCP unregistration scenarios. number is an order. (a)netlink socket close (b)pp destroy (c)uninstall result 1 2 3 OK 1 3 2 (d)Impossible 2 1 3 OK 3 1 2 (e)Kernel panic 2 3 1 (d)Impossible 3 2 1 (d)Impossible (a) netdev_nl_sock_priv_destroy() is called when devmem TCP socket is closed. (b) page_pool_destroy() is called when the interface is down. (c) mp_ops->uninstall() is called when an interface is unregistered. (d) There is no scenario in mp_ops->uninstall() is called before page_pool_destroy(). Because unregister_netdevice_many_notify() closes interfaces first and then calls mp_ops->uninstall(). (e) netdev_nl_sock_priv_destroy() accesses struct net_device to acquire netdev_lock(). But if the interface module has already been removed, net_device pointer is invalid, so it causes kernel panic. In summary, there are only 3 possible scenarios. A. sk close -> pp destroy -> uninstall. B. pp destroy -> sk close -> uninstall. C. pp destroy -> uninstall -> sk close. Case C is a kernel panic scenario. In order to fix this problem, It makes mp_dmabuf_devmem_uninstall() set binding->dev to NULL. It indicates an bound net_device was unregistered. It makes netdev_nl_sock_priv_destroy() do not acquire netdev_lock() if binding->dev is NULL. A new binding->lock is added to protect a dev of a binding. So, lock ordering is like below. priv->lock netdev_lock(dev) binding->lock Tests: Scenario A: ./ncdevmem -s 192.168.1.4 -c 192.168.1.2 -f $interface -l -p 8000 \ -v 7 -t 1 -q 1 & pid=$! sleep 10 kill $pid ip link set $interface down modprobe -rv $module Scenario B: ./ncdevmem -s 192.168.1.4 -c 192.168.1.2 -f $interface -l -p 8000 \ -v 7 -t 1 -q 1 & pid=$! sleep 10 ip link set $interface down kill $pid modprobe -rv $module Scenario C: ./ncdevmem -s 192.168.1.4 -c 192.168.1.2 -f $interface -l -p 8000 \ -v 7 -t 1 -q 1 & pid=$! sleep 10 modprobe -rv $module sleep 5 kill $pid Splat looks like: Oops: general protection fault, probably for non-canonical address 0xdffffc001fffa9f7: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI KASAN: probably user-memory-access in range [0x00000000fffd4fb8-0x00000000fffd4fbf] CPU: 0 UID: 0 PID: 2041 Comm: ncdevmem Tainted: G B W 6.15.0-rc1+ #2 PREEMPT(undef) 0947ec89efa0fd68838b78e36aa1617e97ff5d7f Tainted: [B]=BAD_PAGE, [W]=WARN RIP: 0010:__mutex_lock (./include/linux/sched.h:2244 kernel/locking/mutex.c:400 kernel/locking/mutex.c:443 kernel/locking/mutex.c:605 kernel/locking/mutex.c:746) Code: ea 03 80 3c 02 00 0f 85 4f 13 00 00 49 8b 1e 48 83 e3 f8 74 6a 48 b8 00 00 00 00 00 fc ff df 48 8d 7b 34 48 89 fa 48 c1 ea 03 <0f> b6 f RSP: 0018:ffff88826f7ef730 EFLAGS: 00010203 RAX: dffffc0000000000 RBX: 00000000fffd4f88 RCX: ffffffffaa9bc811 RDX: 000000001fffa9f7 RSI: 0000000000000008 RDI: 00000000fffd4fbc RBP: ffff88826f7ef8b0 R08: 0000000000000000 R09: ffffed103e6aa1a4 R10: 0000000000000007 R11: ffff88826f7ef442 R12: fffffbfff669f65e R13: ffff88812a830040 R14: ffff8881f3550d20 R15: 00000000fffd4f88 FS: 0000000000000000(0000) GS:ffff888866c05000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563bed0cb288 CR3: 00000001a7c98000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ... netdev_nl_sock_priv_destroy (net/core/netdev-genl.c:953 (discriminator 3)) genl_release (net/netlink/genetlink.c:653 net/netlink/genetlink.c:694 net/netlink/genetlink.c:705) ... netlink_release (net/netlink/af_netlink.c:737) ... __sock_release (net/socket.c:647) sock_close (net/socket.c:1393) Fixes: 1d22d3060b9b ("net: drop rtnl_lock for queue_mgmt operations") Signed-off-by: Taehee Yoo Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250514154028.1062909-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 7 +++++++ net/core/devmem.h | 2 ++ net/core/netdev-genl.c | 11 +++++++++++ 3 files changed, 20 insertions(+) diff --git a/net/core/devmem.c b/net/core/devmem.c index 6e27a47d049354..2db428ab6b8be4 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -200,6 +200,8 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, refcount_set(&binding->ref, 1); + mutex_init(&binding->lock); + binding->dmabuf = dmabuf; binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent); @@ -379,6 +381,11 @@ static void mp_dmabuf_devmem_uninstall(void *mp_priv, xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) { if (bound_rxq == rxq) { xa_erase(&binding->bound_rxqs, xa_idx); + if (xa_empty(&binding->bound_rxqs)) { + mutex_lock(&binding->lock); + binding->dev = NULL; + mutex_unlock(&binding->lock); + } break; } } diff --git a/net/core/devmem.h b/net/core/devmem.h index 7fc158d527293b..a1aabc9685cc67 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -20,6 +20,8 @@ struct net_devmem_dmabuf_binding { struct sg_table *sgt; struct net_device *dev; struct gen_pool *chunk_pool; + /* Protect dev */ + struct mutex lock; /* The user holds a ref (via the netlink API) for as long as they want * the binding to remain alive. Each page pool using this binding holds diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index dae9f0d432fb34..a877693fecd65a 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -979,14 +979,25 @@ void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) { struct net_devmem_dmabuf_binding *binding; struct net_devmem_dmabuf_binding *temp; + netdevice_tracker dev_tracker; struct net_device *dev; mutex_lock(&priv->lock); list_for_each_entry_safe(binding, temp, &priv->bindings, list) { + mutex_lock(&binding->lock); dev = binding->dev; + if (!dev) { + mutex_unlock(&binding->lock); + net_devmem_unbind_dmabuf(binding); + continue; + } + netdev_hold(dev, &dev_tracker, GFP_KERNEL); + mutex_unlock(&binding->lock); + netdev_lock(dev); net_devmem_unbind_dmabuf(binding); netdev_unlock(dev); + netdev_put(dev, &dev_tracker); } mutex_unlock(&priv->lock); } From 99bcd91fabada0dbb1d5f0de44532d8008db93c6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 8 May 2025 16:44:52 +0300 Subject: [PATCH 1867/2924] perf/x86/intel: Fix segfault with PEBS-via-PT with sample_freq Currently, using PEBS-via-PT with a sample frequency instead of a sample period, causes a segfault. For example: BUG: kernel NULL pointer dereference, address: 0000000000000195 ? __die_body.cold+0x19/0x27 ? page_fault_oops+0xca/0x290 ? exc_page_fault+0x7e/0x1b0 ? asm_exc_page_fault+0x26/0x30 ? intel_pmu_pebs_event_update_no_drain+0x40/0x60 ? intel_pmu_pebs_event_update_no_drain+0x32/0x60 intel_pmu_drain_pebs_icl+0x333/0x350 handle_pmi_common+0x272/0x3c0 intel_pmu_handle_irq+0x10a/0x2e0 perf_event_nmi_handler+0x2a/0x50 That happens because intel_pmu_pebs_event_update_no_drain() assumes all the pebs_enabled bits represent counter indexes, which is not always the case. In this particular case, bits 60 and 61 are set for PEBS-via-PT purposes. The behaviour of PEBS-via-PT with sample frequency is questionable because although a PMI is generated (PEBS_PMI_AFTER_EACH_RECORD), the period is not adjusted anyway. Putting that aside, fix intel_pmu_pebs_event_update_no_drain() by passing the mask of counter bits instead of 'size'. Note, prior to the Fixes commit, 'size' would be limited to the maximum counter index, so the issue was not hit. Fixes: 722e42e45c2f1 ("perf/x86: Support counter mask") Signed-off-by: Adrian Hunter Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Ian Rogers Cc: linux-perf-users@vger.kernel.org Link: https://lore.kernel.org/r/20250508134452.73960-1-adrian.hunter@intel.com --- arch/x86/events/intel/ds.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 9b20acc0e932e2..8d86e91bd5e5c3 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2465,8 +2465,9 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_ setup_pebs_fixed_sample_data); } -static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask) { + u64 pebs_enabled = cpuc->pebs_enabled & mask; struct perf_event *event; int bit; @@ -2477,7 +2478,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int * It needs to call intel_pmu_save_and_restart_reload() to * update the event->count for this case. */ - for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) { + for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) { event = cpuc->events[bit]; if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) intel_pmu_save_and_restart_reload(event, 0); @@ -2512,7 +2513,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d } if (unlikely(base >= top)) { - intel_pmu_pebs_event_update_no_drain(cpuc, size); + intel_pmu_pebs_event_update_no_drain(cpuc, mask); return; } @@ -2626,7 +2627,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED); if (unlikely(base >= top)) { - intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX); + intel_pmu_pebs_event_update_no_drain(cpuc, mask); return; } From dd24f87f65c957f30e605e44961d2fd53a44c780 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 16 May 2025 00:26:01 +0800 Subject: [PATCH 1868/2924] ublk: fix dead loop when canceling io command Commit: f40139fde527 ("ublk: fix race between io_uring_cmd_complete_in_task and ublk_cancel_cmd") adds a request state check in ublk_cancel_cmd(), and if the request is started, skips canceling this uring_cmd. However, the current uring_cmd may be in ACTIVE state, without block request coming to the uring command. Meantime, if the cached request in tag_set.tags[tag] has been delivered to ublk server and reycycled, then this uring_cmd can't be canceled. ublk requests are aborted in ublk char device release handler, which depends on canceling all ACTIVE uring_cmd. So it causes a dead loop. Fix this issue by not taking a stale request into account when canceling uring_cmd in ublk_cancel_cmd(). Reported-by: Shinichiro Kawasaki Closes: https://lore.kernel.org/linux-block/mruqwpf4tqenkbtgezv5oxwq7ngyq24jzeyqy4ixzvivatbbxv@4oh2wzz4e6qn/ Fixes: f40139fde527 ("ublk: fix race between io_uring_cmd_complete_in_task and ublk_cancel_cmd") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250515162601.77346-1-ming.lei@redhat.com [axboe: rewording of commit message] Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f9032076bc0615..dc104c025cd568 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1708,7 +1708,7 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, * that ublk_dispatch_req() is always called */ req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); - if (req && blk_mq_request_started(req)) + if (req && blk_mq_request_started(req) && req->tag == tag) return; spin_lock(&ubq->cancel_lock); From 7af8479d9eb4319b4ba7b47a8c4d2c55af1c31e1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 7 May 2025 15:00:30 -0400 Subject: [PATCH 1869/2924] Bluetooth: L2CAP: Fix not checking l2cap_chan security level l2cap_check_enc_key_size shall check the security level of the l2cap_chan rather than the hci_conn since for incoming connection request that may be different as hci_conn may already been encrypted using a different security level. Fixes: 522e9ed157e3 ("Bluetooth: l2cap: Check encryption key size on incoming connection") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 73472756618a0c..042d3ac3b4a38e 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1411,7 +1411,8 @@ static void l2cap_request_info(struct l2cap_conn *conn) sizeof(req), &req); } -static bool l2cap_check_enc_key_size(struct hci_conn *hcon) +static bool l2cap_check_enc_key_size(struct hci_conn *hcon, + struct l2cap_chan *chan) { /* The minimum encryption key size needs to be enforced by the * host stack before establishing any L2CAP connections. The @@ -1425,7 +1426,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon) int min_key_size = hcon->hdev->min_enc_key_size; /* On FIPS security level, key size must be 16 bytes */ - if (hcon->sec_level == BT_SECURITY_FIPS) + if (chan->sec_level == BT_SECURITY_FIPS) min_key_size = 16; return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || @@ -1453,7 +1454,7 @@ static void l2cap_do_start(struct l2cap_chan *chan) !__l2cap_no_conn_pending(chan)) return; - if (l2cap_check_enc_key_size(conn->hcon)) + if (l2cap_check_enc_key_size(conn->hcon, chan)) l2cap_start_connection(chan); else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); @@ -1528,7 +1529,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) continue; } - if (l2cap_check_enc_key_size(conn->hcon)) + if (l2cap_check_enc_key_size(conn->hcon, chan)) l2cap_start_connection(chan); else l2cap_chan_close(chan, ECONNREFUSED); @@ -3992,7 +3993,7 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, /* Check if the ACL is secure enough (if not SDP) */ if (psm != cpu_to_le16(L2CAP_PSM_SDP) && (!hci_conn_check_link_mode(conn->hcon) || - !l2cap_check_enc_key_size(conn->hcon))) { + !l2cap_check_enc_key_size(conn->hcon, pchan))) { conn->disc_reason = HCI_ERROR_AUTH_FAILURE; result = L2CAP_CR_SEC_BLOCK; goto response; @@ -7352,7 +7353,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } if (chan->state == BT_CONNECT) { - if (!status && l2cap_check_enc_key_size(hcon)) + if (!status && l2cap_check_enc_key_size(hcon, chan)) l2cap_start_connection(chan); else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); @@ -7362,7 +7363,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) struct l2cap_conn_rsp rsp; __u16 res, stat; - if (!status && l2cap_check_enc_key_size(hcon)) { + if (!status && l2cap_check_enc_key_size(hcon, chan)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { res = L2CAP_CR_PEND; stat = L2CAP_CS_AUTHOR_PEND; From 4bcb0c7dc25446b99fc7a8fa2a143d69f3314162 Mon Sep 17 00:00:00 2001 From: En-Wei Wu Date: Thu, 8 May 2025 22:15:20 +0800 Subject: [PATCH 1870/2924] Bluetooth: btusb: use skb_pull to avoid unsafe access in QCA dump handling Use skb_pull() and skb_pull_data() to safely parse QCA dump packets. This avoids direct pointer math on skb->data, which could lead to invalid access if the packet is shorter than expected. Fixes: 20981ce2d5a5 ("Bluetooth: btusb: Add WCN6855 devcoredump support") Signed-off-by: En-Wei Wu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 98 ++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 58 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index a42dedb78e0a07..256b451bbe065f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3014,9 +3014,8 @@ static void btusb_coredump_qca(struct hci_dev *hdev) static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) { int ret = 0; + unsigned int skip = 0; u8 pkt_type; - u8 *sk_ptr; - unsigned int sk_len; u16 seqno; u32 dump_size; @@ -3025,18 +3024,13 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) struct usb_device *udev = btdata->udev; pkt_type = hci_skb_pkt_type(skb); - sk_ptr = skb->data; - sk_len = skb->len; + skip = sizeof(struct hci_event_hdr); + if (pkt_type == HCI_ACLDATA_PKT) + skip += sizeof(struct hci_acl_hdr); - if (pkt_type == HCI_ACLDATA_PKT) { - sk_ptr += HCI_ACL_HDR_SIZE; - sk_len -= HCI_ACL_HDR_SIZE; - } - - sk_ptr += HCI_EVENT_HDR_SIZE; - sk_len -= HCI_EVENT_HDR_SIZE; + skb_pull(skb, skip); + dump_hdr = (struct qca_dump_hdr *)skb->data; - dump_hdr = (struct qca_dump_hdr *)sk_ptr; seqno = le16_to_cpu(dump_hdr->seqno); if (seqno == 0) { set_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags); @@ -3056,16 +3050,15 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) btdata->qca_dump.ram_dump_size = dump_size; btdata->qca_dump.ram_dump_seqno = 0; - sk_ptr += offsetof(struct qca_dump_hdr, data0); - sk_len -= offsetof(struct qca_dump_hdr, data0); + + skb_pull(skb, offsetof(struct qca_dump_hdr, data0)); usb_disable_autosuspend(udev); bt_dev_info(hdev, "%s memdump size(%u)\n", (pkt_type == HCI_ACLDATA_PKT) ? "ACL" : "event", dump_size); } else { - sk_ptr += offsetof(struct qca_dump_hdr, data); - sk_len -= offsetof(struct qca_dump_hdr, data); + skb_pull(skb, offsetof(struct qca_dump_hdr, data)); } if (!btdata->qca_dump.ram_dump_size) { @@ -3085,7 +3078,6 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) return ret; } - skb_pull(skb, skb->len - sk_len); hci_devcd_append(hdev, skb); btdata->qca_dump.ram_dump_seqno++; if (seqno == QCA_LAST_SEQUENCE_NUM) { @@ -3113,68 +3105,58 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) /* Return: true if the ACL packet is a dump packet, false otherwise. */ static bool acl_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) { - u8 *sk_ptr; - unsigned int sk_len; - struct hci_event_hdr *event_hdr; struct hci_acl_hdr *acl_hdr; struct qca_dump_hdr *dump_hdr; + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); + bool is_dump = false; - sk_ptr = skb->data; - sk_len = skb->len; - - acl_hdr = hci_acl_hdr(skb); - if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) + if (!clone) return false; - sk_ptr += HCI_ACL_HDR_SIZE; - sk_len -= HCI_ACL_HDR_SIZE; - event_hdr = (struct hci_event_hdr *)sk_ptr; - - if ((event_hdr->evt != HCI_VENDOR_PKT) || - (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) - return false; + acl_hdr = skb_pull_data(clone, sizeof(*acl_hdr)); + if (!acl_hdr || (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE)) + goto out; - sk_ptr += HCI_EVENT_HDR_SIZE; - sk_len -= HCI_EVENT_HDR_SIZE; + event_hdr = skb_pull_data(clone, sizeof(*event_hdr)); + if (!event_hdr || (event_hdr->evt != HCI_VENDOR_PKT)) + goto out; - dump_hdr = (struct qca_dump_hdr *)sk_ptr; - if ((sk_len < offsetof(struct qca_dump_hdr, data)) || - (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || - (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) - return false; + dump_hdr = skb_pull_data(clone, sizeof(*dump_hdr)); + if (!dump_hdr || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + goto out; - return true; + is_dump = true; +out: + consume_skb(clone); + return is_dump; } /* Return: true if the event packet is a dump packet, false otherwise. */ static bool evt_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) { - u8 *sk_ptr; - unsigned int sk_len; - struct hci_event_hdr *event_hdr; struct qca_dump_hdr *dump_hdr; + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); + bool is_dump = false; - sk_ptr = skb->data; - sk_len = skb->len; - - event_hdr = hci_event_hdr(skb); - - if ((event_hdr->evt != HCI_VENDOR_PKT) - || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) + if (!clone) return false; - sk_ptr += HCI_EVENT_HDR_SIZE; - sk_len -= HCI_EVENT_HDR_SIZE; + event_hdr = skb_pull_data(clone, sizeof(*event_hdr)); + if (!event_hdr || (event_hdr->evt != HCI_VENDOR_PKT)) + goto out; - dump_hdr = (struct qca_dump_hdr *)sk_ptr; - if ((sk_len < offsetof(struct qca_dump_hdr, data)) || - (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || - (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) - return false; + dump_hdr = skb_pull_data(clone, sizeof(*dump_hdr)); + if (!dump_hdr || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + goto out; - return true; + is_dump = true; +out: + consume_skb(clone); + return is_dump; } static int btusb_recv_acl_qca(struct hci_dev *hdev, struct sk_buff *skb) From 93a81ca0657758b607c3f4ba889ae806be9beb73 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 16 May 2025 10:08:16 +0200 Subject: [PATCH 1871/2924] ALSA: pcm: Fix race of buffer access at PCM OSS layer The PCM OSS layer tries to clear the buffer with the silence data at initialization (or reconfiguration) of a stream with the explicit call of snd_pcm_format_set_silence() with runtime->dma_area. But this may lead to a UAF because the accessed runtime->dma_area might be freed concurrently, as it's performed outside the PCM ops. For avoiding it, move the code into the PCM core and perform it inside the buffer access lock, so that it won't be changed during the operation. Reported-by: syzbot+32d4647f551007595173@syzkaller.appspotmail.com Closes: https://lore.kernel.org/68164d8e.050a0220.11da1b.0019.GAE@google.com Cc: Link: https://patch.msgid.link/20250516080817.20068-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 2 ++ sound/core/oss/pcm_oss.c | 3 +-- sound/core/pcm_native.c | 11 +++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 8becb450488736..8582d22f381847 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -1404,6 +1404,8 @@ int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_s #define snd_pcm_lib_mmap_iomem NULL #endif +void snd_pcm_runtime_buffer_set_silence(struct snd_pcm_runtime *runtime); + /** * snd_pcm_limit_isa_dma_size - Get the max size fitting with ISA DMA transfer * @dma: DMA number diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 4683b9139c566a..4ecb17bd5436e7 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1074,8 +1074,7 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) runtime->oss.params = 0; runtime->oss.prepare = 1; runtime->oss.buffer_used = 0; - if (runtime->dma_area) - snd_pcm_format_set_silence(runtime->format, runtime->dma_area, bytes_to_samples(runtime, runtime->dma_bytes)); + snd_pcm_runtime_buffer_set_silence(runtime); runtime->oss.period_frames = snd_pcm_alsa_frames(substream, oss_period_size); diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 6c2b6a62d9d2f8..853ac5bb33ff2a 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -723,6 +723,17 @@ static void snd_pcm_buffer_access_unlock(struct snd_pcm_runtime *runtime) atomic_inc(&runtime->buffer_accessing); } +/* fill the PCM buffer with the current silence format; called from pcm_oss.c */ +void snd_pcm_runtime_buffer_set_silence(struct snd_pcm_runtime *runtime) +{ + snd_pcm_buffer_access_lock(runtime); + if (runtime->dma_area) + snd_pcm_format_set_silence(runtime->format, runtime->dma_area, + bytes_to_samples(runtime, runtime->dma_bytes)); + snd_pcm_buffer_access_unlock(runtime); +} +EXPORT_SYMBOL_GPL(snd_pcm_runtime_buffer_set_silence); + #if IS_ENABLED(CONFIG_SND_PCM_OSS) #define is_oss_stream(substream) ((substream)->oss.oss) #else From 03680913744de17fa49e62b1d8f71bab42b0b721 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 16 May 2025 11:08:10 +0200 Subject: [PATCH 1872/2924] x86/mm: Remove duplicated word in warning message Commit bbeb69ce3013 ("x86/mm: Remove CONFIG_HIGHMEM64G support") introduces a new warning message MSG_HIGHMEM_TRIMMED, which accidentally introduces a duplicated 'for for' in the warning message. Remove this duplicated word. This was noticed while reviewing for references to obsolete kernel build config options. Fixes: bbeb69ce3013 ("x86/mm: Remove CONFIG_HIGHMEM64G support") Signed-off-by: Lukas Bulwahn Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dave Hansen Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: https://lore.kernel.org/r/20250516090810.556623-1-lukas.bulwahn@redhat.com --- arch/x86/mm/init_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ad662cc4605c69..148eba50265a59 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -565,7 +565,7 @@ static void __init lowmem_pfn_init(void) "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" #define MSG_HIGHMEM_TRIMMED \ - "Warning: only 4GB will be used. Support for for CONFIG_HIGHMEM64G was removed!\n" + "Warning: only 4GB will be used. Support for CONFIG_HIGHMEM64G was removed!\n" /* * We have more RAM than fits into lowmem - we try to put it into * highmem, also taking the highmem=x boot parameter into account: From 5ad8a4ddc45048bc2fe23b75357b6bf185db004f Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 16 May 2025 14:53:37 +0800 Subject: [PATCH 1873/2924] ALSA: hda/realtek - restore auto-mute mode for Dell Chrome platform This board need to shutdown Class-D amp to avoid EMI issue. Restore the Auto-Mute mode item will off pin control when Auto-mute mode was enable. Signed-off-by: Kailang Yang Links: https://lore.kernel.org/ee8bbe5236464c369719d96269ba8ef8@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8a2b09e4a7d5ac..dcfaddc3ae133f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6830,7 +6830,10 @@ static void alc256_fixup_chromebook(struct hda_codec *codec, switch (action) { case HDA_FIXUP_ACT_PRE_PROBE: - spec->gen.suppress_auto_mute = 1; + if (codec->core.subsystem_id == 0x10280d76) + spec->gen.suppress_auto_mute = 0; + else + spec->gen.suppress_auto_mute = 1; spec->gen.suppress_auto_mic = 1; spec->en_3kpull_low = false; break; From 6d6d7f91cc8c111d40416ac9240a3bb9396c5235 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 10 May 2025 10:50:13 -0400 Subject: [PATCH 1874/2924] NFSv4/pnfs: Reset the layout state after a layoutreturn If there are still layout segments in the layout plh_return_lsegs list after a layout return, we should be resetting the state to ensure they eventually get returned as well. Fixes: 68f744797edd ("pNFS: Do not free layout segments that are marked for return") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 10fdd065a61c23..fc7c5fb1019806 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -745,6 +745,14 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return remaining; } +static void pnfs_reset_return_info(struct pnfs_layout_hdr *lo) +{ + struct pnfs_layout_segment *lseg; + + list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); +} + static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, struct list_head *free_me, @@ -1292,6 +1300,7 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); pnfs_free_returned_lsegs(lo, &freeme, range, seq); pnfs_set_layout_stateid(lo, stateid, NULL, true); + pnfs_reset_return_info(lo); } else pnfs_mark_layout_stateid_invalid(lo, &freeme); out_unlock: From 28511504f3ac73ebf45cbbe0dafeca1026e9a8f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 10 May 2025 11:05:36 -0400 Subject: [PATCH 1875/2924] NFS/pnfs: Fix the error path in pnfs_layoutreturn_retry_later_locked() If there isn't a valid layout, or the layout stateid has changed, the cleanup after a layout return should clear out the old data. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fc7c5fb1019806..3adb7d0dbec7ac 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1254,21 +1254,15 @@ static void pnfs_clear_layoutcommit(struct inode *inode, static void pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, - const struct pnfs_layout_range *range) + const struct pnfs_layout_range *range, + struct list_head *freeme) { - const struct pnfs_layout_segment *lseg; - u32 seq = be32_to_cpu(arg_stateid->seqid); - if (pnfs_layout_is_valid(lo) && - nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) { - list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) { - if (pnfs_seqid_is_newer(lseg->pls_seq, seq) || - !pnfs_should_free_range(&lseg->pls_range, range)) - continue; - pnfs_set_plh_return_info(lo, range->iomode, seq); - break; - } - } + nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) + pnfs_reset_return_info(lo); + else + pnfs_mark_layout_stateid_invalid(lo, freeme); + pnfs_clear_layoutreturn_waitbit(lo); } void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo, @@ -1276,11 +1270,12 @@ void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo, const struct pnfs_layout_range *range) { struct inode *inode = lo->plh_inode; + LIST_HEAD(freeme); spin_lock(&inode->i_lock); - pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range); - pnfs_clear_layoutreturn_waitbit(lo); + pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range, &freeme); spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&freeme); } void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, @@ -1716,6 +1711,7 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, struct inode *inode = args->inode; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; + LIST_HEAD(freeme); switch (ret) { case -NFS4ERR_BADSESSION: @@ -1724,9 +1720,9 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, case -NFS4ERR_NOMATCHING_LAYOUT: spin_lock(&inode->i_lock); pnfs_layoutreturn_retry_later_locked(lo, &args->stateid, - &args->range); - pnfs_clear_layoutreturn_waitbit(lo); + &args->range, &freeme); spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&freeme); break; case 0: if (res->lrs_present) From dcd21b609d4abc7303f8683bce4f35d78d7d6830 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 Apr 2025 18:21:06 -0400 Subject: [PATCH 1876/2924] NFS: Avoid flushing data while holding directory locks in nfs_rename() The Linux client assumes that all filehandles are non-volatile for renames within the same directory (otherwise sillyrename cannot work). However, the existence of the Linux 'subtree_check' export option has meant that nfs_rename() has always assumed it needs to flush writes before attempting to rename. Since NFSv4 does allow the client to query whether or not the server exhibits this behaviour, and since knfsd does actually set the appropriate flag when 'subtree_check' is enabled on an export, it should be OK to optimise away the write flushing behaviour in the cases where it is clearly not needed. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton --- fs/nfs/client.c | 2 ++ fs/nfs/dir.c | 15 ++++++++++++++- include/linux/nfs_fs_sb.h | 12 +++++++++--- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2115c1189c2df1..6d63b958c4bb13 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1105,6 +1105,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc) if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) server->namelen = NFS2_MAXNAMLEN; } + /* Linux 'subtree_check' borkenness mandates this setting */ + server->fh_expire_type = NFS_FH_VOL_RENAME; if (!(fattr->valid & NFS_ATTR_FATTR)) { error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bd23fc736b3947..d0e0b435a84316 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2676,6 +2676,18 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) unblock_revalidate(new_dentry); } +static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry, + struct dentry *new_dentry) +{ + struct nfs_server *server = NFS_SB(old_dentry->d_sb); + + if (old_dentry->d_parent != new_dentry->d_parent) + return false; + if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE) + return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN); + return true; +} + /* * RENAME * FIXME: Some nfsds, like the Linux user space nfsd, may generate a @@ -2763,7 +2775,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } - if (S_ISREG(old_inode->i_mode)) + if (S_ISREG(old_inode->i_mode) && + nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry)) nfs_sync_inode(old_inode); task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, must_unblock ? nfs_unblock_rename : NULL); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 71319637a84e61..ee03f3cef30ca5 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -213,6 +213,15 @@ struct nfs_server { char *fscache_uniq; /* Uniquifier (or NULL) */ #endif + /* The following #defines numerically match the NFSv4 equivalents */ +#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1) +#define NFS_FH_VOLATILE_ANY (0x2) +#define NFS_FH_VOL_MIGRATION (0x4) +#define NFS_FH_VOL_RENAME (0x8) +#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME) + u32 fh_expire_type; /* V4 bitmask representing file + handle volatility type for + this filesystem */ u32 pnfs_blksize; /* layout_blksize attr */ #if IS_ENABLED(CONFIG_NFS_V4) u32 attr_bitmask[3];/* V4 bitmask representing the set @@ -236,9 +245,6 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ - u32 fh_expire_type; /* V4 bitmask representing file - handle volatility type for - this filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; void *pnfs_ld_data; /* per mount point data */ From c9e455581e2ba87ee38c126e8dc49a424b9df0cf Mon Sep 17 00:00:00 2001 From: Sagi Maimon Date: Wed, 14 May 2025 10:35:41 +0300 Subject: [PATCH 1877/2924] ptp: ocp: Limit signal/freq counts in summary output functions The debugfs summary output could access uninitialized elements in the freq_in[] and signal_out[] arrays, causing NULL pointer dereferences and triggering a kernel Oops (page_fault_oops). This patch adds u8 fields (nr_freq_in, nr_signal_out) to track the number of initialized elements, with a maximum of 4 per array. The summary output functions are updated to respect these limits, preventing out-of-bounds access and ensuring safe array handling. Widen the label variables because the change confuses GCC about max length of the strings. Fixes: ef61f5528fca ("ptp: ocp: add Adva timecard support") Signed-off-by: Sagi Maimon Reviewed-by: Simon Horman Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250514073541.35817-1-maimon.sagi@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 2ccdca4f69604b..e63481f24238da 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -315,6 +315,8 @@ struct ptp_ocp_serial_port { #define OCP_BOARD_ID_LEN 13 #define OCP_SERIAL_LEN 6 #define OCP_SMA_NUM 4 +#define OCP_SIGNAL_NUM 4 +#define OCP_FREQ_NUM 4 enum { PORT_GNSS, @@ -342,8 +344,8 @@ struct ptp_ocp { struct dcf_master_reg __iomem *dcf_out; struct dcf_slave_reg __iomem *dcf_in; struct tod_reg __iomem *nmea_out; - struct frequency_reg __iomem *freq_in[4]; - struct ptp_ocp_ext_src *signal_out[4]; + struct frequency_reg __iomem *freq_in[OCP_FREQ_NUM]; + struct ptp_ocp_ext_src *signal_out[OCP_SIGNAL_NUM]; struct ptp_ocp_ext_src *pps; struct ptp_ocp_ext_src *ts0; struct ptp_ocp_ext_src *ts1; @@ -378,10 +380,12 @@ struct ptp_ocp { u32 utc_tai_offset; u32 ts_window_adjust; u64 fw_cap; - struct ptp_ocp_signal signal[4]; + struct ptp_ocp_signal signal[OCP_SIGNAL_NUM]; struct ptp_ocp_sma_connector sma[OCP_SMA_NUM]; const struct ocp_sma_op *sma_op; struct dpll_device *dpll; + int signals_nr; + int freq_in_nr; }; #define OCP_REQ_TIMESTAMP BIT(0) @@ -2697,6 +2701,8 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->eeprom_map = fb_eeprom_map; bp->fw_version = ioread32(&bp->image->version); bp->sma_op = &ocp_fb_sma_op; + bp->signals_nr = 4; + bp->freq_in_nr = 4; ptp_ocp_fb_set_version(bp); @@ -2862,6 +2868,8 @@ ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->fw_version = ioread32(&bp->reg->version); bp->fw_tag = 2; bp->sma_op = &ocp_art_sma_op; + bp->signals_nr = 4; + bp->freq_in_nr = 4; /* Enable MAC serial port during initialisation */ iowrite32(1, &bp->board_config->mro50_serial_activate); @@ -2888,6 +2896,8 @@ ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->flash_start = 0xA00000; bp->eeprom_map = fb_eeprom_map; bp->sma_op = &ocp_adva_sma_op; + bp->signals_nr = 2; + bp->freq_in_nr = 2; version = ioread32(&bp->image->version); /* if lower 16 bits are empty, this is the fw loader. */ @@ -4008,7 +4018,7 @@ _signal_summary_show(struct seq_file *s, struct ptp_ocp *bp, int nr) { struct signal_reg __iomem *reg = bp->signal_out[nr]->mem; struct ptp_ocp_signal *signal = &bp->signal[nr]; - char label[8]; + char label[16]; bool on; u32 val; @@ -4031,7 +4041,7 @@ static void _frequency_summary_show(struct seq_file *s, int nr, struct frequency_reg __iomem *reg) { - char label[8]; + char label[16]; bool on; u32 val; @@ -4175,11 +4185,11 @@ ptp_ocp_summary_show(struct seq_file *s, void *data) } if (bp->fw_cap & OCP_CAP_SIGNAL) - for (i = 0; i < 4; i++) + for (i = 0; i < bp->signals_nr; i++) _signal_summary_show(s, bp, i); if (bp->fw_cap & OCP_CAP_FREQ) - for (i = 0; i < 4; i++) + for (i = 0; i < bp->freq_in_nr; i++) _frequency_summary_show(s, i, bp->freq_in[i]); if (bp->irig_out) { From 6b1d3c5f675cc794a015138b115afff172fb4c58 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 14 May 2025 15:03:19 -0700 Subject: [PATCH 1878/2924] team: grab team lock during team_change_rx_flags Syzkaller reports the following issue: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:578 netdev_lock include/linux/netdevice.h:2751 [inline] netdev_lock_ops include/net/netdev_lock.h:42 [inline] dev_set_promiscuity+0x10e/0x260 net/core/dev_api.c:285 bond_set_promiscuity drivers/net/bonding/bond_main.c:922 [inline] bond_change_rx_flags+0x219/0x690 drivers/net/bonding/bond_main.c:4732 dev_change_rx_flags net/core/dev.c:9145 [inline] __dev_set_promiscuity+0x3f5/0x590 net/core/dev.c:9189 netif_set_promiscuity+0x50/0xe0 net/core/dev.c:9201 dev_set_promiscuity+0x126/0x260 net/core/dev_api.c:286 ^^ all of the above is under rcu lock team_change_rx_flags+0x1b3/0x330 drivers/net/team/team_core.c:1785 dev_change_rx_flags net/core/dev.c:9145 [inline] __dev_set_promiscuity+0x3f5/0x590 net/core/dev.c:9189 netif_set_promiscuity+0x50/0xe0 net/core/dev.c:9201 dev_set_promiscuity+0x126/0x260 net/core/dev_api.c:286 hsr_del_port+0x25e/0x2d0 net/hsr/hsr_slave.c:233 hsr_netdev_notify+0x827/0xb60 net/hsr/hsr_main.c:104 notifier_call_chain+0x1b3/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2214 [inline] call_netdevice_notifiers net/core/dev.c:2228 [inline] unregister_netdevice_many_notify+0x15d8/0x2330 net/core/dev.c:11970 rtnl_delete_link net/core/rtnetlink.c:3522 [inline] rtnl_dellink+0x488/0x710 net/core/rtnetlink.c:3564 rtnetlink_rcv_msg+0x7cc/0xb70 net/core/rtnetlink.c:6955 netlink_rcv_skb+0x219/0x490 net/netlink/af_netlink.c:2534 netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] netlink_unicast+0x758/0x8d0 net/netlink/af_netlink.c:1339 netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1883 team_change_rx_flags runs under rcu lock which means we can't grab instance lock for the lower devices. Switch to team->lock, similar to what we already do for team_set_mac_address and team_change_mtu. Fixes: 78cd408356fe ("net: add missing instance lock to dev_set_promiscuity") Reported-by: syzbot+53485086a41dbb43270a@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=53485086a41dbb43270a Link: https://lore.kernel.org/netdev/6822cc81.050a0220.f2294.00e8.GAE@google.com Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250514220319.3505158-1-stfomichev@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/team/team_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index d8fc0c79745d33..b75ceb90359f89 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -1778,8 +1778,8 @@ static void team_change_rx_flags(struct net_device *dev, int change) struct team_port *port; int inc; - rcu_read_lock(); - list_for_each_entry_rcu(port, &team->port_list, list) { + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) { if (change & IFF_PROMISC) { inc = dev->flags & IFF_PROMISC ? 1 : -1; dev_set_promiscuity(port->dev, inc); @@ -1789,7 +1789,7 @@ static void team_change_rx_flags(struct net_device *dev, int change) dev_set_allmulti(port->dev, inc); } } - rcu_read_unlock(); + mutex_unlock(&team->lock); } static void team_set_rx_mode(struct net_device *dev) From ba54bce747fa9e07896c1abd9b48545f7b4b31d2 Mon Sep 17 00:00:00 2001 From: Jakob Unterwurzacher Date: Thu, 15 May 2025 09:29:19 +0200 Subject: [PATCH 1879/2924] net: dsa: microchip: linearize skb for tail-tagging switches The pointer arithmentic for accessing the tail tag only works for linear skbs. For nonlinear skbs, it reads uninitialized memory inside the skb headroom, essentially randomizing the tag. I have observed it gets set to 6 most of the time. Example where ksz9477_rcv thinks that the packet from port 1 comes from port 6 (which does not exist for the ksz9896 that's in use), dropping the packet. Debug prints added by me (not included in this patch): [ 256.645337] ksz9477_rcv:323 tag0=6 [ 256.645349] skb len=47 headroom=78 headlen=0 tailroom=0 mac=(64,14) mac_len=14 net=(78,0) trans=78 shinfo(txflags=0 nr_frags=1 gso(size=0 type=0 segs=0)) csum(0x0 start=0 offset=0 ip_summed=0 complete_sw=0 valid=0 level=0) hash(0x0 sw=0 l4=0) proto=0x00f8 pkttype=1 iif=3 priority=0x0 mark=0x0 alloc_cpu=0 vlan_all=0x0 encapsulation=0 inner(proto=0x0000, mac=0, net=0, trans=0) [ 256.645377] dev name=end1 feat=0x0002e10200114bb3 [ 256.645386] skb headroom: 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 256.645395] skb headroom: 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 256.645403] skb headroom: 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 256.645411] skb headroom: 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 256.645420] skb headroom: 00000040: ff ff ff ff ff ff 00 1c 19 f2 e2 db 08 06 [ 256.645428] skb frag: 00000000: 00 01 08 00 06 04 00 01 00 1c 19 f2 e2 db 0a 02 [ 256.645436] skb frag: 00000010: 00 83 00 00 00 00 00 00 0a 02 a0 2f 00 00 00 00 [ 256.645444] skb frag: 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 [ 256.645452] ksz_common_rcv:92 dsa_conduit_find_user returned NULL Call skb_linearize before trying to access the tag. This patch fixes ksz9477_rcv which is used by the ksz9896 I have at hand, and also applies the same fix to ksz8795_rcv which seems to have the same problem. Signed-off-by: Jakob Unterwurzacher CC: stable@vger.kernel.org Fixes: 016e43a26bab ("net: dsa: ksz: Add KSZ8795 tag code") Fixes: 8b8010fb7876 ("dsa: add support for Microchip KSZ tail tagging") Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250515072920.2313014-1-jakob.unterwurzacher@cherry.de Signed-off-by: Jakub Kicinski --- net/dsa/tag_ksz.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index c33d4bf179297c..0b7564b53790da 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -140,7 +140,12 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev) { - u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; + u8 *tag; + + if (skb_linearize(skb)) + return NULL; + + tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; return ksz_common_rcv(skb, dev, tag[0] & KSZ8795_TAIL_TAG_EG_PORT_M, KSZ_EGRESS_TAG_LEN); @@ -311,10 +316,16 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev) { - /* Tag decoding */ - u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; - unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M; unsigned int len = KSZ_EGRESS_TAG_LEN; + unsigned int port; + u8 *tag; + + if (skb_linearize(skb)) + return NULL; + + /* Tag decoding */ + tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; + port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M; /* Extra 4-bytes PTP timestamp */ if (tag[0] & KSZ9477_PTP_TAG_INDICATION) { From 91b6dbced0ef1d680afdd69b14fc83d50ebafaf3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 15 May 2025 11:48:48 +0300 Subject: [PATCH 1880/2924] bridge: netfilter: Fix forwarding of fragmented packets When netfilter defrag hooks are loaded (due to the presence of conntrack rules, for example), fragmented packets entering the bridge will be defragged by the bridge's pre-routing hook (br_nf_pre_routing() -> ipv4_conntrack_defrag()). Later on, in the bridge's post-routing hook, the defragged packet will be fragmented again. If the size of the largest fragment is larger than what the kernel has determined as the destination MTU (using ip_skb_dst_mtu()), the defragged packet will be dropped. Before commit ac6627a28dbf ("net: ipv4: Consolidate ipv4_mtu and ip_dst_mtu_maybe_forward"), ip_skb_dst_mtu() would return dst_mtu() as the destination MTU. Assuming the dst entry attached to the packet is the bridge's fake rtable one, this would simply be the bridge's MTU (see fake_mtu()). However, after above mentioned commit, ip_skb_dst_mtu() ends up returning the route's MTU stored in the dst entry's metrics. Ideally, in case the dst entry is the bridge's fake rtable one, this should be the bridge's MTU as the bridge takes care of updating this metric when its MTU changes (see br_change_mtu()). Unfortunately, the last operation is a no-op given the metrics attached to the fake rtable entry are marked as read-only. Therefore, ip_skb_dst_mtu() ends up returning 1500 (the initial MTU value) and defragged packets are dropped during fragmentation when dealing with large fragments and high MTU (e.g., 9k). Fix by moving the fake rtable entry's metrics to be per-bridge (in a similar fashion to the fake rtable entry itself) and marking them as writable, thereby allowing MTU changes to be reflected. Fixes: 62fa8a846d7d ("net: Implement read-only protection and COW'ing of metrics.") Fixes: 33eb9873a283 ("bridge: initialize fake_rtable metrics") Reported-by: Venkat Venkatsubra Closes: https://lore.kernel.org/netdev/PH0PR10MB4504888284FF4CBA648197D0ACB82@PH0PR10MB4504.namprd10.prod.outlook.com/ Tested-by: Venkat Venkatsubra Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250515084848.727706-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_nf_core.c | 7 ++----- net/bridge/br_private.h | 1 + 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 98aea5485aaef4..a8c67035e23c00 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -65,17 +65,14 @@ static struct dst_ops fake_dst_ops = { * ipt_REJECT needs it. Future netfilter modules might * require us to fill additional fields. */ -static const u32 br_dst_default_metrics[RTAX_MAX] = { - [RTAX_MTU - 1] = 1500, -}; - void br_netfilter_rtable_init(struct net_bridge *br) { struct rtable *rt = &br->fake_rtable; rcuref_init(&rt->dst.__rcuref, 1); rt->dst.dev = br->dev; - dst_init_metrics(&rt->dst, br_dst_default_metrics, true); + dst_init_metrics(&rt->dst, br->metrics, false); + dst_metric_set(&rt->dst, RTAX_MTU, br->dev->mtu); rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; rt->dst.ops = &fake_dst_ops; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d5b3c5936a79e1..4715a8d6dc3266 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -505,6 +505,7 @@ struct net_bridge { struct rtable fake_rtable; struct rt6_info fake_rt6_info; }; + u32 metrics[RTAX_MAX]; #endif u16 group_fwd_mask; u16 group_fwd_mask_required; From 43f0999af011fba646e015f0bb08b6c3002a0170 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Thu, 15 May 2025 19:04:56 +0000 Subject: [PATCH 1881/2924] vmxnet3: update MTU after device quiesce Currently, when device mtu is updated, vmxnet3 updates netdev mtu, quiesces the device and then reactivates it for the ESXi to know about the new mtu. So, technically the OS stack can start using the new mtu before ESXi knows about the new mtu. This can lead to issues for TSO packets which use mss as per the new mtu configured. This patch fixes this issue by moving the mtu write after device quiesce. Cc: stable@vger.kernel.org Fixes: d1a890fa37f2 ("net: VMware virtual Ethernet NIC driver: vmxnet3") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Changes v1-> v2: Moved MTU write after destroy of rx rings Link: https://patch.msgid.link/20250515190457.8597-1-ronak.doshi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 3df6aabc7e339e..c676979c7ab940 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3607,8 +3607,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) struct vmxnet3_adapter *adapter = netdev_priv(netdev); int err = 0; - WRITE_ONCE(netdev->mtu, new_mtu); - /* * Reset_work may be in the middle of resetting the device, wait for its * completion. @@ -3622,6 +3620,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) /* we need to re-create the rx queue based on the new mtu */ vmxnet3_rq_destroy_all(adapter); + WRITE_ONCE(netdev->mtu, new_mtu); vmxnet3_adjust_rx_ring_size(adapter); err = vmxnet3_rq_create_all(adapter); if (err) { @@ -3638,6 +3637,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) "Closing it\n", err); goto out; } + } else { + WRITE_ONCE(netdev->mtu, new_mtu); } out: From d6d2b0e1538d5c381ec0ca95afaf772c096ea5dc Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 15 May 2025 08:33:06 +0200 Subject: [PATCH 1882/2924] net: airoha: Fix page recycling in airoha_qdma_rx_process() Do not recycle the page twice in airoha_qdma_rx_process routine in case of error. Just run dev_kfree_skb() if the skb has been allocated and marked for recycling. Run page_pool_put_full_page() directly if the skb has not been allocated yet. Moreover, rely on DMA address from queue entry element instead of reading it from the DMA descriptor for DMA syncing in airoha_qdma_rx_process(). Fixes: e12182ddb6e71 ("net: airoha: Enable Rx Scatter-Gather") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250515-airoha-fix-rx-process-error-condition-v2-1-657e92c894b9@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index d748dc6de92367..1e9ab65218ff14 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -614,7 +614,6 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) struct airoha_queue_entry *e = &q->entry[q->tail]; struct airoha_qdma_desc *desc = &q->desc[q->tail]; u32 hash, reason, msg1 = le32_to_cpu(desc->msg1); - dma_addr_t dma_addr = le32_to_cpu(desc->addr); struct page *page = virt_to_head_page(e->buf); u32 desc_ctrl = le32_to_cpu(desc->ctrl); struct airoha_gdm_port *port; @@ -623,22 +622,16 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) if (!(desc_ctrl & QDMA_DESC_DONE_MASK)) break; - if (!dma_addr) - break; - - len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl); - if (!len) - break; - q->tail = (q->tail + 1) % q->ndesc; q->queued--; - dma_sync_single_for_cpu(eth->dev, dma_addr, + dma_sync_single_for_cpu(eth->dev, e->dma_addr, SKB_WITH_OVERHEAD(q->buf_size), dir); + len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl); data_len = q->skb ? q->buf_size : SKB_WITH_OVERHEAD(q->buf_size); - if (data_len < len) + if (!len || data_len < len) goto free_frag; p = airoha_qdma_get_gdm_port(eth, desc); @@ -701,9 +694,12 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) q->skb = NULL; continue; free_frag: - page_pool_put_full_page(q->page_pool, page, true); - dev_kfree_skb(q->skb); - q->skb = NULL; + if (q->skb) { + dev_kfree_skb(q->skb); + q->skb = NULL; + } else { + page_pool_put_full_page(q->page_pool, page, true); + } } airoha_qdma_fill_rx_queue(q); From c46286fdd6aa1d0e33c245bcffe9ff2428a777bd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 May 2025 18:49:26 +0200 Subject: [PATCH 1883/2924] mr: consolidate the ipmr_can_free_table() checks. Guoyu Yin reported a splat in the ipmr netns cleanup path: WARNING: CPU: 2 PID: 14564 at net/ipv4/ipmr.c:440 ipmr_free_table net/ipv4/ipmr.c:440 [inline] WARNING: CPU: 2 PID: 14564 at net/ipv4/ipmr.c:440 ipmr_rules_exit+0x135/0x1c0 net/ipv4/ipmr.c:361 Modules linked in: CPU: 2 UID: 0 PID: 14564 Comm: syz.4.838 Not tainted 6.14.0 #1 Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:ipmr_free_table net/ipv4/ipmr.c:440 [inline] RIP: 0010:ipmr_rules_exit+0x135/0x1c0 net/ipv4/ipmr.c:361 Code: ff df 48 c1 ea 03 80 3c 02 00 75 7d 48 c7 83 60 05 00 00 00 00 00 00 5b 5d 41 5c 41 5d 41 5e e9 71 67 7f 00 e8 4c 2d 8a fd 90 <0f> 0b 90 eb 93 e8 41 2d 8a fd 0f b6 2d 80 54 ea 01 31 ff 89 ee e8 RSP: 0018:ffff888109547c58 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff888108c12dc0 RCX: ffffffff83e09868 RDX: ffff8881022b3300 RSI: ffffffff83e098d4 RDI: 0000000000000005 RBP: ffff888104288000 R08: 0000000000000000 R09: ffffed10211825c9 R10: 0000000000000001 R11: ffff88801816c4a0 R12: 0000000000000001 R13: ffff888108c13320 R14: ffff888108c12dc0 R15: fffffbfff0b74058 FS: 00007f84f39316c0(0000) GS:ffff88811b100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f84f3930f98 CR3: 0000000113b56000 CR4: 0000000000350ef0 Call Trace: ipmr_net_exit_batch+0x50/0x90 net/ipv4/ipmr.c:3160 ops_exit_list+0x10c/0x160 net/core/net_namespace.c:177 setup_net+0x47d/0x8e0 net/core/net_namespace.c:394 copy_net_ns+0x25d/0x410 net/core/net_namespace.c:516 create_new_namespaces+0x3f6/0xaf0 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xc3/0x180 kernel/nsproxy.c:228 ksys_unshare+0x78d/0x9a0 kernel/fork.c:3342 __do_sys_unshare kernel/fork.c:3413 [inline] __se_sys_unshare kernel/fork.c:3411 [inline] __x64_sys_unshare+0x31/0x40 kernel/fork.c:3411 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xa6/0x1a0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f84f532cc29 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f84f3931038 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 00007f84f5615fa0 RCX: 00007f84f532cc29 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000040000400 RBP: 00007f84f53fba18 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f84f5615fa0 R15: 00007fff51c5f328 The running kernel has CONFIG_IP_MROUTE_MULTIPLE_TABLES disabled, and the sanity check for such build is still too loose. Address the issue consolidating the relevant sanity check in a single helper regardless of the kernel configuration. Also share it between the ipv4 and ipv6 code. Reported-by: Guoyu Yin Fixes: 50b94204446e ("ipmr: tune the ipmr_can_free_table() checks.") Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/372dc261e1bf12742276e1b984fc5a071b7fc5a8.1747321903.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/mroute_base.h | 5 +++++ net/ipv4/ipmr.c | 12 +----------- net/ipv6/ip6mr.c | 12 +----------- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 58a2401e4b551b..0075f6e5c3da9d 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -262,6 +262,11 @@ struct mr_table { int mroute_reg_vif_num; }; +static inline bool mr_can_free_table(struct net *net) +{ + return !check_net(net) || !net_initialized(net); +} + #ifdef CONFIG_IP_MROUTE_COMMON void vif_device_init(struct vif_device *v, struct net_device *dev, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a8b04d4abcaae8..85dc208f32e997 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -120,11 +120,6 @@ static void ipmr_expire_process(struct timer_list *t); lockdep_rtnl_is_held() || \ list_empty(&net->ipv4.mr_tables)) -static bool ipmr_can_free_table(struct net *net) -{ - return !check_net(net) || !net_initialized(net); -} - static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -317,11 +312,6 @@ EXPORT_SYMBOL(ipmr_rule_default); #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) -static bool ipmr_can_free_table(struct net *net) -{ - return !check_net(net); -} - static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -437,7 +427,7 @@ static void ipmr_free_table(struct mr_table *mrt) { struct net *net = read_pnet(&mrt->net); - WARN_ON_ONCE(!ipmr_can_free_table(net)); + WARN_ON_ONCE(!mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index b413c9c8a21c6f..3276cde5ebd704 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -108,11 +108,6 @@ static void ipmr_expire_process(struct timer_list *t); lockdep_rtnl_is_held() || \ list_empty(&net->ipv6.mr6_tables)) -static bool ip6mr_can_free_table(struct net *net) -{ - return !check_net(net) || !net_initialized(net); -} - static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -306,11 +301,6 @@ EXPORT_SYMBOL(ip6mr_rule_default); #define ip6mr_for_each_table(mrt, net) \ for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) -static bool ip6mr_can_free_table(struct net *net) -{ - return !check_net(net); -} - static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -416,7 +406,7 @@ static void ip6mr_free_table(struct mr_table *mrt) { struct net *net = read_pnet(&mrt->net); - WARN_ON_ONCE(!ip6mr_can_free_table(net)); + WARN_ON_ONCE(!mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | From 49771a7578cdfb73ca957dc1ca9516872c8b406b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 May 2025 18:37:02 -0400 Subject: [PATCH 1884/2924] bcachefs: Fix bch2_btree_path_traverse_cached() when paths realloced btree_key_cache_fill() will allocate and traverse another path (for the underlying btree), so we can't hold pointers to paths across a call - we have to pass indices. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 2 +- fs/bcachefs/btree_key_cache.c | 25 +++++++++++++++++-------- fs/bcachefs/btree_key_cache.h | 3 +-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index a873ec1baf581c..ac5f2046550da3 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1162,7 +1162,7 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans, } if (path->cached) { - ret = bch2_btree_path_traverse_cached(trans, path, flags); + ret = bch2_btree_path_traverse_cached(trans, path_idx, flags); goto out; } diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 2b186584a291bb..669825f89cdd84 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -301,9 +301,11 @@ static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans } static noinline int btree_key_cache_fill(struct btree_trans *trans, - struct btree_path *ck_path, + btree_path_idx_t ck_path_idx, unsigned flags) { + struct btree_path *ck_path = trans->paths + ck_path_idx; + if (flags & BTREE_ITER_cached_nofill) { ck_path->l[0].b = NULL; return 0; @@ -325,6 +327,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, goto err; /* Recheck after btree lookup, before allocating: */ + ck_path = trans->paths + ck_path_idx; ret = bch2_btree_key_cache_find(c, ck_path->btree_id, ck_path->pos) ? -EEXIST : 0; if (unlikely(ret)) goto out; @@ -344,10 +347,11 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, } static inline int btree_path_traverse_cached_fast(struct btree_trans *trans, - struct btree_path *path) + btree_path_idx_t path_idx) { struct bch_fs *c = trans->c; struct bkey_cached *ck; + struct btree_path *path = trans->paths + path_idx; retry: ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); if (!ck) @@ -373,27 +377,32 @@ static inline int btree_path_traverse_cached_fast(struct btree_trans *trans, return 0; } -int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path, +int bch2_btree_path_traverse_cached(struct btree_trans *trans, + btree_path_idx_t path_idx, unsigned flags) { - EBUG_ON(path->level); - - path->l[1].b = NULL; + EBUG_ON(trans->paths[path_idx].level); int ret; do { - ret = btree_path_traverse_cached_fast(trans, path); + ret = btree_path_traverse_cached_fast(trans, path_idx); if (unlikely(ret == -ENOENT)) - ret = btree_key_cache_fill(trans, path, flags); + ret = btree_key_cache_fill(trans, path_idx, flags); } while (ret == -EEXIST); + struct btree_path *path = trans->paths + path_idx; + if (unlikely(ret)) { path->uptodate = BTREE_ITER_NEED_TRAVERSE; if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) { btree_node_unlock(trans, path, 0); path->l[0].b = ERR_PTR(ret); } + } else { + BUG_ON(path->uptodate); + BUG_ON(!path->nodes_locked); } + return ret; } diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index 51d6289b8dee32..82d8c72512a93a 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -40,8 +40,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *, struct bkey_cached * bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos); -int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *, - unsigned); +int bch2_btree_path_traverse_cached(struct btree_trans *, btree_path_idx_t, unsigned); bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned, struct btree_insert_entry *); From a5806cd506af5a7c19bcd596e4708b5c464bfd21 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 May 2025 13:57:29 -0700 Subject: [PATCH 1885/2924] Linux 6.15-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0eb9e6c49b32aa..a9edd030365372 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Baby Opossum Posse # *DOCUMENTATION* From a7a8fe56e932a36f43e031b398aef92341bf5ea0 Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Fri, 16 May 2025 17:12:55 +0800 Subject: [PATCH 1886/2924] smb: client: Fix use-after-free in cifs_fill_dirent There is a race condition in the readdir concurrency process, which may access the rsp buffer after it has been released, triggering the following KASAN warning. ================================================================== BUG: KASAN: slab-use-after-free in cifs_fill_dirent+0xb03/0xb60 [cifs] Read of size 4 at addr ffff8880099b819c by task a.out/342975 CPU: 2 UID: 0 PID: 342975 Comm: a.out Not tainted 6.15.0-rc6+ #240 PREEMPT(full) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 Call Trace: dump_stack_lvl+0x53/0x70 print_report+0xce/0x640 kasan_report+0xb8/0xf0 cifs_fill_dirent+0xb03/0xb60 [cifs] cifs_readdir+0x12cb/0x3190 [cifs] iterate_dir+0x1a1/0x520 __x64_sys_getdents+0x134/0x220 do_syscall_64+0x4b/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f996f64b9f9 Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0d f7 c3 0c 00 f7 d8 64 89 8 RSP: 002b:00007f996f53de78 EFLAGS: 00000207 ORIG_RAX: 000000000000004e RAX: ffffffffffffffda RBX: 00007f996f53ecdc RCX: 00007f996f64b9f9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007f996f53dea0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000207 R12: ffffffffffffff88 R13: 0000000000000000 R14: 00007ffc8cd9a500 R15: 00007f996f51e000 Allocated by task 408: kasan_save_stack+0x20/0x40 kasan_save_track+0x14/0x30 __kasan_slab_alloc+0x6e/0x70 kmem_cache_alloc_noprof+0x117/0x3d0 mempool_alloc_noprof+0xf2/0x2c0 cifs_buf_get+0x36/0x80 [cifs] allocate_buffers+0x1d2/0x330 [cifs] cifs_demultiplex_thread+0x22b/0x2690 [cifs] kthread+0x394/0x720 ret_from_fork+0x34/0x70 ret_from_fork_asm+0x1a/0x30 Freed by task 342979: kasan_save_stack+0x20/0x40 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x37/0x50 kmem_cache_free+0x2b8/0x500 cifs_buf_release+0x3c/0x70 [cifs] cifs_readdir+0x1c97/0x3190 [cifs] iterate_dir+0x1a1/0x520 __x64_sys_getdents64+0x134/0x220 do_syscall_64+0x4b/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e The buggy address belongs to the object at ffff8880099b8000 which belongs to the cache cifs_request of size 16588 The buggy address is located 412 bytes inside of freed 16588-byte region [ffff8880099b8000, ffff8880099bc0cc) The buggy address belongs to the physical page: page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x99b8 head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 anon flags: 0x80000000000040(head|node=0|zone=1) page_type: f5(slab) raw: 0080000000000040 ffff888001e03400 0000000000000000 dead000000000001 raw: 0000000000000000 0000000000010001 00000000f5000000 0000000000000000 head: 0080000000000040 ffff888001e03400 0000000000000000 dead000000000001 head: 0000000000000000 0000000000010001 00000000f5000000 0000000000000000 head: 0080000000000003 ffffea0000266e01 00000000ffffffff 00000000ffffffff head: ffffffffffffffff 0000000000000000 00000000ffffffff 0000000000000008 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880099b8080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880099b8100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8880099b8180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880099b8200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880099b8280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== POC is available in the link [1]. The problem triggering process is as follows: Process 1 Process 2 ----------------------------------------------------------------- cifs_readdir /* file->private_data == NULL */ initiate_cifs_search cifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); smb2_query_dir_first ->query_dir_first() SMB2_query_directory SMB2_query_directory_init cifs_send_recv smb2_parse_query_directory srch_inf->ntwrk_buf_start = (char *)rsp; srch_inf->srch_entries_start = (char *)rsp + ... srch_inf->last_entry = (char *)rsp + ... srch_inf->smallBuf = true; find_cifs_entry /* if (cfile->srch_inf.ntwrk_buf_start) */ cifs_small_buf_release(cfile->srch_inf // free cifs_readdir ->iterate_shared() /* file->private_data != NULL */ find_cifs_entry /* in while (...) loop */ smb2_query_dir_next ->query_dir_next() SMB2_query_directory SMB2_query_directory_init cifs_send_recv compound_send_recv smb_send_rqst __smb_send_rqst rc = -ERESTARTSYS; /* if (fatal_signal_pending()) */ goto out; return rc /* if (cfile->srch_inf.last_entry) */ cifs_save_resume_key() cifs_fill_dirent // UAF /* if (rc) */ return -ENOENT; Fix this by ensuring the return code is checked before using pointers from the srch_inf. Link: https://bugzilla.kernel.org/show_bug.cgi?id=220131 [1] Fixes: a364bc0b37f1 ("[CIFS] fix saving of resume key before CIFSFindNext") Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: Wang Zhaolong Signed-off-by: Steve French --- fs/smb/client/readdir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 50f96259d9adc2..67d7dd64b5e2e2 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -756,11 +756,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, rc = server->ops->query_dir_next(xid, tcon, &cfile->fid, search_flags, &cfile->srch_inf); + if (rc) + return -ENOENT; /* FindFirst/Next set last_entry to NULL on malformed reply */ if (cfile->srch_inf.last_entry) cifs_save_resume_key(cfile->srch_inf.last_entry, cfile); - if (rc) - return -ENOENT; } if (index_to_find < cfile->srch_inf.index_of_last_entry) { /* we found the buffer that contains the entry */ From 494d458cfa70d8c107de5a6b6fc941f31c886b12 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 May 2025 22:32:30 -0400 Subject: [PATCH 1887/2924] bcachefs: fix extent_has_stripe_ptr() This wasn't checking indirect extents. Fixes: https://github.com/koverstreet/bcachefs/issues/887 Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 20 +++++++------------- fs/bcachefs/extents.h | 7 ------- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index fff58b78327c1a..c6cb26981923dd 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -507,20 +507,14 @@ static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s, static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) { - switch (k.k->type) { - case KEY_TYPE_extent: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - - extent_for_each_entry(e, entry) - if (extent_entry_type(entry) == - BCH_EXTENT_ENTRY_stripe_ptr && - entry->stripe_ptr.idx == idx) - return true; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; - break; - } - } + bkey_extent_entry_for_each(ptrs, entry) + if (extent_entry_type(entry) == + BCH_EXTENT_ENTRY_stripe_ptr && + entry->stripe_ptr.idx == idx) + return true; return false; } diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index e78a39e7e18f9a..9fe153183b36ec 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -380,13 +380,6 @@ out: \ /* Iterate over pointers in KEY_TYPE_extent: */ -#define extent_for_each_entry_from(_e, _entry, _start) \ - __bkey_extent_entry_for_each_from(_start, \ - extent_entry_last(_e), _entry) - -#define extent_for_each_entry(_e, _entry) \ - extent_for_each_entry_from(_e, _entry, (_e).v->start) - #define extent_ptr_next(_e, _ptr) \ __bkey_ptr_next(_ptr, extent_entry_last(_e)) From d6ebcde6d4ecf34f8495fb30516645db3aea8993 Mon Sep 17 00:00:00 2001 From: Dominik Grzegorzek Date: Sun, 18 May 2025 19:45:31 +0200 Subject: [PATCH 1888/2924] padata: do not leak refcount in reorder_work A recent patch that addressed a UAF introduced a reference count leak: the parallel_data refcount is incremented unconditionally, regardless of the return value of queue_work(). If the work item is already queued, the incremented refcount is never decremented. Fix this by checking the return value of queue_work() and decrementing the refcount when necessary. Resolves: Unreferenced object 0xffff9d9f421e3d80 (size 192): comm "cryptomgr_probe", pid 157, jiffies 4294694003 hex dump (first 32 bytes): 80 8b cf 41 9f 9d ff ff b8 97 e0 89 ff ff ff ff ...A............ d0 97 e0 89 ff ff ff ff 19 00 00 00 1f 88 23 00 ..............#. backtrace (crc 838fb36): __kmalloc_cache_noprof+0x284/0x320 padata_alloc_pd+0x20/0x1e0 padata_alloc_shell+0x3b/0xa0 0xffffffffc040a54d cryptomgr_probe+0x43/0xc0 kthread+0xf6/0x1f0 ret_from_fork+0x2f/0x50 ret_from_fork_asm+0x1a/0x30 Fixes: dd7d37ccf6b1 ("padata: avoid UAF for reorder_work") Cc: Signed-off-by: Dominik Grzegorzek Signed-off-by: Herbert Xu --- kernel/padata.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/padata.c b/kernel/padata.c index b3d4eacc4f5d8c..7eee94166357a0 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -358,7 +358,8 @@ static void padata_reorder(struct parallel_data *pd) * To avoid UAF issue, add pd ref here, and put pd ref after reorder_work finish. */ padata_get_pd(pd); - queue_work(pinst->serial_wq, &pd->reorder_work); + if (!queue_work(pinst->serial_wq, &pd->reorder_work)) + padata_put_pd(pd); } } From b2df03ed4052e97126267e8c13ad4204ea6ba9b6 Mon Sep 17 00:00:00 2001 From: Ivan Pravdin Date: Sun, 18 May 2025 18:41:02 -0400 Subject: [PATCH 1889/2924] crypto: algif_hash - fix double free in hash_accept If accept(2) is called on socket type algif_hash with MSG_MORE flag set and crypto_ahash_import fails, sk2 is freed. However, it is also freed in af_alg_release, leading to slab-use-after-free error. Fixes: fe869cdb89c9 ("crypto: algif_hash - User-space interface for hash operations") Cc: Signed-off-by: Ivan Pravdin Signed-off-by: Herbert Xu --- crypto/algif_hash.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 5498a87249d3e7..e3f1a4852737b0 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -265,10 +265,6 @@ static int hash_accept(struct socket *sock, struct socket *newsock, goto out_free_state; err = crypto_ahash_import(&ctx2->req, state); - if (err) { - sock_orphan(sk2); - sock_put(sk2); - } out_free_state: kfree_sensitive(state); From 6692dbc15e5ed40a3aa037aced65d7b8826c58cd Mon Sep 17 00:00:00 2001 From: "feijuan.li" Date: Wed, 14 May 2025 14:35:11 +0800 Subject: [PATCH 1890/2924] drm/edid: fixed the bug that hdr metadata was not reset When DP connected to a device with HDR capability, the hdr structure was filled.Then connected to another sink device without hdr capability, but the hdr info still exist. Fixes: e85959d6cbe0 ("drm: Parse HDR metadata info from EDID") Cc: # v5.3+ Signed-off-by: "feijuan.li" Reviewed-by: Jani Nikula Link: https://lore.kernel.org/r/20250514063511.4151780-1-feijuan.li@samsung.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_edid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 13bc4c290b17d5..9edb3247c767b8 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6596,6 +6596,7 @@ static void drm_reset_display_info(struct drm_connector *connector) info->has_hdmi_infoframe = false; info->rgb_quant_range_selectable = false; memset(&info->hdmi, 0, sizeof(info->hdmi)); + memset(&connector->hdr_sink_metadata, 0, sizeof(connector->hdr_sink_metadata)); info->edid_hdmi_rgb444_dc_modes = 0; info->edid_hdmi_ycbcr444_dc_modes = 0; From 239af1970bcb039a1551d2c438d113df0010c149 Mon Sep 17 00:00:00 2001 From: Ilia Gavrilov Date: Thu, 15 May 2025 12:20:15 +0000 Subject: [PATCH 1891/2924] llc: fix data loss when reading from a socket in llc_ui_recvmsg() For SOCK_STREAM sockets, if user buffer size (len) is less than skb size (skb->len), the remaining data from skb will be lost after calling kfree_skb(). To fix this, move the statement for partial reading above skb deletion. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) Fixes: 30a584d944fb ("[LLX]: SOCK_DGRAM interface fixes") Cc: stable@vger.kernel.org Signed-off-by: Ilia Gavrilov Signed-off-by: David S. Miller --- net/llc/af_llc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 0259cde394ba09..cc77ec5769d828 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -887,15 +887,15 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (sk->sk_type != SOCK_STREAM) goto copy_uaddr; + /* Partial read */ + if (used + offset < skb_len) + continue; + if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); *seq = 0; } - - /* Partial read */ - if (used + offset < skb_len) - continue; } while (len > 0); out: From 69c6d83d717317eab42835bc9fd54173c8de31ac Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 7 May 2025 10:42:00 -0500 Subject: [PATCH 1892/2924] dt-bindings: can: microchip,mcp2510: Fix $id path The "$id" value must match the relative path under bindings/ and is missing the "net" sub-directory. Fixes: 09328600c2f9 ("dt-bindings: can: convert microchip,mcp251x.txt to yaml") Signed-off-by: "Rob Herring (Arm)" Acked-by: Conor Dooley Link: https://patch.msgid.link/20250507154201.1589542-1-robh@kernel.org Signed-off-by: Marc Kleine-Budde --- .../devicetree/bindings/net/can/microchip,mcp2510.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml b/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml index e0ec53bc10c6f9..1525a50ded476f 100644 --- a/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml +++ b/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/can/microchip,mcp2510.yaml# +$id: http://devicetree.org/schemas/net/can/microchip,mcp2510.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Microchip MCP251X stand-alone CAN controller From 71c9475b1e2cc4d31370b1b7a328bdfeea5d53b4 Mon Sep 17 00:00:00 2001 From: Judith Mendez Date: Fri, 16 May 2025 15:31:21 -0500 Subject: [PATCH 1893/2924] mmc: sdhci_am654: Add SDHCI_QUIRK2_SUPPRESS_V1P8_ENA quirk to am62 compatible Add a new struct for platform data for the ti,am62-sdhci compatible to apply additional quirks, namely "SDHCI_QUIRK2_SUPPRESS_V1P8_ENA", to host controllers with am62 compatible. Note, the fix was originally introduced by commit 941a7abd4666 ("mmc: sdhci_am654: Add sdhci_am654_start_signal_voltage_switch") but was found to be applied too broadly and had to be reverted. This fixes MMC init failures seen across am62x boards. Fixes: ac5a41b472b4 ("Revert "mmc: sdhci_am654: Add sdhci_am654_start_signal_voltage_switch"") Fixes: 941a7abd4666 ("mmc: sdhci_am654: Add sdhci_am654_start_signal_voltage_switch") Cc: stable@vger.kernel.org Suggested-by: Nishanth Menon Signed-off-by: Judith Mendez Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20250516203121.3736379-1-jm@ti.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci_am654.c | 35 +++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index f75c31815ab00d..73385ff4c0f30b 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -155,6 +155,7 @@ struct sdhci_am654_data { u32 tuning_loop; #define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0) +#define SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA BIT(1) }; struct window { @@ -166,6 +167,7 @@ struct window { struct sdhci_am654_driver_data { const struct sdhci_pltfm_data *pdata; u32 flags; + u32 quirks; #define IOMUX_PRESENT (1 << 0) #define FREQSEL_2_BIT (1 << 1) #define STRBSEL_4_BIT (1 << 2) @@ -356,6 +358,29 @@ static void sdhci_j721e_4bit_set_clock(struct sdhci_host *host, sdhci_set_clock(host, clock); } +static int sdhci_am654_start_signal_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct sdhci_host *host = mmc_priv(mmc); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); + int ret; + + if ((sdhci_am654->quirks & SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA) && + ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180) { + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = mmc_regulator_set_vqmmc(mmc, ios); + if (ret < 0) { + pr_err("%s: Switching to 1.8V signalling voltage failed,\n", + mmc_hostname(mmc)); + return -EIO; + } + } + return 0; + } + + return sdhci_start_signal_voltage_switch(mmc, ios); +} + static u8 sdhci_am654_write_power_on(struct sdhci_host *host, u8 val, int reg) { writeb(val, host->ioaddr + reg); @@ -650,6 +675,12 @@ static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = { .flags = IOMUX_PRESENT, }; +static const struct sdhci_am654_driver_data sdhci_am62_4bit_drvdata = { + .pdata = &sdhci_j721e_4bit_pdata, + .flags = IOMUX_PRESENT, + .quirks = SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA, +}; + static const struct soc_device_attribute sdhci_am654_devices[] = { { .family = "AM65X", .revision = "SR1.0", @@ -872,7 +903,7 @@ static const struct of_device_id sdhci_am654_of_match[] = { }, { .compatible = "ti,am62-sdhci", - .data = &sdhci_j721e_4bit_drvdata, + .data = &sdhci_am62_4bit_drvdata, }, { /* sentinel */ } }; @@ -906,6 +937,7 @@ static int sdhci_am654_probe(struct platform_device *pdev) pltfm_host = sdhci_priv(host); sdhci_am654 = sdhci_pltfm_priv(pltfm_host); sdhci_am654->flags = drvdata->flags; + sdhci_am654->quirks = drvdata->quirks; clk_xin = devm_clk_get(dev, "clk_xin"); if (IS_ERR(clk_xin)) { @@ -940,6 +972,7 @@ static int sdhci_am654_probe(struct platform_device *pdev) goto err_pltfm_free; } + host->mmc_host_ops.start_signal_voltage_switch = sdhci_am654_start_signal_voltage_switch; host->mmc_host_ops.execute_tuning = sdhci_am654_execute_tuning; pm_runtime_get_noresume(dev); From cbed8287e5789558cf947a7b4d47fdf3e25c29bf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 14 May 2025 21:32:40 -0400 Subject: [PATCH 1894/2924] bcachefs: mkwrite() now only dirties one page Don't dirty the whole folio - fixes write amplification with applications doing mmaped writes. https://www.reddit.com/r/bcachefs/comments/1klzcg1/incredible_amounts_of_write_amplification_when/ Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-pagecache.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c index e072900e6a5b9e..fbae9c1de746a1 100644 --- a/fs/bcachefs/fs-io-pagecache.c +++ b/fs/bcachefs/fs-io-pagecache.c @@ -605,10 +605,14 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) struct address_space *mapping = file->f_mapping; struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch2_folio_reservation res; - unsigned len; - loff_t isize; vm_fault_t ret; + loff_t file_offset = round_down(vmf->pgoff << PAGE_SHIFT, block_bytes(c)); + unsigned offset = file_offset - folio_pos(folio); + unsigned len = max(PAGE_SIZE, block_bytes(c)); + + BUG_ON(offset + len > folio_size(folio)); + bch2_folio_reservation_init(c, inode, &res); sb_start_pagefault(inode->v.i_sb); @@ -623,24 +627,24 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) bch2_pagecache_add_get(inode); folio_lock(folio); - isize = i_size_read(&inode->v); + u64 isize = i_size_read(&inode->v); - if (folio->mapping != mapping || folio_pos(folio) >= isize) { + if (folio->mapping != mapping || file_offset >= isize) { folio_unlock(folio); ret = VM_FAULT_NOPAGE; goto out; } - len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio)); + len = min_t(unsigned, len, isize - file_offset); if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?: - bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) { + bch2_folio_reservation_get(c, inode, folio, &res, offset, len)) { folio_unlock(folio); ret = VM_FAULT_SIGBUS; goto out; } - bch2_set_folio_dirty(c, inode, folio, &res, 0, len); + bch2_set_folio_dirty(c, inode, folio, &res, offset, len); bch2_folio_reservation_put(c, inode, &res); folio_wait_stable(folio); From c2aba69d0c36a496ab4f2e81e9c2b271f2693fd7 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 19 May 2025 14:50:26 +0200 Subject: [PATCH 1895/2924] can: bcm: add locking for bcm_op runtime updates The CAN broadcast manager (CAN BCM) can send a sequence of CAN frames via hrtimer. The content and also the length of the sequence can be changed resp reduced at runtime where the 'currframe' counter is then set to zero. Although this appeared to be a safe operation the updates of 'currframe' can be triggered from user space and hrtimer context in bcm_can_tx(). Anderson Nascimento created a proof of concept that triggered a KASAN slab-out-of-bounds read access which can be prevented with a spin_lock_bh. At the rework of bcm_can_tx() the 'count' variable has been moved into the protected section as this variable can be modified from both contexts too. Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol") Reported-by: Anderson Nascimento Tested-by: Anderson Nascimento Reviewed-by: Marc Kleine-Budde Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20250519125027.11900-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 66 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 0bca1b9b3f7072..871707dab7db61 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -122,6 +123,7 @@ struct bcm_op { struct canfd_frame last_sframe; struct sock *sk; struct net_device *rx_reg_dev; + spinlock_t bcm_tx_lock; /* protect currframe/count in runtime updates */ }; struct bcm_sock { @@ -285,13 +287,18 @@ static void bcm_can_tx(struct bcm_op *op) { struct sk_buff *skb; struct net_device *dev; - struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe; + struct canfd_frame *cf; int err; /* no target device? => exit */ if (!op->ifindex) return; + /* read currframe under lock protection */ + spin_lock_bh(&op->bcm_tx_lock); + cf = op->frames + op->cfsiz * op->currframe; + spin_unlock_bh(&op->bcm_tx_lock); + dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (!dev) { /* RFC: should this bcm_op remove itself here? */ @@ -312,6 +319,10 @@ static void bcm_can_tx(struct bcm_op *op) skb->dev = dev; can_skb_set_owner(skb, op->sk); err = can_send(skb, 1); + + /* update currframe and count under lock protection */ + spin_lock_bh(&op->bcm_tx_lock); + if (!err) op->frames_abs++; @@ -320,6 +331,11 @@ static void bcm_can_tx(struct bcm_op *op) /* reached last frame? */ if (op->currframe >= op->nframes) op->currframe = 0; + + if (op->count > 0) + op->count--; + + spin_unlock_bh(&op->bcm_tx_lock); out: dev_put(dev); } @@ -430,7 +446,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) struct bcm_msg_head msg_head; if (op->kt_ival1 && (op->count > 0)) { - op->count--; + bcm_can_tx(op); if (!op->count && (op->flags & TX_COUNTEVT)) { /* create notification to user */ @@ -445,7 +461,6 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) bcm_send_to_user(op, &msg_head, NULL, 0); } - bcm_can_tx(op); } else if (op->kt_ival2) { bcm_can_tx(op); @@ -956,6 +971,27 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } op->flags = msg_head->flags; + /* only lock for unlikely count/nframes/currframe changes */ + if (op->nframes != msg_head->nframes || + op->flags & TX_RESET_MULTI_IDX || + op->flags & SETTIMER) { + + spin_lock_bh(&op->bcm_tx_lock); + + if (op->nframes != msg_head->nframes || + op->flags & TX_RESET_MULTI_IDX) { + /* potentially update changed nframes */ + op->nframes = msg_head->nframes; + /* restart multiple frame transmission */ + op->currframe = 0; + } + + if (op->flags & SETTIMER) + op->count = msg_head->count; + + spin_unlock_bh(&op->bcm_tx_lock); + } + } else { /* insert new BCM operation for the given can_id */ @@ -963,9 +999,14 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (!op) return -ENOMEM; + spin_lock_init(&op->bcm_tx_lock); op->can_id = msg_head->can_id; op->cfsiz = CFSIZ(msg_head->flags); op->flags = msg_head->flags; + op->nframes = msg_head->nframes; + + if (op->flags & SETTIMER) + op->count = msg_head->count; /* create array for CAN frames and copy the data */ if (msg_head->nframes > 1) { @@ -1023,22 +1064,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */ - if (op->nframes != msg_head->nframes) { - op->nframes = msg_head->nframes; - /* start multiple frame transmission with index 0 */ - op->currframe = 0; - } - - /* check flags */ - - if (op->flags & TX_RESET_MULTI_IDX) { - /* start multiple frame transmission with index 0 */ - op->currframe = 0; - } - if (op->flags & SETTIMER) { /* set timer values */ - op->count = msg_head->count; op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); @@ -1055,11 +1082,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->flags |= TX_ANNOUNCE; } - if (op->flags & TX_ANNOUNCE) { + if (op->flags & TX_ANNOUNCE) bcm_can_tx(op); - if (op->count) - op->count--; - } if (op->flags & STARTTIMER) bcm_tx_start_timer(op); From dac5e6249159ac255dad9781793dbe5908ac9ddb Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 19 May 2025 14:50:27 +0200 Subject: [PATCH 1896/2924] can: bcm: add missing rcu read protection for procfs content When the procfs content is generated for a bcm_op which is in the process to be removed the procfs output might show unreliable data (UAF). As the removal of bcm_op's is already implemented with rcu handling this patch adds the missing rcu_read_lock() and makes sure the list entries are properly removed under rcu protection. Fixes: f1b4e32aca08 ("can: bcm: use call_rcu() instead of costly synchronize_rcu()") Reported-by: Anderson Nascimento Suggested-by: Anderson Nascimento Tested-by: Anderson Nascimento Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20250519125027.11900-2-socketcan@hartkopp.net Cc: stable@vger.kernel.org # >= 5.4 Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 871707dab7db61..6bc1cc4c94c5ef 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -219,7 +219,9 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex)); seq_printf(m, " <<<\n"); - list_for_each_entry(op, &bo->rx_ops, list) { + rcu_read_lock(); + + list_for_each_entry_rcu(op, &bo->rx_ops, list) { unsigned long reduction; @@ -275,6 +277,9 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_printf(m, "# sent %ld\n", op->frames_abs); } seq_putc(m, '\n'); + + rcu_read_unlock(); + return 0; } #endif /* CONFIG_PROC_FS */ @@ -858,7 +863,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, REGMASK(op->can_id), bcm_rx_handler, op); - list_del(&op->list); + list_del_rcu(&op->list); bcm_remove_op(op); return 1; /* done */ } @@ -878,7 +883,7 @@ static int bcm_delete_tx_op(struct list_head *ops, struct bcm_msg_head *mh, list_for_each_entry_safe(op, n, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { - list_del(&op->list); + list_del_rcu(&op->list); bcm_remove_op(op); return 1; /* done */ } @@ -1296,7 +1301,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, bcm_rx_handler, op, "bcm", sk); if (err) { /* this bcm rx op is broken -> remove it */ - list_del(&op->list); + list_del_rcu(&op->list); bcm_remove_op(op); return err; } From bbd95160a03dbfcd01a541f25c27ddb730dfbbd5 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 10 Apr 2025 11:13:52 -0700 Subject: [PATCH 1897/2924] ice: fix vf->num_mac count with port representors The ice_vc_repr_add_mac() function indicates that it does not store the MAC address filters in the firmware. However, it still increments vf->num_mac. This is incorrect, as vf->num_mac should represent the number of MAC filters currently programmed to firmware. Indeed, we only perform this increment if the requested filter is a unicast address that doesn't match the existing vf->hw_lan_addr. In addition, ice_vc_repr_del_mac() does not decrement the vf->num_mac counter. This results in the counter becoming out of sync with the actual count. As it turns out, vf->num_mac is currently only used in legacy made without port representors. The single place where the value is checked is for enforcing a filter limit on untrusted VFs. Upcoming patches to support VF Live Migration will use this value when determining the size of the TLV for MAC address filters. Fix the representor mode function to stop incrementing the counter incorrectly. Fixes: ac19e03ef780 ("ice: allow process VF opcodes in different ways") Signed-off-by: Jacob Keller Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 7c3006eb68dd07..6446d0fcc05286 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -4275,7 +4275,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) } ice_vfhw_mac_add(vf, &al->list[i]); - vf->num_mac++; break; } From 6c778f1b839b63525b30046c9d1899424a62be0a Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Mon, 28 Apr 2025 15:33:39 -0400 Subject: [PATCH 1898/2924] ice: Fix LACP bonds without SRIOV environment If an aggregate has the following conditions: - The SRIOV LAG DDP package has been enabled - The bond is in 802.3ad LACP mode - The bond is disqualified from supporting SRIOV VF LAG - Both interfaces were added simultaneously to the bond (same command) Then there is a chance that the two interfaces will be assigned different LACP Aggregator ID's. This will cause a failure of the LACP control over the bond. To fix this, we can detect if the primary interface for the bond (as defined by the driver) is not in switchdev mode, and exit the setup flow if so. Reproduction steps: %> ip link add bond0 type bond mode 802.3ad miimon 100 %> ip link set bond0 up %> ifenslave bond0 eth0 eth1 %> cat /proc/net/bonding/bond0 | grep Agg Check for Aggregator IDs that differ. Fixes: ec5a6c5f79ed ("ice: process events created by lag netdev event handler") Reviewed-by: Aleksandr Loktionov Signed-off-by: Dave Ertman Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lag.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 22371011c24928..2410aee59fb2d5 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1321,12 +1321,18 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) */ if (!primary_lag) { lag->primary = true; + if (!ice_is_switchdev_running(lag->pf)) + return; + /* Configure primary's SWID to be shared */ ice_lag_primary_swid(lag, true); primary_lag = lag; } else { u16 swid; + if (!ice_is_switchdev_running(primary_lag->pf)) + return; + swid = primary_lag->pf->hw.port_info->sw_id; ice_lag_set_swid(swid, lag, true); ice_lag_add_prune_list(primary_lag, lag->pf); From 2dabe349f7882ff1407a784d54d8541909329088 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Linga Date: Fri, 11 Apr 2025 09:00:35 -0700 Subject: [PATCH 1899/2924] idpf: fix null-ptr-deref in idpf_features_check idpf_features_check is used to validate the TX packet. skb header length is compared with the hardware supported value received from the device control plane. The value is stored in the adapter structure and to access it, vport pointer is used. During reset all the vports are released and the vport pointer that the netdev private structure points to is NULL. To avoid null-ptr-deref, store the max header length value in netdev private structure. This also helps to cache the value and avoid accessing adapter pointer in hot path. BUG: kernel NULL pointer dereference, address: 0000000000000068 ... RIP: 0010:idpf_features_check+0x6d/0xe0 [idpf] Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x154/0x520 ? exc_page_fault+0x76/0x190 ? asm_exc_page_fault+0x26/0x30 ? idpf_features_check+0x6d/0xe0 [idpf] netif_skb_features+0x88/0x310 validate_xmit_skb+0x2a/0x2b0 validate_xmit_skb_list+0x4c/0x70 sch_direct_xmit+0x19d/0x3a0 __dev_queue_xmit+0xb74/0xe70 ... Fixes: a251eee62133 ("idpf: add SRIOV support and other ndo_ops") Reviewed-by: Madhu Chititm Signed-off-by: Pavan Kumar Linga Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf.h | 2 ++ drivers/net/ethernet/intel/idpf/idpf_lib.c | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index aef0e9775a3305..70dbf80f3bb75b 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -143,6 +143,7 @@ enum idpf_vport_state { * @vport_id: Vport identifier * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index + * @max_tx_hdr_size: Max header length hardware can support * @state: See enum idpf_vport_state * @netstats: Packet and byte stats * @stats_lock: Lock to protect stats update @@ -153,6 +154,7 @@ struct idpf_netdev_priv { u32 vport_id; u32 link_speed_mbps; u16 vport_idx; + u16 max_tx_hdr_size; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; spinlock_t stats_lock; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 82f09b4030bce2..3a033ce19cda23 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -723,6 +723,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->vport = vport; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); vport->netdev = netdev; return idpf_init_mac_addr(vport, netdev); @@ -740,6 +741,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->adapter = adapter; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); spin_lock_init(&np->stats_lock); @@ -2203,8 +2205,8 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features) { - struct idpf_vport *vport = idpf_netdev_to_vport(netdev); - struct idpf_adapter *adapter = vport->adapter; + struct idpf_netdev_priv *np = netdev_priv(netdev); + u16 max_tx_hdr_size = np->max_tx_hdr_size; size_t len; /* No point in doing any of this if neither checksum nor GSO are @@ -2227,7 +2229,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, goto unsupported; len = skb_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; if (!skb->encapsulation) @@ -2240,7 +2242,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, /* IPLEN can support at most 127 dwords */ len = skb_inner_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; /* No need to validate L4LEN as TCP is the only protocol with a From ca39500f6af9cfe6823dc5aa8fbaed788d6e35b2 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 5 May 2025 15:49:59 -0700 Subject: [PATCH 1900/2924] Input: synaptics-rmi - fix crash with unsupported versions of F34 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sysfs interface for updating firmware for RMI devices is available even when F34 probe fails. The code checks for presence of F34 "container" pointer and then tries to use the function data attached to the sub-device. F34 assigns the function data early, before it knows if probe will succeed, leaving behind a stale pointer. Fix this by expanding checks to not only test for presence of F34 "container" but also check if there is driver data assigned to the sub-device, and call dev_set_drvdata() only after we are certain that probe is successful. This is not a complete fix, since F34 will be freed during firmware update, so there is still a race when fetching and accessing this pointer. This race will be addressed in follow-up changes. Reported-by: Hanno Böck Fixes: 29fd0ec2bdbe ("Input: synaptics-rmi4 - add support for F34 device reflash") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/aBlAl6sGulam-Qcx@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f34.c | 135 ++++++++++++++++++++--------------- 1 file changed, 76 insertions(+), 59 deletions(-) diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c index d760af4cc12efb..f1947f03b06af6 100644 --- a/drivers/input/rmi4/rmi_f34.c +++ b/drivers/input/rmi4/rmi_f34.c @@ -4,6 +4,7 @@ * Copyright (C) 2016 Zodiac Inflight Innovations */ +#include "linux/device.h" #include #include #include @@ -289,39 +290,30 @@ static int rmi_f34_update_firmware(struct f34_data *f34, return rmi_f34_flash_firmware(f34, syn_fw); } -static int rmi_f34_status(struct rmi_function *fn) -{ - struct f34_data *f34 = dev_get_drvdata(&fn->dev); - - /* - * The status is the percentage complete, or once complete, - * zero for success or a negative return code. - */ - return f34->update_status; -} - static ssize_t rmi_driver_bootloader_id_show(struct device *dev, struct device_attribute *dattr, char *buf) { struct rmi_driver_data *data = dev_get_drvdata(dev); - struct rmi_function *fn = data->f34_container; + struct rmi_function *fn; struct f34_data *f34; - if (fn) { - f34 = dev_get_drvdata(&fn->dev); - - if (f34->bl_version == 5) - return sysfs_emit(buf, "%c%c\n", - f34->bootloader_id[0], - f34->bootloader_id[1]); - else - return sysfs_emit(buf, "V%d.%d\n", - f34->bootloader_id[1], - f34->bootloader_id[0]); - } + fn = data->f34_container; + if (!fn) + return -ENODEV; - return 0; + f34 = dev_get_drvdata(&fn->dev); + if (!f34) + return -ENODEV; + + if (f34->bl_version == 5) + return sysfs_emit(buf, "%c%c\n", + f34->bootloader_id[0], + f34->bootloader_id[1]); + else + return sysfs_emit(buf, "V%d.%d\n", + f34->bootloader_id[1], + f34->bootloader_id[0]); } static DEVICE_ATTR(bootloader_id, 0444, rmi_driver_bootloader_id_show, NULL); @@ -334,13 +326,16 @@ static ssize_t rmi_driver_configuration_id_show(struct device *dev, struct rmi_function *fn = data->f34_container; struct f34_data *f34; - if (fn) { - f34 = dev_get_drvdata(&fn->dev); + fn = data->f34_container; + if (!fn) + return -ENODEV; + + f34 = dev_get_drvdata(&fn->dev); + if (!f34) + return -ENODEV; - return sysfs_emit(buf, "%s\n", f34->configuration_id); - } - return 0; + return sysfs_emit(buf, "%s\n", f34->configuration_id); } static DEVICE_ATTR(configuration_id, 0444, @@ -356,10 +351,14 @@ static int rmi_firmware_update(struct rmi_driver_data *data, if (!data->f34_container) { dev_warn(dev, "%s: No F34 present!\n", __func__); - return -EINVAL; + return -ENODEV; } f34 = dev_get_drvdata(&data->f34_container->dev); + if (!f34) { + dev_warn(dev, "%s: No valid F34 present!\n", __func__); + return -ENODEV; + } if (f34->bl_version >= 7) { if (data->pdt_props & HAS_BSR) { @@ -485,10 +484,18 @@ static ssize_t rmi_driver_update_fw_status_show(struct device *dev, char *buf) { struct rmi_driver_data *data = dev_get_drvdata(dev); - int update_status = 0; + struct f34_data *f34; + int update_status = -ENODEV; - if (data->f34_container) - update_status = rmi_f34_status(data->f34_container); + /* + * The status is the percentage complete, or once complete, + * zero for success or a negative return code. + */ + if (data->f34_container) { + f34 = dev_get_drvdata(&data->f34_container->dev); + if (f34) + update_status = f34->update_status; + } return sysfs_emit(buf, "%d\n", update_status); } @@ -508,33 +515,21 @@ static const struct attribute_group rmi_firmware_attr_group = { .attrs = rmi_firmware_attrs, }; -static int rmi_f34_probe(struct rmi_function *fn) +static int rmi_f34v5_probe(struct f34_data *f34) { - struct f34_data *f34; - unsigned char f34_queries[9]; + struct rmi_function *fn = f34->fn; + u8 f34_queries[9]; bool has_config_id; - u8 version = fn->fd.function_version; - int ret; - - f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL); - if (!f34) - return -ENOMEM; - - f34->fn = fn; - dev_set_drvdata(&fn->dev, f34); - - /* v5 code only supported version 0, try V7 probe */ - if (version > 0) - return rmi_f34v7_probe(f34); + int error; f34->bl_version = 5; - ret = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, - f34_queries, sizeof(f34_queries)); - if (ret) { + error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, + f34_queries, sizeof(f34_queries)); + if (error) { dev_err(&fn->dev, "%s: Failed to query properties\n", __func__); - return ret; + return error; } snprintf(f34->bootloader_id, sizeof(f34->bootloader_id), @@ -560,11 +555,11 @@ static int rmi_f34_probe(struct rmi_function *fn) f34->v5.config_blocks); if (has_config_id) { - ret = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr, - f34_queries, sizeof(f34_queries)); - if (ret) { + error = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr, + f34_queries, sizeof(f34_queries)); + if (error) { dev_err(&fn->dev, "Failed to read F34 config ID\n"); - return ret; + return error; } snprintf(f34->configuration_id, sizeof(f34->configuration_id), @@ -573,12 +568,34 @@ static int rmi_f34_probe(struct rmi_function *fn) f34_queries[2], f34_queries[3]); rmi_dbg(RMI_DEBUG_FN, &fn->dev, "Configuration ID: %s\n", - f34->configuration_id); + f34->configuration_id); } return 0; } +static int rmi_f34_probe(struct rmi_function *fn) +{ + struct f34_data *f34; + u8 version = fn->fd.function_version; + int error; + + f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL); + if (!f34) + return -ENOMEM; + + f34->fn = fn; + + /* v5 code only supported version 0 */ + error = version == 0 ? rmi_f34v5_probe(f34) : rmi_f34v7_probe(f34); + if (error) + return error; + + dev_set_drvdata(&fn->dev, f34); + + return 0; +} + int rmi_f34_create_sysfs(struct rmi_device *rmi_dev) { return sysfs_create_group(&rmi_dev->dev.kobj, &rmi_firmware_attr_group); From cf948c8e274e8b406e846cdf6cc48fe47f98cf57 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 19 May 2025 15:09:01 +0800 Subject: [PATCH 1901/2924] thermal: intel: x86_pkg_temp_thermal: Fix bogus trip temperature The tj_max value obtained from the Intel TCC library are in Celsius, whereas the thermal subsystem operates in milli-Celsius. This discrepancy leads to incorrect trip temperature calculations. Fix bogus trip temperature by converting tj_max to milli-Celsius Unit. Fixes: 8ef0ca4a177d ("Merge back other thermal control material for 6.3.") Signed-off-by: Zhang Rui Reported-by: zhang ning Closes: https://lore.kernel.org/all/TY2PR01MB3786EF0FE24353026293F5ACCD97A@TY2PR01MB3786.jpnprd01.prod.outlook.com/ Tested-by: zhang ning Cc: 6.3+ # 6.3+ Link: https://patch.msgid.link/20250519070901.1031233-1-rui.zhang@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/x86_pkg_temp_thermal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c index 496abf8e55e0d5..2841d14914b710 100644 --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c @@ -329,6 +329,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) tj_max = intel_tcc_get_tjmax(cpu); if (tj_max < 0) return tj_max; + tj_max *= 1000; zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL); if (!zonedev) From e48f9d849bfdec276eebf782a84fd4dfbe1c14c0 Mon Sep 17 00:00:00 2001 From: Wang Zhaolong Date: Fri, 16 May 2025 17:12:56 +0800 Subject: [PATCH 1902/2924] smb: client: Reset all search buffer pointers when releasing buffer Multiple pointers in struct cifs_search_info (ntwrk_buf_start, srch_entries_start, and last_entry) point to the same allocated buffer. However, when freeing this buffer, only ntwrk_buf_start was set to NULL, while the other pointers remained pointing to freed memory. This is defensive programming to prevent potential issues with stale pointers. While the active UAF vulnerability is fixed by the previous patch, this change ensures consistent pointer state and more robust error handling. Signed-off-by: Wang Zhaolong Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/readdir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 67d7dd64b5e2e2..787d6bcb5d1dc4 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -733,7 +733,10 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, else cifs_buf_release(cfile->srch_inf. ntwrk_buf_start); + /* Reset all pointers to the network buffer to prevent stale references */ cfile->srch_inf.ntwrk_buf_start = NULL; + cfile->srch_inf.srch_entries_start = NULL; + cfile->srch_inf.last_entry = NULL; } rc = initiate_cifs_search(xid, file, full_path); if (rc) { From 1f4bbedd4e5a69b01cde2cc21d01151ab2d0884f Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 8 May 2025 16:46:11 +0900 Subject: [PATCH 1903/2924] ksmbd: fix stream write failure If there is no stream data in file, v_len is zero. So, If position(*pos) is zero, stream write will fail due to stream write position validation check. This patch reorganize stream write position validation. Fixes: 0ca6df4f40cf ("ksmbd: prevent out-of-bounds stream writes by validating *pos") Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 482eba0f4dc11a..156ded9ac889bd 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -409,10 +409,15 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n", *pos, count); + if (*pos >= XATTR_SIZE_MAX) { + pr_err("stream write position %lld is out of bounds\n", *pos); + return -EINVAL; + } + size = *pos + count; if (size > XATTR_SIZE_MAX) { size = XATTR_SIZE_MAX; - count = (*pos + count) - XATTR_SIZE_MAX; + count = XATTR_SIZE_MAX - *pos; } v_len = ksmbd_vfs_getcasexattr(idmap, @@ -426,13 +431,6 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, goto out; } - if (v_len <= *pos) { - pr_err("stream write position %lld is out of bounds (stream length: %zd)\n", - *pos, v_len); - err = -EINVAL; - goto out; - } - if (v_len < size) { wbuf = kvzalloc(size, KSMBD_DEFAULT_GFP); if (!wbuf) { From 8d70503068510e6080c2c649cccb154f16de26c9 Mon Sep 17 00:00:00 2001 From: Ed Burcher Date: Mon, 19 May 2025 23:49:07 +0100 Subject: [PATCH 1904/2924] ALSA: hda/realtek: Add quirk for Lenovo Yoga Pro 7 14ASP10 Lenovo Yoga Pro 7 (gen 10) with Realtek ALC3306 and combined CS35L56 amplifiers need quirk ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN to enable bass Signed-off-by: Ed Burcher Cc: Link: https://patch.msgid.link/20250519224907.31265-2-git@edburcher.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index dcfaddc3ae133f..a426d9882702a5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11303,6 +11303,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38fa, "Thinkbook 16P Gen5", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x38fd, "ThinkBook plus Gen5 Hybrid", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), + SND_PCI_QUIRK(0x17aa, 0x390d, "Lenovo Yoga Pro 7 14ASP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3913, "Lenovo 145", ALC236_FIXUP_LENOVO_INV_DMIC), SND_PCI_QUIRK(0x17aa, 0x391f, "Yoga S990-16 pro Quad YC Quad", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3920, "Yoga S990-16 pro Quad VECO Quad", ALC287_FIXUP_TAS2781_I2C), From 29e4e6b4235fefa5930affb531fe449cac330a72 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Fri, 16 May 2025 22:33:37 -0400 Subject: [PATCH 1905/2924] platform/x86: thinkpad_acpi: Ignore battery threshold change event notification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If user modifies the battery charge threshold an ACPI event is generated. Confirmed with Lenovo FW team this is only generated on user event. As no action is needed, ignore the event and prevent spurious kernel logs. Reported-by: Derek Barbosa Closes: https://lore.kernel.org/platform-driver-x86/7e9a1c47-5d9c-4978-af20-3949d53fb5dc@app.fastmail.com/T/#m5f5b9ae31d3fbf30d7d9a9d76c15fb3502dfd903 Signed-off-by: Mark Pearson Reviewed-by: Hans de Goede Reviewed-by: Armin Wolf Link: https://lore.kernel.org/r/20250517023348.2962591-1-mpearson-lenovo@squebb.ca Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/thinkpad_acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 92b21e49faf62b..657625dd60a06c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -231,6 +231,7 @@ enum tpacpi_hkey_event_t { /* Thermal events */ TP_HKEY_EV_ALARM_BAT_HOT = 0x6011, /* battery too hot */ TP_HKEY_EV_ALARM_BAT_XHOT = 0x6012, /* battery critically hot */ + TP_HKEY_EV_ALARM_BAT_LIM_CHANGE = 0x6013, /* battery charge limit changed*/ TP_HKEY_EV_ALARM_SENSOR_HOT = 0x6021, /* sensor too hot */ TP_HKEY_EV_ALARM_SENSOR_XHOT = 0x6022, /* sensor critically hot */ TP_HKEY_EV_THM_TABLE_CHANGED = 0x6030, /* windows; thermal table changed */ @@ -3777,6 +3778,10 @@ static bool hotkey_notify_6xxx(const u32 hkey, bool *send_acpi_ev) pr_alert("THERMAL EMERGENCY: battery is extremely hot!\n"); /* recommended action: immediate sleep/hibernate */ break; + case TP_HKEY_EV_ALARM_BAT_LIM_CHANGE: + pr_debug("Battery Info: battery charge threshold changed\n"); + /* User changed charging threshold. No action needed */ + return true; case TP_HKEY_EV_ALARM_SENSOR_HOT: pr_crit("THERMAL ALARM: a sensor reports something is too hot!\n"); /* recommended action: warn user through gui, that */ From 8508427a6e21c1ef01ae4c9f4e2675fc99deb949 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 19 May 2025 20:50:18 -0400 Subject: [PATCH 1906/2924] platform/x86: think-lmi: Fix attribute name usage for non-compliant items MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few, quite rare, WMI attributes have names that are not compatible with filenames, e.g. "Intel VT for Directed I/O (VT-d)". For these cases the '/' gets replaced with '\' for display, but doesn't get switched again when doing the WMI access. Fix this by keeping the original attribute name and using that for sending commands to the BIOS Fixes: a40cd7ef22fb ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms") Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20250520005027.3840705-1-mpearson-lenovo@squebb.ca Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/think-lmi.c | 26 ++++++++++++++------------ drivers/platform/x86/think-lmi.h | 1 + 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 0fc275e461be40..00b1e7c79a3d1e 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -1061,8 +1061,8 @@ static ssize_t current_value_store(struct kobject *kobj, ret = -EINVAL; goto out; } - set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->display_name, - new_setting, tlmi_priv.pwd_admin->signature); + set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->name, + new_setting, tlmi_priv.pwd_admin->signature); if (!set_str) { ret = -ENOMEM; goto out; @@ -1092,7 +1092,7 @@ static ssize_t current_value_store(struct kobject *kobj, goto out; } - set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name, + set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->name, new_setting); if (!set_str) { ret = -ENOMEM; @@ -1120,11 +1120,11 @@ static ssize_t current_value_store(struct kobject *kobj, } if (auth_str) - set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->display_name, - new_setting, auth_str); + set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->name, + new_setting, auth_str); else - set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name, - new_setting); + set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->name, + new_setting); if (!set_str) { ret = -ENOMEM; goto out; @@ -1629,9 +1629,6 @@ static int tlmi_analyze(struct wmi_device *wdev) continue; } - /* It is not allowed to have '/' for file name. Convert it into '\'. */ - strreplace(item, '/', '\\'); - /* Remove the value part */ strreplace(item, ',', '\0'); @@ -1644,11 +1641,16 @@ static int tlmi_analyze(struct wmi_device *wdev) } setting->wdev = wdev; setting->index = i; + + strscpy(setting->name, item); + /* It is not allowed to have '/' for file name. Convert it into '\'. */ + strreplace(item, '/', '\\'); strscpy(setting->display_name, item); + /* If BIOS selections supported, load those */ if (tlmi_priv.can_get_bios_selections) { - ret = tlmi_get_bios_selections(setting->display_name, - &setting->possible_values); + ret = tlmi_get_bios_selections(setting->name, + &setting->possible_values); if (ret || !setting->possible_values) pr_info("Error retrieving possible values for %d : %s\n", i, setting->display_name); diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h index a8045248222729..9b014644d31610 100644 --- a/drivers/platform/x86/think-lmi.h +++ b/drivers/platform/x86/think-lmi.h @@ -90,6 +90,7 @@ struct tlmi_attr_setting { struct kobject kobj; struct wmi_device *wdev; int index; + char name[TLMI_SETTINGS_MAXLEN]; char display_name[TLMI_SETTINGS_MAXLEN]; char *possible_values; }; From 7150d57c370f9e61b7d0e82c58002f1c5a205ac4 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Tue, 20 May 2025 13:47:43 +0100 Subject: [PATCH 1907/2924] ALSA: hda/realtek: Add support for HP Agusta using CS35L41 HDA Add support for HP Agusta. Laptops use 2 CS35L41 Amps with HDA, using Internal boost, with I2C Signed-off-by: Stefan Binding Cc: Link: https://patch.msgid.link/20250520124757.12597-1-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a426d9882702a5..69788dd9a1ec95 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10888,6 +10888,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), From f709b78aecab519dbcefa9a6603b94ad18c553e3 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Tue, 20 May 2025 21:21:01 +0800 Subject: [PATCH 1908/2924] ALSA: hda/realtek - Add new HP ZBook laptop with micmute led fixup New HP ZBook with Realtek HDA codec ALC3247 needs the quirk ALC236_FIXUP_HP_GPIO_LED to fix the micmute LED. Signed-off-by: Chris Chiu Cc: Link: https://patch.msgid.link/20250520132101.120685-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 69788dd9a1ec95..20ab1fb2195ff6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10885,6 +10885,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), + SND_PCI_QUIRK(0x103c, 0x8e1d, "HP ZBook X Gli 16 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), From 219bf6edd7efcea9eca53c44c8dc3d1c6437f8b8 Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Tue, 13 May 2025 20:17:45 -0400 Subject: [PATCH 1909/2924] orangefs: adjust counting code to recover from 665575cf A late commit to 6.14-rc7! broke orangefs. 665575cf seems like a good change, but maybe should have been introduced during the merge window. This patch adjusts the counting code associated with writing out pages so that orangefs works in a 665575cf world. Signed-off-by: Mike Marshall --- fs/orangefs/inode.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 5ac743c6bc2ed5..08a6f372a352f8 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -32,12 +32,13 @@ static int orangefs_writepage_locked(struct folio *folio, len = i_size_read(inode); if (folio->private) { wr = folio->private; - WARN_ON(wr->pos >= len); off = wr->pos; - if (off + wr->len > len) + if ((off + wr->len > len) && (off <= len)) wlen = len - off; else wlen = wr->len; + if (wlen == 0) + wlen = wr->len; } else { WARN_ON(1); off = folio_pos(folio); @@ -46,8 +47,6 @@ static int orangefs_writepage_locked(struct folio *folio, if (wlen > len - off) wlen = len - off; } - /* Should've been handled in orangefs_invalidate_folio. */ - WARN_ON(off == len || off + wlen > len); WARN_ON(wlen == 0); bvec_set_folio(&bv, folio, wlen, offset_in_folio(folio, off)); @@ -320,6 +319,8 @@ static int orangefs_write_begin(struct file *file, wr->len += len; goto okay; } else { + wr->pos = pos; + wr->len = len; ret = orangefs_launder_folio(folio); if (ret) return ret; From 355341e4359b2d5edf0ed5e117f7e9e7a0a5dac0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 May 2025 15:54:20 +0200 Subject: [PATCH 1910/2924] loop: don't require ->write_iter for writable files in loop_configure Block devices can be opened read-write even if they can't be written to for historic reasons. Remove the check requiring file->f_op->write_iter when the block devices was opened in loop_configure. The call to loop_check_backing_file just below ensures the ->write_iter is present for backing files opened for writing, which is the only check that is actually needed. Fixes: f5c84eff634b ("loop: Add sanity check for read/write_iter") Reported-by: Christian Hesse Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250520135420.1177312-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b8ba7de0875395..e2b1f377f58563 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -979,9 +979,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, if (!file) return -EBADF; - if ((mode & BLK_OPEN_WRITE) && !file->f_op->write_iter) - return -EINVAL; - error = loop_check_backing_file(file); if (error) return error; From 9176bd205ee0b2cd35073a9973c2a0936bcb579e Mon Sep 17 00:00:00 2001 From: Axel Forsman Date: Tue, 20 May 2025 13:43:30 +0200 Subject: [PATCH 1911/2924] can: kvaser_pciefd: Force IRQ edge in case of nested IRQ Avoid the driver missing IRQs by temporarily masking IRQs in the ISR to enforce an edge even if a different IRQ is signalled before handled IRQs are cleared. Fixes: 48f827d4f48f ("can: kvaser_pciefd: Move reset of DMA RX buffers to the end of the ISR") Cc: stable@vger.kernel.org Signed-off-by: Axel Forsman Tested-by: Jimmy Assarsson Reviewed-by: Jimmy Assarsson Link: https://patch.msgid.link/20250520114332.8961-2-axfo@kvaser.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 81 ++++++++++++++++----------------- 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index cf0d5180527232..9cc9176c205815 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1646,24 +1646,28 @@ static int kvaser_pciefd_read_buffer(struct kvaser_pciefd *pcie, int dma_buf) return res; } -static u32 kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) +static void kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) { + void __iomem *srb_cmd_reg = KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG; u32 irq = ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); - if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) + iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); + + if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) { kvaser_pciefd_read_buffer(pcie, 0); + iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, srb_cmd_reg); /* Rearm buffer */ + } - if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) + if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) { kvaser_pciefd_read_buffer(pcie, 1); + iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, srb_cmd_reg); /* Rearm buffer */ + } if (unlikely(irq & KVASER_PCIEFD_SRB_IRQ_DOF0 || irq & KVASER_PCIEFD_SRB_IRQ_DOF1 || irq & KVASER_PCIEFD_SRB_IRQ_DUF0 || irq & KVASER_PCIEFD_SRB_IRQ_DUF1)) dev_err(&pcie->pci->dev, "DMA IRQ error 0x%08X\n", irq); - - iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); - return irq; } static void kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can) @@ -1691,29 +1695,22 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) struct kvaser_pciefd *pcie = (struct kvaser_pciefd *)dev; const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask; u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie)); - u32 srb_irq = 0; - u32 srb_release = 0; int i; if (!(pci_irq & irq_mask->all)) return IRQ_NONE; + iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); + if (pci_irq & irq_mask->kcan_rx0) - srb_irq = kvaser_pciefd_receive_irq(pcie); + kvaser_pciefd_receive_irq(pcie); for (i = 0; i < pcie->nr_channels; i++) { if (pci_irq & irq_mask->kcan_tx[i]) kvaser_pciefd_transmit_irq(pcie->can[i]); } - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) - srb_release |= KVASER_PCIEFD_SRB_CMD_RDB0; - - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) - srb_release |= KVASER_PCIEFD_SRB_CMD_RDB1; - - if (srb_release) - iowrite32(srb_release, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); + iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); return IRQ_HANDLED; } @@ -1733,13 +1730,22 @@ static void kvaser_pciefd_teardown_can_ctrls(struct kvaser_pciefd *pcie) } } +static void kvaser_pciefd_disable_irq_srcs(struct kvaser_pciefd *pcie) +{ + unsigned int i; + + /* Masking PCI_IRQ is insufficient as running ISR will unmask it */ + iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG); + for (i = 0; i < pcie->nr_channels; ++i) + iowrite32(0, pcie->can[i]->reg_base + KVASER_PCIEFD_KCAN_IEN_REG); +} + static int kvaser_pciefd_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int ret; struct kvaser_pciefd *pcie; const struct kvaser_pciefd_irq_mask *irq_mask; - void __iomem *irq_en_base; pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) @@ -1805,8 +1811,7 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG); /* Enable PCI interrupts */ - irq_en_base = KVASER_PCIEFD_PCI_IEN_ADDR(pcie); - iowrite32(irq_mask->all, irq_en_base); + iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); /* Ready the DMA buffers */ iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); @@ -1820,8 +1825,7 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, return 0; err_free_irq: - /* Disable PCI interrupts */ - iowrite32(0, irq_en_base); + kvaser_pciefd_disable_irq_srcs(pcie); free_irq(pcie->pci->irq, pcie); err_pci_free_irq_vectors: @@ -1844,35 +1848,26 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, return ret; } -static void kvaser_pciefd_remove_all_ctrls(struct kvaser_pciefd *pcie) -{ - int i; - - for (i = 0; i < pcie->nr_channels; i++) { - struct kvaser_pciefd_can *can = pcie->can[i]; - - if (can) { - iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG); - unregister_candev(can->can.dev); - timer_delete(&can->bec_poll_timer); - kvaser_pciefd_pwm_stop(can); - free_candev(can->can.dev); - } - } -} - static void kvaser_pciefd_remove(struct pci_dev *pdev) { struct kvaser_pciefd *pcie = pci_get_drvdata(pdev); + unsigned int i; - kvaser_pciefd_remove_all_ctrls(pcie); + for (i = 0; i < pcie->nr_channels; ++i) { + struct kvaser_pciefd_can *can = pcie->can[i]; - /* Disable interrupts */ - iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG); - iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); + unregister_candev(can->can.dev); + timer_delete(&can->bec_poll_timer); + kvaser_pciefd_pwm_stop(can); + } + kvaser_pciefd_disable_irq_srcs(pcie); free_irq(pcie->pci->irq, pcie); pci_free_irq_vectors(pcie->pci); + + for (i = 0; i < pcie->nr_channels; ++i) + free_candev(pcie->can[i]->can.dev); + pci_iounmap(pdev, pcie->reg_base); pci_release_regions(pdev); pci_disable_device(pdev); From 8256e0ca601051933e9395746817f3801fa9a6bf Mon Sep 17 00:00:00 2001 From: Axel Forsman Date: Tue, 20 May 2025 13:43:31 +0200 Subject: [PATCH 1912/2924] can: kvaser_pciefd: Fix echo_skb race The functions kvaser_pciefd_start_xmit() and kvaser_pciefd_handle_ack_packet() raced to stop/wake TX queues and get/put echo skbs, as kvaser_pciefd_can->echo_lock was only ever taken when transmitting and KCAN_TX_NR_PACKETS_CURRENT gets decremented prior to handling of ACKs. E.g., this caused the following error: can_put_echo_skb: BUG! echo_skb 5 is occupied! Instead, use the synchronization helpers in netdev_queues.h. As those piggyback on BQL barriers, start updating in-flight packets and bytes counts as well. Cc: stable@vger.kernel.org Signed-off-by: Axel Forsman Tested-by: Jimmy Assarsson Reviewed-by: Jimmy Assarsson Link: https://patch.msgid.link/20250520114332.8961-3-axfo@kvaser.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 93 +++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 34 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 9cc9176c205815..a61cbade96d94d 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -16,6 +16,7 @@ #include #include #include +#include MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Kvaser AB "); @@ -410,10 +411,13 @@ struct kvaser_pciefd_can { void __iomem *reg_base; struct can_berr_counter bec; u8 cmd_seq; + u8 tx_max_count; + u8 tx_idx; + u8 ack_idx; int err_rep_cnt; - int echo_idx; + unsigned int completed_tx_pkts; + unsigned int completed_tx_bytes; spinlock_t lock; /* Locks sensitive registers (e.g. MODE) */ - spinlock_t echo_lock; /* Locks the message echo buffer */ struct timer_list bec_poll_timer; struct completion start_comp, flush_comp; }; @@ -714,6 +718,9 @@ static int kvaser_pciefd_open(struct net_device *netdev) int ret; struct kvaser_pciefd_can *can = netdev_priv(netdev); + can->tx_idx = 0; + can->ack_idx = 0; + ret = open_candev(netdev); if (ret) return ret; @@ -745,21 +752,26 @@ static int kvaser_pciefd_stop(struct net_device *netdev) timer_delete(&can->bec_poll_timer); } can->can.state = CAN_STATE_STOPPED; + netdev_reset_queue(netdev); close_candev(netdev); return ret; } +static unsigned int kvaser_pciefd_tx_avail(const struct kvaser_pciefd_can *can) +{ + return can->tx_max_count - (READ_ONCE(can->tx_idx) - READ_ONCE(can->ack_idx)); +} + static int kvaser_pciefd_prepare_tx_packet(struct kvaser_pciefd_tx_packet *p, - struct kvaser_pciefd_can *can, + struct can_priv *can, u8 seq, struct sk_buff *skb) { struct canfd_frame *cf = (struct canfd_frame *)skb->data; int packet_size; - int seq = can->echo_idx; memset(p, 0, sizeof(*p)); - if (can->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) + if (can->ctrlmode & CAN_CTRLMODE_ONE_SHOT) p->header[1] |= KVASER_PCIEFD_TPACKET_SMS; if (cf->can_id & CAN_RTR_FLAG) @@ -782,7 +794,7 @@ static int kvaser_pciefd_prepare_tx_packet(struct kvaser_pciefd_tx_packet *p, } else { p->header[1] |= FIELD_PREP(KVASER_PCIEFD_RPACKET_DLC_MASK, - can_get_cc_dlc((struct can_frame *)cf, can->can.ctrlmode)); + can_get_cc_dlc((struct can_frame *)cf, can->ctrlmode)); } p->header[1] |= FIELD_PREP(KVASER_PCIEFD_PACKET_SEQ_MASK, seq); @@ -797,22 +809,24 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct kvaser_pciefd_can *can = netdev_priv(netdev); - unsigned long irq_flags; struct kvaser_pciefd_tx_packet packet; + unsigned int seq = can->tx_idx & (can->can.echo_skb_max - 1); + unsigned int frame_len; int nr_words; - u8 count; if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; + if (!netif_subqueue_maybe_stop(netdev, 0, kvaser_pciefd_tx_avail(can), 1, 1)) + return NETDEV_TX_BUSY; - nr_words = kvaser_pciefd_prepare_tx_packet(&packet, can, skb); + nr_words = kvaser_pciefd_prepare_tx_packet(&packet, &can->can, seq, skb); - spin_lock_irqsave(&can->echo_lock, irq_flags); /* Prepare and save echo skb in internal slot */ - can_put_echo_skb(skb, netdev, can->echo_idx, 0); - - /* Move echo index to the next slot */ - can->echo_idx = (can->echo_idx + 1) % can->can.echo_skb_max; + WRITE_ONCE(can->can.echo_skb[seq], NULL); + frame_len = can_skb_get_frame_len(skb); + can_put_echo_skb(skb, netdev, seq, frame_len); + netdev_sent_queue(netdev, frame_len); + WRITE_ONCE(can->tx_idx, can->tx_idx + 1); /* Write header to fifo */ iowrite32(packet.header[0], @@ -836,14 +850,7 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb, KVASER_PCIEFD_KCAN_FIFO_LAST_REG); } - count = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_CURRENT_MASK, - ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); - /* No room for a new message, stop the queue until at least one - * successful transmit - */ - if (count >= can->can.echo_skb_max || can->can.echo_skb[can->echo_idx]) - netif_stop_queue(netdev); - spin_unlock_irqrestore(&can->echo_lock, irq_flags); + netif_subqueue_maybe_stop(netdev, 0, kvaser_pciefd_tx_avail(can), 1, 1); return NETDEV_TX_OK; } @@ -970,6 +977,8 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie) can->kv_pcie = pcie; can->cmd_seq = 0; can->err_rep_cnt = 0; + can->completed_tx_pkts = 0; + can->completed_tx_bytes = 0; can->bec.txerr = 0; can->bec.rxerr = 0; @@ -983,11 +992,10 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie) tx_nr_packets_max = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_MAX_MASK, ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); + can->tx_max_count = min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1); can->can.clock.freq = pcie->freq; - can->can.echo_skb_max = min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1); - can->echo_idx = 0; - spin_lock_init(&can->echo_lock); + can->can.echo_skb_max = roundup_pow_of_two(can->tx_max_count); spin_lock_init(&can->lock); can->can.bittiming_const = &kvaser_pciefd_bittiming_const; @@ -1510,19 +1518,21 @@ static int kvaser_pciefd_handle_ack_packet(struct kvaser_pciefd *pcie, netdev_dbg(can->can.dev, "Packet was flushed\n"); } else { int echo_idx = FIELD_GET(KVASER_PCIEFD_PACKET_SEQ_MASK, p->header[0]); - int len; - u8 count; + unsigned int len, frame_len = 0; struct sk_buff *skb; + if (echo_idx != (can->ack_idx & (can->can.echo_skb_max - 1))) + return 0; skb = can->can.echo_skb[echo_idx]; - if (skb) - kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); - len = can_get_echo_skb(can->can.dev, echo_idx, NULL); - count = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_CURRENT_MASK, - ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); + if (!skb) + return 0; + kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); + len = can_get_echo_skb(can->can.dev, echo_idx, &frame_len); - if (count < can->can.echo_skb_max && netif_queue_stopped(can->can.dev)) - netif_wake_queue(can->can.dev); + /* Pairs with barrier in kvaser_pciefd_start_xmit() */ + smp_store_release(&can->ack_idx, can->ack_idx + 1); + can->completed_tx_pkts++; + can->completed_tx_bytes += frame_len; if (!one_shot_fail) { can->can.dev->stats.tx_bytes += len; @@ -1638,11 +1648,26 @@ static int kvaser_pciefd_read_buffer(struct kvaser_pciefd *pcie, int dma_buf) { int pos = 0; int res = 0; + unsigned int i; do { res = kvaser_pciefd_read_packet(pcie, &pos, dma_buf); } while (!res && pos > 0 && pos < KVASER_PCIEFD_DMA_SIZE); + /* Report ACKs in this buffer to BQL en masse for correct periods */ + for (i = 0; i < pcie->nr_channels; ++i) { + struct kvaser_pciefd_can *can = pcie->can[i]; + + if (!can->completed_tx_pkts) + continue; + netif_subqueue_completed_wake(can->can.dev, 0, + can->completed_tx_pkts, + can->completed_tx_bytes, + kvaser_pciefd_tx_avail(can), 1); + can->completed_tx_pkts = 0; + can->completed_tx_bytes = 0; + } + return res; } From 6d820b81c4dc4a4023e45c3cd6707a07dd838649 Mon Sep 17 00:00:00 2001 From: Axel Forsman Date: Tue, 20 May 2025 13:43:32 +0200 Subject: [PATCH 1913/2924] can: kvaser_pciefd: Continue parsing DMA buf after dropped RX Going bus-off on a channel doing RX could result in dropped packets. As netif_running() gets cleared before the channel abort procedure, the handling of any last RDATA packets would see netif_rx() return non-zero to signal a dropped packet. kvaser_pciefd_read_buffer() dealt with this "error" by breaking out of processing the remaining DMA RX buffer. Only return an error from kvaser_pciefd_read_buffer() due to packet corruption, otherwise handle it internally. Cc: stable@vger.kernel.org Signed-off-by: Axel Forsman Tested-by: Jimmy Assarsson Reviewed-by: Jimmy Assarsson Link: https://patch.msgid.link/20250520114332.8961-4-axfo@kvaser.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index a61cbade96d94d..f6921368cd14e9 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1209,7 +1209,7 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, skb = alloc_canfd_skb(priv->dev, &cf); if (!skb) { priv->dev->stats.rx_dropped++; - return -ENOMEM; + return 0; } cf->len = can_fd_dlc2len(dlc); @@ -1221,7 +1221,7 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, skb = alloc_can_skb(priv->dev, (struct can_frame **)&cf); if (!skb) { priv->dev->stats.rx_dropped++; - return -ENOMEM; + return 0; } can_frame_set_cc_len((struct can_frame *)cf, dlc, priv->ctrlmode); } @@ -1239,7 +1239,9 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, priv->dev->stats.rx_packets++; kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); - return netif_rx(skb); + netif_rx(skb); + + return 0; } static void kvaser_pciefd_change_state(struct kvaser_pciefd_can *can, From f1774d9d4e104639a9122bde3b1fe58a0c0dcde7 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 20 May 2025 13:33:36 -0600 Subject: [PATCH 1914/2924] io_uring/cmd: axe duplicate io_uring_cmd_import_fixed_vec() declaration io_uring_cmd_import_fixed_vec() is declared in both include/linux/io_uring/cmd.h and io_uring/uring_cmd.h. The declarations are identical (if redundant) for CONFIG_IO_URING=y. But if CONFIG_IO_URING=N, include/linux/io_uring/cmd.h declares the function as static inline while io_uring/uring_cmd.h declares it as extern. This causes linker errors if the declaration in io_uring/uring_cmd.h is used. Remove the declaration in io_uring/uring_cmd.h to avoid linker errors and prevent the declarations getting out of sync. Signed-off-by: Caleb Sander Mateos Fixes: ef4902752972 ("io_uring/cmd: introduce io_uring_cmd_import_fixed_vec") Link: https://lore.kernel.org/r/20250520193337.1374509-1-csander@purestorage.com Signed-off-by: Jens Axboe --- io_uring/uring_cmd.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h index b04686b6b5d249..e6a5142c890ea6 100644 --- a/io_uring/uring_cmd.h +++ b/io_uring/uring_cmd.h @@ -17,9 +17,3 @@ bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx, struct io_uring_task *tctx, bool cancel_all); void io_cmd_cache_free(const void *entry); - -int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, - const struct iovec __user *uvec, - size_t uvec_segs, - int ddir, struct iov_iter *iter, - unsigned issue_flags); From fdf6cab17f5866221c6f5a164806aa66662b8911 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 13 May 2025 21:38:57 +0300 Subject: [PATCH 1915/2924] gpiolib: don't crash on enabling GPIO HOG pins On Qualcomm platforms if the board uses GPIO hogs msm_pinmux_request() calls gpiochip_line_is_valid(). After commit 8015443e24e7 ("gpio: Hide valid_mask from direct assignments") gpiochip_line_is_valid() uses gc->gpiodev, which is NULL when GPIO hog pins are being processed. Thus after this commit using GPIO hogs causes the following crash. In order to fix this, verify that gc->gpiodev is not NULL. Note: it is not possible to reorder calls (e.g. by calling msm_gpio_init() before pinctrl registration or by splitting pinctrl_register() into _and_init() and pinctrl_enable() and calling the latter function after msm_gpio_init()) because GPIO chip registration would fail with EPROBE_DEFER if pinctrl is not enabled at the time of registration. pc : gpiochip_line_is_valid+0x4/0x28 lr : msm_pinmux_request+0x24/0x40 sp : ffff8000808eb870 x29: ffff8000808eb870 x28: 0000000000000000 x27: 0000000000000000 x26: 0000000000000000 x25: ffff726240f9d040 x24: 0000000000000000 x23: ffff7262438c0510 x22: 0000000000000080 x21: ffff726243ea7000 x20: ffffab13f2c4e698 x19: 0000000000000080 x18: 00000000ffffffff x17: ffff726242ba6000 x16: 0000000000000100 x15: 0000000000000028 x14: 0000000000000000 x13: 0000000000002948 x12: 0000000000000003 x11: 0000000000000078 x10: 0000000000002948 x9 : ffffab13f50eb5e8 x8 : 0000000003ecb21b x7 : 000000000000002d x6 : 0000000000000b68 x5 : 0000007fffffffff x4 : ffffab13f52f84a8 x3 : ffff8000808eb804 x2 : ffffab13f1de8190 x1 : 0000000000000080 x0 : 0000000000000000 Call trace: gpiochip_line_is_valid+0x4/0x28 (P) pin_request+0x208/0x2c0 pinmux_enable_setting+0xa0/0x2e0 pinctrl_commit_state+0x150/0x26c pinctrl_enable+0x6c/0x2a4 pinctrl_register+0x3c/0xb0 devm_pinctrl_register+0x58/0xa0 msm_pinctrl_probe+0x2a8/0x584 sdm845_pinctrl_probe+0x20/0x88 platform_probe+0x68/0xc0 really_probe+0xbc/0x298 __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x160 __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x8c/0xc8 process_one_work+0x208/0x5e8 worker_thread+0x1b4/0x35c kthread+0x144/0x220 ret_from_fork+0x10/0x20 Code: b5fffba0 17fffff2 9432ec27 f9400400 (f9428800) Fixes: 8015443e24e7 ("gpio: Hide valid_mask from direct assignments") Reported-by: Doug Anderson Closes: https://lore.kernel.org/r/CAD=FV=Vg8_ZOLgLoC4WhFPzhVsxXFC19NrF38W6cW_W_3nFjbw@mail.gmail.com Tested-by: Douglas Anderson Signed-off-by: Dmitry Baryshkov Reviewed-by: Matti Vaittinen Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/20250513-pinctrl-msm-fix-v2-1-249999af0fc1@oss.qualcomm.com Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index cd4fecbb41f292..113c5d90f2df46 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -742,6 +742,12 @@ EXPORT_SYMBOL_GPL(gpiochip_query_valid_mask); bool gpiochip_line_is_valid(const struct gpio_chip *gc, unsigned int offset) { + /* + * hog pins are requested before registering GPIO chip + */ + if (!gc->gpiodev) + return true; + /* No mask means all valid */ if (likely(!gc->gpiodev->valid_mask)) return true; From 41e452e6933d14146381ea25cff5e4d1ac2abea1 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 13 May 2025 21:38:58 +0300 Subject: [PATCH 1916/2924] pinctrl: qcom: switch to devm_register_sys_off_handler() Error-handling paths in msm_pinctrl_probe() don't call a function required to unroll restart handler registration, unregister_restart_handler(). Instead of adding calls to this function, switch the msm pinctrl code into using devm_register_sys_off_handler(). Fixes: cf1fc1876289 ("pinctrl: qcom: use restart_notifier mechanism for ps_hold") Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/20250513-pinctrl-msm-fix-v2-2-249999af0fc1@oss.qualcomm.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 82f0cc43bbf4f4..0eb816395dc64d 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -44,7 +44,6 @@ * @pctrl: pinctrl handle. * @chip: gpiochip handle. * @desc: pin controller descriptor - * @restart_nb: restart notifier block. * @irq: parent irq for the TLMM irq_chip. * @intr_target_use_scm: route irq to application cpu using scm calls * @lock: Spinlock to protect register resources as well @@ -64,7 +63,6 @@ struct msm_pinctrl { struct pinctrl_dev *pctrl; struct gpio_chip chip; struct pinctrl_desc desc; - struct notifier_block restart_nb; int irq; @@ -1471,10 +1469,9 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) return 0; } -static int msm_ps_hold_restart(struct notifier_block *nb, unsigned long action, - void *data) +static int msm_ps_hold_restart(struct sys_off_data *data) { - struct msm_pinctrl *pctrl = container_of(nb, struct msm_pinctrl, restart_nb); + struct msm_pinctrl *pctrl = data->cb_data; writel(0, pctrl->regs[0] + PS_HOLD_OFFSET); mdelay(1000); @@ -1485,7 +1482,11 @@ static struct msm_pinctrl *poweroff_pctrl; static void msm_ps_hold_poweroff(void) { - msm_ps_hold_restart(&poweroff_pctrl->restart_nb, 0, NULL); + struct sys_off_data data = { + .cb_data = poweroff_pctrl, + }; + + msm_ps_hold_restart(&data); } static void msm_pinctrl_setup_pm_reset(struct msm_pinctrl *pctrl) @@ -1495,9 +1496,11 @@ static void msm_pinctrl_setup_pm_reset(struct msm_pinctrl *pctrl) for (i = 0; i < pctrl->soc->nfunctions; i++) if (!strcmp(func[i].name, "ps_hold")) { - pctrl->restart_nb.notifier_call = msm_ps_hold_restart; - pctrl->restart_nb.priority = 128; - if (register_restart_handler(&pctrl->restart_nb)) + if (devm_register_sys_off_handler(pctrl->dev, + SYS_OFF_MODE_RESTART, + 128, + msm_ps_hold_restart, + pctrl)) dev_err(pctrl->dev, "failed to setup restart handler.\n"); poweroff_pctrl = pctrl; @@ -1599,8 +1602,6 @@ void msm_pinctrl_remove(struct platform_device *pdev) struct msm_pinctrl *pctrl = platform_get_drvdata(pdev); gpiochip_remove(&pctrl->chip); - - unregister_restart_handler(&pctrl->restart_nb); } EXPORT_SYMBOL(msm_pinctrl_remove); From 50980d8da71a0c2e045e85bba93c0099ab73a209 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 16 May 2025 07:26:55 -0500 Subject: [PATCH 1917/2924] net: ethernet: ti: am65-cpsw: Lower random mac address error print to info Using random mac address is not an error since the driver continues to function, it should be informative that the system has not assigned a MAC address. This is inline with other drivers such as ax88796c, dm9051 etc. Drop the error level to info level. Signed-off-by: Nishanth Menon Reviewed-by: Simon Horman Reviewed-by: Roger Quadros Link: https://patch.msgid.link/20250516122655.442808-1-nm@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 1e6d2335293df6..30665ffe78cf91 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2685,7 +2685,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) port->slave.mac_addr); if (!is_valid_ether_addr(port->slave.mac_addr)) { eth_random_addr(port->slave.mac_addr); - dev_err(dev, "Use random MAC address\n"); + dev_info(dev, "Use random MAC address\n"); } } From 47653e4243f2b0a26372e481ca098936b51ec3a8 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Mon, 19 May 2025 18:49:36 +0200 Subject: [PATCH 1918/2924] net: dwmac-sun8i: Use parsed internal PHY address instead of 1 While the MDIO address of the internal PHY on Allwinner sun8i chips is generally 1, of_mdio_parse_addr is used to cleanly parse the address from the device-tree instead of hardcoding it. A commit reworking the code ditched the parsed value and hardcoded the value 1 instead, which didn't really break anything but is more fragile and not future-proof. Restore the initial behavior using the parsed address returned from the helper. Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") Signed-off-by: Paul Kocialkowski Reviewed-by: Andrew Lunn Acked-by: Corentin LABBE Tested-by: Corentin LABBE Link: https://patch.msgid.link/20250519164936.4172658-1-paulk@sys-base.io Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 85723a78793ab6..6c7e8655a7eb92 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -964,7 +964,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev, /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY * address. No need to mask it again. */ - reg |= 1 << H3_EPHY_ADDR_SHIFT; + reg |= ret << H3_EPHY_ADDR_SHIFT; } else { /* For SoCs without internal PHY the PHY selection bit should be * set to 0 (external PHY). From 48a62855073bfbdf8f8a1e06e7f97fd68e39422c Mon Sep 17 00:00:00 2001 From: Paul Barker Date: Mon, 19 May 2025 13:33:51 +0000 Subject: [PATCH 1919/2924] MAINTAINERS: Drop myself to reviewer for ravb driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maintenance of the ravb driver will be handled by Niklas for now. I still intend to review patches, and will be using my own email address going forward. Signed-off-by: Paul Barker Acked-by: Niklas Söderlund Link: https://patch.msgid.link/20250519133354.6564-1-paul.barker.ct@bp.renesas.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3563492e4eba48..43d5f87097795c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20594,8 +20594,8 @@ F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml F: drivers/i2c/busses/i2c-emev2.c RENESAS ETHERNET AVB DRIVER -M: Paul Barker M: Niklas Söderlund +R: Paul Barker L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org S: Maintained From aed031da7e8cf6e31816a34afde961fbe0305fea Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 19 May 2025 13:41:28 -0700 Subject: [PATCH 1920/2924] bnxt_en: Fix netdev locking in ULP IRQ functions netdev_lock is already held when calling bnxt_ulp_irq_stop() and bnxt_ulp_irq_restart(). When converting rtnl_lock to netdev_lock, the original code was rtnl_dereference() to indicate that rtnl_lock was already held. rcu_dereference_protected() is the correct conversion after replacing rtnl_lock with netdev_lock. Add a new helper netdev_lock_dereference() similar to rtnl_dereference(). Fixes: 004b5008016a ("eth: bnxt: remove most dependencies on RTNL") Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250519204130.3097027-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 9 +++------ include/net/netdev_lock.h | 3 +++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a8e930d5dbb09f..7564705d64783e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "bnxt_hsi.h" #include "bnxt.h" @@ -309,14 +310,12 @@ void bnxt_ulp_irq_stop(struct bnxt *bp) if (!ulp->msix_requested) return; - netdev_lock(bp->dev); - ops = rcu_dereference(ulp->ulp_ops); + ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev); if (!ops || !ops->ulp_irq_stop) return; if (test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) reset = true; ops->ulp_irq_stop(ulp->handle, reset); - netdev_unlock(bp->dev); } } @@ -335,8 +334,7 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) if (!ulp->msix_requested) return; - netdev_lock(bp->dev); - ops = rcu_dereference(ulp->ulp_ops); + ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev); if (!ops || !ops->ulp_irq_restart) return; @@ -348,7 +346,6 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) bnxt_fill_msix_vecs(bp, ent); } ops->ulp_irq_restart(ulp->handle, ent); - netdev_unlock(bp->dev); kfree(ent); } } diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index c316b551df8d61..0ee5bc7668103b 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -98,6 +98,9 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, &qdisc_xmit_lock_key); \ } +#define netdev_lock_dereference(p, dev) \ + rcu_dereference_protected(p, lockdep_is_held(&(dev)->lock)) + int netdev_debug_event(struct notifier_block *nb, unsigned long event, void *ptr); From 293e38ff4e4c2ba53f3fd47d8a4a9f0f0414a7a6 Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Fri, 16 May 2025 09:27:19 +0530 Subject: [PATCH 1921/2924] net: lan743x: Restore SGMII CTRL register on resume SGMII_CTRL register, which specifies the active interface, was not properly restored when resuming from suspend. This led to incorrect interface selection after resume particularly in scenarios involving the FPGA. To fix this: - Move the SGMII_CTRL setup out of the probe function. - Initialize the register in the hardware initialization helper function, which is called during both device initialization and resume. This ensures the interface configuration is consistently restored after suspend/resume cycles. Fixes: a46d9d37c4f4f ("net: lan743x: Add support for SGMII interface") Signed-off-by: Thangaraj Samynathan Link: https://patch.msgid.link/20250516035719.117960-1-thangaraj.s@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index e2d6bfb5d69334..a70b88037a208b 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3495,6 +3495,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, struct pci_dev *pdev) { struct lan743x_tx *tx; + u32 sgmii_ctl; int index; int ret; @@ -3507,6 +3508,15 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, spin_lock_init(&adapter->eth_syslock_spinlock); mutex_init(&adapter->sgmii_rw_lock); pci11x1x_set_rfe_rd_fifo_threshold(adapter); + sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); + if (adapter->is_sgmii_en) { + sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; + } else { + sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; + } + lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; @@ -3558,7 +3568,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) { - u32 sgmii_ctl; int ret; adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev); @@ -3570,10 +3579,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) adapter->mdiobus->priv = (void *)adapter; if (adapter->is_pci11x1x) { if (adapter->is_sgmii_en) { - sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); - sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; - sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; - lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "SGMII operation\n"); adapter->mdiobus->read = lan743x_mdiobus_read_c22; @@ -3584,10 +3589,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) netif_dbg(adapter, drv, adapter->netdev, "lan743x-mdiobus-c45\n"); } else { - sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); - sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; - sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; - lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "RGMII operation\n"); // Only C22 support when RGMII I/F From e05741fb10c38d70bbd7ec12b23c197b6355d519 Mon Sep 17 00:00:00 2001 From: Tianyang Zhang Date: Wed, 16 Apr 2025 16:24:05 +0800 Subject: [PATCH 1922/2924] mm/page_alloc.c: avoid infinite retries caused by cpuset race __alloc_pages_slowpath has no change detection for ac->nodemask in the part of retry path, while cpuset can modify it in parallel. For some processes that set mempolicy as MPOL_BIND, this results ac->nodemask changes, and then the should_reclaim_retry will judge based on the latest nodemask and jump to retry, while the get_page_from_freelist only traverses the zonelist from ac->preferred_zoneref, which selected by a expired nodemask and may cause infinite retries in some cases cpu 64: __alloc_pages_slowpath { /* ..... */ retry: /* ac->nodemask = 0x1, ac->preferred->zone->nid = 1 */ if (alloc_flags & ALLOC_KSWAPD) wake_all_kswapds(order, gfp_mask, ac); /* cpu 1: cpuset_write_resmask update_nodemask update_nodemasks_hier update_tasks_nodemask mpol_rebind_task mpol_rebind_policy mpol_rebind_nodemask // mempolicy->nodes has been modified, // which ac->nodemask point to */ /* ac->nodemask = 0x3, ac->preferred->zone->nid = 1 */ if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, did_some_progress > 0, &no_progress_loops)) goto retry; } Simultaneously starting multiple cpuset01 from LTP can quickly reproduce this issue on a multi node server when the maximum memory pressure is reached and the swap is enabled Link: https://lkml.kernel.org/r/20250416082405.20988-1-zhangtianyang@loongson.cn Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") Signed-off-by: Tianyang Zhang Reviewed-by: Suren Baghdasaryan Reviewed-by: Vlastimil Babka Cc: Michal Hocko Cc: Brendan Jackman Cc: Johannes Weiner Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/page_alloc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8258349e49acb7..47fa713ccb4d89 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4562,6 +4562,14 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, } retry: + /* + * Deal with possible cpuset update races or zonelist updates to avoid + * infinite retries. + */ + if (check_retry_cpuset(cpuset_mems_cookie, ac) || + check_retry_zonelist(zonelist_iter_cookie)) + goto restart; + /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */ if (alloc_flags & ALLOC_KSWAPD) wake_all_kswapds(order, gfp_mask, ac); From 910224c7830d252697dc7fbd57fd1059d266d370 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 7 May 2025 17:02:57 +0200 Subject: [PATCH 1923/2924] MAINTAINERS: add myself as vmalloc co-maintainer I have been working on the vmalloc code for several years, contributing to improvements and fixes. Add myself as co-maintainer ("M") alongside Andrew Morton. Link: https://lkml.kernel.org/r/20250507150257.61485-1-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Acked-by: David Hildenbrand Acked-by: Lorenzo Stoakes Acked-by: Baoquan He Cc: Christop Hellwig Signed-off-by: Andrew Morton --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 88102bb64dfec5..27874530502f10 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -25906,7 +25906,7 @@ F: tools/testing/vsock/ VMALLOC M: Andrew Morton -R: Uladzislau Rezki +M: Uladzislau Rezki L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org From 7190b3c8bd2b0cde483bd440cf91ba1c518b4261 Mon Sep 17 00:00:00 2001 From: Ignacio Moreno Gonzalez Date: Wed, 7 May 2025 15:28:06 +0200 Subject: [PATCH 1924/2924] mm: mmap: map MAP_STACK to VM_NOHUGEPAGE only if THP is enabled commit c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE") maps the mmap option MAP_STACK to VM_NOHUGEPAGE. This is also done if CONFIG_TRANSPARENT_HUGEPAGE is not defined. But in that case, the VM_NOHUGEPAGE does not make sense. I discovered this issue when trying to use the tool CRIU to checkpoint and restore a container. Our running kernel is compiled without CONFIG_TRANSPARENT_HUGEPAGE. CRIU parses the output of /proc//smaps and saves the "nh" flag. When trying to restore the container, CRIU fails to restore the "nh" mappings, since madvise() MADV_NOHUGEPAGE always returns an error because CONFIG_TRANSPARENT_HUGEPAGE is not defined. Link: https://lkml.kernel.org/r/20250507-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled-v5-1-c6c38cfefd6e@kuka.com Fixes: c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE") Signed-off-by: Ignacio Moreno Gonzalez Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Reviewed-by: Yang Shi Reviewed-by: Liam R. Howlett Cc: Signed-off-by: Andrew Morton --- include/linux/mman.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/mman.h b/include/linux/mman.h index bce214fece16b9..f4c6346a8fcd29 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -155,7 +155,9 @@ calc_vm_flag_bits(struct file *file, unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | +#endif arch_calc_vm_flag_bits(file, flags); } From 0f518255bde881d2a2605bbc080b438b532b6ab2 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 7 May 2025 15:09:57 +0200 Subject: [PATCH 1925/2924] mm: fix VM_UFFD_MINOR == VM_SHADOW_STACK on USERFAULTFD=y && ARM64_GCS=y On configs with CONFIG_ARM64_GCS=y, VM_SHADOW_STACK is bit 38. On configs with CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y (selected by CONFIG_ARM64 when CONFIG_USERFAULTFD=y), VM_UFFD_MINOR is _also_ bit 38. This bit being shared by two different VMA flags could lead to all sorts of unintended behaviors. Presumably, a process could maybe call into userfaultfd in a way that disables the shadow stack vma flag. I can't think of any attack where this would help (presumably, if an attacker tries to disable shadow stacks, they are trying to hijack control flow so can't arbitrarily call into userfaultfd yet anyway) but this still feels somewhat scary. Link: https://lkml.kernel.org/r/20250507131000.1204175-2-revest@chromium.org Fixes: ae80e1629aea ("mm: Define VM_SHADOW_STACK for arm64 when we support GCS") Signed-off-by: Florent Revest Reviewed-by: Mark Brown Cc: Borislav Betkov Cc: Brendan Jackman Cc: Catalin Marinas Cc: Florent Revest Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thiago Jung Bauermann Cc: Thomas Gleinxer Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index bf55206935c467..fdda6b16263b35 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -385,7 +385,7 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -# define VM_UFFD_MINOR_BIT 38 +# define VM_UFFD_MINOR_BIT 41 # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ # define VM_UFFD_MINOR VM_NONE From e27126e0bcc747e59efaccac67166b76ff731670 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 12 May 2025 15:46:03 +0100 Subject: [PATCH 1926/2924] MAINTAINERS: update page allocator section Make Vlastimil maintainer of this section (with thanks to Vlastimil for agreeing to this!) and add page isolation files for which this section seem most appropriate. We may wish to, in future, refactor/rename some of these files to more logically fit what is actually being performed, but for the time being this seems the most sensible place. Additionally, fix the alphabetical ordering of files. Link: https://lkml.kernel.org/r/20250512144603.90379-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Acked-by: Johannes Weiner Acked-by: Zi Yan Cc: Brendan Jackman Cc: David Hildenbrand Cc: Michal Hocko Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- MAINTAINERS | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 27874530502f10..89097a5580e8ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15566,7 +15566,7 @@ F: mm/numa_memblks.c MEMORY MANAGEMENT - PAGE ALLOCATOR M: Andrew Morton -R: Vlastimil Babka +M: Vlastimil Babka R: Suren Baghdasaryan R: Michal Hocko R: Brendan Jackman @@ -15574,10 +15574,12 @@ R: Johannes Weiner R: Zi Yan L: linux-mm@kvack.org S: Maintained +F: include/linux/compaction.h +F: include/linux/gfp.h +F: include/linux/page-isolation.h F: mm/compaction.c F: mm/page_alloc.c -F: include/linux/gfp.h -F: include/linux/compaction.h +F: mm/page_isolation.c MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) M: Andrew Morton From 41f36b3912a60804603a23709181f599ec69dc8f Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 12 May 2025 15:31:22 +0100 Subject: [PATCH 1927/2924] MAINTAINERS: add mm reclaim section In furtherance of ongoing efforts to ensure people are aware of who de-facto maintains/has an interest in specific parts of mm, as well trying to avoid get_maintainers.pl listing only Andrew and the mailing list for mm files - establish a reclaim memory management section and add relevant maintainers/reviewers. This is a key part of memory management so sensibly deserves its own section. This encompasses both 'classical' reclaim and MGLRU and thus reflects this in the reviewers from both, as well as those who have contributed specifically on the memcg side of things. Link: https://lkml.kernel.org/r/20250512143122.87740-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Qi Zheng Acked-by: Michal Hocko Acked-by: Shakeel Butt Acked-by: Johannes Weiner Acked-by: David Hildenbrand Cc: Yu Zhao Signed-off-by: Andrew Morton --- MAINTAINERS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 89097a5580e8ce..dc79f285573fbe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15581,6 +15581,19 @@ F: mm/compaction.c F: mm/page_alloc.c F: mm/page_isolation.c +MEMORY MANAGEMENT - RECLAIM +M: Andrew Morton +M: Johannes Weiner +R: David Hildenbrand +R: Michal Hocko +R: Qi Zheng +R: Shakeel Butt +R: Lorenzo Stoakes +L: linux-mm@kvack.org +S: Maintained +F: mm/pt_reclaim.c +F: mm/vmscan.c + MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) M: Andrew Morton M: David Hildenbrand From 66f28ffb38ccc9d7e5fc8066d4c6aab6434f94a4 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Mon, 12 May 2025 14:28:25 +0800 Subject: [PATCH 1928/2924] mm/truncate: fix out-of-bounds when doing a right-aligned split When performing a right split on a folio, the split_at2 may point to a not-present page if the offset + length equals the original folio size, which will trigger the following error: BUG: unable to handle page fault for address: ffffea0006000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 143ffb9067 P4D 143ffb9067 PUD 143ffb8067 PMD 0 Oops: Oops: 0000 [#1] SMP PTI CPU: 0 UID: 0 PID: 502640 Comm: fsx Not tainted 6.15.0-rc3-gc6156189fc6b #889 PR Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/4 RIP: 0010:truncate_inode_partial_folio+0x208/0x620 Code: ff 03 48 01 da e8 78 7e 13 00 48 83 05 10 b5 5a 0c 01 85 c0 0f 85 1c 02 001 RSP: 0018:ffffc90005bafab0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffffea0005ffff00 RCX: 0000000000000002 RDX: 000000000000000c RSI: 0000000000013975 RDI: ffffc90005bafa30 RBP: ffffea0006000000 R08: 0000000000000000 R09: 00000000000009bf R10: 00000000000007e0 R11: 0000000000000000 R12: 0000000000001633 R13: 0000000000000000 R14: ffffea0005ffff00 R15: fffffffffffffffe FS: 00007f9f9a161740(0000) GS:ffff8894971fd000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffea0006000008 CR3: 000000017c2ae000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: truncate_inode_pages_range+0x226/0x720 truncate_pagecache+0x57/0x90 ... Fix this issue by skipping the split if truncation aligns with the folio size, make sure the split page number lies within the folio. Link: https://lkml.kernel.org/r/20250512062825.3533342-1-yi.zhang@huaweicloud.com Fixes: 7460b470a131 ("mm/truncate: use folio_split() in truncate operation") Signed-off-by: Zhang Yi Reviewed-by: Zi Yan Cc: ErKun Yang Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/truncate.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/mm/truncate.c b/mm/truncate.c index 5d98054094d1f3..f2aaf99f29906a 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -191,6 +191,7 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio) bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) { loff_t pos = folio_pos(folio); + size_t size = folio_size(folio); unsigned int offset, length; struct page *split_at, *split_at2; @@ -198,14 +199,13 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) offset = start - pos; else offset = 0; - length = folio_size(folio); - if (pos + length <= (u64)end) - length = length - offset; + if (pos + size <= (u64)end) + length = size - offset; else length = end + 1 - pos - offset; folio_wait_writeback(folio); - if (length == folio_size(folio)) { + if (length == size) { truncate_inode_folio(folio->mapping, folio); return true; } @@ -224,16 +224,20 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) return true; split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); - split_at2 = folio_page(folio, - PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE); - if (!try_folio_split(folio, split_at, NULL)) { /* * try to split at offset + length to make sure folios within * the range can be dropped, especially to avoid memory waste * for shmem truncate */ - struct folio *folio2 = page_folio(split_at2); + struct folio *folio2; + + if (offset + length == size) + goto no_split; + + split_at2 = folio_page(folio, + PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE); + folio2 = page_folio(split_at2); if (!folio_try_get(folio2)) goto no_split; From 0bf2d838de1ffb6d0bb6f8d18a6ccc59b7d9a705 Mon Sep 17 00:00:00 2001 From: Wang Yaxin Date: Sat, 10 May 2025 15:54:13 +0800 Subject: [PATCH 1929/2924] taskstats: fix struct taskstats breaks backward compatibility since version 15 Problem ======== commit 658eb5ab916d ("delayacct: add delay max to record delay peak") - adding more fields commit f65c64f311ee ("delayacct: add delay min to record delay peak") - adding more fields commit b016d0873777 ("taskstats: modify taskstats version") - version bump to 15 Since version 15 (TASKSTATS_VERSION=15) the new layout of the structure adds fields in the middle of the structure, rendering all old software incompatible with newer kernels and software compiled against the new kernel headers incompatible with older kernels. Solution ========= move delay max and delay min to the end of taskstat, and bump the version to 16 after the change [wang.yaxin@zte.com.cn: adjust indentation] Link: https://lkml.kernel.org/r/202505192131489882NSciXV4EGd8zzjLuwoOK@zte.com.cn Link: https://lkml.kernel.org/r/20250510155413259V4JNRXxukdDgzsaL0Fo6a@zte.com.cn Fixes: f65c64f311ee ("delayacct: add delay min to record delay peak") Signed-off-by: Wang Yaxin Signed-off-by: xu xin Signed-off-by: Kun Jiang Reviewed-by: Yang Yang Cc: Balbir Singh Cc: Signed-off-by: Andrew Morton --- include/uapi/linux/taskstats.h | 47 +++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 95762232e01863..5929030d4e8b06 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 15 +#define TASKSTATS_VERSION 16 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -72,8 +72,6 @@ struct taskstats { */ __u64 cpu_count __attribute__((aligned(8))); __u64 cpu_delay_total; - __u64 cpu_delay_max; - __u64 cpu_delay_min; /* Following four fields atomically updated using task->delays->lock */ @@ -82,14 +80,10 @@ struct taskstats { */ __u64 blkio_count; __u64 blkio_delay_total; - __u64 blkio_delay_max; - __u64 blkio_delay_min; /* Delay waiting for page fault I/O (swap in only) */ __u64 swapin_count; __u64 swapin_delay_total; - __u64 swapin_delay_max; - __u64 swapin_delay_min; /* cpu "wall-clock" running time * On some architectures, value will adjust for cpu time stolen @@ -172,14 +166,11 @@ struct taskstats { /* Delay waiting for memory reclaim */ __u64 freepages_count; __u64 freepages_delay_total; - __u64 freepages_delay_max; - __u64 freepages_delay_min; + /* Delay waiting for thrashing page */ __u64 thrashing_count; __u64 thrashing_delay_total; - __u64 thrashing_delay_max; - __u64 thrashing_delay_min; /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ @@ -187,8 +178,6 @@ struct taskstats { /* v11: Delay waiting for memory compact */ __u64 compact_count; __u64 compact_delay_total; - __u64 compact_delay_max; - __u64 compact_delay_min; /* v12 begin */ __u32 ac_tgid; /* thread group ID */ @@ -210,15 +199,37 @@ struct taskstats { /* v13: Delay waiting for write-protect copy */ __u64 wpcopy_count; __u64 wpcopy_delay_total; - __u64 wpcopy_delay_max; - __u64 wpcopy_delay_min; /* v14: Delay waiting for IRQ/SOFTIRQ */ __u64 irq_count; __u64 irq_delay_total; - __u64 irq_delay_max; - __u64 irq_delay_min; - /* v15: add Delay max */ + + /* v15: add Delay max and Delay min */ + + /* v16: move Delay max and Delay min to the end of taskstat */ + __u64 cpu_delay_max; + __u64 cpu_delay_min; + + __u64 blkio_delay_max; + __u64 blkio_delay_min; + + __u64 swapin_delay_max; + __u64 swapin_delay_min; + + __u64 freepages_delay_max; + __u64 freepages_delay_min; + + __u64 thrashing_delay_max; + __u64 thrashing_delay_min; + + __u64 compact_delay_max; + __u64 compact_delay_min; + + __u64 wpcopy_delay_max; + __u64 wpcopy_delay_min; + + __u64 irq_delay_max; + __u64 irq_delay_min; }; From 6fa04511f103ce1b4aa30d50b4abe3401b13bee5 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Tue, 13 May 2025 13:22:34 +0800 Subject: [PATCH 1930/2924] MAINTAINERS: add hung-task detector section The hung-task detector is missing in MAINTAINERS. While it's been quiet recently, I'm actively working on it and volunteering to review patches. Adding this section will make it easier for contributors to know who to contact. Link: https://lkml.kernel.org/r/20250513052234.46463-1-lance.yang@linux.dev Signed-off-by: Lance Yang Signed-off-by: Andrew Morton --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index dc79f285573fbe..bbe92bbf6c1a90 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11099,6 +11099,14 @@ L: linuxppc-dev@lists.ozlabs.org S: Odd Fixes F: drivers/tty/hvc/ +HUNG TASK DETECTOR +M: Andrew Morton +R: Lance Yang +L: linux-kernel@vger.kernel.org +S: Maintained +F: include/linux/hung_task.h +F: kernel/hung_task.c + I2C ACPI SUPPORT M: Mika Westerberg L: linux-i2c@vger.kernel.org From 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 14 May 2025 18:06:02 +0100 Subject: [PATCH 1931/2924] highmem: add folio_test_partial_kmap() In commit c749d9b7ebbc ("iov_iter: fix copy_page_from_iter_atomic() if KMAP_LOCAL_FORCE_MAP"), Hugh correctly noted that if KMAP_LOCAL_FORCE_MAP is enabled, we must limit ourselves to PAGE_SIZE bytes per call to kmap_local(). The same problem exists in memcpy_from_folio(), memcpy_to_folio(), folio_zero_tail(), folio_fill_tail() and memcpy_from_file_folio(), so add folio_test_partial_kmap() to do this more succinctly. Link: https://lkml.kernel.org/r/20250514170607.3000994-2-willy@infradead.org Fixes: 00cdf76012ab ("mm: add memcpy_from_file_folio()") Signed-off-by: Matthew Wilcox (Oracle) Cc: Al Viro Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton --- include/linux/highmem.h | 10 +++++----- include/linux/page-flags.h | 7 +++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 5c6bea81a90ecf..c698f8415675ef 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -461,7 +461,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, const char *from = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset, char *to = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -522,7 +522,7 @@ static inline __must_check void *folio_zero_tail(struct folio *folio, { size_t len = folio_size(folio) - offset; - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -560,7 +560,7 @@ static inline void folio_fill_tail(struct folio *folio, size_t offset, VM_BUG_ON(offset + len > folio_size(folio)); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e6a21b62dcceef..3b814ce08331e2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -615,6 +615,13 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) PAGEFLAG_FALSE(HighMem, highmem) #endif +/* Does kmap_local_folio() only allow access to one page of the folio? */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +#define folio_test_partial_kmap(f) true +#else +#define folio_test_partial_kmap(f) folio_test_highmem(f) +#endif + #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) { From b6ea95a34cbd014ab6ade4248107b86b0aaf2d6c Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 15 May 2025 15:55:38 +0200 Subject: [PATCH 1932/2924] kasan: avoid sleepable page allocation from atomic context apply_to_pte_range() enters the lazy MMU mode and then invokes kasan_populate_vmalloc_pte() callback on each page table walk iteration. However, the callback can go into sleep when trying to allocate a single page, e.g. if an architecutre disables preemption on lazy MMU mode enter. On s390 if make arch_enter_lazy_mmu_mode() -> preempt_enable() and arch_leave_lazy_mmu_mode() -> preempt_disable(), such crash occurs: [ 0.663336] BUG: sleeping function called from invalid context at ./include/linux/sched/mm.h:321 [ 0.663348] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2, name: kthreadd [ 0.663358] preempt_count: 1, expected: 0 [ 0.663366] RCU nest depth: 0, expected: 0 [ 0.663375] no locks held by kthreadd/2. [ 0.663383] Preemption disabled at: [ 0.663386] [<0002f3284cbb4eda>] apply_to_pte_range+0xfa/0x4a0 [ 0.663405] CPU: 0 UID: 0 PID: 2 Comm: kthreadd Not tainted 6.15.0-rc5-gcc-kasan-00043-gd76bb1ebb558-dirty #162 PREEMPT [ 0.663408] Hardware name: IBM 3931 A01 701 (KVM/Linux) [ 0.663409] Call Trace: [ 0.663410] [<0002f3284c385f58>] dump_stack_lvl+0xe8/0x140 [ 0.663413] [<0002f3284c507b9e>] __might_resched+0x66e/0x700 [ 0.663415] [<0002f3284cc4f6c0>] __alloc_frozen_pages_noprof+0x370/0x4b0 [ 0.663419] [<0002f3284ccc73c0>] alloc_pages_mpol+0x1a0/0x4a0 [ 0.663421] [<0002f3284ccc8518>] alloc_frozen_pages_noprof+0x88/0xc0 [ 0.663424] [<0002f3284ccc8572>] alloc_pages_noprof+0x22/0x120 [ 0.663427] [<0002f3284cc341ac>] get_free_pages_noprof+0x2c/0xc0 [ 0.663429] [<0002f3284cceba70>] kasan_populate_vmalloc_pte+0x50/0x120 [ 0.663433] [<0002f3284cbb4ef8>] apply_to_pte_range+0x118/0x4a0 [ 0.663435] [<0002f3284cbc7c14>] apply_to_pmd_range+0x194/0x3e0 [ 0.663437] [<0002f3284cbc99be>] __apply_to_page_range+0x2fe/0x7a0 [ 0.663440] [<0002f3284cbc9e88>] apply_to_page_range+0x28/0x40 [ 0.663442] [<0002f3284ccebf12>] kasan_populate_vmalloc+0x82/0xa0 [ 0.663445] [<0002f3284cc1578c>] alloc_vmap_area+0x34c/0xc10 [ 0.663448] [<0002f3284cc1c2a6>] __get_vm_area_node+0x186/0x2a0 [ 0.663451] [<0002f3284cc1e696>] __vmalloc_node_range_noprof+0x116/0x310 [ 0.663454] [<0002f3284cc1d950>] __vmalloc_node_noprof+0xd0/0x110 [ 0.663457] [<0002f3284c454b88>] alloc_thread_stack_node+0xf8/0x330 [ 0.663460] [<0002f3284c458d56>] dup_task_struct+0x66/0x4d0 [ 0.663463] [<0002f3284c45be90>] copy_process+0x280/0x4b90 [ 0.663465] [<0002f3284c460940>] kernel_clone+0xd0/0x4b0 [ 0.663467] [<0002f3284c46115e>] kernel_thread+0xbe/0xe0 [ 0.663469] [<0002f3284c4e440e>] kthreadd+0x50e/0x7f0 [ 0.663472] [<0002f3284c38c04a>] __ret_from_fork+0x8a/0xf0 [ 0.663475] [<0002f3284ed57ff2>] ret_from_fork+0xa/0x38 Instead of allocating single pages per-PTE, bulk-allocate the shadow memory prior to applying kasan_populate_vmalloc_pte() callback on a page range. Link: https://lkml.kernel.org/r/c61d3560297c93ed044f0b1af085610353a06a58.1747316918.git.agordeev@linux.ibm.com Fixes: 3c5c3cfb9ef4 ("kasan: support backing vmalloc space with real shadow memory") Signed-off-by: Alexander Gordeev Suggested-by: Andrey Ryabinin Reviewed-by: Harry Yoo Cc: Daniel Axtens Cc: Signed-off-by: Andrew Morton --- mm/kasan/shadow.c | 92 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 14 deletions(-) diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 88d1c9dcb50721..d2c70cd2afb1de 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -292,33 +292,99 @@ void __init __weak kasan_populate_early_vm_area_shadow(void *start, { } +struct vmalloc_populate_data { + unsigned long start; + struct page **pages; +}; + static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, - void *unused) + void *_data) { - unsigned long page; + struct vmalloc_populate_data *data = _data; + struct page *page; pte_t pte; + int index; if (likely(!pte_none(ptep_get(ptep)))) return 0; - page = __get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - - __memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE); - pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL); + index = PFN_DOWN(addr - data->start); + page = data->pages[index]; + __memset(page_to_virt(page), KASAN_VMALLOC_INVALID, PAGE_SIZE); + pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL); spin_lock(&init_mm.page_table_lock); if (likely(pte_none(ptep_get(ptep)))) { set_pte_at(&init_mm, addr, ptep, pte); - page = 0; + data->pages[index] = NULL; } spin_unlock(&init_mm.page_table_lock); - if (page) - free_page(page); + + return 0; +} + +static void ___free_pages_bulk(struct page **pages, int nr_pages) +{ + int i; + + for (i = 0; i < nr_pages; i++) { + if (pages[i]) { + __free_pages(pages[i], 0); + pages[i] = NULL; + } + } +} + +static int ___alloc_pages_bulk(struct page **pages, int nr_pages) +{ + unsigned long nr_populated, nr_total = nr_pages; + struct page **page_array = pages; + + while (nr_pages) { + nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); + if (!nr_populated) { + ___free_pages_bulk(page_array, nr_total - nr_pages); + return -ENOMEM; + } + pages += nr_populated; + nr_pages -= nr_populated; + } + return 0; } +static int __kasan_populate_vmalloc(unsigned long start, unsigned long end) +{ + unsigned long nr_pages, nr_total = PFN_UP(end - start); + struct vmalloc_populate_data data; + int ret = 0; + + data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!data.pages) + return -ENOMEM; + + while (nr_total) { + nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0])); + ret = ___alloc_pages_bulk(data.pages, nr_pages); + if (ret) + break; + + data.start = start; + ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE, + kasan_populate_vmalloc_pte, &data); + ___free_pages_bulk(data.pages, nr_pages); + if (ret) + break; + + start += nr_pages * PAGE_SIZE; + nr_total -= nr_pages; + } + + free_page((unsigned long)data.pages); + + return ret; +} + int kasan_populate_vmalloc(unsigned long addr, unsigned long size) { unsigned long shadow_start, shadow_end; @@ -348,9 +414,7 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) shadow_start = PAGE_ALIGN_DOWN(shadow_start); shadow_end = PAGE_ALIGN(shadow_end); - ret = apply_to_page_range(&init_mm, shadow_start, - shadow_end - shadow_start, - kasan_populate_vmalloc_pte, NULL); + ret = __kasan_populate_vmalloc(shadow_start, shadow_end); if (ret) return ret; From 62bec60be24e59ac34c6c7a0ffd69f20726cb88e Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 15 May 2025 20:04:04 +0100 Subject: [PATCH 1933/2924] MAINTAINERS: add mm ksm section As part of the ongoing efforts to sub-divide memory management maintainership and reviewership, establish a section for Kernel Samepage Merging (KSM) and add appropriate maintainers and reviewers. Link: https://lkml.kernel.org/r/20250515190404.203596-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: David Hildenbrand Acked-by: Chengming Zhou Acked-by: Xu Xin Signed-off-by: Andrew Morton --- MAINTAINERS | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index bbe92bbf6c1a90..b3e5db58c6b73c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15562,6 +15562,21 @@ W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm F: mm/gup.c +MEMORY MANAGEMENT - KSM (Kernel Samepage Merging) +M: Andrew Morton +M: David Hildenbrand +R: Xu Xin +R: Chengming Zhou +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: Documentation/admin-guide/mm/ksm.rst +F: Documentation/mm/ksm.rst +F: include/linux/ksm.h +F: include/trace/events/ksm.h +F: mm/ksm.c + MEMORY MANAGEMENT - NUMA MEMBLOCKS AND NUMA EMULATION M: Andrew Morton M: Mike Rapoport From bdc3f7e9e19661c855964685820e988d2c371d8e Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 15 May 2025 20:13:58 +0100 Subject: [PATCH 1934/2924] MAINTAINERS: add mm memory policy section As part of the ongoing efforts to sub-divide memory management maintainership and reviewership, establish a section for memory policy and migration and add appropriate maintainers and reviewers. [lorenzo.stoakes@oracle.com: add Ying as reviewer] Link: https://lkml.kernel.org/r/ed6f0fc2-5608-4eea-b1be-07e3e19be263@lucifer.local Link: https://lkml.kernel.org/r/20250515191358.205684-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Reviewed-by: Rakie Kim Acked-by: Matthew Brost Reviewed-by: Joshua Hahn Acked-by: David Hildenbrand Acked-by: Zi Yan Acked-by: Huang Ying Acked-by: Byungchul Park Cc: Alistair Popple Cc: David Hildenbrand Cc: Gregory Price Signed-off-by: Andrew Morton --- MAINTAINERS | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index b3e5db58c6b73c..698d5fdd51eb04 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15577,6 +15577,26 @@ F: include/linux/ksm.h F: include/trace/events/ksm.h F: mm/ksm.c +MEMORY MANAGEMENT - MEMORY POLICY AND MIGRATION +M: Andrew Morton +M: David Hildenbrand +R: Zi Yan +R: Matthew Brost +R: Joshua Hahn +R: Rakie Kim +R: Byungchul Park +R: Gregory Price +R: Ying Huang +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: include/linux/mempolicy.h +F: include/linux/migrate.h +F: mm/mempolicy.c +F: mm/migrate.c +F: mm/migrate_device.c + MEMORY MANAGEMENT - NUMA MEMBLOCKS AND NUMA EMULATION M: Andrew Morton M: Mike Rapoport From fb27226c389f499d04913023fbcfb7920fb0e475 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 15 May 2025 10:39:22 -0700 Subject: [PATCH 1935/2924] fs/buffer: use sleeping lookup in __getblk_slowpath() Just as with the fast path, call the lookup variant depending on the gfp flags. Signed-off-by: Davidlohr Bueso Link: https://lore.kernel.org/20250515173925.147823-2-dave@stgolabs.net Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/buffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/buffer.c b/fs/buffer.c index 7ba1807145aa8b..8563d949cd2c89 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1122,6 +1122,8 @@ static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { + bool blocking = gfpflags_allow_blocking(gfp); + /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1137,7 +1139,10 @@ __getblk_slow(struct block_device *bdev, sector_t block, for (;;) { struct buffer_head *bh; - bh = __find_get_block(bdev, block, size); + if (blocking) + bh = __find_get_block_nonatomic(bdev, block, size); + else + bh = __find_get_block(bdev, block, size); if (bh) return bh; From 98a6ca16333e10ce450b0ab516f4c3e5fe52ef31 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 15 May 2025 10:39:23 -0700 Subject: [PATCH 1936/2924] fs/buffer: avoid redundant lookup in getblk slowpath __getblk_slow() already implies failing a first lookup as the fastpath, so try to create the buffers immediately and avoid the redundant lookup. This saves 5-10% of the total cost/latency of the slowpath. Signed-off-by: Davidlohr Bueso Link: https://lore.kernel.org/20250515173925.147823-3-dave@stgolabs.net Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 8563d949cd2c89..23773b7505241d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1139,15 +1139,15 @@ __getblk_slow(struct block_device *bdev, sector_t block, for (;;) { struct buffer_head *bh; + if (!grow_buffers(bdev, block, size, gfp)) + return NULL; + if (blocking) bh = __find_get_block_nonatomic(bdev, block, size); else bh = __find_get_block(bdev, block, size); if (bh) return bh; - - if (!grow_buffers(bdev, block, size, gfp)) - return NULL; } } From d11a24999605a054bef5e2ade7fedfaefce52388 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 15 May 2025 10:39:24 -0700 Subject: [PATCH 1937/2924] fs/buffer: remove superfluous statements Get rid of those unnecessary return statements. Signed-off-by: Davidlohr Bueso Link: https://lore.kernel.org/20250515173925.147823-4-dave@stgolabs.net Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/buffer.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 23773b7505241d..7621f0c471f7e8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -297,7 +297,6 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - return; } struct postprocess_bh_ctx { @@ -422,7 +421,6 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - return; } /* @@ -1682,7 +1680,6 @@ void block_invalidate_folio(struct folio *folio, size_t offset, size_t length) filemap_release_folio(folio, 0); out: folio_clear_mappedtodisk(folio); - return; } EXPORT_SYMBOL(block_invalidate_folio); From 8e184bf1cd7495c63242651de6190bb1678730b0 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 15 May 2025 10:39:25 -0700 Subject: [PATCH 1938/2924] fs/buffer: optimize discard_buffer() While invalidating, the clearing of the bits in discard_buffer() is done in one fully ordered CAS operation. In the past this was done via individual clear_bit(), until e7470ee89f0 (fs: buffer: do not use unnecessary atomic operations when discarding buffers). This implies that there were never strong ordering requirements outside of being serialized by the buffer lock. As such relax the ordering for archs that can benefit. Further, the implied ordering in buffer_unlock() makes current cmpxchg implied barrier redundant due to release semantics. And while in theory the unlock could be part of the bulk clearing, it is best to leave it explicit, but without the double barriers. Signed-off-by: Davidlohr Bueso Link: https://lore.kernel.org/20250515173925.147823-5-dave@stgolabs.net Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 7621f0c471f7e8..dd8709e05225ed 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1614,8 +1614,8 @@ static void discard_buffer(struct buffer_head * bh) bh->b_bdev = NULL; b_state = READ_ONCE(bh->b_state); do { - } while (!try_cmpxchg(&bh->b_state, &b_state, - b_state & ~BUFFER_FLAGS_DISCARD)); + } while (!try_cmpxchg_relaxed(&bh->b_state, &b_state, + b_state & ~BUFFER_FLAGS_DISCARD)); unlock_buffer(bh); } From ef0841e4cb08754be6cb42bf97739fce5d086e5f Mon Sep 17 00:00:00 2001 From: Carlos Sanchez Date: Tue, 20 May 2025 12:23:05 +0200 Subject: [PATCH 1939/2924] can: slcan: allow reception of short error messages Allows slcan to receive short messages (typically errors) from the serial interface. When error support was added to slcan protocol in b32ff4668544e1333b694fcc7812b2d7397b4d6a ("can: slcan: extend the protocol with error info") the minimum valid message size changed from 5 (minimum standard can frame tIII0) to 3 ("e1a" is a valid protocol message, it is one of the examples given in the comments for slcan_bump_err() ), but the check for minimum message length prodicating all decoding was not adjusted. This makes short error messages discarded and error frames not being generated. This patch changes the minimum length to the new minimum (3 characters, excluding terminator, is now a valid message). Signed-off-by: Carlos Sanchez Fixes: b32ff4668544 ("can: slcan: extend the protocol with error info") Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20250520102305.1097494-1-carlossanchez@geotab.com Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/slcan/slcan-core.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index 24c6622d36bd85..58ff2ec1d9757e 100644 --- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -71,12 +71,21 @@ MODULE_AUTHOR("Dario Binacchi "); #define SLCAN_CMD_LEN 1 #define SLCAN_SFF_ID_LEN 3 #define SLCAN_EFF_ID_LEN 8 +#define SLCAN_DATA_LENGTH_LEN 1 +#define SLCAN_ERROR_LEN 1 #define SLCAN_STATE_LEN 1 #define SLCAN_STATE_BE_RXCNT_LEN 3 #define SLCAN_STATE_BE_TXCNT_LEN 3 -#define SLCAN_STATE_FRAME_LEN (1 + SLCAN_CMD_LEN + \ - SLCAN_STATE_BE_RXCNT_LEN + \ - SLCAN_STATE_BE_TXCNT_LEN) +#define SLCAN_STATE_MSG_LEN (SLCAN_CMD_LEN + \ + SLCAN_STATE_LEN + \ + SLCAN_STATE_BE_RXCNT_LEN + \ + SLCAN_STATE_BE_TXCNT_LEN) +#define SLCAN_ERROR_MSG_LEN_MIN (SLCAN_CMD_LEN + \ + SLCAN_ERROR_LEN + \ + SLCAN_DATA_LENGTH_LEN) +#define SLCAN_FRAME_MSG_LEN_MIN (SLCAN_CMD_LEN + \ + SLCAN_SFF_ID_LEN + \ + SLCAN_DATA_LENGTH_LEN) struct slcan { struct can_priv can; @@ -176,6 +185,9 @@ static void slcan_bump_frame(struct slcan *sl) u32 tmpid; char *cmd = sl->rbuff; + if (sl->rcount < SLCAN_FRAME_MSG_LEN_MIN) + return; + skb = alloc_can_skb(sl->dev, &cf); if (unlikely(!skb)) { sl->dev->stats.rx_dropped++; @@ -281,7 +293,7 @@ static void slcan_bump_state(struct slcan *sl) return; } - if (state == sl->can.state || sl->rcount < SLCAN_STATE_FRAME_LEN) + if (state == sl->can.state || sl->rcount != SLCAN_STATE_MSG_LEN) return; cmd += SLCAN_STATE_BE_RXCNT_LEN + SLCAN_CMD_LEN + 1; @@ -328,6 +340,9 @@ static void slcan_bump_err(struct slcan *sl) bool rx_errors = false, tx_errors = false, rx_over_errors = false; int i, len; + if (sl->rcount < SLCAN_ERROR_MSG_LEN_MIN) + return; + /* get len from sanitized ASCII value */ len = cmd[1]; if (len >= '0' && len < '9') @@ -456,8 +471,7 @@ static void slcan_bump(struct slcan *sl) static void slcan_unesc(struct slcan *sl, unsigned char s) { if ((s == '\r') || (s == '\a')) { /* CR or BEL ends the pdu */ - if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && - sl->rcount > 4) + if (!test_and_clear_bit(SLF_ERROR, &sl->flags)) slcan_bump(sl); sl->rcount = 0; From b95ed5517354a5f451fb9b3771776ca4c0b65ac3 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Fri, 16 May 2025 21:36:38 +0000 Subject: [PATCH 1940/2924] xsk: Bring back busy polling support in XDP_COPY Commit 5ef44b3cb43b ("xsk: Bring back busy polling support") fixed the busy polling support in xsk for XDP_ZEROCOPY after it was broken in commit 86e25f40aa1e ("net: napi: Add napi_config"). The busy polling support with XDP_COPY remained broken since the napi_id setup in xsk_rcv_check was removed. Bring back the setup of napi_id for XDP_COPY so socket level SO_BUSYPOLL can be used to poll the underlying napi. Do the setup of napi_id for XDP_COPY in xsk_bind, as it is done currently for XDP_ZEROCOPY. The setup of napi_id for XDP_COPY in xsk_bind is safe because xsk_rcv_check checks that the rx queue at which the packet arrives is equal to the queue_id that was supplied in bind. This is done for both XDP_COPY and XDP_ZEROCOPY mode. Tested using AF_XDP support in virtio-net by running the xsk_rr AF_XDP benchmarking tool shared here: https://lore.kernel.org/all/20250320163523.3501305-1-skhawaja@google.com/T/ Enabled socket busy polling using following commands in qemu, ``` sudo ethtool -L eth0 combined 1 echo 400 | sudo tee /proc/sys/net/core/busy_read echo 100 | sudo tee /sys/class/net/eth0/napi_defer_hard_irqs echo 15000 | sudo tee /sys/class/net/eth0/gro_flush_timeout ``` Fixes: 5ef44b3cb43b ("xsk: Bring back busy polling support") Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Acked-by: Stanislav Fomichev Signed-off-by: David S. Miller --- net/xdp/xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 4abc81f33d3ebe..72c000c0ae5f57 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1304,7 +1304,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->queue_id = qid; xp_add_xsk(xs->pool, xs); - if (xs->zc && qid < dev->real_num_rx_queues) { + if (qid < dev->real_num_rx_queues) { struct netdev_rx_queue *rxq; rxq = __netif_get_rx_queue(dev, qid); From 49b9f86a594a5403641e6e60508788a7310fd293 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Wed, 21 May 2025 18:11:23 +0530 Subject: [PATCH 1941/2924] nvme: avoid creating multipath sysfs group under namespace path devices Commit 4dbd2b2ebe4c ("nvme-multipath: Add visibility for round-robin io-policy") introduced the creation of the multipath sysfs group under the NVMe head gendisk device node. However, it also inadvertently added the same sysfs group under each namespace path device which head node refers to and that is incorrect. The multipath sysfs group should only be exposed through the namespace head gendisk node. This is sufficient, as the head device already provides symbolic links to the individual namespace paths it manages. This patch fixes the issue by preventing the creation of the multipath sysfs group under namespace path devices, ensuring it only appears under the head disk node. Fixes: 4dbd2b2ebe4c ("nvme-multipath: Add visibility for round-robin io-policy") Signed-off-by: Nilay Shroff Signed-off-by: Christoph Hellwig --- drivers/nvme/host/sysfs.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 6d31226f7a4f84..a5bc3bb483d505 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -306,13 +306,41 @@ static const struct attribute_group nvme_ns_attr_group = { }; #ifdef CONFIG_NVME_MULTIPATH +/* + * NOTE: The dummy attribute does not appear in sysfs. It exists solely to allow + * control over the visibility of the multipath sysfs node. Without at least one + * attribute defined in nvme_ns_mpath_attrs[], the sysfs implementation does not + * invoke the multipath_sysfs_group_visible() method. As a result, we would not + * be able to control the visibility of the multipath sysfs node. + */ +static struct attribute dummy_attr = { + .name = "dummy", +}; + static struct attribute *nvme_ns_mpath_attrs[] = { + &dummy_attr, NULL, }; +static bool multipath_sysfs_group_visible(struct kobject *kobj) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + return nvme_disk_is_ns_head(dev_to_disk(dev)); +} + +static bool multipath_sysfs_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + return false; +} + +DEFINE_SYSFS_GROUP_VISIBLE(multipath_sysfs) + const struct attribute_group nvme_ns_mpath_attr_group = { .name = "multipath", .attrs = nvme_ns_mpath_attrs, + .is_visible = SYSFS_GROUP_VISIBLE(multipath_sysfs), }; #endif From a7d755ed9ce9738af3db602eb29d32774a180bc7 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 17 May 2025 13:27:37 +0100 Subject: [PATCH 1942/2924] io_uring: fix overflow resched cqe reordering Leaving the CQ critical section in the middle of a overflow flushing can cause cqe reordering since the cache cq pointers are reset and any new cqe emitters that might get called in between are not going to be forced into io_cqe_cache_refill(). Fixes: eac2ca2d682f9 ("io_uring: check if we need to reschedule during overflow flush") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/90ba817f1a458f091f355f407de1c911d2b93bbf.1747483784.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 541e65a1eebfde..edda31a15c6e65 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -636,6 +636,7 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool dying) * to care for a non-real case. */ if (need_resched()) { + ctx->cqe_sentinel = ctx->cqe_cached; io_cq_unlock_post(ctx); mutex_unlock(&ctx->uring_lock); cond_resched(); From 6c9ab811871bc9a08e9cd4b96371a60667fa9ec8 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 15 May 2025 12:41:36 -0300 Subject: [PATCH 1943/2924] arm64: defconfig: Ensure CRYPTO_CHACHA20_NEON is selected Since commit 17ec3e71ba79 ("crypto: lib/Kconfig - Hide arch options from user"), the CRYPTO_CHACHA20_NEON option is no longer selected by default due to changes in how crypto library options are exposed and selected. To restore the previous behavior and ensure CRYPTO_CHACHA20_NEON is enabled, explicitly select CONFIG_CRYPTO_CHACHA20 in the defconfig. This pulls in CRYPTO_LIB_CHACHA_INTERNAL and allows CRYPTO_CHACHA20_NEON to be selected automatically as before. Fixes: 17ec3e71ba79 ("crypto: lib/Kconfig - Hide arch options from user") Signed-off-by: Fabio Estevam Acked-by: Arnd Bergmann Signed-off-by: Arnd Bergmann --- arch/arm64/configs/defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 5bb8f09422a221..c4ce2c67c0e067 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1729,12 +1729,12 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_SECURITY=y CONFIG_CRYPTO_USER=y +CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_ECHAINIV=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_CHACHA20_NEON=m CONFIG_CRYPTO_GHASH_ARM64_CE=y CONFIG_CRYPTO_SHA1_ARM64_CE=y CONFIG_CRYPTO_SHA2_ARM64_CE=y From 009970506c92816c857e1705cf6db68c9147d39f Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 17 May 2025 17:58:27 +0800 Subject: [PATCH 1944/2924] net: hibmcge: fix incorrect statistics update issue When the user dumps statistics, the hibmcge driver automatically updates all statistics. If the driver is performing the reset operation, the error data of 0xFFFFFFFF is updated. Therefore, if the driver is resetting, the hbg_update_stats_by_info() needs to return directly. Fixes: c0bf9bf31e79 ("net: hibmcge: Add support for dump statistics") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250517095828.1763126-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c index 8f1107b85fbb09..55520053270a5c 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c @@ -317,6 +317,9 @@ static void hbg_update_stats_by_info(struct hbg_priv *priv, const struct hbg_ethtool_stats *stats; u32 i; + if (test_bit(HBG_NIC_STATE_RESETTING, &priv->state)) + return; + for (i = 0; i < info_len; i++) { stats = &info[i]; if (!stats->reg) From 1b45443b84872c010b36dfc2681413bb1a10011a Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 17 May 2025 17:58:28 +0800 Subject: [PATCH 1945/2924] net: hibmcge: fix wrong ndo.open() after reset fail issue. If the driver reset fails, it may not work properly. Therefore, the ndo.open() operation should be rejected. In this patch, the driver calls netif_device_detach() before the reset and calls netif_device_attach() after the reset succeeds. If the reset fails, netif_device_attach() is not called. Therefore, netdev does not present and cannot be opened. If reset fails, only the PCI reset (via sysfs) can be used to attempt recovery. Fixes: 3f5a61f6d504 ("net: hibmcge: Add reset supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250517095828.1763126-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index a0bcfb5a713d13..ff3295b60a69a8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -61,6 +61,8 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) return -EBUSY; } + netif_device_detach(priv->netdev); + priv->reset_type = type; set_bit(HBG_NIC_STATE_RESETTING, &priv->state); clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); @@ -91,6 +93,8 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type) return ret; } + netif_device_attach(priv->netdev); + dev_info(&priv->pdev->dev, "reset done\n"); return ret; } @@ -117,16 +121,13 @@ void hbg_err_reset(struct hbg_priv *priv) if (running) dev_close(priv->netdev); - hbg_reset(priv); - - /* in hbg_pci_err_detected(), we will detach first, - * so we need to attach before open - */ - if (!netif_device_present(priv->netdev)) - netif_device_attach(priv->netdev); + if (hbg_reset(priv)) + goto err_unlock; if (running) dev_open(priv->netdev, NULL); + +err_unlock: rtnl_unlock(); } @@ -160,7 +161,6 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); hbg_err_reset(priv); - netif_device_attach(netdev); return PCI_ERS_RESULT_RECOVERED; } From 8d5ac187da10a8a5599b481588951470a2fb792b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 May 2025 20:05:45 -0400 Subject: [PATCH 1946/2924] bcachefs: Fix casefold opt via xattr interface Changing the casefold option requires extra checks/work - factor out a helper from bch2_fileattr_set() for the xattr code to use. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 26 +------------------------- fs/bcachefs/inode.c | 36 ++++++++++++++++++++++++++++++++++++ fs/bcachefs/inode.h | 4 +++- fs/bcachefs/xattr.c | 6 ++++++ 4 files changed, 46 insertions(+), 26 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4b742e62255bb7..47f1a64c5c8d83 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1664,33 +1664,9 @@ static int fssetxattr_inode_update_fn(struct btree_trans *trans, return -EINVAL; if (s->casefold != bch2_inode_casefold(c, bi)) { -#ifdef CONFIG_UNICODE - int ret = 0; - /* Not supported on individual files. */ - if (!S_ISDIR(bi->bi_mode)) - return -EOPNOTSUPP; - - /* - * Make sure the dir is empty, as otherwise we'd need to - * rehash everything and update the dirent keys. - */ - ret = bch2_empty_dir_trans(trans, inode_inum(inode)); - if (ret < 0) - return ret; - - ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); + int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->casefold); if (ret) return ret; - - bch2_check_set_feature(c, BCH_FEATURE_casefolding); - - bi->bi_casefold = s->casefold + 1; - bi->bi_fields_set |= BIT(Inode_opt_casefold); - -#else - printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); - return -EOPNOTSUPP; -#endif } if (s->set_project) { diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index b51d98cf8a80d5..490b85841de960 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -14,6 +14,7 @@ #include "extent_update.h" #include "fs.h" #include "inode.h" +#include "namei.h" #include "opts.h" #include "str_hash.h" #include "snapshot.h" @@ -1204,6 +1205,41 @@ int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_i return 0; } +int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, + struct bch_inode_unpacked *bi, unsigned v) +{ + struct bch_fs *c = trans->c; + +#ifdef CONFIG_UNICODE + int ret = 0; + /* Not supported on individual files. */ + if (!S_ISDIR(bi->bi_mode)) + return -EOPNOTSUPP; + + /* + * Make sure the dir is empty, as otherwise we'd need to + * rehash everything and update the dirent keys. + */ + ret = bch2_empty_dir_trans(trans, inum); + if (ret < 0) + return ret; + + ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); + if (ret) + return ret; + + bch2_check_set_feature(c, BCH_FEATURE_casefolding); + + bi->bi_casefold = v + 1; + bi->bi_fields_set |= BIT(Inode_opt_casefold); + + return 0; +#else + bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE"); + return -EOPNOTSUPP; +#endif +} + static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { struct bch_fs *c = trans->c; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index c74af15b14f281..5cfba9e98966d5 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -292,7 +292,9 @@ static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *i struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *); void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, struct bch_inode_unpacked *); -int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); +int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_io_opts *); +int bch2_inode_set_casefold(struct btree_trans *, subvol_inum, + struct bch_inode_unpacked *, unsigned); #include "rebalance.h" diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 651da52b2cbcc0..e6be32003f3b4c 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -473,6 +473,12 @@ static int inode_opt_set_fn(struct btree_trans *trans, { struct inode_opt_set *s = p; + if (s->id == Inode_opt_casefold) { + int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->v); + if (ret) + return ret; + } + if (s->defined) bi->bi_fields_set |= 1U << s->id; else From ecd76c5f108eec3eea0a5b32007cda0dca6c92ac Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 May 2025 00:41:07 -0400 Subject: [PATCH 1947/2924] bcachefs: Fix bch2_dirent_create_snapshot() for casefolding bch2_dirent_create_snapshot(), used in fsck, neglected to create a casefolded dirent. Just move this into dirent_create_key(). Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 33 +++++++++++++++------------------ fs/bcachefs/dirent.h | 2 +- fs/bcachefs/fsck.c | 2 ++ fs/bcachefs/namei.c | 2 -- 4 files changed, 18 insertions(+), 21 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 8a680e52c1ed12..a5119508822789 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -288,6 +288,7 @@ static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent, } static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, + const struct bch_hash_info *hash_info, subvol_inum dir, u8 type, const struct qstr *name, @@ -295,10 +296,19 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, u64 dst) { struct bkey_i_dirent *dirent; + struct qstr _cf_name; if (name->len > BCH_NAME_MAX) return ERR_PTR(-ENAMETOOLONG); + if (hash_info->cf_encoding && !cf_name) { + int ret = bch2_casefold(trans, hash_info, name, &_cf_name); + if (ret) + return ERR_PTR(ret); + + cf_name = &_cf_name; + } + dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst); if (IS_ERR(dirent)) return dirent; @@ -324,7 +334,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans, struct bkey_i_dirent *dirent; int ret; - dirent = dirent_create_key(trans, dir_inum, type, name, NULL, dst_inum); + dirent = dirent_create_key(trans, hash_info, dir_inum, type, name, NULL, dst_inum); ret = PTR_ERR_OR_ZERO(dirent); if (ret) return ret; @@ -333,8 +343,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans, dirent->k.p.snapshot = snapshot; ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, - dir_inum, snapshot, &dirent->k_i, - flags|BTREE_UPDATE_internal_snapshot_node); + dir_inum, snapshot, &dirent->k_i, flags); *dir_offset = dirent->k.p.offset; return ret; @@ -344,28 +353,16 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, const struct bch_hash_info *hash_info, u8 type, const struct qstr *name, u64 dst_inum, u64 *dir_offset, - u64 *i_size, enum btree_iter_update_trigger_flags flags) { struct bkey_i_dirent *dirent; int ret; - if (hash_info->cf_encoding) { - struct qstr cf_name; - ret = bch2_casefold(trans, hash_info, name, &cf_name); - if (ret) - return ret; - dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum); - } else { - dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum); - } - + dirent = dirent_create_key(trans, hash_info, dir, type, name, NULL, dst_inum); ret = PTR_ERR_OR_ZERO(dirent); if (ret) return ret; - *i_size += bkey_bytes(&dirent->k); - ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, dir, &dirent->k_i, flags); *dir_offset = dirent->k.p.offset; @@ -466,7 +463,7 @@ int bch2_dirent_rename(struct btree_trans *trans, *src_offset = dst_iter.pos.offset; /* Create new dst key: */ - new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, + new_dst = dirent_create_key(trans, dst_hash, dst_dir, 0, dst_name, dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0); ret = PTR_ERR_OR_ZERO(new_dst); if (ret) @@ -477,7 +474,7 @@ int bch2_dirent_rename(struct btree_trans *trans, /* Create new src key: */ if (mode == BCH_RENAME_EXCHANGE) { - new_src = dirent_create_key(trans, src_dir, 0, src_name, + new_src = dirent_create_key(trans, src_hash, src_dir, 0, src_name, src_hash->cf_encoding ? &src_name_lookup : NULL, 0); ret = PTR_ERR_OR_ZERO(new_src); if (ret) diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 9838a7ba7ed1b5..d3e7ae669575a6 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -65,7 +65,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32, enum btree_iter_update_trigger_flags); int bch2_dirent_create(struct btree_trans *, subvol_inum, const struct bch_hash_info *, u8, - const struct qstr *, u64, u64 *, u64 *, + const struct qstr *, u64, u64 *, enum btree_iter_update_trigger_flags); static inline unsigned vfs_d_type(unsigned type) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 71d428f376a5f8..0cf9fd4c17bc01 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -306,6 +306,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, &lostfound_str, lostfound->bi_inum, &lostfound->bi_dir_offset, + BTREE_UPDATE_internal_snapshot_node| STR_HASH_must_create) ?: bch2_inode_write_flags(trans, &lostfound_iter, lostfound, BTREE_UPDATE_internal_snapshot_node); @@ -431,6 +432,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * &name, inode->bi_subvol ?: inode->bi_inum, &inode->bi_dir_offset, + BTREE_UPDATE_internal_snapshot_node| STR_HASH_must_create); if (ret) { bch_err_msg(c, ret, "error creating dirent"); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 52c58c6d53d243..9136a90977893c 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -158,7 +158,6 @@ int bch2_create_trans(struct btree_trans *trans, name, dir_target, &dir_offset, - &dir_u->bi_size, STR_HASH_must_create|BTREE_ITER_with_updates) ?: bch2_inode_write(trans, &dir_iter, dir_u); if (ret) @@ -225,7 +224,6 @@ int bch2_link_trans(struct btree_trans *trans, mode_to_type(inode_u->bi_mode), name, inum.inum, &dir_offset, - &dir_u->bi_size, STR_HASH_must_create); if (ret) goto err; From 010c89468134d1991b87122379f86feae23d512f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 21 May 2025 00:38:04 -0400 Subject: [PATCH 1948/2924] bcachefs: Check for casefolded dirents in non casefolded dirs Check for mismatches between casefold dirents and casefold directories. A mismatch will cause lookups to fail, as we'll be doing the lookup with the casefolded name, which won't match the non-casefolded dirent, and vice versa. Reported-by: Christopher Snowhill Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 35 ++++++++++++++++++++++++++++++++++ fs/bcachefs/sb-errors_format.h | 8 +++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0cf9fd4c17bc01..aaf18708527644 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2190,6 +2190,41 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + /* check casefold */ + if (fsck_err_on(d.v->d_casefold != !!hash_info->cf_encoding, + trans, dirent_casefold_mismatch, + "dirent casefold does not match dir casefold\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), + buf.buf))) { + struct qstr name = bch2_dirent_get_name(d); + u32 subvol = d.v->d_type == DT_SUBVOL + ? d.v->d_parent_subvol + : 0; + u64 target = d.v->d_type == DT_SUBVOL + ? d.v->d_child_subvol + : d.v->d_inum; + u64 dir_offset; + + ret = bch2_hash_delete_at(trans, + bch2_dirent_hash_desc, hash_info, iter, + BTREE_UPDATE_internal_snapshot_node) ?: + bch2_dirent_create_snapshot(trans, subvol, + d.k->p.inode, d.k->p.snapshot, + hash_info, + d.v->d_type, + &name, + target, + &dir_offset, + BTREE_ITER_with_updates| + BTREE_UPDATE_internal_snapshot_node| + STR_HASH_must_create) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + + /* might need another check_dirents pass */ + goto out; + } + if (d.v->d_type == DT_SUBVOL) { ret = check_dirent_to_subvol(trans, iter, d); if (ret) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 3b69a924086fd5..4036a20c6adc26 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -209,6 +209,7 @@ enum bch_fsck_flags { x(subvol_to_missing_root, 188, 0) \ x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \ x(bkey_in_missing_snapshot, 190, 0) \ + x(bkey_in_deleted_snapshot, 315, 0) \ x(inode_pos_inode_nonzero, 191, 0) \ x(inode_pos_blockdev_range, 192, 0) \ x(inode_alloc_cursor_inode_bad, 301, 0) \ @@ -216,6 +217,7 @@ enum bch_fsck_flags { x(inode_str_hash_invalid, 194, 0) \ x(inode_v3_fields_start_bad, 195, 0) \ x(inode_snapshot_mismatch, 196, 0) \ + x(snapshot_key_missing_inode_snapshot, 314, 0) \ x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ x(inode_unlinked_and_not_open, 281, 0) \ @@ -237,6 +239,8 @@ enum bch_fsck_flags { x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ + x(inode_has_case_insensitive_not_set, 316, FSCK_AUTOFIX) \ + x(inode_parent_has_case_insensitive_not_set, 317, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ @@ -262,6 +266,7 @@ enum bch_fsck_flags { x(dirent_to_overwritten_inode, 302, 0) \ x(dirent_to_missing_subvol, 230, 0) \ x(dirent_to_itself, 231, 0) \ + x(dirent_casefold_mismatch, 318, FSCK_AUTOFIX) \ x(quota_type_invalid, 232, 0) \ x(xattr_val_size_too_small, 233, 0) \ x(xattr_val_size_too_big, 234, 0) \ @@ -301,6 +306,7 @@ enum bch_fsck_flags { x(btree_ptr_v2_written_0, 268, 0) \ x(subvol_snapshot_bad, 269, 0) \ x(subvol_inode_bad, 270, 0) \ + x(subvol_missing, 308, FSCK_AUTOFIX) \ x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \ x(accounting_mismatch, 272, FSCK_AUTOFIX) \ x(accounting_replicas_not_marked, 273, 0) \ @@ -322,7 +328,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 314, 0) + x(MAX, 319, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From 3a08988123c868dbfdd054541b1090fb891fa49e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 May 2025 18:51:49 -0600 Subject: [PATCH 1949/2924] io_uring/net: only retry recv bundle for a full transfer If a shorter than assumed transfer was seen, a partial buffer will have been filled. For that case it isn't sane to attempt to fill more into the bundle before posting a completion, as that will cause a gap in the received data. Check if the iterator has hit zero and only allow to continue a bundle operation if that is the case. Also ensure that for putting finished buffers, only the current transfer is accounted. Otherwise too many buffers may be put for a short transfer. Link: https://github.com/axboe/liburing/issues/1409 Cc: stable@vger.kernel.org Fixes: 7c71a0af81ba ("io_uring/net: improve recv bundles") Signed-off-by: Jens Axboe --- io_uring/net.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 24040bc3916a1b..27f37fa2ef7936 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -827,18 +827,24 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, cflags |= IORING_CQE_F_SOCK_NONEMPTY; if (sr->flags & IORING_RECVSEND_BUNDLE) { - cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), + size_t this_ret = *ret - sr->done_io; + + cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret), issue_flags); if (sr->retry) cflags = req->cqe.flags | (cflags & CQE_F_MASK); /* bundle with no more immediate buffers, we're done */ if (req->flags & REQ_F_BL_EMPTY) goto finish; - /* if more is available, retry and append to this one */ - if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) { + /* + * If more is available AND it was a full transfer, retry and + * append to this one + */ + if (!sr->retry && kmsg->msg.msg_inq > 0 && this_ret > 0 && + !iov_iter_count(&kmsg->msg.msg_iter)) { req->cqe.flags = cflags & ~CQE_F_MASK; sr->len = kmsg->msg.msg_inq; - sr->done_io += *ret; + sr->done_io += this_ret; sr->retry = true; return false; } From 68477b5dc57108d4306c56b3842d6f80a8161f30 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 21 May 2025 17:07:36 +0900 Subject: [PATCH 1950/2924] ksmbd: fix rename failure I found that rename fails after cifs mount due to update of lookup_one_qstr_excl(). mv a/c b/ mv: cannot move 'a/c' to 'b/c': No such file or directory In order to rename to a new name regardless of whether the dentry is negative, we need to get the dentry through lookup_one_qstr_excl(). So It will not return error if the name doesn't exist. Fixes: 204a575e91f3 ("VFS: add common error checks to lookup_one_qstr_excl()") Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 156ded9ac889bd..baf0d3031a44a0 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -682,7 +682,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, struct ksmbd_file *parent_fp; int new_type; int err, lookup_flags = LOOKUP_NO_SYMLINKS; - int target_lookup_flags = LOOKUP_RENAME_TARGET; + int target_lookup_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE; if (ksmbd_override_fsids(work)) return -ENOMEM; From 10379171f346e6f61d30d9949500a8de4336444a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 20 May 2025 09:25:03 +0900 Subject: [PATCH 1951/2924] ksmbd: use list_first_entry_or_null for opinfo_get_list() The list_first_entry() macro never returns NULL. If the list is empty then it returns an invalid pointer. Use list_first_entry_or_null() to check if the list is empty. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202505080231.7OXwq4Te-lkp@intel.com/ Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 03f606afad93a0..d7a8a580d01362 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -146,12 +146,9 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci) { struct oplock_info *opinfo; - if (list_empty(&ci->m_op_list)) - return NULL; - down_read(&ci->m_lock); - opinfo = list_first_entry(&ci->m_op_list, struct oplock_info, - op_entry); + opinfo = list_first_entry_or_null(&ci->m_op_list, struct oplock_info, + op_entry); if (opinfo) { if (opinfo->conn == NULL || !atomic_inc_not_zero(&opinfo->refcount)) From 407e0efdf8baf1672876d5948b75049860a93e59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 May 2025 12:40:30 +0000 Subject: [PATCH 1952/2924] idpf: fix idpf_vport_splitq_napi_poll() idpf_vport_splitq_napi_poll() can incorrectly return @budget after napi_complete_done() has been called. This violates NAPI rules, because after napi_complete_done(), current thread lost napi ownership. Move the test against POLL_MODE before the napi_complete_done(). Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support") Reported-by: Peter Newman Closes: https://lore.kernel.org/netdev/20250520121908.1805732-1-edumazet@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Joshua Hay Cc: Alan Brady Cc: Madhu Chittim Cc: Phani Burra Cc: Pavan Kumar Linga Link: https://patch.msgid.link/20250520124030.1983936-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index bdf52cef3891b8..2d5f5c9f91ce1e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -4025,6 +4025,14 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) return budget; } + /* Switch to poll mode in the tear-down path after sending disable + * queues virtchnl message, as the interrupts will be disabled after + * that. + */ + if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, + q_vector->tx[0]))) + return budget; + work_done = min_t(int, work_done, budget - 1); /* Exit the polling mode, but don't re-enable interrupts if stack might @@ -4035,15 +4043,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) else idpf_vport_intr_set_wb_on_itr(q_vector); - /* Switch to poll mode in the tear-down path after sending disable - * queues virtchnl message, as the interrupts will be disabled after - * that - */ - if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, - q_vector->tx[0]))) - return budget; - else - return work_done; + return work_done; } /** From b3f6fcd8404f9f92262303369bb877ec5d188a81 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Mon, 19 May 2025 18:19:37 -0700 Subject: [PATCH 1953/2924] iommu: Skip PASID validation for devices without PASID capability Generally PASID support requires ACS settings that usually create single device groups, but there are some niche cases where we can get multi-device groups and still have working PASID support. The primary issue is that PCI switches are not required to treat PASID tagged TLPs specially so appropriate ACS settings are required to route all TLPs to the host bridge if PASID is going to work properly. pci_enable_pasid() does check that each device that will use PASID has the proper ACS settings to achieve this routing. However, no-PASID devices can be combined with PASID capable devices within the same topology using non-uniform ACS settings. In this case the no-PASID devices may not have strict route to host ACS flags and end up being grouped with the PASID devices. This configuration fails to allow use of the PASID within the iommu core code which wrongly checks if the no-PASID device supports PASID. Fix this by ignoring no-PASID devices during the PASID validation. They will never issue a PASID TLP anyhow so they can be ignored. Fixes: c404f55c26fc ("iommu: Validate the PASID in iommu_attach_device_pasid()") Cc: stable@vger.kernel.org Signed-off-by: Tushar Dave Reviewed-by: Lu Baolu Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250520011937.3230557-1-tdave@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 4f91a740c15fe7..9d728800a862e6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3366,10 +3366,12 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain, int ret; for_each_group_device(group, device) { - ret = domain->ops->set_dev_pasid(domain, device->dev, - pasid, old); - if (ret) - goto err_revert; + if (device->dev->iommu->max_pasids > 0) { + ret = domain->ops->set_dev_pasid(domain, device->dev, + pasid, old); + if (ret) + goto err_revert; + } } return 0; @@ -3379,15 +3381,18 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain, for_each_group_device(group, device) { if (device == last_gdev) break; - /* - * If no old domain, undo the succeeded devices/pasid. - * Otherwise, rollback the succeeded devices/pasid to the old - * domain. And it is a driver bug to fail attaching with a - * previously good domain. - */ - if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev, + if (device->dev->iommu->max_pasids > 0) { + /* + * If no old domain, undo the succeeded devices/pasid. + * Otherwise, rollback the succeeded devices/pasid to + * the old domain. And it is a driver bug to fail + * attaching with a previously good domain. + */ + if (!old || + WARN_ON(old->ops->set_dev_pasid(old, device->dev, pasid, domain))) - iommu_remove_dev_pasid(device->dev, pasid, domain); + iommu_remove_dev_pasid(device->dev, pasid, domain); + } } return ret; } @@ -3398,8 +3403,10 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, { struct group_device *device; - for_each_group_device(group, device) - iommu_remove_dev_pasid(device->dev, pasid, domain); + for_each_group_device(group, device) { + if (device->dev->iommu->max_pasids > 0) + iommu_remove_dev_pasid(device->dev, pasid, domain); + } } /* @@ -3440,7 +3447,13 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, mutex_lock(&group->mutex); for_each_group_device(group, device) { - if (pasid >= device->dev->iommu->max_pasids) { + /* + * Skip PASID validation for devices without PASID support + * (max_pasids = 0). These devices cannot issue transactions + * with PASID, so they don't affect group's PASID usage. + */ + if ((device->dev->iommu->max_pasids > 0) && + (pasid >= device->dev->iommu->max_pasids)) { ret = -EINVAL; goto out_unlock; } From 3f981138109f63232a5fb7165938d4c945cc1b9d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 18 May 2025 15:20:37 -0700 Subject: [PATCH 1954/2924] sch_hfsc: Fix qlen accounting bug when using peek in hfsc_enqueue() When enqueuing the first packet to an HFSC class, hfsc_enqueue() calls the child qdisc's peek() operation before incrementing sch->q.qlen and sch->qstats.backlog. If the child qdisc uses qdisc_peek_dequeued(), this may trigger an immediate dequeue and potential packet drop. In such cases, qdisc_tree_reduce_backlog() is called, but the HFSC qdisc's qlen and backlog have not yet been updated, leading to inconsistent queue accounting. This can leave an empty HFSC class in the active list, causing further consequences like use-after-free. This patch fixes the bug by moving the increment of sch->q.qlen and sch->qstats.backlog before the call to the child qdisc's peek() operation. This ensures that queue length and backlog are always accurate when packet drops or dequeues are triggered during the peek. Fixes: 12d0ad3be9c3 ("net/sched/sch_hfsc.c: handle corner cases where head may change invalidating calculated deadline") Reported-by: Mingi Cho Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250518222038.58538-2-xiyou.wangcong@gmail.com Reviewed-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- net/sched/sch_hfsc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index cb8c525ea20eab..7986145a527cbe 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1569,6 +1569,9 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } + sch->qstats.backlog += len; + sch->q.qlen++; + if (first && !cl->cl_nactive) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); @@ -1584,9 +1587,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } - sch->qstats.backlog += len; - sch->q.qlen++; - return NET_XMIT_SUCCESS; } From c3572acffb751af65505511b847f628c872ffae0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 18 May 2025 15:20:38 -0700 Subject: [PATCH 1955/2924] selftests/tc-testing: Add an HFSC qlen accounting test This test reproduces a scenario where HFSC queue length and backlog accounting can become inconsistent when a peek operation triggers a dequeue and possible drop before the parent qdisc updates its counters. The test sets up a DRR root qdisc with an HFSC class, netem, and blackhole children, and uses Scapy to inject a packet. It helps to verify that HFSC correctly tracks qlen and backlog even when packets are dropped during peek-induced dequeue. Cc: Mingi Cho Signed-off-by: Cong Wang Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250518222038.58538-3-xiyou.wangcong@gmail.com Reviewed-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- .../tc-testing/tc-tests/infra/qdiscs.json | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index a951c0d33cd2d0..ddc97ecd8b3911 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -573,5 +573,32 @@ "teardown": [ "$TC qdisc del dev $DEV1 handle 1: root" ] + }, + { + "id": "831d", + "name": "Test HFSC qlen accounting with DRR/NETEM/BLACKHOLE chain", + "category": ["qdisc", "hfsc", "drr", "netem", "blackhole"], + "plugins": { "requires": ["nsPlugin", "scapyPlugin"] }, + "setup": [ + "$IP link set dev $DEV1 up || true", + "$TC qdisc add dev $DEV1 root handle 1: drr", + "$TC filter add dev $DEV1 parent 1: basic classid 1:1", + "$TC class add dev $DEV1 parent 1: classid 1:1 drr", + "$TC qdisc add dev $DEV1 parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DEV1 parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DEV1 parent 2:1 handle 3: netem", + "$TC qdisc add dev $DEV1 parent 3:1 handle 4: blackhole" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1", + "matchPattern": "qdisc hfsc", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"] } ] From 184fb40f731bd3353b0887731f7caba66609e9cd Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Mon, 19 May 2025 12:56:58 +0530 Subject: [PATCH 1956/2924] octeontx2-pf: Avoid adding dcbnl_ops for LBK and SDP vf Priority flow control is not supported for LBK and SDP vf. This patch adds support to not add dcbnl_ops for LBK and SDP vf. Fixes: 8e67558177f8 ("octeontx2-pf: PFC config support with DCBx") Signed-off-by: Suman Ghosh Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250519072658.2960851-1-sumang@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 7ef3ba477d4964..9b28be4c4a5d6c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -729,9 +729,12 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) } #ifdef CONFIG_DCB - err = otx2_dcbnl_set_ops(netdev); - if (err) - goto err_free_zc_bmap; + /* Priority flow control is not supported for LBK and SDP vf(s) */ + if (!(is_otx2_lbkvf(vf->pdev) || is_otx2_sdp_rep(vf->pdev))) { + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_free_zc_bmap; + } #endif otx2_qos_init(vf, qos_txqs); From e279024617134c94fd3e37470156534d5f2b3472 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Tue, 20 May 2025 18:14:04 +0800 Subject: [PATCH 1957/2924] net/tipc: fix slab-use-after-free Read in tipc_aead_encrypt_done Syzbot reported a slab-use-after-free with the following call trace: ================================================================== BUG: KASAN: slab-use-after-free in tipc_aead_encrypt_done+0x4bd/0x510 net/tipc/crypto.c:840 Read of size 8 at addr ffff88807a733000 by task kworker/1:0/25 Call Trace: kasan_report+0xd9/0x110 mm/kasan/report.c:601 tipc_aead_encrypt_done+0x4bd/0x510 net/tipc/crypto.c:840 crypto_request_complete include/crypto/algapi.h:266 aead_request_complete include/crypto/internal/aead.h:85 cryptd_aead_crypt+0x3b8/0x750 crypto/cryptd.c:772 crypto_request_complete include/crypto/algapi.h:266 cryptd_queue_worker+0x131/0x200 crypto/cryptd.c:181 process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231 Allocated by task 8355: kzalloc_noprof include/linux/slab.h:778 tipc_crypto_start+0xcc/0x9e0 net/tipc/crypto.c:1466 tipc_init_net+0x2dd/0x430 net/tipc/core.c:72 ops_init+0xb9/0x650 net/core/net_namespace.c:139 setup_net+0x435/0xb40 net/core/net_namespace.c:343 copy_net_ns+0x2f0/0x670 net/core/net_namespace.c:508 create_new_namespaces+0x3ea/0xb10 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0xc0/0x1f0 kernel/nsproxy.c:228 ksys_unshare+0x419/0x970 kernel/fork.c:3323 __do_sys_unshare kernel/fork.c:3394 Freed by task 63: kfree+0x12a/0x3b0 mm/slub.c:4557 tipc_crypto_stop+0x23c/0x500 net/tipc/crypto.c:1539 tipc_exit_net+0x8c/0x110 net/tipc/core.c:119 ops_exit_list+0xb0/0x180 net/core/net_namespace.c:173 cleanup_net+0x5b7/0xbf0 net/core/net_namespace.c:640 process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231 After freed the tipc_crypto tx by delete namespace, tipc_aead_encrypt_done may still visit it in cryptd_queue_worker workqueue. I reproduce this issue by: ip netns add ns1 ip link add veth1 type veth peer name veth2 ip link set veth1 netns ns1 ip netns exec ns1 tipc bearer enable media eth dev veth1 ip netns exec ns1 tipc node set key this_is_a_master_key master ip netns exec ns1 tipc bearer disable media eth dev veth1 ip netns del ns1 The key of reproduction is that, simd_aead_encrypt is interrupted, leading to crypto_simd_usable() return false. Thus, the cryptd_queue_worker is triggered, and the tipc_crypto tx will be visited. tipc_disc_timeout tipc_bearer_xmit_skb tipc_crypto_xmit tipc_aead_encrypt crypto_aead_encrypt // encrypt() simd_aead_encrypt // crypto_simd_usable() is false child = &ctx->cryptd_tfm->base; simd_aead_encrypt crypto_aead_encrypt // encrypt() cryptd_aead_encrypt_enqueue cryptd_aead_enqueue cryptd_enqueue_request // trigger cryptd_queue_worker queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work) Fix this by holding net reference count before encrypt. Reported-by: syzbot+55c12726619ff85ce1f6@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=55c12726619ff85ce1f6 Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Signed-off-by: Wang Liang Link: https://patch.msgid.link/20250520101404.1341730-1-wangliang74@huawei.com Signed-off-by: Paolo Abeni --- net/tipc/crypto.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index c524421ec65252..8584893b478510 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -817,12 +817,16 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, goto exit; } + /* Get net to avoid freed tipc_crypto when delete namespace */ + get_net(aead->crypto->net); + /* Now, do encrypt */ rc = crypto_aead_encrypt(req); if (rc == -EINPROGRESS || rc == -EBUSY) return rc; tipc_bearer_put(b); + put_net(aead->crypto->net); exit: kfree(ctx); @@ -860,6 +864,7 @@ static void tipc_aead_encrypt_done(void *data, int err) kfree(tx_ctx); tipc_bearer_put(b); tipc_aead_put(aead); + put_net(net); } /** From 0eefa27b493306928d88af6368193b134c98fd64 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 21 May 2025 11:38:33 +0530 Subject: [PATCH 1958/2924] octeontx2-af: Set LMT_ENA bit for APR table entries This patch enables the LMT line for a PF/VF by setting the LMT_ENA bit in the APR_LMT_MAP_ENTRY_S structure. Additionally, it simplifies the logic for calculating the LMTST table index by consistently using the maximum number of hw supported VFs (i.e., 256). Fixes: 873a1e3d207a ("octeontx2-af: cn10k: Setting up lmtst map table"). Signed-off-by: Subbaraya Sundeep Signed-off-by: Geetha sowjanya Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250521060834.19780-2-gakula@marvell.com Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/af/rvu_cn10k.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c index 7fa98aeb3663c0..3838c04b78c221 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -15,13 +15,17 @@ #define LMT_TBL_OP_WRITE 1 #define LMT_MAP_TABLE_SIZE (128 * 1024) #define LMT_MAPTBL_ENTRY_SIZE 16 +#define LMT_MAX_VFS 256 + +#define LMT_MAP_ENTRY_ENA BIT_ULL(20) +#define LMT_MAP_ENTRY_LINES GENMASK_ULL(18, 16) /* Function to perform operations (read/write) on lmtst map table */ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, int lmt_tbl_op) { void __iomem *lmt_map_base; - u64 tbl_base; + u64 tbl_base, cfg; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); @@ -35,6 +39,13 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, *val = readq(lmt_map_base + index); } else { writeq((*val), (lmt_map_base + index)); + + cfg = FIELD_PREP(LMT_MAP_ENTRY_ENA, 0x1); + /* 2048 LMTLINES */ + cfg |= FIELD_PREP(LMT_MAP_ENTRY_LINES, 0x6); + + writeq(cfg, (lmt_map_base + (index + 8))); + /* Flushing the AP interceptor cache to make APR_LMT_MAP_ENTRY_S * changes effective. Write 1 for flush and read is being used as a * barrier and sets up a data dependency. Write to 0 after a write @@ -52,7 +63,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, #define LMT_MAP_TBL_W1_OFF 8 static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc) { - return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) + + return ((rvu_get_pf(pcifunc) * LMT_MAX_VFS) + (pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE; } From a6ae7129819ad20788e610261246e71736543b8b Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Wed, 21 May 2025 11:38:34 +0530 Subject: [PATCH 1959/2924] octeontx2-af: Fix APR entry mapping based on APR_LMT_CFG The current implementation maps the APR table using a fixed size, which can lead to incorrect mapping when the number of PFs and VFs varies. This patch corrects the mapping by calculating the APR table size dynamically based on the values configured in the APR_LMT_CFG register, ensuring accurate representation of APR entries in debugfs. Fixes: 0daa55d033b0 ("octeontx2-af: cn10k: debugfs for dumping LMTST map table"). Signed-off-by: Geetha sowjanya Link: https://patch.msgid.link/20250521060834.19780-3-gakula@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c | 9 ++++++--- .../net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 11 ++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c index 3838c04b78c221..4a3370a40dd887 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -13,7 +13,6 @@ /* RVU LMTST */ #define LMT_TBL_OP_READ 0 #define LMT_TBL_OP_WRITE 1 -#define LMT_MAP_TABLE_SIZE (128 * 1024) #define LMT_MAPTBL_ENTRY_SIZE 16 #define LMT_MAX_VFS 256 @@ -26,10 +25,14 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, { void __iomem *lmt_map_base; u64 tbl_base, cfg; + int pfs, vfs; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + cfg = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); + vfs = 1 << (cfg & 0xF); + pfs = 1 << ((cfg >> 4) & 0x7); - lmt_map_base = ioremap_wc(tbl_base, LMT_MAP_TABLE_SIZE); + lmt_map_base = ioremap_wc(tbl_base, pfs * vfs * LMT_MAPTBL_ENTRY_SIZE); if (!lmt_map_base) { dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); return -ENOMEM; @@ -80,7 +83,7 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc, mutex_lock(&rvu->rsrc_lock); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova); - pf = rvu_get_pf(pcifunc) & 0x1F; + pf = rvu_get_pf(pcifunc) & RVU_PFVF_PF_MASK; val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 | ((pcifunc & RVU_PFVF_FUNC_MASK) & 0xFF); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TXN_REQ, val); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index a1f9ec03c2ce69..c827da62647126 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -553,6 +553,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, u64 lmt_addr, val, tbl_base; int pf, vf, num_vfs, hw_vfs; void __iomem *lmt_map_base; + int apr_pfs, apr_vfs; int buf_size = 10240; size_t off = 0; int index = 0; @@ -568,8 +569,12 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, return -ENOMEM; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + val = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); + apr_vfs = 1 << (val & 0xF); + apr_pfs = 1 << ((val >> 4) & 0x7); - lmt_map_base = ioremap_wc(tbl_base, 128 * 1024); + lmt_map_base = ioremap_wc(tbl_base, apr_pfs * apr_vfs * + LMT_MAPTBL_ENTRY_SIZE); if (!lmt_map_base) { dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); kfree(buf); @@ -591,7 +596,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d \t\t\t", pf); - index = pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE; + index = pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE; off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%llx\t\t", (tbl_base + index)); lmt_addr = readq(lmt_map_base + index); @@ -604,7 +609,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, /* Reading num of VFs per PF */ rvu_get_pf_numvfs(rvu, pf, &num_vfs, &hw_vfs); for (vf = 0; vf < num_vfs; vf++) { - index = (pf * rvu->hw->total_vfs * 16) + + index = (pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE) + ((vf + 1) * LMT_MAPTBL_ENTRY_SIZE); off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d:VF%d \t\t", pf, vf); From 1007ae0d464ceb55a3740634790521d3543aaab9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 May 2025 12:47:31 +0200 Subject: [PATCH 1960/2924] spi: use container_of_cont() for to_spi_device() Some places in the spi core pass in a const pointer to a device and the default container_of() casts that away, which is not a good idea. Preserve the proper const attribute by using container_of_const() for to_spi_device() instead, which is what it was designed for. Note, this removes the NULL check for a device pointer in the call, but no one was ever checking for that return value, and a device pointer should never be NULL overall anyway, so this should be a safe change. Cc: Mark Brown Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2025052230-fidgeting-stooge-66f5@gregkh Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 0ba5e49bace42c..6e64f0193777b0 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -249,10 +249,7 @@ struct spi_device { static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); -static inline struct spi_device *to_spi_device(const struct device *dev) -{ - return dev ? container_of(dev, struct spi_device, dev) : NULL; -} +#define to_spi_device(__dev) container_of_const(__dev, struct spi_device, dev) /* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) From 283ae0c65e9c592f4a1ba4f31917f5e766da7f31 Mon Sep 17 00:00:00 2001 From: Larisa Grigore Date: Thu, 22 May 2025 15:51:30 +0100 Subject: [PATCH 1961/2924] spi: spi-fsl-dspi: restrict register range for regmap access DSPI registers are NOT continuous, some registers are reserved and accessing them from userspace will trigger external abort, add regmap register access table to avoid below abort. For example on S32G: # cat /sys/kernel/debug/regmap/401d8000.spi/registers Internal error: synchronous external abort: 96000210 1 PREEMPT SMP ... Call trace: regmap_mmio_read32le+0x24/0x48 regmap_mmio_read+0x48/0x70 _regmap_bus_reg_read+0x38/0x48 _regmap_read+0x68/0x1b0 regmap_read+0x50/0x78 regmap_read_debugfs+0x120/0x338 Fixes: 1acbdeb92c87 ("spi/fsl-dspi: Convert to use regmap and add big-endian support") Co-developed-by: Xulin Sun Signed-off-by: Xulin Sun Signed-off-by: Larisa Grigore Signed-off-by: James Clark Link: https://patch.msgid.link/20250522-james-nxp-spi-v2-1-bea884630cfb@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 067c954cb6ea03..effb460d436da5 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ // // Copyright 2013 Freescale Semiconductor, Inc. -// Copyright 2020 NXP +// Copyright 2020-2025 NXP // // Freescale DSPI driver // This file contains a driver for the Freescale DSPI @@ -1167,6 +1167,20 @@ static int dspi_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(dspi_pm, dspi_suspend, dspi_resume); +static const struct regmap_range dspi_yes_ranges[] = { + regmap_reg_range(SPI_MCR, SPI_MCR), + regmap_reg_range(SPI_TCR, SPI_CTAR(3)), + regmap_reg_range(SPI_SR, SPI_TXFR3), + regmap_reg_range(SPI_RXFR0, SPI_RXFR3), + regmap_reg_range(SPI_CTARE(0), SPI_CTARE(3)), + regmap_reg_range(SPI_SREX, SPI_SREX), +}; + +static const struct regmap_access_table dspi_access_table = { + .yes_ranges = dspi_yes_ranges, + .n_yes_ranges = ARRAY_SIZE(dspi_yes_ranges), +}; + static const struct regmap_range dspi_volatile_ranges[] = { regmap_reg_range(SPI_MCR, SPI_TCR), regmap_reg_range(SPI_SR, SPI_SR), @@ -1184,6 +1198,8 @@ static const struct regmap_config dspi_regmap_config = { .reg_stride = 4, .max_register = 0x88, .volatile_table = &dspi_volatile_table, + .rd_table = &dspi_access_table, + .wr_table = &dspi_access_table, }; static const struct regmap_range dspi_xspi_volatile_ranges[] = { @@ -1205,6 +1221,8 @@ static const struct regmap_config dspi_xspi_regmap_config[] = { .reg_stride = 4, .max_register = 0x13c, .volatile_table = &dspi_xspi_volatile_table, + .rd_table = &dspi_access_table, + .wr_table = &dspi_access_table, }, { .name = "pushr", From 8a30a6d35a11ff5ccdede7d6740765685385a917 Mon Sep 17 00:00:00 2001 From: Bogdan-Gabriel Roman Date: Thu, 22 May 2025 15:51:31 +0100 Subject: [PATCH 1962/2924] spi: spi-fsl-dspi: Halt the module after a new message transfer The XSPI mode implementation in this driver still uses the EOQ flag to signal the last word in a transmission and deassert the PCS signal. However, at speeds lower than ~200kHZ, the PCS signal seems to remain asserted even when SR[EOQF] = 1 indicates the end of a transmission. This is a problem for target devices which require the deassertation of the PCS signal between transfers. Hence, this commit 'forces' the deassertation of the PCS by stopping the module through MCR[HALT] after completing a new transfer. According to the reference manual, the module stops or transitions from the Running state to the Stopped state after the current frame, when any one of the following conditions exist: - The value of SR[EOQF] = 1. - The chip is in Debug mode and the value of MCR[FRZ] = 1. - The value of MCR[HALT] = 1. This shouldn't be done if the last transfer in the message has cs_change set. Fixes: ea93ed4c181b ("spi: spi-fsl-dspi: Use EOQ for last word in buffer even for XSPI mode") Signed-off-by: Bogdan-Gabriel Roman Signed-off-by: Larisa Grigore Signed-off-by: James Clark Link: https://patch.msgid.link/20250522-james-nxp-spi-v2-2-bea884630cfb@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index effb460d436da5..1fa96e8189cfa1 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -62,6 +62,7 @@ #define SPI_SR_TFIWF BIT(18) #define SPI_SR_RFDF BIT(17) #define SPI_SR_CMDFFF BIT(16) +#define SPI_SR_TXRXS BIT(30) #define SPI_SR_CLEAR (SPI_SR_TCFQF | \ SPI_SR_TFUF | SPI_SR_TFFF | \ SPI_SR_CMDTCF | SPI_SR_SPEF | \ @@ -921,9 +922,20 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, struct spi_transfer *transfer; bool cs = false; int status = 0; + u32 val = 0; + bool cs_change = false; message->actual_length = 0; + /* Put DSPI in running mode if halted. */ + regmap_read(dspi->regmap, SPI_MCR, &val); + if (val & SPI_MCR_HALT) { + regmap_update_bits(dspi->regmap, SPI_MCR, SPI_MCR_HALT, 0); + while (regmap_read(dspi->regmap, SPI_SR, &val) >= 0 && + !(val & SPI_SR_TXRXS)) + ; + } + list_for_each_entry(transfer, &message->transfers, transfer_list) { dspi->cur_transfer = transfer; dspi->cur_msg = message; @@ -953,6 +965,7 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, dspi->tx_cmd |= SPI_PUSHR_CMD_CONT; } + cs_change = transfer->cs_change; dspi->tx = transfer->tx_buf; dspi->rx = transfer->rx_buf; dspi->len = transfer->len; @@ -988,6 +1001,15 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, dspi_deassert_cs(spi, &cs); } + if (status || !cs_change) { + /* Put DSPI in stop mode */ + regmap_update_bits(dspi->regmap, SPI_MCR, + SPI_MCR_HALT, SPI_MCR_HALT); + while (regmap_read(dspi->regmap, SPI_SR, &val) >= 0 && + val & SPI_SR_TXRXS) + ; + } + message->status = status; spi_finalize_current_message(ctlr); @@ -1245,6 +1267,8 @@ static int dspi_init(struct fsl_dspi *dspi) if (!spi_controller_is_target(dspi->ctlr)) mcr |= SPI_MCR_HOST; + mcr |= SPI_MCR_HALT; + regmap_write(dspi->regmap, SPI_MCR, mcr); regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR); From 7aba292eb15389073c7f3bd7847e3862dfdf604d Mon Sep 17 00:00:00 2001 From: Larisa Grigore Date: Thu, 22 May 2025 15:51:32 +0100 Subject: [PATCH 1963/2924] spi: spi-fsl-dspi: Reset SR flags before sending a new message If, in a previous transfer, the controller sends more data than expected by the DSPI target, SR.RFDF (RX FIFO is not empty) will remain asserted. When flushing the FIFOs at the beginning of a new transfer (writing 1 into MCR.CLR_TXF and MCR.CLR_RXF), SR.RFDF should also be cleared. Otherwise, when running in target mode with DMA, if SR.RFDF remains asserted, the DMA callback will be fired before the controller sends any data. Take this opportunity to reset all Status Register fields. Fixes: 5ce3cc567471 ("spi: spi-fsl-dspi: Provide support for DSPI slave mode operation (Vybryd vf610)") Signed-off-by: Larisa Grigore Signed-off-by: James Clark Link: https://patch.msgid.link/20250522-james-nxp-spi-v2-3-bea884630cfb@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 1fa96e8189cfa1..863781ba6c1601 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -975,6 +975,8 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF, SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF); + regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR); + spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer, dspi->progress, !dspi->irq); From 7e7cb7a13c81073d38a10fa7b450d23712281ec4 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 22 May 2025 09:13:28 -0500 Subject: [PATCH 1964/2924] Revert "drm/amd: Keep display off while going into S4" commit 68bfdc8dc0a1a ("drm/amd: Keep display off while going into S4") attempted to keep displays off during the S4 sequence by not resuming display IP. This however leads to hangs because DRM clients such as the console can try to access registers and cause a hang. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4155 Fixes: 68bfdc8dc0a1a ("drm/amd: Keep display off while going into S4") Reviewed-by: Alex Deucher Link: https://lore.kernel.org/r/20250522141328.115095-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit e485502c37b097b0bd773baa7e2741bf7bd2909a) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index cc01b9c68b479e..a187cdb43e7e1d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3469,11 +3469,6 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) return 0; } - - /* leave display off for S4 sequence */ - if (adev->in_s4) - return 0; - /* Recreate dc_state - DC invalidates it when setting power state to S3. */ dc_state_release(dm_state->context); dm_state->context = dc_state_create(dm->dc, NULL); From 57b34cba8ec01e22d2f3628ffa979e0cb9169238 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 6 May 2025 19:53:00 +0530 Subject: [PATCH 1965/2924] drm/xe/mocs: Check if all domains awake Check if all domains are awake specially for LNCF regs Fixes: 298661cd9cea ("drm/xe: Fix MOCS debugfs LNCF readout") Improvements-suggested-by: Himal Prasad Ghimiray Reviewed-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20250506142300.1865783-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit a383cf218ef8bb35d4c03958bd956573b65cf778) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mocs.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 31dade91a08974..0c737413fcb68d 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -775,22 +775,23 @@ void xe_mocs_init(struct xe_gt *gt) void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); + enum xe_force_wake_domains domain; struct xe_mocs_info table; unsigned int fw_ref, flags; flags = get_mocs_settings(xe, &table); + domain = flags & HAS_LNCF_MOCS ? XE_FORCEWAKE_ALL : XE_FW_GT; xe_pm_runtime_get_noresume(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), - flags & HAS_LNCF_MOCS ? - XE_FORCEWAKE_ALL : XE_FW_GT); - if (!fw_ref) + fw_ref = xe_force_wake_get(gt_to_fw(gt), domain); + + if (!xe_force_wake_ref_has_domain(fw_ref, domain)) goto err_fw; table.ops->dump(&table, flags, gt, p); - xe_force_wake_put(gt_to_fw(gt), fw_ref); err_fw: + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); } From 84b6f8503b29a6cc5a82848253a97c09a95fdf49 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 13 May 2025 15:30:10 +0000 Subject: [PATCH 1966/2924] drm/xe: Use xe_mmio_read32() to read mtcfg register The mtcfg register is a 32-bit register and should therefore be accessed using xe_mmio_read32(). Other 3 changes per codestyle suggestion: " xe_mmio.c:83: CHECK: Alignment should match open parenthesis xe_mmio.c:131: CHECK: Comparison to NULL could be written "!xe->mmio.regs" xe_mmio.c:315: CHECK: line length of 103 exceeds 100 columns " Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Reviewed-by: Tejas Upadhyay Cc: Matt Roper Signed-off-by: Shuicheng Lin Link: https://lore.kernel.org/r/20250513153010.3464767-1-shuicheng.lin@intel.com Signed-off-by: Matt Roper (cherry picked from commit d2662cf8f44a68deb6c76ad9f1d9f29dbf7ba601) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mmio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 70a36e77754666..46301f341773f1 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -75,12 +75,12 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) * is fine as it's going to the root tile's mmio, that's * guaranteed to be initialized earlier in xe_mmio_probe_early() */ - mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR); + mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; if (tile_count < xe->info.tile_count) { drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", - xe->info.tile_count, tile_count); + xe->info.tile_count, tile_count); xe->info.tile_count = tile_count; /* @@ -128,7 +128,7 @@ int xe_mmio_probe_early(struct xe_device *xe) */ xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR); xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0); - if (xe->mmio.regs == NULL) { + if (!xe->mmio.regs) { drm_err(&xe->drm, "failed to map registers\n"); return -EIO; } @@ -309,8 +309,8 @@ u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg) return (u64)udw << 32 | ldw; } -static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, - u32 *out_val, bool atomic, bool expect_match) +static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, + u32 timeout_us, u32 *out_val, bool atomic, bool expect_match) { ktime_t cur = ktime_get_raw(); const ktime_t end = ktime_add_us(cur, timeout_us); From 027a362fb36b479030beecbaaec30711ddabf8fa Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Tue, 20 May 2025 12:57:49 -0700 Subject: [PATCH 1967/2924] drm/xe/ptl: Update the PTL pci id table Update to current bspec table. Bspec: 72574 Signed-off-by: Matt Atwood Reviewed-by: Tejas Upadhyay Reviewed-by: Clint Taylor Link: https://lore.kernel.org/r/20250520195749.371748-1-matthew.s.atwood@intel.com Signed-off-by: Matt Roper (cherry picked from commit 49c6dc74b5968885f421f9f1b45eb4890b955870) Signed-off-by: Lucas De Marchi --- include/drm/intel/pciids.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index d212848d07f3fe..a7ce9523c50d37 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -861,6 +861,10 @@ MACRO__(0xB081, ## __VA_ARGS__), \ MACRO__(0xB082, ## __VA_ARGS__), \ MACRO__(0xB083, ## __VA_ARGS__), \ + MACRO__(0xB084, ## __VA_ARGS__), \ + MACRO__(0xB085, ## __VA_ARGS__), \ + MACRO__(0xB086, ## __VA_ARGS__), \ + MACRO__(0xB087, ## __VA_ARGS__), \ MACRO__(0xB08F, ## __VA_ARGS__), \ MACRO__(0xB090, ## __VA_ARGS__), \ MACRO__(0xB0A0, ## __VA_ARGS__), \ From f2eae58c4428bd792c8e91e3666ab0718d87b44a Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Tue, 20 May 2025 03:45:55 -0700 Subject: [PATCH 1968/2924] platform/x86/intel/pmc: Fix Arrow Lake U/H NPU PCI ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ARL requires that the GMA and NPU devices both be in D3Hot in order for PC10 and S0iX to be achieved in S2idle. The original ARL-H/U addition to the intel_pmc_core driver attempted to do this by switching them to D3 in the init and resume calls of the intel_pmc_core driver. The problem is the ARL-H/U have a different NPU device and thus are not being properly set and thus S0iX does not work properly in ARL-H/U. This patch creates a new ARL-H specific device id that is correct and also adds the D3 fixup to the suspend callback. This way if the PCI devies drop from D3 to D0 after resume they can be corrected for the next suspend. Thus there is no dropout in S0iX. Fixes: bd820906ea9d ("platform/x86/intel/pmc: Add Arrow Lake U/H support to intel_pmc_core driver") Signed-off-by: Todd Brandt Link: https://lore.kernel.org/r/a61f78be45c13f39e122dcc684b636f4b21e79a0.1747737446.git.todd.e.brandt@intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/pmc/arl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmc/arl.c b/drivers/platform/x86/intel/pmc/arl.c index 320993bd6d31de..f9c48738b853b4 100644 --- a/drivers/platform/x86/intel/pmc/arl.c +++ b/drivers/platform/x86/intel/pmc/arl.c @@ -681,6 +681,7 @@ static struct pmc_info arl_pmc_info_list[] = { #define ARL_NPU_PCI_DEV 0xad1d #define ARL_GNA_PCI_DEV 0xae4c +#define ARL_H_NPU_PCI_DEV 0x7d1d #define ARL_H_GNA_PCI_DEV 0x774c /* * Set power state of select devices that do not have drivers to D3 @@ -694,7 +695,7 @@ static void arl_d3_fixup(void) static void arl_h_d3_fixup(void) { - pmc_core_set_device_d3(ARL_NPU_PCI_DEV); + pmc_core_set_device_d3(ARL_H_NPU_PCI_DEV); pmc_core_set_device_d3(ARL_H_GNA_PCI_DEV); } From 07c9214c790e66d49cf12c316f648dcd208c4145 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Mon, 19 May 2025 20:18:05 +0300 Subject: [PATCH 1969/2924] mm/cma: make detection of highmem_start more robust Pratyush Yadav reports the following crash: ------------[ cut here ]------------ kernel BUG at arch/x86/mm/physaddr.c:23! ception 0x06 IP 10:ffffffff812ebbf8 error 0 cr2 0xffff88903ffff000 CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.15.0-rc6+ #231 PREEMPT(undef) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014 RIP: 0010:__phys_addr+0x58/0x60 Code: 01 48 89 c2 48 d3 ea 48 85 d2 75 05 e9 91 52 cf 00 0f 0b 48 3d ff ff ff 1f 77 0f 48 8b 05 20 54 55 01 48 01 d0 e9 78 52 cf 00 <0f> 0b 90 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 0000:ffffffff82803dd8 EFLAGS: 00010006 ORIG_RAX: 0000000000000000 RAX: 000000007fffffff RBX: 00000000ffffffff RCX: 0000000000000000 RDX: 000000007fffffff RSI: 0000000280000000 RDI: ffffffffffffffff RBP: ffffffff82803e68 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff83153180 R11: ffffffff82803e48 R12: ffffffff83c9aed0 R13: 0000000000000000 R14: 0000001040000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:0000000000000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88903ffff000 CR3: 0000000002838000 CR4: 00000000000000b0 Call Trace: ? __cma_declare_contiguous_nid+0x6e/0x340 ? cma_declare_contiguous_nid+0x33/0x70 ? dma_contiguous_reserve_area+0x2f/0x70 ? setup_arch+0x6f1/0x870 ? start_kernel+0x52/0x4b0 ? x86_64_start_reservations+0x29/0x30 ? x86_64_start_kernel+0x7c/0x80 ? common_startup_64+0x13e/0x141 The reason is that __cma_declare_contiguous_nid() does: highmem_start = __pa(high_memory - 1) + 1; If dma_contiguous_reserve_area() (or any other CMA declaration) is called before free_area_init(), high_memory is uninitialized. Without CONFIG_DEBUG_VIRTUAL, it will likely work but use the wrong value for highmem_start. The issue occurs because commit e120d1bc12da ("arch, mm: set high_memory in free_area_init()") moved initialization of high_memory after the call to dma_contiguous_reserve() -> __cma_declare_contiguous_nid() on several architectures. In the case CONFIG_HIGHMEM is enabled, some architectures that actually support HIGHMEM (arm, powerpc and x86) have initialization of high_memory before a possible call to __cma_declare_contiguous_nid() and some initialized high_memory late anyway (arc, csky, microblase, mips, sparc, xtensa) even before the commit e120d1bc12da so they are fine with using uninitialized value of high_memory. And in the case CONFIG_HIGHMEM is disabled high_memory essentially becomes the first address after memory end, so instead of relying on high_memory to calculate highmem_start use memblock_end_of_DRAM() and eliminate the dependency of CMA area creation on high_memory in majority of configurations. Link: https://lkml.kernel.org/r/20250519171805.1288393-1-rppt@kernel.org Fixes: e120d1bc12da ("arch, mm: set high_memory in free_area_init()") Signed-off-by: Mike Rapoport (Microsoft) Reported-by: Pratyush Yadav Tested-by: Pratyush Yadav Tested-by: Alexandre Ghiti Reviewed-by: Oscar Salvador Signed-off-by: Andrew Morton --- mm/cma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/cma.c b/mm/cma.c index 15632939f20a1e..c04be488b09929 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -608,7 +608,10 @@ static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, * complain. Find the boundary by adding one to the last valid * address. */ - highmem_start = __pa(high_memory - 1) + 1; + if (IS_ENABLED(CONFIG_HIGHMEM)) + highmem_start = __pa(high_memory - 1) + 1; + else + highmem_start = memblock_end_of_DRAM(); pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n", __func__, &size, &base, &limit, &alignment); From 221fcbf77578826fad8f4bfa0530b5b55bf9676a Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Tue, 20 May 2025 00:38:23 +0800 Subject: [PATCH 1970/2924] module: release codetag section when module load fails When module load fails after memory for codetag section is ready, codetag section memory will not be properly released. This causes memory leak, and if next module load happens to get the same module address, codetag may pick the uninitialized section when manipulating tags during module unload, and leads to "unable to handle page fault" BUG. Link: https://lkml.kernel.org/r/20250519163823.7540-1-00107082@163.com Fixes: 0db6f8d7820a ("alloc_tag: load module tags into separate contiguous memory") Closes: https://lore.kernel.org/all/20250516131246.6244-1-00107082@163.com/ Signed-off-by: David Wang <00107082@163.com> Acked-by: Suren Baghdasaryan Cc: Petr Pavlu Cc: Signed-off-by: Andrew Morton --- kernel/module/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/module/main.c b/kernel/module/main.c index a2859dc3eea66e..5c6ab20240a6d6 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2829,6 +2829,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); module_arch_freeing_init(mod); + codetag_free_module_sections(mod); free_mod_mem(mod); } From 12ca42c237756182aad8ab04654c952765cb9061 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 16 May 2025 17:07:39 -0700 Subject: [PATCH 1971/2924] alloc_tag: allocate percpu counters for module tags dynamically When a module gets unloaded it checks whether any of its tags are still in use and if so, we keep the memory containing module's allocation tags alive until all tags are unused. However percpu counters referenced by the tags are freed by free_module(). This will lead to UAF if the memory allocated by a module is accessed after module was unloaded. To fix this we allocate percpu counters for module allocation tags dynamically and we keep it alive for tags which are still in use after module unloading. This also removes the requirement of a larger PERCPU_MODULE_RESERVE when memory allocation profiling is enabled because percpu memory for counters does not need to be reserved anymore. Link: https://lkml.kernel.org/r/20250517000739.5930-1-surenb@google.com Fixes: 0db6f8d7820a ("alloc_tag: load module tags into separate contiguous memory") Signed-off-by: Suren Baghdasaryan Reported-by: David Wang <00107082@163.com> Closes: https://lore.kernel.org/all/20250516131246.6244-1-00107082@163.com/ Tested-by: David Wang <00107082@163.com> Cc: Christoph Lameter (Ampere) Cc: Dennis Zhou Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 12 ++++++ include/linux/codetag.h | 8 ++-- include/linux/percpu.h | 4 -- lib/alloc_tag.c | 87 +++++++++++++++++++++++++++++++-------- lib/codetag.c | 5 ++- 5 files changed, 88 insertions(+), 28 deletions(-) diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index a946e0203e6d60..8f7931eb7d164c 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #else /* ARCH_NEEDS_WEAK_PER_CPU */ +#ifdef MODULE + +#define DEFINE_ALLOC_TAG(_alloc_tag) \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ + __section(ALLOC_TAG_SECTION_NAME) = { \ + .ct = CODE_TAG_INIT, \ + .counters = NULL }; + +#else /* MODULE */ + #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ @@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; +#endif /* MODULE */ + #endif /* ARCH_NEEDS_WEAK_PER_CPU */ DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, diff --git a/include/linux/codetag.h b/include/linux/codetag.h index d14dbd26b37085..0ee4c21c6dbc7c 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -36,10 +36,10 @@ union codetag_ref { struct codetag_type_desc { const char *section; size_t tag_size; - void (*module_load)(struct codetag_type *cttype, - struct codetag_module *cmod); - void (*module_unload)(struct codetag_type *cttype, - struct codetag_module *cmod); + void (*module_load)(struct module *mod, + struct codetag *start, struct codetag *end); + void (*module_unload)(struct module *mod, + struct codetag *start, struct codetag *end); #ifdef CONFIG_MODULES void (*module_replaced)(struct module *mod, struct module *new_mod); bool (*needs_section_mem)(struct module *mod, unsigned long size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 52b5ea663b9f09..85bf8dd9f08740 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -15,11 +15,7 @@ /* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#ifdef CONFIG_MEM_ALLOC_PROFILING -#define PERCPU_MODULE_RESERVE (8 << 13) -#else #define PERCPU_MODULE_RESERVE (8 << 10) -#endif #else #define PERCPU_MODULE_RESERVE 0 #endif diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 25ecc1334b67dd..c7f602fa7b23fc 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -350,18 +350,28 @@ static bool needs_section_mem(struct module *mod, unsigned long size) return size >= sizeof(struct alloc_tag); } -static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to) +static bool clean_unused_counters(struct alloc_tag *start_tag, + struct alloc_tag *end_tag) { - while (from <= to) { + struct alloc_tag *tag; + bool ret = true; + + for (tag = start_tag; tag <= end_tag; tag++) { struct alloc_tag_counters counter; - counter = alloc_tag_read(from); - if (counter.bytes) - return from; - from++; + if (!tag->counters) + continue; + + counter = alloc_tag_read(tag); + if (!counter.bytes) { + free_percpu(tag->counters); + tag->counters = NULL; + } else { + ret = false; + } } - return NULL; + return ret; } /* Called with mod_area_mt locked */ @@ -371,12 +381,16 @@ static void clean_unused_module_areas_locked(void) struct module *val; mas_for_each(&mas, val, module_tags.size) { + struct alloc_tag *start_tag; + struct alloc_tag *end_tag; + if (val != &unloaded_mod) continue; /* Release area if all tags are unused */ - if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), - (struct alloc_tag *)(module_tags.start_addr + mas.last))) + start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index); + end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last); + if (clean_unused_counters(start_tag, end_tag)) mas_erase(&mas); } } @@ -561,7 +575,8 @@ static void *reserve_module_tags(struct module *mod, unsigned long size, static void release_module_tags(struct module *mod, bool used) { MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size); - struct alloc_tag *tag; + struct alloc_tag *start_tag; + struct alloc_tag *end_tag; struct module *val; mas_lock(&mas); @@ -575,15 +590,22 @@ static void release_module_tags(struct module *mod, bool used) if (!used) goto release_area; - /* Find out if the area is used */ - tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), - (struct alloc_tag *)(module_tags.start_addr + mas.last)); - if (tag) { - struct alloc_tag_counters counter = alloc_tag_read(tag); + start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index); + end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last); + if (!clean_unused_counters(start_tag, end_tag)) { + struct alloc_tag *tag; + + for (tag = start_tag; tag <= end_tag; tag++) { + struct alloc_tag_counters counter; + + if (!tag->counters) + continue; - pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", - tag->ct.filename, tag->ct.lineno, tag->ct.modname, - tag->ct.function, counter.bytes); + counter = alloc_tag_read(tag); + pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", + tag->ct.filename, tag->ct.lineno, tag->ct.modname, + tag->ct.function, counter.bytes); + } } else { used = false; } @@ -596,6 +618,34 @@ static void release_module_tags(struct module *mod, bool used) mas_unlock(&mas); } +static void load_module(struct module *mod, struct codetag *start, struct codetag *stop) +{ + /* Allocate module alloc_tag percpu counters */ + struct alloc_tag *start_tag; + struct alloc_tag *stop_tag; + struct alloc_tag *tag; + + if (!mod) + return; + + start_tag = ct_to_alloc_tag(start); + stop_tag = ct_to_alloc_tag(stop); + for (tag = start_tag; tag < stop_tag; tag++) { + WARN_ON(tag->counters); + tag->counters = alloc_percpu(struct alloc_tag_counters); + if (!tag->counters) { + while (--tag >= start_tag) { + free_percpu(tag->counters); + tag->counters = NULL; + } + shutdown_mem_profiling(true); + pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s. Memory allocation profiling is disabled!\n", + mod->name); + break; + } + } +} + static void replace_module(struct module *mod, struct module *new_mod) { MA_STATE(mas, &mod_area_mt, 0, module_tags.size); @@ -757,6 +807,7 @@ static int __init alloc_tag_init(void) .needs_section_mem = needs_section_mem, .alloc_section_mem = reserve_module_tags, .free_section_mem = release_module_tags, + .module_load = load_module, .module_replaced = replace_module, #endif }; diff --git a/lib/codetag.c b/lib/codetag.c index 42aadd6c145499..de332e98d6f5b5 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -194,7 +194,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) if (err >= 0) { cttype->count += range_size(cttype, &range); if (cttype->desc.module_load) - cttype->desc.module_load(cttype, cmod); + cttype->desc.module_load(mod, range.start, range.stop); } up_write(&cttype->mod_lock); @@ -333,7 +333,8 @@ void codetag_unload_module(struct module *mod) } if (found) { if (cttype->desc.module_unload) - cttype->desc.module_unload(cttype, cmod); + cttype->desc.module_unload(cmod->mod, + cmod->range.start, cmod->range.stop); cttype->count -= range_size(cttype, &cmod->range); idr_remove(&cttype->mod_idr, mod_id); From f7a35a3c36d1e36059c5654737d9bee3454f01a3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 15 May 2025 14:42:15 -0700 Subject: [PATCH 1972/2924] mm: vmalloc: actually use the in-place vrealloc region Patch series "mm: vmalloc: Actually use the in-place vrealloc region". This fixes a performance regression[1] with vrealloc()[1]. The refactoring to not build a new vmalloc region only actually worked when shrinking. Actually return the resized area when it grows. Ugh. Link: https://lkml.kernel.org/r/20250515214217.619685-1-kees@kernel.org Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing") Signed-off-by: Kees Cook Reported-by: Shung-Hsi Yu Closes: https://lore.kernel.org/all/20250515-bpf-verifier-slowdown-vwo2meju4cgp2su5ckj@6gi6ssxbnfqg [1] Tested-by: Eduard Zingerman Tested-by: Pawan Gupta Tested-by: Shung-Hsi Yu Reviewed-by: "Uladzislau Rezki (Sony)" Reviewed-by: Danilo Krummrich Cc: "Erhard F." Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2d7511654831e9..74bd00fd734dd4 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4111,6 +4111,7 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) if (want_init_on_alloc(flags)) memset((void *)p + old_size, 0, size - old_size); vm->requested_size = size; + return (void *)p; } /* TODO: Grow the vm_area, i.e. allocate and map additional pages. */ From 70d1eb031a68cbde4eed8099674be21778441c94 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 15 May 2025 14:42:16 -0700 Subject: [PATCH 1973/2924] mm: vmalloc: only zero-init on vrealloc shrink The common case is to grow reallocations, and since init_on_alloc will have already zeroed the whole allocation, we only need to zero when shrinking the allocation. Link: https://lkml.kernel.org/r/20250515214217.619685-2-kees@kernel.org Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing") Signed-off-by: Kees Cook Tested-by: Pawan Gupta Cc: Danilo Krummrich Cc: Eduard Zingerman Cc: "Erhard F." Cc: Shung-Hsi Yu Cc: "Uladzislau Rezki (Sony)" Cc: Signed-off-by: Andrew Morton --- mm/vmalloc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 74bd00fd734dd4..00cf1b575c8962 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -4093,8 +4093,8 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) * would be a good heuristic for when to shrink the vm_area? */ if (size <= old_size) { - /* Zero out "freed" memory. */ - if (want_init_on_free()) + /* Zero out "freed" memory, potentially for future realloc. */ + if (want_init_on_free() || want_init_on_alloc(flags)) memset((void *)p + size, 0, old_size - size); vm->requested_size = size; kasan_poison_vmalloc(p + size, old_size - size); @@ -4107,9 +4107,11 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) if (size <= alloced_size) { kasan_unpoison_vmalloc(p + old_size, size - old_size, KASAN_VMALLOC_PROT_NORMAL); - /* Zero out "alloced" memory. */ - if (want_init_on_alloc(flags)) - memset((void *)p + old_size, 0, size - old_size); + /* + * No need to zero memory here, as unused memory will have + * already been zeroed at initial allocation time or during + * realloc shrink time. + */ vm->requested_size = size; return (void *)p; } From 113ed54ad276c352ee5ce109bdcf0df118a43bda Mon Sep 17 00:00:00 2001 From: Ge Yang Date: Thu, 22 May 2025 11:22:17 +0800 Subject: [PATCH 1974/2924] mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios A kernel crash was observed when replacing free hugetlb folios: BUG: kernel NULL pointer dereference, address: 0000000000000028 PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 28 UID: 0 PID: 29639 Comm: test_cma.sh Tainted 6.15.0-rc6-zp #41 PREEMPT(voluntary) RIP: 0010:alloc_and_dissolve_hugetlb_folio+0x1d/0x1f0 RSP: 0018:ffffc9000b30fa90 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000342cca RCX: ffffea0043000000 RDX: ffffc9000b30fb08 RSI: ffffea0043000000 RDI: 0000000000000000 RBP: ffffc9000b30fb20 R08: 0000000000001000 R09: 0000000000000000 R10: ffff88886f92eb00 R11: 0000000000000000 R12: ffffea0043000000 R13: 0000000000000000 R14: 00000000010c0200 R15: 0000000000000004 FS: 00007fcda5f14740(0000) GS:ffff8888ec1d8000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000028 CR3: 0000000391402000 CR4: 0000000000350ef0 Call Trace: replace_free_hugepage_folios+0xb6/0x100 alloc_contig_range_noprof+0x18a/0x590 ? srso_return_thunk+0x5/0x5f ? down_read+0x12/0xa0 ? srso_return_thunk+0x5/0x5f cma_range_alloc.constprop.0+0x131/0x290 __cma_alloc+0xcf/0x2c0 cma_alloc_write+0x43/0xb0 simple_attr_write_xsigned.constprop.0.isra.0+0xb2/0x110 debugfs_attr_write+0x46/0x70 full_proxy_write+0x62/0xa0 vfs_write+0xf8/0x420 ? srso_return_thunk+0x5/0x5f ? filp_flush+0x86/0xa0 ? srso_return_thunk+0x5/0x5f ? filp_close+0x1f/0x30 ? srso_return_thunk+0x5/0x5f ? do_dup2+0xaf/0x160 ? srso_return_thunk+0x5/0x5f ksys_write+0x65/0xe0 do_syscall_64+0x64/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e There is a potential race between __update_and_free_hugetlb_folio() and replace_free_hugepage_folios(): CPU1 CPU2 __update_and_free_hugetlb_folio replace_free_hugepage_folios folio_test_hugetlb(folio) -- It's still hugetlb folio. __folio_clear_hugetlb(folio) hugetlb_free_folio(folio) h = folio_hstate(folio) -- Here, h is NULL pointer When the above race condition occurs, folio_hstate(folio) returns NULL, and subsequent access to this NULL pointer will cause the system to crash. To resolve this issue, execute folio_hstate(folio) under the protection of the hugetlb_lock lock, ensuring that folio_hstate(folio) does not return NULL. Link: https://lkml.kernel.org/r/1747884137-26685-1-git-send-email-yangge1116@126.com Fixes: 04f13d241b8b ("mm: replace free hugepage folios after migration") Signed-off-by: Ge Yang Reviewed-by: Muchun Song Reviewed-by: Oscar Salvador Cc: Baolin Wang Cc: Barry Song <21cnbao@gmail.com> Cc: David Hildenbrand Cc: Signed-off-by: Andrew Morton --- mm/hugetlb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7ae38bfb9096c4..55e6796b24d0ae 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2949,12 +2949,20 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) while (start_pfn < end_pfn) { folio = pfn_folio(start_pfn); + + /* + * The folio might have been dissolved from under our feet, so make sure + * to carefully check the state under the lock. + */ + spin_lock_irq(&hugetlb_lock); if (folio_test_hugetlb(folio)) { h = folio_hstate(folio); } else { + spin_unlock_irq(&hugetlb_lock); start_pfn++; continue; } + spin_unlock_irq(&hugetlb_lock); if (!folio_ref_count(folio)) { ret = alloc_and_dissolve_hugetlb_folio(h, folio, From 06717a7b6c86514dbd6ab322e8083ffaa4db5712 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 23 May 2025 10:21:06 -0700 Subject: [PATCH 1975/2924] memcg: always call cond_resched() after fn() I am seeing soft lockup on certain machine types when a cgroup OOMs. This is happening because killing the process in certain machine might be very slow, which causes the soft lockup and RCU stalls. This happens usually when the cgroup has MANY processes and memory.oom.group is set. Example I am seeing in real production: [462012.244552] Memory cgroup out of memory: Killed process 3370438 (crosvm) .... .... [462037.318059] Memory cgroup out of memory: Killed process 4171372 (adb) .... [462037.348314] watchdog: BUG: soft lockup - CPU#64 stuck for 26s! [stat_manager-ag:1618982] .... Quick look at why this is so slow, it seems to be related to serial flush for certain machine types. For all the crashes I saw, the target CPU was at console_flush_all(). In the case above, there are thousands of processes in the cgroup, and it is soft locking up before it reaches the 1024 limit in the code (which would call the cond_resched()). So, cond_resched() in 1024 blocks is not sufficient. Remove the counter-based conditional rescheduling logic and call cond_resched() unconditionally after each task iteration, after fn() is called. This avoids the lockup independently of how slow fn() is. Link: https://lkml.kernel.org/r/20250523-memcg_fix-v1-1-ad3eafb60477@debian.org Fixes: ade81479c7dd ("memcg: fix soft lockup in the OOM process") Signed-off-by: Breno Leitao Suggested-by: Rik van Riel Acked-by: Shakeel Butt Cc: Michael van der Westhuizen Cc: Usama Arif Cc: Pavel Begunkov Cc: Chen Ridong Cc: Greg Kroah-Hartman Cc: Johannes Weiner Cc: Michal Hocko Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton --- mm/memcontrol.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c96c1f2b9cf575..2d4d65f25fecd4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1168,7 +1168,6 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg, { struct mem_cgroup *iter; int ret = 0; - int i = 0; BUG_ON(mem_cgroup_is_root(memcg)); @@ -1178,10 +1177,9 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg, css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it); while (!ret && (task = css_task_iter_next(&it))) { - /* Avoid potential softlockup warning */ - if ((++i & 1023) == 0) - cond_resched(); ret = fn(task, arg); + /* Avoid potential softlockup warning */ + cond_resched(); } css_task_iter_end(&it); if (ret) { From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= Date: Fri, 23 May 2025 14:19:10 +0200 Subject: [PATCH 1976/2924] mm: fix copy_vma() error handling for hugetlb mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If, during a mremap() operation for a hugetlb-backed memory mapping, copy_vma() fails after the source vma has been duplicated and opened (ie. vma_link() fails), the error is handled by closing the new vma. This updates the hugetlbfs reservation counter of the reservation map which at this point is referenced by both the source vma and the new copy. As a result, once the new vma has been freed and copy_vma() returns, the reservation counter for the source vma will be incorrect. This patch addresses this corner case by clearing the hugetlb private page reservation reference for the new vma and decrementing the reference before closing the vma, so that vma_close() won't update the reservation counter. This is also what copy_vma_and_data() does with the source vma if copy_vma() succeeds, so a helper function has been added to do the fixup in both functions. The issue was reported by a private syzbot instance and can be reproduced using the C reproducer in [1]. It's also a possible duplicate of public syzbot report [2]. The WARNING report is: ============================================================ page_counter underflow: -1024 nr_pages=1024 WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120 Modules linked in: CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014 RIP: 0010:page_counter_cancel+0xf6/0x120 Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81 RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246 RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000 R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400 FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0 Call Trace: page_counter_uncharge+0x33/0x80 hugetlb_cgroup_uncharge_counter+0xcb/0x120 hugetlb_vm_op_close+0x579/0x960 ? __pfx_hugetlb_vm_op_close+0x10/0x10 remove_vma+0x88/0x130 exit_mmap+0x71e/0xe00 ? __pfx_exit_mmap+0x10/0x10 ? __mutex_unlock_slowpath+0x22e/0x7f0 ? __pfx_exit_aio+0x10/0x10 ? __up_read+0x256/0x690 ? uprobe_clear_state+0x274/0x290 ? mm_update_next_owner+0xa9/0x810 __mmput+0xc9/0x370 exit_mm+0x203/0x2f0 ? __pfx_exit_mm+0x10/0x10 ? taskstats_exit+0x32b/0xa60 do_exit+0x921/0x2740 ? do_raw_spin_lock+0x155/0x3b0 ? __pfx_do_exit+0x10/0x10 ? __pfx_do_raw_spin_lock+0x10/0x10 ? _raw_spin_lock_irq+0xc5/0x100 do_group_exit+0x20c/0x2c0 get_signal+0x168c/0x1720 ? __pfx_get_signal+0x10/0x10 ? schedule+0x165/0x360 arch_do_signal_or_restart+0x8e/0x7d0 ? __pfx_arch_do_signal_or_restart+0x10/0x10 ? __pfx___se_sys_futex+0x10/0x10 syscall_exit_to_user_mode+0xb8/0x2c0 do_syscall_64+0x75/0x120 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x422dcd Code: Unable to access opcode bytes at 0x422da3. RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720 R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0 R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000 ============================================================ Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df1a8cfefd@igalia.com Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter_cancel__repro.c [1] Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2] Signed-off-by: Ricardo Cañuelo Navarro Suggested-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Cc: Florent Revest Cc: Jann Horn Cc: Oscar Salvador Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 5 +++++ mm/hugetlb.c | 16 +++++++++++++++- mm/mremap.c | 3 +-- mm/vma.c | 1 + 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8f3ac832ee7f38..4861a7e304bbf4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); +void fixup_hugetlb_reservations(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 55e6796b24d0ae..6a3cf7935c1499 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma) /* * Reset and decrement one ref on hugepage private reservation. * Called with mm->mmap_lock writer semaphore held. - * This function should be only used by move_vma() and operate on + * This function should be only used by mremap and operate on * same sized vma. It should never come here with last ref on the * reservation. */ @@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), ALIGN_DOWN(vma->vm_end, PUD_SIZE)); } + +/* + * For hugetlb, mremap() is an odd edge case - while the VMA copying is + * performed, we permit both the old and new VMAs to reference the same + * reservation. + * + * We fix this up after the operation succeeds, or if a newly allocated VMA + * is closed as a result of a failure to allocate memory. + */ +void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ + if (is_vm_hugetlb_page(vma)) + clear_vma_resv_huge_pages(vma); +} diff --git a/mm/mremap.c b/mm/mremap.c index 7db9da609c84f0..0d4948b720e22e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm, mremap_userfaultfd_prep(new_vma, vrm->uf); } - if (is_vm_hugetlb_page(vma)) - clear_vma_resv_huge_pages(vma); + fixup_hugetlb_reservations(vma); /* Tell pfnmap has moved from this vma */ if (unlikely(vma->vm_flags & VM_PFNMAP)) diff --git a/mm/vma.c b/mm/vma.c index 839d12f02c885d..a468d4c29c0cd4 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, return new_vma; out_vma_link: + fixup_hugetlb_reservations(new_vma); vma_close(new_vma); if (new_vma->vm_file) From 1ec971da1c10e6376411e6d4a3f3b2351217d94f Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 23 May 2025 15:11:04 +0300 Subject: [PATCH 1977/2924] mailmap: add Jarkko's employer email address Add the current employer email address to mailmap. Link: https://lkml.kernel.org/r/20250523121105.15850-1-jarkko@kernel.org Signed-off-by: Jarkko Sakkinen Cc: Alexander Sverdlin Cc: Antonio Quartulli Cc: Carlos Bilbao Cc: Kees Cook Cc: Simon Wunderlich Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 1c70e51c789d34..a2559e2d7fd938 100644 --- a/.mailmap +++ b/.mailmap @@ -312,6 +312,7 @@ Jan Glauber Jan Kuliga Jarkko Sakkinen Jarkko Sakkinen +Jarkko Sakkinen Jason Gunthorpe Jason Gunthorpe Jason Gunthorpe From 478ad02d6844217cc7568619aeb0809d93ade43d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 May 2025 15:43:36 -0700 Subject: [PATCH 1978/2924] Disable FOP_DONTCACHE for now due to bugs This is kind of last-minute, but Al Viro reported that the new FOP_DONTCACHE flag causes memory corruption due to use-after-free issues. This was triggered by commit 974c5e6139db ("xfs: flag as supporting FOP_DONTCACHE"), but that is not the underlying bug - it is just the first user of the flag. Vlastimil Babka suspects the underlying problem stems from the folio_end_writeback() logic introduced in commit fb7d3bc414939 ("mm/filemap: drop streaming/uncached pages when writeback completes"). The most straightforward fix would be to just revert the commit that exposed this, but Matthew Wilcox points out that other filesystems are also starting to enable the FOP_DONTCACHE logic, so this instead disables that bit globally for now. The fix will hopefully end up being trivial and we can just re-enable this logic after more testing, but until such a time we'll have to disable the new FOP_DONTCACHE flag. Reported-by: Al Viro Link: https://lore.kernel.org/all/20250525083209.GS2023217@ZenIV/ Triggered-by: 974c5e6139db ("xfs: flag as supporting FOP_DONTCACHE") Cc: Vlastimil Babka Cc: Matthew Wilcox Cc: Jan Kara Cc: Jens Axboe Cc: Christoph Hellwig Cc: Darrick J. Wong Cc: Christian Brauner Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e36..a4fd649e2c3fcd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2186,7 +2186,7 @@ struct file_operations { /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ -#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) +#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */ /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, From 0ff41df1cb268fc69e703a08a57ee14ae967d0ca Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 May 2025 16:09:23 -0700 Subject: [PATCH 1979/2924] Linux 6.15 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a9edd030365372..c1cd1b5fc269a6 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Baby Opossum Posse # *DOCUMENTATION* From 4c285c30d06083bb06c52a504dc37586d210b4ba Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Thu, 13 Feb 2025 18:50:36 +0800 Subject: [PATCH 1980/2924] arm64: dts: socfpga: agilex5: fix gpio0 address commit a6c9896e65e555d679a4bc71c3cdfce6df4b2343 upstream. Use the correct gpio0 address for Agilex5. Fixes: 3f7c869e143a ("arm64: dts: socfpga: agilex5: Add gpio0 node and spi dma handshake id") Cc: stable@vger.kernel.org Signed-off-by: Niravkumar L Rabara Reviewed-by: Krzysztof Kozlowski Signed-off-by: Dinh Nguyen Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi index 51c6e19e40b843..7d9394a0430272 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi @@ -222,9 +222,9 @@ status = "disabled"; }; - gpio0: gpio@ffc03200 { + gpio0: gpio@10c03200 { compatible = "snps,dw-apb-gpio"; - reg = <0xffc03200 0x100>; + reg = <0x10c03200 0x100>; #address-cells = <1>; #size-cells = <0>; resets = <&rst GPIO0_RESET>; From f4369a9307da341ba67df588ba57abe4f253075f Mon Sep 17 00:00:00 2001 From: Lukasz Czechowski Date: Fri, 25 Apr 2025 17:18:08 +0200 Subject: [PATCH 1981/2924] arm64: dts: rockchip: fix internal USB hub instability on RK3399 Puma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d7cc532df95f7f159e40595440e4e4b99481457b upstream. Currently, the onboard Cypress CYUSB3304 USB hub is not defined in the device tree, and hub reset pin is provided as vcc5v0_host regulator to usb phy. This causes instability issues, as a result of improper reset duration. The fixed regulator device requests the GPIO during probe in its inactive state (except if regulator-boot-on property is set, in which case it is requested in the active state). Considering gpio is GPIO_ACTIVE_LOW for Puma, it means it’s driving it high. Then the regulator gets enabled (because regulator-always-on property), which drives it to its active state, meaning driving it low. The Cypress CYUSB3304 USB hub actually requires the reset to be asserted for at least 5 ms, which we cannot guarantee right now since there's no delay in the current config, meaning the hub may sometimes work or not. We could add delay as offered by fixed-regulator but let's rather fix this by using the proper way to model onboard USB hubs. Define hub_2_0 and hub_3_0 nodes, as the onboard Cypress hub consist of two 'logical' hubs, for USB2.0 and USB3.0. Use the 'reset-gpios' property of hub to assign reset pin instead of using regulator. Rename the vcc5v0_host regulator to cy3304_reset to be more meaningful. Pin is configured to output-high by default, which sets the hub in reset state during pin controller initialization. This allows to avoid double enumeration of devices in case the bootloader has setup the USB hub before the kernel. The vdd-supply and vdd2-supply properties in hub nodes are added to provide correct dt-bindings, although power supplies are always enabled based on HW design. Fixes: 2c66fc34e945 ("arm64: dts: rockchip: add RK3399-Q7 (Puma) SoM") Cc: stable@vger.kernel.org # 6.6 Cc: stable@vger.kernel.org # Backport of the patch in this series fixing product ID in onboard_dev_id_table in drivers/usb/misc/onboard_usb_dev.c driver Signed-off-by: Lukasz Czechowski Link: https://lore.kernel.org/r/20250425-onboard_usb_dev-v2-3-4a76a474a010@thaumatec.com Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index e00fbaa8acc168..314d9dfdba5732 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -60,16 +60,6 @@ vin-supply = <&vcc5v0_sys>; }; - vcc5v0_host: regulator-vcc5v0-host { - compatible = "regulator-fixed"; - gpio = <&gpio4 RK_PA3 GPIO_ACTIVE_LOW>; - pinctrl-names = "default"; - pinctrl-0 = <&vcc5v0_host_en>; - regulator-name = "vcc5v0_host"; - regulator-always-on; - vin-supply = <&vcc5v0_sys>; - }; - vcc5v0_sys: regulator-vcc5v0-sys { compatible = "regulator-fixed"; regulator-name = "vcc5v0_sys"; @@ -527,10 +517,10 @@ }; }; - usb2 { - vcc5v0_host_en: vcc5v0-host-en { + usb { + cy3304_reset: cy3304-reset { rockchip,pins = - <4 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>; + <4 RK_PA3 RK_FUNC_GPIO &pcfg_output_high>; }; }; @@ -597,7 +587,6 @@ }; u2phy1_host: host-port { - phy-supply = <&vcc5v0_host>; status = "okay"; }; }; @@ -609,6 +598,29 @@ &usbdrd_dwc3_1 { status = "okay"; dr_mode = "host"; + pinctrl-names = "default"; + pinctrl-0 = <&cy3304_reset>; + #address-cells = <1>; + #size-cells = <0>; + + hub_2_0: hub@1 { + compatible = "usb4b4,6502", "usb4b4,6506"; + reg = <1>; + peer-hub = <&hub_3_0>; + reset-gpios = <&gpio4 RK_PA3 GPIO_ACTIVE_HIGH>; + vdd-supply = <&vcc1v2_phy>; + vdd2-supply = <&vcc3v3_sys>; + + }; + + hub_3_0: hub@2 { + compatible = "usb4b4,6500", "usb4b4,6504"; + reg = <2>; + peer-hub = <&hub_2_0>; + reset-gpios = <&gpio4 RK_PA3 GPIO_ACTIVE_HIGH>; + vdd-supply = <&vcc1v2_phy>; + vdd2-supply = <&vcc3v3_sys>; + }; }; &usb_host1_ehci { From 074d50aca1b470ac33a2383afd08726cc4012fe3 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 20 May 2025 13:14:27 +0200 Subject: [PATCH 1982/2924] arm64: dts: rockchip: Add missing SFC power-domains to rk3576 commit ede1fa1384c230c9823f6bf1849cf50c5fc8a83e upstream. Add the power-domains for the RK3576 SFC nodes according to the TRM part 1. This fixes potential SErrors when accessing the SFC registers without other peripherals (e.g. eMMC) doing a prior power-domain enable. For example this is easy to trigger on the Rock 4D, which enables the SFC0 interface, but does not enable the eMMC interface at the moment. Cc: stable@vger.kernel.org Fixes: 36299757129c8 ("arm64: dts: rockchip: Add SFC nodes for rk3576") Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20250520-rk3576-fix-fspi-pmdomain-v1-1-f07c6e62dadd@kernel.org Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/rockchip/rk3576.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi index ebb5fc8bb8b136..3824242f8ae88a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi @@ -1364,6 +1364,7 @@ interrupts = ; clocks = <&cru SCLK_FSPI1_X2>, <&cru HCLK_FSPI1>; clock-names = "clk_sfc", "hclk_sfc"; + power-domains = <&power RK3576_PD_SDGMAC>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -1414,6 +1415,7 @@ interrupts = ; clocks = <&cru SCLK_FSPI_X2>, <&cru HCLK_FSPI>; clock-names = "clk_sfc", "hclk_sfc"; + power-domains = <&power RK3576_PD_NVM>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; From 2bef5bfa165d66cf3c39ab4cd22fc3730a777390 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 12 Feb 2025 18:03:52 +0100 Subject: [PATCH 1983/2924] arm64: dts: qcom: ipq9574: Add missing properties for cryptobam commit b4cd966edb2deb5c75fe356191422e127445b830 upstream. num-channels and qcom,num-ees are required for BAM nodes without clock, because the driver cannot ensure the hardware is powered on when trying to obtain the information from the hardware registers. Specifying the node without these properties is unsafe and has caused early boot crashes for other SoCs before [1, 2]. Add the missing information from the hardware registers to ensure the driver can probe successfully without causing crashes. [1]: https://lore.kernel.org/r/CY01EKQVWE36.B9X5TDXAREPF@fairphone.com/ [2]: https://lore.kernel.org/r/20230626145959.646747-1-krzysztof.kozlowski@linaro.org/ Cc: stable@vger.kernel.org Tested-by: Md Sadre Alam Fixes: ffadc79ed99f ("arm64: dts: qcom: ipq9574: Enable crypto nodes") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20250212-bam-dma-fixes-v1-6-f560889e65d8@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/ipq9574.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/ipq9574.dtsi b/arch/arm64/boot/dts/qcom/ipq9574.dtsi index 94229002897257..3c02351fbb156a 100644 --- a/arch/arm64/boot/dts/qcom/ipq9574.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq9574.dtsi @@ -378,6 +378,8 @@ interrupts = ; #dma-cells = <1>; qcom,ee = <1>; + qcom,num-ees = <4>; + num-channels = <16>; qcom,controlled-remotely; }; From 3a1ee7631731e1e0836445c2a7cec90ffdd996bd Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 12 Feb 2025 18:03:51 +0100 Subject: [PATCH 1984/2924] arm64: dts: qcom: sa8775p: Add missing properties for cryptobam commit a2517331f11bd22cded60e791a8818cec3e7597a upstream. num-channels and qcom,num-ees are required for BAM nodes without clock, because the driver cannot ensure the hardware is powered on when trying to obtain the information from the hardware registers. Specifying the node without these properties is unsafe and has caused early boot crashes for other SoCs before [1, 2]. Add the missing information from the hardware registers to ensure the driver can probe successfully without causing crashes. [1]: https://lore.kernel.org/r/CY01EKQVWE36.B9X5TDXAREPF@fairphone.com/ [2]: https://lore.kernel.org/r/20230626145959.646747-1-krzysztof.kozlowski@linaro.org/ Cc: stable@vger.kernel.org Fixes: 7ff3da43ef44 ("arm64: dts: qcom: sa8775p: add QCrypto nodes") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20250212-bam-dma-fixes-v1-5-f560889e65d8@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sa8775p.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 3394ae2d130034..8d7d157ff2f56a 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -2413,6 +2413,8 @@ interrupts = ; #dma-cells = <1>; qcom,ee = <0>; + qcom,num-ees = <4>; + num-channels = <20>; qcom,controlled-remotely; iommus = <&apps_smmu 0x480 0x00>, <&apps_smmu 0x481 0x00>; From 6bfdb733eaeac5e961d160c2c30c853c3594c24d Mon Sep 17 00:00:00 2001 From: Ling Xu Date: Tue, 11 Feb 2025 13:44:14 +0530 Subject: [PATCH 1985/2924] arm64: dts: qcom: sa8775p: Remove extra entries from the iommus property commit eb73f500548a3205741330cbd7d0e209a7a6a9af upstream. There are some items come out to be same value if we do SID & ~MASK. Remove extra entries from the iommus property for sa8775p to simplify. Fixes: f7b01bfb4b47 ("arm64: qcom: sa8775p: Add ADSP and CDSP0 fastrpc nodes") Cc: stable@kernel.org Reviewed-by: Dmitry Baryshkov Signed-off-by: Ling Xu Link: https://lore.kernel.org/r/49f463415c8fa2b08fbc2317e31493362056f403.1739260973.git.quic_lxu5@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sa8775p.dtsi | 240 +++----------------------- 1 file changed, 24 insertions(+), 216 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 8d7d157ff2f56a..7e670187a20e1f 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -4905,15 +4905,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <1>; iommus = <&apps_smmu 0x2141 0x04a0>, - <&apps_smmu 0x2161 0x04a0>, - <&apps_smmu 0x2181 0x0400>, - <&apps_smmu 0x21c1 0x04a0>, - <&apps_smmu 0x21e1 0x04a0>, - <&apps_smmu 0x2541 0x04a0>, - <&apps_smmu 0x2561 0x04a0>, - <&apps_smmu 0x2581 0x0400>, - <&apps_smmu 0x25c1 0x04a0>, - <&apps_smmu 0x25e1 0x04a0>; + <&apps_smmu 0x2181 0x0400>; dma-coherent; }; @@ -4921,15 +4913,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <2>; iommus = <&apps_smmu 0x2142 0x04a0>, - <&apps_smmu 0x2162 0x04a0>, - <&apps_smmu 0x2182 0x0400>, - <&apps_smmu 0x21c2 0x04a0>, - <&apps_smmu 0x21e2 0x04a0>, - <&apps_smmu 0x2542 0x04a0>, - <&apps_smmu 0x2562 0x04a0>, - <&apps_smmu 0x2582 0x0400>, - <&apps_smmu 0x25c2 0x04a0>, - <&apps_smmu 0x25e2 0x04a0>; + <&apps_smmu 0x2182 0x0400>; dma-coherent; }; @@ -4937,15 +4921,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <3>; iommus = <&apps_smmu 0x2143 0x04a0>, - <&apps_smmu 0x2163 0x04a0>, - <&apps_smmu 0x2183 0x0400>, - <&apps_smmu 0x21c3 0x04a0>, - <&apps_smmu 0x21e3 0x04a0>, - <&apps_smmu 0x2543 0x04a0>, - <&apps_smmu 0x2563 0x04a0>, - <&apps_smmu 0x2583 0x0400>, - <&apps_smmu 0x25c3 0x04a0>, - <&apps_smmu 0x25e3 0x04a0>; + <&apps_smmu 0x2183 0x0400>; dma-coherent; }; @@ -4953,15 +4929,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <4>; iommus = <&apps_smmu 0x2144 0x04a0>, - <&apps_smmu 0x2164 0x04a0>, - <&apps_smmu 0x2184 0x0400>, - <&apps_smmu 0x21c4 0x04a0>, - <&apps_smmu 0x21e4 0x04a0>, - <&apps_smmu 0x2544 0x04a0>, - <&apps_smmu 0x2564 0x04a0>, - <&apps_smmu 0x2584 0x0400>, - <&apps_smmu 0x25c4 0x04a0>, - <&apps_smmu 0x25e4 0x04a0>; + <&apps_smmu 0x2184 0x0400>; dma-coherent; }; @@ -4969,15 +4937,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <5>; iommus = <&apps_smmu 0x2145 0x04a0>, - <&apps_smmu 0x2165 0x04a0>, - <&apps_smmu 0x2185 0x0400>, - <&apps_smmu 0x21c5 0x04a0>, - <&apps_smmu 0x21e5 0x04a0>, - <&apps_smmu 0x2545 0x04a0>, - <&apps_smmu 0x2565 0x04a0>, - <&apps_smmu 0x2585 0x0400>, - <&apps_smmu 0x25c5 0x04a0>, - <&apps_smmu 0x25e5 0x04a0>; + <&apps_smmu 0x2185 0x0400>; dma-coherent; }; @@ -4985,15 +4945,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <6>; iommus = <&apps_smmu 0x2146 0x04a0>, - <&apps_smmu 0x2166 0x04a0>, - <&apps_smmu 0x2186 0x0400>, - <&apps_smmu 0x21c6 0x04a0>, - <&apps_smmu 0x21e6 0x04a0>, - <&apps_smmu 0x2546 0x04a0>, - <&apps_smmu 0x2566 0x04a0>, - <&apps_smmu 0x2586 0x0400>, - <&apps_smmu 0x25c6 0x04a0>, - <&apps_smmu 0x25e6 0x04a0>; + <&apps_smmu 0x2186 0x0400>; dma-coherent; }; @@ -5001,15 +4953,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <7>; iommus = <&apps_smmu 0x2147 0x04a0>, - <&apps_smmu 0x2167 0x04a0>, - <&apps_smmu 0x2187 0x0400>, - <&apps_smmu 0x21c7 0x04a0>, - <&apps_smmu 0x21e7 0x04a0>, - <&apps_smmu 0x2547 0x04a0>, - <&apps_smmu 0x2567 0x04a0>, - <&apps_smmu 0x2587 0x0400>, - <&apps_smmu 0x25c7 0x04a0>, - <&apps_smmu 0x25e7 0x04a0>; + <&apps_smmu 0x2187 0x0400>; dma-coherent; }; @@ -5017,15 +4961,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <8>; iommus = <&apps_smmu 0x2148 0x04a0>, - <&apps_smmu 0x2168 0x04a0>, - <&apps_smmu 0x2188 0x0400>, - <&apps_smmu 0x21c8 0x04a0>, - <&apps_smmu 0x21e8 0x04a0>, - <&apps_smmu 0x2548 0x04a0>, - <&apps_smmu 0x2568 0x04a0>, - <&apps_smmu 0x2588 0x0400>, - <&apps_smmu 0x25c8 0x04a0>, - <&apps_smmu 0x25e8 0x04a0>; + <&apps_smmu 0x2188 0x0400>; dma-coherent; }; @@ -5033,15 +4969,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <9>; iommus = <&apps_smmu 0x2149 0x04a0>, - <&apps_smmu 0x2169 0x04a0>, - <&apps_smmu 0x2189 0x0400>, - <&apps_smmu 0x21c9 0x04a0>, - <&apps_smmu 0x21e9 0x04a0>, - <&apps_smmu 0x2549 0x04a0>, - <&apps_smmu 0x2569 0x04a0>, - <&apps_smmu 0x2589 0x0400>, - <&apps_smmu 0x25c9 0x04a0>, - <&apps_smmu 0x25e9 0x04a0>; + <&apps_smmu 0x2189 0x0400>; dma-coherent; }; @@ -5049,15 +4977,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <10>; iommus = <&apps_smmu 0x214a 0x04a0>, - <&apps_smmu 0x216a 0x04a0>, - <&apps_smmu 0x218a 0x0400>, - <&apps_smmu 0x21ca 0x04a0>, - <&apps_smmu 0x21ea 0x04a0>, - <&apps_smmu 0x254a 0x04a0>, - <&apps_smmu 0x256a 0x04a0>, - <&apps_smmu 0x258a 0x0400>, - <&apps_smmu 0x25ca 0x04a0>, - <&apps_smmu 0x25ea 0x04a0>; + <&apps_smmu 0x218a 0x0400>; dma-coherent; }; @@ -5065,15 +4985,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <11>; iommus = <&apps_smmu 0x214b 0x04a0>, - <&apps_smmu 0x216b 0x04a0>, - <&apps_smmu 0x218b 0x0400>, - <&apps_smmu 0x21cb 0x04a0>, - <&apps_smmu 0x21eb 0x04a0>, - <&apps_smmu 0x254b 0x04a0>, - <&apps_smmu 0x256b 0x04a0>, - <&apps_smmu 0x258b 0x0400>, - <&apps_smmu 0x25cb 0x04a0>, - <&apps_smmu 0x25eb 0x04a0>; + <&apps_smmu 0x218b 0x0400>; dma-coherent; }; }; @@ -5133,15 +5045,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <1>; iommus = <&apps_smmu 0x2941 0x04a0>, - <&apps_smmu 0x2961 0x04a0>, - <&apps_smmu 0x2981 0x0400>, - <&apps_smmu 0x29c1 0x04a0>, - <&apps_smmu 0x29e1 0x04a0>, - <&apps_smmu 0x2d41 0x04a0>, - <&apps_smmu 0x2d61 0x04a0>, - <&apps_smmu 0x2d81 0x0400>, - <&apps_smmu 0x2dc1 0x04a0>, - <&apps_smmu 0x2de1 0x04a0>; + <&apps_smmu 0x2981 0x0400>; dma-coherent; }; @@ -5149,15 +5053,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <2>; iommus = <&apps_smmu 0x2942 0x04a0>, - <&apps_smmu 0x2962 0x04a0>, - <&apps_smmu 0x2982 0x0400>, - <&apps_smmu 0x29c2 0x04a0>, - <&apps_smmu 0x29e2 0x04a0>, - <&apps_smmu 0x2d42 0x04a0>, - <&apps_smmu 0x2d62 0x04a0>, - <&apps_smmu 0x2d82 0x0400>, - <&apps_smmu 0x2dc2 0x04a0>, - <&apps_smmu 0x2de2 0x04a0>; + <&apps_smmu 0x2982 0x0400>; dma-coherent; }; @@ -5165,15 +5061,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <3>; iommus = <&apps_smmu 0x2943 0x04a0>, - <&apps_smmu 0x2963 0x04a0>, - <&apps_smmu 0x2983 0x0400>, - <&apps_smmu 0x29c3 0x04a0>, - <&apps_smmu 0x29e3 0x04a0>, - <&apps_smmu 0x2d43 0x04a0>, - <&apps_smmu 0x2d63 0x04a0>, - <&apps_smmu 0x2d83 0x0400>, - <&apps_smmu 0x2dc3 0x04a0>, - <&apps_smmu 0x2de3 0x04a0>; + <&apps_smmu 0x2983 0x0400>; dma-coherent; }; @@ -5181,15 +5069,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <4>; iommus = <&apps_smmu 0x2944 0x04a0>, - <&apps_smmu 0x2964 0x04a0>, - <&apps_smmu 0x2984 0x0400>, - <&apps_smmu 0x29c4 0x04a0>, - <&apps_smmu 0x29e4 0x04a0>, - <&apps_smmu 0x2d44 0x04a0>, - <&apps_smmu 0x2d64 0x04a0>, - <&apps_smmu 0x2d84 0x0400>, - <&apps_smmu 0x2dc4 0x04a0>, - <&apps_smmu 0x2de4 0x04a0>; + <&apps_smmu 0x2984 0x0400>; dma-coherent; }; @@ -5197,15 +5077,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <5>; iommus = <&apps_smmu 0x2945 0x04a0>, - <&apps_smmu 0x2965 0x04a0>, - <&apps_smmu 0x2985 0x0400>, - <&apps_smmu 0x29c5 0x04a0>, - <&apps_smmu 0x29e5 0x04a0>, - <&apps_smmu 0x2d45 0x04a0>, - <&apps_smmu 0x2d65 0x04a0>, - <&apps_smmu 0x2d85 0x0400>, - <&apps_smmu 0x2dc5 0x04a0>, - <&apps_smmu 0x2de5 0x04a0>; + <&apps_smmu 0x2985 0x0400>; dma-coherent; }; @@ -5213,15 +5085,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <6>; iommus = <&apps_smmu 0x2946 0x04a0>, - <&apps_smmu 0x2966 0x04a0>, - <&apps_smmu 0x2986 0x0400>, - <&apps_smmu 0x29c6 0x04a0>, - <&apps_smmu 0x29e6 0x04a0>, - <&apps_smmu 0x2d46 0x04a0>, - <&apps_smmu 0x2d66 0x04a0>, - <&apps_smmu 0x2d86 0x0400>, - <&apps_smmu 0x2dc6 0x04a0>, - <&apps_smmu 0x2de6 0x04a0>; + <&apps_smmu 0x2986 0x0400>; dma-coherent; }; @@ -5229,15 +5093,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <7>; iommus = <&apps_smmu 0x2947 0x04a0>, - <&apps_smmu 0x2967 0x04a0>, - <&apps_smmu 0x2987 0x0400>, - <&apps_smmu 0x29c7 0x04a0>, - <&apps_smmu 0x29e7 0x04a0>, - <&apps_smmu 0x2d47 0x04a0>, - <&apps_smmu 0x2d67 0x04a0>, - <&apps_smmu 0x2d87 0x0400>, - <&apps_smmu 0x2dc7 0x04a0>, - <&apps_smmu 0x2de7 0x04a0>; + <&apps_smmu 0x2987 0x0400>; dma-coherent; }; @@ -5245,15 +5101,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <8>; iommus = <&apps_smmu 0x2948 0x04a0>, - <&apps_smmu 0x2968 0x04a0>, - <&apps_smmu 0x2988 0x0400>, - <&apps_smmu 0x29c8 0x04a0>, - <&apps_smmu 0x29e8 0x04a0>, - <&apps_smmu 0x2d48 0x04a0>, - <&apps_smmu 0x2d68 0x04a0>, - <&apps_smmu 0x2d88 0x0400>, - <&apps_smmu 0x2dc8 0x04a0>, - <&apps_smmu 0x2de8 0x04a0>; + <&apps_smmu 0x2988 0x0400>; dma-coherent; }; @@ -5261,15 +5109,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <9>; iommus = <&apps_smmu 0x2949 0x04a0>, - <&apps_smmu 0x2969 0x04a0>, - <&apps_smmu 0x2989 0x0400>, - <&apps_smmu 0x29c9 0x04a0>, - <&apps_smmu 0x29e9 0x04a0>, - <&apps_smmu 0x2d49 0x04a0>, - <&apps_smmu 0x2d69 0x04a0>, - <&apps_smmu 0x2d89 0x0400>, - <&apps_smmu 0x2dc9 0x04a0>, - <&apps_smmu 0x2de9 0x04a0>; + <&apps_smmu 0x2989 0x0400>; dma-coherent; }; @@ -5277,15 +5117,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <10>; iommus = <&apps_smmu 0x294a 0x04a0>, - <&apps_smmu 0x296a 0x04a0>, - <&apps_smmu 0x298a 0x0400>, - <&apps_smmu 0x29ca 0x04a0>, - <&apps_smmu 0x29ea 0x04a0>, - <&apps_smmu 0x2d4a 0x04a0>, - <&apps_smmu 0x2d6a 0x04a0>, - <&apps_smmu 0x2d8a 0x0400>, - <&apps_smmu 0x2dca 0x04a0>, - <&apps_smmu 0x2dea 0x04a0>; + <&apps_smmu 0x298a 0x0400>; dma-coherent; }; @@ -5293,15 +5125,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <11>; iommus = <&apps_smmu 0x294b 0x04a0>, - <&apps_smmu 0x296b 0x04a0>, - <&apps_smmu 0x298b 0x0400>, - <&apps_smmu 0x29cb 0x04a0>, - <&apps_smmu 0x29eb 0x04a0>, - <&apps_smmu 0x2d4b 0x04a0>, - <&apps_smmu 0x2d6b 0x04a0>, - <&apps_smmu 0x2d8b 0x0400>, - <&apps_smmu 0x2dcb 0x04a0>, - <&apps_smmu 0x2deb 0x04a0>; + <&apps_smmu 0x298b 0x0400>; dma-coherent; }; @@ -5309,15 +5133,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <12>; iommus = <&apps_smmu 0x294c 0x04a0>, - <&apps_smmu 0x296c 0x04a0>, - <&apps_smmu 0x298c 0x0400>, - <&apps_smmu 0x29cc 0x04a0>, - <&apps_smmu 0x29ec 0x04a0>, - <&apps_smmu 0x2d4c 0x04a0>, - <&apps_smmu 0x2d6c 0x04a0>, - <&apps_smmu 0x2d8c 0x0400>, - <&apps_smmu 0x2dcc 0x04a0>, - <&apps_smmu 0x2dec 0x04a0>; + <&apps_smmu 0x298c 0x0400>; dma-coherent; }; @@ -5325,15 +5141,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <13>; iommus = <&apps_smmu 0x294d 0x04a0>, - <&apps_smmu 0x296d 0x04a0>, - <&apps_smmu 0x298d 0x0400>, - <&apps_smmu 0x29Cd 0x04a0>, - <&apps_smmu 0x29ed 0x04a0>, - <&apps_smmu 0x2d4d 0x04a0>, - <&apps_smmu 0x2d6d 0x04a0>, - <&apps_smmu 0x2d8d 0x0400>, - <&apps_smmu 0x2dcd 0x04a0>, - <&apps_smmu 0x2ded 0x04a0>; + <&apps_smmu 0x298d 0x0400>; dma-coherent; }; }; From 1be3c20cd66dc4e22fd3c0ea09439b10ebe7da28 Mon Sep 17 00:00:00 2001 From: Karthik Sanagavarapu Date: Tue, 11 Feb 2025 13:44:15 +0530 Subject: [PATCH 1986/2924] arm64: dts: qcom: sa8775p: Remove cdsp compute-cb@10 commit d180c2bd3b43d55f30c9b99de68bc6bb8420d1c1 upstream. Remove the context bank compute-cb@10 because these SMMU ids are S2-only which is not used for S1 transaction. Fixes: f7b01bfb4b47 ("arm64: qcom: sa8775p: Add ADSP and CDSP0 fastrpc nodes") Cc: stable@kernel.org Signed-off-by: Karthik Sanagavarapu Signed-off-by: Ling Xu Link: https://lore.kernel.org/r/4c9de858fda7848b77ea8c528c9b9d53600ad21a.1739260973.git.quic_lxu5@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sa8775p.dtsi | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 7e670187a20e1f..2329460b210381 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -4973,14 +4973,6 @@ dma-coherent; }; - compute-cb@10 { - compatible = "qcom,fastrpc-compute-cb"; - reg = <10>; - iommus = <&apps_smmu 0x214a 0x04a0>, - <&apps_smmu 0x218a 0x0400>; - dma-coherent; - }; - compute-cb@11 { compatible = "qcom,fastrpc-compute-cb"; reg = <11>; From 6f70532f4eacd37a4f20161606fc5e51fb98b286 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Wed, 14 May 2025 04:46:51 -0700 Subject: [PATCH 1987/2924] arm64: dts: qcom: sm8350: Fix typo in pil_camera_mem node commit 295217420a44403a33c30f99d8337fe7b07eb02b upstream. There is a typo in sm8350.dts where the node label mmeory@85200000 should be memory@85200000. This patch corrects the typo for clarity and consistency. Fixes: b7e8f433a673 ("arm64: dts: qcom: Add basic devicetree support for SM8350 SoC") Cc: stable@vger.kernel.org Signed-off-by: Alok Tiwari Link: https://lore.kernel.org/r/20250514114656.2307828-1-alok.a.tiwari@oracle.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sm8350.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 69da30f35baaab..f055600d6cfe5b 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -455,7 +455,7 @@ no-map; }; - pil_camera_mem: mmeory@85200000 { + pil_camera_mem: memory@85200000 { reg = <0x0 0x85200000 0x0 0x500000>; no-map; }; From 24e2fa6e3c72c00533d00076c3491424b90f7ebc Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 12 Feb 2025 18:03:48 +0100 Subject: [PATCH 1988/2924] arm64: dts: qcom: sm8450: Add missing properties for cryptobam commit 0fe6357229cb15a64b6413c62f1c3d4de68ce55f upstream. num-channels and qcom,num-ees are required for BAM nodes without clock, because the driver cannot ensure the hardware is powered on when trying to obtain the information from the hardware registers. Specifying the node without these properties is unsafe and has caused early boot crashes for other SoCs before [1, 2]. Add the missing information from the hardware registers to ensure the driver can probe successfully without causing crashes. [1]: https://lore.kernel.org/r/CY01EKQVWE36.B9X5TDXAREPF@fairphone.com/ [2]: https://lore.kernel.org/r/20230626145959.646747-1-krzysztof.kozlowski@linaro.org/ Cc: stable@vger.kernel.org Fixes: b92b0d2f7582 ("arm64: dts: qcom: sm8450: add crypto nodes") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20250212-bam-dma-fixes-v1-2-f560889e65d8@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sm8450.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 9c809fc5fa45a9..419df72cd04b0c 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -5283,6 +5283,8 @@ interrupts = ; #dma-cells = <1>; qcom,ee = <0>; + qcom,num-ees = <4>; + num-channels = <16>; qcom,controlled-remotely; iommus = <&apps_smmu 0x584 0x11>, <&apps_smmu 0x588 0x0>, From 0c3d3b39513512ccc5b0bd6bfd8df82aa39db186 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 12 Feb 2025 18:03:49 +0100 Subject: [PATCH 1989/2924] arm64: dts: qcom: sm8550: Add missing properties for cryptobam commit 663cd2cad36da23cf1a3db7868fce9f1a19b2d61 upstream. num-channels and qcom,num-ees are required for BAM nodes without clock, because the driver cannot ensure the hardware is powered on when trying to obtain the information from the hardware registers. Specifying the node without these properties is unsafe and has caused early boot crashes for other SoCs before [1, 2]. Add the missing information from the hardware registers to ensure the driver can probe successfully without causing crashes. [1]: https://lore.kernel.org/r/CY01EKQVWE36.B9X5TDXAREPF@fairphone.com/ [2]: https://lore.kernel.org/r/20230626145959.646747-1-krzysztof.kozlowski@linaro.org/ Cc: stable@vger.kernel.org Fixes: 433477c3bf0b ("arm64: dts: qcom: sm8550: add QCrypto nodes") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20250212-bam-dma-fixes-v1-3-f560889e65d8@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index eac8de4005d82f..ac3e00ad417719 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -1957,6 +1957,8 @@ interrupts = ; #dma-cells = <1>; qcom,ee = <0>; + qcom,num-ees = <4>; + num-channels = <20>; qcom,controlled-remotely; iommus = <&apps_smmu 0x480 0x0>, <&apps_smmu 0x481 0x0>; From bf0a9ddbb38e796587733a283a7e1e5c533bc767 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 12 Feb 2025 18:03:50 +0100 Subject: [PATCH 1990/2924] arm64: dts: qcom: sm8650: Add missing properties for cryptobam commit 38b88722bce07b6a5927f45fbf7a9a85e834572c upstream. num-channels and qcom,num-ees are required for BAM nodes without clock, because the driver cannot ensure the hardware is powered on when trying to obtain the information from the hardware registers. Specifying the node without these properties is unsafe and has caused early boot crashes for other SoCs before [1, 2]. Add the missing information from the hardware registers to ensure the driver can probe successfully without causing crashes. [1]: https://lore.kernel.org/r/CY01EKQVWE36.B9X5TDXAREPF@fairphone.com/ [2]: https://lore.kernel.org/r/20230626145959.646747-1-krzysztof.kozlowski@linaro.org/ Cc: stable@vger.kernel.org Fixes: 10e024671295 ("arm64: dts: qcom: sm8650: add interconnect dependent device nodes") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20250212-bam-dma-fixes-v1-4-f560889e65d8@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/sm8650.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi index 86684cb9a93256..c8a2a76a98f000 100644 --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi @@ -2533,6 +2533,8 @@ <&apps_smmu 0x481 0>; qcom,ee = <0>; + qcom,num-ees = <4>; + num-channels = <20>; qcom,controlled-remotely; }; From 3887db86c4dea5d90793d8c24c652d8c0686f934 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 23 Apr 2025 09:30:08 +0200 Subject: [PATCH 1991/2924] arm64: dts: qcom: x1e001de-devkit: Fix vreg_l2j_1p2 voltage commit 3ed2a9e03abfeece9e30ebc746f935536f661414 upstream. In the ACPI DSDT table, PPP_RESOURCE_ID_LDO2_J is configured with 1256000 uV instead of the 1200000 uV we have currently in the device tree. Use the same for consistency and correctness. Cc: stable@vger.kernel.org Fixes: 7b8a31e82b87 ("arm64: dts: qcom: Add X1E001DE Snapdragon Devkit for Windows") Signed-off-by: Stephan Gerhold Reviewed-by: Johan Hovold Reviewed-by: Abel Vesa Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250423-x1e-vreg-l2j-voltage-v1-2-24b6a2043025@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e001de-devkit.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts b/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts index 5e3970b26e2f95..530648123e5c4f 100644 --- a/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts +++ b/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts @@ -745,8 +745,8 @@ vreg_l2j_1p2: ldo2 { regulator-name = "vreg_l2j_1p2"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1256000>; + regulator-max-microvolt = <1256000>; regulator-initial-mode = ; }; From ddf395b2cde143c5933c2fccc6d2c68ef12d1706 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 14 Mar 2025 15:54:35 +0100 Subject: [PATCH 1992/2924] arm64: dts: qcom: x1e001de-devkit: mark l12b and l15b always-on commit 7d328cc134f7db1e062f616a30cffe96fbc43abb upstream. The l12b and l15b supplies are used by components that are not (fully) described (and some never will be) and must never be disabled. Mark the regulators as always-on to prevent them from being disabled, for example, when consumers probe defer or suspend. Fixes: 7b8a31e82b87 ("arm64: dts: qcom: Add X1E001DE Snapdragon Devkit for Windows") Cc: stable@vger.kernel.org # 6.14 Cc: Sibi Sankar Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250314145440.11371-4-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e001de-devkit.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts b/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts index 530648123e5c4f..f5063a0df9fbfa 100644 --- a/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts +++ b/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts @@ -507,6 +507,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l13b_3p0: ldo13 { @@ -528,6 +529,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l16b_2p9: ldo16 { From 1d84bbbe3bf63e24babb7dabb53b0322f230ed29 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 23 Apr 2025 09:30:09 +0200 Subject: [PATCH 1993/2924] arm64: dts: qcom: x1e80100-asus-vivobook-s15: Fix vreg_l2j_1p2 voltage commit 0fb9ecf8713a7a458f7378c86e0703467db2ad22 upstream. In the ACPI DSDT table, PPP_RESOURCE_ID_LDO2_J is configured with 1256000 uV instead of the 1200000 uV we have currently in the device tree. Use the same for consistency and correctness. Cc: stable@vger.kernel.org Fixes: d0e2f8f62dff ("arm64: dts: qcom: Add device tree for ASUS Vivobook S 15") Signed-off-by: Stephan Gerhold Reviewed-by: Johan Hovold Reviewed-by: Abel Vesa Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250423-x1e-vreg-l2j-voltage-v1-3-24b6a2043025@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts index 53781f9b13af3e..f53067463b7601 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts @@ -330,8 +330,8 @@ vreg_l2j_1p2: ldo2 { regulator-name = "vreg_l2j_1p2"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1256000>; + regulator-max-microvolt = <1256000>; regulator-initial-mode = ; }; From 036fb5ccad269312d6c9e030a10d1f3dfadc55f3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 14 Mar 2025 15:54:36 +0100 Subject: [PATCH 1994/2924] arm64: dts: qcom: x1e80100-dell-xps13-9345: mark l12b and l15b always-on commit 63169c07d74031c5e10a9f91229dabade880cf0f upstream. The l12b and l15b supplies are used by components that are not (fully) described (and some never will be) and must never be disabled. Mark the regulators as always-on to prevent them from being disabled, for example, when consumers probe defer or suspend. Note that these supplies currently have no consumers described in mainline. Fixes: f5b788d0e8cd ("arm64: dts: qcom: Add support for X1-based Dell XPS 13 9345") Cc: stable@vger.kernel.org # 6.13 Reviewed-by: Aleksandrs Vinarskis Tested-by: Aleksandrs Vinarskis Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250314145440.11371-5-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100-dell-xps13-9345.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-dell-xps13-9345.dts b/arch/arm64/boot/dts/qcom/x1e80100-dell-xps13-9345.dts index 86e87f03b0ec61..90f588ed7d63d7 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-dell-xps13-9345.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-dell-xps13-9345.dts @@ -359,6 +359,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l13b_3p0: ldo13 { @@ -380,6 +381,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l17b_2p5: ldo17 { From 3cd0190d80931d1e88dee04b6c7689ca4713dd00 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Wed, 19 Mar 2025 17:05:09 +0100 Subject: [PATCH 1995/2924] arm64: dts: qcom: x1e80100-hp-omnibook-x14: Enable SMB2360 0 and 1 commit 48274b40a3719a950b1062f8125c972a2df5c083 upstream. Commit d37e2646c8a5 ("arm64: dts: qcom: x1e80100-pmics: Enable all SMB2360 separately") disables all SMB2360s and let the board DTS explicitly enable them. The HP OmniBook DTS is from before this change and is missing the explicit enabling. Add that to get all USB root ports. Fixes: 6f18b8d4142c ("arm64: dts: qcom: x1e80100-hp-x14: dt for HP Omnibook X Laptop 14") Cc: stable@vger.kernel.org # 6.14 Signed-off-by: Juerg Haefliger Reviewed-by: Konrad Dybcio Reviewed-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20250319160509.1812805-1-juerg.haefliger@canonical.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts b/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts index cd860a246c450b..c4ac0aaa6f65b3 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts @@ -1352,18 +1352,22 @@ status = "okay"; }; +&smb2360_0 { + status = "okay"; +}; + &smb2360_0_eusb2_repeater { vdd18-supply = <&vreg_l3d_1p8>; vdd3-supply = <&vreg_l2b_3p0>; +}; +&smb2360_1 { status = "okay"; }; &smb2360_1_eusb2_repeater { vdd18-supply = <&vreg_l3d_1p8>; vdd3-supply = <&vreg_l14b_3p0>; - - status = "okay"; }; &swr0 { From a186824a33322b4034434b29add3f129a0990bae Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 23 Apr 2025 09:30:10 +0200 Subject: [PATCH 1996/2924] arm64: dts: qcom: x1e80100-hp-omnibook-x14: Fix vreg_l2j_1p2 voltage commit 4a09dad9d437a13e9cd4383ff7791a816a6e1652 upstream. In the ACPI DSDT table, PPP_RESOURCE_ID_LDO2_J is configured with 1256000 uV instead of the 1200000 uV we have currently in the device tree. Use the same for consistency and correctness. Cc: stable@vger.kernel.org Fixes: 6f18b8d4142c ("arm64: dts: qcom: x1e80100-hp-x14: dt for HP Omnibook X Laptop 14") Signed-off-by: Stephan Gerhold Reviewed-by: Johan Hovold Reviewed-by: Abel Vesa Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250423-x1e-vreg-l2j-voltage-v1-4-24b6a2043025@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts b/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts index c4ac0aaa6f65b3..ec16482b063f0a 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts @@ -871,8 +871,8 @@ vreg_l2j_1p2: ldo2 { regulator-name = "vreg_l2j_1p2"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1256000>; + regulator-max-microvolt = <1256000>; regulator-initial-mode = ; }; From 3cc36387336ef66e57069bf61c6f341680df4b4f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 14 Mar 2025 15:54:37 +0100 Subject: [PATCH 1997/2924] arm64: dts: qcom: x1e80100-hp-x14: mark l12b and l15b always-on commit 3ab4e212a41c46668adf93c8d10d0d3d6de8f0e4 upstream. The l12b and l15b supplies are used by components that are not (fully) described (and some never will be) and must never be disabled. Mark the regulators as always-on to prevent them from being disabled, for example, when consumers probe defer or suspend. Fixes: 6f18b8d4142c ("arm64: dts: qcom: x1e80100-hp-x14: dt for HP Omnibook X Laptop 14") Cc: stable@vger.kernel.org # 6.14 Cc: Jens Glathe Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250314145440.11371-6-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts b/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts index ec16482b063f0a..929da9ecddc47c 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts @@ -633,6 +633,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l13b_3p0: ldo13 { @@ -654,6 +655,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l16b_2p9: ldo16 { From d6fd1909453143096b5f7e6081924e5d4740b6bc Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 23 Apr 2025 09:30:11 +0200 Subject: [PATCH 1998/2924] arm64: dts: qcom: x1e80100-lenovo-yoga-slim7x: Fix vreg_l2j_1p2 voltage commit 4f27ede34ca3369cdcde80c5a4ca84cdb28edbbb upstream. In the ACPI DSDT table, PPP_RESOURCE_ID_LDO2_J is configured with 1256000 uV instead of the 1200000 uV we have currently in the device tree. Use the same for consistency and correctness. Cc: stable@vger.kernel.org Fixes: 45247fe17db2 ("arm64: dts: qcom: x1e80100: add Lenovo Thinkpad Yoga slim 7x devicetree") Signed-off-by: Stephan Gerhold Reviewed-by: Johan Hovold Reviewed-by: Abel Vesa Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250423-x1e-vreg-l2j-voltage-v1-5-24b6a2043025@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index a3d53f2ba2c3d0..1ef679b4cae4e6 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -508,8 +508,8 @@ vreg_l2j_1p2: ldo2 { regulator-name = "vreg_l2j_1p2"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1256000>; + regulator-max-microvolt = <1256000>; regulator-initial-mode = ; }; From e5778778656fa2abc60543542c678d6a492638f5 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 23 Apr 2025 09:30:12 +0200 Subject: [PATCH 1999/2924] arm64: dts: qcom: x1e80100-qcp: Fix vreg_l2j_1p2 voltage commit efdbeae860bf0278b050c6c9ad5921afba4596d0 upstream. In the ACPI DSDT table, PPP_RESOURCE_ID_LDO2_J is configured with 1256000 uV instead of the 1200000 uV we have currently in the device tree. Use the same for consistency and correctness. Cc: stable@vger.kernel.org Fixes: af16b00578a7 ("arm64: dts: qcom: Add base X1E80100 dtsi and the QCP dts") Signed-off-by: Stephan Gerhold Reviewed-by: Johan Hovold Reviewed-by: Abel Vesa Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250423-x1e-vreg-l2j-voltage-v1-6-24b6a2043025@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100-qcp.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index ec594628304a9a..eba59857e778f7 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -675,8 +675,8 @@ vreg_l2j_1p2: ldo2 { regulator-name = "vreg_l2j_1p2"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1256000>; + regulator-max-microvolt = <1256000>; regulator-initial-mode = ; }; From 4db29d6da46b009809f9681e96d9e9113fa741eb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 14 Mar 2025 15:54:39 +0100 Subject: [PATCH 2000/2924] arm64: dts: qcom: x1e80100-qcp: mark l12b and l15b always-on commit ff6ba96378367133b66587bd3ee9f068a39ff3a9 upstream. The l12b and l15b supplies are used by components that are not (fully) described (and some never will be) and must never be disabled. Mark the regulators as always-on to prevent them from being disabled, for example, when consumers probe defer or suspend. Fixes: af16b00578a7 ("arm64: dts: qcom: Add base X1E80100 dtsi and the QCP dts") Cc: stable@vger.kernel.org # 6.8 Cc: Rajendra Nayak Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250314145440.11371-8-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100-qcp.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index eba59857e778f7..f06f4547884683 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -437,6 +437,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l13b_3p0: ldo13 { @@ -458,6 +459,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l16b_2p9: ldo16 { From 752162b455e59b93d4a8bf02e6b6e388a15d477f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 14 Mar 2025 15:54:38 +0100 Subject: [PATCH 2001/2924] arm64: dts: qcom: x1e80100-yoga-slim7x: mark l12b and l15b always-on commit f43a71dc6d8d8378af587675eec77c06e0298c79 upstream. The l12b and l15b supplies are used by components that are not (fully) described (and some never will be) and must never be disabled. Mark the regulators as always-on to prevent them from being disabled, for example, when consumers probe defer or suspend. Fixes: 45247fe17db2 ("arm64: dts: qcom: x1e80100: add Lenovo Thinkpad Yoga slim 7x devicetree") Cc: stable@vger.kernel.org # 6.11 Cc: Srinivas Kandagatla Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20250314145440.11371-7-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index 1ef679b4cae4e6..744a66ae5bdc84 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -290,6 +290,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l14b_3p0: ldo14 { @@ -304,8 +305,8 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; - }; regulators-1 { From c9f794a758b84e5971e326aa8624ff95d9231a7f Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 19 Feb 2025 12:36:18 +0100 Subject: [PATCH 2002/2924] arm64: dts: qcom: x1e80100: Fix video thermal zone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 801befff4c827aa72e3698367c5afc18987a6a3f upstream. A passive trip point at 125°C is pretty high, this is usually the temperature for the critical shutdown trip point. Also, we don't have any passive cooling devices attached to the video thermal zone. Change this to be a critical trip point, and add a "hot" trip point at 90°C for consistency with the other thermal zones. Cc: stable@vger.kernel.org Fixes: 4e915987ff5b ("arm64: dts: qcom: x1e80100: Enable tsens and thermal zone nodes") Signed-off-by: Stephan Gerhold Reviewed-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250219-x1e80100-thermal-fixes-v1-1-d110e44ac3f9@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 4936fa5b98ff7a..019b0e627214b5 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -8727,15 +8727,19 @@ }; video-thermal { - polling-delay-passive = <250>; - thermal-sensors = <&tsens0 12>; trips { trip-point0 { + temperature = <90000>; + hysteresis = <2000>; + type = "hot"; + }; + + video-critical { temperature = <125000>; hysteresis = <1000>; - type = "passive"; + type = "critical"; }; }; }; From 9f0e0d6673c4132448c6d36c61548c258c0ba74c Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 19 Feb 2025 12:36:19 +0100 Subject: [PATCH 2003/2924] arm64: dts: qcom: x1e80100: Apply consistent critical thermal shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 03f2b8eed73418269a158ccebad5d8d8f2f6daa1 upstream. The firmware configures the TSENS controller with a maximum temperature of 120°C. When reaching that temperature, the hardware automatically triggers a reset of the entire platform. Some of the thermal zones in x1e80100.dtsi use a critical trip point of 125°C. It's impossible to reach those. It's preferable to shut down the system cleanly before reaching the hardware trip point. Make the critical temperature trip points consistent by setting all of them to 115°C and apply a consistent hysteresis. The ACPI tables also specify 115°C as critical shutdown temperature. Cc: stable@vger.kernel.org Fixes: 4e915987ff5b ("arm64: dts: qcom: x1e80100: Enable tsens and thermal zone nodes") Signed-off-by: Stephan Gerhold Reviewed-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250219-x1e80100-thermal-fixes-v1-2-d110e44ac3f9@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 128 ++++++++++++------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 019b0e627214b5..e690386bbe4f5f 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -8457,8 +8457,8 @@ }; aoss0-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -8483,7 +8483,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8509,7 +8509,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8535,7 +8535,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8561,7 +8561,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8587,7 +8587,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8613,7 +8613,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8639,7 +8639,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8665,7 +8665,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8683,8 +8683,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -8701,8 +8701,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -8719,7 +8719,7 @@ }; mem-critical { - temperature = <125000>; + temperature = <115000>; hysteresis = <0>; type = "critical"; }; @@ -8737,7 +8737,7 @@ }; video-critical { - temperature = <125000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8755,8 +8755,8 @@ }; aoss0-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -8781,7 +8781,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8807,7 +8807,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8833,7 +8833,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8859,7 +8859,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8885,7 +8885,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8911,7 +8911,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8937,7 +8937,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8963,7 +8963,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8981,8 +8981,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -8999,8 +8999,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9017,8 +9017,8 @@ }; aoss0-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9043,7 +9043,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9069,7 +9069,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9095,7 +9095,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9121,7 +9121,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9147,7 +9147,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9173,7 +9173,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9199,7 +9199,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9225,7 +9225,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9243,8 +9243,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9261,8 +9261,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9279,8 +9279,8 @@ }; aoss0-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9297,8 +9297,8 @@ }; nsp0-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9315,8 +9315,8 @@ }; nsp1-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9333,8 +9333,8 @@ }; nsp2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9351,8 +9351,8 @@ }; nsp3-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9377,7 +9377,7 @@ }; trip-point2 { - temperature = <125000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9403,7 +9403,7 @@ }; trip-point2 { - temperature = <125000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9429,7 +9429,7 @@ }; trip-point2 { - temperature = <125000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9455,7 +9455,7 @@ }; trip-point2 { - temperature = <125000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9481,7 +9481,7 @@ }; trip-point2 { - temperature = <125000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9507,7 +9507,7 @@ }; trip-point2 { - temperature = <125000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9533,7 +9533,7 @@ }; trip-point2 { - temperature = <125000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9559,7 +9559,7 @@ }; trip-point2 { - temperature = <125000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9578,7 +9578,7 @@ camera0-critical { temperature = <115000>; - hysteresis = <0>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9596,7 +9596,7 @@ camera0-critical { temperature = <115000>; - hysteresis = <0>; + hysteresis = <1000>; type = "critical"; }; }; From d145a6a3e252f093dc243d2944fecb2387a3d690 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 19 Feb 2025 12:36:20 +0100 Subject: [PATCH 2004/2924] arm64: dts: qcom: x1e80100: Add GPU cooling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5ba21fa11f473c9827f378ace8c9f983de9e0287 upstream. Unlike the CPU, the GPU does not throttle its speed automatically when it reaches high temperatures. With certain high GPU loads it is possible to reach the critical hardware shutdown temperature of 120°C, endangering the hardware and making it impossible to run certain applications. Set up GPU cooling similar to the ACPI tables, by throttling the GPU speed when reaching 95°C and polling every 200ms. Cc: stable@vger.kernel.org Fixes: 721e38301b79 ("arm64: dts: qcom: x1e80100: Add gpu support") Signed-off-by: Stephan Gerhold Reviewed-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250219-x1e80100-thermal-fixes-v1-3-d110e44ac3f9@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 169 +++++++++++++------------ 1 file changed, 89 insertions(+), 80 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index e690386bbe4f5f..23595aac6dacdd 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -20,6 +20,7 @@ #include #include #include +#include / { interrupt-parent = <&intc>; @@ -9359,24 +9360,25 @@ }; gpuss-0-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 5>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss0_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss0_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { + gpu-critical { temperature = <115000>; hysteresis = <1000>; type = "critical"; @@ -9385,24 +9387,25 @@ }; gpuss-1-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 6>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss1_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss1_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { + gpu-critical { temperature = <115000>; hysteresis = <1000>; type = "critical"; @@ -9411,24 +9414,25 @@ }; gpuss-2-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 7>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss2_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss2_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { + gpu-critical { temperature = <115000>; hysteresis = <1000>; type = "critical"; @@ -9437,24 +9441,25 @@ }; gpuss-3-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 8>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss3_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss3_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { + gpu-critical { temperature = <115000>; hysteresis = <1000>; type = "critical"; @@ -9463,24 +9468,25 @@ }; gpuss-4-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 9>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss4_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss4_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { + gpu-critical { temperature = <115000>; hysteresis = <1000>; type = "critical"; @@ -9489,24 +9495,25 @@ }; gpuss-5-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 10>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss5_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss5_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { + gpu-critical { temperature = <115000>; hysteresis = <1000>; type = "critical"; @@ -9515,24 +9522,25 @@ }; gpuss-6-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 11>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss6_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss6_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { + gpu-critical { temperature = <115000>; hysteresis = <1000>; type = "critical"; @@ -9541,24 +9549,25 @@ }; gpuss-7-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 12>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss7_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss7_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { + gpu-critical { temperature = <115000>; hysteresis = <1000>; type = "critical"; From ddc4e2927f7c1983f9f795377144c3206e5fbd6e Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Tue, 22 Apr 2025 14:03:16 +0300 Subject: [PATCH 2005/2924] arm64: dts: qcom: x1e80100: Fix PCIe 3rd controller DBI size commit 181faec4cc9d90dad0ec7f7c8124269c0ba2e107 upstream. According to documentation, the DBI range size is 0xf20. So fix it. Cc: stable@vger.kernel.org # 6.14 Fixes: f8af195beeb0 ("arm64: dts: qcom: x1e80100: Add support for PCIe3 on x1e80100") Signed-off-by: Abel Vesa Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250422-x1e80100-dts-fix-pcie3-dbi-size-v1-1-c197701fd7e4@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 23595aac6dacdd..5aeecf711340d2 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -3126,7 +3126,7 @@ device_type = "pci"; compatible = "qcom,pcie-x1e80100"; reg = <0x0 0x01bd0000 0x0 0x3000>, - <0x0 0x78000000 0x0 0xf1d>, + <0x0 0x78000000 0x0 0xf20>, <0x0 0x78000f40 0x0 0xa8>, <0x0 0x78001000 0x0 0x1000>, <0x0 0x78100000 0x0 0x100000>, From d556df89295e2fd70618bfa8c71942d797fa3ab7 Mon Sep 17 00:00:00 2001 From: Judith Mendez Date: Tue, 29 Apr 2025 11:33:35 -0500 Subject: [PATCH 2006/2924] arm64: dts: ti: k3-am62-main: Set eMMC clock parent to default commit 3a71cdfec94436079513d9adf4b1d4f7a7edd917 upstream. Set eMMC clock parents to the defaults which is MAIN_PLL0_HSDIV5_CLKOUT for eMMC. This change is necessary since DM is not implementing the correct procedure to switch PLL clock source for eMMC and MMC CLK mux is not glich-free. As a preventative action, lets switch back to the defaults. Fixes: c37c58fdeb8a ("arm64: dts: ti: k3-am62: Add more peripheral nodes") Cc: stable@vger.kernel.org Signed-off-by: Judith Mendez Acked-by: Udit Kumar Acked-by: Bryan Brattlof Link: https://lore.kernel.org/r/20250429163337.15634-2-jm@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-am62-main.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62-main.dtsi index 7d355aa73ea211..0c286f600296cd 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-main.dtsi @@ -552,8 +552,6 @@ power-domains = <&k3_pds 57 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 57 5>, <&k3_clks 57 6>; clock-names = "clk_ahb", "clk_xin"; - assigned-clocks = <&k3_clks 57 6>; - assigned-clock-parents = <&k3_clks 57 8>; bus-width = <8>; mmc-ddr-1_8v; mmc-hs200-1_8v; From 11411df3e4c0ac57f670d50b3d6b58dca5c6a67f Mon Sep 17 00:00:00 2001 From: Judith Mendez Date: Tue, 29 Apr 2025 11:33:36 -0500 Subject: [PATCH 2007/2924] arm64: dts: ti: k3-am62a-main: Set eMMC clock parent to default commit 6af731c5de59cc4e7cce193d446f1fe872ac711b upstream. Set eMMC clock parents to the defaults which is MAIN_PLL0_HSDIV5_CLKOUT for eMMC. This change is necessary since DM is not implementing the correct procedure to switch PLL clock source for eMMC and MMC CLK mux is not glich-free. As a preventative action, lets switch back to the defaults. Fixes: d3ae4e8d8b6a ("arm64: dts: ti: k3-am62a-main: Add sdhci0 instance") Cc: stable@vger.kernel.org Signed-off-by: Judith Mendez Acked-by: Udit Kumar Acked-by: Bryan Brattlof Link: https://lore.kernel.org/r/20250429163337.15634-3-jm@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-am62a-main.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi index a1daba7b1fad5d..455ccc770f16a1 100644 --- a/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi @@ -575,8 +575,6 @@ power-domains = <&k3_pds 57 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 57 5>, <&k3_clks 57 6>; clock-names = "clk_ahb", "clk_xin"; - assigned-clocks = <&k3_clks 57 6>; - assigned-clock-parents = <&k3_clks 57 8>; bus-width = <8>; mmc-hs200-1_8v; ti,clkbuf-sel = <0x7>; From d8e4a5160dd8ebc4b4243e6cacd79cbb595fe492 Mon Sep 17 00:00:00 2001 From: Judith Mendez Date: Tue, 29 Apr 2025 11:33:37 -0500 Subject: [PATCH 2008/2924] arm64: dts: ti: k3-am62p-j722s-common-main: Set eMMC clock parent to default commit 9c6b73fc72e19c449147233587833ce20f84b660 upstream. Set eMMC clock parents to the defaults which is MAIN_PLL0_HSDIV5_CLKOUT for eMMC. This change is necessary since DM is not implementing the correct procedure to switch PLL clock source for eMMC and MMC CLK mux is not glich-free. As a preventative action, lets switch back to the defaults. Fixes: b5080c7c1f7e ("arm64: dts: ti: k3-am62p: Add nodes for more IPs") Cc: stable@vger.kernel.org Signed-off-by: Judith Mendez Acked-by: Udit Kumar Acked-by: Bryan Brattlof Link: https://lore.kernel.org/r/20250429163337.15634-4-jm@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-am62p-j722s-common-main.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-main.dtsi index 6e3beb5c2e010e..f9b5c97518d68f 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-main.dtsi @@ -564,8 +564,6 @@ power-domains = <&k3_pds 57 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 57 1>, <&k3_clks 57 2>; clock-names = "clk_ahb", "clk_xin"; - assigned-clocks = <&k3_clks 57 2>; - assigned-clock-parents = <&k3_clks 57 4>; bus-width = <8>; mmc-ddr-1_8v; mmc-hs200-1_8v; From 78cac4df997b38b54bea6151b7b72515738d3198 Mon Sep 17 00:00:00 2001 From: Yemike Abhilash Chandra Date: Tue, 15 Apr 2025 16:43:26 +0530 Subject: [PATCH 2009/2924] arm64: dts: ti: k3-am62x: Remove clock-names property from IMX219 overlay commit c68ab54a89a8c935732589a35ea2596e2329f167 upstream. The IMX219 sensor device tree bindings do not include a clock-names property. Remove the incorrectly added clock-names entry to avoid dtbs_check warnings. Fixes: 4111db03dc05 ("arm64: dts: ti: k3-am62x: Add overlay for IMX219") Cc: stable@vger.kernel.org Signed-off-by: Yemike Abhilash Chandra Reviewed-by: Neha Malcom Francis Reviewed-by: Jai Luthra Link: https://lore.kernel.org/r/20250415111328.3847502-6-y-abhilashchandra@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso index 76ca02127f95ff..7a0d35eb04d33d 100644 --- a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso +++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso @@ -39,7 +39,6 @@ reg = <0x10>; clocks = <&clk_imx219_fixed>; - clock-names = "xclk"; reset-gpios = <&exp1 13 GPIO_ACTIVE_HIGH>; From 9bb7bdeb9e0b9d8103e89aee211bfd094cbcf0fd Mon Sep 17 00:00:00 2001 From: Yemike Abhilash Chandra Date: Tue, 15 Apr 2025 16:43:27 +0530 Subject: [PATCH 2010/2924] arm64: dts: ti: k3-am62x: Rename I2C switch to I2C mux in IMX219 overlay commit 7b75dd2029ee01a8c11fcf4d97f3ccebbef9f8eb upstream. The IMX219 device tree overlay incorrectly defined an I2C switch instead of an I2C mux. According to the DT bindings, the correct terminology and node definition should use "i2c-mux" instead of "i2c-switch". Hence, update the same to avoid dtbs_check warnings. Fixes: 4111db03dc05 ("arm64: dts: ti: k3-am62x: Add overlay for IMX219") Cc: stable@vger.kernel.org Signed-off-by: Yemike Abhilash Chandra Reviewed-by: Neha Malcom Francis Reviewed-by: Jai Luthra Link: https://lore.kernel.org/r/20250415111328.3847502-7-y-abhilashchandra@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso index 7a0d35eb04d33d..dd090813a32d61 100644 --- a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso +++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso @@ -22,7 +22,7 @@ #size-cells = <0>; status = "okay"; - i2c-switch@71 { + i2c-mux@71 { compatible = "nxp,pca9543"; #address-cells = <1>; #size-cells = <0>; From fb07d9e2fa938226232c881b34845b2efd3e3825 Mon Sep 17 00:00:00 2001 From: Yemike Abhilash Chandra Date: Tue, 15 Apr 2025 16:43:28 +0530 Subject: [PATCH 2011/2924] arm64: dts: ti: k3-am62x: Rename I2C switch to I2C mux in OV5640 overlay commit b22cc402d38774ccc552d18e762c25dde02f7be0 upstream. The OV5640 device tree overlay incorrectly defined an I2C switch instead of an I2C mux. According to the DT bindings, the correct terminology and node definition should use "i2c-mux" instead of "i2c-switch". Hence, update the same to avoid dtbs_check warnings. Fixes: 635ed9715194 ("arm64: dts: ti: k3-am62x: Add overlays for OV5640") Cc: stable@vger.kernel.org Signed-off-by: Yemike Abhilash Chandra Reviewed-by: Neha Malcom Francis Reviewed-by: Jai Luthra Link: https://lore.kernel.org/r/20250415111328.3847502-8-y-abhilashchandra@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso | 2 +- arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso index ccc7f5e43184fa..7fc7c95f5cd578 100644 --- a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso +++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso @@ -22,7 +22,7 @@ #size-cells = <0>; status = "okay"; - i2c-switch@71 { + i2c-mux@71 { compatible = "nxp,pca9543"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso index 4eaf9d757dd0ad..b6bfdfbbdd984a 100644 --- a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso +++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso @@ -22,7 +22,7 @@ #size-cells = <0>; status = "okay"; - i2c-switch@71 { + i2c-mux@71 { compatible = "nxp,pca9543"; #address-cells = <1>; #size-cells = <0>; From 5ebcc420f3256ee407e2254c4a9fc02834dae635 Mon Sep 17 00:00:00 2001 From: Judith Mendez Date: Tue, 29 Apr 2025 12:30:08 -0500 Subject: [PATCH 2012/2924] arm64: dts: ti: k3-am65-main: Add missing taps to sdhci0 commit f55c9f087cc2e2252d44ffd9d58def2066fc176e upstream. For am65x, add missing ITAPDLYSEL values for Default Speed and High Speed SDR modes to sdhci0 node according to the device datasheet [0]. [0] https://www.ti.com/lit/gpn/am6548 Fixes: eac99d38f861 ("arm64: dts: ti: k3-am654-main: Update otap-del-sel values") Cc: stable@vger.kernel.org Signed-off-by: Judith Mendez Reviewed-by: Moteen Shah Link: https://lore.kernel.org/r/20250429173009.33994-1-jm@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index 94a812a1355baf..5ebf7ada6e4851 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -449,6 +449,8 @@ ti,otap-del-sel-mmc-hs = <0x0>; ti,otap-del-sel-ddr52 = <0x5>; ti,otap-del-sel-hs200 = <0x5>; + ti,itap-del-sel-legacy = <0xa>; + ti,itap-del-sel-mmc-hs = <0x1>; ti,itap-del-sel-ddr52 = <0x0>; dma-coherent; status = "disabled"; From e1ea3de2cadf8222a5229bf7d0668936f0b3a617 Mon Sep 17 00:00:00 2001 From: Yemike Abhilash Chandra Date: Tue, 15 Apr 2025 16:43:23 +0530 Subject: [PATCH 2013/2924] arm64: dts: ti: k3-am68-sk: Fix regulator hierarchy commit 7edf0a4d3bb7f5cd84f172b76c380c4259bb4ef8 upstream. Update the vin-supply of the TLV71033 regulator from LM5141 (vsys_3v3) to LM61460 (vsys_5v0) to match the schematics. Add a fixed regulator node for the LM61460 5V supply to support this change. AM68-SK schematics: https://www.ti.com/lit/zip/sprr463 Fixes: a266c180b398 ("arm64: dts: ti: k3-am68-sk: Add support for AM68 SK base board") Cc: stable@vger.kernel.org Signed-off-by: Yemike Abhilash Chandra Reviewed-by: Neha Malcom Francis Reviewed-by: Udit Kumar Link: https://lore.kernel.org/r/20250415111328.3847502-3-y-abhilashchandra@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts b/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts index 11522b36e0cece..5fa70a874d7b4d 100644 --- a/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts +++ b/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts @@ -44,6 +44,17 @@ regulator-boot-on; }; + vsys_5v0: regulator-vsys5v0 { + /* Output of LM61460 */ + compatible = "regulator-fixed"; + regulator-name = "vsys_5v0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + vin-supply = <&vusb_main>; + regulator-always-on; + regulator-boot-on; + }; + vsys_3v3: regulator-vsys3v3 { /* Output of LM5141 */ compatible = "regulator-fixed"; @@ -76,7 +87,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-boot-on; - vin-supply = <&vsys_3v3>; + vin-supply = <&vsys_5v0>; gpios = <&main_gpio0 49 GPIO_ACTIVE_HIGH>; states = <1800000 0x0>, <3300000 0x1>; From 1670b1dc19f37ffa662097f3ec8a9bb918493412 Mon Sep 17 00:00:00 2001 From: Yemike Abhilash Chandra Date: Tue, 15 Apr 2025 16:43:22 +0530 Subject: [PATCH 2014/2924] arm64: dts: ti: k3-j721e-sk: Add DT nodes for power regulators commit 97b67cc102dc2cc8aa39a569c22a196e21af5a21 upstream. Add device tree nodes for two power regulators on the J721E SK board. vsys_5v0: A fixed regulator representing the 5V supply output from the LM61460 and vdd_sd_dv: A GPIO-controlled TLV71033 regulator. J721E-SK schematics: https://www.ti.com/lit/zip/sprr438 Fixes: 1bfda92a3a36 ("arm64: dts: ti: Add support for J721E SK") Cc: stable@vger.kernel.org Signed-off-by: Yemike Abhilash Chandra Reviewed-by: Udit Kumar Link: https://lore.kernel.org/r/20250415111328.3847502-2-y-abhilashchandra@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-j721e-sk.dts | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arch/arm64/boot/dts/ti/k3-j721e-sk.dts b/arch/arm64/boot/dts/ti/k3-j721e-sk.dts index 440ef57be2943c..ffef3d1cfd5532 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-sk.dts +++ b/arch/arm64/boot/dts/ti/k3-j721e-sk.dts @@ -184,6 +184,17 @@ regulator-boot-on; }; + vsys_5v0: fixedregulator-vsys5v0 { + /* Output of LM61460 */ + compatible = "regulator-fixed"; + regulator-name = "vsys_5v0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + vin-supply = <&vusb_main>; + regulator-always-on; + regulator-boot-on; + }; + vdd_mmc1: fixedregulator-sd { compatible = "regulator-fixed"; pinctrl-names = "default"; @@ -211,6 +222,20 @@ <3300000 0x1>; }; + vdd_sd_dv: gpio-regulator-TLV71033 { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + pinctrl-0 = <&vdd_sd_dv_pins_default>; + regulator-name = "tlv71033"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + vin-supply = <&vsys_5v0>; + gpios = <&main_gpio0 118 GPIO_ACTIVE_HIGH>; + states = <1800000 0x0>, + <3300000 0x1>; + }; + transceiver1: can-phy1 { compatible = "ti,tcan1042"; #phy-cells = <0>; @@ -613,6 +638,12 @@ >; }; + vdd_sd_dv_pins_default: vdd-sd-dv-default-pins { + pinctrl-single,pins = < + J721E_IOPAD(0x1dc, PIN_OUTPUT, 7) /* (Y1) SPI1_CLK.GPIO0_118 */ + >; + }; + wkup_uart0_pins_default: wkup-uart0-default-pins { pinctrl-single,pins = < J721E_WKUP_IOPAD(0xa0, PIN_INPUT, 0) /* (J29) WKUP_UART0_RXD */ From f2e67d7997f878aefe89d288463f31d5adbda1ef Mon Sep 17 00:00:00 2001 From: Yemike Abhilash Chandra Date: Tue, 15 Apr 2025 16:43:24 +0530 Subject: [PATCH 2015/2924] arm64: dts: ti: k3-j721e-sk: Remove clock-names property from IMX219 overlay commit 24ab76e55ef15450c6681a2b5db4d78f45200939 upstream. The IMX219 sensor device tree bindings do not include a clock-names property. Remove the incorrectly added clock-names entry to avoid dtbs_check warnings. Fixes: f767eb918096 ("arm64: dts: ti: k3-j721e-sk: Add overlay for IMX219") Cc: stable@vger.kernel.org Signed-off-by: Yemike Abhilash Chandra Reviewed-by: Neha Malcom Francis Reviewed-by: Jai Luthra Link: https://lore.kernel.org/r/20250415111328.3847502-4-y-abhilashchandra@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso b/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso index 47bb5480b5b006..4a395d1209c8f8 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso +++ b/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso @@ -34,7 +34,6 @@ reg = <0x10>; clocks = <&clk_imx219_fixed>; - clock-names = "xclk"; port { csi2_cam0: endpoint { @@ -56,7 +55,6 @@ reg = <0x10>; clocks = <&clk_imx219_fixed>; - clock-names = "xclk"; port { csi2_cam1: endpoint { From 541a84233852e00d4eccfab4c8530679635966c1 Mon Sep 17 00:00:00 2001 From: Yemike Abhilash Chandra Date: Tue, 15 Apr 2025 16:43:25 +0530 Subject: [PATCH 2016/2924] arm64: dts: ti: k3-j721e-sk: Add requiried voltage supplies for IMX219 commit c6a20a250200da6fcaf80fe945b7b92cba8cfe0f upstream. The device tree overlay for the IMX219 sensor requires three voltage supplies to be defined: VANA (analog), VDIG (digital core), and VDDL (digital I/O). Add the corresponding voltage supply definitions to avoid dtbs_check warnings. Fixes: f767eb918096 ("arm64: dts: ti: k3-j721e-sk: Add overlay for IMX219") Cc: stable@vger.kernel.org Signed-off-by: Yemike Abhilash Chandra Link: https://lore.kernel.org/r/20250415111328.3847502-5-y-abhilashchandra@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- .../dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso b/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso index 4a395d1209c8f8..4eb3cffab0321d 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso +++ b/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso @@ -19,6 +19,33 @@ #clock-cells = <0>; clock-frequency = <24000000>; }; + + reg_2p8v: regulator-2p8v { + compatible = "regulator-fixed"; + regulator-name = "2P8V"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + vin-supply = <&vdd_sd_dv>; + regulator-always-on; + }; + + reg_1p8v: regulator-1p8v { + compatible = "regulator-fixed"; + regulator-name = "1P8V"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + vin-supply = <&vdd_sd_dv>; + regulator-always-on; + }; + + reg_1p2v: regulator-1p2v { + compatible = "regulator-fixed"; + regulator-name = "1P2V"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + vin-supply = <&vdd_sd_dv>; + regulator-always-on; + }; }; &csi_mux { @@ -34,6 +61,9 @@ reg = <0x10>; clocks = <&clk_imx219_fixed>; + VANA-supply = <®_2p8v>; + VDIG-supply = <®_1p8v>; + VDDL-supply = <®_1p2v>; port { csi2_cam0: endpoint { @@ -55,6 +85,9 @@ reg = <0x10>; clocks = <&clk_imx219_fixed>; + VANA-supply = <®_2p8v>; + VDIG-supply = <®_1p8v>; + VDDL-supply = <®_1p2v>; port { csi2_cam1: endpoint { From 9e12789dd705d7412a398acedee53f66e159bbbb Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Thu, 17 Apr 2025 18:02:43 +0530 Subject: [PATCH 2017/2924] arm64: dts: ti: k3-j722s-evm: Enable "serdes_wiz0" and "serdes_wiz1" commit 9d76be5828be44ed7a104cc21b4f875be4a63322 upstream. In preparation for disabling "serdes_wiz0" and "serdes_wiz1" device-tree nodes in the SoC file, enable them in the board file. The motivation for this change is that of following the existing convention of disabling nodes in the SoC file and only enabling the required ones in the board file. Fixes: 485705df5d5f ("arm64: dts: ti: k3-j722s: Enable PCIe and USB support on J722S-EVM") Cc: stable@vger.kernel.org Signed-off-by: Siddharth Vadapalli Reviewed-by: Udit Kumar Link: https://lore.kernel.org/r/20250417123246.2733923-2-s-vadapalli@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-j722s-evm.dts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/boot/dts/ti/k3-j722s-evm.dts b/arch/arm64/boot/dts/ti/k3-j722s-evm.dts index 2127316f36a34b..0bf2e182166244 100644 --- a/arch/arm64/boot/dts/ti/k3-j722s-evm.dts +++ b/arch/arm64/boot/dts/ti/k3-j722s-evm.dts @@ -843,6 +843,10 @@ ; }; +&serdes_wiz0 { + status = "okay"; +}; + &serdes0 { status = "okay"; serdes0_usb_link: phy@0 { @@ -854,6 +858,10 @@ }; }; +&serdes_wiz1 { + status = "okay"; +}; + &serdes1 { status = "okay"; serdes1_pcie_link: phy@0 { From b50410e0c6c0772a0e23b6ce503d04ebacc25da7 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Thu, 17 Apr 2025 18:02:44 +0530 Subject: [PATCH 2018/2924] arm64: dts: ti: k3-j722s-main: Disable "serdes_wiz0" and "serdes_wiz1" commit 320d8a84f6f045dc876d4c2983f9024c7ac9d6df upstream. Since "serdes0" and "serdes1" which are the sub-nodes of "serdes_wiz0" and "serdes_wiz1" respectively, have been disabled in the SoC file already, and, given that these sub-nodes will only be enabled in a board file if the board utilizes any of the SERDES instances and the peripherals bound to them, we end up in a situation where the board file doesn't explicitly disable "serdes_wiz0" and "serdes_wiz1". As a consequence of this, the following errors show up when booting Linux: wiz bus@f0000:phy@f000000: probe with driver wiz failed with error -12 ... wiz bus@f0000:phy@f010000: probe with driver wiz failed with error -12 To not only fix the above, but also, in order to follow the convention of disabling device-tree nodes in the SoC file and enabling them in the board files for those boards which require them, disable "serdes_wiz0" and "serdes_wiz1" device-tree nodes. Fixes: 628e0a0118e6 ("arm64: dts: ti: k3-j722s-main: Add SERDES and PCIe support") Cc: stable@vger.kernel.org Signed-off-by: Siddharth Vadapalli Reviewed-by: Udit Kumar Link: https://lore.kernel.org/r/20250417123246.2733923-3-s-vadapalli@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-j722s-main.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi index 6850f50530f12b..beda9e40e931b4 100644 --- a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi @@ -32,6 +32,8 @@ assigned-clocks = <&k3_clks 279 1>; assigned-clock-parents = <&k3_clks 279 5>; + status = "disabled"; + serdes0: serdes@f000000 { compatible = "ti,j721e-serdes-10g"; reg = <0x0f000000 0x00010000>; @@ -70,6 +72,8 @@ assigned-clocks = <&k3_clks 280 1>; assigned-clock-parents = <&k3_clks 280 5>; + status = "disabled"; + serdes1: serdes@f010000 { compatible = "ti,j721e-serdes-10g"; reg = <0x0f010000 0x00010000>; From b04cd984d00966304545686bb1c47d7bffc8f37a Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 23 Apr 2025 20:46:12 +0530 Subject: [PATCH 2019/2924] arm64: dts: ti: k3-j784s4-j742s2-main-common: Fix length of serdes_ln_ctrl commit 3b62bd1fde50d54cc59015e14869e6cc3d6899e0 upstream. Commit under Fixes corrected the "mux-reg-masks" property but did not update the "length" field of the "reg" property to account for the newly added register offsets which extend the region. Fix this. Fixes: 38e7f9092efb ("arm64: dts: ti: k3-j784s4-j742s2-main-common: Fix serdes_ln_ctrl reg-masks") Cc: stable@vger.kernel.org Signed-off-by: Siddharth Vadapalli Reviewed-by: Udit Kumar Link: https://lore.kernel.org/r/20250423151612.48848-1-s-vadapalli@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi index 1944616ab3579a..1fc0a11c5ab4a9 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi @@ -77,7 +77,7 @@ serdes_ln_ctrl: mux-controller@4080 { compatible = "reg-mux"; - reg = <0x00004080 0x30>; + reg = <0x00004080 0x50>; #mux-control-cells = <1>; mux-reg-masks = <0x0 0x3>, <0x4 0x3>, /* SERDES0 lane0/1 select */ <0x8 0x3>, <0xc 0x3>, /* SERDES0 lane2/3 select */ From 39ed887b1dd2d6b720f87e86692ac3006cc111c8 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Thu, 22 May 2025 15:14:47 -0300 Subject: [PATCH 2020/2924] net_sched: hfsc: Address reentrant enqueue adding class to eltree twice commit ac9fe7dd8e730a103ae4481147395cc73492d786 upstream. Savino says: "We are writing to report that this recent patch (141d34391abbb315d68556b7c67ad97885407547) [1] can be bypassed, and a UAF can still occur when HFSC is utilized with NETEM. The patch only checks the cl->cl_nactive field to determine whether it is the first insertion or not [2], but this field is only incremented by init_vf [3]. By using HFSC_RSC (which uses init_ed) [4], it is possible to bypass the check and insert the class twice in the eltree. Under normal conditions, this would lead to an infinite loop in hfsc_dequeue for the reasons we already explained in this report [5]. However, if TBF is added as root qdisc and it is configured with a very low rate, it can be utilized to prevent packets from being dequeued. This behavior can be exploited to perform subsequent insertions in the HFSC eltree and cause a UAF." To fix both the UAF and the infinite loop, with netem as an hfsc child, check explicitly in hfsc_enqueue whether the class is already in the eltree whenever the HFSC_RSC flag is set. [1] https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=141d34391abbb315d68556b7c67ad97885407547 [2] https://elixir.bootlin.com/linux/v6.15-rc5/source/net/sched/sch_hfsc.c#L1572 [3] https://elixir.bootlin.com/linux/v6.15-rc5/source/net/sched/sch_hfsc.c#L677 [4] https://elixir.bootlin.com/linux/v6.15-rc5/source/net/sched/sch_hfsc.c#L1574 [5] https://lore.kernel.org/netdev/8DuRWwfqjoRDLDmBMlIfbrsZg9Gx50DHJc1ilxsEBNe2D6NMoigR_eIRIG0LOjMc3r10nUUZtArXx4oZBIdUfZQrwjcQhdinnMis_0G7VEk=@willsroot.io/T/#u Fixes: 37d9cf1a3ce3 ("sched: Fix detection of empty queues in child qdiscs") Reported-by: Savino Dicanosa Reported-by: William Liu Acked-by: Jamal Hadi Salim Tested-by: Victor Nogueira Signed-off-by: Pedro Tammela Link: https://patch.msgid.link/20250522181448.1439717-2-pctammela@mojatatu.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_hfsc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 7986145a527cbe..5a7745170e84b1 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -175,6 +175,11 @@ struct hfsc_sched { #define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ +static bool cl_in_el_or_vttree(struct hfsc_class *cl) +{ + return ((cl->cl_flags & HFSC_FSC) && cl->cl_nactive) || + ((cl->cl_flags & HFSC_RSC) && !RB_EMPTY_NODE(&cl->el_node)); +} /* * eligible tree holds backlogged classes being sorted by their eligible times. @@ -1040,6 +1045,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + RB_CLEAR_NODE(&cl->el_node); + err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); @@ -1572,7 +1579,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) sch->qstats.backlog += len; sch->q.qlen++; - if (first && !cl->cl_nactive) { + if (first && !cl_in_el_or_vttree(cl)) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) From 5ca39bf6f8aaef5a9cd777446e08b6f3f4f1653a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 8 May 2025 16:16:40 +0100 Subject: [PATCH 2021/2924] perf/arm-cmn: Fix REQ2/SNP2 mixup commit 11b0f576e0cbde6a12258f2af6753b17b8df342b upstream. Somehow the encodings for REQ2/SNP2 channels in XP events got mixed up... Unmix them. CC: stable@vger.kernel.org Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support") Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/087023e9737ac93d7ec7a841da904758c254cb01.1746717400.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/perf/arm-cmn.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index d4fe30ff225b6a..aa290831355876 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -727,8 +727,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if ((chan == 5 && cmn->rsp_vc_num < 2) || (chan == 6 && cmn->dat_vc_num < 2) || - (chan == 7 && cmn->snp_vc_num < 2) || - (chan == 8 && cmn->req_vc_num < 2)) + (chan == 7 && cmn->req_vc_num < 2) || + (chan == 8 && cmn->snp_vc_num < 2)) return 0; } @@ -882,8 +882,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_XP(pub_##_name, (_event) | (4 << 5)), \ _CMN_EVENT_XP(rsp2_##_name, (_event) | (5 << 5)), \ _CMN_EVENT_XP(dat2_##_name, (_event) | (6 << 5)), \ - _CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)), \ - _CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5)) + _CMN_EVENT_XP(req2_##_name, (_event) | (7 << 5)), \ + _CMN_EVENT_XP(snp2_##_name, (_event) | (8 << 5)) #define CMN_EVENT_XP_DAT(_name, _event) \ _CMN_EVENT_XP_PORT(dat_##_name, (_event) | (3 << 5)), \ From 37a2ff8b5c4077616981c919c3ad3bdca0093dcc Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 May 2025 18:11:54 +0100 Subject: [PATCH 2022/2924] perf/arm-cmn: Initialise cmn->cpu earlier commit 597704e201068db3d104de3c7a4d447ff8209127 upstream. For all the complexity of handling affinity for CPU hotplug, what we've apparently managed to overlook is that arm_cmn_init_irqs() has in fact always been setting the *initial* affinity of all IRQs to CPU 0, not the CPU we subsequently choose for event scheduling. Oh dear. Cc: stable@vger.kernel.org Fixes: 0ba64770a2f2 ("perf: Add Arm CMN-600 PMU driver") Signed-off-by: Robin Murphy Reviewed-by: Ilkka Koskinen Link: https://lore.kernel.org/r/b12fccba6b5b4d2674944f59e4daad91cd63420b.1747069914.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/perf/arm-cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index aa290831355876..e385f187a084dc 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2558,6 +2558,7 @@ static int arm_cmn_probe(struct platform_device *pdev) cmn->dev = &pdev->dev; cmn->part = (unsigned long)device_get_match_data(cmn->dev); + cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev)); platform_set_drvdata(pdev, cmn); if (cmn->part == PART_CMN600 && has_acpi_companion(cmn->dev)) { @@ -2585,7 +2586,6 @@ static int arm_cmn_probe(struct platform_device *pdev) if (err) return err; - cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev)); cmn->pmu = (struct pmu) { .module = THIS_MODULE, .parent = cmn->dev, From 6d64b337e0c6fa2d85e840008592ba57907e2519 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 19 May 2025 11:56:04 +0100 Subject: [PATCH 2023/2924] perf/arm-cmn: Add CMN S3 ACPI binding commit 8c138a189f6db295ceb32258d46ac061df0823e5 upstream. An ACPI binding for CMN S3 was not yet finalised when the driver support was originally written, but v1.2 of DEN0093 "ACPI for Arm Components" has at last been published; support ACPI systems using the proper HID. Cc: stable@vger.kernel.org Fixes: 0dc2f4963f7e ("perf/arm-cmn: Support CMN S3") Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/7dafe147f186423020af49d7037552ee59c60e97.1747652164.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/perf/arm-cmn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index e385f187a084dc..403850b1040d3d 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2651,6 +2651,7 @@ static const struct acpi_device_id arm_cmn_acpi_match[] = { { "ARMHC600", PART_CMN600 }, { "ARMHC650" }, { "ARMHC700" }, + { "ARMHC003" }, {} }; MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match); From 24aed3a922a52540ff919b3de0ab020c305e6234 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 14 Apr 2025 15:55:05 +0200 Subject: [PATCH 2024/2924] pidfs: move O_RDWR into pidfs_alloc_file() commit c57f07b235871c9e5bffaccd458dca2d9a62b164 upstream. Since all pidfds must be O_RDWR currently enfore that directly in the file allocation function itself instead of letting callers specify it. Link: https://lore.kernel.org/20250414-work-coredump-v2-1-685bf231f828@kernel.org Tested-by: Luca Boccassi Reviewed-by: Oleg Nesterov Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/pidfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/pidfs.c b/fs/pidfs.c index d64a4cbeb0dafa..50e69a9e104a60 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -888,6 +888,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) return ERR_PTR(-ESRCH); flags &= ~PIDFD_CLONE; + flags |= O_RDWR; pidfd_file = dentry_open(&path, flags, current_cred()); /* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */ if (!IS_ERR(pidfd_file)) From c13640f95bafdfc8bbd6ce1f6949690a85505448 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 14 Apr 2025 15:55:06 +0200 Subject: [PATCH 2025/2924] coredump: fix error handling for replace_fd() commit 95c5f43181fe9c1b5e5a4bd3281c857a5259991f upstream. The replace_fd() helper returns the file descriptor number on success and a negative error code on failure. The current error handling in umh_pipe_setup() only works because the file descriptor that is replaced is zero but that's pretty volatile. Explicitly check for a negative error code. Link: https://lore.kernel.org/20250414-work-coredump-v2-2-685bf231f828@kernel.org Tested-by: Luca Boccassi Reviewed-by: Oleg Nesterov Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/coredump.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index c33c177a701b3d..9da592aa8f1623 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -507,7 +507,9 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { struct file *files[2]; struct coredump_params *cp = (struct coredump_params *)info->data; - int err = create_pipe_files(files, 0); + int err; + + err = create_pipe_files(files, 0); if (err) return err; @@ -515,10 +517,13 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) err = replace_fd(0, files[0], 0); fput(files[0]); + if (err < 0) + return err; + /* and disallow core files too */ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; - return err; + return 0; } void do_coredump(const kernel_siginfo_t *siginfo) From 25f950dc53fe242d39980862d071a7b2a0134682 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 14 Apr 2025 15:55:07 +0200 Subject: [PATCH 2026/2924] coredump: hand a pidfd to the usermode coredump helper commit b5325b2a270fcaf7b2a9a0f23d422ca8a5a8bdea upstream. Give userspace a way to instruct the kernel to install a pidfd into the usermode helper process. This makes coredump handling a lot more reliable for userspace. In parallel with this commit we already have systemd adding support for this in [1]. We create a pidfs file for the coredumping process when we process the corename pattern. When the usermode helper process is forked we then install the pidfs file as file descriptor three into the usermode helpers file descriptor table so it's available to the exec'd program. Since usermode helpers are either children of the system_unbound_wq workqueue or kthreadd we know that the file descriptor table is empty and can thus always use three as the file descriptor number. Note, that we'll install a pidfd for the thread-group leader even if a subthread is calling do_coredump(). We know that task linkage hasn't been removed due to delay_group_leader() and even if this @current isn't the actual thread-group leader we know that the thread-group leader cannot be reaped until @current has exited. Link: https://github.com/systemd/systemd/pull/37125 [1] Link: https://lore.kernel.org/20250414-work-coredump-v2-3-685bf231f828@kernel.org Tested-by: Luca Boccassi Reviewed-by: Oleg Nesterov Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/coredump.c | 56 +++++++++++++++++++++++++++++++++++++--- include/linux/coredump.h | 1 + 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 9da592aa8f1623..d740a04112663c 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include #include @@ -60,6 +62,12 @@ static void free_vma_snapshot(struct coredump_params *cprm); #define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024) /* Define a reasonable max cap */ #define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024) +/* + * File descriptor number for the pidfd for the thread-group leader of + * the coredumping task installed into the usermode helper's file + * descriptor table. + */ +#define COREDUMP_PIDFD_NUMBER 3 static int core_uses_pid; static unsigned int core_pipe_limit; @@ -339,6 +347,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, case 'C': err = cn_printf(cn, "%d", cprm->cpu); break; + /* pidfd number */ + case 'F': { + /* + * Installing a pidfd only makes sense if + * we actually spawn a usermode helper. + */ + if (!ispipe) + break; + + /* + * Note that we'll install a pidfd for the + * thread-group leader. We know that task + * linkage hasn't been removed yet and even if + * this @current isn't the actual thread-group + * leader we know that the thread-group leader + * cannot be reaped until @current has exited. + */ + cprm->pid = task_tgid(current); + err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); + break; + } default: break; } @@ -493,7 +522,7 @@ static void wait_for_dump_helpers(struct file *file) } /* - * umh_pipe_setup + * umh_coredump_setup * helper function to customize the process used * to collect the core in userspace. Specifically * it sets up a pipe and installs it as fd 0 (stdin) @@ -503,12 +532,31 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) +static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) { struct file *files[2]; struct coredump_params *cp = (struct coredump_params *)info->data; int err; + if (cp->pid) { + struct file *pidfs_file __free(fput) = NULL; + + pidfs_file = pidfs_alloc_file(cp->pid, 0); + if (IS_ERR(pidfs_file)) + return PTR_ERR(pidfs_file); + + /* + * Usermode helpers are childen of either + * system_unbound_wq or of kthreadd. So we know that + * we're starting off with a clean file descriptor + * table. So we should always be able to use + * COREDUMP_PIDFD_NUMBER as our file descriptor value. + */ + err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); + if (err < 0) + return err; + } + err = create_pipe_files(files, 0); if (err) return err; @@ -598,7 +646,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) } if (cprm.limit == 1) { - /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. + /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. * * Normally core limits are irrelevant to pipes, since * we're not writing to the file system, but we use @@ -637,7 +685,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) retval = -ENOMEM; sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL, GFP_KERNEL, - umh_pipe_setup, NULL, &cprm); + umh_coredump_setup, NULL, &cprm); if (sub_info) retval = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 77e6e195d1d687..76e41805b92de9 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -28,6 +28,7 @@ struct coredump_params { int vma_count; size_t vma_data_size; struct core_vma_metadata *vma_meta; + struct pid *pid; }; extern unsigned int core_file_note_size_limit; From 9eff7605ebccdba7b3d83bbe9043bb9c14fedb13 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 11 Mar 2025 15:19:25 +0000 Subject: [PATCH 2027/2924] iommu: Avoid introducing more races commit 0c8e9c148e29a983e67060fb4944a8ca79d4362a upstream. Although the lock-juggling is only a temporary workaround, we don't want it to make things avoidably worse. Jason was right to be nervous, since bus_iommu_probe() doesn't care *which* IOMMU instance it's probing for, so it probably is possible for one walk to finish a probe which a different walk started, thus we do want to check for that. Also there's no need to drop the lock just to have of_iommu_configure() do nothing when a fwspec already exists; check that directly and avoid opening a window at all in that (still somewhat likely) case. Suggested-by: Jason Gunthorpe Signed-off-by: Robin Murphy Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/09d901ad11b3a410fbb6e27f7d04ad4609c3fe4a.1741706365.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9d728800a862e6..ee9c62150b5606 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -422,13 +422,15 @@ static int iommu_init_device(struct device *dev) * is buried in the bus dma_configure path. Properly unpicking that is * still a big job, so for now just invoke the whole thing. The device * already having a driver bound means dma_configure has already run and - * either found no IOMMU to wait for, or we're in its replay call right - * now, so either way there's no point calling it again. + * found no IOMMU to wait for, so there's no point calling it again. */ - if (!dev->driver && dev->bus->dma_configure) { + if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) { mutex_unlock(&iommu_probe_device_lock); dev->bus->dma_configure(dev); mutex_lock(&iommu_probe_device_lock); + /* If another instance finished the job for us, skip it */ + if (!dev->iommu || dev->iommu_group) + return -ENODEV; } /* * At this point, relevant devices either now have a fwspec which will From b5afaa296d5ee43fa1233f255fe652262a96bcc3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 24 Apr 2025 18:41:28 +0100 Subject: [PATCH 2028/2924] iommu: Handle yet another race around registration commit da33e87bd2bfc63531cf7448a3cd7a3d42182f08 upstream. Next up on our list of race windows to close is another one during iommu_device_register() - it's now OK again for multiple instances to run their bus_iommu_probe() in parallel, but an iommu_probe_device() can still also race against a running bus_iommu_probe(). As Johan has managed to prove, this has now become a lot more visible on DT platforms wth driver_async_probe where a client driver is attempting to probe in parallel with its IOMMU driver - although commit b46064a18810 ("iommu: Handle race with default domain setup") resolves this from the client driver's point of view, this isn't before of_iommu_configure() has had the chance to attempt to "replay" a probe that the bus walk hasn't even tried yet, and so still cause the out-of-order group allocation behaviour that we're trying to clean up (and now warning about). The most reliable thing to do here is to explicitly keep track of the "iommu_device_register() is still running" state, so we can then special-case the ops lookup for the replay path (based on dev->iommu again) to let that think it's still waiting for the IOMMU driver to appear at all. This still leaves the longstanding theoretical case of iommu_bus_notifier() being triggered during bus_iommu_probe(), but it's not so simple to defer a notifier, and nobody's ever reported that being a visible issue, so let's quietly kick that can down the road for now... Reported-by: Johan Hovold Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/88d54c1b48fed8279aa47d30f3d75173685bb26a.1745516488.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommu.c | 26 ++++++++++++++++++-------- include/linux/iommu.h | 2 ++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ee9c62150b5606..5bc2fc969494f5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -277,6 +277,8 @@ int iommu_device_register(struct iommu_device *iommu, err = bus_iommu_probe(iommu_buses[i]); if (err) iommu_device_unregister(iommu); + else + WRITE_ONCE(iommu->ready, true); return err; } EXPORT_SYMBOL_GPL(iommu_device_register); @@ -2832,31 +2834,39 @@ bool iommu_default_passthrough(void) } EXPORT_SYMBOL_GPL(iommu_default_passthrough); -const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) +static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode) { - const struct iommu_ops *ops = NULL; - struct iommu_device *iommu; + const struct iommu_device *iommu, *ret = NULL; spin_lock(&iommu_device_lock); list_for_each_entry(iommu, &iommu_device_list, list) if (iommu->fwnode == fwnode) { - ops = iommu->ops; + ret = iommu; break; } spin_unlock(&iommu_device_lock); - return ops; + return ret; +} + +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) +{ + const struct iommu_device *iommu = iommu_from_fwnode(fwnode); + + return iommu ? iommu->ops : NULL; } int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) { - const struct iommu_ops *ops = iommu_ops_from_fwnode(iommu_fwnode); + const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - if (!ops) + if (!iommu) return driver_deferred_probe_check_state(dev); + if (!dev->iommu && !READ_ONCE(iommu->ready)) + return -EPROBE_DEFER; if (fwspec) - return ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; + return iommu->ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; if (!dev_iommu_get(dev)) return -ENOMEM; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3a8d35d41fdad5..4273871845eebb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -750,6 +750,7 @@ struct iommu_domain_ops { * @dev: struct device for sysfs handling * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs + * @ready: set once iommu_device_register() has completed successfully */ struct iommu_device { struct list_head list; @@ -758,6 +759,7 @@ struct iommu_device { struct device *dev; struct iommu_group *singleton_group; u32 max_pasids; + bool ready; }; /** From 3ef49626da6dd67013fc2cf0a4e4c9e158bb59f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 4 Jun 2025 14:46:27 +0200 Subject: [PATCH 2029/2924] Linux 6.15.1 Link: https://lore.kernel.org/r/20250602134237.940995114@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Peter Schneider Tested-by: Ron Economos Tested-by: Mark Brown Tested-by: Christian Heusel Tested-by: Takeshi Ogasawara Tested-by: Linux Kernel Functional Testing Tested-by: Shuah Khan Tested-by: Salvatore Bonaccorso Tested-by: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c1cd1b5fc269a6..61d69a3fc827cc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 15 -SUBLEVEL = 0 +SUBLEVEL = 1 EXTRAVERSION = NAME = Baby Opossum Posse From 980f5b6c320998fe9714e10d6f858a13bb88622d Mon Sep 17 00:00:00 2001 From: Pan Taixi Date: Mon, 26 May 2025 09:37:31 +0800 Subject: [PATCH 2030/2924] tracing: Fix compilation warning on arm32 commit 2fbdb6d8e03b70668c0876e635506540ae92ab05 upstream. On arm32, size_t is defined to be unsigned int, while PAGE_SIZE is unsigned long. This hence triggers a compilation warning as min() asserts the type of two operands to be equal. Casting PAGE_SIZE to size_t solves this issue and works on other target architectures as well. Compilation warning details: kernel/trace/trace.c: In function 'tracing_splice_read_pipe': ./include/linux/minmax.h:20:28: warning: comparison of distinct pointer types lacks a cast (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) ^ ./include/linux/minmax.h:26:4: note: in expansion of macro '__typecheck' (__typecheck(x, y) && __no_side_effects(x, y)) ^~~~~~~~~~~ ... kernel/trace/trace.c:6771:8: note: in expansion of macro 'min' min((size_t)trace_seq_used(&iter->seq), ^~~ Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250526013731.1198030-1-pantaixi@huaweicloud.com Fixes: f5178c41bb43 ("tracing: Fix oob write in trace_seq_to_buffer()") Reviewed-by: Jeongjun Park Signed-off-by: Pan Taixi Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5b8db27fb6ef37..766cb3cd254e05 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6824,7 +6824,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, ret = trace_seq_to_buffer(&iter->seq, page_address(spd.pages[i]), min((size_t)trace_seq_used(&iter->seq), - PAGE_SIZE)); + (size_t)PAGE_SIZE)); if (ret < 0) { __free_page(spd.pages[i]); break; From ca83fbe0b52f8643797e64ececbea41ffe5a9b94 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 29 May 2025 15:40:43 +0200 Subject: [PATCH 2031/2924] Revert "x86/smp: Eliminate mwait_play_dead_cpuid_hint()" commit 70523f335734b0b42f97647556d331edf684c7dc upstream. Revert commit 96040f7273e2 ("x86/smp: Eliminate mwait_play_dead_cpuid_hint()") because it introduced a significant power regression on systems that start with "nosmt" in the kernel command line. Namely, on such systems, SMT siblings permanently go offline early, when cpuidle has not been initialized yet, so after the above commit, hlt_play_dead() is called for them. Later on, when the processor attempts to enter a deep package C-state, including PC10 which is requisite for reaching minimum power in suspend-to-idle, it is not able to do that because of the SMT siblings staying in C1 (which they have been put into by HLT). As a result, the idle power (including power in suspend-to-idle) rises quite dramatically on those systems with all of the possible consequences, which (needless to say) may not be expected by their users. This issue is hard to debug and potentially dangerous, so it needs to be addressed as soon as possible in a way that will work for 6.15.y, hence the revert. Of course, after this revert, the issue that commit 96040f7273e2 attempted to address will be back and it will need to be fixed again later. Fixes: 96040f7273e2 ("x86/smp: Eliminate mwait_play_dead_cpuid_hint()") Reported-by: Todd Brandt Tested-by: Todd Brandt Cc: 6.15+ # 6.15+ Signed-off-by: Rafael J. Wysocki Acked-by: Dave Hansen Link: https://patch.msgid.link/12674167.O9o76ZdvQC@rjwysocki.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/smpboot.c | 54 ++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d6cf1e23c2a326..96355ab9aed953 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1238,10 +1238,6 @@ void play_dead_common(void) local_irq_disable(); } -/* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); @@ -1287,6 +1283,50 @@ void __noreturn mwait_play_dead(unsigned int eax_hint) } } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ +static inline void mwait_play_dead_cpuid_hint(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; + int i; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + return; + if (!this_cpu_has(X86_FEATURE_MWAIT)) + return; + if (!this_cpu_has(X86_FEATURE_CLFLUSH)) + return; + + eax = CPUID_LEAF_MWAIT; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + + /* + * eax will be 0 if EDX enumeration is not valid. + * Initialized below to cstate, sub_cstate value when EDX is valid. + */ + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { + eax = 0; + } else { + edx >>= MWAIT_SUBSTATE_SIZE; + for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { + if (edx & MWAIT_SUBSTATE_MASK) { + highest_cstate = i; + highest_subcstate = edx & MWAIT_SUBSTATE_MASK; + } + } + eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | + (highest_subcstate - 1); + } + + mwait_play_dead(eax); +} + /* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). @@ -1337,9 +1377,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - /* Below returns only on error. */ - cpuidle_play_dead(); - hlt_play_dead(); + mwait_play_dead_cpuid_hint(); + if (cpuidle_play_dead()) + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ From f1c9331f77e7a826ec2e7b45a52b8cc389348405 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 21:27:58 +0200 Subject: [PATCH 2032/2924] ACPICA: Introduce ACPI_NONSTRING commit 6da5e6f3028d46e4fee7849e85eda681939c630b upstream. ACPICA commit 878823ca20f1987cba0c9d4c1056be0d117ea4fe In order to distinguish character arrays from C Strings (i.e. strings with a terminating NUL character), add support for the "nonstring" attribute provided by GCC. (A better name might be "ACPI_NONCSTRING", but that's the attribute name, so stick to the existing naming convention.) GCC 15's -Wunterminated-string-initialization will warn about truncation of the NUL byte for string initializers unless the destination is marked with "nonstring". Prepare for applying this attribute to the project. Link: https://github.com/acpica/acpica/commit/878823ca Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/1841930.VLH7GnMWUR@rjwysocki.net Signed-off-by: Kees Cook [ rjw: Pick up the tag from Kees ] Signed-off-by: Rafael J. Wysocki Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- include/acpi/actypes.h | 4 ++++ include/acpi/platform/acgcc.h | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 80767e8bf3ad43..f7b3c4a4b7e7c3 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -1327,4 +1327,8 @@ typedef enum { #define ACPI_FLEX_ARRAY(TYPE, NAME) TYPE NAME[0] #endif +#ifndef ACPI_NONSTRING +#define ACPI_NONSTRING /* No terminating NUL character */ +#endif + #endif /* __ACTYPES_H__ */ diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index 04b4bf62051707..68e9379623e6dc 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -72,4 +72,12 @@ TYPE NAME[]; \ } +/* + * Explicitly mark strings that lack a terminating NUL character so + * that ACPICA can be built with -Wunterminated-string-initialization. + */ +#if __has_attribute(__nonstring__) +#define ACPI_NONSTRING __attribute__((__nonstring__)) +#endif + #endif /* __ACGCC_H__ */ From 692b9698a252dd3477585b7bf33b1af7954c637d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 21:28:34 +0200 Subject: [PATCH 2033/2924] ACPICA: Apply ACPI_NONSTRING commit 2b82118845e04c7adf4ece797150c19809bab29b upstream. ACPICA commit ed68cb8e082e3bfbba02814af4fd5a61247f491b Add ACPI_NONSTRING annotations for places found that are using char arrays without a terminating NUL character. These were found during Linux kernel builds and after looking for instances of arrays of size ACPI_NAMESEG_SIZE. Link: https://github.com/acpica/acpica/commit/ed68cb8e Signed-off-by: Kees Cook Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/2039736.usQuhbGJ8B@rjwysocki.net Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/aclocal.h | 4 ++-- drivers/acpi/acpica/nsnames.c | 2 +- drivers/acpi/acpica/nsrepair2.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 6481c48c22bb7e..b40e9a520618e0 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -293,7 +293,7 @@ acpi_status (*acpi_internal_method) (struct acpi_walk_state * walk_state); * expected_return_btypes - Allowed type(s) for the return value */ struct acpi_name_info { - char name[ACPI_NAMESEG_SIZE] __nonstring; + char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; u16 argument_list; u8 expected_btypes; }; @@ -370,7 +370,7 @@ typedef acpi_status (*acpi_object_converter) (struct acpi_namespace_node * converted_object); struct acpi_simple_repair_info { - char name[ACPI_NAMESEG_SIZE] __nonstring; + char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; u32 unexpected_btypes; u32 package_index; acpi_object_converter object_converter; diff --git a/drivers/acpi/acpica/nsnames.c b/drivers/acpi/acpica/nsnames.c index d91153f6570053..22aeeeb56cffdb 100644 --- a/drivers/acpi/acpica/nsnames.c +++ b/drivers/acpi/acpica/nsnames.c @@ -194,7 +194,7 @@ acpi_ns_build_normalized_path(struct acpi_namespace_node *node, char *full_path, u32 path_size, u8 no_trailing) { u32 length = 0, i; - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; u8 do_no_trailing; char c, *left, *right; struct acpi_namespace_node *next_node; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 330b5e4711daca..0075fc80d49843 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -25,7 +25,7 @@ acpi_status (*acpi_repair_function) (struct acpi_evaluate_info * info, return_object_ptr); typedef struct acpi_repair_info { - char name[ACPI_NAMESEG_SIZE] __nonstring; + char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; acpi_repair_function repair_function; } acpi_repair_info; From 4c487424273629820a1d06771bbb1dd4e420368d Mon Sep 17 00:00:00 2001 From: Ahmed Salem Date: Fri, 25 Apr 2025 21:31:05 +0200 Subject: [PATCH 2034/2924] ACPICA: Apply ACPI_NONSTRING in more places commit 70662db73d5455ebc8a1da29973fa70237b18cd2 upstream. ACPICA commit 1035a3d453f7dd49a235a59ee84ebda9d2d2f41b Add ACPI_NONSTRING for destination char arrays without a terminating NUL character. This is a follow-up to commit 35ad99236f3a ("ACPICA: Apply ACPI_NONSTRING") where not all instances received the same treatment, in preparation for replacing strncpy() calls with memcpy() Link: https://github.com/acpica/acpica/commit/1035a3d4 Signed-off-by: Ahmed Salem Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/3833065.MHq7AAxBmi@rjwysocki.net Cc: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/acdebug.h | 2 +- include/acpi/actbl.h | 6 +++--- tools/power/acpi/os_specific/service_layers/oslinuxtbl.c | 2 +- tools/power/acpi/tools/acpidump/apfiles.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h index 911875c5a5f190..58842130ca47bb 100644 --- a/drivers/acpi/acpica/acdebug.h +++ b/drivers/acpi/acpica/acdebug.h @@ -37,7 +37,7 @@ struct acpi_db_argument_info { struct acpi_db_execute_walk { u32 count; u32 max_count; - char name_seg[ACPI_NAMESEG_SIZE + 1]; + char name_seg[ACPI_NAMESEG_SIZE + 1] ACPI_NONSTRING; }; #define PARAM_LIST(pl) pl diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 2fc89704be1797..74cc61e3ab09b1 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -66,12 +66,12 @@ ******************************************************************************/ struct acpi_table_header { - char signature[ACPI_NAMESEG_SIZE] __nonstring; /* ASCII table signature */ + char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; /* ASCII table signature */ u32 length; /* Length of table in bytes, including this header */ u8 revision; /* ACPI Specification minor version number */ u8 checksum; /* To make sum of entire table == 0 */ - char oem_id[ACPI_OEM_ID_SIZE]; /* ASCII OEM identification */ - char oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; /* ASCII OEM table identification */ + char oem_id[ACPI_OEM_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM identification */ + char oem_table_id[ACPI_OEM_TABLE_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM table identification */ u32 oem_revision; /* OEM revision number */ char asl_compiler_id[ACPI_NAMESEG_SIZE]; /* ASCII ASL compiler vendor ID */ u32 asl_compiler_revision; /* ASL compiler version */ diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 9d70d8c945af4e..987a5d32f3b600 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -19,7 +19,7 @@ ACPI_MODULE_NAME("oslinuxtbl") typedef struct osl_table_info { struct osl_table_info *next; u32 instance; - char signature[ACPI_NAMESEG_SIZE]; + char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; } osl_table_info; diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 13817f9112c06a..9fc927fcc22a7f 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -103,7 +103,7 @@ int ap_open_output_file(char *pathname) int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) { - char filename[ACPI_NAMESEG_SIZE + 16]; + char filename[ACPI_NAMESEG_SIZE + 16] ACPI_NONSTRING; char instance_str[16]; ACPI_FILE file; acpi_size actual; From 921ae6204c04925c5286f2c654699641dd2bdcfe Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 14 May 2025 21:18:32 +0200 Subject: [PATCH 2035/2924] pinctrl: armada-37xx: use correct OUTPUT_VAL register for GPIOs > 31 commit 947c93eb29c2a581c0b0b6d5f21af3c2b7ff6d25 upstream. The controller has two consecutive OUTPUT_VAL registers and both holds output value for 32 GPIOs. Due to a missing adjustment, the current code always uses the first register while setting the output value whereas it should use the second one for GPIOs > 31. Add the missing armada_37xx_update_reg() call to adjust the register according to the 'offset' parameter of the function to fix the issue. Cc: stable@vger.kernel.org Fixes: 6702abb3bf23 ("pinctrl: armada-37xx: Fix direction_output() callback behavior") Signed-off-by: Imre Kaloz Reviewed-by: Andrew Lunn Signed-off-by: Gabor Juhos Link: https://lore.kernel.org/20250514-pinctrl-a37xx-fixes-v2-1-07e9ac1ab737@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 335744ac831057..43034d29292687 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -417,6 +417,7 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { struct armada_37xx_pinctrl *info = gpiochip_get_data(chip); + unsigned int val_offset = offset; unsigned int reg = OUTPUT_EN; unsigned int mask, val, ret; @@ -429,6 +430,8 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip, return ret; reg = OUTPUT_VAL; + armada_37xx_update_reg(®, &val_offset); + val = value ? mask : 0; regmap_update_bits(info->regmap, reg, mask, val); From f87e3cf4b21bf3dd42831bc30d73e15aaf9117ac Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 14 May 2025 21:18:33 +0200 Subject: [PATCH 2036/2924] pinctrl: armada-37xx: set GPIO output value before setting direction commit e6ebd4942981f8ad37189bbb36a3c8495e21ef4c upstream. Changing the direction before updating the output value in the OUTPUT_VAL register may result in a glitch on the output line if the previous value in the OUTPUT_VAL register is different from the one we want to set. In order to avoid that, update the output value before changing the direction. Cc: stable@vger.kernel.org Fixes: 6702abb3bf23 ("pinctrl: armada-37xx: Fix direction_output() callback behavior") Signed-off-by: Imre Kaloz Reviewed-by: Andrew Lunn Signed-off-by: Gabor Juhos Link: https://lore.kernel.org/20250514-pinctrl-a37xx-fixes-v2-2-07e9ac1ab737@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 43034d29292687..79f9c08e5039c3 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -417,23 +417,22 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { struct armada_37xx_pinctrl *info = gpiochip_get_data(chip); - unsigned int val_offset = offset; - unsigned int reg = OUTPUT_EN; + unsigned int en_offset = offset; + unsigned int reg = OUTPUT_VAL; unsigned int mask, val, ret; armada_37xx_update_reg(®, &offset); mask = BIT(offset); + val = value ? mask : 0; - ret = regmap_update_bits(info->regmap, reg, mask, mask); - + ret = regmap_update_bits(info->regmap, reg, mask, val); if (ret) return ret; - reg = OUTPUT_VAL; - armada_37xx_update_reg(®, &val_offset); + reg = OUTPUT_EN; + armada_37xx_update_reg(®, &en_offset); - val = value ? mask : 0; - regmap_update_bits(info->regmap, reg, mask, val); + regmap_update_bits(info->regmap, reg, mask, mask); return 0; } From 97a322d1c93df607e391983dda50da3a441c7fff Mon Sep 17 00:00:00 2001 From: Pritam Manohar Sutar Date: Tue, 6 May 2025 13:31:54 +0530 Subject: [PATCH 2037/2924] clk: samsung: correct clock summary for hsi1 block commit 81214185e7e1fc6dfc8661a574c457accaf9a5a4 upstream. clk_summary shows wrong value for "mout_hsi1_usbdrd_user". It shows 400Mhz instead of 40Mhz as below. dout_shared2_div4 1 1 0 400000000 0 0 50000 Y ... mout_hsi1_usbdrd_user 0 0 0 400000000 0 0 50000 Y ... dout_clkcmu_hsi1_usbdrd 0 0 0 40000000 0 0 50000 Y ... Correct the clk_tree by adding correct clock parent for "mout_hsi1_usbdrd_user". Post this change, clk_summary shows correct value. dout_shared2_div4 1 1 0 400000000 0 0 50000 Y ... mout_clkcmu_hsi1_usbdrd 0 0 0 400000000 0 0 50000 Y ... dout_clkcmu_hsi1_usbdrd 0 0 0 40000000 0 0 50000 Y ... mout_hsi1_usbdrd_user 0 0 0 40000000 0 0 50000 Y ... Fixes: 485e13fe2fb6 ("clk: samsung: add top clock support for ExynosAuto v920 SoC") Cc: Signed-off-by: Pritam Manohar Sutar Reviewed-by: Alim Akhtar Link: https://lore.kernel.org/r/20250506080154.3995512-1-pritam.sutar@samsung.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- drivers/clk/samsung/clk-exynosautov920.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/samsung/clk-exynosautov920.c b/drivers/clk/samsung/clk-exynosautov920.c index dc8d4240f6defc..b0561faecfeb1b 100644 --- a/drivers/clk/samsung/clk-exynosautov920.c +++ b/drivers/clk/samsung/clk-exynosautov920.c @@ -1393,7 +1393,7 @@ static const unsigned long hsi1_clk_regs[] __initconst = { /* List of parent clocks for Muxes in CMU_HSI1 */ PNAME(mout_hsi1_mmc_card_user_p) = {"oscclk", "dout_clkcmu_hsi1_mmc_card"}; PNAME(mout_hsi1_noc_user_p) = { "oscclk", "dout_clkcmu_hsi1_noc" }; -PNAME(mout_hsi1_usbdrd_user_p) = { "oscclk", "mout_clkcmu_hsi1_usbdrd" }; +PNAME(mout_hsi1_usbdrd_user_p) = { "oscclk", "dout_clkcmu_hsi1_usbdrd" }; PNAME(mout_hsi1_usbdrd_p) = { "dout_tcxo_div2", "mout_hsi1_usbdrd_user" }; static const struct samsung_mux_clock hsi1_mux_clks[] __initconst = { From 7a9b2536aaf7a26f76af1cf4f1822e4af54c650f Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 29 May 2025 14:21:43 +0530 Subject: [PATCH 2038/2924] acpi-cpufreq: Fix nominal_freq units to KHz in get_max_boost_ratio() commit cb6a85f38f456b086c366e346ebb67ffa70c7243 upstream. commit 083466754596 ("cpufreq: ACPI: Fix max-frequency computation") modified get_max_boost_ratio() to return the nominal_freq advertised in the _CPC object. This was for the purposes of computing the maximum frequency. The frequencies advertised in _CPC objects are in MHz. However, cpufreq expects the frequency to be in KHz. Since the nominal_freq returned by get_max_boost_ratio() was not in KHz but instead in MHz,the cpuinfo_max_frequency that was computed using this nominal_freq was incorrect and an invalid value which resulted in cpufreq reporting the P0 frequency as the cpuinfo_max_freq. Fix this by converting the nominal_freq to KHz before returning the same from get_max_boost_ratio(). Reported-by: Manu Bretelle Closes: https://lore.kernel.org/lkml/aDaB63tDvbdcV0cg@HQ-GR2X1W2P57/ Fixes: 083466754596 ("cpufreq: ACPI: Fix max-frequency computation") Signed-off-by: Gautham R. Shenoy Cc: 6.14+ # 6.14+ Link: https://patch.msgid.link/20250529085143.709-1-gautham.shenoy@amd.com Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/acpi-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index d26b610e4f24f9..76768fe213a978 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -660,7 +660,7 @@ static u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq) nominal_perf = perf_caps.nominal_perf; if (nominal_freq) - *nominal_freq = perf_caps.nominal_freq; + *nominal_freq = perf_caps.nominal_freq * 1000; if (!highest_perf || !nominal_perf) { pr_debug("CPU%d: highest or nominal performance missing\n", cpu); From 7429b67b2447f43aa8bc4f9aacb04b323ad3de15 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 9 Apr 2025 11:47:38 +0300 Subject: [PATCH 2039/2924] Documentation: ACPI: Use all-string data node references commit 6db0261f3776bde01ae916ad8e1cb2ded3ba1a2b upstream. Document that references to data nodes shall use string-only references instead of a device reference and a succession of the first package entries of hierarchical data node references. Fixes: 9880702d123f ("ACPI: property: Support using strings in reference properties") Cc: 6.8+ # 6.8+ Signed-off-by: Sakari Ailus Link: https://patch.msgid.link/20250409084738.3657079-1-sakari.ailus@linux.intel.com [ rjw: Clarifying edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- .../acpi/dsd/data-node-references.rst | 26 +++++++++---------- .../firmware-guide/acpi/dsd/graph.rst | 11 +++----- .../firmware-guide/acpi/dsd/leds.rst | 7 +---- 3 files changed, 17 insertions(+), 27 deletions(-) diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst index 8d8b53e96bcfee..ccb4b153e6f2dd 100644 --- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst +++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst @@ -12,11 +12,14 @@ ACPI in general allows referring to device objects in the tree only. Hierarchical data extension nodes may not be referred to directly, hence this document defines a scheme to implement such references. -A reference consist of the device object name followed by one or more -hierarchical data extension [dsd-guide] keys. Specifically, the hierarchical -data extension node which is referred to by the key shall lie directly under -the parent object i.e. either the device object or another hierarchical data -extension node. +A reference to a _DSD hierarchical data node is a string consisting of a +device object reference followed by a dot (".") and a relative path to a data +node object. Do not use non-string references as this will produce a copy of +the hierarchical data node, not a reference! + +The hierarchical data extension node which is referred to shall be located +directly under its parent object i.e. either the device object or another +hierarchical data extension node [dsd-guide]. The keys in the hierarchical data nodes shall consist of the name of the node, "@" character and the number of the node in hexadecimal notation (without pre- @@ -33,11 +36,9 @@ extension key. Example ======= -In the ASL snippet below, the "reference" _DSD property contains a -device object reference to DEV0 and under that device object, a -hierarchical data extension key "node@1" referring to the NOD1 object -and lastly, a hierarchical data extension key "anothernode" referring to -the ANOD object which is also the final target node of the reference. +In the ASL snippet below, the "reference" _DSD property contains a string +reference to a hierarchical data extension node ANOD under DEV0 under the parent +of DEV1. ANOD is also the final target node of the reference. :: Device (DEV0) @@ -76,10 +77,7 @@ the ANOD object which is also the final target node of the reference. Name (_DSD, Package () { ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { - Package () { - "reference", Package () { - ^DEV0, "node@1", "anothernode" - } + Package () { "reference", "^DEV0.ANOD" } }, } }) diff --git a/Documentation/firmware-guide/acpi/dsd/graph.rst b/Documentation/firmware-guide/acpi/dsd/graph.rst index b9dbfc73ed25b6..d6ae5ffa748ca4 100644 --- a/Documentation/firmware-guide/acpi/dsd/graph.rst +++ b/Documentation/firmware-guide/acpi/dsd/graph.rst @@ -66,12 +66,9 @@ of that port shall be zero. Similarly, if a port may only have a single endpoint, the number of that endpoint shall be zero. The endpoint reference uses property extension with "remote-endpoint" property -name followed by a reference in the same package. Such references consist of -the remote device reference, the first package entry of the port data extension -reference under the device and finally the first package entry of the endpoint -data extension reference under the port. Individual references thus appear as:: +name followed by a string reference in the same package. [data-node-ref]:: - Package() { device, "port@X", "endpoint@Y" } + "device.datanode" In the above example, "X" is the number of the port and "Y" is the number of the endpoint. @@ -109,7 +106,7 @@ A simple example of this is show below:: ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { Package () { "reg", 0 }, - Package () { "remote-endpoint", Package() { \_SB.PCI0.ISP, "port@4", "endpoint@0" } }, + Package () { "remote-endpoint", "\\_SB.PCI0.ISP.EP40" }, } }) } @@ -141,7 +138,7 @@ A simple example of this is show below:: ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { Package () { "reg", 0 }, - Package () { "remote-endpoint", Package () { \_SB.PCI0.I2C2.CAM0, "port@0", "endpoint@0" } }, + Package () { "remote-endpoint", "\\_SB.PCI0.I2C2.CAM0.EP00" }, } }) } diff --git a/Documentation/firmware-guide/acpi/dsd/leds.rst b/Documentation/firmware-guide/acpi/dsd/leds.rst index 93db592c93c712..a97cd07d49be38 100644 --- a/Documentation/firmware-guide/acpi/dsd/leds.rst +++ b/Documentation/firmware-guide/acpi/dsd/leds.rst @@ -15,11 +15,6 @@ Referring to LEDs in Device tree is documented in [video-interfaces], in "flash-leds" property documentation. In short, LEDs are directly referred to by using phandles. -While Device tree allows referring to any node in the tree [devicetree], in -ACPI references are limited to device nodes only [acpi]. For this reason using -the same mechanism on ACPI is not possible. A mechanism to refer to non-device -ACPI nodes is documented in [data-node-ref]. - ACPI allows (as does DT) using integer arguments after the reference. A combination of the LED driver device reference and an integer argument, referring to the "reg" property of the relevant LED, is used to identify @@ -74,7 +69,7 @@ omitted. :: Package () { Package () { "flash-leds", - Package () { ^LED, "led@0", ^LED, "led@1" }, + Package () { "^LED.LED0", "^LED.LED1" }, } } }) From 9ebe21ede792cef851847648962c363cac67d17f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Tue, 20 May 2025 17:15:58 -0400 Subject: [PATCH 2040/2924] pinctrl: mediatek: eint: Fix invalid pointer dereference for v1 platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1c9977b263475373b31bbf86af94a5c9ae2be42c upstream. Commit 3ef9f710efcb ("pinctrl: mediatek: Add EINT support for multiple addresses") introduced an access to the 'soc' field of struct mtk_pinctrl in mtk_eint_do_init() and for that an include of pinctrl-mtk-common-v2.h. However, pinctrl drivers relying on the v1 common driver include pinctrl-mtk-common.h instead, which provides another definition of struct mtk_pinctrl that does not contain an 'soc' field. Since mtk_eint_do_init() can be called both by v1 and v2 drivers, it will now try to dereference an invalid pointer when called on v1 platforms. This has been observed on Genio 350 EVK (MT8365), which crashes very early in boot (the kernel trace can only be seen with earlycon). In order to fix this, since 'struct mtk_pinctrl' was only needed to get a 'struct mtk_eint_pin', make 'struct mtk_eint_pin' a parameter of mtk_eint_do_init() so that callers need to supply it, removing mtk_eint_do_init()'s dependency on any particular 'struct mtk_pinctrl'. Fixes: 3ef9f710efcb ("pinctrl: mediatek: Add EINT support for multiple addresses") Suggested-by: AngeloGioacchino Del Regno Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/20250520-genio-350-eint-null-ptr-deref-fix-v2-1-6a3ca966a7ba@collabora.com Signed-off-by: Linus Walleij [ukleinek: backport to 6.15.y] Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/mediatek/mtk-eint.c | 26 ++++++++----------- drivers/pinctrl/mediatek/mtk-eint.h | 5 ++-- .../pinctrl/mediatek/pinctrl-mtk-common-v2.c | 2 +- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 2 +- 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index b4eb2beab691cf..c516c34aaaf603 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -22,7 +22,6 @@ #include #include "mtk-eint.h" -#include "pinctrl-mtk-common-v2.h" #define MTK_EINT_EDGE_SENSITIVE 0 #define MTK_EINT_LEVEL_SENSITIVE 1 @@ -505,10 +504,9 @@ int mtk_eint_find_irq(struct mtk_eint *eint, unsigned long eint_n) } EXPORT_SYMBOL_GPL(mtk_eint_find_irq); -int mtk_eint_do_init(struct mtk_eint *eint) +int mtk_eint_do_init(struct mtk_eint *eint, struct mtk_eint_pin *eint_pin) { unsigned int size, i, port, inst = 0; - struct mtk_pinctrl *hw = (struct mtk_pinctrl *)eint->pctl; /* If clients don't assign a specific regs, let's use generic one */ if (!eint->regs) @@ -519,7 +517,15 @@ int mtk_eint_do_init(struct mtk_eint *eint) if (!eint->base_pin_num) return -ENOMEM; - if (eint->nbase == 1) { + if (eint_pin) { + eint->pins = eint_pin; + for (i = 0; i < eint->hw->ap_num; i++) { + inst = eint->pins[i].instance; + if (inst >= eint->nbase) + continue; + eint->base_pin_num[inst]++; + } + } else { size = eint->hw->ap_num * sizeof(struct mtk_eint_pin); eint->pins = devm_kmalloc(eint->dev, size, GFP_KERNEL); if (!eint->pins) @@ -533,16 +539,6 @@ int mtk_eint_do_init(struct mtk_eint *eint) } } - if (hw && hw->soc && hw->soc->eint_pin) { - eint->pins = hw->soc->eint_pin; - for (i = 0; i < eint->hw->ap_num; i++) { - inst = eint->pins[i].instance; - if (inst >= eint->nbase) - continue; - eint->base_pin_num[inst]++; - } - } - eint->pin_list = devm_kmalloc(eint->dev, eint->nbase * sizeof(u16 *), GFP_KERNEL); if (!eint->pin_list) goto err_pin_list; @@ -610,7 +606,7 @@ int mtk_eint_do_init(struct mtk_eint *eint) err_wake_mask: devm_kfree(eint->dev, eint->pin_list); err_pin_list: - if (eint->nbase == 1) + if (!eint_pin) devm_kfree(eint->dev, eint->pins); err_pins: devm_kfree(eint->dev, eint->base_pin_num); diff --git a/drivers/pinctrl/mediatek/mtk-eint.h b/drivers/pinctrl/mediatek/mtk-eint.h index f7f58cca0d5e31..23801d4b636f62 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.h +++ b/drivers/pinctrl/mediatek/mtk-eint.h @@ -88,7 +88,7 @@ struct mtk_eint { }; #if IS_ENABLED(CONFIG_EINT_MTK) -int mtk_eint_do_init(struct mtk_eint *eint); +int mtk_eint_do_init(struct mtk_eint *eint, struct mtk_eint_pin *eint_pin); int mtk_eint_do_suspend(struct mtk_eint *eint); int mtk_eint_do_resume(struct mtk_eint *eint); int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_n, @@ -96,7 +96,8 @@ int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_n, int mtk_eint_find_irq(struct mtk_eint *eint, unsigned long eint_n); #else -static inline int mtk_eint_do_init(struct mtk_eint *eint) +static inline int mtk_eint_do_init(struct mtk_eint *eint, + struct mtk_eint_pin *eint_pin) { return -EOPNOTSUPP; } diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index d1556b75d9effd..ba13558bfcd7bb 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -416,7 +416,7 @@ int mtk_build_eint(struct mtk_pinctrl *hw, struct platform_device *pdev) hw->eint->pctl = hw; hw->eint->gpio_xlate = &mtk_eint_xt; - ret = mtk_eint_do_init(hw->eint); + ret = mtk_eint_do_init(hw->eint, hw->soc->eint_pin); if (ret) goto err_free_eint; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 8596f3541265e9..7289648eaa0259 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1039,7 +1039,7 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) pctl->eint->pctl = pctl; pctl->eint->gpio_xlate = &mtk_eint_xt; - return mtk_eint_do_init(pctl->eint); + return mtk_eint_do_init(pctl->eint, NULL); } /* This is used as a common probe function */ From aa9745977724d561245b11bb7999f6196ef076ea Mon Sep 17 00:00:00 2001 From: Alexandre Mergnat Date: Mon, 28 Apr 2025 12:06:47 +0200 Subject: [PATCH 2041/2924] rtc: Make rtc_time64_to_tm() support dates before 1970 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7df4cfef8b351fec3156160bedfc7d6d29de4cce upstream. Conversion of dates before 1970 is still relevant today because these dates are reused on some hardwares to store dates bigger than the maximal date that is representable in the device's native format. This prominently and very soon affects the hardware covered by the rtc-mt6397 driver that can only natively store dates in the interval 1900-01-01 up to 2027-12-31. So to store the date 2028-01-01 00:00:00 to such a device, rtc_time64_to_tm() must do the right thing for time=-2208988800. Signed-off-by: Alexandre Mergnat Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20250428-enable-rtc-v4-1-2b2f7e3f9349@baylibre.com Signed-off-by: Alexandre Belloni Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/lib.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/lib.c b/drivers/rtc/lib.c index fe361652727a3f..13b5b1f2046510 100644 --- a/drivers/rtc/lib.c +++ b/drivers/rtc/lib.c @@ -46,24 +46,38 @@ EXPORT_SYMBOL(rtc_year_days); * rtc_time64_to_tm - converts time64_t to rtc_time. * * @time: The number of seconds since 01-01-1970 00:00:00. - * (Must be positive.) + * Works for values since at least 1900 * @tm: Pointer to the struct rtc_time. */ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) { - unsigned int secs; - int days; + int days, secs; u64 u64tmp; u32 u32tmp, udays, century, day_of_century, year_of_century, year, day_of_year, month, day; bool is_Jan_or_Feb, is_leap_year; - /* time must be positive */ + /* + * Get days and seconds while preserving the sign to + * handle negative time values (dates before 1970-01-01) + */ days = div_s64_rem(time, 86400, &secs); + /* + * We need 0 <= secs < 86400 which isn't given for negative + * values of time. Fixup accordingly. + */ + if (secs < 0) { + days -= 1; + secs += 86400; + } + /* day of the week, 1970-01-01 was a Thursday */ tm->tm_wday = (days + 4) % 7; + /* Ensure tm_wday is always positive */ + if (tm->tm_wday < 0) + tm->tm_wday += 7; /* * The following algorithm is, basically, Proposition 6.3 of Neri @@ -93,7 +107,7 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) * thus, is slightly different from [1]. */ - udays = ((u32) days) + 719468; + udays = days + 719468; u32tmp = 4 * udays + 3; century = u32tmp / 146097; From cea549ca705b3c0c657f5ef62e7926ad7e8ca342 Mon Sep 17 00:00:00 2001 From: Alexandre Mergnat Date: Mon, 28 Apr 2025 12:06:48 +0200 Subject: [PATCH 2042/2924] rtc: Fix offset calculation for .start_secs < 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fe9f5f96cfe8b82d0f24cbfa93718925560f4f8d upstream. The comparison rtc->start_secs > rtc->range_max has a signed left-hand side and an unsigned right-hand side. So the comparison might become true for negative start_secs which is interpreted as a (possibly very large) positive value. As a negative value can never be bigger than an unsigned value the correct representation of the (mathematical) comparison rtc->start_secs > rtc->range_max in C is: rtc->start_secs >= 0 && rtc->start_secs > rtc->range_max Use that to fix the offset calculation currently used in the rtc-mt6397 driver. Fixes: 989515647e783 ("rtc: Add one offset seconds to expand RTC range") Signed-off-by: Alexandre Mergnat Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20250428-enable-rtc-v4-2-2b2f7e3f9349@baylibre.com Signed-off-by: Alexandre Belloni Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/class.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index b88cd4fb295bce..b1a2be1f9e3b93 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -326,7 +326,7 @@ static void rtc_device_get_offset(struct rtc_device *rtc) * * Otherwise the offset seconds should be 0. */ - if (rtc->start_secs > rtc->range_max || + if ((rtc->start_secs >= 0 && rtc->start_secs > rtc->range_max) || rtc->start_secs + range_secs - 1 < rtc->range_min) rtc->offset_secs = rtc->start_secs - rtc->range_min; else if (rtc->start_secs > rtc->range_min) From deaead8247f3de0ae5e7c5dcea55343d4320bdc0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 31 May 2025 18:32:37 -0400 Subject: [PATCH 2043/2924] bcachefs: Kill un-reverted directory i_size code commit 95fafc0f3407a6446082c11849df585bd3246571 upstream. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- fs/bcachefs/dirent.c | 12 ++---------- fs/bcachefs/dirent.h | 4 ++-- fs/bcachefs/namei.c | 4 ++-- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index a5119508822789..901230ca4a750e 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -395,8 +395,8 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, } int bch2_dirent_rename(struct btree_trans *trans, - subvol_inum src_dir, struct bch_hash_info *src_hash, u64 *src_dir_i_size, - subvol_inum dst_dir, struct bch_hash_info *dst_hash, u64 *dst_dir_i_size, + subvol_inum src_dir, struct bch_hash_info *src_hash, + subvol_inum dst_dir, struct bch_hash_info *dst_hash, const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset, const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset, enum bch_rename_mode mode) @@ -535,14 +535,6 @@ int bch2_dirent_rename(struct btree_trans *trans, new_src->v.d_type == DT_SUBVOL) new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol); - if (old_dst.k) - *dst_dir_i_size -= bkey_bytes(old_dst.k); - *src_dir_i_size -= bkey_bytes(old_src.k); - - if (mode == BCH_RENAME_EXCHANGE) - *src_dir_i_size += bkey_bytes(&new_src->k); - *dst_dir_i_size += bkey_bytes(&new_dst->k); - ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0); if (ret) goto out; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index d3e7ae669575a6..999b895fa28a86 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -80,8 +80,8 @@ enum bch_rename_mode { }; int bch2_dirent_rename(struct btree_trans *, - subvol_inum, struct bch_hash_info *, u64 *, - subvol_inum, struct bch_hash_info *, u64 *, + subvol_inum, struct bch_hash_info *, + subvol_inum, struct bch_hash_info *, const struct qstr *, subvol_inum *, u64 *, const struct qstr *, subvol_inum *, u64 *, enum bch_rename_mode); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 9136a90977893c..413fb60cff434b 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -418,8 +418,8 @@ int bch2_rename_trans(struct btree_trans *trans, } ret = bch2_dirent_rename(trans, - src_dir, &src_hash, &src_dir_u->bi_size, - dst_dir, &dst_hash, &dst_dir_u->bi_size, + src_dir, &src_hash, + dst_dir, &dst_hash, src_name, &src_inum, &src_offset, dst_name, &dst_inum, &dst_offset, mode); From 66c36452163871360f15dc4042dd00612e42cd66 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 31 May 2025 11:58:11 -0400 Subject: [PATCH 2044/2924] bcachefs: Repair code for directory i_size commit 36a2fdf7c5c1ccae6ca16cd14067567096cebe17 upstream. We had a bug due due to an incomplete revert of the patch implementing directory i_size (summing up the size of the dirents), leading to completely screwy i_size values that underflow. Most userspace programs don't seem to care (e.g. du ignores it), but it turns out this broke sshfs, so needs to be repaired. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- fs/bcachefs/fsck.c | 8 ++++++++ fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index aaf18708527644..bf117f2225d8a0 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1183,6 +1183,14 @@ static int check_inode(struct btree_trans *trans, ret = 0; } + if (fsck_err_on(S_ISDIR(u.bi_mode) && u.bi_size, + trans, inode_dir_has_nonzero_i_size, + "directory %llu:%u with nonzero i_size %lli", + u.bi_inum, u.bi_snapshot, u.bi_size)) { + u.bi_size = 0; + do_update = true; + } + ret = bch2_inode_has_child_snapshots(trans, k.k->p); if (ret < 0) goto err; diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 4036a20c6adc26..2274ae59bc2a70 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -232,6 +232,7 @@ enum bch_fsck_flags { x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ x(inode_dir_unlinked_but_not_empty, 286, FSCK_AUTOFIX) \ + x(inode_dir_has_nonzero_i_size, 319, FSCK_AUTOFIX) \ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ @@ -328,7 +329,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 319, 0) + x(MAX, 320, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From fd1b2ef7216a29ad6a27ed43c7298ed8e4a9a0e7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 2 Jun 2025 18:26:44 -0400 Subject: [PATCH 2045/2924] bcachefs: delete dead code from may_delete_deleted_inode() commit bb6689bbeebc6fb51f0f120b486bdcc9a38ffcf6 upstream. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- fs/bcachefs/inode.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 490b85841de960..07e733c1ed78a2 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1342,10 +1342,7 @@ int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)); } -static int may_delete_deleted_inode(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos pos, - bool *need_another_pass) +static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) { struct bch_fs *c = trans->c; struct btree_iter inode_iter; @@ -1434,9 +1431,8 @@ static int may_delete_deleted_inode(struct btree_trans *trans, int bch2_delete_dead_inodes(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); - bool need_another_pass; int ret; -again: + /* * if we ran check_inodes() unlinked inodes will have already been * cleaned up but the write buffer will be out of sync; therefore we @@ -1446,8 +1442,6 @@ int bch2_delete_dead_inodes(struct bch_fs *c) if (ret) goto err; - need_another_pass = false; - /* * Weird transaction restart handling here because on successful delete, * bch2_inode_rm_snapshot() will return a nested transaction restart, @@ -1457,7 +1451,7 @@ int bch2_delete_dead_inodes(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); + ret = may_delete_deleted_inode(trans, k.k->p); if (ret > 0) { bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); @@ -1478,9 +1472,6 @@ int bch2_delete_dead_inodes(struct bch_fs *c) ret; })); - - if (!ret && need_another_pass) - goto again; err: bch2_trans_put(trans); return ret; From 00d26f3a47e44bd7662de33b89dd84a3837e2959 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 2 Jun 2025 17:43:36 -0400 Subject: [PATCH 2046/2924] bcachefs: Run may_delete_deleted_inode() checks in bch2_inode_rm() commit 09fb85ae565645b982e9030dbb2ff6707f2cdddc upstream. We had a bug where bch2_evict_inode() incorrectly called bch2_inode_rm() - the journal clearly showed the inode was not unlinked. We've got checks that we use in recovery when cleaning up deleted inodes, lift them to bch2_inode_rm() as well. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- fs/bcachefs/errcode.h | 2 ++ fs/bcachefs/fs.c | 8 ++++- fs/bcachefs/inode.c | 66 +++++++++++++++++++++++++--------- fs/bcachefs/sb-errors_format.h | 3 +- 4 files changed, 61 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index d9ebffa5b3a282..346766299cb34a 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -209,6 +209,8 @@ x(EINVAL, remove_would_lose_data) \ x(EINVAL, no_resize_with_buckets_nouse) \ x(EINVAL, inode_unpack_error) \ + x(EINVAL, inode_not_unlinked) \ + x(EINVAL, inode_has_child_snapshot) \ x(EINVAL, varint_decode_error) \ x(EINVAL, erasure_coding_found_btree_node) \ x(EOPNOTSUPP, may_not_use_incompat_feature) \ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 47f1a64c5c8d83..8a47ce3467e8d2 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2181,7 +2181,13 @@ static void bch2_evict_inode(struct inode *vinode) KEY_TYPE_QUOTA_WARN); bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, KEY_TYPE_QUOTA_WARN); - bch2_inode_rm(c, inode_inum(inode)); + int ret = bch2_inode_rm(c, inode_inum(inode)); + if (ret && !bch2_err_matches(ret, EROFS)) { + bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu", + inode->ei_inum.subvol, + inode->ei_inum.inum); + bch2_sb_error_count(c, BCH_FSCK_ERR_vfs_bad_inode_rm); + } /* * If we are deleting, we need it present in the vfs hash table diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 07e733c1ed78a2..845efd429d13a8 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -38,6 +38,7 @@ static const char * const bch2_inode_flag_strs[] = { #undef x static int delete_ancestor_snapshot_inodes(struct btree_trans *, struct bpos); +static int may_delete_deleted_inum(struct btree_trans *, subvol_inum); static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; @@ -1048,19 +1049,23 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) u32 snapshot; int ret; + ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum)); + if (ret) + goto err2; + /* * If this was a directory, there shouldn't be any real dirents left - * but there could be whiteouts (from hash collisions) that we should * delete: * - * XXX: the dirent could ideally would delete whiteouts when they're no + * XXX: the dirent code ideally would delete whiteouts when they're no * longer needed */ ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?: bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?: bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents); if (ret) - goto err; + goto err2; retry: bch2_trans_begin(trans); @@ -1342,7 +1347,8 @@ int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)); } -static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) +static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, + bool from_deleted_inodes) { struct bch_fs *c = trans->c; struct btree_iter inode_iter; @@ -1357,11 +1363,13 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) return ret; ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode; - if (fsck_err_on(!bkey_is_inode(k.k), + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_missing, "nonexistent inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) + goto out; ret = bch2_inode_unpack(k, &inode); if (ret) @@ -1369,7 +1377,8 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) if (S_ISDIR(inode.bi_mode)) { ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot); - if (fsck_err_on(bch2_err_matches(ret, ENOTEMPTY), + if (fsck_err_on(from_deleted_inodes && + bch2_err_matches(ret, ENOTEMPTY), trans, deleted_inode_is_dir, "non empty directory %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) @@ -1378,17 +1387,25 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) goto out; } - if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), + ret = inode.bi_flags & BCH_INODE_unlinked ? 0 : -BCH_ERR_inode_not_unlinked; + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_not_unlinked, "non-deleted inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) + goto out; + + ret = !(inode.bi_flags & BCH_INODE_has_child_snapshot) + ? 0 : -BCH_ERR_inode_has_child_snapshot; - if (fsck_err_on(inode.bi_flags & BCH_INODE_has_child_snapshot, + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_has_child_snapshots, "inode with child snapshots %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) + goto out; ret = bch2_inode_has_child_snapshots(trans, k.k->p); if (ret < 0) @@ -1405,19 +1422,28 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) if (ret) goto out; } + + if (!from_deleted_inodes) { + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: + -BCH_ERR_inode_has_child_snapshot; + goto out; + } + goto delete; } - if (test_bit(BCH_FS_clean_recovery, &c->flags) && - !fsck_err(trans, deleted_inode_but_clean, - "filesystem marked as clean but have deleted inode %llu:%u", - pos.offset, pos.snapshot)) { - ret = 0; - goto out; - } + if (from_deleted_inodes) { + if (test_bit(BCH_FS_clean_recovery, &c->flags) && + !fsck_err(trans, deleted_inode_but_clean, + "filesystem marked as clean but have deleted inode %llu:%u", + pos.offset, pos.snapshot)) { + ret = 0; + goto out; + } - ret = 1; + ret = 1; + } out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); @@ -1428,6 +1454,14 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos) goto out; } +static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum) +{ + u32 snapshot; + + return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?: + may_delete_deleted_inode(trans, SPOS(0, inum.inum, snapshot), false); +} + int bch2_delete_dead_inodes(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); @@ -1451,7 +1485,7 @@ int bch2_delete_dead_inodes(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - ret = may_delete_deleted_inode(trans, k.k->p); + ret = may_delete_deleted_inode(trans, k.k->p, true); if (ret > 0) { bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 2274ae59bc2a70..9387f6092fe989 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -244,6 +244,7 @@ enum bch_fsck_flags { x(inode_parent_has_case_insensitive_not_set, 317, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \ + x(vfs_bad_inode_rm, 320, 0) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ @@ -329,7 +330,7 @@ enum bch_fsck_flags { x(dirent_stray_data_after_cf_name, 305, 0) \ x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ - x(MAX, 320, 0) + x(MAX, 321, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From 379dd295019c9e11d19f660a857170ee1bd74f63 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 2 Jun 2025 19:48:27 -0400 Subject: [PATCH 2047/2924] bcachefs: Fix subvol to missing root repair commit 29cc6fb7c068c773049d3bde14b939033893eff4 upstream. We had a bug where the root inode of a subvolume was erronously deleted: bch2_evict_inode() called bch2_inode_rm(), meaning the VFS inode's i_nlink was somehow set to 0 when it shouldn't have - the inode in the btree indicated it clearly was not unlinked. This has been addressed with additional safety checks in bch2_inode_rm() - pulling in the safety checks we already were doing when deleting unlinked inodes in recovery - but the really disastrous bug was in check_subvols(), which on finding a dangling subvol (subvol with a missing root inode) would delete the subvolume. I assume this bug dates from early check_directory_structure() code, which originally handled subvolumes and normal paths - the idea being that still live contents of the subvolume would get reattached somewhere. But that's incorrect, and disastrously so; deleting a subvolume triggers deleting the snapshot ID it points to, deleting the entire contents. The correct way to repair is to recreate the root inode if it's missing; then any contents will get reattached under that subvolume's lost+found. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- fs/bcachefs/subvolume.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index d0209f7658bb87..bc6009a7128443 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -6,6 +6,7 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "inode.h" #include "recovery_passes.h" #include "snapshot.h" #include "subvolume.h" @@ -113,10 +114,20 @@ static int check_subvol(struct btree_trans *trans, "subvolume %llu points to missing subvolume root %llu:%u", k.k->p.offset, le64_to_cpu(subvol.v->inode), le32_to_cpu(subvol.v->snapshot))) { - ret = bch2_subvolume_delete(trans, iter->pos.offset); - bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); - ret = ret ?: -BCH_ERR_transaction_restart_nested; - goto err; + /* + * Recreate - any contents that are still disconnected + * will then get reattached under lost+found + */ + bch2_inode_init_early(c, &inode); + bch2_inode_init_late(&inode, bch2_current_time(c), + 0, 0, S_IFDIR|0700, 0, NULL); + inode.bi_inum = le64_to_cpu(subvol.v->inode); + inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); + inode.bi_subvol = k.k->p.offset; + inode.bi_parent_subvol = le32_to_cpu(subvol.v->fs_path_parent); + ret = __bch2_fsck_write_inode(trans, &inode); + if (ret) + goto err; } } else { goto err; From fa4b47965c331eb6938d70285094156f3b198b2e Mon Sep 17 00:00:00 2001 From: Jiayi Li Date: Thu, 8 May 2025 13:59:47 +0800 Subject: [PATCH 2048/2924] usb: quirks: Add NO_LPM quirk for SanDisk Extreme 55AE commit 19f795591947596b5b9efa86fd4b9058e45786e9 upstream. This device exhibits I/O errors during file transfers due to unstable link power management (LPM) behavior. The kernel logs show repeated warm resets and eventual disconnection when LPM is enabled: [ 3467.810740] hub 2-0:1.0: state 7 ports 6 chg 0000 evt 0020 [ 3467.810740] usb usb2-port5: do warm reset [ 3467.866444] usb usb2-port5: not warm reset yet, waiting 50ms [ 3467.907407] sd 0:0:0:0: [sda] tag#12 sense submit err -19 [ 3467.994423] usb usb2-port5: status 02c0, change 0001, 10.0 Gb/s [ 3467.994453] usb 2-5: USB disconnect, device number 4 The error -19 (ENODEV) occurs when the device disappears during write operations. Adding USB_QUIRK_NO_LPM disables link power management for this specific device, resolving the stability issues. Signed-off-by: Jiayi Li Cc: stable Link: https://lore.kernel.org/r/20250508055947.764538-1-lijiayi@kylinos.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 36d3df7d040c63..53d68d20fb62e0 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -372,6 +372,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* SanDisk Corp. SanDisk 3.2Gen1 */ { USB_DEVICE(0x0781, 0x55a3), .driver_info = USB_QUIRK_DELAY_INIT }, + /* SanDisk Extreme 55AE */ + { USB_DEVICE(0x0781, 0x55ae), .driver_info = USB_QUIRK_NO_LPM }, + /* Realforce 87U Keyboard */ { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, From 144945bb0d6f44652ae47b749f7108e5ee71328a Mon Sep 17 00:00:00 2001 From: Hongyu Xie Date: Mon, 19 May 2025 10:33:28 +0800 Subject: [PATCH 2049/2924] usb: storage: Ignore UAS driver for SanDisk 3.2 Gen2 storage device commit a541acceedf4f639f928f41fbb676b75946dc295 upstream. SanDisk 3.2 Gen2 storage device(0781:55e8) doesn't work well with UAS. Log says, [ 6.507865][ 3] [ T159] usb 2-1.4: new SuperSpeed Gen 1 USB device number 4 using xhci_hcd [ 6.540314][ 3] [ T159] usb 2-1.4: New USB device found, idVendor=0781, idProduct=55e8, bcdDevice= 0.01 [ 6.576304][ 3] [ T159] usb 2-1.4: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 6.584727][ 3] [ T159] usb 2-1.4: Product: SanDisk 3.2 Gen2 [ 6.590459][ 3] [ T159] usb 2-1.4: Manufacturer: SanDisk [ 6.595845][ 3] [ T159] usb 2-1.4: SerialNumber: 03021707022525140940 [ 7.230852][ 0] [ T265] usbcore: registered new interface driver usb-storage [ 7.251247][ 0] [ T265] scsi host3: uas [ 7.255280][ 0] [ T265] usbcore: registered new interface driver uas [ 7.270498][ 1] [ T192] scsi 3:0:0:0: Direct-Access SanDisk Extreme Pro DDE1 0110 PQ: 0 ANSI: 6 [ 7.299588][ 3] [ T192] scsi 3:0:0:1: Enclosure SanDisk SES Device 0110 PQ: 0 ANSI: 6 [ 7.321681][ 3] [ T192] sd 3:0:0:0: Attached scsi generic sg1 type 0 [ 7.328185][ 3] [ T192] scsi 3:0:0:1: Attached scsi generic sg2 type 13 [ 7.328804][ 0] [ T191] sd 3:0:0:0: [sda] 976773168 512-byte logical blocks: (500 GB/466 GiB) [ 7.343486][ 0] [ T191] sd 3:0:0:0: [sda] 4096-byte physical blocks [ 7.364611][ 0] [ T191] sd 3:0:0:0: [sda] Write Protect is off [ 7.370524][ 0] [ T191] sd 3:0:0:0: [sda] Mode Sense: 3d 00 10 00 [ 7.390655][ 0] [ T191] sd 3:0:0:0: [sda] Write cache: enabled, read cache: enabled, supports DPO and FUA [ 7.401363][ 0] [ T191] sd 3:0:0:0: [sda] Optimal transfer size 1048576 bytes [ 7.436010][ 0] [ T191] sda: sda1 [ 7.450850][ 0] [ T191] sd 3:0:0:0: [sda] Attached SCSI disk [ 7.470218][ 4] [ T262] scsi 3:0:0:1: Failed to get diagnostic page 0x1 [ 7.474869][ 0] [ C0] sd 3:0:0:0: [sda] tag#0 data cmplt err -75 uas-tag 2 inflight: CMD [ 7.476911][ 4] [ T262] scsi 3:0:0:1: Failed to bind enclosure -19 [ 7.485330][ 0] [ C0] sd 3:0:0:0: [sda] tag#0 CDB: Read(10) 28 00 00 00 00 28 00 00 10 00 [ 7.491593][ 4] [ T262] ses 3:0:0:1: Attached Enclosure device [ 38.066980][ 4] [ T192] sd 3:0:0:0: [sda] tag#4 uas_eh_abort_handler 0 uas-tag 5 inflight: CMD IN [ 38.076012][ 4] [ T192] sd 3:0:0:0: [sda] tag#4 CDB: Read(10) 28 00 00 00 01 08 00 00 f8 00 [ 38.086485][ 4] [ T192] sd 3:0:0:0: [sda] tag#3 uas_eh_abort_handler 0 uas-tag 1 inflight: CMD IN [ 38.095515][ 4] [ T192] sd 3:0:0:0: [sda] tag#3 CDB: Read(10) 28 00 00 00 00 10 00 00 08 00 [ 38.104122][ 4] [ T192] sd 3:0:0:0: [sda] tag#2 uas_eh_abort_handler 0 uas-tag 4 inflight: CMD IN [ 38.113152][ 4] [ T192] sd 3:0:0:0: [sda] tag#2 CDB: Read(10) 28 00 00 00 00 88 00 00 78 00 [ 38.121761][ 4] [ T192] sd 3:0:0:0: [sda] tag#1 uas_eh_abort_handler 0 uas-tag 3 inflight: CMD IN [ 38.130791][ 4] [ T192] sd 3:0:0:0: [sda] tag#1 CDB: Read(10) 28 00 00 00 00 48 00 00 30 00 [ 38.139401][ 4] [ T192] sd 3:0:0:0: [sda] tag#0 uas_eh_abort_handler 0 uas-tag 2 inflight: CMD [ 38.148170][ 4] [ T192] sd 3:0:0:0: [sda] tag#0 CDB: Read(10) 28 00 00 00 00 28 00 00 10 00 [ 38.178980][ 2] [ T304] scsi host3: uas_eh_device_reset_handler start [ 38.901540][ 2] [ T304] usb 2-1.4: reset SuperSpeed Gen 1 USB device number 4 using xhci_hcd [ 38.936791][ 2] [ T304] scsi host3: uas_eh_device_reset_handler success Device decriptor is below, Bus 002 Device 006: ID 0781:55e8 SanDisk Corp. SanDisk 3.2 Gen2 Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 3.20 bDeviceClass 0 bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 9 idVendor 0x0781 SanDisk Corp. idProduct 0x55e8 bcdDevice 0.01 iManufacturer 1 SanDisk iProduct 2 SanDisk 3.2 Gen2 iSerial 3 03021707022525140940 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 0x0079 bNumInterfaces 1 bConfigurationValue 1 iConfiguration 0 bmAttributes 0x80 (Bus Powered) MaxPower 896mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 8 Mass Storage bInterfaceSubClass 6 SCSI bInterfaceProtocol 80 Bulk-Only iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 15 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 15 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 1 bNumEndpoints 4 bInterfaceClass 8 Mass Storage bInterfaceSubClass 6 SCSI bInterfaceProtocol 98 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 0 Command pipe (0x01) Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x84 EP 4 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 15 MaxStreams 32 Status pipe (0x02) Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 15 MaxStreams 32 Data-in pipe (0x03) Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x03 EP 3 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0400 1x 1024 bytes bInterval 0 bMaxBurst 15 MaxStreams 32 Data-out pipe (0x04) Binary Object Store Descriptor: bLength 5 bDescriptorType 15 wTotalLength 0x002a bNumDeviceCaps 3 USB 2.0 Extension Device Capability: bLength 7 bDescriptorType 16 bDevCapabilityType 2 bmAttributes 0x0000f41e BESL Link Power Management (LPM) Supported BESL value 1024 us Deep BESL value 61440 us SuperSpeed USB Device Capability: bLength 10 bDescriptorType 16 bDevCapabilityType 3 bmAttributes 0x00 wSpeedsSupported 0x000e Device can operate at Full Speed (12Mbps) Device can operate at High Speed (480Mbps) Device can operate at SuperSpeed (5Gbps) bFunctionalitySupport 1 Lowest fully-functional device speed is Full Speed (12Mbps) bU1DevExitLat 10 micro seconds bU2DevExitLat 2047 micro seconds SuperSpeedPlus USB Device Capability: bLength 20 bDescriptorType 16 bDevCapabilityType 10 bmAttributes 0x00000001 Sublink Speed Attribute count 1 Sublink Speed ID count 0 wFunctionalitySupport 0x1100 bmSublinkSpeedAttr[0] 0x000a4030 Speed Attribute ID: 0 10Gb/s Symmetric RX SuperSpeedPlus bmSublinkSpeedAttr[1] 0x000a40b0 Speed Attribute ID: 0 10Gb/s Symmetric TX SuperSpeedPlus Device Status: 0x0000 (Bus Powered) So ignore UAS driver for this device. Signed-off-by: Hongyu Xie Cc: stable Link: https://lore.kernel.org/r/20250519023328.1498856-1-xiehongyu1@kylinos.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index d460d71b425783..1477e31d776327 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -52,6 +52,13 @@ UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME), +/* Reported-by: Zhihong Zhou */ +UNUSUAL_DEV(0x0781, 0x55e8, 0x0000, 0x9999, + "SanDisk", + "", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* Reported-by: Hongling Zeng */ UNUSUAL_DEV(0x090c, 0x2000, 0x0000, 0x9999, "Hiksemi", From f43c67847f70d2eef66c6fb7fcfb9e2f897ce637 Mon Sep 17 00:00:00 2001 From: Charles Yeh Date: Wed, 21 May 2025 21:23:54 +0800 Subject: [PATCH 2050/2924] USB: serial: pl2303: add new chip PL2303GC-Q20 and PL2303GT-2AB commit d3a889482bd5abf2bbdc1ec3d2d49575aa160c9c upstream. Add new bcd (0x905) to support PL2303GT-2AB (TYPE_HXN). Add new bcd (0x1005) to support PL2303GC-Q20 (TYPE_HXN). Signed-off-by: Charles Yeh Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 010688dd9e49ce..22579d0d8ab8aa 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -458,6 +458,8 @@ static int pl2303_detect_type(struct usb_serial *serial) case 0x605: case 0x700: /* GR */ case 0x705: + case 0x905: /* GT-2AB */ + case 0x1005: /* GC-Q20 */ return TYPE_HXN; } break; From f35a1e0b38d2724f338df2d3660927d8c4cd7a4e Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Tue, 22 Apr 2025 14:47:17 +0100 Subject: [PATCH 2051/2924] usb: typec: ucsi: fix Clang -Wsign-conversion warning commit f4239ace2dd8606f6824757f192965a95746da05 upstream. debugfs.c emits the following warnings when compiling with the -Wsign-conversion flag with clang 15: drivers/usb/typec/ucsi/debugfs.c:58:27: warning: implicit conversion changes signedness: 'int' to 'u32' (aka 'unsigned int') [-Wsign-conversion] ucsi->debugfs->status = ret; ~ ^~~ drivers/usb/typec/ucsi/debugfs.c:71:25: warning: implicit conversion changes signedness: 'u32' (aka 'unsigned int') to 'int' [-Wsign-conversion] return ucsi->debugfs->status; ~~~~~~ ~~~~~~~~~~~~~~~^~~~~~ During ucsi_cmd() we see: if (ret < 0) { ucsi->debugfs->status = ret; return ret; } But "status" is u32 meaning unsigned wrap-around occurs when assigning a value which is < 0 to it, this obscures the real status. To fix this make the "status" of type int since ret is also of type int. Fixes: df0383ffad64 ("usb: typec: ucsi: Add debugfs for ucsi commands") Cc: stable@vger.kernel.org Signed-off-by: Qasim Ijaz Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250422134717.66218-1-qasdev00@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 9c5278a0c5d409..70910232a05d74 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -434,7 +434,7 @@ struct ucsi_debugfs_entry { u64 low; u64 high; } response; - u32 status; + int status; struct dentry *dentry; }; From 8b91e4b6b82b57499c939fa249cfae4c3070c0b9 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 27 May 2025 09:47:37 +0200 Subject: [PATCH 2052/2924] Bluetooth: hci_qca: move the SoC type check to the right place commit 0fb410c914eb03c7e9d821e26d03bac0a239e5db upstream. Commit 3d05fc82237a ("Bluetooth: qca: set power_ctrl_enabled on NULL returned by gpiod_get_optional()") accidentally changed the prevous behavior where power control would be disabled without the BT_EN GPIO only on QCA_WCN6750 and QCA_WCN6855 while also getting the error check wrong. We should treat every IS_ERR() return value from devm_gpiod_get_optional() as a reason to bail-out while we should only set power_ctrl_enabled to false on the two models mentioned above. While at it: use dev_err_probe() to save a LOC. Cc: stable@vger.kernel.org Fixes: 3d05fc82237a ("Bluetooth: qca: set power_ctrl_enabled on NULL returned by gpiod_get_optional()") Signed-off-by: Bartosz Golaszewski Reviewed-by: Krzysztof Kozlowski Tested-by: Hsin-chen Chuang Reviewed-by: Hsin-chen Chuang Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index e00590ba24fdbb..a2dc39c005f4f8 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2415,14 +2415,14 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR(qcadev->bt_en) && - (data->soc_type == QCA_WCN6750 || - data->soc_type == QCA_WCN6855)) { - dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); - return PTR_ERR(qcadev->bt_en); - } + if (IS_ERR(qcadev->bt_en)) + return dev_err_probe(&serdev->dev, + PTR_ERR(qcadev->bt_en), + "failed to acquire BT_EN gpio\n"); - if (!qcadev->bt_en) + if (!qcadev->bt_en && + (data->soc_type == QCA_WCN6750 || + data->soc_type == QCA_WCN6855)) power_ctrl_enabled = false; qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl", From abaecb2a4ad021c2f2426e9b2a9c020aef57aca9 Mon Sep 17 00:00:00 2001 From: Dustin Lundquist Date: Tue, 6 May 2025 11:18:45 -0700 Subject: [PATCH 2053/2924] serial: jsm: fix NPE during jsm_uart_port_init commit e3975aa899c0a3bbc10d035e699b142cd1373a71 upstream. No device was set which caused serial_base_ctrl_add to crash. BUG: kernel NULL pointer dereference, address: 0000000000000050 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 16 UID: 0 PID: 368 Comm: (udev-worker) Not tainted 6.12.25-amd64 #1 Debian 6.12.25-1 RIP: 0010:serial_base_ctrl_add+0x96/0x120 Call Trace: serial_core_register_port+0x1a0/0x580 ? __setup_irq+0x39c/0x660 ? __kmalloc_cache_noprof+0x111/0x310 jsm_uart_port_init+0xe8/0x180 [jsm] jsm_probe_one+0x1f4/0x410 [jsm] local_pci_probe+0x42/0x90 pci_device_probe+0x22f/0x270 really_probe+0xdb/0x340 ? pm_runtime_barrier+0x54/0x90 ? __pfx___driver_attach+0x10/0x10 __driver_probe_device+0x78/0x110 driver_probe_device+0x1f/0xa0 __driver_attach+0xba/0x1c0 bus_for_each_dev+0x8c/0xe0 bus_add_driver+0x112/0x1f0 driver_register+0x72/0xd0 jsm_init_module+0x36/0xff0 [jsm] ? __pfx_jsm_init_module+0x10/0x10 [jsm] do_one_initcall+0x58/0x310 do_init_module+0x60/0x230 Tested with Digi Neo PCIe 8 port card. Fixes: 84a9582fd203 ("serial: core: Start managing serial controllers to enable runtime PM") Cc: stable Signed-off-by: Dustin Lundquist Link: https://lore.kernel.org/r/3f31d4f75863614655c4673027a208be78d022ec.camel@null-ptr.net Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/jsm/jsm_tty.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c index ce0fef7e2c665c..be2f130696b3a0 100644 --- a/drivers/tty/serial/jsm/jsm_tty.c +++ b/drivers/tty/serial/jsm/jsm_tty.c @@ -451,6 +451,7 @@ int jsm_uart_port_init(struct jsm_board *brd) if (!brd->channels[i]) continue; + brd->channels[i]->uart_port.dev = &brd->pci_dev->dev; brd->channels[i]->uart_port.irq = brd->irq; brd->channels[i]->uart_port.uartclk = 14745600; brd->channels[i]->uart_port.type = PORT_JSM; From 93612604f4b59e96a3f070435f12b8fd3f9274bf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 May 2025 13:24:06 +0100 Subject: [PATCH 2054/2924] nvmem: rmem: select CONFIG_CRC32 commit 7a93add1d31f14e0b7e937163904dee1e864a9a8 upstream. The newly added crc checking leads to a link failure if CRC32 itself is disabled: x86_64-linux-ld: vmlinux.o: in function `rmem_eyeq5_checksum': rmem.c:(.text+0x52341b): undefined reference to `crc32_le_arch' Fixes: 7e606c311f70 ("nvmem: rmem: add CRC validation for Mobileye EyeQ5 NVMEM") Cc: stable Signed-off-by: Arnd Bergmann Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250509122407.11763-2-srini@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig index 8671b7c974b933..eceb3cdb421ffb 100644 --- a/drivers/nvmem/Kconfig +++ b/drivers/nvmem/Kconfig @@ -260,6 +260,7 @@ config NVMEM_RCAR_EFUSE config NVMEM_RMEM tristate "Reserved Memory Based Driver Support" depends on HAS_IOMEM + select CRC32 help This driver maps reserved memory into an nvmem device. It might be useful to expose information left by firmware in memory. From 88953d793922e555480706e109b970e68aaf7c8c Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Wed, 21 May 2025 14:16:56 +0200 Subject: [PATCH 2055/2924] usb: usbtmc: Fix timeout value in get_stb commit 342e4955a1f1ce28c70a589999b76365082dbf10 upstream. wait_event_interruptible_timeout requires a timeout argument in units of jiffies. It was being called in usbtmc_get_stb with the usb timeout value which is in units of milliseconds. Pass the timeout argument converted to jiffies. Fixes: 048c6d88a021 ("usb: usbtmc: Add ioctls to set/get usb timeout") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250521121656.18174-4-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 740d2d2b19fbe0..66f3d9324ba2f3 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -483,6 +483,7 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) u8 tag; int rv; long wait_rv; + unsigned long expire; dev_dbg(dev, "Enter ioctl_read_stb iin_ep_present: %d\n", data->iin_ep_present); @@ -512,10 +513,11 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) } if (data->iin_ep_present) { + expire = msecs_to_jiffies(file_data->timeout); wait_rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&data->iin_data_valid) != 0, - file_data->timeout); + expire); if (wait_rv < 0) { dev_dbg(dev, "wait interrupted %ld\n", wait_rv); rv = wait_rv; From aea61a1a77613d4184d7ebe7c1d7cb606458b43b Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Sat, 17 May 2025 17:09:56 +0000 Subject: [PATCH 2056/2924] binder: fix use-after-free in binderfs_evict_inode() commit 8c0a559825281764061a127632e5ad273f0466ad upstream. Running 'stress-ng --binderfs 16 --timeout 300' under KASAN-enabled kernel, I've noticed the following: BUG: KASAN: slab-use-after-free in binderfs_evict_inode+0x1de/0x2d0 Write of size 8 at addr ffff88807379bc08 by task stress-ng-binde/1699 CPU: 0 UID: 0 PID: 1699 Comm: stress-ng-binde Not tainted 6.14.0-rc7-g586de92313fc-dirty #13 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 Call Trace: dump_stack_lvl+0x1c2/0x2a0 ? __pfx_dump_stack_lvl+0x10/0x10 ? __pfx__printk+0x10/0x10 ? __pfx_lock_release+0x10/0x10 ? __virt_addr_valid+0x18c/0x540 ? __virt_addr_valid+0x469/0x540 print_report+0x155/0x840 ? __virt_addr_valid+0x18c/0x540 ? __virt_addr_valid+0x469/0x540 ? __phys_addr+0xba/0x170 ? binderfs_evict_inode+0x1de/0x2d0 kasan_report+0x147/0x180 ? binderfs_evict_inode+0x1de/0x2d0 binderfs_evict_inode+0x1de/0x2d0 ? __pfx_binderfs_evict_inode+0x10/0x10 evict+0x524/0x9f0 ? __pfx_lock_release+0x10/0x10 ? __pfx_evict+0x10/0x10 ? do_raw_spin_unlock+0x4d/0x210 ? _raw_spin_unlock+0x28/0x50 ? iput+0x697/0x9b0 __dentry_kill+0x209/0x660 ? shrink_kill+0x8d/0x2c0 shrink_kill+0xa9/0x2c0 shrink_dentry_list+0x2e0/0x5e0 shrink_dcache_parent+0xa2/0x2c0 ? __pfx_shrink_dcache_parent+0x10/0x10 ? __pfx_lock_release+0x10/0x10 ? __pfx_do_raw_spin_lock+0x10/0x10 do_one_tree+0x23/0xe0 shrink_dcache_for_umount+0xa0/0x170 generic_shutdown_super+0x67/0x390 kill_litter_super+0x76/0xb0 binderfs_kill_super+0x44/0x90 deactivate_locked_super+0xb9/0x130 cleanup_mnt+0x422/0x4c0 ? lockdep_hardirqs_on+0x9d/0x150 task_work_run+0x1d2/0x260 ? __pfx_task_work_run+0x10/0x10 resume_user_mode_work+0x52/0x60 syscall_exit_to_user_mode+0x9a/0x120 do_syscall_64+0x103/0x210 ? asm_sysvec_apic_timer_interrupt+0x1a/0x20 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0xcac57b Code: c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 f3 0f 1e fa 31 f6 e9 05 00 00 00 0f 1f 44 00 00 f3 0f 1e fa b8 RSP: 002b:00007ffecf4226a8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007ffecf422720 RCX: 0000000000cac57b RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007ffecf422850 RBP: 00007ffecf422850 R08: 0000000028d06ab1 R09: 7fffffffffffffff R10: 3fffffffffffffff R11: 0000000000000246 R12: 00007ffecf422718 R13: 00007ffecf422710 R14: 00007f478f87b658 R15: 00007ffecf422830 Allocated by task 1705: kasan_save_track+0x3e/0x80 __kasan_kmalloc+0x8f/0xa0 __kmalloc_cache_noprof+0x213/0x3e0 binderfs_binder_device_create+0x183/0xa80 binder_ctl_ioctl+0x138/0x190 __x64_sys_ioctl+0x120/0x1b0 do_syscall_64+0xf6/0x210 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 1705: kasan_save_track+0x3e/0x80 kasan_save_free_info+0x46/0x50 __kasan_slab_free+0x62/0x70 kfree+0x194/0x440 evict+0x524/0x9f0 do_unlinkat+0x390/0x5b0 __x64_sys_unlink+0x47/0x50 do_syscall_64+0xf6/0x210 entry_SYSCALL_64_after_hwframe+0x77/0x7f This 'stress-ng' workload causes the concurrent deletions from 'binder_devices' and so requires full-featured synchronization to prevent list corruption. I've found this issue independently but pretty sure that syzbot did the same, so Reported-by: and Closes: should be applicable here as well. Cc: stable@vger.kernel.org Reported-by: syzbot+353d7b75658a95aa955a@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=353d7b75658a95aa955a Fixes: e77aff5528a18 ("binderfs: fix use-after-free in binder_devices") Signed-off-by: Dmitry Antipov Acked-by: Carlos Llamas Signed-off-by: Carlos Llamas Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250517170957.1317876-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 15 +++++++++++++-- drivers/android/binder_internal.h | 8 ++++++-- drivers/android/binderfs.c | 2 +- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 5fc2c8ee61b19b..8d9c5f436fcafb 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -79,6 +79,8 @@ static HLIST_HEAD(binder_deferred_list); static DEFINE_MUTEX(binder_deferred_lock); static HLIST_HEAD(binder_devices); +static DEFINE_SPINLOCK(binder_devices_lock); + static HLIST_HEAD(binder_procs); static DEFINE_MUTEX(binder_procs_lock); @@ -6929,7 +6931,16 @@ const struct binder_debugfs_entry binder_debugfs_entries[] = { void binder_add_device(struct binder_device *device) { + spin_lock(&binder_devices_lock); hlist_add_head(&device->hlist, &binder_devices); + spin_unlock(&binder_devices_lock); +} + +void binder_remove_device(struct binder_device *device) +{ + spin_lock(&binder_devices_lock); + hlist_del_init(&device->hlist); + spin_unlock(&binder_devices_lock); } static int __init init_binder_device(const char *name) @@ -6956,7 +6967,7 @@ static int __init init_binder_device(const char *name) return ret; } - hlist_add_head(&binder_device->hlist, &binder_devices); + binder_add_device(binder_device); return ret; } @@ -7018,7 +7029,7 @@ static int __init binder_init(void) err_init_binder_device_failed: hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) { misc_deregister(&device->miscdev); - hlist_del(&device->hlist); + binder_remove_device(device); kfree(device); } diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 6a66c9769c6cd4..1ba5caf1d88d9d 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -583,9 +583,13 @@ struct binder_object { /** * Add a binder device to binder_devices * @device: the new binder device to add to the global list - * - * Not reentrant as the list is not protected by any locks */ void binder_add_device(struct binder_device *device); +/** + * Remove a binder device to binder_devices + * @device: the binder device to remove from the global list + */ +void binder_remove_device(struct binder_device *device); + #endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 94c6446604fc95..44d430c4ebefd2 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -274,7 +274,7 @@ static void binderfs_evict_inode(struct inode *inode) mutex_unlock(&binderfs_minors_mutex); if (refcount_dec_and_test(&device->ref)) { - hlist_del_init(&device->hlist); + binder_remove_device(device); kfree(device->context.name); kfree(device); } From 72a726fb5f25fbb31d6060acfb671c1955831245 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Sat, 24 May 2025 22:07:58 +0000 Subject: [PATCH 2057/2924] binder: fix yet another UAF in binder_devices commit 9857af0fcff385c75433f2162c30c62eb912ef6d upstream. Commit e77aff5528a18 ("binderfs: fix use-after-free in binder_devices") addressed a use-after-free where devices could be released without first being removed from the binder_devices list. However, there is a similar path in binder_free_proc() that was missed: ================================================================== BUG: KASAN: slab-use-after-free in binder_remove_device+0xd4/0x100 Write of size 8 at addr ffff0000c773b900 by task umount/467 CPU: 12 UID: 0 PID: 467 Comm: umount Not tainted 6.15.0-rc7-00138-g57483a362741 #9 PREEMPT Hardware name: linux,dummy-virt (DT) Call trace: binder_remove_device+0xd4/0x100 binderfs_evict_inode+0x230/0x2f0 evict+0x25c/0x5dc iput+0x304/0x480 dentry_unlink_inode+0x208/0x46c __dentry_kill+0x154/0x530 [...] Allocated by task 463: __kmalloc_cache_noprof+0x13c/0x324 binderfs_binder_device_create.isra.0+0x138/0xa60 binder_ctl_ioctl+0x1ac/0x230 [...] Freed by task 215: kfree+0x184/0x31c binder_proc_dec_tmpref+0x33c/0x4ac binder_deferred_func+0xc10/0x1108 process_one_work+0x520/0xba4 [...] ================================================================== Call binder_remove_device() within binder_free_proc() to ensure the device is removed from the binder_devices list before being kfreed. Cc: stable@vger.kernel.org Fixes: 12d909cac1e1 ("binderfs: add new binder devices to binder_devices") Reported-by: syzbot+4af454407ec393de51d6@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=4af454407ec393de51d6 Tested-by: syzbot+4af454407ec393de51d6@syzkaller.appspotmail.com Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20250524220758.915028-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8d9c5f436fcafb..6be0f7ac7213d1 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5246,6 +5246,7 @@ static void binder_free_proc(struct binder_proc *proc) __func__, proc->outstanding_txns); device = container_of(proc->context, struct binder_device, context); if (refcount_dec_and_test(&device->ref)) { + binder_remove_device(device); kfree(proc->context->name); kfree(device); } From cdb4feab2f39e75a66239e3a112beced279612a8 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 28 Mar 2025 00:03:50 +0900 Subject: [PATCH 2058/2924] thunderbolt: Do not double dequeue a configuration request commit 0f73628e9da1ee39daf5f188190cdbaee5e0c98c upstream. Some of our devices crash in tb_cfg_request_dequeue(): general protection fault, probably for non-canonical address 0xdead000000000122 CPU: 6 PID: 91007 Comm: kworker/6:2 Tainted: G U W 6.6.65 RIP: 0010:tb_cfg_request_dequeue+0x2d/0xa0 Call Trace: ? tb_cfg_request_dequeue+0x2d/0xa0 tb_cfg_request_work+0x33/0x80 worker_thread+0x386/0x8f0 kthread+0xed/0x110 ret_from_fork+0x38/0x50 ret_from_fork_asm+0x1b/0x30 The circumstances are unclear, however, the theory is that tb_cfg_request_work() can be scheduled twice for a request: first time via frame.callback from ring_work() and second time from tb_cfg_request(). Both times kworkers will execute tb_cfg_request_dequeue(), which results in double list_del() from the ctl->request_queue (the list poison deference hints at it: 0xdead000000000122). Do not dequeue requests that don't have TB_CFG_REQUEST_ACTIVE bit set. Signed-off-by: Sergey Senozhatsky Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/ctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index cd15e84c47f475..1db2e951b53fac 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -151,6 +151,11 @@ static void tb_cfg_request_dequeue(struct tb_cfg_request *req) struct tb_ctl *ctl = req->ctl; mutex_lock(&ctl->request_queue_lock); + if (!test_bit(TB_CFG_REQUEST_ACTIVE, &req->flags)) { + mutex_unlock(&ctl->request_queue_lock); + return; + } + list_del(&req->list); clear_bit(TB_CFG_REQUEST_ACTIVE, &req->flags); if (test_bit(TB_CFG_REQUEST_CANCELED, &req->flags)) From 5628b99316bcb5920a8b8e4df4b90b9d8fa171b4 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 29 May 2025 11:53:19 -0500 Subject: [PATCH 2059/2924] dt-bindings: pwm: adi,axi-pwmgen: Fix clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e683131e64f71e957ca77743cb3d313646157329 upstream. Fix a shortcoming in the bindings that doesn't allow for a separate external clock. The AXI PWMGEN IP block has a compile option ASYNC_CLK_EN that allows the use of an external clock for the PWM output separate from the AXI clock that runs the peripheral. This was missed in the original bindings and so users were writing dts files where the one and only clock specified would be the external clock, if there was one, incorrectly missing the separate AXI clock. The correct bindings are that the AXI clock is always required and the external clock is optional (must be given only when HDL compile option ASYNC_CLK_EN=1). Fixes: 1edf2c2a2841 ("dt-bindings: pwm: Add AXI PWM generator") Cc: stable@vger.kernel.org Signed-off-by: David Lechner Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250529-pwm-axi-pwmgen-add-external-clock-v3-2-5d8809a7da91@baylibre.com Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/pwm/adi,axi-pwmgen.yaml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml b/Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml index 45e112d0efb466..5575c58357d6e7 100644 --- a/Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml +++ b/Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml @@ -30,11 +30,19 @@ properties: const: 3 clocks: - maxItems: 1 + minItems: 1 + maxItems: 2 + + clock-names: + minItems: 1 + items: + - const: axi + - const: ext required: - reg - clocks + - clock-names unevaluatedProperties: false @@ -43,6 +51,7 @@ examples: pwm@44b00000 { compatible = "adi,axi-pwmgen-2.00.a"; reg = <0x44b00000 0x1000>; - clocks = <&spi_clk>; + clocks = <&fpga_clk>, <&spi_clk>; + clock-names = "axi", "ext"; #pwm-cells = <3>; }; From 5a25a9fe6ae72e6c88f926463dab95cd764fb771 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 28 Apr 2025 09:52:44 +0200 Subject: [PATCH 2060/2924] dt-bindings: remoteproc: qcom,sm8150-pas: Add missing SC8180X compatible commit b278981b5ac109e6f6986b20a5cb19654aba8f68 upstream. Commit 4b4ab93ddc5f ("dt-bindings: remoteproc: Consolidate SC8180X and SM8150 PAS files") moved SC8180X bindings from separate file into this one, but it forgot to add actual compatibles in top-level properties section making the entire binding un-selectable (no-op) for SC8180X PAS. Fixes: 4b4ab93ddc5f ("dt-bindings: remoteproc: Consolidate SC8180X and SM8150 PAS files") Cc: stable@vger.kernel.org Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250428075243.44256-2-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml index 56ff6386534ddf..5dcc2a32c08004 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml @@ -16,6 +16,9 @@ description: properties: compatible: enum: + - qcom,sc8180x-adsp-pas + - qcom,sc8180x-cdsp-pas + - qcom,sc8180x-slpi-pas - qcom,sm8150-adsp-pas - qcom,sm8150-cdsp-pas - qcom,sm8150-mpss-pas From 80671c454a2974a80f73eb14f021dbcde021a16e Mon Sep 17 00:00:00 2001 From: Lukasz Czechowski Date: Fri, 25 Apr 2025 17:18:07 +0200 Subject: [PATCH 2061/2924] dt-bindings: usb: cypress,hx3: Add support for all variants commit 1ad4b5a7de16806afc1aeaf012337e62af04e001 upstream. The Cypress HX3 hubs use different default PID value depending on the variant. Update compatibles list. Becasuse all hub variants use the same driver data, allow the dt node to have two compatibles: leftmost which matches the HW exactly, and the second one as fallback. Fixes: 1eca51f58a10 ("dt-bindings: usb: Add binding for Cypress HX3 USB 3.0 family") Cc: stable@vger.kernel.org # 6.6 Cc: stable@vger.kernel.org # Backport of the patch ("dt-bindings: usb: usb-device: relax compatible pattern to a contains") from list: https://lore.kernel.org/linux-usb/20250418-dt-binding-usb-device-compatibles-v2-1-b3029f14e800@cherry.de/ Cc: stable@vger.kernel.org # Backport of the patch in this series fixing product ID in onboard_dev_id_table in drivers/usb/misc/onboard_usb_dev.c driver Signed-off-by: Lukasz Czechowski Reviewed-by: "Rob Herring (Arm)" Link: https://lore.kernel.org/r/20250425-onboard_usb_dev-v2-2-4a76a474a010@thaumatec.com [taken with Greg's blessing] Signed-off-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/cypress,hx3.yaml | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/cypress,hx3.yaml b/Documentation/devicetree/bindings/usb/cypress,hx3.yaml index 1033b7a4b8f953..d6eac1213228d2 100644 --- a/Documentation/devicetree/bindings/usb/cypress,hx3.yaml +++ b/Documentation/devicetree/bindings/usb/cypress,hx3.yaml @@ -14,9 +14,22 @@ allOf: properties: compatible: - enum: - - usb4b4,6504 - - usb4b4,6506 + oneOf: + - enum: + - usb4b4,6504 + - usb4b4,6506 + - items: + - enum: + - usb4b4,6500 + - usb4b4,6508 + - const: usb4b4,6504 + - items: + - enum: + - usb4b4,6502 + - usb4b4,6503 + - usb4b4,6507 + - usb4b4,650a + - const: usb4b4,6506 reg: true From b0814b391f44c25b86fd7c6cb0247b4ce7d98fc4 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 30 Apr 2025 17:44:59 +0800 Subject: [PATCH 2062/2924] dt-bindings: phy: imx8mq-usb: fix fsl,phy-tx-vboost-level-microvolt property commit 5b3a91b207c00a8d27f75ce8aaa9860844da72c8 upstream. The ticket TKT0676370 shows the description of TX_VBOOST_LVL is wrong in register PHY_CTRL3 bit[31:29]. 011: Corresponds to a launch amplitude of 1.12 V. 010: Corresponds to a launch amplitude of 1.04 V. 000: Corresponds to a launch amplitude of 0.88 V. After updated: 011: Corresponds to a launch amplitude of 0.844 V. 100: Corresponds to a launch amplitude of 1.008 V. 101: Corresponds to a launch amplitude of 1.156 V. This will correct it accordingly. Fixes: b2e75563dc39 ("dt-bindings: phy: imx8mq-usb: add phy tuning properties") Cc: stable@vger.kernel.org Reviewed-by: Jun Li Signed-off-by: Xu Yang Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250430094502.2723983-1-xu.yang_2@nxp.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml index daee0c0fc91539..c468207eb95168 100644 --- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml @@ -63,8 +63,7 @@ properties: fsl,phy-tx-vboost-level-microvolt: description: Adjust the boosted transmit launch pk-pk differential amplitude - minimum: 880 - maximum: 1120 + enum: [844, 1008, 1156] fsl,phy-comp-dis-tune-percent: description: From c1c52720bb0fab4fe312de79080b90443289da58 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 21 May 2025 16:05:39 -0400 Subject: [PATCH 2063/2924] Revert "drm/amd/display: more liberal vmin/vmax update for freesync" commit 1b824eef269db44d068bbc0de74c94a8e8f9ce02 upstream. This reverts commit cfb2d41831ee5647a4ae0ea7c24971a92d5dfa0d since it causes regressions on certain configs. Revert until the issue can be isolated and debugged. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4238 Signed-off-by: Aurabindo Pillai Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a187cdb43e7e1d..0fe8bd19ecd13e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -675,21 +675,15 @@ static void dm_crtc_high_irq(void *interrupt_params) spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); if (acrtc->dm_irq_params.stream && - acrtc->dm_irq_params.vrr_params.supported) { - bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; - bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; - bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE; - + acrtc->dm_irq_params.vrr_params.supported && + acrtc->dm_irq_params.freesync_config.state == + VRR_STATE_ACTIVE_VARIABLE) { mod_freesync_handle_v_update(adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - /* update vmin_vmax only if freesync is enabled, or only if PSR and REPLAY are disabled */ - if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { - dc_stream_adjust_vmin_vmax(adev->dm.dc, - acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); - } + dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); } /* From fc85704c3dae5ac1cb3c94045727241cd72871ff Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 10 Jun 2025 07:17:11 -0400 Subject: [PATCH 2064/2924] Linux 6.15.2 Link: https://lore.kernel.org/r/20250607100719.711372213@linuxfoundation.org Tested-by: Christian Heusel Tested-by: Ronald Warsow Tested-by: Salvatore Bonaccorso Tested-by: Takeshi Ogasawara Tested-by: Luna Jernberg Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Peter Schneider Tested-by: Mark Brown Tested-by: Jon Hunter Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 61d69a3fc827cc..7138d1fabfa4ae 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 15 -SUBLEVEL = 1 +SUBLEVEL = 2 EXTRAVERSION = NAME = Baby Opossum Posse From 3931f9c8f830901bb12a94691c28d23a79fb55cc Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:22 +0100 Subject: [PATCH 2065/2924] tools/x86/kcpuid: Fix error handling [ Upstream commit 116edfe173d0c59ec2aa87fb91f2f31d477b61b3 ] Error handling in kcpuid is unreliable. On malloc() failures, the code prints an error then just goes on. The error messages are also printed to standard output instead of standard error. Use err() and errx() from to direct all error messages to standard error and automatically exit the program. Use err() to include the errno information, and errx() otherwise. Use warnx() for warnings. While at it, alphabetically reorder the header includes. [ mingo: Fix capitalization in the help text while at it. ] Fixes: c6b2f240bf8d ("tools/x86: Add a kcpuid tool to show raw CPU features") Reported-by: Remington Brasga Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-2-darwi@linutronix.de Closes: https://lkml.kernel.org/r/20240926223557.2048-1-rbrasga@uci.edu Signed-off-by: Sasha Levin --- tools/arch/x86/kcpuid/kcpuid.c | 47 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 1b25c0a95d3f9a..40a9e59c2fd568 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE -#include +#include +#include #include +#include #include #include -#include #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define min(a, b) (((a) < (b)) ? (a) : (b)) @@ -145,14 +146,14 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, if (!func->leafs) { func->leafs = malloc(sizeof(struct subleaf)); if (!func->leafs) - perror("malloc func leaf"); + err(EXIT_FAILURE, NULL); func->nr = 1; } else { s = func->nr; func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf)); if (!func->leafs) - perror("realloc f->leafs"); + err(EXIT_FAILURE, NULL); func->nr++; } @@ -211,7 +212,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) range = malloc(sizeof(struct cpuid_range)); if (!range) - perror("malloc range"); + err(EXIT_FAILURE, NULL); if (input_eax & 0x80000000) range->is_ext = true; @@ -220,7 +221,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); if (!range->funcs) - perror("malloc range->funcs"); + err(EXIT_FAILURE, NULL); range->nr = idx_func; memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func); @@ -395,8 +396,8 @@ static int parse_line(char *line) return 0; err_exit: - printf("Warning: wrong line format:\n"); - printf("\tline[%d]: %s\n", flines, line); + warnx("Wrong line format:\n" + "\tline[%d]: %s", flines, line); return -1; } @@ -418,10 +419,8 @@ static void parse_text(void) file = fopen("./cpuid.csv", "r"); } - if (!file) { - printf("Fail to open '%s'\n", filename); - return; - } + if (!file) + err(EXIT_FAILURE, "%s", filename); while (1) { ret = getline(&line, &len, file); @@ -530,7 +529,7 @@ static inline struct cpuid_func *index_to_func(u32 index) func_idx = index & 0xffff; if ((func_idx + 1) > (u32)range->nr) { - printf("ERR: invalid input index (0x%x)\n", index); + warnx("Invalid input index (0x%x)", index); return NULL; } return &range->funcs[func_idx]; @@ -562,7 +561,7 @@ static void show_info(void) return; } - printf("ERR: invalid input subleaf (0x%x)\n", user_sub); + warnx("Invalid input subleaf (0x%x)", user_sub); } show_func(func); @@ -593,15 +592,15 @@ static void setup_platform_cpuid(void) static void usage(void) { - printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" - "\t-a|--all Show both bit flags and complex bit fields info\n" - "\t-b|--bitflags Show boolean flags only\n" - "\t-d|--detail Show details of the flag/fields (default)\n" - "\t-f|--flags Specify the cpuid csv file\n" - "\t-h|--help Show usage info\n" - "\t-l|--leaf=index Specify the leaf you want to check\n" - "\t-r|--raw Show raw cpuid data\n" - "\t-s|--subleaf=sub Specify the subleaf you want to check\n" + warnx("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" + "\t-a|--all Show both bit flags and complex bit fields info\n" + "\t-b|--bitflags Show boolean flags only\n" + "\t-d|--detail Show details of the flag/fields (default)\n" + "\t-f|--flags Specify the CPUID CSV file\n" + "\t-h|--help Show usage info\n" + "\t-l|--leaf=index Specify the leaf you want to check\n" + "\t-r|--raw Show raw CPUID data\n" + "\t-s|--subleaf=sub Specify the subleaf you want to check" ); } @@ -652,7 +651,7 @@ static int parse_options(int argc, char *argv[]) user_sub = strtoul(optarg, NULL, 0); break; default: - printf("%s: Invalid option '%c'\n", argv[0], optopt); + warnx("Invalid option '%c'", optopt); return -1; } From cc1e608ab5d78f4ab1081575923618b98033b65d Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Wed, 2 Apr 2025 18:24:58 +0100 Subject: [PATCH 2066/2924] x86/idle: Remove MFENCEs for X86_BUG_CLFLUSH_MONITOR in mwait_idle_with_hints() and prefer_mwait_c1_over_halt() [ Upstream commit 1f13c60d84e880df6698441026e64f84c7110c49 ] The following commit, 12 years ago: 7e98b7192046 ("x86, idle: Use static_cpu_has() for CLFLUSH workaround, add barriers") added barriers around the CLFLUSH in mwait_idle_with_hints(), justified with: ... and add memory barriers around it since the documentation is explicit that CLFLUSH is only ordered with respect to MFENCE. This also triggered, 11 years ago, the same adjustment in: f8e617f45829 ("sched/idle/x86: Optimize unnecessary mwait_idle() resched IPIs") during development, although it failed to get the static_cpu_has_bug() treatment. X86_BUG_CLFLUSH_MONITOR (a.k.a the AAI65 errata) is specific to Intel CPUs, and the SDM currently states: Executions of the CLFLUSH instruction are ordered with respect to each other and with respect to writes, locked read-modify-write instructions, and fence instructions[1]. With footnote 1 reading: Earlier versions of this manual specified that executions of the CLFLUSH instruction were ordered only by the MFENCE instruction. All processors implementing the CLFLUSH instruction also order it relative to the other operations enumerated above. i.e. The SDM was incorrect at the time, and barriers should not have been inserted. Double checking the original AAI65 errata (not available from intel.com any more) shows no mention of barriers either. Note: If this were a general codepath, the MFENCEs would be needed, because AMD CPUs of the same vintage do sport otherwise-unordered CLFLUSHs. Remove the unnecessary barriers. Furthermore, use a plain alternative(), rather than static_cpu_has_bug() and/or no optimisation. The workaround is a single instruction. Use an explicit %rax pointer rather than a general memory operand, because MONITOR takes the pointer implicitly in the same way. [ mingo: Cleaned up the commit a bit. ] Fixes: 7e98b7192046 ("x86, idle: Use static_cpu_has() for CLFLUSH workaround, add barriers") Signed-off-by: Andrew Cooper Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Acked-by: Borislav Petkov (AMD) Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Rik van Riel Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: Rafael J. Wysocki Link: https://lore.kernel.org/r/20250402172458.1378112-1-andrew.cooper3@citrix.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/mwait.h | 9 +++------ arch/x86/kernel/process.c | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index ce857ef54cf158..54dc313bcdf018 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -116,13 +116,10 @@ static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx) static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { - if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { - mb(); - clflush((void *)¤t_thread_info()->flags); - mb(); - } + const void *addr = ¤t_thread_info()->flags; - __monitor((void *)¤t_thread_info()->flags, 0, 0); + alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); + __monitor(addr, 0, 0); if (!need_resched()) { if (ecx & 1) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 962c3ce39323e7..12948380cdd06c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -907,13 +907,10 @@ static __init bool prefer_mwait_c1_over_halt(void) static __cpuidle void mwait_idle(void) { if (!current_set_polling_and_test()) { - if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { - mb(); /* quirk */ - clflush((void *)¤t_thread_info()->flags); - mb(); /* quirk */ - } + const void *addr = ¤t_thread_info()->flags; - __monitor((void *)¤t_thread_info()->flags, 0, 0); + alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); + __monitor(addr, 0, 0); if (!need_resched()) { __sti_mwait(0, 0); raw_local_irq_disable(); From 5b5c635e1a0a9d94ff09a509f886904fc5a2c77a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 Mar 2025 12:04:18 +0800 Subject: [PATCH 2067/2924] crypto: iaa - Do not clobber req->base.data [ Upstream commit cc98d8ce934b99789d30421957fd6a20fffb1c22 ] The req->base.data field is for the user and must not be touched by the driver, unless you save it first. The iaa driver doesn't seem to be using the req->base.data value so just remove the assignment. Fixes: 09646c98d0bf ("crypto: iaa - Add irq support for the crypto async interface") Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 09d9589f2d681d..33a285981dfd45 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1187,8 +1187,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode) - req->base.data = idxd_desc; + } dev_dbg(dev, "%s: compression mode %s," " desc->src1_addr %llx, desc->src1_size %d," @@ -1425,8 +1424,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode && !disable_async) - req->base.data = idxd_desc; + } dev_dbg(dev, "%s: decompression mode %s," " desc->src1_addr %llx, desc->src1_size %d," From 9fb9eb996d726df69a9e7eb144df648000e89963 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Tue, 1 Apr 2025 22:23:16 +0300 Subject: [PATCH 2068/2924] crypto: sun8i-ce-hash - fix error handling in sun8i_ce_hash_run() [ Upstream commit ea4dd134ef332bd9e3e734c1ba0a1521f436b678 ] Rework error handling in sun8i_ce_hash_run() to unmap the dma buffers in case of failure. Currently, the dma unmap functions are not called if the function errors out at various points. Fixes: 56f6d5aee88d1 ("crypto: sun8i-ce - support hash algorithms") Signed-off-by: Ovidiu Panait Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- .../crypto/allwinner/sun8i-ce/sun8i-ce-hash.c | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index 6072dd9f390b40..3f9d79ea01aaa6 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -343,9 +343,8 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) u32 common; u64 byte_count; __le32 *bf; - void *buf = NULL; + void *buf, *result; int j, i, todo; - void *result = NULL; u64 bs; int digestsize; dma_addr_t addr_res, addr_pad; @@ -365,14 +364,14 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) buf = kcalloc(2, bs, GFP_KERNEL | GFP_DMA); if (!buf) { err = -ENOMEM; - goto theend; + goto err_out; } bf = (__le32 *)buf; result = kzalloc(digestsize, GFP_KERNEL | GFP_DMA); if (!result) { err = -ENOMEM; - goto theend; + goto err_free_buf; } flow = rctx->flow; @@ -398,7 +397,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) if (nr_sgs <= 0 || nr_sgs > MAX_SG) { dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs); err = -EINVAL; - goto theend; + goto err_free_result; } len = areq->nbytes; @@ -411,7 +410,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) if (len > 0) { dev_err(ce->dev, "remaining len %d\n", len); err = -EINVAL; - goto theend; + goto err_unmap_src; } addr_res = dma_map_single(ce->dev, result, digestsize, DMA_FROM_DEVICE); cet->t_dst[0].addr = desc_addr_val_le32(ce, addr_res); @@ -419,7 +418,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) if (dma_mapping_error(ce->dev, addr_res)) { dev_err(ce->dev, "DMA map dest\n"); err = -EINVAL; - goto theend; + goto err_unmap_src; } byte_count = areq->nbytes; @@ -441,7 +440,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) } if (!j) { err = -EINVAL; - goto theend; + goto err_unmap_result; } addr_pad = dma_map_single(ce->dev, buf, j * 4, DMA_TO_DEVICE); @@ -450,7 +449,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) if (dma_mapping_error(ce->dev, addr_pad)) { dev_err(ce->dev, "DMA error on padding SG\n"); err = -EINVAL; - goto theend; + goto err_unmap_result; } if (ce->variant->hash_t_dlen_in_bits) @@ -463,16 +462,25 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) err = sun8i_ce_run_task(ce, flow, crypto_ahash_alg_name(tfm)); dma_unmap_single(ce->dev, addr_pad, j * 4, DMA_TO_DEVICE); - dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); + +err_unmap_result: dma_unmap_single(ce->dev, addr_res, digestsize, DMA_FROM_DEVICE); + if (!err) + memcpy(areq->result, result, algt->alg.hash.base.halg.digestsize); +err_unmap_src: + dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); - memcpy(areq->result, result, algt->alg.hash.base.halg.digestsize); -theend: - kfree(buf); +err_free_result: kfree(result); + +err_free_buf: + kfree(buf); + +err_out: local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); local_bh_enable(); + return 0; } From e594f9ceef47d0888379d9d41f8236871988fbd0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Mar 2025 22:23:23 +0100 Subject: [PATCH 2069/2924] sched: Fix trace_sched_switch(.prev_state) [ Upstream commit 8feb053d53194382fcfb68231296fdc220497ea6 ] Gabriele noted that in case of signal_pending_state(), the tracepoint sees a stale task-state. Fixes: fa2c3254d7cf ("sched/tracing: Don't re-read p->state when emitting sched_switch event") Reported-by: Gabriele Monaco Signed-off-by: Peter Zijlstra (Intel) Cc: Valentin Schneider Signed-off-by: Sasha Levin --- kernel/sched/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c81cf642dba055..36b34e68845873 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6571,12 +6571,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * Otherwise marks the task's __state as RUNNING */ static bool try_to_block_task(struct rq *rq, struct task_struct *p, - unsigned long task_state) + unsigned long *task_state_p) { + unsigned long task_state = *task_state_p; int flags = DEQUEUE_NOCLOCK; if (signal_pending_state(task_state, p)) { WRITE_ONCE(p->__state, TASK_RUNNING); + *task_state_p = TASK_RUNNING; return false; } @@ -6713,7 +6715,7 @@ static void __sched notrace __schedule(int sched_mode) goto picked; } } else if (!preempt && prev_state) { - try_to_block_task(rq, prev, prev_state); + try_to_block_task(rq, prev, &prev_state); switch_count = &prev->nvcsw; } From e8bb8220006871b00adc125238094a2451cfde7a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 7 Apr 2025 21:32:41 +0200 Subject: [PATCH 2070/2924] crypto: ecdsa - Fix enc/dec size reported by KEYCTL_PKEY_QUERY [ Upstream commit 3828485e1c7b111290122ab6e083c2a37132b5c2 ] KEYCTL_PKEY_QUERY system calls for ecdsa keys return the key size as max_enc_size and max_dec_size, even though such keys cannot be used for encryption/decryption. They're exclusively for signature generation or verification. Only rsa keys with pkcs1 encoding can also be used for encryption or decryption. Return 0 instead for ecdsa keys (as well as ecrdsa keys). Signed-off-by: Lukas Wunner Reviewed-by: Stefan Berger Reviewed-by: Ignat Korchagin Signed-off-by: Herbert Xu Stable-dep-of: 6b7f9397c98c ("crypto: ecdsa - Fix NIST P521 key size reported by KEYCTL_PKEY_QUERY") Signed-off-by: Sasha Levin --- crypto/asymmetric_keys/public_key.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index bf165d321440d5..dd44a966947fb8 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -188,6 +188,8 @@ static int software_key_query(const struct kernel_pkey_params *params, ptr = pkey_pack_u32(ptr, pkey->paramlen); memcpy(ptr, pkey->params, pkey->paramlen); + memset(info, 0, sizeof(*info)); + if (issig) { sig = crypto_alloc_sig(alg_name, 0, 0); if (IS_ERR(sig)) { @@ -211,6 +213,9 @@ static int software_key_query(const struct kernel_pkey_params *params, info->supported_ops |= KEYCTL_SUPPORTS_SIGN; if (strcmp(params->encoding, "pkcs1") == 0) { + info->max_enc_size = len; + info->max_dec_size = len; + info->supported_ops |= KEYCTL_SUPPORTS_ENCRYPT; if (pkey->key_is_private) info->supported_ops |= KEYCTL_SUPPORTS_DECRYPT; @@ -232,6 +237,8 @@ static int software_key_query(const struct kernel_pkey_params *params, len = crypto_akcipher_maxsize(tfm); info->max_sig_size = len; info->max_data_size = len; + info->max_enc_size = len; + info->max_dec_size = len; info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT; if (pkey->key_is_private) @@ -239,8 +246,6 @@ static int software_key_query(const struct kernel_pkey_params *params, } info->key_size = len * 8; - info->max_enc_size = len; - info->max_dec_size = len; ret = 0; From 40466ce607897f92a910ec2af67b3a543e0f4e2b Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 7 Apr 2025 21:32:42 +0200 Subject: [PATCH 2071/2924] crypto: ecdsa - Fix NIST P521 key size reported by KEYCTL_PKEY_QUERY [ Upstream commit 6b7f9397c98c72902f9364056413c73fe6dee1d8 ] When user space issues a KEYCTL_PKEY_QUERY system call for a NIST P521 key, the key_size is incorrectly reported as 528 bits instead of 521. That's because the key size obtained through crypto_sig_keysize() is in bytes and software_key_query() multiplies by 8 to yield the size in bits. The underlying assumption is that the key size is always a multiple of 8. With the recent addition of NIST P521, that's no longer the case. Fix by returning the key_size in bits from crypto_sig_keysize() and adjusting the calculations in software_key_query(). The ->key_size() callbacks of sig_alg algorithms now return the size in bits, whereas the ->digest_size() and ->max_size() callbacks return the size in bytes. This matches with the units in struct keyctl_pkey_query. Fixes: a7d45ba77d3d ("crypto: ecdsa - Register NIST P521 and extend test suite") Signed-off-by: Lukas Wunner Reviewed-by: Stefan Berger Reviewed-by: Ignat Korchagin Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/asymmetric_keys/public_key.c | 8 ++++---- crypto/ecdsa-p1363.c | 6 ++++-- crypto/ecdsa-x962.c | 5 +++-- crypto/ecdsa.c | 2 +- crypto/ecrdsa.c | 2 +- crypto/rsassa-pkcs1.c | 2 +- crypto/sig.c | 9 +++++++-- include/crypto/sig.h | 2 +- 8 files changed, 22 insertions(+), 14 deletions(-) diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index dd44a966947fb8..89dc887d2c5c7e 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -205,6 +205,7 @@ static int software_key_query(const struct kernel_pkey_params *params, goto error_free_tfm; len = crypto_sig_keysize(sig); + info->key_size = len; info->max_sig_size = crypto_sig_maxsize(sig); info->max_data_size = crypto_sig_digestsize(sig); @@ -213,8 +214,8 @@ static int software_key_query(const struct kernel_pkey_params *params, info->supported_ops |= KEYCTL_SUPPORTS_SIGN; if (strcmp(params->encoding, "pkcs1") == 0) { - info->max_enc_size = len; - info->max_dec_size = len; + info->max_enc_size = len / BITS_PER_BYTE; + info->max_dec_size = len / BITS_PER_BYTE; info->supported_ops |= KEYCTL_SUPPORTS_ENCRYPT; if (pkey->key_is_private) @@ -235,6 +236,7 @@ static int software_key_query(const struct kernel_pkey_params *params, goto error_free_tfm; len = crypto_akcipher_maxsize(tfm); + info->key_size = len * BITS_PER_BYTE; info->max_sig_size = len; info->max_data_size = len; info->max_enc_size = len; @@ -245,8 +247,6 @@ static int software_key_query(const struct kernel_pkey_params *params, info->supported_ops |= KEYCTL_SUPPORTS_DECRYPT; } - info->key_size = len * 8; - ret = 0; error_free_tfm: diff --git a/crypto/ecdsa-p1363.c b/crypto/ecdsa-p1363.c index 4454f1f8f33f58..e0c55c64711c83 100644 --- a/crypto/ecdsa-p1363.c +++ b/crypto/ecdsa-p1363.c @@ -21,7 +21,8 @@ static int ecdsa_p1363_verify(struct crypto_sig *tfm, const void *digest, unsigned int dlen) { struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm); - unsigned int keylen = crypto_sig_keysize(ctx->child); + unsigned int keylen = DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child), + BITS_PER_BYTE); unsigned int ndigits = DIV_ROUND_UP_POW2(keylen, sizeof(u64)); struct ecdsa_raw_sig sig; @@ -45,7 +46,8 @@ static unsigned int ecdsa_p1363_max_size(struct crypto_sig *tfm) { struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm); - return 2 * crypto_sig_keysize(ctx->child); + return 2 * DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child), + BITS_PER_BYTE); } static unsigned int ecdsa_p1363_digest_size(struct crypto_sig *tfm) diff --git a/crypto/ecdsa-x962.c b/crypto/ecdsa-x962.c index 90a04f4b9a2f55..ee71594d10a069 100644 --- a/crypto/ecdsa-x962.c +++ b/crypto/ecdsa-x962.c @@ -82,7 +82,7 @@ static int ecdsa_x962_verify(struct crypto_sig *tfm, int err; sig_ctx.ndigits = DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child), - sizeof(u64)); + sizeof(u64) * BITS_PER_BYTE); err = asn1_ber_decoder(&ecdsasignature_decoder, &sig_ctx, src, slen); if (err < 0) @@ -103,7 +103,8 @@ static unsigned int ecdsa_x962_max_size(struct crypto_sig *tfm) { struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm); struct sig_alg *alg = crypto_sig_alg(ctx->child); - int slen = crypto_sig_keysize(ctx->child); + int slen = DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child), + BITS_PER_BYTE); /* * Verify takes ECDSA-Sig-Value (described in RFC 5480) as input, diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 117526d15ddebf..a70b60a90a3c76 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -167,7 +167,7 @@ static unsigned int ecdsa_key_size(struct crypto_sig *tfm) { struct ecc_ctx *ctx = crypto_sig_ctx(tfm); - return DIV_ROUND_UP(ctx->curve->nbits, 8); + return ctx->curve->nbits; } static unsigned int ecdsa_digest_size(struct crypto_sig *tfm) diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index b3dd8a3ddeb796..2c0602f0cd406f 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -249,7 +249,7 @@ static unsigned int ecrdsa_key_size(struct crypto_sig *tfm) * Verify doesn't need any output, so it's just informational * for keyctl to determine the key bit size. */ - return ctx->pub_key.ndigits * sizeof(u64); + return ctx->pub_key.ndigits * sizeof(u64) * BITS_PER_BYTE; } static unsigned int ecrdsa_max_size(struct crypto_sig *tfm) diff --git a/crypto/rsassa-pkcs1.c b/crypto/rsassa-pkcs1.c index d01ac75635e008..94fa5e9600e79d 100644 --- a/crypto/rsassa-pkcs1.c +++ b/crypto/rsassa-pkcs1.c @@ -301,7 +301,7 @@ static unsigned int rsassa_pkcs1_key_size(struct crypto_sig *tfm) { struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); - return ctx->key_size; + return ctx->key_size * BITS_PER_BYTE; } static int rsassa_pkcs1_set_pub_key(struct crypto_sig *tfm, diff --git a/crypto/sig.c b/crypto/sig.c index dfc7cae9080282..53a3dd6fbe3fe6 100644 --- a/crypto/sig.c +++ b/crypto/sig.c @@ -102,6 +102,11 @@ static int sig_default_set_key(struct crypto_sig *tfm, return -ENOSYS; } +static unsigned int sig_default_size(struct crypto_sig *tfm) +{ + return DIV_ROUND_UP_POW2(crypto_sig_keysize(tfm), BITS_PER_BYTE); +} + static int sig_prepare_alg(struct sig_alg *alg) { struct crypto_alg *base = &alg->base; @@ -117,9 +122,9 @@ static int sig_prepare_alg(struct sig_alg *alg) if (!alg->key_size) return -EINVAL; if (!alg->max_size) - alg->max_size = alg->key_size; + alg->max_size = sig_default_size; if (!alg->digest_size) - alg->digest_size = alg->key_size; + alg->digest_size = sig_default_size; base->cra_type = &crypto_sig_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; diff --git a/include/crypto/sig.h b/include/crypto/sig.h index 11024708c06929..fa6dafafab3f0d 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -128,7 +128,7 @@ static inline void crypto_free_sig(struct crypto_sig *tfm) /** * crypto_sig_keysize() - Get key size * - * Function returns the key size in bytes. + * Function returns the key size in bits. * Function assumes that the key is already set in the transformation. If this * function is called without a setkey or with a failed setkey, you may end up * in a NULL dereference. From d2c5e001aac2065e0b2c3be0ba38390898798eae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 12 Apr 2025 18:57:22 +0800 Subject: [PATCH 2072/2924] crypto: zynqmp-sha - Add locking [ Upstream commit c7e68043620e0d5f89a37e573c667beab72d2937 ] The hardwrae is only capable of one hash at a time, so add a lock to make sure that it isn't used concurrently. Fixes: 7ecc3e34474b ("crypto: xilinx - Add Xilinx SHA3 driver") Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/xilinx/zynqmp-sha.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/xilinx/zynqmp-sha.c b/drivers/crypto/xilinx/zynqmp-sha.c index 580649f9bff81f..0edf8eb264b55f 100644 --- a/drivers/crypto/xilinx/zynqmp-sha.c +++ b/drivers/crypto/xilinx/zynqmp-sha.c @@ -3,18 +3,19 @@ * Xilinx ZynqMP SHA Driver. * Copyright (c) 2022 Xilinx Inc. */ -#include #include #include #include -#include +#include +#include #include #include +#include #include -#include #include #include #include +#include #include #define ZYNQMP_DMA_BIT_MASK 32U @@ -43,6 +44,8 @@ struct zynqmp_sha_desc_ctx { static dma_addr_t update_dma_addr, final_dma_addr; static char *ubuf, *fbuf; +static DEFINE_SPINLOCK(zynqmp_sha_lock); + static int zynqmp_sha_init_tfm(struct crypto_shash *hash) { const char *fallback_driver_name = crypto_shash_alg_name(hash); @@ -124,7 +127,8 @@ static int zynqmp_sha_export(struct shash_desc *desc, void *out) return crypto_shash_export(&dctx->fbk_req, out); } -static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) +static int __zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { unsigned int remaining_len = len; int update_size; @@ -159,6 +163,12 @@ static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned i return ret; } +static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) +{ + scoped_guard(spinlock_bh, &zynqmp_sha_lock) + return __zynqmp_sha_digest(desc, data, len, out); +} + static struct zynqmp_sha_drv_ctx sha3_drv_ctx = { .sha3_384 = { .init = zynqmp_sha_init, From a650f84d8c2c4b01576e796355c05b33a7d7d7f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 15 Apr 2025 15:38:05 +0200 Subject: [PATCH 2073/2924] kunit: qemu_configs: sparc: Explicitly enable CONFIG_SPARC32=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d16b3d0fb43cb0f9eb21b35c2d2c870b3f38ab1d ] The configuration generated by kunit ends up with a 32bit configuration. A new kunit configuration for 64bit is to be added. To make the difference clearer spell out the variant in the kunit reference config. Link: https://lore.kernel.org/r/20250415-kunit-qemu-sparc64-v1-1-253906f61102@linutronix.de Signed-off-by: Thomas Weißschuh Reviewed-by: David Gow Signed-off-by: Shuah Khan Stable-dep-of: 1d31d536871f ("kunit: qemu_configs: Disable faulting tests on 32-bit SPARC") Signed-off-by: Sasha Levin --- tools/testing/kunit/qemu_configs/sparc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py index 256d9573b44646..3131dd299a6e34 100644 --- a/tools/testing/kunit/qemu_configs/sparc.py +++ b/tools/testing/kunit/qemu_configs/sparc.py @@ -2,6 +2,7 @@ QEMU_ARCH = QemuArchParams(linux_arch='sparc', kconfig=''' +CONFIG_SPARC32=y CONFIG_SERIAL_SUNZILOG=y CONFIG_SERIAL_SUNZILOG_CONSOLE=y ''', From ff5b0a1302a8473b5d091e0b4f2322ebafa6b282 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 16 Apr 2025 17:38:25 +0800 Subject: [PATCH 2074/2924] kunit: qemu_configs: Disable faulting tests on 32-bit SPARC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1d31d536871fe8b16c8c0de58d201c78e21eb3a2 ] The 32-bit sparc configuration (--arch sparc) crashes on the kunit_fault_test. It's known that some architectures don't handle deliberate segfaults in kernel mode well, so there's a config switch to disable tests which rely upon it by default. Use this for the sparc config, making sure the default config for it passes. Link: https://lore.kernel.org/r/20250416093826.1550040-1-davidgow@google.com Fixes: 87c9c1631788 ("kunit: tool: add support for QEMU") Signed-off-by: David Gow Reviewed-by: Thomas Weißschuh Tested-by: Thomas Weißschuh Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/kunit/qemu_configs/sparc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py index 3131dd299a6e34..2019550a1b692e 100644 --- a/tools/testing/kunit/qemu_configs/sparc.py +++ b/tools/testing/kunit/qemu_configs/sparc.py @@ -2,6 +2,7 @@ QEMU_ARCH = QemuArchParams(linux_arch='sparc', kconfig=''' +CONFIG_KUNIT_FAULT_TEST=n CONFIG_SPARC32=y CONFIG_SERIAL_SUNZILOG=y CONFIG_SERIAL_SUNZILOG_CONSOLE=y From 323ceb7c6d356d87734ad2d3b733311e72b60163 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Fri, 18 Apr 2025 09:12:59 +0530 Subject: [PATCH 2075/2924] perf/x86/amd/uncore: Remove unused 'struct amd_uncore_ctx::node' member [ Upstream commit 4f81cc2d1bf91a49d33eb6578b58db2518deef01 ] Fixes: d6389d3ccc13 ("perf/x86/amd/uncore: Refactor uncore management") Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/30f9254c2de6c4318dd0809ef85a1677f68eef10.1744906694.git.sandipan.das@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/uncore.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 49c26ce2b11522..010024f09f2c49 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -38,7 +38,6 @@ struct amd_uncore_ctx { int refcnt; int cpu; struct perf_event **events; - struct hlist_node node; }; struct amd_uncore_pmu { From 4ad1035fd31ccb4b8831336712e010efbd0396e7 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Fri, 18 Apr 2025 09:13:03 +0530 Subject: [PATCH 2076/2924] perf/x86/amd/uncore: Prevent UMC counters from saturating [ Upstream commit 2492e5aba2be064d0604ae23ae0770ecc0168192 ] Unlike L3 and DF counters, UMC counters (PERF_CTRs) set the Overflow bit (bit 48) and saturate on overflow. A subsequent pmu->read() of the event reports an incorrect accumulated count as there is no difference between the previous and the current values of the counter. To avoid this, inspect the current counter value and proactively reset the corresponding PERF_CTR register on every pmu->read(). Combined with the periodic reads initiated by the hrtimer, the counters never get a chance saturate but the resolution reduces to 47 bits. Fixes: 25e56847821f ("perf/x86/amd/uncore: Add memory controller support") Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Reviewed-by: Song Liu Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/dee9c8af2c6d66814cf4c6224529c144c620cf2c.1744906694.git.sandipan.das@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/uncore.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 010024f09f2c49..a6fa01ef35a10e 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -889,6 +889,39 @@ static void amd_uncore_umc_start(struct perf_event *event, int flags) perf_event_update_userpage(event); } +static void amd_uncore_umc_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new, shift; + s64 delta; + + shift = COUNTER_SHIFT + 1; + prev = local64_read(&hwc->prev_count); + + /* + * UMC counters do not have RDPMC assignments. Read counts directly + * from the corresponding PERF_CTR. + */ + rdmsrl(hwc->event_base, new); + + /* + * Unlike the other uncore counters, UMC counters saturate and set the + * Overflow bit (bit 48) on overflow. Since they do not roll over, + * proactively reset the corresponding PERF_CTR when bit 47 is set so + * that the counter never gets a chance to saturate. + */ + if (new & BIT_ULL(63 - COUNTER_SHIFT)) { + wrmsrl(hwc->event_base, 0); + local64_set(&hwc->prev_count, 0); + } else { + local64_set(&hwc->prev_count, new); + } + + delta = (new << shift) - (prev << shift); + delta >>= shift; + local64_add(delta, &event->count); +} + static void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { @@ -967,7 +1000,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) .del = amd_uncore_del, .start = amd_uncore_umc_start, .stop = amd_uncore_stop, - .read = amd_uncore_read, + .read = amd_uncore_umc_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, .module = THIS_MODULE, }; From 67c02d33ab1a091064861ecb6050a4012aaa073f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 6 Apr 2025 00:31:37 +0200 Subject: [PATCH 2077/2924] gfs2: replace sd_aspace with sd_inode [ Upstream commit ae9f3bd8259a0a8f67be2420e66bb05fbb95af48 ] Currently, sdp->sd_aspace and the per-inode metadata address spaces use sb->s_bdev->bd_mapping->host as their ->host; folios in those address spaces will thus appear to be on bdev rather than on gfs2 filesystems. This is a problem because gfs2 doesn't support cgroup writeback (SB_I_CGROUPWB), but bdev does. Fix that by using a "dummy" gfs2 inode as ->host in those address spaces. When coming from a folio, folio->mapping->host->i_sb will then be a gfs2 super block and the SB_I_CGROUPWB flag will not be set in sb->s_iflags. Based on a previous version from Bob Peterson from several years ago. Thanks to Tetsuo Handa, Jan Kara, and Rafael Aquini for helping figure this out. Fixes: aaa2cacf8184 ("writeback: add lockdep annotation to inode_to_wb()") Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/glock.c | 3 +-- fs/gfs2/glops.c | 4 ++-- fs/gfs2/incore.h | 9 ++++++++- fs/gfs2/meta_io.c | 2 +- fs/gfs2/meta_io.h | 4 +--- fs/gfs2/ops_fstype.c | 31 ++++++++++++++++++------------- fs/gfs2/super.c | 2 +- 7 files changed, 32 insertions(+), 23 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d7220a6fe8f55e..ba25b884169e50 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1166,7 +1166,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp) { - struct super_block *s = sdp->sd_vfs; struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type, .ln_sbd = sdp }; @@ -1229,7 +1228,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping = gfs2_glock2aspace(gl); if (mapping) { mapping->a_ops = &gfs2_meta_aops; - mapping->host = s->s_bdev->bd_mapping->host; + mapping->host = sdp->sd_inode; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_NOFS); mapping->i_private_data = NULL; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index eb4714f299efb6..116efe335c3212 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -168,7 +168,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) static int gfs2_rgrp_metasync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct address_space *metamapping = &sdp->sd_aspace; + struct address_space *metamapping = gfs2_aspace(sdp); struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); const unsigned bsize = sdp->sd_sb.sb_bsize; loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK; @@ -225,7 +225,7 @@ static int rgrp_go_sync(struct gfs2_glock *gl) static void rgrp_go_inval(struct gfs2_glock *gl, int flags) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct address_space *mapping = &sdp->sd_aspace; + struct address_space *mapping = gfs2_aspace(sdp); struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); const unsigned bsize = sdp->sd_sb.sb_bsize; loff_t start, end; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 74abbd4970f80b..0a41c4e76b3267 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -795,7 +795,7 @@ struct gfs2_sbd { /* Log stuff */ - struct address_space sd_aspace; + struct inode *sd_inode; spinlock_t sd_log_lock; @@ -851,6 +851,13 @@ struct gfs2_sbd { unsigned long sd_glock_dqs_held; }; +#define GFS2_BAD_INO 1 + +static inline struct address_space *gfs2_aspace(struct gfs2_sbd *sdp) +{ + return sdp->sd_inode->i_mapping; +} + static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which) { gl->gl_stats.stats[which]++; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 198cc705663755..9dc8885c95d072 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -132,7 +132,7 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) unsigned int bufnum; if (mapping == NULL) - mapping = &sdp->sd_aspace; + mapping = gfs2_aspace(sdp); shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift; index = blkno >> shift; /* convert block to page */ diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 831d988c2ceb74..b7c8a6684d0249 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -44,9 +44,7 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) struct gfs2_glock_aspace *gla = container_of(mapping, struct gfs2_glock_aspace, mapping); return gla->glock.gl_name.ln_sbd; - } else if (mapping->a_ops == &gfs2_rgrp_aops) - return container_of(mapping, struct gfs2_sbd, sd_aspace); - else + } else return inode->i_sb->s_fs_info; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e83d293c361423..6ce475e1c6d64c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -72,7 +72,6 @@ void free_sbd(struct gfs2_sbd *sdp) static struct gfs2_sbd *init_sbd(struct super_block *sb) { struct gfs2_sbd *sdp; - struct address_space *mapping; sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL); if (!sdp) @@ -109,16 +108,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) INIT_LIST_HEAD(&sdp->sd_sc_inodes_list); - mapping = &sdp->sd_aspace; - - address_space_init_once(mapping); - mapping->a_ops = &gfs2_rgrp_aops; - mapping->host = sb->s_bdev->bd_mapping->host; - mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_NOFS); - mapping->i_private_data = NULL; - mapping->writeback_index = 0; - spin_lock_init(&sdp->sd_log_lock); atomic_set(&sdp->sd_log_pinned, 0); INIT_LIST_HEAD(&sdp->sd_log_revokes); @@ -1135,6 +1124,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) int silent = fc->sb_flags & SB_SILENT; struct gfs2_sbd *sdp; struct gfs2_holder mount_gh; + struct address_space *mapping; int error; sdp = init_sbd(sb); @@ -1156,6 +1146,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_flags |= SB_NOSEC; sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; + sb->s_d_op = &gfs2_dops; sb->s_export_op = &gfs2_export_ops; sb->s_qcop = &gfs2_quotactl_ops; @@ -1181,9 +1172,21 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) sdp->sd_tune.gt_statfs_quantum = 30; } + /* Set up an address space for metadata writes */ + sdp->sd_inode = new_inode(sb); + error = -ENOMEM; + if (!sdp->sd_inode) + goto fail_free; + sdp->sd_inode->i_ino = GFS2_BAD_INO; + sdp->sd_inode->i_size = OFFSET_MAX; + + mapping = gfs2_aspace(sdp); + mapping->a_ops = &gfs2_rgrp_aops; + mapping_set_gfp_mask(mapping, GFP_NOFS); + error = init_names(sdp, silent); if (error) - goto fail_free; + goto fail_iput; snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name); @@ -1192,7 +1195,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0, sdp->sd_fsname); if (!sdp->sd_glock_wq) - goto fail_free; + goto fail_iput; sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, sdp->sd_fsname); @@ -1309,6 +1312,8 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) fail_glock_wq: if (sdp->sd_glock_wq) destroy_workqueue(sdp->sd_glock_wq); +fail_iput: + iput(sdp->sd_inode); fail_free: free_sbd(sdp); sb->s_fs_info = NULL; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 44e5658b896c88..4529b7dda8ca2e 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -648,7 +648,7 @@ static void gfs2_put_super(struct super_block *sb) gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); - truncate_inode_pages_final(&sdp->sd_aspace); + iput(sdp->sd_inode); gfs2_delete_debugfs_file(sdp); gfs2_sys_fs_del(sdp); From 9fe47aaf495723ca33cec3d59d2687c741d26ded Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 18 Apr 2025 16:40:58 +0200 Subject: [PATCH 2078/2924] gfs2: gfs2_create_inode error handling fix [ Upstream commit af4044fd0b77e915736527dd83011e46e6415f01 ] When gfs2_create_inode() finds a directory, make sure to return -EISDIR. Fixes: 571a4b57975a ("GFS2: bugger off early if O_CREAT open finds a directory") Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 198a8cbaf5e5ad..9621680814b805 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -659,7 +659,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!IS_ERR(inode)) { if (S_ISDIR(inode->i_mode)) { iput(inode); - inode = ERR_PTR(-EISDIR); + inode = NULL; + error = -EISDIR; goto fail_gunlock; } d_instantiate(dentry, inode); From 4ec522525263eb3eeb404a3a9ea36ed325c14d9a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Apr 2025 22:41:40 +0200 Subject: [PATCH 2079/2924] gfs2: Move gfs2_dinode_dealloc [ Upstream commit bcd18105fb34e27c097f222733dba9a3e79f191c ] Move gfs2_dinode_dealloc() and its helper gfs2_final_release_pages() from super.c to inode.c. Signed-off-by: Andreas Gruenbacher Stable-dep-of: 2c63986dd35f ("gfs2: deallocate inodes in gfs2_create_inode") Signed-off-by: Sasha Levin --- fs/gfs2/inode.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/inode.h | 1 + fs/gfs2/super.c | 68 ------------------------------------------------- 3 files changed, 69 insertions(+), 68 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 9621680814b805..b2d38d09af7e8c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -439,6 +439,74 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) return error; } +static void gfs2_final_release_pages(struct gfs2_inode *ip) +{ + struct inode *inode = &ip->i_inode; + struct gfs2_glock *gl = ip->i_gl; + + if (unlikely(!gl)) { + /* This can only happen during incomplete inode creation. */ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + + truncate_inode_pages(gfs2_glock2aspace(gl), 0); + truncate_inode_pages(&inode->i_data, 0); + + if (atomic_read(&gl->gl_revokes) == 0) { + clear_bit(GLF_LFLUSH, &gl->gl_flags); + clear_bit(GLF_DIRTY, &gl->gl_flags); + } +} + +int gfs2_dinode_dealloc(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_rgrpd *rgd; + struct gfs2_holder gh; + int error; + + if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { + gfs2_consist_inode(ip); + return -EIO; + } + + gfs2_rindex_update(sdp); + + error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); + if (error) + return error; + + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); + if (!rgd) { + gfs2_consist_inode(ip); + error = -EIO; + goto out_qs; + } + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); + if (error) + goto out_qs; + + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, + sdp->sd_jdesc->jd_blocks); + if (error) + goto out_rg_gunlock; + + gfs2_free_di(rgd, ip); + + gfs2_final_release_pages(ip); + + gfs2_trans_end(sdp); + +out_rg_gunlock: + gfs2_glock_dq_uninit(&gh); +out_qs: + gfs2_quota_unhold(ip); + return error; +} + static void gfs2_init_dir(struct buffer_head *dibh, const struct gfs2_inode *parent) { diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 9e5e1622d50a60..eafe123617e698 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -92,6 +92,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 no_formal_ino, unsigned int blktype); +int gfs2_dinode_dealloc(struct gfs2_inode *ip); struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 4529b7dda8ca2e..3f49f848c6b4e0 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1173,74 +1173,6 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) return 0; } -static void gfs2_final_release_pages(struct gfs2_inode *ip) -{ - struct inode *inode = &ip->i_inode; - struct gfs2_glock *gl = ip->i_gl; - - if (unlikely(!gl)) { - /* This can only happen during incomplete inode creation. */ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); - return; - } - - truncate_inode_pages(gfs2_glock2aspace(gl), 0); - truncate_inode_pages(&inode->i_data, 0); - - if (atomic_read(&gl->gl_revokes) == 0) { - clear_bit(GLF_LFLUSH, &gl->gl_flags); - clear_bit(GLF_DIRTY, &gl->gl_flags); - } -} - -static int gfs2_dinode_dealloc(struct gfs2_inode *ip) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_rgrpd *rgd; - struct gfs2_holder gh; - int error; - - if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { - gfs2_consist_inode(ip); - return -EIO; - } - - gfs2_rindex_update(sdp); - - error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); - if (error) - return error; - - rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); - if (!rgd) { - gfs2_consist_inode(ip); - error = -EIO; - goto out_qs; - } - - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - LM_FLAG_NODE_SCOPE, &gh); - if (error) - goto out_qs; - - error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, - sdp->sd_jdesc->jd_blocks); - if (error) - goto out_rg_gunlock; - - gfs2_free_di(rgd, ip); - - gfs2_final_release_pages(ip); - - gfs2_trans_end(sdp); - -out_rg_gunlock: - gfs2_glock_dq_uninit(&gh); -out_qs: - gfs2_quota_unhold(ip); - return error; -} - /** * gfs2_glock_put_eventually * @gl: The glock to put From b94c3058f1c1d4355398ede8c12917ecf581599a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 18 Apr 2025 01:09:32 +0200 Subject: [PATCH 2080/2924] gfs2: Move GIF_ALLOC_FAILED check out of gfs2_ea_dealloc [ Upstream commit 0cc617a54dfe6b44624c9a03e2e11a24eb9bc720 ] Don't check for the GIF_ALLOC_FAILED flag in gfs2_ea_dealloc() and pass that information explicitly instead. This allows for a cleaner follow-up patch. Signed-off-by: Andreas Gruenbacher Stable-dep-of: 2c63986dd35f ("gfs2: deallocate inodes in gfs2_create_inode") Signed-off-by: Sasha Levin --- fs/gfs2/super.c | 2 +- fs/gfs2/xattr.c | 11 ++++++----- fs/gfs2/xattr.h | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 3f49f848c6b4e0..e25a24ae2197fd 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1314,7 +1314,7 @@ static int evict_unlinked_inode(struct inode *inode) } if (ip->i_eattr) { - ret = gfs2_ea_dealloc(ip); + ret = gfs2_ea_dealloc(ip, !test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); if (ret) goto out; } diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 17ae5070a90e67..df9c93de94c793 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1383,7 +1383,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) return error; } -static int ea_dealloc_block(struct gfs2_inode *ip) +static int ea_dealloc_block(struct gfs2_inode *ip, bool initialized) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; @@ -1416,7 +1416,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip) ip->i_eattr = 0; gfs2_add_inode_blocks(&ip->i_inode, -1); - if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + if (initialized) { error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_meta(ip->i_gl, dibh); @@ -1435,11 +1435,12 @@ static int ea_dealloc_block(struct gfs2_inode *ip) /** * gfs2_ea_dealloc - deallocate the extended attribute fork * @ip: the inode + * @initialized: xattrs have been initialized * * Returns: errno */ -int gfs2_ea_dealloc(struct gfs2_inode *ip) +int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized) { int error; @@ -1451,7 +1452,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) if (error) return error; - if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + if (initialized) { error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); if (error) goto out_quota; @@ -1463,7 +1464,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) } } - error = ea_dealloc_block(ip); + error = ea_dealloc_block(ip, initialized); out_quota: gfs2_quota_unhold(ip); diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h index eb12eb7e37c194..3c9788e0e13750 100644 --- a/fs/gfs2/xattr.h +++ b/fs/gfs2/xattr.h @@ -54,7 +54,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name, const void *value, size_t size, int flags, int type); ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); -int gfs2_ea_dealloc(struct gfs2_inode *ip); +int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized); /* Exported to acl.c */ From 7887a8e503de6a09ea6d02993aca77037f969466 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 18 Apr 2025 16:41:52 +0200 Subject: [PATCH 2081/2924] gfs2: deallocate inodes in gfs2_create_inode [ Upstream commit 2c63986dd35fa9eb0d7d1530b5eb2244b7296e22 ] When creating and destroying inodes, we are relying on the inode hash table to make sure that for a given inode number, only a single inode will exist. We then link that inode to its inode and iopen glock and let those glocks point back at the inode. However, when iget_failed() is called, the inode is removed from the inode hash table before gfs_evict_inode() is called, and uniqueness is no longer guaranteed. Commit f1046a472b70 ("gfs2: gl_object races fix") was trying to work around that problem by detaching the inode glock from the inode before calling iget_failed(), but that broke the inode deallocation code in gfs_evict_inode(). To fix that, deallocate partially created inodes in gfs2_create_inode() instead of relying on gfs_evict_inode() for doing that. This means that gfs2_evict_inode() and its helper functions will no longer see partially created inodes, and so some simplifications are possible there. Fixes: 9ffa18884cce ("gfs2: gl_object races fix") Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/inode.c | 27 +++++++++++++++++++-------- fs/gfs2/super.c | 6 +----- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b2d38d09af7e8c..8fd81444ffea00 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -697,10 +697,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_glock *io_gl; - int error; + int error, dealloc_error; u32 aflags = 0; unsigned blocks = 1; struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, }; + bool xattr_initialized = false; if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; @@ -813,11 +814,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (error) - goto fail_free_inode; + goto fail_dealloc_inode; error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (error) - goto fail_free_inode; + goto fail_dealloc_inode; gfs2_cancel_delete_work(io_gl); io_gl->gl_no_formal_ino = ip->i_no_formal_ino; @@ -841,8 +842,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock3; - if (blocks > 1) + if (blocks > 1) { gfs2_init_xattr(ip); + xattr_initialized = true; + } init_dinode(dip, ip, symname); gfs2_trans_end(sdp); @@ -897,6 +900,18 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: gfs2_glock_put(io_gl); +fail_dealloc_inode: + set_bit(GIF_ALLOC_FAILED, &ip->i_flags); + dealloc_error = 0; + if (ip->i_eattr) + dealloc_error = gfs2_ea_dealloc(ip, xattr_initialized); + clear_nlink(inode); + mark_inode_dirty(inode); + if (!dealloc_error) + dealloc_error = gfs2_dinode_dealloc(ip); + if (dealloc_error) + fs_warn(sdp, "%s: %d\n", __func__, dealloc_error); + ip->i_no_addr = 0; fail_free_inode: if (ip->i_gl) { gfs2_glock_put(ip->i_gl); @@ -911,10 +926,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_dir_no_add(&da); gfs2_glock_dq_uninit(&d_gh); if (!IS_ERR_OR_NULL(inode)) { - set_bit(GIF_ALLOC_FAILED, &ip->i_flags); - clear_nlink(inode); - if (ip->i_no_addr) - mark_inode_dirty(inode); if (inode->i_state & I_NEW) iget_failed(inode); else diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e25a24ae2197fd..54f1efd47a3e5f 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1258,9 +1258,6 @@ static enum evict_behavior evict_should_delete(struct inode *inode, struct gfs2_sbd *sdp = sb->s_fs_info; int ret; - if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) - goto should_delete; - if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(GLF_DEFER_DELETE, &ip->i_iopen_gh.gh_gl->gl_flags)) return EVICT_SHOULD_DEFER_DELETE; @@ -1290,7 +1287,6 @@ static enum evict_behavior evict_should_delete(struct inode *inode, if (inode->i_nlink) return EVICT_SHOULD_SKIP_DELETE; -should_delete: if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) return gfs2_upgrade_iopen_glock(inode); @@ -1314,7 +1310,7 @@ static int evict_unlinked_inode(struct inode *inode) } if (ip->i_eattr) { - ret = gfs2_ea_dealloc(ip, !test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + ret = gfs2_ea_dealloc(ip, true); if (ret) goto out; } From 2c350809b8090723b4c9d62906f0d62509ee6571 Mon Sep 17 00:00:00 2001 From: Qing Wang Date: Sat, 5 Apr 2025 22:16:35 +0800 Subject: [PATCH 2082/2924] perf/core: Fix broken throttling when max_samples_per_tick=1 [ Upstream commit f51972e6f8b9a737b2b3eb588069acb538fa72de ] According to the throttling mechanism, the pmu interrupts number can not exceed the max_samples_per_tick in one tick. But this mechanism is ineffective when max_samples_per_tick=1, because the throttling check is skipped during the first interrupt and only performed when the second interrupt arrives. Perhaps this bug may cause little influence in one tick, but if in a larger time scale, the problem can not be underestimated. When max_samples_per_tick = 1: Allowed-interrupts-per-second max-samples-per-second default-HZ ARCH 200 100 100 X86 500 250 250 ARM64 ... Obviously, the pmu interrupt number far exceed the user's expect. Fixes: e050e3f0a71b ("perf: Fix broken interrupt rate throttling") Signed-off-by: Qing Wang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250405141635.243786-3-wangqing7171@gmail.com Signed-off-by: Sasha Levin --- kernel/events/core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 95e703891b24f8..881d768e455642 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10029,14 +10029,14 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle) hwc->interrupts = 1; } else { hwc->interrupts++; - if (unlikely(throttle && - hwc->interrupts > max_samples_per_tick)) { - __this_cpu_inc(perf_throttled_count); - tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS); - hwc->interrupts = MAX_INTERRUPTS; - perf_log_throttle(event, 0); - ret = 1; - } + } + + if (unlikely(throttle && hwc->interrupts >= max_samples_per_tick)) { + __this_cpu_inc(perf_throttled_count); + tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS); + hwc->interrupts = MAX_INTERRUPTS; + perf_log_throttle(event, 0); + ret = 1; } if (event->attr.freq) { From 4051250e5db489f8ad65fc337e2677b9b568ac72 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Fri, 25 Apr 2025 15:45:14 +0300 Subject: [PATCH 2083/2924] crypto: sun8i-ce-cipher - fix error handling in sun8i_ce_cipher_prepare() [ Upstream commit f31adc3e356f7350d4a4d68c98d3f60f2f6e26b3 ] Fix two DMA cleanup issues on the error path in sun8i_ce_cipher_prepare(): 1] If dma_map_sg() fails for areq->dst, the device driver would try to free DMA memory it has not allocated in the first place. To fix this, on the "theend_sgs" error path, call dma unmap only if the corresponding dma map was successful. 2] If the dma_map_single() call for the IV fails, the device driver would try to free an invalid DMA memory address on the "theend_iv" path: ------------[ cut here ]------------ DMA-API: sun8i-ce 1904000.crypto: device driver tries to free an invalid DMA memory address WARNING: CPU: 2 PID: 69 at kernel/dma/debug.c:968 check_unmap+0x123c/0x1b90 Modules linked in: skcipher_example(O+) CPU: 2 UID: 0 PID: 69 Comm: 1904000.crypto- Tainted: G O 6.15.0-rc3+ #24 PREEMPT Tainted: [O]=OOT_MODULE Hardware name: OrangePi Zero2 (DT) pc : check_unmap+0x123c/0x1b90 lr : check_unmap+0x123c/0x1b90 ... Call trace: check_unmap+0x123c/0x1b90 (P) debug_dma_unmap_page+0xac/0xc0 dma_unmap_page_attrs+0x1f4/0x5fc sun8i_ce_cipher_do_one+0x1bd4/0x1f40 crypto_pump_work+0x334/0x6e0 kthread_worker_fn+0x21c/0x438 kthread+0x374/0x664 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- To fix this, check for !dma_mapping_error() before calling dma_unmap_single() on the "theend_iv" path. Fixes: 06f751b61329 ("crypto: allwinner - Add sun8i-ce Crypto Engine") Signed-off-by: Ovidiu Panait Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 19b7fb4a93e86c..05f67661553c9a 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -275,13 +275,16 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req } else { if (nr_sgs > 0) dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); - dma_unmap_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE); + + if (nr_sgd > 0) + dma_unmap_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE); } theend_iv: if (areq->iv && ivsize > 0) { - if (rctx->addr_iv) + if (!dma_mapping_error(ce->dev, rctx->addr_iv)) dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE); + offset = areq->cryptlen - ivsize; if (rctx->op_dir & CE_DECRYPTION) { memcpy(areq->iv, chan->backup_iv, ivsize); From f8af2bb62ef4e8217971e48683e5baf382cde212 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sun, 27 Apr 2025 13:12:36 +0200 Subject: [PATCH 2084/2924] crypto: sun8i-ss - do not use sg_dma_len before calling DMA functions [ Upstream commit 2dfc7cd74a5e062a5405560447517e7aab1c7341 ] When testing sun8i-ss with multi_v7_defconfig, all CBC algorithm fail crypto selftests. This is strange since on sunxi_defconfig, everything was ok. The problem was in the IV setup loop which never run because sg_dma_len was 0. Fixes: 359e893e8af4 ("crypto: sun8i-ss - rework handling of IV") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 9b9605ce8ee629..8831bcb230c2d4 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -141,7 +141,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq) /* we need to copy all IVs from source in case DMA is bi-directionnal */ while (sg && len) { - if (sg_dma_len(sg) == 0) { + if (sg->length == 0) { sg = sg_next(sg); continue; } From 409bcfdc5551e0454bfaacc141c39f51abc5d85d Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 17 Apr 2025 12:53:05 +0200 Subject: [PATCH 2085/2924] powerpc: do not build ppc_save_regs.o always [ Upstream commit 497b7794aef03d525a5be05ae78dd7137c6861a5 ] The Fixes commit below tried to add CONFIG_PPC_BOOK3S to one of the conditions to enable the build of ppc_save_regs.o. But it failed to do so, in fact. The commit omitted to add a dollar sign. Therefore, ppc_save_regs.o is built always these days (as "(CONFIG_PPC_BOOK3S)" is never an empty string). Fix this by adding the missing dollar sign. Signed-off-by: Jiri Slaby (SUSE) Fixes: fc2a5a6161a2 ("powerpc/64s: ppc_save_regs is now needed for all 64s builds") Acked-by: Stephen Rothwell Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250417105305.397128-1-jirislaby@kernel.org Signed-off-by: Sasha Levin --- arch/powerpc/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 6ac621155ec3c8..0c26b2412d1730 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -160,7 +160,7 @@ endif obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o -ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)(CONFIG_PPC_BOOK3S),) +ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)$(CONFIG_PPC_BOOK3S),) obj-y += ppc_save_regs.o endif From a0731b83e16111dcfa589f7d54aa042bb6fb8332 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Tue, 11 Feb 2025 10:20:54 -0600 Subject: [PATCH 2086/2924] powerpc/crash: Fix non-smp kexec preparation [ Upstream commit 882b25af265de8e05c66f72b9a29f6047102958f ] In non-smp configurations, crash_kexec_prepare is never called in the crash shutdown path. One result of this is that the crashing_cpu variable is never set, preventing crash_save_cpu from storing the NT_PRSTATUS elf note in the core dump. Fixes: c7255058b543 ("powerpc/crash: save cpu register data in crash_smp_send_stop()") Signed-off-by: Eddie James Reviewed-by: Hari Bathini Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250211162054.857762-1-eajames@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/kexec/crash.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index 9ac3266e496522..a325c1c02f96dc 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -359,7 +359,10 @@ void default_machine_crash_shutdown(struct pt_regs *regs) if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) is_via_system_reset = 1; - crash_smp_send_stop(); + if (IS_ENABLED(CONFIG_SMP)) + crash_smp_send_stop(); + else + crash_kexec_prepare(); crash_save_cpu(regs, crashing_cpu); From d947affa9213ea28081048ce761700fc3957490e Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 29 Apr 2025 08:07:26 -0700 Subject: [PATCH 2087/2924] sched/core: Tweak wait_task_inactive() to force dequeue sched_delayed tasks [ Upstream commit b7ca5743a2604156d6083b88cefacef983f3a3a6 ] It was reported that in 6.12, smpboot_create_threads() was taking much longer then in 6.6. I narrowed down the call path to: smpboot_create_threads() -> kthread_create_on_cpu() -> kthread_bind() -> __kthread_bind_mask() ->wait_task_inactive() Where in wait_task_inactive() we were regularly hitting the queued case, which sets a 1 tick timeout, which when called multiple times in a row, accumulates quickly into a long delay. I noticed disabling the DELAY_DEQUEUE sched feature recovered the performance, and it seems the newly create tasks are usually sched_delayed and left on the runqueue. So in wait_task_inactive() when we see the task p->se.sched_delayed, manually dequeue the sched_delayed task with DEQUEUE_DELAYED, so we don't have to constantly wait a tick. Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue") Reported-by: peter-yc.chang@mediatek.com Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Tested-by: K Prateek Nayak Link: https://lkml.kernel.org/r/20250429150736.3778580-1-jstultz@google.com Signed-off-by: Sasha Levin --- kernel/sched/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 36b34e68845873..d593d6612ba07e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2283,6 +2283,12 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state * just go back and repeat. */ rq = task_rq_lock(p, &rf); + /* + * If task is sched_delayed, force dequeue it, to avoid always + * hitting the tick timeout in the queued case + */ + if (p->se.sched_delayed) + dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); trace_sched_wait_task(p); running = task_on_cpu(rq, p); queued = task_on_rq_queued(p); From f6e90139f79ff024c98bb0fa2b53c2686098ed9f Mon Sep 17 00:00:00 2001 From: Annie Li Date: Wed, 30 Apr 2025 05:34:24 +0000 Subject: [PATCH 2088/2924] x86/microcode/AMD: Do not return error when microcode update is not necessary [ Upstream commit b43dc4ab097859c24e2a6993119c927cffc856aa ] After 6f059e634dcd("x86/microcode: Clarify the late load logic"), if the load is up-to-date, the AMD side returns UCODE_OK which leads to load_late_locked() returning -EBADFD. Handle UCODE_OK in the switch case to avoid this error. [ bp: Massage commit message. ] Fixes: 6f059e634dcd ("x86/microcode: Clarify the late load logic") Signed-off-by: Annie Li Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250430053424.77438-1-jiayanli@google.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/microcode/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 079f046ee26d19..e8021d3e58824a 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -696,6 +696,8 @@ static int load_late_locked(void) return load_late_stop_cpus(true); case UCODE_NFOUND: return -ENOENT; + case UCODE_OK: + return 0; default: return -EBADFD; } From d9b9da94022620c531094ef2bff78449cc72610e Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 11 Apr 2025 17:09:41 +0200 Subject: [PATCH 2089/2924] selftests: coredump: Properly initialize pointer [ Upstream commit e194d2067c958827810a7a7282dff8773633ad8c ] The buffer pointer "line" is not initialized. This pointer is passed to getline(). It can still work if the stack is zero-initialized, because getline() can work with a NULL pointer as buffer. But this is obviously broken. This bug shows up while running the test on a riscv64 machine. Fix it by properly initializing the pointer. Fixes: 15858da53542 ("selftests: coredump: Add stackdump test") Signed-off-by: Nam Cao Link: https://lore.kernel.org/4fb9b6fb3e0040481bacc258c44b4aab5c4df35d.1744383419.git.namcao@linutronix.de Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- tools/testing/selftests/coredump/stackdump_test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 137b2364a08207..c23cf95c3f6df0 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -138,10 +138,12 @@ TEST_F(coredump, stackdump) ASSERT_NE(file, NULL); /* Step 4: Make sure all stack pointer values are non-zero */ + line = NULL; for (i = 0; -1 != getline(&line, &line_length, file); ++i) { stack = strtoull(line, NULL, 10); ASSERT_NE(stack, 0); } + free(line); ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN); From a44b5e75f727c418add2a4589a92f1242229122c Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 11 Apr 2025 17:09:42 +0200 Subject: [PATCH 2090/2924] selftests: coredump: Fix test failure for slow machines [ Upstream commit 6f5bf9f37f06668ead446e2997f2b431db8963ce ] The test waits for coredump to finish by busy-waiting for the stack_values file to be created. The maximum wait time is 10 seconds. This doesn't work for slow machine (qemu-system-riscv64), because coredump takes longer. Fix it by waiting for the crashing child process to finish first. Fixes: 15858da53542 ("selftests: coredump: Add stackdump test") Signed-off-by: Nam Cao Link: https://lore.kernel.org/ee657f3fc8e19657cf7aaa366552d6347728f371.1744383419.git.namcao@linutronix.de Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- tools/testing/selftests/coredump/stackdump_test.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index c23cf95c3f6df0..9da10fb5e597c3 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -96,7 +96,7 @@ TEST_F(coredump, stackdump) char *test_dir, *line; size_t line_length; char buf[PATH_MAX]; - int ret, i; + int ret, i, status; FILE *file; pid_t pid; @@ -129,6 +129,10 @@ TEST_F(coredump, stackdump) /* * Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file */ + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + for (i = 0; i < 10; ++i) { file = fopen(STACKDUMP_FILE, "r"); if (file) From 7dba640f278e02810b72d9edae1b4c4af889aff6 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 11 Apr 2025 17:09:43 +0200 Subject: [PATCH 2091/2924] selftests: coredump: Raise timeout to 2 minutes [ Upstream commit c6e888d02d51d1e9f9abf1587e6a5618375cac9f ] The test's runtime (nearly 20s) is dangerously close to the limit (30s) on qemu-system-riscv64: $ time ./stackdump_test > /dev/null real 0m19.210s user 0m0.077s sys 0m0.359s There could be machines slower than qemu-system-riscv64. Therefore raise the test timeout to 2 minutes to be safe. Fixes: 15858da53542 ("selftests: coredump: Add stackdump test") Signed-off-by: Nam Cao Link: https://lore.kernel.org/dd636084d55e7828782728d087fa2298dcab1c8b.1744383419.git.namcao@linutronix.de Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- tools/testing/selftests/coredump/stackdump_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 9da10fb5e597c3..fe3c728cd6be5a 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -89,7 +89,7 @@ FIXTURE_TEARDOWN(coredump) fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason); } -TEST_F(coredump, stackdump) +TEST_F_TIMEOUT(coredump, stackdump, 120) { struct sigaction action = {}; unsigned long long stack; From 5974f45fa24cf4743748b3994dfa122db795d18e Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Thu, 1 May 2025 22:06:50 +0300 Subject: [PATCH 2092/2924] crypto: sun8i-ce - undo runtime PM changes during driver removal [ Upstream commit 9334f427576e6d361a409959b52246b0aa10476f ] The pm_runtime_use_autosuspend() call must be undone with pm_runtime_dont_use_autosuspend() at driver exit, but this is not currently handled in the driver. To fix this issue and at the same time simplify error handling, switch to devm_pm_runtime_enable(). It will call both pm_runtime_disable() and pm_runtime_dont_use_autosuspend() during driver removal. Fixes: 06f751b61329 ("crypto: allwinner - Add sun8i-ce Crypto Engine") Signed-off-by: Ovidiu Panait Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- .../crypto/allwinner/sun8i-ce/sun8i-ce-core.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index ec1ffda9ea32e0..658f520cee0caa 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -832,13 +832,12 @@ static int sun8i_ce_pm_init(struct sun8i_ce_dev *ce) err = pm_runtime_set_suspended(ce->dev); if (err) return err; - pm_runtime_enable(ce->dev); - return err; -} -static void sun8i_ce_pm_exit(struct sun8i_ce_dev *ce) -{ - pm_runtime_disable(ce->dev); + err = devm_pm_runtime_enable(ce->dev); + if (err) + return err; + + return 0; } static int sun8i_ce_get_clks(struct sun8i_ce_dev *ce) @@ -1041,7 +1040,7 @@ static int sun8i_ce_probe(struct platform_device *pdev) "sun8i-ce-ns", ce); if (err) { dev_err(ce->dev, "Cannot request CryptoEngine Non-secure IRQ (err=%d)\n", err); - goto error_irq; + goto error_pm; } err = sun8i_ce_register_algs(ce); @@ -1082,8 +1081,6 @@ static int sun8i_ce_probe(struct platform_device *pdev) return 0; error_alg: sun8i_ce_unregister_algs(ce); -error_irq: - sun8i_ce_pm_exit(ce); error_pm: sun8i_ce_free_chanlist(ce, MAXFLOW - 1); return err; @@ -1104,8 +1101,6 @@ static void sun8i_ce_remove(struct platform_device *pdev) #endif sun8i_ce_free_chanlist(ce, MAXFLOW - 1); - - sun8i_ce_pm_exit(ce); } static const struct of_device_id sun8i_ce_crypto_of_match_table[] = { From 31a559510b9d90be14d6c5332f377747cccaa4be Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Thu, 17 Apr 2025 21:20:52 +0800 Subject: [PATCH 2093/2924] blk-throttle: Fix wrong tg->[bytes/io]_disp update in __tg_update_carryover() [ Upstream commit f66cf69eb8765341bbeff0e92a7d0d2027f62452 ] In commit 6cc477c36875 ("blk-throttle: carry over directly"), the carryover bytes/ios was be carried to [bytes/io]_disp. However, its update mechanism has some issues. In __tg_update_carryover(), we calculate "bytes" and "ios" to represent the carryover, but the computation when updating [bytes/io]_disp is incorrect. And if the sq->nr_queued is empty, we may not update tg->[bytes/io]_disp to 0 in tg_update_carryover(). We should set it to 0 in non carryover case. This patch fixes the issue. Fixes: 6cc477c36875 ("blk-throttle: carry over directly") Signed-off-by: Zizhi Wo Reviewed-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250417132054.2866409-2-wozizhi@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-throttle.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index d6dd2e04787491..7437de947120ed 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -644,6 +644,18 @@ static void __tg_update_carryover(struct throtl_grp *tg, bool rw, u64 bps_limit = tg_bps_limit(tg, rw); u32 iops_limit = tg_iops_limit(tg, rw); + /* + * If the queue is empty, carryover handling is not needed. In such cases, + * tg->[bytes/io]_disp should be reset to 0 to avoid impacting the dispatch + * of subsequent bios. The same handling applies when the previous BPS/IOPS + * limit was set to max. + */ + if (tg->service_queue.nr_queued[rw] == 0) { + tg->bytes_disp[rw] = 0; + tg->io_disp[rw] = 0; + return; + } + /* * If config is updated while bios are still throttled, calculate and * accumulate how many bytes/ios are waited across changes. And @@ -656,8 +668,8 @@ static void __tg_update_carryover(struct throtl_grp *tg, bool rw, if (iops_limit != UINT_MAX) *ios = calculate_io_allowed(iops_limit, jiffy_elapsed) - tg->io_disp[rw]; - tg->bytes_disp[rw] -= *bytes; - tg->io_disp[rw] -= *ios; + tg->bytes_disp[rw] = -*bytes; + tg->io_disp[rw] = -*ios; } static void tg_update_carryover(struct throtl_grp *tg) @@ -665,10 +677,8 @@ static void tg_update_carryover(struct throtl_grp *tg) long long bytes[2] = {0}; int ios[2] = {0}; - if (tg->service_queue.nr_queued[READ]) - __tg_update_carryover(tg, READ, &bytes[READ], &ios[READ]); - if (tg->service_queue.nr_queued[WRITE]) - __tg_update_carryover(tg, WRITE, &bytes[WRITE], &ios[WRITE]); + __tg_update_carryover(tg, READ, &bytes[READ], &ios[READ]); + __tg_update_carryover(tg, WRITE, &bytes[WRITE], &ios[WRITE]); /* see comments in struct throtl_grp for meaning of these fields. */ throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__, From 1cdad40ab51572f8e3830dc0f000fc9ae29c9abb Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 6 May 2025 07:04:13 +0200 Subject: [PATCH 2094/2924] x86/cpu: Sanitize CPUID(0x80000000) output [ Upstream commit cc663ba3fe383a628a812f893cc98aafff39ab04 ] CPUID(0x80000000).EAX returns the max extended CPUID leaf available. On x86-32 machines without an extended CPUID range, a CPUID(0x80000000) query will just repeat the output of the last valid standard CPUID leaf on the CPU; i.e., a garbage values. Current tip:x86/cpu code protects against this by doing: eax = cpuid_eax(0x80000000); c->extended_cpuid_level = eax; if ((eax & 0xffff0000) == 0x80000000) { // CPU has an extended CPUID range. Check for 0x80000001 if (eax >= 0x80000001) { cpuid(0x80000001, ...); } } This is correct so far. Afterwards though, the same possibly broken EAX value is used to check the availability of other extended CPUID leaves: if (c->extended_cpuid_level >= 0x80000007) ... if (c->extended_cpuid_level >= 0x80000008) ... if (c->extended_cpuid_level >= 0x8000000a) ... if (c->extended_cpuid_level >= 0x8000001f) ... which is invalid. Fix this by immediately setting the CPU's max extended CPUID leaf to zero if CPUID(0x80000000).EAX doesn't indicate a valid CPUID extended range. While at it, add a comment, similar to kernel/head_32.S, clarifying the CPUID(0x80000000) sanity check. References: 8a50e5135af0 ("x86-32: Use symbolic constants, safer CPUID when enabling EFER.NX") Fixes: 3da99c977637 ("x86: make (early)_identify_cpu more the same between 32bit and 64 bit") Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: H. Peter Anvin Cc: John Ogness Cc: x86-cpuid@lists.linux.dev Link: https://lore.kernel.org/r/20250506050437.10264-3-darwi@linutronix.de Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/common.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0ff057ff11ce93..5de4a879232a6c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1005,17 +1005,18 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_D_1_EAX] = eax; } - /* AMD-defined flags: level 0x80000001 */ + /* + * Check if extended CPUID leaves are implemented: Max extended + * CPUID leaf must be in the 0x80000001-0x8000ffff range. + */ eax = cpuid_eax(0x80000000); - c->extended_cpuid_level = eax; + c->extended_cpuid_level = ((eax & 0xffff0000) == 0x80000000) ? eax : 0; - if ((eax & 0xffff0000) == 0x80000000) { - if (eax >= 0x80000001) { - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (c->extended_cpuid_level >= 0x80000001) { + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_8000_0001_ECX] = ecx; - c->x86_capability[CPUID_8000_0001_EDX] = edx; - } + c->x86_capability[CPUID_8000_0001_ECX] = ecx; + c->x86_capability[CPUID_8000_0001_EDX] = edx; } if (c->extended_cpuid_level >= 0x80000007) { From 5387b3e0be28fe43800399b073d204838e49f7c5 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 28 Apr 2025 10:48:10 +0900 Subject: [PATCH 2095/2924] x86/insn: Fix opcode map (!REX2) superscript tags [ Upstream commit ca698ec2f07873a448d53c580795c4e023c75393 ] Commit: 159039af8c07 ("x86/insn: x86/insn: Add support for REX2 prefix to the instruction decoder opcode map") added (!REX2) superscript with a space, but the correct format requires ',' for concatination with other superscript tags. Add ',' to generate correct insn attribute tables. I confirmed with following command: arch/x86/lib/x86-opcode-map.txt | grep e8 | head -n 1 [0xe8] = INAT_MAKE_IMM(INAT_IMM_VWORD32) | INAT_FORCE64 | INAT_NO_REX2, Fixes: 159039af8c07 ("x86/insn: x86/insn: Add support for REX2 prefix to the instruction decoder opcode map") Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Adrian Hunter Cc: Peter Zijlstra Link: https://lore.kernel.org/r/174580489027.388420.15539375184727726142.stgit@devnote2 Signed-off-by: Sasha Levin --- arch/x86/lib/x86-opcode-map.txt | 50 +++++++++++++-------------- tools/arch/x86/lib/x86-opcode-map.txt | 50 +++++++++++++-------------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index f5dd84eb55dcda..cd3fd5155f6ece 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -35,7 +35,7 @@ # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) # - (66&F2): Both 0x66 and 0xF2 prefixes are specified. # -# REX2 Prefix +# REX2 Prefix Superscripts # - (!REX2): REX2 is not allowed # - (REX2): REX2 variant e.g. JMPABS @@ -286,10 +286,10 @@ df: ESC # Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix # in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation # to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. -e0: LOOPNE/LOOPNZ Jb (f64) (!REX2) -e1: LOOPE/LOOPZ Jb (f64) (!REX2) -e2: LOOP Jb (f64) (!REX2) -e3: JrCXZ Jb (f64) (!REX2) +e0: LOOPNE/LOOPNZ Jb (f64),(!REX2) +e1: LOOPE/LOOPZ Jb (f64),(!REX2) +e2: LOOP Jb (f64),(!REX2) +e3: JrCXZ Jb (f64),(!REX2) e4: IN AL,Ib (!REX2) e5: IN eAX,Ib (!REX2) e6: OUT Ib,AL (!REX2) @@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2) # in "near" jumps and calls is 16-bit. For CALL, # push of return address is 16-bit wide, RSP is decremented by 2 # but is not truncated to 16 bits, unlike RIP. -e8: CALL Jz (f64) (!REX2) -e9: JMP-near Jz (f64) (!REX2) -ea: JMP-far Ap (i64) (!REX2) -eb: JMP-short Jb (f64) (!REX2) +e8: CALL Jz (f64),(!REX2) +e9: JMP-near Jz (f64),(!REX2) +ea: JMP-far Ap (i64),(!REX2) +eb: JMP-short Jb (f64),(!REX2) ec: IN AL,DX (!REX2) ed: IN eAX,DX (!REX2) ee: OUT DX,AL (!REX2) @@ -478,22 +478,22 @@ AVXcode: 1 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). -80: JO Jz (f64) (!REX2) -81: JNO Jz (f64) (!REX2) -82: JB/JC/JNAE Jz (f64) (!REX2) -83: JAE/JNB/JNC Jz (f64) (!REX2) -84: JE/JZ Jz (f64) (!REX2) -85: JNE/JNZ Jz (f64) (!REX2) -86: JBE/JNA Jz (f64) (!REX2) -87: JA/JNBE Jz (f64) (!REX2) -88: JS Jz (f64) (!REX2) -89: JNS Jz (f64) (!REX2) -8a: JP/JPE Jz (f64) (!REX2) -8b: JNP/JPO Jz (f64) (!REX2) -8c: JL/JNGE Jz (f64) (!REX2) -8d: JNL/JGE Jz (f64) (!REX2) -8e: JLE/JNG Jz (f64) (!REX2) -8f: JNLE/JG Jz (f64) (!REX2) +80: JO Jz (f64),(!REX2) +81: JNO Jz (f64),(!REX2) +82: JB/JC/JNAE Jz (f64),(!REX2) +83: JAE/JNB/JNC Jz (f64),(!REX2) +84: JE/JZ Jz (f64),(!REX2) +85: JNE/JNZ Jz (f64),(!REX2) +86: JBE/JNA Jz (f64),(!REX2) +87: JA/JNBE Jz (f64),(!REX2) +88: JS Jz (f64),(!REX2) +89: JNS Jz (f64),(!REX2) +8a: JP/JPE Jz (f64),(!REX2) +8b: JNP/JPO Jz (f64),(!REX2) +8c: JL/JNGE Jz (f64),(!REX2) +8d: JNL/JGE Jz (f64),(!REX2) +8e: JLE/JNG Jz (f64),(!REX2) +8f: JNLE/JG Jz (f64),(!REX2) # 0x0f 0x90-0x9f 90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) 91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index f5dd84eb55dcda..cd3fd5155f6ece 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -35,7 +35,7 @@ # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) # - (66&F2): Both 0x66 and 0xF2 prefixes are specified. # -# REX2 Prefix +# REX2 Prefix Superscripts # - (!REX2): REX2 is not allowed # - (REX2): REX2 variant e.g. JMPABS @@ -286,10 +286,10 @@ df: ESC # Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix # in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation # to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. -e0: LOOPNE/LOOPNZ Jb (f64) (!REX2) -e1: LOOPE/LOOPZ Jb (f64) (!REX2) -e2: LOOP Jb (f64) (!REX2) -e3: JrCXZ Jb (f64) (!REX2) +e0: LOOPNE/LOOPNZ Jb (f64),(!REX2) +e1: LOOPE/LOOPZ Jb (f64),(!REX2) +e2: LOOP Jb (f64),(!REX2) +e3: JrCXZ Jb (f64),(!REX2) e4: IN AL,Ib (!REX2) e5: IN eAX,Ib (!REX2) e6: OUT Ib,AL (!REX2) @@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2) # in "near" jumps and calls is 16-bit. For CALL, # push of return address is 16-bit wide, RSP is decremented by 2 # but is not truncated to 16 bits, unlike RIP. -e8: CALL Jz (f64) (!REX2) -e9: JMP-near Jz (f64) (!REX2) -ea: JMP-far Ap (i64) (!REX2) -eb: JMP-short Jb (f64) (!REX2) +e8: CALL Jz (f64),(!REX2) +e9: JMP-near Jz (f64),(!REX2) +ea: JMP-far Ap (i64),(!REX2) +eb: JMP-short Jb (f64),(!REX2) ec: IN AL,DX (!REX2) ed: IN eAX,DX (!REX2) ee: OUT DX,AL (!REX2) @@ -478,22 +478,22 @@ AVXcode: 1 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). -80: JO Jz (f64) (!REX2) -81: JNO Jz (f64) (!REX2) -82: JB/JC/JNAE Jz (f64) (!REX2) -83: JAE/JNB/JNC Jz (f64) (!REX2) -84: JE/JZ Jz (f64) (!REX2) -85: JNE/JNZ Jz (f64) (!REX2) -86: JBE/JNA Jz (f64) (!REX2) -87: JA/JNBE Jz (f64) (!REX2) -88: JS Jz (f64) (!REX2) -89: JNS Jz (f64) (!REX2) -8a: JP/JPE Jz (f64) (!REX2) -8b: JNP/JPO Jz (f64) (!REX2) -8c: JL/JNGE Jz (f64) (!REX2) -8d: JNL/JGE Jz (f64) (!REX2) -8e: JLE/JNG Jz (f64) (!REX2) -8f: JNLE/JG Jz (f64) (!REX2) +80: JO Jz (f64),(!REX2) +81: JNO Jz (f64),(!REX2) +82: JB/JC/JNAE Jz (f64),(!REX2) +83: JAE/JNB/JNC Jz (f64),(!REX2) +84: JE/JZ Jz (f64),(!REX2) +85: JNE/JNZ Jz (f64),(!REX2) +86: JBE/JNA Jz (f64),(!REX2) +87: JA/JNBE Jz (f64),(!REX2) +88: JS Jz (f64),(!REX2) +89: JNS Jz (f64),(!REX2) +8a: JP/JPE Jz (f64),(!REX2) +8b: JNP/JPO Jz (f64),(!REX2) +8c: JL/JNGE Jz (f64),(!REX2) +8d: JNL/JGE Jz (f64),(!REX2) +8e: JLE/JNG Jz (f64),(!REX2) +8f: JNLE/JG Jz (f64),(!REX2) # 0x0f 0x90-0x9f 90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) 91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) From da7042b9a42e43af422b4da2e7e1d7168a32cf51 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 6 May 2025 14:17:55 +0800 Subject: [PATCH 2096/2924] brd: fix aligned_sector from brd_do_discard() [ Upstream commit d4099f8893b057ad7e8d61df76bdeaf807ebd679 ] The calculation is just wrong, fix it by round_up(). Fixes: 9ead7efc6f3f ("brd: implement discard support") Signed-off-by: Yu Kuai Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250506061756.2970934-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/brd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 292f127cae0abe..9549cd71e083b5 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -224,7 +224,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) { - sector_t aligned_sector = (sector + PAGE_SECTORS) & ~PAGE_SECTORS; + sector_t aligned_sector = round_up(sector, PAGE_SECTORS); struct page *page; size -= (aligned_sector - sector) * SECTOR_SIZE; From 24c9cb53e682a5a34c5794d1855cfd98c4f64787 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 6 May 2025 14:17:56 +0800 Subject: [PATCH 2097/2924] brd: fix discard end sector [ Upstream commit a26a339a654b9403f0ee1004f1db4c2b2a355460 ] brd_do_discard() just aligned start sector to page, this can only work if the discard size if at least one page. For example: blkdiscard /dev/ram0 -o 5120 -l 1024 In this case, size = (1024 - (8192 - 5120)), which is a huge value. Fix the problem by round_down() the end sector. Fixes: 9ead7efc6f3f ("brd: implement discard support") Signed-off-by: Yu Kuai Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250506061756.2970934-4-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/brd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 9549cd71e083b5..02fa8106ef549f 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -225,18 +225,21 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) { sector_t aligned_sector = round_up(sector, PAGE_SECTORS); + sector_t aligned_end = round_down( + sector + (size >> SECTOR_SHIFT), PAGE_SECTORS); struct page *page; - size -= (aligned_sector - sector) * SECTOR_SIZE; + if (aligned_end <= aligned_sector) + return; + xa_lock(&brd->brd_pages); - while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) { + while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) { page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT); if (page) { __free_page(page); brd->brd_nr_pages--; } aligned_sector += PAGE_SECTORS; - size -= PAGE_SIZE; } xa_unlock(&brd->brd_pages); } From 4398787d04454600dd4466831f430ec84dcc4d31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 30 Apr 2025 10:55:49 -0400 Subject: [PATCH 2098/2924] kselftest: cpufreq: Get rid of double suspend in rtcwake case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 23b88515a318680337f21d0a2fceee8038ccffc8 ] Commit 0b631ed3ce92 ("kselftest: cpufreq: Add RTC wakeup alarm") added support for automatic wakeup in the suspend routine of the cpufreq kselftest by using rtcwake, however it left the manual power state change in the common path. The end result is that when running the cpufreq kselftest with '-t suspend_rtc' or '-t hibernate_rtc', the system will go to sleep and be woken up by the RTC, but then immediately go to sleep again with no wakeup programmed, so it will sleep forever in an automated testing setup. Fix this by moving the manual power state change so that it only happens when not using rtcwake. Link: https://lore.kernel.org/r/20250430-ksft-cpufreq-suspend-rtc-double-fix-v1-1-dc17a729c5a7@collabora.com Fixes: 0b631ed3ce92 ("kselftest: cpufreq: Add RTC wakeup alarm") Signed-off-by: Nícolas F. R. A. Prado Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/cpufreq/cpufreq.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh index e350c521b46750..3aad9db921b533 100755 --- a/tools/testing/selftests/cpufreq/cpufreq.sh +++ b/tools/testing/selftests/cpufreq/cpufreq.sh @@ -244,9 +244,10 @@ do_suspend() printf "Failed to suspend using RTC wake alarm\n" return 1 fi + else + echo $filename > $SYSFS/power/state fi - echo $filename > $SYSFS/power/state printf "Came out of $1\n" printf "Do basic tests after finishing $1 to verify cpufreq state\n\n" From c9610dda42bd382a96f97e68825cb5f66cd9e1dc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 10 May 2025 18:41:31 +0800 Subject: [PATCH 2099/2924] crypto: marvell/cesa - Handle zero-length skcipher requests [ Upstream commit 8a4e047c6cc07676f637608a9dd675349b5de0a7 ] Do not access random memory for zero-length skcipher requests. Just return 0. Fixes: f63601fd616a ("crypto: marvell/cesa - add a new driver for Marvell's CESA") Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/marvell/cesa/cipher.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index cf62db50f95858..48c5c8ea8c43ec 100644 --- a/drivers/crypto/marvell/cesa/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -459,6 +459,9 @@ static int mv_cesa_skcipher_queue_req(struct skcipher_request *req, struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); struct mv_cesa_engine *engine; + if (!req->cryptlen) + return 0; + ret = mv_cesa_skcipher_req_init(req, tmpl); if (ret) return ret; From 4e83332fe515dcdc6728ffd34319f6a5569b7984 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 10 May 2025 18:43:33 +0800 Subject: [PATCH 2100/2924] crypto: marvell/cesa - Avoid empty transfer descriptor [ Upstream commit 1bafd82d9a40cf09c6c40f1c09cc35b7050b1a9f ] The user may set req->src even if req->nbytes == 0. If there is no data to hash from req->src, do not generate an empty TDMA descriptor. Fixes: db509a45339f ("crypto: marvell/cesa - add TDMA support") Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/marvell/cesa/hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c index f150861ceaf695..6815eddc906812 100644 --- a/drivers/crypto/marvell/cesa/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -663,7 +663,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (ret) goto err_free_tdma; - if (iter.src.sg) { + if (iter.base.len > iter.src.op_offset) { /* * Add all the new data, inserting an operation block and * launch command between each full SRAM block-worth of From 480d4cdfcb2db69226dc6f0142fa86ecdb736b4c Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Wed, 7 May 2025 09:40:15 +0000 Subject: [PATCH 2101/2924] erofs: fix file handle encoding for 64-bit NIDs [ Upstream commit 510de8363f2c3d8e67fa9dfb2366e821382036e0 ] EROFS uses NID to indicate the on-disk inode offset, which can exceed 32 bits. However, the default encode_fh uses the ino32, thus it doesn't work if the image is larger than 128GiB. Let's introduce our own helpers to encode file handles. It's easy to reproduce: 1. prepare an erofs image with nid bigger than U32_MAX 2. mount -t erofs foo.img /mnt/erofs 3. set exportfs with configuration: /mnt/erofs *(rw,sync, no_root_squash) 4. mount -t nfs $IP:/mnt/erofs /mnt/nfs 5. md5sum /mnt/nfs/foo # foo is the file which nid bigger than U32_MAX. # you will get ESTALE error. In the case of overlayfs, the underlying filesystem's file handle is encoded in ovl_fb.fid, which is similar to NFS's case. If the NID of file is larger than U32_MAX, the overlay will get -ESTALE error when calls exportfs_decode_fh. Fixes: 3e917cc305c6 ("erofs: make filesystem exportable") Signed-off-by: Hongbo Li Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20250507094015.14007-1-lihongbo22@huawei.com Signed-off-by: Gao Xiang Signed-off-by: Sasha Levin --- fs/erofs/super.c | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index da6ee7c39290d5..bcb99c3c2594b3 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -510,24 +510,52 @@ static int erofs_fc_parse_param(struct fs_context *fc, return 0; } -static struct inode *erofs_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) +static int erofs_encode_fh(struct inode *inode, u32 *fh, int *max_len, + struct inode *parent) { - return erofs_iget(sb, ino); + erofs_nid_t nid = EROFS_I(inode)->nid; + int len = parent ? 6 : 3; + + if (*max_len < len) { + *max_len = len; + return FILEID_INVALID; + } + + fh[0] = (u32)(nid >> 32); + fh[1] = (u32)(nid & 0xffffffff); + fh[2] = inode->i_generation; + + if (parent) { + nid = EROFS_I(parent)->nid; + + fh[3] = (u32)(nid >> 32); + fh[4] = (u32)(nid & 0xffffffff); + fh[5] = parent->i_generation; + } + + *max_len = len; + return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN; } static struct dentry *erofs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - return generic_fh_to_dentry(sb, fid, fh_len, fh_type, - erofs_nfs_get_inode); + if ((fh_type != FILEID_INO64_GEN && + fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3) + return NULL; + + return d_obtain_alias(erofs_iget(sb, + ((u64)fid->raw[0] << 32) | fid->raw[1])); } static struct dentry *erofs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - return generic_fh_to_parent(sb, fid, fh_len, fh_type, - erofs_nfs_get_inode); + if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6) + return NULL; + + return d_obtain_alias(erofs_iget(sb, + ((u64)fid->raw[3] << 32) | fid->raw[4])); } static struct dentry *erofs_get_parent(struct dentry *child) @@ -543,7 +571,7 @@ static struct dentry *erofs_get_parent(struct dentry *child) } static const struct export_operations erofs_export_ops = { - .encode_fh = generic_encode_ino32_fh, + .encode_fh = erofs_encode_fh, .fh_to_dentry = erofs_fh_to_dentry, .fh_to_parent = erofs_fh_to_parent, .get_parent = erofs_get_parent, From cd04beb9ce2773a16057248bb4fa424068ae3807 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Thu, 15 May 2025 09:48:37 +0800 Subject: [PATCH 2102/2924] erofs: avoid using multiple devices with different type [ Upstream commit 9748f2f54f66743ac77275c34886a9f890e18409 ] For multiple devices, both primary and extra devices should be the same type. `erofs_init_device` has already guaranteed that if the primary is a file-backed device, extra devices should also be regular files. However, if the primary is a block device while the extra device is a file-backed device, `erofs_init_device` will get an ENOTBLK, which is not treated as an error in `erofs_fc_get_tree`, and that leads to an UAF: erofs_fc_get_tree get_tree_bdev_flags(erofs_fc_fill_super) erofs_read_superblock erofs_init_device // sbi->dif0 is not inited yet, // return -ENOTBLK deactivate_locked_super free(sbi) if (err is -ENOTBLK) sbi->dif0.file = filp_open() // sbi UAF So if -ENOTBLK is hitted in `erofs_init_device`, it means the primary device must be a block device, and the extra device is not a block device. The error can be converted to -EINVAL. Fixes: fb176750266a ("erofs: add file-backed mount support") Signed-off-by: Sheng Yong Reviewed-by: Gao Xiang Reviewed-by: Hongbo Li Link: https://lore.kernel.org/r/20250515014837.3315886-1-shengyong1@xiaomi.com Signed-off-by: Gao Xiang Signed-off-by: Sasha Levin --- fs/erofs/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index bcb99c3c2594b3..6e57b9cc6ed2e0 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -165,8 +165,11 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, filp_open(dif->path, O_RDONLY | O_LARGEFILE, 0) : bdev_file_open_by_path(dif->path, BLK_OPEN_READ, sb->s_type, NULL); - if (IS_ERR(file)) + if (IS_ERR(file)) { + if (file == ERR_PTR(-ENOTBLK)) + return -EINVAL; return PTR_ERR(file); + } if (!erofs_is_fileio_mode(sbi)) { dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file), From 234ac83e16296d4fb0c26884999821fe5ce8c0ae Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Mon, 12 May 2025 17:46:53 -0500 Subject: [PATCH 2103/2924] powerpc/pseries/iommu: Fix kmemleak in TCE table userspace view [ Upstream commit d36e3f11fe8b55b801bdbe84ad51f612b1bd84da ] When a device is opened by a userspace driver, via VFIO interface, DMA window is created. This DMA window has TCE Table and a corresponding data for userview of TCE table. When the userspace driver closes the device, all the above infrastructure is free'ed and the device control given back to kernel. Both DMA window and TCE table is getting free'ed. But due to a code bug, userview of the TCE table is not getting free'ed. This is resulting in a memory leak. Befow is the information from KMEMLEAK unreferenced object 0xc008000022af0000 (size 16777216): comm "senlib_unit_tes", pid 9346, jiffies 4294983174 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): kmemleak_vmalloc+0xc8/0x1a0 __vmalloc_node_range+0x284/0x340 vzalloc+0x58/0x70 spapr_tce_create_table+0x4b0/0x8d0 tce_iommu_create_table+0xcc/0x170 [vfio_iommu_spapr_tce] tce_iommu_create_window+0x144/0x2f0 [vfio_iommu_spapr_tce] tce_iommu_ioctl.part.0+0x59c/0xc90 [vfio_iommu_spapr_tce] vfio_fops_unl_ioctl+0x88/0x280 [vfio] sys_ioctl+0xf4/0x160 system_call_exception+0x164/0x310 system_call_vectored_common+0xe8/0x278 unreferenced object 0xc008000023b00000 (size 4194304): comm "senlib_unit_tes", pid 9351, jiffies 4294984116 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): kmemleak_vmalloc+0xc8/0x1a0 __vmalloc_node_range+0x284/0x340 vzalloc+0x58/0x70 spapr_tce_create_table+0x4b0/0x8d0 tce_iommu_create_table+0xcc/0x170 [vfio_iommu_spapr_tce] tce_iommu_create_window+0x144/0x2f0 [vfio_iommu_spapr_tce] tce_iommu_create_default_window+0x88/0x120 [vfio_iommu_spapr_tce] tce_iommu_ioctl.part.0+0x57c/0xc90 [vfio_iommu_spapr_tce] vfio_fops_unl_ioctl+0x88/0x280 [vfio] sys_ioctl+0xf4/0x160 system_call_exception+0x164/0x310 system_call_vectored_common+0xe8/0x278 Fixes: f431a8cde7f1 ("powerpc/iommu: Reimplement the iommu_table_group_ops for pSeries") Signed-off-by: Gaurav Batra Reviewed-by: Nilay Shroff Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250512224653.35697-1-gbatra@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/pseries/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index d6ebc19fb99c51..eec333dd2e598c 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -197,7 +197,7 @@ static void tce_iommu_userspace_view_free(struct iommu_table *tbl) static void tce_free_pSeries(struct iommu_table *tbl) { - if (!tbl->it_userspace) + if (tbl->it_userspace) tce_iommu_userspace_view_free(tbl); } From d42baa6e8f79902475ad6acbcaa7e0369d3e8eda Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 1 May 2025 08:37:54 +0930 Subject: [PATCH 2104/2924] btrfs: scrub: update device stats when an error is detected [ Upstream commit ec1f3a207cdf314eae4d4ae145f1ffdb829f0652 ] [BUG] Since the migration to the new scrub_stripe interface, scrub no longer updates the device stats when hitting an error, no matter if it's a read or checksum mismatch error. E.g: BTRFS info (device dm-2): scrub: started on devid 1 BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488 BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file) BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488 BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file) BTRFS info (device dm-2): scrub: finished on devid 1 with status: 0 Note there is no line showing the device stats error update. [CAUSE] In the migration to the new scrub_stripe interface, we no longer call btrfs_dev_stat_inc_and_print(). [FIX] - Introduce a new bitmap for metadata generation errors * A new bitmap @meta_gen_error_bitmap is introduced to record which blocks have metadata generation mismatch errors. * A new counter for that bitmap @init_nr_meta_gen_errors, is also introduced to store the number of generation mismatch errors that are found during the initial read. This is for the error reporting at scrub_stripe_report_errors(). * New dedicated error message for unrepaired generation mismatches * Update @meta_gen_error_bitmap if a transid mismatch is hit - Add btrfs_dev_stat_inc_and_print() calls to the following call sites * scrub_stripe_report_errors() * scrub_write_endio() This is only for the write errors. This means there is a minor behavior change: - The timing of device stats error message Since we concentrate the error messages at scrub_stripe_report_errors(), the device stats error messages will all show up in one go, after the detailed scrub error messages: BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488 BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file) BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488 BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file) BTRFS error (device dm-2): bdev /dev/mapper/test-scratch1 errs: wr 0, rd 0, flush 0, corrupt 1, gen 0 BTRFS error (device dm-2): bdev /dev/mapper/test-scratch1 errs: wr 0, rd 0, flush 0, corrupt 2, gen 0 Fixes: e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure") Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/scrub.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index c3b2e29e3e019d..61812298325a20 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -153,12 +153,14 @@ struct scrub_stripe { unsigned int init_nr_io_errors; unsigned int init_nr_csum_errors; unsigned int init_nr_meta_errors; + unsigned int init_nr_meta_gen_errors; /* * The following error bitmaps are all for the current status. * Every time we submit a new read, these bitmaps may be updated. * - * error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap; + * error_bitmap = io_error_bitmap | csum_error_bitmap | + * meta_error_bitmap | meta_generation_bitmap; * * IO and csum errors can happen for both metadata and data. */ @@ -166,6 +168,7 @@ struct scrub_stripe { unsigned long io_error_bitmap; unsigned long csum_error_bitmap; unsigned long meta_error_bitmap; + unsigned long meta_gen_error_bitmap; /* For writeback (repair or replace) error reporting. */ unsigned long write_error_bitmap; @@ -672,7 +675,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr } if (stripe->sectors[sector_nr].generation != btrfs_stack_header_generation(header)) { - bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); + bitmap_set(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree); bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, "tree block %llu mirror %u has bad generation, has %llu want %llu", @@ -684,6 +687,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree); bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree); bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); + bitmap_clear(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree); } static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) @@ -972,8 +976,22 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx, if (__ratelimit(&rs) && dev) scrub_print_common_warning("header error", dev, false, stripe->logical, physical); + if (test_bit(sector_nr, &stripe->meta_gen_error_bitmap)) + if (__ratelimit(&rs) && dev) + scrub_print_common_warning("generation error", dev, false, + stripe->logical, physical); } + /* Update the device stats. */ + for (int i = 0; i < stripe->init_nr_io_errors; i++) + btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_READ_ERRS); + for (int i = 0; i < stripe->init_nr_csum_errors; i++) + btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_CORRUPTION_ERRS); + /* Generation mismatch error is based on each metadata, not each block. */ + for (int i = 0; i < stripe->init_nr_meta_gen_errors; + i += (fs_info->nodesize >> fs_info->sectorsize_bits)) + btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_GENERATION_ERRS); + spin_lock(&sctx->stat_lock); sctx->stat.data_extents_scrubbed += stripe->nr_data_extents; sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents; @@ -982,7 +1000,8 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx, sctx->stat.no_csum += nr_nodatacsum_sectors; sctx->stat.read_errors += stripe->init_nr_io_errors; sctx->stat.csum_errors += stripe->init_nr_csum_errors; - sctx->stat.verify_errors += stripe->init_nr_meta_errors; + sctx->stat.verify_errors += stripe->init_nr_meta_errors + + stripe->init_nr_meta_gen_errors; sctx->stat.uncorrectable_errors += bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors); sctx->stat.corrected_errors += nr_repaired_sectors; @@ -1028,6 +1047,8 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work) stripe->nr_sectors); stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors); + stripe->init_nr_meta_gen_errors = bitmap_weight(&stripe->meta_gen_error_bitmap, + stripe->nr_sectors); if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) goto out; @@ -1142,6 +1163,9 @@ static void scrub_write_endio(struct btrfs_bio *bbio) bitmap_set(&stripe->write_error_bitmap, sector_nr, bio_size >> fs_info->sectorsize_bits); spin_unlock_irqrestore(&stripe->write_error_lock, flags); + for (int i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++) + btrfs_dev_stat_inc_and_print(stripe->dev, + BTRFS_DEV_STAT_WRITE_ERRS); } bio_put(&bbio->bio); @@ -1508,10 +1532,12 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe) stripe->init_nr_io_errors = 0; stripe->init_nr_csum_errors = 0; stripe->init_nr_meta_errors = 0; + stripe->init_nr_meta_gen_errors = 0; stripe->error_bitmap = 0; stripe->io_error_bitmap = 0; stripe->csum_error_bitmap = 0; stripe->meta_error_bitmap = 0; + stripe->meta_gen_error_bitmap = 0; } /* From be6188b7ba14311b18ce546048852be5c8b9e936 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 5 May 2025 18:56:18 +0930 Subject: [PATCH 2105/2924] btrfs: scrub: fix a wrong error type when metadata bytenr mismatches [ Upstream commit f2c19541e421b3235efc515dad88b581f00592ae ] When the bytenr doesn't match for a metadata tree block, we will report it as an csum error, which is incorrect and should be reported as a metadata error instead. Fixes: a3ddbaebc7c9 ("btrfs: scrub: introduce a helper to verify one metadata block") Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/scrub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 61812298325a20..4c525a0408125d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -619,7 +619,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr memcpy(on_disk_csum, header->csum, fs_info->csum_size); if (logical != btrfs_stack_header_bytenr(header)) { - bitmap_set(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree); + bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, "tree block %llu mirror %u has bad bytenr, has %llu want %llu", From 14266a7623530dd835db9500a1d7d78b7216b6e2 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 9 May 2025 17:08:50 +0100 Subject: [PATCH 2106/2924] btrfs: fix invalid data space release when truncating block in NOCOW mode [ Upstream commit d3914d6030aa6be2993dfc223d096ff93018c236 ] If when truncating a block we fail to reserve data space and then we proceed anyway because we can do a NOCOW write, if we later get an error when trying to get the folio from the inode's mapping, we end up releasing data space that we haven't reserved, screwing up the bytes_may_use counter from the data space_info, eventually resulting in an underflow when all other reservations done by other tasks are released, if any, or right away if there are no other reservations at the moment. This is because when we get an error when trying to grab the block's folio we call btrfs_delalloc_release_space(), which releases metadata (which we have reserved) and data (which we haven't reserved). Fix this by calling btrfs_delalloc_release_space() only if we did reserve data space, that is, if we aren't falling back to NOCOW, meaning the local variable @only_release_metadata has a false value, otherwise release only metadata by calling btrfs_delalloc_release_metadata(). Fixes: 6d4572a9d71d ("btrfs: allow btrfs_truncate_block() to fallback to nocow for data space reservation") Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 90f5da3c520ac3..8a3f44302788cd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4849,8 +4849,11 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mask); if (IS_ERR(folio)) { - btrfs_delalloc_release_space(inode, data_reserved, block_start, - blocksize, true); + if (only_release_metadata) + btrfs_delalloc_release_metadata(inode, blocksize, true); + else + btrfs_delalloc_release_space(inode, data_reserved, + block_start, blocksize, true); btrfs_delalloc_release_extents(inode, blocksize); ret = -ENOMEM; goto out; From 9f451bcb35131c622faa00f15a3bd61db096b757 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 14 May 2025 10:30:58 +0100 Subject: [PATCH 2107/2924] btrfs: fix wrong start offset for delalloc space release during mmap write [ Upstream commit 17a85f520469a1838379de8ad24f63e778f7c277 ] If we're doing a mmap write against a folio that has i_size somewhere in the middle and we have multiple sectors in the folio, we may have to release excess space previously reserved, for the range going from the rounded up (to sector size) i_size to the folio's end offset. We are calculating the right amount to release and passing it to btrfs_delalloc_release_space(), but we are passing the wrong start offset of that range - we're passing the folio's start offset instead of the end offset, plus 1, of the range for which we keep the reservation. This may result in releasing more space then we should and eventually trigger an underflow of the data space_info's bytes_may_use counter. So fix this by passing the start offset as 'end + 1' instead of 'page_start' to btrfs_delalloc_release_space(). Fixes: d0b7da88f640 ("Btrfs: btrfs_page_mkwrite: Reserve space in sectorsized units") Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71b8a825c4479a..22455fbcb29eba 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1862,7 +1862,7 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) if (reserved_space < fsize) { end = page_start + reserved_space - 1; btrfs_delalloc_release_space(BTRFS_I(inode), - data_reserved, page_start, + data_reserved, end + 1, fsize - reserved_space, true); } } From 6a950a7f15aa3e4965cab62e9bc7a06f62fd341f Mon Sep 17 00:00:00 2001 From: Yongliang Gao Date: Sun, 16 Feb 2025 16:41:09 +0800 Subject: [PATCH 2108/2924] rcu/cpu_stall_cputime: fix the hardirq count for x86 architecture [ Upstream commit da6b85598af30e9fec34d82882d7e1e39f3da769 ] When counting the number of hardirqs in the x86 architecture, it is essential to add arch_irq_stat_cpu to ensure accuracy. For example, a CPU loop within the rcu_read_lock function. Before: [ 70.910184] rcu: INFO: rcu_preempt self-detected stall on CPU [ 70.910436] rcu: 3-....: (4999 ticks this GP) idle=*** [ 70.910711] rcu: hardirqs softirqs csw/system [ 70.910870] rcu: number: 0 657 0 [ 70.911024] rcu: cputime: 0 0 2498 ==> 2498(ms) [ 70.911278] rcu: (t=5001 jiffies g=3677 q=29 ncpus=8) After: [ 68.046132] rcu: INFO: rcu_preempt self-detected stall on CPU [ 68.046354] rcu: 2-....: (4999 ticks this GP) idle=*** [ 68.046628] rcu: hardirqs softirqs csw/system [ 68.046793] rcu: number: 2498 663 0 [ 68.046951] rcu: cputime: 0 0 2496 ==> 2496(ms) [ 68.047244] rcu: (t=5000 jiffies g=3825 q=4 ncpus=8) Fixes: be42f00b73a0 ("rcu: Add RCU stall diagnosis information") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501090842.SfI6QPGS-lkp@intel.com/ Signed-off-by: Yongliang Gao Reviewed-by: Neeraj Upadhyay Link: https://lore.kernel.org/r/20250216084109.3109837-1-leonylgao@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Joel Fernandes Signed-off-by: Sasha Levin --- kernel/rcu/tree.c | 10 +++++++--- kernel/rcu/tree.h | 2 +- kernel/rcu/tree_stall.h | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 659f83e7104869..80b10893b5038d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -801,6 +801,10 @@ static int rcu_watching_snap_save(struct rcu_data *rdp) return 0; } +#ifndef arch_irq_stat_cpu +#define arch_irq_stat_cpu(cpu) 0 +#endif + /* * Returns positive if the specified CPU has passed through a quiescent state * by virtue of being in or having passed through an dynticks idle state since @@ -936,9 +940,9 @@ static int rcu_watching_snap_recheck(struct rcu_data *rdp) rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu); rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu); rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); - rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu); - rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu); - rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu); + rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu); + rsrp->nr_softirqs = kstat_cpu_softirqs_sum(cpu); + rsrp->nr_csw = nr_context_switches_cpu(cpu); rsrp->jiffies = jiffies; rsrp->gp_seq = rdp->gp_seq; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a9a811d9d7a372..1bba2225e7448b 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -168,7 +168,7 @@ struct rcu_snap_record { u64 cputime_irq; /* Accumulated cputime of hard irqs */ u64 cputime_softirq;/* Accumulated cputime of soft irqs */ u64 cputime_system; /* Accumulated cputime of kernel tasks */ - unsigned long nr_hardirqs; /* Accumulated number of hard irqs */ + u64 nr_hardirqs; /* Accumulated number of hard irqs */ unsigned int nr_softirqs; /* Accumulated number of soft irqs */ unsigned long long nr_csw; /* Accumulated number of task switches */ unsigned long jiffies; /* Track jiffies value */ diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 925fcdad5dea22..56b21219442b65 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -435,8 +435,8 @@ static void print_cpu_stat_info(int cpu) rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); pr_err("\t hardirqs softirqs csw/system\n"); - pr_err("\t number: %8ld %10d %12lld\n", - kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs, + pr_err("\t number: %8lld %10d %12lld\n", + kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu) - rsrp->nr_hardirqs, kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs, nr_context_switches_cpu(cpu) - rsrp->nr_csw); pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n", From e71b6f780009861e4314e46b4ce4ec9f9ed64fbb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 15 May 2025 16:28:08 +0800 Subject: [PATCH 2109/2924] crypto: lrw - Only add ecb if it is not already there [ Upstream commit 3d73909bddc2ebb3224a8bc2e5ce00e9df70c15d ] Only add ecb to the cipher name if it isn't already ecb. Also use memcmp instead of strncmp since these strings are all stored in an array of length CRYPTO_MAX_ALG_NAME. Fixes: 700cb3f5fe75 ("crypto: lrw - Convert to skcipher") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202505151503.d8a6cf10-lkp@intel.com Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/lrw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index 391ae0f7641ff9..15f579a768614d 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -322,7 +322,7 @@ static int lrw_create(struct crypto_template *tmpl, struct rtattr **tb) err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), cipher_name, 0, mask); - if (err == -ENOENT) { + if (err == -ENOENT && memcmp(cipher_name, "ecb(", 4)) { err = -ENAMETOOLONG; if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) @@ -356,7 +356,7 @@ static int lrw_create(struct crypto_template *tmpl, struct rtattr **tb) /* Alas we screwed up the naming so we have to mangle the * cipher name. */ - if (!strncmp(cipher_name, "ecb(", 4)) { + if (!memcmp(cipher_name, "ecb(", 4)) { int len; len = strscpy(ecb_name, cipher_name + 4, sizeof(ecb_name)); From e2acafff6d0852ffba97c5c78a149986887fde01 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 15 May 2025 16:34:04 +0800 Subject: [PATCH 2110/2924] crypto: xts - Only add ecb if it is not already there [ Upstream commit 270b6f13454cb7f2f7058c50df64df409c5dcf55 ] Only add ecb to the cipher name if it isn't already ecb. Also use memcmp instead of strncmp since these strings are all stored in an array of length CRYPTO_MAX_ALG_NAME. Fixes: f1c131b45410 ("crypto: xts - Convert to skcipher") Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/xts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/xts.c b/crypto/xts.c index 31529c9ef08f8f..46b7c70ea54bbf 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -363,7 +363,7 @@ static int xts_create(struct crypto_template *tmpl, struct rtattr **tb) err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst), cipher_name, 0, mask); - if (err == -ENOENT) { + if (err == -ENOENT && memcmp(cipher_name, "ecb(", 4)) { err = -ENAMETOOLONG; if (snprintf(name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) @@ -397,7 +397,7 @@ static int xts_create(struct crypto_template *tmpl, struct rtattr **tb) /* Alas we screwed up the naming so we have to mangle the * cipher name. */ - if (!strncmp(cipher_name, "ecb(", 4)) { + if (!memcmp(cipher_name, "ecb(", 4)) { int len; len = strscpy(name, cipher_name + 4, sizeof(name)); From c5a15c74124ae1649323d2a4e856e942c1ca9b1e Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Fri, 16 May 2025 15:06:56 +0300 Subject: [PATCH 2111/2924] crypto: sun8i-ce - move fallback ahash_request to the end of the struct [ Upstream commit c822831b426307a6ca426621504d3c7f99765a39 ] 'struct ahash_request' has a flexible array at the end, so it must be the last member in a struct, to avoid overwriting other struct members. Therefore, move 'fallback_req' to the end of the 'sun8i_ce_hash_reqctx' struct. Fixes: 56f6d5aee88d ("crypto: sun8i-ce - support hash algorithms") Signed-off-by: Ovidiu Panait Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 3b5c2af013d0da..83df4d71905318 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -308,8 +308,8 @@ struct sun8i_ce_hash_tfm_ctx { * @flow: the flow to use for this request */ struct sun8i_ce_hash_reqctx { - struct ahash_request fallback_req; int flow; + struct ahash_request fallback_req; // keep at the end }; /* From 74c84deec50a9c1dc66dcffcd1a0e32262b6701f Mon Sep 17 00:00:00 2001 From: Xuewen Yan Date: Mon, 3 Mar 2025 18:52:39 +0800 Subject: [PATCH 2112/2924] sched/fair: Fixup wake_up_sync() vs DELAYED_DEQUEUE [ Upstream commit aa3ee4f0b7541382c9f6f43f7408d73a5d4f4042 ] Delayed dequeued feature keeps a sleeping task enqueued until its lag has elapsed. As a result, it stays also visible in rq->nr_running. So when in wake_affine_idle(), we should use the real running-tasks in rq to check whether we should place the wake-up task to current cpu. On the other hand, add a helper function to return the nr-delayed. Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue") Signed-off-by: Xuewen Yan Reviewed-and-tested-by: Tianchen Ding Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20250303105241.17251-2-xuewen.yan@unisoc.com Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0fb9bf995a4795..0c04ed41485259 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7196,6 +7196,11 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) return true; } +static inline unsigned int cfs_h_nr_delayed(struct rq *rq) +{ + return (rq->cfs.h_nr_queued - rq->cfs.h_nr_runnable); +} + #ifdef CONFIG_SMP /* Working cpumask for: sched_balance_rq(), sched_balance_newidle(). */ @@ -7357,8 +7362,12 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync) if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu)) return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu; - if (sync && cpu_rq(this_cpu)->nr_running == 1) - return this_cpu; + if (sync) { + struct rq *rq = cpu_rq(this_cpu); + + if ((rq->nr_running - cfs_h_nr_delayed(rq)) == 1) + return this_cpu; + } if (available_idle_cpu(prev_cpu)) return prev_cpu; From 6a740abc981a8d4d8d931deea1dc253e614c9cf7 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Tue, 20 May 2025 08:20:49 +0000 Subject: [PATCH 2113/2924] kunit: Fix wrong parameter to kunit_deactivate_static_stub() [ Upstream commit 772e50a76ee664e75581624f512df4e45582605a ] kunit_deactivate_static_stub() accepts real_fn_addr instead of replacement_addr. In the case, it always passes NULL to kunit_deactivate_static_stub(). Fix it. Link: https://lore.kernel.org/r/20250520082050.2254875-1-tzungbi@kernel.org Fixes: e047c5eaa763 ("kunit: Expose 'static stub' API to redirect functions") Signed-off-by: Tzung-Bi Shih Reviewed-by: David Gow Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- lib/kunit/static_stub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kunit/static_stub.c b/lib/kunit/static_stub.c index 92b2cccd5e7633..484fd85251b415 100644 --- a/lib/kunit/static_stub.c +++ b/lib/kunit/static_stub.c @@ -96,7 +96,7 @@ void __kunit_activate_static_stub(struct kunit *test, /* If the replacement address is NULL, deactivate the stub. */ if (!replacement_addr) { - kunit_deactivate_static_stub(test, replacement_addr); + kunit_deactivate_static_stub(test, real_fn_addr); return; } From a5b50bfc177eac3e5eaaa5205b5ed6e70810bebc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Mar 2025 22:21:08 +0100 Subject: [PATCH 2114/2924] gfs2: Move gfs2_trans_add_databufs [ Upstream commit d50a64e3c55e59e45e415c65531b0d76ad4cea36 ] Move gfs2_trans_add_databufs() to trans.c. Pass in a glock instead of a gfs2_inode. Signed-off-by: Andreas Gruenbacher Stable-dep-of: 5a90f8d49922 ("gfs2: Don't start unnecessary transactions during log flush") Signed-off-by: Sasha Levin --- fs/gfs2/aops.c | 23 +---------------------- fs/gfs2/aops.h | 2 -- fs/gfs2/bmap.c | 3 ++- fs/gfs2/trans.c | 21 +++++++++++++++++++++ fs/gfs2/trans.h | 2 ++ 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 68fc8af14700d3..ed2c708a215a46 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -37,27 +37,6 @@ #include "aops.h" -void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio, - size_t from, size_t len) -{ - struct buffer_head *head = folio_buffers(folio); - unsigned int bsize = head->b_size; - struct buffer_head *bh; - size_t to = from + len; - size_t start, end; - - for (bh = head, start = 0; bh != head || !start; - bh = bh->b_this_page, start = end) { - end = start + bsize; - if (end <= from) - continue; - if (start >= to) - break; - set_buffer_uptodate(bh); - gfs2_trans_add_data(ip->i_gl, bh); - } -} - /** * gfs2_get_block_noalloc - Fills in a buffer head with details about a block * @inode: The inode @@ -133,7 +112,7 @@ static int __gfs2_jdata_write_folio(struct folio *folio, inode->i_sb->s_blocksize, BIT(BH_Dirty)|BIT(BH_Uptodate)); } - gfs2_trans_add_databufs(ip, folio, 0, folio_size(folio)); + gfs2_trans_add_databufs(ip->i_gl, folio, 0, folio_size(folio)); } return gfs2_write_jdata_folio(folio, wbc); } diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h index a10c4334d24893..f9fa41aaeaf410 100644 --- a/fs/gfs2/aops.h +++ b/fs/gfs2/aops.h @@ -9,7 +9,5 @@ #include "incore.h" void adjust_fs_space(struct inode *inode); -void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio, - size_t from, size_t len); #endif /* __AOPS_DOT_H__ */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 366516b98b3f31..b81984def58ec3 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -988,7 +988,8 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos, struct gfs2_sbd *sdp = GFS2_SB(inode); if (!gfs2_is_stuffed(ip)) - gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos), + gfs2_trans_add_databufs(ip->i_gl, folio, + offset_in_folio(folio, pos), copied); folio_unlock(folio); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index f8ae2c666fd609..075f7e9abe47ca 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -226,6 +226,27 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) unlock_buffer(bh); } +void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio, + size_t from, size_t len) +{ + struct buffer_head *head = folio_buffers(folio); + unsigned int bsize = head->b_size; + struct buffer_head *bh; + size_t to = from + len; + size_t start, end; + + for (bh = head, start = 0; bh != head || !start; + bh = bh->b_this_page, start = end) { + end = start + bsize; + if (end <= from) + continue; + if (start >= to) + break; + set_buffer_uptodate(bh); + gfs2_trans_add_data(gl, bh); + } +} + void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) { diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index f8ce5302280d31..790c55f59e6121 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -42,6 +42,8 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, void gfs2_trans_end(struct gfs2_sbd *sdp); void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio, + size_t from, size_t len); void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh); void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); From 2be0abcf8fef614c615e4de4b6a622632c953583 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 28 Mar 2025 22:47:02 +0100 Subject: [PATCH 2115/2924] gfs2: Don't start unnecessary transactions during log flush [ Upstream commit 5a90f8d499225512a385585ffe3e28f687263d47 ] Commit 8d391972ae2d ("gfs2: Remove __gfs2_writepage()") changed the log flush code in gfs2_ail1_start_one() to call aops->writepages() instead of aops->writepage(). For jdata inodes, this means that we will now try to reserve log space and start a transaction before we can determine that the pages in question have already been journaled. When this happens in the context of gfs2_logd(), it can now appear that not enough log space is available for freeing up log space, and we will lock up. Fix that by issuing journal writes directly instead of going through aops->writepages() in the log flush code. Fixes: 8d391972ae2d ("gfs2: Remove __gfs2_writepage()") Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/aops.c | 31 +++++++++++++++++++++++++++++++ fs/gfs2/aops.h | 1 + fs/gfs2/log.c | 7 ++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index ed2c708a215a46..eb4270e82ef8ee 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -117,6 +117,37 @@ static int __gfs2_jdata_write_folio(struct folio *folio, return gfs2_write_jdata_folio(folio, wbc); } +/** + * gfs2_jdata_writeback - Write jdata folios to the log + * @mapping: The mapping to write + * @wbc: The writeback control + * + * Returns: errno + */ +int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(mapping->host); + struct folio *folio = NULL; + int error; + + BUG_ON(current->journal_info); + if (gfs2_assert_withdraw(sdp, ip->i_gl->gl_state == LM_ST_EXCLUSIVE)) + return 0; + + while ((folio = writeback_iter(mapping, wbc, folio, &error))) { + if (folio_test_checked(folio)) { + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); + continue; + } + error = __gfs2_jdata_write_folio(folio, wbc); + } + + return error; +} + /** * gfs2_writepages - Write a bunch of dirty pages back to disk * @mapping: The mapping to write diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h index f9fa41aaeaf410..bf002522a78220 100644 --- a/fs/gfs2/aops.h +++ b/fs/gfs2/aops.h @@ -9,5 +9,6 @@ #include "incore.h" void adjust_fs_space(struct inode *inode); +int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc); #endif /* __AOPS_DOT_H__ */ diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f9c5089783d24c..115c4ac457e90a 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -31,6 +31,7 @@ #include "dir.h" #include "trace_gfs2.h" #include "trans.h" +#include "aops.h" static void gfs2_log_shutdown(struct gfs2_sbd *sdp); @@ -131,7 +132,11 @@ __acquires(&sdp->sd_ail_lock) if (!mapping) continue; spin_unlock(&sdp->sd_ail_lock); - ret = mapping->a_ops->writepages(mapping, wbc); + BUG_ON(GFS2_SB(mapping->host) != sdp); + if (gfs2_is_jdata(GFS2_I(mapping->host))) + ret = gfs2_jdata_writeback(mapping, wbc); + else + ret = mapping->a_ops->writepages(mapping, wbc); if (need_resched()) { blk_finish_plug(plug); cond_resched(); From 8959b098e138bb87e85cc4cd93d6630a40c3e597 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 May 2025 18:29:38 +0800 Subject: [PATCH 2116/2924] crypto: api - Redo lookup on EEXIST [ Upstream commit 0a3cf32da469ff1df6e016f5f82b439a63d14461 ] When two crypto algorithm lookups occur at the same time with different names for the same algorithm, e.g., ctr(aes-generic) and ctr(aes), they will both be instantiated. However, only one of them can be registered. The second instantiation will fail with EEXIST. Avoid failing the second lookup by making it retry, but only once because there are tricky names such as gcm_base(ctr(aes),ghash) that will always fail, despite triggering instantiation and EEXIST. Reported-by: Ingo Franzki Fixes: 2825982d9d66 ("[CRYPTO] api: Added event notification") Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/api.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 3416e98128a059..8592d3dccc64e6 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -220,10 +220,19 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg, if (crypto_is_test_larval(larval)) crypto_larval_kill(larval); alg = ERR_PTR(-ETIMEDOUT); - } else if (!alg) { + } else if (!alg || PTR_ERR(alg) == -EEXIST) { + int err = alg ? -EEXIST : -EAGAIN; + + /* + * EEXIST is expected because two probes can be scheduled + * at the same time with one using alg_name and the other + * using driver_name. Do a re-lookup but do not retry in + * case we hit a quirk like gcm_base(ctr(aes),...) which + * will never match. + */ alg = &larval->alg; alg = crypto_alg_lookup(alg->cra_name, type, mask) ?: - ERR_PTR(-EAGAIN); + ERR_PTR(err); } else if (IS_ERR(alg)) ; else if (crypto_is_test_larval(larval) && From 8bfc1d3ed69f5f48a42d4fd2b084bcaf40043a66 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Tue, 1 Apr 2025 21:43:11 +0300 Subject: [PATCH 2117/2924] ACPICA: exserial: don't forget to handle FFixedHW opregions for reading [ Upstream commit 0f8af0356a45547683a216e4921006a3c6a6d922 ] The initial commit that introduced support for FFixedHW operation regions did add a special case in the AcpiExReadSerialBus If, but forgot to actually handle it inside the switch, so add the missing case to prevent reads from failing with AE_AML_INVALID_SPACE_ID. Link: https://github.com/acpica/acpica/pull/998 Fixes: ee64b827a9a ("ACPICA: Add support for FFH Opregion special context data") Signed-off-by: Daniil Tatianin Link: https://patch.msgid.link/20250401184312.599962-1-d-tatianin@yandex-team.ru Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpica/exserial.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/acpica/exserial.c b/drivers/acpi/acpica/exserial.c index 5241f4c01c7655..89a4ac447a2bea 100644 --- a/drivers/acpi/acpica/exserial.c +++ b/drivers/acpi/acpica/exserial.c @@ -201,6 +201,12 @@ acpi_ex_read_serial_bus(union acpi_operand_object *obj_desc, function = ACPI_READ; break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + + buffer_length = ACPI_FFH_INPUT_BUFFER_SIZE; + function = ACPI_READ; + break; + default: return_ACPI_STATUS(AE_AML_INVALID_SPACE_ID); } From d1c661043a3139164045f6032bd49a0bd2088ac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Sun, 6 Apr 2025 09:15:07 +1000 Subject: [PATCH 2118/2924] ASoC: tas2764: Reinit cache on part reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 592ab3936b096da5deb64d4c906edbeb989174d6 ] When the part is reset in component_probe, do not forget to reinit the regcache, otherwise the cache can get out of sync with the part's actual state. This fix is similar to commit 0a0342ede303 ("ASoC: tas2770: Reinit regcache on reset") which concerned the tas2770 driver. Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Reviewed-by: Neal Gompa Signed-off-by: Martin Povišer Signed-off-by: James Calligeros Link: https://patch.msgid.link/20250406-apple-codec-changes-v5-3-50a00ec850a3@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 08aa7ee3425689..49b73b74b2d9dd 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -546,6 +546,8 @@ static uint8_t sn012776_bop_presets[] = { 0x06, 0x3e, 0x37, 0x30, 0xff, 0xe6 }; +static const struct regmap_config tas2764_i2c_regmap; + static int tas2764_codec_probe(struct snd_soc_component *component) { struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); @@ -559,6 +561,7 @@ static int tas2764_codec_probe(struct snd_soc_component *component) } tas2764_reset(tas2764); + regmap_reinit_cache(tas2764->regmap, &tas2764_i2c_regmap); if (tas2764->irq) { ret = snd_soc_component_write(tas2764->component, TAS2764_INT_MASK0, 0xff); From 98dc82ae5b197bf1a5548c5e01996261104fff25 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Sun, 6 Apr 2025 09:15:08 +1000 Subject: [PATCH 2119/2924] ASoC: tas2764: Enable main IRQs [ Upstream commit dd50f0e38563f15819059c923bf142200453e003 ] IRQ handling was added in commit dae191fb957f ("ASoC: tas2764: Add IRQ handling") however that same commit masks all interrupts coming from the chip. Unmask the "main" interrupts so that we can see and deal with a number of errors including clock, voltage, and current. Fixes: dae191fb957f ("ASoC: tas2764: Add IRQ handling") Reviewed-by: Neal Gompa Signed-off-by: Hector Martin Signed-off-by: James Calligeros Link: https://patch.msgid.link/20250406-apple-codec-changes-v5-4-50a00ec850a3@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2764.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 49b73b74b2d9dd..fbfe4d032df7b2 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -564,7 +564,7 @@ static int tas2764_codec_probe(struct snd_soc_component *component) regmap_reinit_cache(tas2764->regmap, &tas2764_i2c_regmap); if (tas2764->irq) { - ret = snd_soc_component_write(tas2764->component, TAS2764_INT_MASK0, 0xff); + ret = snd_soc_component_write(tas2764->component, TAS2764_INT_MASK0, 0x00); if (ret < 0) return ret; From 183e7329d41d7a8e298f48b6b0eb81102a8654de Mon Sep 17 00:00:00 2001 From: Julien Massot Date: Thu, 17 Apr 2025 10:44:33 +0200 Subject: [PATCH 2120/2924] ASoC: mediatek: mt8195: Set ETDM1/2 IN/OUT to COMP_DUMMY() [ Upstream commit 7af317f7faaab09d5a78f24605057d11f5955115 ] ETDM2_IN_BE and ETDM1_OUT_BE are defined as COMP_EMPTY(), in the case the codec dai_name will be null. Avoid a crash if the device tree is not assigning a codec to these links. [ 1.179936] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 1.181065] Mem abort info: [ 1.181420] ESR = 0x0000000096000004 [ 1.181892] EC = 0x25: DABT (current EL), IL = 32 bits [ 1.182576] SET = 0, FnV = 0 [ 1.182964] EA = 0, S1PTW = 0 [ 1.183367] FSC = 0x04: level 0 translation fault [ 1.183983] Data abort info: [ 1.184406] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 1.185097] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 1.185766] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 1.186439] [0000000000000000] user address but active_mm is swapper [ 1.187239] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP [ 1.188029] Modules linked in: [ 1.188420] CPU: 7 UID: 0 PID: 70 Comm: kworker/u32:1 Not tainted 6.14.0-rc4-next-20250226+ #85 [ 1.189515] Hardware name: Radxa NIO 12L (DT) [ 1.190065] Workqueue: events_unbound deferred_probe_work_func [ 1.190808] pstate: 40400009 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 1.191683] pc : __pi_strcmp+0x24/0x140 [ 1.192170] lr : mt8195_mt6359_soc_card_probe+0x224/0x7b0 [ 1.192854] sp : ffff800083473970 [ 1.193271] x29: ffff800083473a10 x28: 0000000000001008 x27: 0000000000000002 [ 1.194168] x26: ffff800082408960 x25: ffff800082417db0 x24: ffff800082417d88 [ 1.195065] x23: 000000000000001e x22: ffff800082dbf480 x21: ffff800082dc07b8 [ 1.195961] x20: 0000000000000000 x19: 0000000000000013 x18: 00000000ffffffff [ 1.196858] x17: 000000040044ffff x16: 005000f2b5503510 x15: 0000000000000006 [ 1.197755] x14: ffff800082407af0 x13: 6e6f69737265766e x12: 692d6b636f6c6374 [ 1.198651] x11: 0000000000000002 x10: ffff80008240b920 x9 : 0000000000000018 [ 1.199547] x8 : 0101010101010101 x7 : 0000000000000000 x6 : 0000000000000000 [ 1.200443] x5 : 0000000000000000 x4 : 8080808080000000 x3 : 303933383978616d [ 1.201339] x2 : 0000000000000000 x1 : ffff80008240b920 x0 : 0000000000000000 [ 1.202236] Call trace: [ 1.202545] __pi_strcmp+0x24/0x140 (P) [ 1.203029] mtk_soundcard_common_probe+0x3bc/0x5b8 [ 1.203644] platform_probe+0x70/0xe8 [ 1.204106] really_probe+0xc8/0x3a0 [ 1.204556] __driver_probe_device+0x84/0x160 [ 1.205104] driver_probe_device+0x44/0x130 [ 1.205630] __device_attach_driver+0xc4/0x170 [ 1.206189] bus_for_each_drv+0x8c/0xf8 [ 1.206672] __device_attach+0xa8/0x1c8 [ 1.207155] device_initial_probe+0x1c/0x30 [ 1.207681] bus_probe_device+0xb0/0xc0 [ 1.208165] deferred_probe_work_func+0xa4/0x100 [ 1.208747] process_one_work+0x158/0x3e0 [ 1.209254] worker_thread+0x2c4/0x3e8 [ 1.209727] kthread+0x134/0x1f0 [ 1.210136] ret_from_fork+0x10/0x20 [ 1.210589] Code: 54000401 b50002c6 d503201f f86a6803 (f8408402) [ 1.211355] ---[ end trace 0000000000000000 ]--- Signed-off-by: Julien Massot Fixes: e70b8dd26711 ("ASoC: mediatek: mt8195: Remove afe-dai component and rework codec link") Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20250417-mt8395-audio-sof-v1-2-30587426e5dd@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/mediatek/mt8195/mt8195-mt6359.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359.c b/sound/soc/mediatek/mt8195/mt8195-mt6359.c index df29a9fa5aee5b..1fa664b56f30fa 100644 --- a/sound/soc/mediatek/mt8195/mt8195-mt6359.c +++ b/sound/soc/mediatek/mt8195/mt8195-mt6359.c @@ -822,12 +822,12 @@ SND_SOC_DAILINK_DEFS(ETDM1_IN_BE, SND_SOC_DAILINK_DEFS(ETDM2_IN_BE, DAILINK_COMP_ARRAY(COMP_CPU("ETDM2_IN")), - DAILINK_COMP_ARRAY(COMP_EMPTY()), + DAILINK_COMP_ARRAY(COMP_DUMMY()), DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(ETDM1_OUT_BE, DAILINK_COMP_ARRAY(COMP_CPU("ETDM1_OUT")), - DAILINK_COMP_ARRAY(COMP_EMPTY()), + DAILINK_COMP_ARRAY(COMP_DUMMY()), DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(ETDM2_OUT_BE, From 31ef6f7c9aee3be78d63789653e92350f2537f93 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:18 +0800 Subject: [PATCH 2121/2924] EDAC/skx_common: Fix general protection fault [ Upstream commit 20d2d476b3ae18041be423671a8637ed5ffd6958 ] After loading i10nm_edac (which automatically loads skx_edac_common), if unload only i10nm_edac, then reload it and perform error injection testing, a general protection fault may occur: mce: [Hardware Error]: Machine check events logged Oops: general protection fault ... ... Workqueue: events mce_gen_pool_process RIP: 0010:string+0x53/0xe0 ... Call Trace: ? die_addr+0x37/0x90 ? exc_general_protection+0x1e7/0x3f0 ? asm_exc_general_protection+0x26/0x30 ? string+0x53/0xe0 vsnprintf+0x23e/0x4c0 snprintf+0x4d/0x70 skx_adxl_decode+0x16a/0x330 [skx_edac_common] skx_mce_check_error.part.0+0xf8/0x220 [skx_edac_common] skx_mce_check_error+0x17/0x20 [skx_edac_common] ... The issue arose was because the variable 'adxl_component_count' (inside skx_edac_common), which counts the ADXL components, was not reset. During the reloading of i10nm_edac, the count was incremented by the actual number of ADXL components again, resulting in a count that was double the real number of ADXL components. This led to an out-of-bounds reference to the ADXL component array, causing the general protection fault above. Fix this issue by resetting the 'adxl_component_count' in adxl_put(), which is called during the unloading of {skx,i10nm}_edac. Fixes: 123b15863550 ("EDAC, i10nm: make skx_common.o a separate module") Reported-by: Feng Xu Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-2-qiuxu.zhuo@intel.com Signed-off-by: Sasha Levin --- drivers/edac/skx_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index fa5b442b184499..c9ade45c1a99f3 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -116,6 +116,7 @@ EXPORT_SYMBOL_GPL(skx_adxl_get); void skx_adxl_put(void) { + adxl_component_count = 0; kfree(adxl_values); kfree(adxl_msg); } From 1725a4f49e530bdb62d57a57da2639b0efac7f37 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:19 +0800 Subject: [PATCH 2122/2924] EDAC/{skx_common,i10nm}: Fix the loss of saved RRL for HBM pseudo channel 0 [ Upstream commit eeed3e03f4261e5e381a72ae099ff00ccafbb437 ] When enabling the retry_rd_err_log (RRL) feature during the loading of the i10nm_edac driver with the module parameter retry_rd_err_log=2 (Linux RRL control mode), the default values of the control bits of RRL are saved so that they can be restored during the unloading of the driver. In the current code, the RRL of pseudo channel 1 of HBM overwrites pseudo channel 0 during the loading of the driver, resulting in the loss of saved RRL for pseudo channel 0. This causes the RRL of pseudo channel 0 of HBM to be wrongly restored with the values from pseudo channel 1 when unloading the driver. Fix this issue by creating two separate groups of RRL control registers per channel to save default RRL settings of two {sub-,pseudo-}channels. Fixes: acd4cf68fefe ("EDAC/i10nm: Retrieve and print retry_rd_err_log registers for HBM") Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-3-qiuxu.zhuo@intel.com Signed-off-by: Sasha Levin --- drivers/edac/i10nm_base.c | 35 +++++++++++++++++++---------------- drivers/edac/skx_common.h | 11 ++++++++--- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 355a977019e944..355b527d839e78 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -95,7 +95,7 @@ static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; -static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, +static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 *rrl_ctl, u32 *offsets_scrub, u32 *offsets_demand, u32 *offsets_demand2) { @@ -108,10 +108,10 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable if (enable) { /* Save default configurations */ - imc->chan[chan].retry_rd_err_log_s = s; - imc->chan[chan].retry_rd_err_log_d = d; + rrl_ctl[0] = s; + rrl_ctl[1] = d; if (offsets_demand2) - imc->chan[chan].retry_rd_err_log_d2 = d2; + rrl_ctl[2] = d2; s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; s |= RETRY_RD_ERR_LOG_EN; @@ -125,25 +125,25 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable } } else { /* Restore default configurations */ - if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) + if (rrl_ctl[0] & RETRY_RD_ERR_LOG_UC) s |= RETRY_RD_ERR_LOG_UC; - if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER) + if (rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER) s |= RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN)) + if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN)) s &= ~RETRY_RD_ERR_LOG_EN; - if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC) + if (rrl_ctl[1] & RETRY_RD_ERR_LOG_UC) d |= RETRY_RD_ERR_LOG_UC; - if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER) + if (rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER) d |= RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) + if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN)) d &= ~RETRY_RD_ERR_LOG_EN; if (offsets_demand2) { - if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC) + if (rrl_ctl[2] & RETRY_RD_ERR_LOG_UC) d2 |= RETRY_RD_ERR_LOG_UC; - if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER)) + if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER)) d2 &= ~RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN)) + if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN)) d2 &= ~RETRY_RD_ERR_LOG_EN; } } @@ -157,6 +157,7 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable static void enable_retry_rd_err_log(bool enable) { int i, j, imc_num, chan_num; + struct skx_channel *chan; struct skx_imc *imc; struct skx_dev *d; @@ -171,8 +172,9 @@ static void enable_retry_rd_err_log(bool enable) if (!imc->mbase) continue; + chan = d->imc[i].chan; for (j = 0; j < chan_num; j++) - __enable_retry_rd_err_log(imc, j, enable, + __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], res_cfg->offsets_scrub, res_cfg->offsets_demand, res_cfg->offsets_demand2); @@ -186,12 +188,13 @@ static void enable_retry_rd_err_log(bool enable) if (!imc->mbase || !imc->hbm_mc) continue; + chan = d->imc[i].chan; for (j = 0; j < chan_num; j++) { - __enable_retry_rd_err_log(imc, j, enable, + __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], res_cfg->offsets_scrub_hbm0, res_cfg->offsets_demand_hbm0, NULL); - __enable_retry_rd_err_log(imc, j, enable, + __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[1], res_cfg->offsets_scrub_hbm1, res_cfg->offsets_demand_hbm1, NULL); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index ca5408803f8787..5afd425f3b4ff1 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -79,6 +79,9 @@ */ #define MCACOD_EXT_MEM_ERR 0x280 +/* Max RRL register sets per {,sub-,pseudo-}channel. */ +#define NUM_RRL_SET 3 + /* * Each cpu socket contains some pci devices that provide global * information, and also some that are local to each of the two @@ -117,9 +120,11 @@ struct skx_dev { struct skx_channel { struct pci_dev *cdev; struct pci_dev *edev; - u32 retry_rd_err_log_s; - u32 retry_rd_err_log_d; - u32 retry_rd_err_log_d2; + /* + * Two groups of RRL control registers per channel to save default RRL + * settings of two {sub-,pseudo-}channels in Linux RRL control mode. + */ + u32 rrl_ctl[2][NUM_RRL_SET]; struct skx_dimm { u8 close_pg; u8 bank_xor_enable; From d7f6f73454b4d039079aecae8c336fa01872bf4b Mon Sep 17 00:00:00 2001 From: Vishwaroop A Date: Wed, 16 Apr 2025 11:06:01 +0000 Subject: [PATCH 2123/2924] spi: tegra210-quad: Fix X1_X2_X4 encoding and support x4 transfers [ Upstream commit dcb06c638a1174008a985849fa30fc0da7d08904 ] This patch corrects the QSPI_COMMAND_X1_X2_X4 and QSPI_ADDRESS_X1_X2_X4 macros to properly encode the bus width for x1, x2, and x4 transfers. Although these macros were previously incorrect, they were not being used in the driver, so no functionality was affected. The patch updates tegra_qspi_cmd_config() and tegra_qspi_addr_config() function calls to use the actual bus width from the transfer, instead of hardcoding it to 0 (which implied x1 mode). This change enables proper support for x1, x2, and x4 data transfers by correctly configuring the interface width for commands and addresses. These modifications improve the QSPI driver's flexibility and prepare it for future use cases that may require different bus widths for commands and addresses. Fixes: 1b8342cc4a38 ("spi: tegra210-quad: combined sequence mode") Signed-off-by: Vishwaroop A Link: https://patch.msgid.link/20250416110606.2737315-2-va@nvidia.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-tegra210-quad.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 64e1b2f8a0001c..3d9c5fb237825d 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -134,7 +134,7 @@ #define QSPI_COMMAND_VALUE_SET(X) (((x) & 0xFF) << 0) #define QSPI_CMB_SEQ_CMD_CFG 0x1a0 -#define QSPI_COMMAND_X1_X2_X4(x) (((x) & 0x3) << 13) +#define QSPI_COMMAND_X1_X2_X4(x) ((((x) >> 1) & 0x3) << 13) #define QSPI_COMMAND_X1_X2_X4_MASK (0x03 << 13) #define QSPI_COMMAND_SDR_DDR BIT(12) #define QSPI_COMMAND_SIZE_SET(x) (((x) & 0xFF) << 0) @@ -147,7 +147,7 @@ #define QSPI_ADDRESS_VALUE_SET(X) (((x) & 0xFFFF) << 0) #define QSPI_CMB_SEQ_ADDR_CFG 0x1ac -#define QSPI_ADDRESS_X1_X2_X4(x) (((x) & 0x3) << 13) +#define QSPI_ADDRESS_X1_X2_X4(x) ((((x) >> 1) & 0x3) << 13) #define QSPI_ADDRESS_X1_X2_X4_MASK (0x03 << 13) #define QSPI_ADDRESS_SDR_DDR BIT(12) #define QSPI_ADDRESS_SIZE_SET(x) (((x) & 0xFF) << 0) @@ -1036,10 +1036,6 @@ static u32 tegra_qspi_addr_config(bool is_ddr, u8 bus_width, u8 len) { u32 addr_config = 0; - /* Extract Address configuration and value */ - is_ddr = 0; //Only SDR mode supported - bus_width = 0; //X1 mode - if (is_ddr) addr_config |= QSPI_ADDRESS_SDR_DDR; else @@ -1079,13 +1075,13 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, switch (transfer_phase) { case CMD_TRANSFER: /* X1 SDR mode */ - cmd_config = tegra_qspi_cmd_config(false, 0, + cmd_config = tegra_qspi_cmd_config(false, xfer->tx_nbits, xfer->len); cmd_value = *((const u8 *)(xfer->tx_buf)); break; case ADDR_TRANSFER: /* X1 SDR mode */ - addr_config = tegra_qspi_addr_config(false, 0, + addr_config = tegra_qspi_addr_config(false, xfer->tx_nbits, xfer->len); address_value = *((const u32 *)(xfer->tx_buf)); break; From cb796521476ed7c21f0446cbff265d801de498e7 Mon Sep 17 00:00:00 2001 From: Vishwaroop A Date: Wed, 16 Apr 2025 11:06:02 +0000 Subject: [PATCH 2124/2924] spi: tegra210-quad: remove redundant error handling code [ Upstream commit 400d9f1a27cc2fceabdb1ed93eaf0b89b6d32ba5 ] Remove unnecessary error handling code that terminated transfers and executed delay on errors. This code was redundant as error handling is already done at a higher level in the SPI core. Fixes: 1b8342cc4a38 ("spi: tegra210-quad: combined sequence mode") Signed-off-by: Vishwaroop A Link: https://patch.msgid.link/20250416110606.2737315-3-va@nvidia.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-tegra210-quad.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 3d9c5fb237825d..b2872853d6ed86 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -1175,10 +1175,6 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, exit: msg->status = ret; - if (ret < 0) { - tegra_qspi_transfer_end(spi); - spi_transfer_delay_exec(xfer); - } return ret; } From ccd0ecc13090d6faf574c136d6fb8e42046f3e68 Mon Sep 17 00:00:00 2001 From: Vishwaroop A Date: Wed, 16 Apr 2025 11:06:03 +0000 Subject: [PATCH 2125/2924] spi: tegra210-quad: modify chip select (CS) deactivation [ Upstream commit d8966b65413390d1b5b706886987caac05fbe024 ] Modify the chip select (CS) deactivation and inter-transfer delay execution only during the DATA_TRANSFER phase when the cs_change flag is not set. This ensures proper CS handling and timing between transfers while eliminating redundant operations. Fixes: 1b8342cc4a38 ("spi: tegra210-quad: combined sequence mode") Signed-off-by: Vishwaroop A Link: https://patch.msgid.link/20250416110606.2737315-4-va@nvidia.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-tegra210-quad.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index b2872853d6ed86..665c06e1473beb 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -1159,16 +1159,16 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, ret = -EIO; goto exit; } - if (!xfer->cs_change) { - tegra_qspi_transfer_end(spi); - spi_transfer_delay_exec(xfer); - } break; default: ret = -EINVAL; goto exit; } msg->actual_length += xfer->len; + if (!xfer->cs_change && transfer_phase == DATA_TRANSFER) { + tegra_qspi_transfer_end(spi); + spi_transfer_delay_exec(xfer); + } transfer_phase++; } ret = 0; From 36fd041c11fa57cf8cf5890525617bef848cfdb4 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Fri, 7 Mar 2025 08:38:09 +0300 Subject: [PATCH 2126/2924] power: reset: at91-reset: Optimize at91_reset() [ Upstream commit 62d48983f215bf1dd48665913318101fa3414dcf ] This patch adds a small optimization to the low-level at91_reset() function, which includes: - Removes the extra branch, since the following store operations already have proper condition checks. - Removes the definition of the clobber register r4, since it is no longer used in the code. Fixes: fcd0532fac2a ("power: reset: at91-reset: make at91sam9g45_restart() generic") Signed-off-by: Alexander Shiyan Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/20250307053809.20245-1-eagle.alexander923@gmail.com Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/reset/at91-reset.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/power/reset/at91-reset.c b/drivers/power/reset/at91-reset.c index 036b18a1f90f80..511f5a8f8961ce 100644 --- a/drivers/power/reset/at91-reset.c +++ b/drivers/power/reset/at91-reset.c @@ -129,12 +129,11 @@ static int at91_reset(struct notifier_block *this, unsigned long mode, " str %4, [%0, %6]\n\t" /* Disable SDRAM1 accesses */ "1: tst %1, #0\n\t" - " beq 2f\n\t" " strne %3, [%1, #" __stringify(AT91_DDRSDRC_RTR) "]\n\t" /* Power down SDRAM1 */ " strne %4, [%1, %6]\n\t" /* Reset CPU */ - "2: str %5, [%2, #" __stringify(AT91_RSTC_CR) "]\n\t" + " str %5, [%2, #" __stringify(AT91_RSTC_CR) "]\n\t" " b .\n\t" : @@ -145,7 +144,7 @@ static int at91_reset(struct notifier_block *this, unsigned long mode, "r" cpu_to_le32(AT91_DDRSDRC_LPCB_POWER_DOWN), "r" (reset->data->reset_args), "r" (reset->ramc_lpr) - : "r4"); + ); return NOTIFY_DONE; } From 14cbdd64f3870cf0a2d94b87919b9056448c59a0 Mon Sep 17 00:00:00 2001 From: Yaxiong Tian Date: Fri, 18 Apr 2025 09:06:13 +0800 Subject: [PATCH 2127/2924] PM: EM: Fix potential division-by-zero error in em_compute_costs() [ Upstream commit 179c0c7044a378198adb36f2a12410ab68cc730a ] When the device is of a non-CPU type, table[i].performance won't be initialized in the previous em_init_performance(), resulting in division by zero when calculating costs in em_compute_costs(). Since the 'cost' algorithm is only used for EAS energy efficiency calculations and is currently not utilized by other device drivers, we should add the _is_cpu_device(dev) check to prevent this division-by-zero issue. Fixes: 1b600da51073 ("PM: EM: Optimize em_cpu_energy() and remove division") Signed-off-by: Yaxiong Tian Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/tencent_7F99ED4767C1AF7889D0D8AD50F34859CE06@qq.com Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- kernel/power/energy_model.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index d9b7e2b38c7a9f..41606247c27763 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -233,6 +233,10 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, unsigned long prev_cost = ULONG_MAX; int i, ret; + /* This is needed only for CPUs and EAS skip other devices */ + if (!_is_cpu_device(dev)) + return 0; + /* Compute the cost of each performance state. */ for (i = nr_states - 1; i >= 0; i--) { unsigned long power_res, cost; From 7f16be2b2927fdcfe40b596b7411c46d23a82034 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Mar 2025 17:34:06 +0300 Subject: [PATCH 2128/2924] power: supply: max77705: Fix workqueue error handling in probe [ Upstream commit 11741b8e382d34b13277497ab91123d8b0b5c2db ] The create_singlethread_workqueue() doesn't return error pointers, it returns NULL. Also cleanup the workqueue on the error paths. Fixes: a6a494c8e3ce ("power: supply: max77705: Add charger driver for Maxim 77705") Signed-off-by: Dan Carpenter Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/547656e3-4a5f-4f2e-802b-4edcb7c576b0@stanley.mountain Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/max77705_charger.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/power/supply/max77705_charger.c b/drivers/power/supply/max77705_charger.c index eec5e9ef795efd..329b430d0e5065 100644 --- a/drivers/power/supply/max77705_charger.c +++ b/drivers/power/supply/max77705_charger.c @@ -545,20 +545,28 @@ static int max77705_charger_probe(struct i2c_client *i2c) return dev_err_probe(dev, ret, "failed to add irq chip\n"); chg->wqueue = create_singlethread_workqueue(dev_name(dev)); - if (IS_ERR(chg->wqueue)) - return dev_err_probe(dev, PTR_ERR(chg->wqueue), "failed to create workqueue\n"); + if (!chg->wqueue) + return dev_err_probe(dev, -ENOMEM, "failed to create workqueue\n"); ret = devm_work_autocancel(dev, &chg->chgin_work, max77705_chgin_isr_work); - if (ret) - return dev_err_probe(dev, ret, "failed to initialize interrupt work\n"); + if (ret) { + dev_err_probe(dev, ret, "failed to initialize interrupt work\n"); + goto destroy_wq; + } max77705_charger_initialize(chg); ret = max77705_charger_enable(chg); - if (ret) - return dev_err_probe(dev, ret, "failed to enable charge\n"); + if (ret) { + dev_err_probe(dev, ret, "failed to enable charge\n"); + goto destroy_wq; + } return devm_add_action_or_reset(dev, max77705_charger_disable, chg); + +destroy_wq: + destroy_workqueue(chg->wqueue); + return ret; } static const struct of_device_id max77705_charger_of_match[] = { From c0805147153aea78a207d1490dbba9701f39eee0 Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Mon, 28 Apr 2025 17:48:28 +0000 Subject: [PATCH 2129/2924] platform/chrome: cros_ec_typec: Set Pin Assignment E in DP PORT VDO [ Upstream commit a9635ef0ca12e7914f42bfa7ca6a019f606c2817 ] Pin C and D are used on C-to-C cable applications including docks, and for USB-C adapters that convert from DP over USB-C to other video standards. Pin Assignment E is intended to be used with adapter from USB-C to DP plugs or receptacles. All Chromebook USB-C DFPs support DisplayPort Alternate Mode as the DP Source with support for all 3 pin assignments. Pin Assignment E is required in order to support if the user attaches a Pin E C-to-DP cable. Without this, the displayport.c alt mode driver will error out of dp_altmode_probe with an -ENODEV, as it cannot find a compatible matching pin assignment between the DFP_D and UFP_D. Fixes: dbb3fc0ffa95 ("platform/chrome: cros_ec_typec: Displayport support") Signed-off-by: Benson Leung Reviewed-by: Jameson Thies Reviewed-by: Abhishek Pandit-Subedi Link: https://lore.kernel.org/r/20250428174828.13939-1-bleung@chromium.org Signed-off-by: Tzung-Bi Shih Signed-off-by: Sasha Levin --- drivers/platform/chrome/cros_ec_typec.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index d2228720991ffe..7678e3d05fd36f 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -22,8 +22,10 @@ #define DRV_NAME "cros-ec-typec" -#define DP_PORT_VDO (DP_CONF_SET_PIN_ASSIGN(BIT(DP_PIN_ASSIGN_C) | BIT(DP_PIN_ASSIGN_D)) | \ - DP_CAP_DFP_D | DP_CAP_RECEPTACLE) +#define DP_PORT_VDO (DP_CAP_DFP_D | DP_CAP_RECEPTACLE | \ + DP_CONF_SET_PIN_ASSIGN(BIT(DP_PIN_ASSIGN_C) | \ + BIT(DP_PIN_ASSIGN_D) | \ + BIT(DP_PIN_ASSIGN_E))) static void cros_typec_role_switch_quirk(struct fwnode_handle *fwnode) { From 31dfe21b2c46c7140d9171ee3114e0d91b9dc2ca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 23:25:12 -0700 Subject: [PATCH 2130/2924] ASoC: SOF: ipc4-pcm: Adjust pipeline_list->pipelines allocation type [ Upstream commit 00a371adbbfb46db561db85a9d7b53b2363880a1 ] In preparation for making the kmalloc family of allocators type aware, we need to make sure that the returned type from the allocation matches the type of the variable being assigned. (Before, the allocator would always return "void *", which can be implicitly cast to any pointer type.) The assigned type is "struct snd_sof_pipeline **", but the returned type will be "struct snd_sof_widget **". These are the same size allocation (pointer size) but the types don't match. Adjust the allocation type to match the assignment. Signed-off-by: Kees Cook Fixes: 9c04363d222b ("ASoC: SOF: Introduce struct snd_sof_pipeline") Acked-by: Peter Ujfalusi Link: https://patch.msgid.link/20250426062511.work.859-kees@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/ipc4-pcm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index c09b424ab863d7..8eee3e1aadf932 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -784,7 +784,8 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm /* allocate memory for max number of pipeline IDs */ pipeline_list->pipelines = kcalloc(ipc4_data->max_num_pipelines, - sizeof(struct snd_sof_widget *), GFP_KERNEL); + sizeof(*pipeline_list->pipelines), + GFP_KERNEL); if (!pipeline_list->pipelines) { sof_ipc4_pcm_free(sdev, spcm); return -ENOMEM; From c386c80239449213f75b109663a93413205b4f67 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 26 Apr 2025 16:13:41 +0200 Subject: [PATCH 2131/2924] ASoC: Intel: avs: Fix kcalloc() sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d20df86b056b95845f6ed52da1010059202a0c23 ] rlist, clist, and slist are allocated using sizeof(pointer) instead of sizeof(*pointer). Fix the allocations by using sizeof(*pointer) and avoid overallocating memory on 64-bit systems. Fixes: f2f847461fb7 ("ASoC: Intel: avs: Constrain path based on BE capabilities") Signed-off-by: Thorsten Blum Reviewed-by: Amadeusz Sławiński Link: https://patch.msgid.link/20250426141342.94134-2-thorsten.blum@linux.dev Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/avs/path.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/avs/path.c b/sound/soc/intel/avs/path.c index cafb8c6198bedb..8f1bf8d0af8f99 100644 --- a/sound/soc/intel/avs/path.c +++ b/sound/soc/intel/avs/path.c @@ -131,9 +131,9 @@ int avs_path_set_constraint(struct avs_dev *adev, struct avs_tplg_path_template list_for_each_entry(path_template, &template->path_list, node) i++; - rlist = kcalloc(i, sizeof(rlist), GFP_KERNEL); - clist = kcalloc(i, sizeof(clist), GFP_KERNEL); - slist = kcalloc(i, sizeof(slist), GFP_KERNEL); + rlist = kcalloc(i, sizeof(*rlist), GFP_KERNEL); + clist = kcalloc(i, sizeof(*clist), GFP_KERNEL); + slist = kcalloc(i, sizeof(*slist), GFP_KERNEL); i = 0; list_for_each_entry(path_template, &template->path_list, node) { From 63626222b331b2a2f9540d9f286ed649f57bb316 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Fri, 2 May 2025 21:12:41 +0530 Subject: [PATCH 2132/2924] ASoC: SOF: amd: add missing acp descriptor field [ Upstream commit 7c2bad7b95db5b4b978853cd4dd042ae3ec83e63 ] Add missing acp descriptor field acp_error_stat for ACP7.0 platform. Fixes: 490be7ba2a01 ("ASoC: SOF: amd: add support for acp7.0 based platform") Signed-off-by: Vijendar Mukunda Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Link: https://patch.msgid.link/20250502154445.3008598-3-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/amd/pci-acp70.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sof/amd/pci-acp70.c b/sound/soc/sof/amd/pci-acp70.c index 8fa1170a2161e9..9108f1139ff2dc 100644 --- a/sound/soc/sof/amd/pci-acp70.c +++ b/sound/soc/sof/amd/pci-acp70.c @@ -33,6 +33,7 @@ static const struct sof_amd_acp_desc acp70_chip_info = { .ext_intr_cntl = ACP70_EXTERNAL_INTR_CNTL, .ext_intr_stat = ACP70_EXT_INTR_STAT, .ext_intr_stat1 = ACP70_EXT_INTR_STAT1, + .acp_error_stat = ACP70_ERROR_STATUS, .dsp_intr_base = ACP70_DSP_SW_INTR_BASE, .acp_sw0_i2s_err_reason = ACP7X_SW0_I2S_ERROR_REASON, .sram_pte_offset = ACP70_SRAM_PTE_OFFSET, From 71fd4767e9a2540c4c4507ee8b5f3f5431118b2e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 5 May 2025 17:26:51 +0800 Subject: [PATCH 2133/2924] PM: wakeup: Delete space in the end of string shown by pm_show_wakelocks() [ Upstream commit f0050a3e214aa941b78ad4caf122a735a24d81a6 ] pm_show_wakelocks() is called to generate a string when showing attributes /sys/power/wake_(lock|unlock), but the string ends with an unwanted space that was added back by mistake by commit c9d967b2ce40 ("PM: wakeup: simplify the output logic of pm_show_wakelocks()"). Remove the unwanted space. Fixes: c9d967b2ce40 ("PM: wakeup: simplify the output logic of pm_show_wakelocks()") Signed-off-by: Zijun Hu Link: https://patch.msgid.link/20250505-fix_power-v1-1-0f7f2c2f338c@quicinc.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- kernel/power/wakelock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 52571dcad768b9..4e941999a53ba6 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -49,6 +49,9 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) len += sysfs_emit_at(buf, len, "%s ", wl->name); } + if (len > 0) + --len; + len += sysfs_emit_at(buf, len, "\n"); mutex_unlock(&wakelocks_lock); From 7d74f6cf7449f8fe880c9f5734a61a250f0ecd16 Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Thu, 17 Apr 2025 15:39:46 +0800 Subject: [PATCH 2134/2924] ACPI: resource: fix a typo for MECHREVO in irq1_edge_low_force_override[] [ Upstream commit 113e04276018bd13978051d8b05a613b4d390cc9 ] The vendor name for MECHREVO was incorrectly spelled in commit b53f09ecd602 ("ACPI: resource: Do IRQ override on MECHREV GM7XG0M"). Correct this typo in this trivial patch. Fixes: b53f09ecd602 ("ACPI: resource: Do IRQ override on MECHREV GM7XG0M") Signed-off-by: Mingcong Bai Link: https://patch.msgid.link/20250417073947.47419-1-jeffbai@aosc.io Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 14c7bac4100b46..7d59c6c9185fc1 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -534,7 +534,7 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = { */ static const struct dmi_system_id irq1_edge_low_force_override[] = { { - /* MECHREV Jiaolong17KS Series GM7XG0M */ + /* MECHREVO Jiaolong17KS Series GM7XG0M */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "GM7XG0M"), }, From c3102581a71d74b4740a1c0b4cfc8d7faade93f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= Date: Thu, 27 Mar 2025 20:59:26 +0100 Subject: [PATCH 2135/2924] PM: runtime: Add new devm functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 73db799bf5efc5a04654bb3ff6c9bf63a0dfa473 ] Add `devm_pm_runtime_set_active_enabled()` and `devm_pm_runtime_get_noresume()` for simplifying common cases in drivers. Signed-off-by: Bence Csókás Link: https://patch.msgid.link/20250327195928.680771-3-csokas.bence@prolan.hu Signed-off-by: Rafael J. Wysocki Stable-dep-of: 8856eafcc05e ("spi: atmel-quadspi: Fix unbalanced pm_runtime by using devm_ API") Signed-off-by: Sasha Levin --- drivers/base/power/runtime.c | 44 ++++++++++++++++++++++++++++++++++++ include/linux/pm_runtime.h | 4 ++++ 2 files changed, 48 insertions(+) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 0e127b0329c00c..205a4f8828b0ac 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1568,6 +1568,32 @@ void pm_runtime_enable(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_enable); +static void pm_runtime_set_suspended_action(void *data) +{ + pm_runtime_set_suspended(data); +} + +/** + * devm_pm_runtime_set_active_enabled - set_active version of devm_pm_runtime_enable. + * + * @dev: Device to handle. + */ +int devm_pm_runtime_set_active_enabled(struct device *dev) +{ + int err; + + err = pm_runtime_set_active(dev); + if (err) + return err; + + err = devm_add_action_or_reset(dev, pm_runtime_set_suspended_action, dev); + if (err) + return err; + + return devm_pm_runtime_enable(dev); +} +EXPORT_SYMBOL_GPL(devm_pm_runtime_set_active_enabled); + static void pm_runtime_disable_action(void *data) { pm_runtime_dont_use_autosuspend(data); @@ -1590,6 +1616,24 @@ int devm_pm_runtime_enable(struct device *dev) } EXPORT_SYMBOL_GPL(devm_pm_runtime_enable); +static void pm_runtime_put_noidle_action(void *data) +{ + pm_runtime_put_noidle(data); +} + +/** + * devm_pm_runtime_get_noresume - devres-enabled version of pm_runtime_get_noresume. + * + * @dev: Device to handle. + */ +int devm_pm_runtime_get_noresume(struct device *dev) +{ + pm_runtime_get_noresume(dev); + + return devm_add_action_or_reset(dev, pm_runtime_put_noidle_action, dev); +} +EXPORT_SYMBOL_GPL(devm_pm_runtime_get_noresume); + /** * pm_runtime_forbid - Block runtime PM of a device. * @dev: Device to handle. diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 7fb5a459847ef3..756b842dcd3091 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -96,7 +96,9 @@ extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); extern void pm_runtime_release_supplier(struct device_link *link); +int devm_pm_runtime_set_active_enabled(struct device *dev); extern int devm_pm_runtime_enable(struct device *dev); +int devm_pm_runtime_get_noresume(struct device *dev); /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. @@ -294,7 +296,9 @@ static inline bool pm_runtime_blocked(struct device *dev) { return true; } static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} +static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; } static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } +static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; } static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} From c0e1adf4053ba2de499db00269c8dadf2382ff2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20Cs=C3=B3k=C3=A1s?= Date: Thu, 27 Mar 2025 20:59:27 +0100 Subject: [PATCH 2136/2924] spi: atmel-quadspi: Fix unbalanced pm_runtime by using devm_ API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8856eafcc05ecf54d6dd2b6c67804fefd276472c ] Fix unbalanced PM in error path of `atmel_qspi_probe()` by using `devm_pm_runtime_*()` functions. Reported-by: Alexander Dahl Closes: https://lore.kernel.org/linux-spi/20250110-paycheck-irregular-bcddab1276c7@thorsis.com/ Fixes: 5af42209a4d2 ("spi: atmel-quadspi: Add support for sama7g5 QSPI") Signed-off-by: Bence Csókás Link: https://patch.msgid.link/20250327195928.680771-4-csokas.bence@prolan.hu Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/atmel-quadspi.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index 244ac010686298..e7b61dc4ce6766 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -1436,22 +1436,17 @@ static int atmel_qspi_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(&pdev->dev, 500); pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); - pm_runtime_get_noresume(&pdev->dev); + devm_pm_runtime_set_active_enabled(&pdev->dev); + devm_pm_runtime_get_noresume(&pdev->dev); err = atmel_qspi_init(aq); if (err) goto dma_release; err = spi_register_controller(ctrl); - if (err) { - pm_runtime_put_noidle(&pdev->dev); - pm_runtime_disable(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); - pm_runtime_dont_use_autosuspend(&pdev->dev); + if (err) goto dma_release; - } + pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); @@ -1530,10 +1525,6 @@ static void atmel_qspi_remove(struct platform_device *pdev) */ dev_warn(&pdev->dev, "Failed to resume device on remove\n"); } - - pm_runtime_disable(&pdev->dev); - pm_runtime_dont_use_autosuspend(&pdev->dev); - pm_runtime_put_noidle(&pdev->dev); } static int __maybe_unused atmel_qspi_suspend(struct device *dev) From a239e3d296727817d5225a0af48065fc8050f543 Mon Sep 17 00:00:00 2001 From: Jiaqing Zhao Date: Fri, 9 May 2025 17:06:33 +0000 Subject: [PATCH 2137/2924] x86/mtrr: Check if fixed-range MTRRs exist in mtrr_save_fixed_ranges() [ Upstream commit 824c6384e8d9275d4ec7204f3f79a4ac6bc10379 ] When suspending, save_processor_state() calls mtrr_save_fixed_ranges() to save fixed-range MTRRs. On platforms without fixed-range MTRRs like the ACRN hypervisor which has removed fixed-range MTRR emulation, accessing these MSRs will trigger an unchecked MSR access error. Make sure fixed-range MTRRs are supported before access to prevent such error. Since mtrr_state.have_fixed is only set when MTRRs are present and enabled, checking the CPU feature flag in mtrr_save_fixed_ranges() is unnecessary. Fixes: 3ebad5905609 ("[PATCH] x86: Save and restore the fixed-range MTRRs of the BSP when suspending") Signed-off-by: Jiaqing Zhao Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250509170633.3411169-2-jiaqing.zhao@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/mtrr/generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e2c6b471d2302a..8c18327eb10bbb 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -593,7 +593,7 @@ static void get_fixed_ranges(mtrr_type *frs) void mtrr_save_fixed_ranges(void *info) { - if (boot_cpu_has(X86_FEATURE_MTRR)) + if (mtrr_state.have_fixed) get_fixed_ranges(mtrr_state.fixed_ranges); } From d1ddcc5551ac4808e1199bf1ac819cd089e5fc86 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 9 May 2025 14:51:47 +0200 Subject: [PATCH 2138/2924] PM: sleep: Print PM debug messages during hibernation [ Upstream commit 1b17d4525bca3916644c41e01522df8fa0f8b90b ] Commit cdb8c100d8a4 ("include/linux/suspend.h: Only show pm_pr_dbg messages at suspend/resume") caused PM debug messages to only be printed during system-wide suspend and resume in progress, but it forgot about hibernation. Address this by adding a check for hibernation in progress to pm_debug_messages_should_print(). Fixes: cdb8c100d8a4 ("include/linux/suspend.h: Only show pm_pr_dbg messages at suspend/resume") Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/4998903.GXAFRqVoOG@rjwysocki.net Signed-off-by: Sasha Levin --- kernel/power/hibernate.c | 5 +++++ kernel/power/main.c | 3 ++- kernel/power/power.h | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 23c0f4e6cb2ffe..5af9c7ee98cd4a 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -90,6 +90,11 @@ void hibernate_release(void) atomic_inc(&hibernate_atomic); } +bool hibernation_in_progress(void) +{ + return !atomic_read(&hibernate_atomic); +} + bool hibernation_available(void) { return nohibernate == 0 && diff --git a/kernel/power/main.c b/kernel/power/main.c index 6254814d481714..0622e7dacf1720 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -613,7 +613,8 @@ bool pm_debug_messages_on __read_mostly; bool pm_debug_messages_should_print(void) { - return pm_debug_messages_on && pm_suspend_target_state != PM_SUSPEND_ON; + return pm_debug_messages_on && (hibernation_in_progress() || + pm_suspend_target_state != PM_SUSPEND_ON); } EXPORT_SYMBOL_GPL(pm_debug_messages_should_print); diff --git a/kernel/power/power.h b/kernel/power/power.h index c352dea2f67b56..f8496f40b54fa5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -71,10 +71,14 @@ extern void enable_restore_image_protection(void); static inline void enable_restore_image_protection(void) {} #endif /* CONFIG_STRICT_KERNEL_RWX */ +extern bool hibernation_in_progress(void); + #else /* !CONFIG_HIBERNATION */ static inline void hibernate_reserved_size_init(void) {} static inline void hibernate_image_size_init(void) {} + +static inline bool hibernation_in_progress(void) { return false; } #endif /* !CONFIG_HIBERNATION */ #define power_attr(_name) \ From 652277e82d6ebf6eb0c1d229b1c59abb0b546ed6 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Thu, 20 Mar 2025 19:11:59 +0100 Subject: [PATCH 2139/2924] spi: spi-qpic-snand: use kmalloc() for OOB buffer allocation [ Upstream commit f48d80503504257682e493dc17408f2f0b47bcfa ] The qcom_spi_ecc_init_ctx_pipelined() function allocates zeroed memory for the OOB buffer, then it fills the buffer with '0xff' bytes right after the allocation. In this case zeroing the memory during allocation is superfluous, so use kmalloc() instead of kzalloc() to avoid that. Signed-off-by: Gabor Juhos Link: https://patch.msgid.link/20250320-qpic-snand-kmalloc-v1-1-94e267550675@gmail.com Signed-off-by: Mark Brown Stable-dep-of: 65cb56d49f6e ("spi: spi-qpic-snand: validate user/chip specific ECC properties") Signed-off-by: Sasha Levin --- drivers/spi/spi-qpic-snand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 94948c8781e83f..924aa8461963f3 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -261,7 +261,7 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) ecc_cfg = kzalloc(sizeof(*ecc_cfg), GFP_KERNEL); if (!ecc_cfg) return -ENOMEM; - snandc->qspi->oob_buf = kzalloc(mtd->writesize + mtd->oobsize, + snandc->qspi->oob_buf = kmalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL); if (!snandc->qspi->oob_buf) { kfree(ecc_cfg); From a2d539827d5d7481b6604c7ad809bf32d4d4d8ed Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Thu, 1 May 2025 18:19:16 +0200 Subject: [PATCH 2140/2924] spi: spi-qpic-snand: validate user/chip specific ECC properties [ Upstream commit 65cb56d49f6edea409600a3c61effc70ee5d43d8 ] The driver only supports 512 bytes ECC step size and 4 bit ECC strength at the moment, however it does not reject unsupported step/strength configurations. Due to this, whenever the driver is used with a flash chip which needs stronger ECC protection, the following warning is shown in the kernel log: [ 0.574648] spi-nand spi0.0: GigaDevice SPI NAND was found. [ 0.635748] spi-nand spi0.0: 256 MiB, block size: 128 KiB, page size: 2048, OOB size: 128 [ 0.649079] nand: WARNING: (null): the ECC used on your system is too weak compared to the one required by the NAND chip Although the message indicates that something is wrong, but it often gets unnoticed, which can cause serious problems. For example when the user writes something into the flash chip despite the warning, the written data may won't be readable by the bootloader or by the boot ROM. In the worst case, when the attached SPI NAND chip is the boot device, the board may not be able to boot anymore. Also, it is not even possible to create a backup of the flash, because reading its content results in bogus data. For example, dumping the first page of the flash gives this: # hexdump -C -n 2048 /dev/mtd0 00000000 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 00000040 0f 0f 0f 0f 0f 0f 0f 0d 0f 0f 0f 0f 0f 0f 0f 0f |................| 00000050 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 000001c0 0f 0f 0f 0f ff 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| 000001d0 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 00000200 0f 0f 0f 0f f5 5b ff ff 0f 0f 0f 0f 0f 0f 0f 0f |.....[..........| 00000210 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 000002f0 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 1f 0f 0f |................| 00000300 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 000003c0 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f ff 0f 0f 0f |................| 000003d0 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 00000400 0f 0f 0f 0f 0f 0f 0f 0f e9 74 c9 06 f5 5b ff ff |.........t...[..| 00000410 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 000005d0 0f 0f 0f 0f ff 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| 000005e0 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 00000600 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f c6 be 0f c3 |................| 00000610 e9 74 c9 06 f5 5b ff ff 0f 0f 0f 0f 0f 0f 0f 0f |.t...[..........| 00000620 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 00000770 0f 0f 0f 0f 8f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| 00000780 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 00000800 # Doing the same by using the downstream kernel results in different output: # hexdump -C -n 2048 /dev/mtd0 00000000 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f 0f |................| * 00000800 # This patch adds some sanity checks to the code to prevent using the driver with unsupported ECC step/strength configurations. After the change, probing of the driver fails in such cases: [ 0.655038] spi-nand spi0.0: GigaDevice SPI NAND was found. [ 0.659159] spi-nand spi0.0: 256 MiB, block size: 128 KiB, page size: 2048, OOB size: 128 [ 0.669138] qcom_snand 79b0000.spi: only 4 bits ECC strength is supported [ 0.677476] nand: No suitable ECC configuration [ 0.689909] spi-nand spi0.0: probe with driver spi-nand failed with error -95 This helps to avoid the aforementioned hassles until support for 8 bit ECC strength gets implemented. Fixes: 7304d1909080 ("spi: spi-qpic: add driver for QCOM SPI NAND flash Interface") Signed-off-by: Gabor Juhos Link: https://patch.msgid.link/20250501-qpic-snand-validate-ecc-v1-1-532776581a66@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-qpic-snand.c | 42 +++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 924aa8461963f3..44a8f58e46fe12 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -250,9 +250,11 @@ static const struct mtd_ooblayout_ops qcom_spi_ooblayout = { static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) { struct qcom_nand_controller *snandc = nand_to_qcom_snand(nand); + struct nand_ecc_props *reqs = &nand->ecc.requirements; + struct nand_ecc_props *user = &nand->ecc.user_conf; struct nand_ecc_props *conf = &nand->ecc.ctx.conf; struct mtd_info *mtd = nanddev_to_mtd(nand); - int cwperpage, bad_block_byte; + int cwperpage, bad_block_byte, ret; struct qpic_ecc *ecc_cfg; cwperpage = mtd->writesize / NANDC_STEP_SIZE; @@ -261,11 +263,39 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) ecc_cfg = kzalloc(sizeof(*ecc_cfg), GFP_KERNEL); if (!ecc_cfg) return -ENOMEM; + + if (user->step_size && user->strength) { + ecc_cfg->step_size = user->step_size; + ecc_cfg->strength = user->strength; + } else if (reqs->step_size && reqs->strength) { + ecc_cfg->step_size = reqs->step_size; + ecc_cfg->strength = reqs->strength; + } else { + /* use defaults */ + ecc_cfg->step_size = NANDC_STEP_SIZE; + ecc_cfg->strength = 4; + } + + if (ecc_cfg->step_size != NANDC_STEP_SIZE) { + dev_err(snandc->dev, + "only %u bytes ECC step size is supported\n", + NANDC_STEP_SIZE); + ret = -EOPNOTSUPP; + goto err_free_ecc_cfg; + } + + if (ecc_cfg->strength != 4) { + dev_err(snandc->dev, + "only 4 bits ECC strength is supported\n"); + ret = -EOPNOTSUPP; + goto err_free_ecc_cfg; + } + snandc->qspi->oob_buf = kmalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL); if (!snandc->qspi->oob_buf) { - kfree(ecc_cfg); - return -ENOMEM; + ret = -ENOMEM; + goto err_free_ecc_cfg; } memset(snandc->qspi->oob_buf, 0xff, mtd->writesize + mtd->oobsize); @@ -280,8 +310,6 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) ecc_cfg->bytes = ecc_cfg->ecc_bytes_hw + ecc_cfg->spare_bytes + ecc_cfg->bbm_size; ecc_cfg->steps = 4; - ecc_cfg->strength = 4; - ecc_cfg->step_size = 512; ecc_cfg->cw_data = 516; ecc_cfg->cw_size = ecc_cfg->cw_data + ecc_cfg->bytes; bad_block_byte = mtd->writesize - ecc_cfg->cw_size * (cwperpage - 1) + 1; @@ -339,6 +367,10 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) ecc_cfg->strength, ecc_cfg->step_size); return 0; + +err_free_ecc_cfg: + kfree(ecc_cfg); + return ret; } static void qcom_spi_ecc_cleanup_ctx_pipelined(struct nand_device *nand) From 3346e88278b4ca74c72d35be63f361ad88028b9a Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 2 Apr 2025 10:38:52 +0200 Subject: [PATCH 2141/2924] thermal/drivers/mediatek/lvts: Fix debugfs unregister on failure [ Upstream commit b49825661af93d9b8d7236f914803f136896f8fd ] When running the probe function for this driver, the function lvts_debugfs_init() gets called in lvts_domain_init() which, in turn, gets called in lvts_probe() before registering threaded interrupt handlers. Even though it's unlikely, the last call may fail and, if it does, there's nothing removing the already created debugfs folder and files. In order to fix that, instead of calling the lvts debugfs cleanup function upon failure, register a devm action that will take care of calling that upon failure or driver removal. Since devm was used, also delete the call to lvts_debugfs_exit() in the lvts_remove() callback, as now that's done automatically. Fixes: f5f633b18234 ("thermal/drivers/mediatek: Add the Low Voltage Thermal Sensor driver") Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20250402083852.20624-1-angelogioacchino.delregno@collabora.com Signed-off-by: Daniel Lezcano Signed-off-by: Sasha Levin --- drivers/thermal/mediatek/lvts_thermal.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index 088481d91e6e29..c0be4ca55c7b84 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ b/drivers/thermal/mediatek/lvts_thermal.c @@ -213,6 +213,13 @@ static const struct debugfs_reg32 lvts_regs[] = { LVTS_DEBUG_FS_REGS(LVTS_CLKEN), }; +static void lvts_debugfs_exit(void *data) +{ + struct lvts_domain *lvts_td = data; + + debugfs_remove_recursive(lvts_td->dom_dentry); +} + static int lvts_debugfs_init(struct device *dev, struct lvts_domain *lvts_td) { struct debugfs_regset32 *regset; @@ -245,12 +252,7 @@ static int lvts_debugfs_init(struct device *dev, struct lvts_domain *lvts_td) debugfs_create_regset32("registers", 0400, dentry, regset); } - return 0; -} - -static void lvts_debugfs_exit(struct lvts_domain *lvts_td) -{ - debugfs_remove_recursive(lvts_td->dom_dentry); + return devm_add_action_or_reset(dev, lvts_debugfs_exit, lvts_td); } #else @@ -1374,8 +1376,6 @@ static void lvts_remove(struct platform_device *pdev) for (i = 0; i < lvts_td->num_lvts_ctrl; i++) lvts_ctrl_set_enable(&lvts_td->lvts_ctrl[i], false); - - lvts_debugfs_exit(lvts_td); } static const struct lvts_ctrl_data mt7988_lvts_ap_data_ctrl[] = { From 9b8f454872f160d08c183d7dde6086b46bd51bd5 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Thu, 10 Apr 2025 18:54:54 +0200 Subject: [PATCH 2142/2924] ACPI: OSI: Stop advertising support for "3.0 _SCP Extensions" [ Upstream commit 8cf4fdac9bdead7bca15fc56fdecdf78d11c3ec6 ] As specified in section 5.7.2 of the ACPI specification the feature group string "3.0 _SCP Extensions" implies that the operating system evaluates the _SCP control method with additional parameters. However the ACPI thermal driver evaluates the _SCP control method without those additional parameters, conflicting with the above feature group string advertised to the firmware thru _OSI. Stop advertising support for this feature string to avoid confusing the ACPI firmware. Fixes: e5f660ebef68 ("ACPI / osi: Collect _OSI handling into one single file") Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20250410165456.4173-2-W_Armin@gmx.de Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/osi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c index df9328c850bd33..f2c943b934be0a 100644 --- a/drivers/acpi/osi.c +++ b/drivers/acpi/osi.c @@ -42,7 +42,6 @@ static struct acpi_osi_entry osi_setup_entries[OSI_STRING_ENTRIES_MAX] __initdata = { {"Module Device", true}, {"Processor Device", true}, - {"3.0 _SCP Extensions", true}, {"Processor Aggregator Device", true}, }; From 00e4ea01c751c0e94594561e7a4054344e27e560 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Thu, 10 Apr 2025 18:54:55 +0200 Subject: [PATCH 2143/2924] ACPI: thermal: Execute _SCP before reading trip points [ Upstream commit 3f7cd28ae3d1a1d6f151178469cfaef1b07fdbcc ] As specified in section 11.4.13 of the ACPI specification the operating system is required to evaluate the _ACx and _PSV objects after executing the _SCP control method. Move the execution of the _SCP control method before the invocation of acpi_thermal_get_trip_points() to avoid missing updates to the _ACx and _PSV objects. Fixes: b09872a652d3 ("ACPI: thermal: Fold acpi_thermal_get_info() into its caller") Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20250410165456.4173-3-W_Armin@gmx.de Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/thermal.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 0c874186f8aed4..5c2defe55898f1 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -803,6 +803,12 @@ static int acpi_thermal_add(struct acpi_device *device) acpi_thermal_aml_dependency_fix(tz); + /* + * Set the cooling mode [_SCP] to active cooling. This needs to happen before + * we retrieve the trip point values. + */ + acpi_execute_simple_method(tz->device->handle, "_SCP", ACPI_THERMAL_MODE_ACTIVE); + /* Get trip points [_ACi, _PSV, etc.] (required). */ acpi_thermal_get_trip_points(tz); @@ -814,10 +820,6 @@ static int acpi_thermal_add(struct acpi_device *device) if (result) goto free_memory; - /* Set the cooling mode [_SCP] to active cooling. */ - acpi_execute_simple_method(tz->device->handle, "_SCP", - ACPI_THERMAL_MODE_ACTIVE); - /* Determine the default polling frequency [_TZP]. */ if (tzp) tz->polling_frequency = tzp; From 1018fdbf5d4c5ecdd14d623a3ad54b4a85ee2ba6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 May 2025 15:32:06 +0200 Subject: [PATCH 2144/2924] spi: sh-msiof: Fix maximum DMA transfer size [ Upstream commit 0941d5166629cb766000530945e54b4e49680c68 ] The maximum amount of data to transfer in a single DMA request is calculated from the FIFO sizes (which is technically not 100% correct, but a simplification, as it is limited by the maximum word count values in the Transmit and Control Data Registers). However, in case there is both data to transmit and to receive, the transmit limit is overwritten by the receive limit. Fix this by using the minimum applicable FIFO size instead. Move the calculation outside the loop, so it is not repeated for each individual DMA transfer. As currently tx_fifo_size is always equal to rx_fifo_size, this bug had no real impact. Fixes: fe78d0b7691c0274 ("spi: sh-msiof: Fix FIFO size to 64 word from 256 word") Signed-off-by: Geert Uytterhoeven Link: https://patch.msgid.link/d9961767a97758b2614f2ee8afe1bd56dc900a60.1747401908.git.geert+renesas@glider.be Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-sh-msiof.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 8a98c313548e37..7d8a7998f8ae73 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -918,6 +918,7 @@ static int sh_msiof_transfer_one(struct spi_controller *ctlr, void *rx_buf = t->rx_buf; unsigned int len = t->len; unsigned int bits = t->bits_per_word; + unsigned int max_wdlen = 256; unsigned int bytes_per_word; unsigned int words; int n; @@ -931,17 +932,17 @@ static int sh_msiof_transfer_one(struct spi_controller *ctlr, if (!spi_controller_is_target(p->ctlr)) sh_msiof_spi_set_clk_regs(p, t); + if (tx_buf) + max_wdlen = min(max_wdlen, p->tx_fifo_size); + if (rx_buf) + max_wdlen = min(max_wdlen, p->rx_fifo_size); + while (ctlr->dma_tx && len > 15) { /* * DMA supports 32-bit words only, hence pack 8-bit and 16-bit * words, with byte resp. word swapping. */ - unsigned int l = 0; - - if (tx_buf) - l = min(round_down(len, 4), p->tx_fifo_size * 4); - if (rx_buf) - l = min(round_down(len, 4), p->rx_fifo_size * 4); + unsigned int l = min(round_down(len, 4), max_wdlen * 4); if (bits <= 8) { copy32 = copy_bswap32; From 294c85e7109d93f161bc369897bba8b9950fbe91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Sun, 18 May 2025 20:50:46 +1000 Subject: [PATCH 2145/2924] ASoC: apple: mca: Constrain channels according to TDM mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e717c661e2d1a660e96c40b0fe9933e23a1d7747 ] We don't (and can't) configure the hardware correctly if the number of channels exceeds the weight of the TDM mask. Report that constraint in startup of FE. Fixes: 3df5d0d97289 ("ASoC: apple: mca: Start new platform driver") Signed-off-by: Martin Povišer Signed-off-by: James Calligeros Link: https://patch.msgid.link/20250518-mca-fixes-v1-1-ee1015a695f6@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/apple/mca.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sound/soc/apple/mca.c b/sound/soc/apple/mca.c index b4f4696809dd23..5dd24ab90d0f05 100644 --- a/sound/soc/apple/mca.c +++ b/sound/soc/apple/mca.c @@ -464,6 +464,28 @@ static int mca_configure_serdes(struct mca_cluster *cl, int serdes_unit, return -EINVAL; } +static int mca_fe_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct mca_cluster *cl = mca_dai_to_cluster(dai); + unsigned int mask, nchannels; + + if (cl->tdm_slots) { + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + mask = cl->tdm_tx_mask; + else + mask = cl->tdm_rx_mask; + + nchannels = hweight32(mask); + } else { + nchannels = 2; + } + + return snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_CHANNELS, + 1, nchannels); +} + static int mca_fe_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width) { @@ -680,6 +702,7 @@ static int mca_fe_hw_params(struct snd_pcm_substream *substream, } static const struct snd_soc_dai_ops mca_fe_ops = { + .startup = mca_fe_startup, .set_fmt = mca_fe_set_fmt, .set_bclk_ratio = mca_set_bclk_ratio, .set_tdm_slot = mca_fe_set_tdm_slot, From 04d39b014fe0fe311463ad652d38eac6f014dbac Mon Sep 17 00:00:00 2001 From: David Thompson Date: Tue, 18 Mar 2025 17:47:47 -0400 Subject: [PATCH 2146/2924] EDAC/bluefield: Don't use bluefield_edac_readl() result on error [ Upstream commit ea3b0b7f541b9511abe2b89547c95458804f38e2 ] The bluefield_edac_readl() routine returns an uninitialized result on error paths. In those cases the calling routine should not use the uninitialized result. The driver should simply log the error, and then return early. Fixes: e41967575474 ("EDAC/bluefield: Use Arm SMC for EMI access on BlueField-2") Signed-off-by: David Thompson Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Shravan Kumar Ramani Link: https://lore.kernel.org/20250318214747.12271-1-davthompson@nvidia.com Signed-off-by: Sasha Levin --- drivers/edac/bluefield_edac.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c index 4942a240c30f25..ae3bb7afa103eb 100644 --- a/drivers/edac/bluefield_edac.c +++ b/drivers/edac/bluefield_edac.c @@ -199,8 +199,10 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, * error without the detailed information. */ err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom); - if (err) + if (err) { dev_err(priv->dev, "DRAM syndrom read failed.\n"); + return; + } serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); @@ -213,20 +215,26 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, } err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info); - if (err) + if (err) { dev_err(priv->dev, "DRAM additional info read failed.\n"); + return; + } err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0); - if (err) + if (err) { dev_err(priv->dev, "Error addr 0 read failed.\n"); + return; + } err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1); - if (err) + if (err) { dev_err(priv->dev, "Error addr 1 read failed.\n"); + return; + } ecc_dimm_addr = ((u64)edea1 << 32) | edea0; @@ -250,8 +258,10 @@ static void bluefield_edac_check(struct mem_ctl_info *mci) return; err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count); - if (err) + if (err) { dev_err(priv->dev, "ECC count read failed.\n"); + return; + } single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); From 9841a465b88080063a6a3db7a439929384febee9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 May 2025 12:08:05 +0200 Subject: [PATCH 2147/2924] ALSA: core: fix up bus match const issues. [ Upstream commit 62f134ab190c5fd5c9f68fe638ad8e13bb8a4cb4 ] In commit d69d80484598 ("driver core: have match() callback in struct bus_type take a const *"), the match bus callback was changed to have the driver be a const pointer. Unfortunately that const attribute was thrown away when container_of() is called, which is not correct and was not caught by the compiler due to how container_of() is implemented. Fix this up by correctly preserving the const attribute of the driver passed to the bus match function which requires the hdac_driver match function to also take a const pointer for the driver structure. Cc: Jaroslav Kysela Cc: Takashi Iwai Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2025052204-hyphen-thermal-3e72@gregkh Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- include/sound/hdaudio.h | 4 ++-- sound/core/seq_device.c | 2 +- sound/hda/hda_bus_type.c | 6 +++--- sound/pci/hda/hda_bind.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index b098ceadbe74bf..9a70048adbc069 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -223,7 +223,7 @@ struct hdac_driver { struct device_driver driver; int type; const struct hda_device_id *id_table; - int (*match)(struct hdac_device *dev, struct hdac_driver *drv); + int (*match)(struct hdac_device *dev, const struct hdac_driver *drv); void (*unsol_event)(struct hdac_device *dev, unsigned int event); /* fields used by ext bus APIs */ @@ -235,7 +235,7 @@ struct hdac_driver { #define drv_to_hdac_driver(_drv) container_of(_drv, struct hdac_driver, driver) const struct hda_device_id * -hdac_get_device_id(struct hdac_device *hdev, struct hdac_driver *drv); +hdac_get_device_id(struct hdac_device *hdev, const struct hdac_driver *drv); /* * Bus verb operators diff --git a/sound/core/seq_device.c b/sound/core/seq_device.c index 4492be5d2317c7..bac9f860373425 100644 --- a/sound/core/seq_device.c +++ b/sound/core/seq_device.c @@ -43,7 +43,7 @@ MODULE_LICENSE("GPL"); static int snd_seq_bus_match(struct device *dev, const struct device_driver *drv) { struct snd_seq_device *sdev = to_seq_dev(dev); - struct snd_seq_driver *sdrv = to_seq_drv(drv); + const struct snd_seq_driver *sdrv = to_seq_drv(drv); return strcmp(sdrv->id, sdev->id) == 0 && sdrv->argsize == sdev->argsize; diff --git a/sound/hda/hda_bus_type.c b/sound/hda/hda_bus_type.c index 7545ace7b0ee4b..eb72a7af2e56e8 100644 --- a/sound/hda/hda_bus_type.c +++ b/sound/hda/hda_bus_type.c @@ -21,7 +21,7 @@ MODULE_LICENSE("GPL"); * driver id_table and returns the matching device id entry. */ const struct hda_device_id * -hdac_get_device_id(struct hdac_device *hdev, struct hdac_driver *drv) +hdac_get_device_id(struct hdac_device *hdev, const struct hdac_driver *drv) { if (drv->id_table) { const struct hda_device_id *id = drv->id_table; @@ -38,7 +38,7 @@ hdac_get_device_id(struct hdac_device *hdev, struct hdac_driver *drv) } EXPORT_SYMBOL_GPL(hdac_get_device_id); -static int hdac_codec_match(struct hdac_device *dev, struct hdac_driver *drv) +static int hdac_codec_match(struct hdac_device *dev, const struct hdac_driver *drv) { if (hdac_get_device_id(dev, drv)) return 1; @@ -49,7 +49,7 @@ static int hdac_codec_match(struct hdac_device *dev, struct hdac_driver *drv) static int hda_bus_match(struct device *dev, const struct device_driver *drv) { struct hdac_device *hdev = dev_to_hdac_dev(dev); - struct hdac_driver *hdrv = drv_to_hdac_driver(drv); + const struct hdac_driver *hdrv = drv_to_hdac_driver(drv); if (hdev->type != hdrv->type) return 0; diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 9521e5e0e6e6f8..1fef350d821ef0 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -18,10 +18,10 @@ /* * find a matching codec id */ -static int hda_codec_match(struct hdac_device *dev, struct hdac_driver *drv) +static int hda_codec_match(struct hdac_device *dev, const struct hdac_driver *drv) { struct hda_codec *codec = container_of(dev, struct hda_codec, core); - struct hda_codec_driver *driver = + const struct hda_codec_driver *driver = container_of(drv, struct hda_codec_driver, core); const struct hda_device_id *list; /* check probe_id instead of vendor_id if set */ From ccc3d68b92be89c30ba42ac62d2a141bd0c2b457 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 22 May 2025 16:13:56 +0200 Subject: [PATCH 2148/2924] ACPI: platform_profile: Avoid initializing on non-ACPI platforms [ Upstream commit dd133162c9cff5951a692fab9811fadf46a46457 ] The platform profile driver is loaded even on platforms that do not have ACPI enabled. The initialization of the sysfs entries was recently moved from platform_profile_register() to the module init call, and those entries need acpi_kobj to be initialized which is not the case when ACPI is disabled. This results in the following warning: WARNING: CPU: 5 PID: 1 at fs/sysfs/group.c:131 internal_create_group+0xa22/0xdd8 Modules linked in: CPU: 5 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.15.0-rc7-dirty #6 PREEMPT Tainted: [W]=WARN Hardware name: riscv-virtio,qemu (DT) epc : internal_create_group+0xa22/0xdd8 ra : internal_create_group+0xa22/0xdd8 Call Trace: internal_create_group+0xa22/0xdd8 sysfs_create_group+0x22/0x2e platform_profile_init+0x74/0xb2 do_one_initcall+0x198/0xa9e kernel_init_freeable+0x6d8/0x780 kernel_init+0x28/0x24c ret_from_fork+0xe/0x18 Fix this by checking if ACPI is enabled before trying to create sysfs entries. Fixes: 77be5cacb2c2 ("ACPI: platform_profile: Create class for ACPI platform profile") Signed-off-by: Alexandre Ghiti Reviewed-by: Arnd Bergmann Reviewed-by: Mark Pearson Link: https://patch.msgid.link/20250522141410.31315-1-alexghiti@rivosinc.com [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/platform_profile.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index ffbfd32f4cf1ba..b43f4459a4f61e 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -688,6 +688,9 @@ static int __init platform_profile_init(void) { int err; + if (acpi_disabled) + return -EOPNOTSUPP; + err = class_register(&platform_profile_class); if (err) return err; From 1d17acfc019a6a831b4656094b4de5dca8b80886 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Fri, 28 Feb 2025 14:06:33 -0600 Subject: [PATCH 2149/2924] drm/vmwgfx: Add seqno waiter for sync_files [ Upstream commit 0039a3b35b10d9c15d3d26320532ab56cc566750 ] Because sync_files are passive waiters they do not participate in the processing of fences like the traditional vmw_fence_wait IOCTL. If userspace exclusively uses sync_files for synchronization then nothing in the kernel actually processes fence updates as interrupts for fences are masked and ignored if the kernel does not indicate to the SVGA device that there are active waiters. This oversight results in a bug where the entire GUI can freeze waiting on a sync_file that will never be signalled as we've masked the interrupts to signal its completion. This bug is incredibly racy as any process which interacts with the fencing code via the 3D stack can process the stuck fences on behalf of the stuck process causing it to run again. Even a simple app like eglinfo is enough to resume the stuck process. Usually this bug is seen at a login screen like GDM because there are no other 3D apps running. By adding a seqno waiter we re-enable interrupt based processing of the dma_fences associated with the sync_file which is signalled as part of a dma_fence_callback. This has likely been broken since it was initially added to the kernel in 2017 but has gone unnoticed until mutter recently started using sync_files heavily over the course of 2024 as part of their explicit sync support. Fixes: c906965dee22 ("drm/vmwgfx: Add export fence to file descriptor support") Signed-off-by: Ian Forbes Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20250228200633.642417-1-ian.forbes@broadcom.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 2e52d73eba4840..ea741bc4ac3fc7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -4086,6 +4086,23 @@ static int vmw_execbuf_tie_context(struct vmw_private *dev_priv, return 0; } +/* + * DMA fence callback to remove a seqno_waiter + */ +struct seqno_waiter_rm_context { + struct dma_fence_cb base; + struct vmw_private *dev_priv; +}; + +static void seqno_waiter_rm_cb(struct dma_fence *f, struct dma_fence_cb *cb) +{ + struct seqno_waiter_rm_context *ctx = + container_of(cb, struct seqno_waiter_rm_context, base); + + vmw_seqno_waiter_remove(ctx->dev_priv); + kfree(ctx); +} + int vmw_execbuf_process(struct drm_file *file_priv, struct vmw_private *dev_priv, void __user *user_commands, void *kernel_commands, @@ -4266,6 +4283,15 @@ int vmw_execbuf_process(struct drm_file *file_priv, } else { /* Link the fence with the FD created earlier */ fd_install(out_fence_fd, sync_file->file); + struct seqno_waiter_rm_context *ctx = + kmalloc(sizeof(*ctx), GFP_KERNEL); + ctx->dev_priv = dev_priv; + vmw_seqno_waiter_add(dev_priv); + if (dma_fence_add_callback(&fence->base, &ctx->base, + seqno_waiter_rm_cb) < 0) { + vmw_seqno_waiter_remove(dev_priv); + kfree(ctx); + } } } From d0926e679761fb259acba3ee09a1de69e1a94502 Mon Sep 17 00:00:00 2001 From: Keisuke Nishimura Date: Tue, 25 Feb 2025 15:52:23 +0100 Subject: [PATCH 2150/2924] drm/vmwgfx: Add error path for xa_store in vmw_bo_add_detached_resource [ Upstream commit 3282422bf251db541fe07c548ca304130d37d754 ] The xa_store() may fail due to memory allocation failure because there is no guarantee that the index is already used. This fix introduces new paths to handle the error. This patch also aligns the order of function calls by calling vmw_bo_add_detached_resource() before ttm_prime_object_init() in order to allow consistent error handling. Fixes: d6667f0ddf46 ("drm/vmwgfx: Fix handling of dumb buffers") Signed-off-by: Keisuke Nishimura Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20250225145223.34773-1-keisuke.nishimura@inria.fr Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 16 ++++++++++++++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 9b5b8c1f063bb7..aa13e4061ff15d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -848,9 +848,9 @@ void vmw_bo_placement_set_default_accelerated(struct vmw_bo *bo) vmw_bo_placement_set(bo, domain, domain); } -void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +int vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) { - xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL); + return xa_err(xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL)); } void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 11e330c7c7f52b..51790a11fe6494 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -141,7 +141,7 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); void vmw_bo_swap_notify(struct ttm_buffer_object *bo); -void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +int vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 5721c74da3e0b9..1f7626f6ac0b1a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -871,7 +871,12 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, vmw_resource_unreference(&res); goto out_unlock; } - vmw_bo_add_detached_resource(res->guest_memory_bo, res); + + ret = vmw_bo_add_detached_resource(res->guest_memory_bo, res); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); + goto out_unlock; + } } tmp = vmw_resource_reference(&srf->res); @@ -1670,6 +1675,14 @@ vmw_gb_surface_define_internal(struct drm_device *dev, } + if (res->guest_memory_bo) { + ret = vmw_bo_add_detached_resource(res->guest_memory_bo, res); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); + goto out_unlock; + } + } + tmp = vmw_resource_reference(res); ret = ttm_prime_object_init(tfile, res->guest_memory_size, &user_srf->prime, VMW_RES_SURFACE, @@ -1684,7 +1697,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, rep->handle = user_srf->prime.base.handle; rep->backup_size = res->guest_memory_size; if (res->guest_memory_bo) { - vmw_bo_add_detached_resource(res->guest_memory_bo, res); rep->buffer_map_handle = drm_vma_node_offset_addr(&res->guest_memory_bo->tbo.base.vma_node); rep->buffer_size = res->guest_memory_bo->tbo.base.size; From e1ef25a10362982da95c6d8b1a7ea3d0e6242478 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 27 Feb 2025 14:20:32 +0100 Subject: [PATCH 2151/2924] drm: xlnx: zynqmp_dpsub: fix Kconfig dependencies for ASoC [ Upstream commit f9f087d946266bc5da7c3a17bd8fd9d01969e3cf ] The new audio code fails to build when sounds support is in a loadable module but the GPU driver is built-in: x86_64-linux-ld: zynqmp_dp_audio.c:(.text+0x6a8): undefined reference to `devm_snd_soc_register_card' x86_64-linux-ld: drivers/gpu/drm/xlnx/zynqmp_dp_audio.o:(.rodata+0x1bc): undefined reference to `snd_soc_info_volsw' x86_64-linux-ld: drivers/gpu/drm/xlnx/zynqmp_dp_audio.o:(.rodata+0x1f0): undefined reference to `snd_soc_get_volsw' x86_64-linux-ld: drivers/gpu/drm/xlnx/zynqmp_dp_audio.o:(.rodata+0x1f4): undefined reference to `snd_soc_put_volsw' Change the Kconfig dependency to disallow the sound support in this configuration. Fixes: 3ec5c1579305 ("drm: xlnx: zynqmp_dpsub: Add DP audio support") Signed-off-by: Arnd Bergmann Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20250227132036.1136600-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/xlnx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index dbecca9bdd544f..cfabf5e2a0bb0a 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -22,6 +22,7 @@ config DRM_ZYNQMP_DPSUB_AUDIO bool "ZynqMP DisplayPort Audio Support" depends on DRM_ZYNQMP_DPSUB depends on SND && SND_SOC + depends on SND_SOC=y || DRM_ZYNQMP_DPSUB=m select SND_SOC_GENERIC_DMAENGINE_PCM help Choose this option to enable DisplayPort audio support in the ZynqMP From 926cd7ab2f3aede5c37f7cc9332dc0fa23296000 Mon Sep 17 00:00:00 2001 From: Vignesh Raman Date: Fri, 28 Feb 2025 18:56:18 +0530 Subject: [PATCH 2152/2924] drm/ci: fix merge request rules [ Upstream commit 10646ddac2917b31c985ceff0e4982c42a9c924b ] Merge request pipelines were only created when changes were made to drivers/gpu/drm/ci/, causing MRs that didn't touch this path to break. Fix MR pipeline rules to trigger jobs for all changes. Run jobs automatically for marge-bot and scheduled pipelines, but in all other cases run manually. Also remove CI_PROJECT_NAMESPACE checks specific to mesa. Fixes: df54f04f2020 ("drm/ci: update gitlab rules") Signed-off-by: Vignesh Raman Reviewed-by: Daniel Stone Signed-off-by: Helen Koike Link: https://patchwork.freedesktop.org/patch/msgid/20250228132620.556079-1-vignesh.raman@collabora.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/ci/gitlab-ci.yml | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/ci/gitlab-ci.yml b/drivers/gpu/drm/ci/gitlab-ci.yml index f04aabe8327c6b..b06b9e7d3d09bf 100644 --- a/drivers/gpu/drm/ci/gitlab-ci.yml +++ b/drivers/gpu/drm/ci/gitlab-ci.yml @@ -143,11 +143,11 @@ stages: # Pre-merge pipeline - if: &is-pre-merge $CI_PIPELINE_SOURCE == "merge_request_event" # Push to a branch on a fork - - if: &is-fork-push $CI_PROJECT_NAMESPACE != "mesa" && $CI_PIPELINE_SOURCE == "push" + - if: &is-fork-push $CI_PIPELINE_SOURCE == "push" # nightly pipeline - if: &is-scheduled-pipeline $CI_PIPELINE_SOURCE == "schedule" # pipeline for direct pushes that bypassed the CI - - if: &is-direct-push $CI_PROJECT_NAMESPACE == "mesa" && $CI_PIPELINE_SOURCE == "push" && $GITLAB_USER_LOGIN != "marge-bot" + - if: &is-direct-push $CI_PIPELINE_SOURCE == "push" && $GITLAB_USER_LOGIN != "marge-bot" # Rules applied to every job in the pipeline @@ -170,26 +170,15 @@ stages: - !reference [.disable-farm-mr-rules, rules] # Never run immediately after merging, as we just ran everything - !reference [.never-post-merge-rules, rules] - # Build everything in merge pipelines, if any files affecting the pipeline - # were changed + # Build everything in merge pipelines - if: *is-merge-attempt - changes: &all_paths - - drivers/gpu/drm/ci/**/* when: on_success # Same as above, but for pre-merge pipelines - if: *is-pre-merge - changes: - *all_paths when: manual - # Skip everything for pre-merge and merge pipelines which don't change - # anything in the build - - if: *is-merge-attempt - when: never - - if: *is-pre-merge - when: never # Build everything after someone bypassed the CI - if: *is-direct-push - when: on_success + when: manual # Build everything in scheduled pipelines - if: *is-scheduled-pipeline when: on_success From 289ce5d9621ae63f41c451389f675eb82a353659 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Thu, 23 Jan 2025 14:44:24 -0600 Subject: [PATCH 2153/2924] drm/vmwgfx: Fix dumb buffer leak [ Upstream commit f42c09e614f1bda96f5690be8d0bb273234febbc ] Dumb buffers were not being freed because the GEM reference that was acquired in gb_surface_define was not dropped like it is in the 2D case. Dropping this ref uncovered a few additional issues with freeing the resources associated with dirty tracking in vmw_bo_free/release. Additionally the TTM object associated with the surface were also leaking which meant that when the ttm_object_file was closed at process exit the destructor unreferenced an already destroyed surface. The solution is to remove the destructor from the vmw_user_surface associated with the dumb_buffer and immediately unreferencing the TTM object which his removes it from the ttm_object_file. This also allows the early return in vmw_user_surface_base_release for the dumb buffer case to be removed as it should no longer occur. The chain of references now has the GEM handle(s) owning the dumb buffer. The GEM handles have a singular GEM reference to the vmw_bo which is dropped when all handles are closed. When the GEM reference count hits zero the vmw_bo is freed which then unreferences the surface via vmw_resource_release in vmw_bo_release. Fixes: d6667f0ddf46 ("drm/vmwgfx: Fix handling of dumb buffers") Signed-off-by: Ian Forbes Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20250123204424.836896-1-ian.forbes@broadcom.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 6 ++++-- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 18 ++++++++++++------ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index aa13e4061ff15d..f30df3dc871fd1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -51,11 +51,13 @@ static void vmw_bo_release(struct vmw_bo *vbo) mutex_lock(&res->dev_priv->cmdbuf_mutex); (void)vmw_resource_reserve(res, false, true); vmw_resource_mob_detach(res); + if (res->dirty) + res->func->dirty_free(res); if (res->coherent) vmw_bo_dirty_release(res->guest_memory_bo); res->guest_memory_bo = NULL; res->guest_memory_offset = 0; - vmw_resource_unreserve(res, false, false, false, NULL, + vmw_resource_unreserve(res, true, false, false, NULL, 0); mutex_unlock(&res->dev_priv->cmdbuf_mutex); } @@ -73,9 +75,9 @@ static void vmw_bo_free(struct ttm_buffer_object *bo) { struct vmw_bo *vbo = to_vmw_bo(&bo->base); - WARN_ON(vbo->dirty); WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); vmw_bo_release(vbo); + WARN_ON(vbo->dirty); kfree(vbo); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index a73af8a355fbf5..c4d5fe5f330f98 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -273,7 +273,7 @@ int vmw_user_resource_lookup_handle(struct vmw_private *dev_priv, goto out_bad_resource; res = converter->base_obj_to_res(base); - kref_get(&res->kref); + vmw_resource_reference(res); *p_res = res; ret = 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 1f7626f6ac0b1a..d7a8070330ba54 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -658,7 +658,7 @@ static void vmw_user_surface_free(struct vmw_resource *res) struct vmw_user_surface *user_srf = container_of(srf, struct vmw_user_surface, srf); - WARN_ON_ONCE(res->dirty); + WARN_ON(res->dirty); if (user_srf->master) drm_master_put(&user_srf->master); kfree(srf->offsets); @@ -689,8 +689,7 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) * Dumb buffers own the resource and they'll unref the * resource themselves */ - if (res && res->guest_memory_bo && res->guest_memory_bo->is_dumb) - return; + WARN_ON(res && res->guest_memory_bo && res->guest_memory_bo->is_dumb); vmw_resource_unreference(&res); } @@ -2370,12 +2369,19 @@ int vmw_dumb_create(struct drm_file *file_priv, vbo = res->guest_memory_bo; vbo->is_dumb = true; vbo->dumb_surface = vmw_res_to_srf(res); - + drm_gem_object_put(&vbo->tbo.base); + /* + * Unset the user surface dtor since this in not actually exposed + * to userspace. The suface is owned via the dumb_buffer's GEM handle + */ + struct vmw_user_surface *usurf = container_of(vbo->dumb_surface, + struct vmw_user_surface, srf); + usurf->prime.base.refcount_release = NULL; err: if (res) vmw_resource_unreference(&res); - if (ret) - ttm_ref_object_base_unref(tfile, arg.rep.handle); + + ttm_ref_object_base_unref(tfile, arg.rep.handle); return ret; } From 99abee83b1f83fea59545b9b587923bc28abf4bc Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 17 Mar 2025 17:12:23 +0000 Subject: [PATCH 2154/2924] drm/vc4: hdmi: Call HDMI hotplug helper on disconnect [ Upstream commit 34f051accedb642087fdcf19b3501fe150fbee49 ] drm_atomic_helper_connector_hdmi_hotplug() must be called regardless of the connection status, otherwise the HDMI audio disconnect event won't be notified. Fixes: 2ea9ec5d2c20 ("drm/vc4: hdmi: use drm_atomic_helper_connector_hdmi_hotplug()") Suggested-by: Dmitry Baryshkov Signed-off-by: Stefan Wahren Reviewed-by: Maxime Ripard Signed-off-by: David Turner Link: https://patchwork.freedesktop.org/patch/msgid/20250317-vc4_hotplug-v4-2-2af625629186@raspberrypi.com Signed-off-by: Dave Stevenson Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_hdmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 37238a12baa58a..37a7d45695f236 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -372,13 +372,13 @@ static void vc4_hdmi_handle_hotplug(struct vc4_hdmi *vc4_hdmi, * the lock for now. */ + drm_atomic_helper_connector_hdmi_hotplug(connector, status); + if (status == connector_status_disconnected) { cec_phys_addr_invalidate(vc4_hdmi->cec_adap); return; } - drm_atomic_helper_connector_hdmi_hotplug(connector, status); - cec_s_phys_addr(vc4_hdmi->cec_adap, connector->display_info.source_physical_address, false); From 0ae03b2e11f26fdaf35394db9181d96f399e9e9e Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Thu, 27 Mar 2025 21:49:14 +0530 Subject: [PATCH 2155/2924] drm/xe/d3cold: Set power state to D3Cold during s2idle/s3 [ Upstream commit f945dd89fa8da3f662508165453dafdb4035d9d3 ] According to pci core guidelines, pci_save_config is recommended when the driver explicitly needs to set the pci power state. As of now xe kmd is only doing pci_save_config while entering to s2idle/s3 state, which makes pci core think that device driver has already applied required pci power state. This leads to GPU remain in D0 state. To fix the issue setting the pci power state to D3Cold. Fixes:dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rafael J. Wysocki Cc: Rodrigo Vivi Signed-off-by: Badal Nilawar Signed-off-by: Anshuman Gupta Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250327161914.432552-1-badal.nilawar@intel.com Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/xe_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index f4d108dc49b1b1..30f7ce06c89690 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -922,6 +922,7 @@ static int xe_pci_suspend(struct device *dev) pci_save_state(pdev); pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3cold); return 0; } From aea146b5bc3937e0208366628ca5e79fa8bd41d1 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 3 Apr 2025 15:33:30 +0200 Subject: [PATCH 2156/2924] drm/vc4: tests: Use return instead of assert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9e26a3740cc08ef8bcdc5e5d824792cd677affce ] The vc4_mock_atomic_add_output() and vc4_mock_atomic_del_output() assert that the functions they are calling didn't fail. Since some of them can return EDEADLK, we can't properly deal with it. Since both functions are expected to return an int, and all caller check the return value, let's just properly propagate the errors when they occur. Fixes: f759f5b53f1c ("drm/vc4: tests: Introduce a mocking infrastructure") Fixes: 76ec18dc5afa ("drm/vc4: tests: Add unit test suite for the PV muxing") Reviewed-by: Maíra Canal Link: https://lore.kernel.org/r/20250403-drm-vc4-kunit-failures-v2-1-e09195cc8840@kernel.org Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/tests/vc4_mock_output.c | 36 ++++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c index e70d7c3076acf1..f0ddc223c1f839 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c @@ -75,24 +75,30 @@ int vc4_mock_atomic_add_output(struct kunit *test, int ret; encoder = vc4_find_encoder_by_type(drm, type); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder); + if (!encoder) + return -ENODEV; crtc = vc4_find_crtc_for_encoder(test, drm, encoder); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc); + if (!crtc) + return -ENODEV; output = encoder_to_vc4_dummy_output(encoder); conn = &output->connector; conn_state = drm_atomic_get_connector_state(state, conn); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); + if (IS_ERR(conn_state)) + return PTR_ERR(conn_state); ret = drm_atomic_set_crtc_for_connector(conn_state, crtc); - KUNIT_EXPECT_EQ(test, ret, 0); + if (ret) + return ret; crtc_state = drm_atomic_get_crtc_state(state, crtc); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_state); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); ret = drm_atomic_set_mode_for_crtc(crtc_state, &default_mode); - KUNIT_EXPECT_EQ(test, ret, 0); + if (ret) + return ret; crtc_state->active = true; @@ -113,26 +119,32 @@ int vc4_mock_atomic_del_output(struct kunit *test, int ret; encoder = vc4_find_encoder_by_type(drm, type); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder); + if (!encoder) + return -ENODEV; crtc = vc4_find_crtc_for_encoder(test, drm, encoder); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc); + if (!crtc) + return -ENODEV; crtc_state = drm_atomic_get_crtc_state(state, crtc); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_state); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); crtc_state->active = false; ret = drm_atomic_set_mode_for_crtc(crtc_state, NULL); - KUNIT_ASSERT_EQ(test, ret, 0); + if (ret) + return ret; output = encoder_to_vc4_dummy_output(encoder); conn = &output->connector; conn_state = drm_atomic_get_connector_state(state, conn); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); + if (IS_ERR(conn_state)) + return PTR_ERR(conn_state); ret = drm_atomic_set_crtc_for_connector(conn_state, NULL); - KUNIT_ASSERT_EQ(test, ret, 0); + if (ret) + return ret; return 0; } From e13c4a28e8ecb9a5043774d9636fb609d5d778fd Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 3 Apr 2025 15:33:32 +0200 Subject: [PATCH 2157/2924] drm/vc4: tests: Stop allocating the state in test init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7e0351ae91ed2b6178abbfae96c3c6aaa1652567 ] The vc4-pv-muxing-combinations and vc5-pv-muxing-combinations test suites use a common test init function which, in part, allocates the drm atomic state the test will use. That allocation relies on drm_kunit_helper_atomic_state_alloc(), and thus requires a struct drm_modeset_acquire_ctx. This context will then be stored in the allocated state->acquire_ctx field. However, the context is local to the test init function, and is cleared as soon as drm_kunit_helper_atomic_state_alloc() is done. We thus end up with an dangling pointer to a cleared context in state->acquire_ctx for our test to consumes. We should really allocate the context and the state in the test functions, so we can also control when we're done with it. Fixes: 30188df0c387 ("drm/tests: Drop drm_kunit_helper_acquire_ctx_alloc()") Reviewed-by: Maíra Canal Link: https://lore.kernel.org/r/20250403-drm-vc4-kunit-failures-v2-3-e09195cc8840@kernel.org Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- .../gpu/drm/vc4/tests/vc4_test_pv_muxing.c | 41 ++++++++++++------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c b/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c index 992e8f5c5c6ea8..52c04ef33206bf 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c +++ b/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c @@ -20,7 +20,6 @@ struct pv_muxing_priv { struct vc4_dev *vc4; - struct drm_atomic_state *state; }; static bool check_fifo_conflict(struct kunit *test, @@ -677,10 +676,19 @@ static void drm_vc4_test_pv_muxing(struct kunit *test) { const struct pv_muxing_param *params = test->param_value; const struct pv_muxing_priv *priv = test->priv; - struct drm_atomic_state *state = priv->state; + struct drm_modeset_acquire_ctx ctx; + struct drm_atomic_state *state; + struct drm_device *drm; + struct vc4_dev *vc4; unsigned int i; int ret; + drm_modeset_acquire_init(&ctx, 0); + + vc4 = priv->vc4; + drm = &vc4->base; + state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); for (i = 0; i < params->nencoders; i++) { enum vc4_encoder_type enc_type = params->encoders[i]; @@ -700,16 +708,29 @@ static void drm_vc4_test_pv_muxing(struct kunit *test) KUNIT_EXPECT_TRUE(test, check_channel_for_encoder(test, state, enc_type, params->check_fn)); } + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); } static void drm_vc4_test_pv_muxing_invalid(struct kunit *test) { const struct pv_muxing_param *params = test->param_value; const struct pv_muxing_priv *priv = test->priv; - struct drm_atomic_state *state = priv->state; + struct drm_modeset_acquire_ctx ctx; + struct drm_atomic_state *state; + struct drm_device *drm; + struct vc4_dev *vc4; unsigned int i; int ret; + drm_modeset_acquire_init(&ctx, 0); + + vc4 = priv->vc4; + drm = &vc4->base; + state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); + for (i = 0; i < params->nencoders; i++) { enum vc4_encoder_type enc_type = params->encoders[i]; @@ -719,14 +740,15 @@ static void drm_vc4_test_pv_muxing_invalid(struct kunit *test) ret = drm_atomic_check_only(state); KUNIT_EXPECT_LT(test, ret, 0); + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); } static int vc4_pv_muxing_test_init(struct kunit *test) { const struct pv_muxing_param *params = test->param_value; - struct drm_modeset_acquire_ctx ctx; struct pv_muxing_priv *priv; - struct drm_device *drm; struct vc4_dev *vc4; priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL); @@ -737,15 +759,6 @@ static int vc4_pv_muxing_test_init(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vc4); priv->vc4 = vc4; - drm_modeset_acquire_init(&ctx, 0); - - drm = &vc4->base; - priv->state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv->state); - - drm_modeset_drop_locks(&ctx); - drm_modeset_acquire_fini(&ctx); - return 0; } From 8359b1df16cb866aa5e697836330deb0a8b1c175 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 3 Apr 2025 15:33:33 +0200 Subject: [PATCH 2158/2924] drm/vc4: tests: Retry pv-muxing tests when EDEADLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d5be7722d1736827a850556fd4d93e0fe2608c15 ] Some functions used by the HVS->PV muxing tests can return with EDEADLK, meaning the entire sequence should be restarted. It's not a fatal error and we should treat it as a recoverable error, and recover, instead of failing the test like we currently do. Fixes: 76ec18dc5afa ("drm/vc4: tests: Add unit test suite for the PV muxing") Reviewed-by: Maíra Canal Link: https://lore.kernel.org/r/20250403-drm-vc4-kunit-failures-v2-4-e09195cc8840@kernel.org Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- .../gpu/drm/vc4/tests/vc4_test_pv_muxing.c | 113 +++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c b/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c index 52c04ef33206bf..d1f694029169ad 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c +++ b/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c @@ -687,16 +687,30 @@ static void drm_vc4_test_pv_muxing(struct kunit *test) vc4 = priv->vc4; drm = &vc4->base; + +retry: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); for (i = 0; i < params->nencoders; i++) { enum vc4_encoder_type enc_type = params->encoders[i]; ret = vc4_mock_atomic_add_output(test, state, enc_type); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry; + } KUNIT_ASSERT_EQ(test, ret, 0); } ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry; + } KUNIT_EXPECT_EQ(test, ret, 0); KUNIT_EXPECT_TRUE(test, @@ -728,6 +742,8 @@ static void drm_vc4_test_pv_muxing_invalid(struct kunit *test) vc4 = priv->vc4; drm = &vc4->base; + +retry: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); @@ -735,10 +751,22 @@ static void drm_vc4_test_pv_muxing_invalid(struct kunit *test) enum vc4_encoder_type enc_type = params->encoders[i]; ret = vc4_mock_atomic_add_output(test, state, enc_type); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry; + } KUNIT_ASSERT_EQ(test, ret, 0); } ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry; + } KUNIT_EXPECT_LT(test, ret, 0); drm_modeset_drop_locks(&ctx); @@ -813,13 +841,26 @@ static void drm_test_vc5_pv_muxing_bugs_subsequent_crtc_enable(struct kunit *tes drm_modeset_acquire_init(&ctx, 0); drm = &vc4->base; +retry_first: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI0); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); new_hvs_state = vc4_hvs_get_new_global_state(state); @@ -836,13 +877,26 @@ static void drm_test_vc5_pv_muxing_bugs_subsequent_crtc_enable(struct kunit *tes ret = drm_atomic_helper_swap_state(state, false); KUNIT_ASSERT_EQ(test, ret, 0); +retry_second: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI1); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); new_hvs_state = vc4_hvs_get_new_global_state(state); @@ -887,16 +941,35 @@ static void drm_test_vc5_pv_muxing_bugs_stable_fifo(struct kunit *test) drm_modeset_acquire_init(&ctx, 0); drm = &vc4->base; +retry_first: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI0); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI1); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); new_hvs_state = vc4_hvs_get_new_global_state(state); @@ -921,13 +994,26 @@ static void drm_test_vc5_pv_muxing_bugs_stable_fifo(struct kunit *test) ret = drm_atomic_helper_swap_state(state, false); KUNIT_ASSERT_EQ(test, ret, 0); +retry_second: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_del_output(test, state, VC4_ENCODER_TYPE_HDMI0); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); new_hvs_state = vc4_hvs_get_new_global_state(state); @@ -981,25 +1067,50 @@ drm_test_vc5_pv_muxing_bugs_subsequent_crtc_enable_too_many_crtc_state(struct ku drm_modeset_acquire_init(&ctx, 0); drm = &vc4->base; +retry_first: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI0); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); - ret = drm_atomic_helper_swap_state(state, false); KUNIT_ASSERT_EQ(test, ret, 0); +retry_second: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI1); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); new_vc4_crtc_state = get_vc4_crtc_state_for_encoder(test, state, From a4ff7391c8b75b1541900bd9d0c238e558c11fb3 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Thu, 27 Mar 2025 12:04:35 +0800 Subject: [PATCH 2159/2924] drm/amd/pp: Fix potential NULL pointer dereference in atomctrl_initialize_mc_reg_table [ Upstream commit 820116a39f96bdc7d426c33a804b52f53700a919 ] The function atomctrl_initialize_mc_reg_table() and atomctrl_initialize_mc_reg_table_v2_2() does not check the return value of smu_atom_get_data_table(). If smu_atom_get_data_table() fails to retrieve vram_info, it returns NULL which is later dereferenced. Fixes: b3892e2bb519 ("drm/amd/pp: Use atombios api directly in powerplay (v2)") Fixes: 5f92b48cf62c ("drm/amd/pm: add mc register table initialization") Signed-off-by: Charles Han Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index 4bd92fd782be6a..8d40ed0f0e8383 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -143,6 +143,10 @@ int atomctrl_initialize_mc_reg_table( vram_info = (ATOM_VRAM_INFO_HEADER_V2_1 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); @@ -180,6 +184,10 @@ int atomctrl_initialize_mc_reg_table_v2_2( vram_info = (ATOM_VRAM_INFO_HEADER_V2_2 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); From b20a022de996d4cbb22f5671c5979e649a034e73 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Tue, 25 Feb 2025 10:40:33 +0100 Subject: [PATCH 2160/2924] media: rkvdec: Fix frame size enumeration [ Upstream commit f270005b99fa19fee9a6b4006e8dee37c10f1944 ] The VIDIOC_ENUM_FRAMESIZES ioctl should return all frame sizes (i.e. width and height in pixels) that the device supports for the given pixel format. It doesn't make a lot of sense to return the frame-sizes in a stepwise manner, which is used to enforce hardware alignments requirements for CAPTURE buffers, for coded formats. Instead, applications should receive an indication, about the maximum supported frame size for that hardware decoder, via a continuous frame-size enumeration. Fixes: cd33c830448b ("media: rkvdec: Add the rkvdec driver") Suggested-by: Alex Bee Signed-off-by: Jonas Karlman Reviewed-by: Nicolas Dufresne Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/staging/media/rkvdec/rkvdec.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c index f9bef5173bf25c..a9bfd5305410c2 100644 --- a/drivers/staging/media/rkvdec/rkvdec.c +++ b/drivers/staging/media/rkvdec/rkvdec.c @@ -213,8 +213,14 @@ static int rkvdec_enum_framesizes(struct file *file, void *priv, if (!fmt) return -EINVAL; - fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; - fsize->stepwise = fmt->frmsize; + fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; + fsize->stepwise.min_width = 1; + fsize->stepwise.max_width = fmt->frmsize.max_width; + fsize->stepwise.step_width = 1; + fsize->stepwise.min_height = 1; + fsize->stepwise.max_height = fmt->frmsize.max_height; + fsize->stepwise.step_height = 1; + return 0; } From f99eac1e9874c31f68dd088dbccca810daee6863 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:39:58 +0100 Subject: [PATCH 2161/2924] arm64/fpsimd: Avoid RES0 bits in the SME trap handler [ Upstream commit 95507570fb2f75544af69760cd5d8f48fc5c7f20 ] The SME trap handler consumes RES0 bits from the ESR when determining the reason for the trap, and depends upon those bits reading as zero. This may break in future when those RES0 bits are allocated a meaning and stop reading as zero. For SME traps taken with ESR_ELx.EC == 0b011101, the specific reason for the trap is indicated by ESR_ELx.ISS.SMTC ("SME Trap Code"). This field occupies bits [2:0] of ESR_ELx.ISS, and as of ARM DDI 0487 L.a, bits [24:3] of ESR_ELx.ISS are RES0. ESR_ELx.ISS itself occupies bits [24:0] of ESR_ELx. Extract the SMTC field specifically, matching the way we handle ESR_ELx fields elsewhere, and ensuring that the handler is future-proof. Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/esr.h | 14 ++++++++------ arch/arm64/kernel/fpsimd.c | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index e4f77757937e65..71f0cbf7b28872 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -378,12 +378,14 @@ /* * ISS values for SME traps */ - -#define ESR_ELx_SME_ISS_SME_DISABLED 0 -#define ESR_ELx_SME_ISS_ILL 1 -#define ESR_ELx_SME_ISS_SM_DISABLED 2 -#define ESR_ELx_SME_ISS_ZA_DISABLED 3 -#define ESR_ELx_SME_ISS_ZT_DISABLED 4 +#define ESR_ELx_SME_ISS_SMTC_MASK GENMASK(2, 0) +#define ESR_ELx_SME_ISS_SMTC(esr) ((esr) & ESR_ELx_SME_ISS_SMTC_MASK) + +#define ESR_ELx_SME_ISS_SMTC_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_SMTC_ILL 1 +#define ESR_ELx_SME_ISS_SMTC_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_SMTC_ZA_DISABLED 3 +#define ESR_ELx_SME_ISS_SMTC_ZT_DISABLED 4 /* ISS field definitions for MOPS exceptions */ #define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 8370d55f035334..1ee5f330b8ed35 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1436,7 +1436,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) * If this not a trap due to SME being disabled then something * is being used in the wrong mode, report as SIGILL. */ - if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) { + if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) { force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } From c4a4786d93e99517d6f10ed56b9ffba4ce88d3b3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 9 Apr 2025 17:40:02 +0100 Subject: [PATCH 2162/2924] arm64/fpsimd: Discard stale CPU state when handling SME traps [ Upstream commit d3eaab3c70905c5467e5c4ea403053d67505adeb ] The logic for handling SME traps manipulates saved FPSIMD/SVE/SME state incorrectly, and a race with preemption can result in a task having TIF_SME set and TIF_FOREIGN_FPSTATE clear even though the live CPU state is stale (e.g. with SME traps enabled). This can result in warnings from do_sme_acc() where SME traps are not expected while TIF_SME is set: | /* With TIF_SME userspace shouldn't generate any traps */ | if (test_and_set_thread_flag(TIF_SME)) | WARN_ON(1); This is very similar to the SVE issue we fixed in commit: 751ecf6afd6568ad ("arm64/sve: Discard stale CPU state when handling SVE traps") The race can occur when the SME trap handler is preempted before and after manipulating the saved FPSIMD/SVE/SME state, starting and ending on the same CPU, e.g. | void do_sme_acc(unsigned long esr, struct pt_regs *regs) | { | // Trap on CPU 0 with TIF_SME clear, SME traps enabled | // task->fpsimd_cpu is 0. | // per_cpu_ptr(&fpsimd_last_state, 0) is task. | | ... | | // Preempted; migrated from CPU 0 to CPU 1. | // TIF_FOREIGN_FPSTATE is set. | | get_cpu_fpsimd_context(); | | /* With TIF_SME userspace shouldn't generate any traps */ | if (test_and_set_thread_flag(TIF_SME)) | WARN_ON(1); | | if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { | unsigned long vq_minus_one = | sve_vq_from_vl(task_get_sme_vl(current)) - 1; | sme_set_vq(vq_minus_one); | | fpsimd_bind_task_to_cpu(); | } | | put_cpu_fpsimd_context(); | | // Preempted; migrated from CPU 1 to CPU 0. | // task->fpsimd_cpu is still 0 | // If per_cpu_ptr(&fpsimd_last_state, 0) is still task then: | // - Stale HW state is reused (with SME traps enabled) | // - TIF_FOREIGN_FPSTATE is cleared | // - A return to userspace skips HW state restore | } Fix the case where the state is not live and TIF_FOREIGN_FPSTATE is set by calling fpsimd_flush_task_state() to detach from the saved CPU state. This ensures that a subsequent context switch will not reuse the stale CPU state, and will instead set TIF_FOREIGN_FPSTATE, forcing the new state to be reloaded from memory prior to a return to userspace. Note: this was originallly posted as [1]. Fixes: 8bd7f91c03d8 ("arm64/sme: Implement traps and syscall handling for SME") Reported-by: Mark Rutland Signed-off-by: Mark Brown Link: https://lore.kernel.org/linux-arm-kernel/20241204-arm64-sme-reenable-v2-1-bae87728251d@kernel.org/ [ Rutland: rewrite commit message ] Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20250409164010.3480271-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/fpsimd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1ee5f330b8ed35..c92c0a08370b2f 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1460,6 +1460,8 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) sme_set_vq(vq_minus_one); fpsimd_bind_task_to_cpu(); + } else { + fpsimd_flush_task_state(current); } put_cpu_fpsimd_context(); From 87135d60ecf4fefc657d2e6bc8d00a638f08a570 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 9 Apr 2025 17:40:03 +0100 Subject: [PATCH 2163/2924] arm64/fpsimd: Don't corrupt FPMR when streaming mode changes [ Upstream commit e5fa85fce08b21ed41643cb7968bf66bbd0532e3 ] When the effective value of PSTATE.SM is changed from 0 to 1 or from 1 to 0 by any method, an entry or exit to/from streaming SVE mode is performed, and hardware automatically resets a number of registers. As of ARM DDI 0487 L.a, this means: * All implemented bits of the SVE vector registers are set to zero. * All implemented bits of the SVE predicate registers are set to zero. * All implemented bits of FFR are set to zero, if FFR is implemented in the new mode. * FPSR is set to 0x0000_0000_0800_009f. * FPMR is set to 0, if FPMR is implemented. Currently task_fpsimd_load() restores FPMR before restoring SVCR (which is an accessor for PSTATE.{SM,ZA}), and so the restored value of FPMR may be clobbered if the restored value of PSTATE.SM happens to differ from the initial value of PSTATE.SM. Fix this by moving the restore of FPMR later. Note: this was originally posted as [1]. Fixes: 203f2b95a882 ("arm64/fpsimd: Support FEAT_FPMR") Signed-off-by: Mark Brown Link: https://lore.kernel.org/linux-arm-kernel/20241204-arm64-sme-reenable-v2-2-bae87728251d@kernel.org/ [ Rutland: rewrite commit message ] Signed-off-by: Mark Rutland Link: https://lore.kernel.org/r/20250409164010.3480271-7-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/fpsimd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index c92c0a08370b2f..9466b3c5021aca 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -359,9 +359,6 @@ static void task_fpsimd_load(void) WARN_ON(preemptible()); WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE)); - if (system_supports_fpmr()) - write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR); - if (system_supports_sve() || system_supports_sme()) { switch (current->thread.fp_type) { case FP_STATE_FPSIMD: @@ -413,6 +410,9 @@ static void task_fpsimd_load(void) restore_ffr = system_supports_fa64(); } + if (system_supports_fpmr()) + write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR); + if (restore_sve_regs) { WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE); sve_load_state(sve_pffr(¤t->thread), From a305821f597ec943849d3e53924adb88c61ed682 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:04 +0100 Subject: [PATCH 2164/2924] arm64/fpsimd: Avoid clobbering kernel FPSIMD state with SMSTOP [ Upstream commit 01098d893fa8a6edb2b56e178b798e3e6b674f02 ] On system with SME, a thread's kernel FPSIMD state may be erroneously clobbered during a context switch immediately after that state is restored. Systems without SME are unaffected. If the CPU happens to be in streaming SVE mode before a context switch to a thread with kernel FPSIMD state, fpsimd_thread_switch() will restore the kernel FPSIMD state using fpsimd_load_kernel_state() while the CPU is still in streaming SVE mode. When fpsimd_thread_switch() subsequently calls fpsimd_flush_cpu_state(), this will execute an SMSTOP, causing an exit from streaming SVE mode. The exit from streaming SVE mode will cause the hardware to reset a number of FPSIMD/SVE/SME registers, clobbering the FPSIMD state. Fix this by calling fpsimd_flush_cpu_state() before restoring the kernel FPSIMD state. Fixes: e92bee9f861b ("arm64/fpsimd: Avoid erroneous elide of user state reload") Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-8-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 9466b3c5021aca..72ab9649c705ba 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1575,8 +1575,8 @@ void fpsimd_thread_switch(struct task_struct *next) fpsimd_save_user_state(); if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) { - fpsimd_load_kernel_state(next); fpsimd_flush_cpu_state(); + fpsimd_load_kernel_state(next); } else { /* * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's From 00a1a3fb553338eb74585d7df7b2bec20c80845a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:05 +0100 Subject: [PATCH 2165/2924] arm64/fpsimd: Reset FPMR upon exec() [ Upstream commit a90878f297d3dba906a6261deccb1bd4a791ba52 ] An exec() is expected to reset all FPSIMD/SVE/SME state, and barring special handling of the vector lengths, the state is expected to reset to zero. This reset is handled in fpsimd_flush_thread(), which the core exec() code calls via flush_thread(). When support was added for FPMR, no logic was added to fpsimd_flush_thread() to reset the FPMR value, and thus it is erroneously inherited across an exec(). Add the missing reset of FPMR. Fixes: 203f2b95a882 ("arm64/fpsimd: Support FEAT_FPMR") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-9-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/fpsimd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 72ab9649c705ba..7ae01f02e18b72 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1663,6 +1663,9 @@ void fpsimd_flush_thread(void) current->thread.svcr = 0; } + if (system_supports_fpmr()) + current->thread.uw.fpmr = 0; + current->thread.fp_type = FP_STATE_FPSIMD; put_cpu_fpsimd_context(); From 06111746ea7a7a41409aad2f175ca3a02482b80e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 9 Apr 2025 17:40:06 +0100 Subject: [PATCH 2166/2924] arm64/fpsimd: Fix merging of FPSIMD state during signal return [ Upstream commit c94f2f326146a34066a0070ed90b8bc656b1842f ] For backwards compatibility reasons, when a signal return occurs which restores SVE state, the effective lower 128 bits of each of the SVE vector registers are restored from the corresponding FPSIMD vector register in the FPSIMD signal frame, overriding the values in the SVE signal frame. This is intended to be the case regardless of streaming mode. To make this happen, restore_sve_fpsimd_context() uses fpsimd_update_current_state() to merge the lower 128 bits from the FPSIMD signal frame into the SVE register state. Unfortunately, fpsimd_update_current_state() performs this merging dependent upon TIF_SVE, which is not always correct for streaming SVE register state: * When restoring non-streaming SVE register state there is no observable problem, as the signal return code configures TIF_SVE and the saved fp_type to match before calling fpsimd_update_current_state(), which observes either: - TIF_SVE set AND fp_type == FP_STATE_SVE - TIF_SVE clear AND fp_type == FP_STATE_FPSIMD * On systems which have SME but not SVE, TIF_SVE cannot be set. Thus the merging will never happen for the streaming SVE register state. * On systems which have SVE and SME, TIF_SVE can be set and cleared independently of PSTATE.SM. Thus the merging may or may not happen for streaming SVE register state. As TIF_SVE can be cleared non-deterministically during syscalls (including at the start of sigreturn()), the merging may occur non-deterministically from the perspective of userspace. This logic has been broken since its introduction in commit: 85ed24dad2904f7c ("arm64/sme: Implement streaming SVE signal handling") ... at which point both fpsimd_signal_preserve_current_state() and fpsimd_update_current_state() only checked TIF SVE. When PSTATE.SM==1 and TIF_SVE was clear, signal delivery would place stale FPSIMD state into the FPSIMD signal frame, and signal return would not merge this into the restored register state. Subsequently, signal delivery was fixed as part of commit: 61da7c8e2a602f66 ("arm64/signal: Don't assume that TIF_SVE means we saved SVE state") ... but signal restore was not given a corresponding fix, and when TIF_SVE was clear, signal restore would still fail to merge the FPSIMD state into the restored SVE register state. The 'Fixes' tag did not indicate that this had been broken since its introduction. Fix this by merging the FPSIMD state dependent upon the saved fp_type, matching what we (currently) do during signal delivery. As described above, when backporting this commit, it will also be necessary to backport commit: 61da7c8e2a602f66 ("arm64/signal: Don't assume that TIF_SVE means we saved SVE state") ... and prior to commit: baa8515281b30861 ("arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE") ... it will be necessary for fpsimd_signal_preserve_current_state() and fpsimd_update_current_state() to consider both TIF_SVE and thread_sm_enabled(¤t->thread), in place of the saved fp_type. Fixes: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20250409164010.3480271-10-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 7ae01f02e18b72..1edf797d2c7103 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1806,7 +1806,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) get_cpu_fpsimd_context(); current->thread.uw.fpsimd_state = *state; - if (test_thread_flag(TIF_SVE)) + if (current->thread.fp_type == FP_STATE_SVE) fpsimd_to_sve(current); task_fpsimd_load(); From 94bc1f753220d3af101ab14b97b1a465e36776ed Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 4 Apr 2025 10:09:29 +0200 Subject: [PATCH 2167/2924] drm/panthor: Fix GPU_COHERENCY_ACE[_LITE] definitions [ Upstream commit d1df2907fb69df56aad8e4a0734dac0778c234a7 ] GPU_COHERENCY_ACE and GPU_COHERENCY_ACE_LITE definitions have been swapped. Changes in v2: - New patch Changes in v3: - Add Steve's R-b Reported-by: Liviu Dudau Fixes: 546b366600ef ("drm/panthor: Add GPU register definitions") Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Link: https://lore.kernel.org/r/20250404080933.2912674-2-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/gpu/drm/panthor/panthor_regs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h index b7b3b3add16627..a7a323dc5cf92a 100644 --- a/drivers/gpu/drm/panthor/panthor_regs.h +++ b/drivers/gpu/drm/panthor/panthor_regs.h @@ -133,8 +133,8 @@ #define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name) #define GPU_COHERENCY_PROTOCOL 0x304 -#define GPU_COHERENCY_ACE 0 -#define GPU_COHERENCY_ACE_LITE 1 +#define GPU_COHERENCY_ACE_LITE 0 +#define GPU_COHERENCY_ACE 1 #define GPU_COHERENCY_NONE 31 #define MCU_CONTROL 0x700 From 73bdbf12fbd4a067fd624d3bddd989c09a0a21c0 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 4 Apr 2025 10:09:30 +0200 Subject: [PATCH 2168/2924] drm/panthor: Call panthor_gpu_coherency_init() after PM resume() [ Upstream commit 7d5a3b22f5b58ef89ab8770d7a44c24eecde8d66 ] When the device is coherent, panthor_gpu_coherency_init() will read GPU_COHERENCY_FEATURES to make sure the GPU supports the ACE-Lite coherency protocol, which will fail if the clocks/power-domains are not enabled when the read is done. Move the panthor_gpu_coherency_init() call after the device has been resumed to prevent that. Changes in v2: - Add Liviu's R-b Changes in v3: - Add Steve's R-b Fixes: dd7db8d911a1 ("drm/panthor: Explicitly set the coherency mode") Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20250404080933.2912674-3-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/gpu/drm/panthor/panthor_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index a9da1d1eeb7071..c73c1608d6e68c 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -171,10 +171,6 @@ int panthor_device_init(struct panthor_device *ptdev) struct page *p; int ret; - ret = panthor_gpu_coherency_init(ptdev); - if (ret) - return ret; - init_completion(&ptdev->unplug.done); ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock); if (ret) @@ -247,6 +243,10 @@ int panthor_device_init(struct panthor_device *ptdev) if (ret) goto err_rpm_put; + ret = panthor_gpu_coherency_init(ptdev); + if (ret) + return ret; + ret = panthor_mmu_init(ptdev); if (ret) goto err_unplug_gpu; From 84bd2b049cc4dfccb5e20833b4e4eef5d79ec691 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 4 Apr 2025 10:09:31 +0200 Subject: [PATCH 2169/2924] drm/panthor: Update panthor_mmu::irq::mask when needed [ Upstream commit 8ba64cf2f358079d09faba7529aad2b0a46c7903 ] When we clear the faulty bits in the AS mask, we also need to update the panthor_mmu::irq::mask field otherwise our IRQ handler won't get called again until the GPU is reset. Changes in v2: - Add Liviu's R-b Changes in v3: - Add Steve's R-b Fixes: 647810ec2476 ("drm/panthor: Add the MMU/VM logical block") Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20250404080933.2912674-4-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/gpu/drm/panthor/panthor_mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 12a02e28f50fd8..7cca97d298ea10 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -781,6 +781,7 @@ int panthor_vm_active(struct panthor_vm *vm) if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) { gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as)); ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as); + ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as); gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask); } From 582a04abbd3061331e25c36facc10b5908352ec8 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 9 Apr 2025 14:00:13 -0700 Subject: [PATCH 2170/2924] accel/amdxdna: Fix incorrect size of ERT_START_NPU commands [ Upstream commit 6c161732ea6467c6dea0c35810ca8e8d1ae135f1 ] When multiple ERT_START_NPU commands are combined in one buffer, the buffer size calculation is incorrect. Also, the condition to make sure the buffer size is not beyond 4K is also fixed. Fixes: aac243092b70 ("accel/amdxdna: Add command execution") Reviewed-by: Jacek Lawrynowicz Reviewed-by: Maciej Falkowski Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20250409210013.10854-1-lizhi.hou@amd.com Signed-off-by: Sasha Levin --- drivers/accel/amdxdna/aie2_message.c | 6 +++--- drivers/accel/amdxdna/aie2_msg_priv.h | 10 ++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index bf4219e32cc19d..82412eec9a4b8b 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -525,7 +525,7 @@ aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, if (!payload) return -EINVAL; - if (!slot_cf_has_space(offset, payload_len)) + if (!slot_has_space(*buf, offset, payload_len)) return -ENOSPC; buf->cu_idx = cu_idx; @@ -558,7 +558,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) return -EINVAL; - if (!slot_dpu_has_space(offset, arg_sz)) + if (!slot_has_space(*buf, offset, arg_sz)) return -ENOSPC; buf->inst_buf_addr = sn->buffer; @@ -569,7 +569,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, memcpy(buf->args, sn->prop_args, arg_sz); /* Accurate buf size to hint firmware to do necessary copy */ - *size += sizeof(*buf) + arg_sz; + *size = sizeof(*buf) + arg_sz; return 0; } diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h index 4e02e744b470eb..6df9065b13f685 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -319,18 +319,16 @@ struct async_event_msg_resp { } __packed; #define MAX_CHAIN_CMDBUF_SIZE SZ_4K -#define slot_cf_has_space(offset, payload_size) \ - (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \ - offsetof(struct cmd_chain_slot_execbuf_cf, args[0])) +#define slot_has_space(slot, offset, payload_size) \ + (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \ + sizeof(typeof(slot))) + struct cmd_chain_slot_execbuf_cf { __u32 cu_idx; __u32 arg_cnt; __u32 args[] __counted_by(arg_cnt); }; -#define slot_dpu_has_space(offset, payload_size) \ - (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \ - offsetof(struct cmd_chain_slot_dpu, args[0])) struct cmd_chain_slot_dpu { __u64 inst_buf_addr; __u32 inst_size; From abc7c588c68bc528132910636707c92c80bfbc6a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 14 Apr 2025 15:01:20 +0200 Subject: [PATCH 2171/2924] drm/panthor: Fix the panthor_gpu_coherency_init() error path [ Upstream commit 938aaed555f3add76531cd204e2d4128ecb84ace ] The panthor_gpu_coherency_init() call has been moved around, but the error path hasn't been adjusted accordingly. Make sure we undo what has been done before this call in case of failure. Fixes: 7d5a3b22f5b5 ("drm/panthor: Call panthor_gpu_coherency_init() after PM resume()") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/dri-devel/4da470aa-4f84-460e-aff8-dabc8cc4da15@stanley.mountain/T/#t Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Link: https://lore.kernel.org/r/20250414130120.581274-1-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon Signed-off-by: Sasha Levin --- drivers/gpu/drm/panthor/panthor_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index c73c1608d6e68c..1e8811c6716dfa 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -245,7 +245,7 @@ int panthor_device_init(struct panthor_device *ptdev) ret = panthor_gpu_coherency_init(ptdev); if (ret) - return ret; + goto err_unplug_gpu; ret = panthor_mmu_init(ptdev); if (ret) From 72caf9886e9c1731cf7bfe3eabc308b9268b21d6 Mon Sep 17 00:00:00 2001 From: Hongbo Yao Date: Thu, 3 Apr 2025 15:09:18 +0800 Subject: [PATCH 2172/2924] perf: arm-ni: Unregister PMUs on probe failure [ Upstream commit 7f57afde6a44d9e044885e1125034edd4fda02e8 ] When a resource allocation fails in one clock domain of an NI device, we need to properly roll back all previously registered perf PMUs in other clock domains of the same device. Otherwise, it can lead to kernel panics. Calling arm_ni_init+0x0/0xff8 [arm_ni] @ 2374 arm-ni ARMHCB70:00: Failed to request PMU region 0x1f3c13000 arm-ni ARMHCB70:00: probe with driver arm-ni failed with error -16 list_add corruption: next->prev should be prev (fffffd01e9698a18), but was 0000000000000000. (next=ffff10001a0decc8). pstate: 6340009 (nZCv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : list_add_valid_or_report+0x7c/0xb8 lr : list_add_valid_or_report+0x7c/0xb8 Call trace: __list_add_valid_or_report+0x7c/0xb8 perf_pmu_register+0x22c/0x3a0 arm_ni_probe+0x554/0x70c [arm_ni] platform_probe+0x70/0xe8 really_probe+0xc6/0x4d8 driver_probe_device+0x48/0x170 __driver_attach+0x8e/0x1c0 bus_for_each_dev+0x64/0xf0 driver_add+0x138/0x260 bus_add_driver+0x68/0x138 __platform_driver_register+0x2c/0x40 arm_ni_init+0x14/0x2a [arm_ni] do_init_module+0x36/0x298 ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops - BUG: Fatal exception SMP: stopping secondary CPUs Fixes: 4d5a7680f2b4 ("perf: Add driver for Arm NI-700 interconnect PMU") Signed-off-by: Hongbo Yao Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20250403070918.4153839-1-andy.xu@hj-micro.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/arm-ni.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c index fd7a5e60e96302..0c418a7ee10f31 100644 --- a/drivers/perf/arm-ni.c +++ b/drivers/perf/arm-ni.c @@ -575,6 +575,23 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s return err; } +static void arm_ni_remove(struct platform_device *pdev) +{ + struct arm_ni *ni = platform_get_drvdata(pdev); + + for (int i = 0; i < ni->num_cds; i++) { + struct arm_ni_cd *cd = ni->cds + i; + + if (!cd->pmu_base) + continue; + + writel_relaxed(0, cd->pmu_base + NI_PMCR); + writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR); + perf_pmu_unregister(&cd->pmu); + cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); + } +} + static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node) { u32 reg = readl_relaxed(base + NI_NODE_TYPE); @@ -656,8 +673,11 @@ static int arm_ni_probe(struct platform_device *pdev) reg = readl_relaxed(pd.base + NI_CHILD_PTR(c)); arm_ni_probe_domain(base + reg, &cd); ret = arm_ni_init_cd(ni, &cd, res->start); - if (ret) + if (ret) { + ni->cds[cd.id].pmu_base = NULL; + arm_ni_remove(pdev); return ret; + } } } } @@ -665,23 +685,6 @@ static int arm_ni_probe(struct platform_device *pdev) return 0; } -static void arm_ni_remove(struct platform_device *pdev) -{ - struct arm_ni *ni = platform_get_drvdata(pdev); - - for (int i = 0; i < ni->num_cds; i++) { - struct arm_ni_cd *cd = ni->cds + i; - - if (!cd->pmu_base) - continue; - - writel_relaxed(0, cd->pmu_base + NI_PMCR); - writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR); - perf_pmu_unregister(&cd->pmu); - cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); - } -} - #ifdef CONFIG_OF static const struct of_device_id arm_ni_of_match[] = { { .compatible = "arm,ni-700" }, From 498129a868ce966af277841f7e32e9f76b3779ad Mon Sep 17 00:00:00 2001 From: Hongbo Yao Date: Tue, 1 Apr 2025 13:42:48 +0800 Subject: [PATCH 2173/2924] perf: arm-ni: Fix missing platform_set_drvdata() [ Upstream commit fc5106088d6db75df61308ef6de314d1f7959646 ] Add missing platform_set_drvdata in arm_ni_probe(), otherwise calling platform_get_drvdata() in remove returns NULL. Fixes: 4d5a7680f2b4 ("perf: Add driver for Arm NI-700 interconnect PMU") Signed-off-by: Hongbo Yao Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20250401054248.3985814-1-andy.xu@hj-micro.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/arm-ni.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c index 0c418a7ee10f31..de7b6cce4d68a8 100644 --- a/drivers/perf/arm-ni.c +++ b/drivers/perf/arm-ni.c @@ -660,6 +660,7 @@ static int arm_ni_probe(struct platform_device *pdev) ni->num_cds = num_cds; ni->part = part; ni->id = atomic_fetch_inc(&id); + platform_set_drvdata(pdev, ni); for (int v = 0; v < cfg.num_components; v++) { reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v)); From 7662022dd5a9849d6e060a19391ad896ece4b96f Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Thu, 17 Apr 2025 21:05:35 +0530 Subject: [PATCH 2174/2924] drm/amdgpu: Refine Cleaner Shader MEC firmware version for GFX10.1.x GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d30f61076268fd7ce01e4ec9e4d84bfaf90365f7 ] Update the minimum firmware version for the Cleaner Shader in the gfx_v10_0_sw_init function. This change adjusts the minimum required firmware version for the MEC firmware from 152 to 151, allowing for broader compatibility with GFX10.1 GPUs. Fixes: 25961bad9212 ("drm/amdgpu/gfx10: Add cleaner shader for GFX10.1.10") Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 23e6a05359c24b..c68c2e2f4d61aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4800,7 +4800,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex); if (adev->gfx.me_fw_version >= 101 && adev->gfx.pfp_fw_version >= 158 && - adev->gfx.mec_fw_version >= 152) { + adev->gfx.mec_fw_version >= 151) { adev->gfx.enable_cleaner_shader = true; r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); if (r) { From b6d6a26b162798e657f15045f624fb4832bdd8c8 Mon Sep 17 00:00:00 2001 From: Casey Connolly Date: Sat, 19 Apr 2025 18:31:44 +0200 Subject: [PATCH 2175/2924] drm/panel: samsung-sofef00: Drop s6e3fc2x01 support [ Upstream commit e1eb7293ab4107e9e19fa609835e657fe30dfec7 ] We never properly supported this panel and always used the wrong init sequence. Drop support so we can move it to it's own proper driver. Fixes: 5933baa36e26 ("drm/panel/samsung-sofef00: Add panel for OnePlus 6/T devices") Signed-off-by: Casey Connolly Signed-off-by: David Heidelberg Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250419-drop-s6e3fc2x01-support-v1-1-05edfe0d27aa@ixit.cz Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-samsung-sofef00.c | 34 ++----------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-samsung-sofef00.c b/drivers/gpu/drm/panel/panel-samsung-sofef00.c index 04ce925b3d9dbd..49cfa84b34f0ca 100644 --- a/drivers/gpu/drm/panel/panel-samsung-sofef00.c +++ b/drivers/gpu/drm/panel/panel-samsung-sofef00.c @@ -22,7 +22,6 @@ struct sofef00_panel { struct mipi_dsi_device *dsi; struct regulator *supply; struct gpio_desc *reset_gpio; - const struct drm_display_mode *mode; }; static inline @@ -159,26 +158,11 @@ static const struct drm_display_mode enchilada_panel_mode = { .height_mm = 145, }; -static const struct drm_display_mode fajita_panel_mode = { - .clock = (1080 + 72 + 16 + 36) * (2340 + 32 + 4 + 18) * 60 / 1000, - .hdisplay = 1080, - .hsync_start = 1080 + 72, - .hsync_end = 1080 + 72 + 16, - .htotal = 1080 + 72 + 16 + 36, - .vdisplay = 2340, - .vsync_start = 2340 + 32, - .vsync_end = 2340 + 32 + 4, - .vtotal = 2340 + 32 + 4 + 18, - .width_mm = 68, - .height_mm = 145, -}; - static int sofef00_panel_get_modes(struct drm_panel *panel, struct drm_connector *connector) { struct drm_display_mode *mode; - struct sofef00_panel *ctx = to_sofef00_panel(panel); - mode = drm_mode_duplicate(connector->dev, ctx->mode); + mode = drm_mode_duplicate(connector->dev, &enchilada_panel_mode); if (!mode) return -ENOMEM; @@ -239,13 +223,6 @@ static int sofef00_panel_probe(struct mipi_dsi_device *dsi) if (!ctx) return -ENOMEM; - ctx->mode = of_device_get_match_data(dev); - - if (!ctx->mode) { - dev_err(dev, "Missing device mode\n"); - return -ENODEV; - } - ctx->supply = devm_regulator_get(dev, "vddio"); if (IS_ERR(ctx->supply)) return dev_err_probe(dev, PTR_ERR(ctx->supply), @@ -295,14 +272,7 @@ static void sofef00_panel_remove(struct mipi_dsi_device *dsi) } static const struct of_device_id sofef00_panel_of_match[] = { - { // OnePlus 6 / enchilada - .compatible = "samsung,sofef00", - .data = &enchilada_panel_mode, - }, - { // OnePlus 6T / fajita - .compatible = "samsung,s6e3fc2x01", - .data = &fajita_panel_mode, - }, + { .compatible = "samsung,sofef00" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sofef00_panel_of_match); From 75a5b01b6437ae1854cc14b8e0f81200035c7138 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 18 Apr 2025 08:48:16 +0200 Subject: [PATCH 2176/2924] drm/bridge: lt9611uxc: Fix an error handling path in lt9611uxc_probe() [ Upstream commit b848cd418aebdb313364b4843f41fae82281a823 ] If lt9611uxc_audio_init() fails, some resources still need to be released before returning the error code. Use the existing error handling path. Fixes: 0cbbd5b1a012 ("drm: bridge: add support for lontium LT9611UXC bridge") Signed-off-by: Christophe JAILLET Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/f167608e392c6b4d7d7f6e45e3c21878feb60cbd.1744958833.git.christophe.jaillet@wanadoo.fr Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/lontium-lt9611uxc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c index f4c3ff1fdc6923..f6e714feeea54c 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c @@ -880,7 +880,11 @@ static int lt9611uxc_probe(struct i2c_client *client) } } - return lt9611uxc_audio_init(dev, lt9611uxc); + ret = lt9611uxc_audio_init(dev, lt9611uxc); + if (ret) + goto err_remove_bridge; + + return 0; err_remove_bridge: free_irq(client->irq, lt9611uxc); From 7e33ba1175603b04802710ce6d9060eaa5039e76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 17 Mar 2025 22:01:09 -0300 Subject: [PATCH 2177/2924] drm/v3d: Associate a V3D tech revision to all supported devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 76dbd0973c555037931d2ed055a4a69e592caad4 ] The V3D driver currently determines the GPU tech version (33, 41...) by reading a register. This approach has worked so far since this information wasn’t needed before powering on the GPU. V3D 7.1 introduces new registers that must be written to power on the GPU, requiring us to know the V3D version beforehand. To address this, associate each supported SoC with the corresponding VideoCore GPU version as part of the device data. To prevent possible mistakes, add an assertion to verify that the version specified in the device data matches the one reported by the hardware. If there is a mismatch, the kernel will trigger a warning. With the goal of maintaining consistency around the driver, use `enum v3d_gen` to assign values to `v3d->ver` and for comparisons with other V3D generations. Note that all mentions of unsupported or non-existing V3D generations (such as V3D 4.0) were removed by this commit and replaced with supported generations without functional changes. Reviewed-by: Iago Toral Quiroga Reviewed-by: Stefan Wahren Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20250317-v3d-gpu-reset-fixes-v6-1-f3ee7717ed17@igalia.com Stable-dep-of: d0e4c6537005 ("drm/v3d: fix client obtained from axi_ids on V3D 4.1") Signed-off-by: Sasha Levin --- drivers/gpu/drm/v3d/v3d_debugfs.c | 126 +++++++++++++++--------------- drivers/gpu/drm/v3d/v3d_drv.c | 22 ++++-- drivers/gpu/drm/v3d/v3d_drv.h | 11 ++- drivers/gpu/drm/v3d/v3d_gem.c | 10 +-- drivers/gpu/drm/v3d/v3d_irq.c | 6 +- drivers/gpu/drm/v3d/v3d_perfmon.c | 4 +- drivers/gpu/drm/v3d/v3d_sched.c | 6 +- 7 files changed, 101 insertions(+), 84 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index 76816f2551c100..7e789e181af0ac 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -21,74 +21,74 @@ struct v3d_reg_def { }; static const struct v3d_reg_def v3d_hub_reg_defs[] = { - REGDEF(33, 42, V3D_HUB_AXICFG), - REGDEF(33, 71, V3D_HUB_UIFCFG), - REGDEF(33, 71, V3D_HUB_IDENT0), - REGDEF(33, 71, V3D_HUB_IDENT1), - REGDEF(33, 71, V3D_HUB_IDENT2), - REGDEF(33, 71, V3D_HUB_IDENT3), - REGDEF(33, 71, V3D_HUB_INT_STS), - REGDEF(33, 71, V3D_HUB_INT_MSK_STS), - - REGDEF(33, 71, V3D_MMU_CTL), - REGDEF(33, 71, V3D_MMU_VIO_ADDR), - REGDEF(33, 71, V3D_MMU_VIO_ID), - REGDEF(33, 71, V3D_MMU_DEBUG_INFO), - - REGDEF(71, 71, V3D_GMP_STATUS(71)), - REGDEF(71, 71, V3D_GMP_CFG(71)), - REGDEF(71, 71, V3D_GMP_VIO_ADDR(71)), + REGDEF(V3D_GEN_33, V3D_GEN_42, V3D_HUB_AXICFG), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_UIFCFG), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_IDENT0), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_IDENT1), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_IDENT2), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_IDENT3), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_INT_STS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_INT_MSK_STS), + + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_MMU_CTL), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_MMU_VIO_ADDR), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_MMU_VIO_ID), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_MMU_DEBUG_INFO), + + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_GMP_STATUS(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_GMP_CFG(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_GMP_VIO_ADDR(71)), }; static const struct v3d_reg_def v3d_gca_reg_defs[] = { - REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN), - REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN_ACK), + REGDEF(V3D_GEN_33, V3D_GEN_33, V3D_GCA_SAFE_SHUTDOWN), + REGDEF(V3D_GEN_33, V3D_GEN_33, V3D_GCA_SAFE_SHUTDOWN_ACK), }; static const struct v3d_reg_def v3d_core_reg_defs[] = { - REGDEF(33, 71, V3D_CTL_IDENT0), - REGDEF(33, 71, V3D_CTL_IDENT1), - REGDEF(33, 71, V3D_CTL_IDENT2), - REGDEF(33, 71, V3D_CTL_MISCCFG), - REGDEF(33, 71, V3D_CTL_INT_STS), - REGDEF(33, 71, V3D_CTL_INT_MSK_STS), - REGDEF(33, 71, V3D_CLE_CT0CS), - REGDEF(33, 71, V3D_CLE_CT0CA), - REGDEF(33, 71, V3D_CLE_CT0EA), - REGDEF(33, 71, V3D_CLE_CT1CS), - REGDEF(33, 71, V3D_CLE_CT1CA), - REGDEF(33, 71, V3D_CLE_CT1EA), - - REGDEF(33, 71, V3D_PTB_BPCA), - REGDEF(33, 71, V3D_PTB_BPCS), - - REGDEF(33, 42, V3D_GMP_STATUS(33)), - REGDEF(33, 42, V3D_GMP_CFG(33)), - REGDEF(33, 42, V3D_GMP_VIO_ADDR(33)), - - REGDEF(33, 71, V3D_ERR_FDBGO), - REGDEF(33, 71, V3D_ERR_FDBGB), - REGDEF(33, 71, V3D_ERR_FDBGS), - REGDEF(33, 71, V3D_ERR_STAT), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_IDENT0), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_IDENT1), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_IDENT2), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_MISCCFG), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_INT_STS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_INT_MSK_STS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT0CS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT0CA), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT0EA), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT1CS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT1CA), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT1EA), + + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_PTB_BPCA), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_PTB_BPCS), + + REGDEF(V3D_GEN_33, V3D_GEN_42, V3D_GMP_STATUS(33)), + REGDEF(V3D_GEN_33, V3D_GEN_42, V3D_GMP_CFG(33)), + REGDEF(V3D_GEN_33, V3D_GEN_42, V3D_GMP_VIO_ADDR(33)), + + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_ERR_FDBGO), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_ERR_FDBGB), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_ERR_FDBGS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_ERR_STAT), }; static const struct v3d_reg_def v3d_csd_reg_defs[] = { - REGDEF(41, 71, V3D_CSD_STATUS), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG0(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG1(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG2(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG3(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG4(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG5(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG6(41)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG0(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG1(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG2(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG3(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG4(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG5(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG6(71)), - REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG7), + REGDEF(V3D_GEN_41, V3D_GEN_71, V3D_CSD_STATUS), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG0(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG1(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG2(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG3(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG4(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG5(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG6(41)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG0(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG1(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG2(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG3(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG4(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG5(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG6(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_V7_CSD_CURRENT_CFG7), }; static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) @@ -164,7 +164,7 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) str_yes_no(ident2 & V3D_HUB_IDENT2_WITH_MMU)); seq_printf(m, "TFU: %s\n", str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TFU)); - if (v3d->ver <= 42) { + if (v3d->ver <= V3D_GEN_42) { seq_printf(m, "TSY: %s\n", str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TSY)); } @@ -196,11 +196,11 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) seq_printf(m, " QPUs: %d\n", nslc * qups); seq_printf(m, " Semaphores: %d\n", V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM)); - if (v3d->ver <= 42) { + if (v3d->ver <= V3D_GEN_42) { seq_printf(m, " BCG int: %d\n", (ident2 & V3D_IDENT2_BCG_INT) != 0); } - if (v3d->ver < 40) { + if (v3d->ver < V3D_GEN_41) { seq_printf(m, " Override TMU: %d\n", (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0); } @@ -234,7 +234,7 @@ static int v3d_measure_clock(struct seq_file *m, void *unused) int core = 0; int measure_ms = 1000; - if (v3d->ver >= 40) { + if (v3d->ver >= V3D_GEN_41) { int cycle_count_reg = V3D_PCTR_CYCLE_COUNT(v3d->ver); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3, V3D_SET_FIELD_VER(cycle_count_reg, diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 852015214e971c..aa68be8fe86b71 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -92,7 +93,7 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, args->value = 1; return 0; case DRM_V3D_PARAM_SUPPORTS_PERFMON: - args->value = (v3d->ver >= 40); + args->value = (v3d->ver >= V3D_GEN_41); return 0; case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: args->value = 1; @@ -254,10 +255,10 @@ static const struct drm_driver v3d_drm_driver = { }; static const struct of_device_id v3d_of_match[] = { - { .compatible = "brcm,2711-v3d" }, - { .compatible = "brcm,2712-v3d" }, - { .compatible = "brcm,7268-v3d" }, - { .compatible = "brcm,7278-v3d" }, + { .compatible = "brcm,2711-v3d", .data = (void *)V3D_GEN_42 }, + { .compatible = "brcm,2712-v3d", .data = (void *)V3D_GEN_71 }, + { .compatible = "brcm,7268-v3d", .data = (void *)V3D_GEN_33 }, + { .compatible = "brcm,7278-v3d", .data = (void *)V3D_GEN_41 }, {}, }; MODULE_DEVICE_TABLE(of, v3d_of_match); @@ -274,6 +275,7 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct drm_device *drm; struct v3d_dev *v3d; + enum v3d_gen gen; int ret; u32 mmu_debug; u32 ident1, ident3; @@ -287,6 +289,9 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) platform_set_drvdata(pdev, drm); + gen = (uintptr_t)of_device_get_match_data(dev); + v3d->ver = gen; + ret = map_regs(v3d, &v3d->hub_regs, "hub"); if (ret) return ret; @@ -316,6 +321,11 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) ident1 = V3D_READ(V3D_HUB_IDENT1); v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV)); + /* Make sure that the V3D tech version retrieved from the HW is equal + * to the one advertised by the device tree. + */ + WARN_ON(v3d->ver != gen); + v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); WARN_ON(v3d->cores > 1); /* multicore not yet implemented */ @@ -340,7 +350,7 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) } } - if (v3d->ver < 41) { + if (v3d->ver < V3D_GEN_41) { ret = map_regs(v3d, &v3d->gca_regs, "gca"); if (ret) goto clk_disable; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 9deaefa0f95b71..de4a9e18f6a903 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -94,11 +94,18 @@ struct v3d_perfmon { u64 values[] __counted_by(ncounters); }; +enum v3d_gen { + V3D_GEN_33 = 33, + V3D_GEN_41 = 41, + V3D_GEN_42 = 42, + V3D_GEN_71 = 71, +}; + struct v3d_dev { struct drm_device drm; /* Short representation (e.g. 33, 41) of the V3D tech version */ - int ver; + enum v3d_gen ver; /* Short representation (e.g. 5, 6) of the V3D tech revision */ int rev; @@ -199,7 +206,7 @@ to_v3d_dev(struct drm_device *dev) static inline bool v3d_has_csd(struct v3d_dev *v3d) { - return v3d->ver >= 41; + return v3d->ver >= V3D_GEN_41; } #define v3d_to_pdev(v3d) to_platform_device((v3d)->drm.dev) diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index b1e681630ded09..1ea6d3832c2212 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -25,7 +25,7 @@ v3d_init_core(struct v3d_dev *v3d, int core) * type. If you want the default behavior, you can still put * "2" in the indirect texture state's output_type field. */ - if (v3d->ver < 40) + if (v3d->ver < V3D_GEN_41) V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT); /* Whenever we flush the L2T cache, we always want to flush @@ -58,7 +58,7 @@ v3d_idle_axi(struct v3d_dev *v3d, int core) static void v3d_idle_gca(struct v3d_dev *v3d) { - if (v3d->ver >= 41) + if (v3d->ver >= V3D_GEN_41) return; V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN); @@ -132,13 +132,13 @@ v3d_reset(struct v3d_dev *v3d) static void v3d_flush_l3(struct v3d_dev *v3d) { - if (v3d->ver < 41) { + if (v3d->ver < V3D_GEN_41) { u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL); V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH); - if (v3d->ver < 33) { + if (v3d->ver < V3D_GEN_33) { V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH); } @@ -151,7 +151,7 @@ v3d_flush_l3(struct v3d_dev *v3d) static void v3d_invalidate_l2c(struct v3d_dev *v3d, int core) { - if (v3d->ver > 32) + if (v3d->ver >= V3D_GEN_33) return; V3D_CORE_WRITE(core, V3D_CTL_L2CACTL, diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 72b6a119412fa7..29f63f572d35b7 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -143,7 +143,7 @@ v3d_irq(int irq, void *arg) /* We shouldn't be triggering these if we have GMP in * always-allowed mode. */ - if (v3d->ver < 71 && (intsts & V3D_INT_GMPV)) + if (v3d->ver < V3D_GEN_71 && (intsts & V3D_INT_GMPV)) dev_err(v3d->drm.dev, "GMP violation\n"); /* V3D 4.2 wires the hub and core IRQs together, so if we & @@ -200,7 +200,7 @@ v3d_hub_irq(int irq, void *arg) V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL)); - if (v3d->ver >= 41) { + if (v3d->ver >= V3D_GEN_41) { axi_id = axi_id >> 5; if (axi_id < ARRAY_SIZE(v3d41_axi_ids)) client = v3d41_axi_ids[axi_id]; @@ -217,7 +217,7 @@ v3d_hub_irq(int irq, void *arg) status = IRQ_HANDLED; } - if (v3d->ver >= 71 && (intsts & V3D_V7_HUB_INT_GMPV)) { + if (v3d->ver >= V3D_GEN_71 && (intsts & V3D_V7_HUB_INT_GMPV)) { dev_err(v3d->drm.dev, "GMP Violation\n"); status = IRQ_HANDLED; } diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 3ebda2fa46fc47..9a3fe52558746e 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -200,10 +200,10 @@ void v3d_perfmon_init(struct v3d_dev *v3d) const struct v3d_perf_counter_desc *counters = NULL; unsigned int max = 0; - if (v3d->ver >= 71) { + if (v3d->ver >= V3D_GEN_71) { counters = v3d_v71_performance_counters; max = ARRAY_SIZE(v3d_v71_performance_counters); - } else if (v3d->ver >= 42) { + } else if (v3d->ver >= V3D_GEN_42) { counters = v3d_v42_performance_counters; max = ARRAY_SIZE(v3d_v42_performance_counters); } diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index eb35482f6fb577..35f131a46d0701 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -357,11 +357,11 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) V3D_WRITE(V3D_TFU_ICA(v3d->ver), job->args.ica); V3D_WRITE(V3D_TFU_IUA(v3d->ver), job->args.iua); V3D_WRITE(V3D_TFU_IOA(v3d->ver), job->args.ioa); - if (v3d->ver >= 71) + if (v3d->ver >= V3D_GEN_71) V3D_WRITE(V3D_V7_TFU_IOC, job->args.v71.ioc); V3D_WRITE(V3D_TFU_IOS(v3d->ver), job->args.ios); V3D_WRITE(V3D_TFU_COEF0(v3d->ver), job->args.coef[0]); - if (v3d->ver >= 71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) { + if (v3d->ver >= V3D_GEN_71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) { V3D_WRITE(V3D_TFU_COEF1(v3d->ver), job->args.coef[1]); V3D_WRITE(V3D_TFU_COEF2(v3d->ver), job->args.coef[2]); V3D_WRITE(V3D_TFU_COEF3(v3d->ver), job->args.coef[3]); @@ -412,7 +412,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) * * XXX: Set the CFG7 register */ - if (v3d->ver >= 71) + if (v3d->ver >= V3D_GEN_71) V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0); /* CFG0 write kicks off the job. */ From 67c6d96d2bb949fc47d2b81efc614aebe84f73d8 Mon Sep 17 00:00:00 2001 From: Jose Maria Casanova Crespo Date: Fri, 25 Apr 2025 14:25:07 +0200 Subject: [PATCH 2178/2924] drm/v3d: fix client obtained from axi_ids on V3D 4.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d0e4c6537005dd106b101d4434a0c3dafd936d05 ] In the case of MMU errors caused by the TFU unit, the client that causes the MMU error is expected to be reported. But in the case of MMU TFU errors, a non existing client was being reported. This happened because the client calculation was taking into account more than the bits 0-7 from the axi_id that were representing the client. [ 27.845132] v3d fec00000.v3d: MMU error from client ? (13) at 0x3bb1000, pte invalid Masking the bits and using the correct axi_id ranges fixes the calculation to report the real guilty client on V3D 4.1 and 4.2. Make the MMU error print axi_id with hexadecimal as used in the ranges. Fixes: 38c2c7917adc ("drm/v3d: Fix and extend MMU error handling.") Signed-off-by: Jose Maria Casanova Crespo Reviewed-by: Maíra Canal Link: https://lore.kernel.org/r/20250425122522.18425-1-jmcasanova@igalia.com Signed-off-by: Maíra Canal Signed-off-by: Sasha Levin --- drivers/gpu/drm/v3d/v3d_irq.c | 37 +++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 29f63f572d35b7..d6ce1324905df9 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -186,27 +186,38 @@ v3d_hub_irq(int irq, void *arg) u32 axi_id = V3D_READ(V3D_MMU_VIO_ID); u64 vio_addr = ((u64)V3D_READ(V3D_MMU_VIO_ADDR) << (v3d->va_width - 32)); - static const char *const v3d41_axi_ids[] = { - "L2T", - "PTB", - "PSE", - "TLB", - "CLE", - "TFU", - "MMU", - "GMP", + static const struct { + u32 begin; + u32 end; + const char *client; + } v3d41_axi_ids[] = { + {0x00, 0x20, "L2T"}, + {0x20, 0x21, "PTB"}, + {0x40, 0x41, "PSE"}, + {0x60, 0x80, "TLB"}, + {0x80, 0x88, "CLE"}, + {0xA0, 0xA1, "TFU"}, + {0xC0, 0xE0, "MMU"}, + {0xE0, 0xE1, "GMP"}, }; const char *client = "?"; V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL)); if (v3d->ver >= V3D_GEN_41) { - axi_id = axi_id >> 5; - if (axi_id < ARRAY_SIZE(v3d41_axi_ids)) - client = v3d41_axi_ids[axi_id]; + size_t i; + + axi_id = axi_id & 0xFF; + for (i = 0; i < ARRAY_SIZE(v3d41_axi_ids); i++) { + if (axi_id >= v3d41_axi_ids[i].begin && + axi_id < v3d41_axi_ids[i].end) { + client = v3d41_axi_ids[i].client; + break; + } + } } - dev_err(v3d->drm.dev, "MMU error from client %s (%d) at 0x%llx%s%s%s\n", + dev_err(v3d->drm.dev, "MMU error from client %s (0x%x) at 0x%llx%s%s%s\n", client, axi_id, (long long)vio_addr, ((intsts & V3D_HUB_INT_MMU_WRV) ? ", write violation" : ""), From 14fd6d6da3a41dbd9eb4c9fa2159e48370cbdb21 Mon Sep 17 00:00:00 2001 From: Jose Maria Casanova Crespo Date: Fri, 25 Apr 2025 14:25:08 +0200 Subject: [PATCH 2179/2924] drm/v3d: client ranges from axi_ids are different with V3D 7.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a22e0051f9eb2281b181218d97f77cebc299310d ] The client mask has been reduced from 8 bits on V3D 4.1 to 7 bits on V3D 7.1, so the ranges for each client are not compatible. On V3D 7.1, the CSD client can also report MMU errors. Therefore, add its AXI ID to the IDs list. Fixes: 0ad5bc1ce463 ("drm/v3d: fix up register addresses for V3D 7.x") Signed-off-by: Jose Maria Casanova Crespo Reviewed-by: Maíra Canal Link: https://lore.kernel.org/r/20250425122522.18425-2-jmcasanova@igalia.com Signed-off-by: Maíra Canal Signed-off-by: Sasha Levin --- drivers/gpu/drm/v3d/v3d_irq.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index d6ce1324905df9..2cca5d3a26a22c 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -199,12 +199,33 @@ v3d_hub_irq(int irq, void *arg) {0xA0, 0xA1, "TFU"}, {0xC0, 0xE0, "MMU"}, {0xE0, 0xE1, "GMP"}, + }, v3d71_axi_ids[] = { + {0x00, 0x30, "L2T"}, + {0x30, 0x38, "CLE"}, + {0x38, 0x39, "PTB"}, + {0x39, 0x3A, "PSE"}, + {0x3A, 0x3B, "CSD"}, + {0x40, 0x60, "TLB"}, + {0x60, 0x70, "MMU"}, + {0x7C, 0x7E, "TFU"}, + {0x7F, 0x80, "GMP"}, }; const char *client = "?"; V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL)); - if (v3d->ver >= V3D_GEN_41) { + if (v3d->ver >= V3D_GEN_71) { + size_t i; + + axi_id = axi_id & 0x7F; + for (i = 0; i < ARRAY_SIZE(v3d71_axi_ids); i++) { + if (axi_id >= v3d71_axi_ids[i].begin && + axi_id < v3d71_axi_ids[i].end) { + client = v3d71_axi_ids[i].client; + break; + } + } + } else if (v3d->ver >= V3D_GEN_41) { size_t i; axi_id = axi_id & 0xFF; From 4ecd0cde89feee26525ccdf1af0c1ae156ca010b Mon Sep 17 00:00:00 2001 From: Andrey Vatoropin Date: Tue, 18 Mar 2025 13:42:18 +0000 Subject: [PATCH 2180/2924] fs/ntfs3: handle hdr_first_de() return value [ Upstream commit af5cab0e5b6f8edb0be51a9f47f3f620e0b4fd70 ] The hdr_first_de() function returns a pointer to a struct NTFS_DE. This pointer may be NULL. To handle the NULL error effectively, it is important to implement an error handler. This will help manage potential errors consistently. Additionally, error handling for the return value already exists at other points where this function is called. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 82cae269cfa9 ("fs/ntfs3: Add initialization of super block") Signed-off-by: Andrey Vatoropin Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/index.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 78d20e4baa2c9a..1bf2a6593dec66 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -2182,6 +2182,10 @@ static int indx_get_entry_to_replace(struct ntfs_index *indx, e = hdr_first_de(&n->index->ihdr); fnd_push(fnd, n, e); + if (!e) { + err = -EINVAL; + goto out; + } if (!de_is_last(e)) { /* @@ -2203,6 +2207,10 @@ static int indx_get_entry_to_replace(struct ntfs_index *indx, n = fnd->nodes[level]; te = hdr_first_de(&n->index->ihdr); + if (!te) { + err = -EINVAL; + goto out; + } /* Copy the candidate entry into the replacement entry buffer. */ re = kmalloc(le16_to_cpu(te->size) + sizeof(u64), GFP_NOFS); if (!re) { From 3fd74b07e8a7557544b24f93cc4f67f8c9169e94 Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Tue, 15 Apr 2025 17:26:37 +0800 Subject: [PATCH 2181/2924] fs/ntfs3: Add missing direct_IO in ntfs_aops_cmpr [ Upstream commit 8b26c8c376b29cf29710fbfd093df194cefe26ad ] The ntfs3 can use the page cache directly, so its address_space_operations need direct_IO. Exit ntfs_direct_IO() if it is a compressed file. Fixes: b432163ebd15 ("fs/ntfs3: Update inode->i_mapping->a_ops on compression state") Reported-by: syzbot+e36cc3297bd3afd25e19@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e36cc3297bd3afd25e19 Signed-off-by: Lizhi Xu Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/inode.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 3e2957a1e3605c..0f0d27d4644a9b 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -805,6 +805,10 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ret = 0; goto out; } + if (is_compressed(ni)) { + ret = 0; + goto out; + } ret = blockdev_direct_IO(iocb, inode, iter, wr ? ntfs_get_block_direct_IO_W : @@ -2068,5 +2072,6 @@ const struct address_space_operations ntfs_aops_cmpr = { .read_folio = ntfs_read_folio, .readahead = ntfs_readahead, .dirty_folio = block_dirty_folio, + .direct_IO = ntfs_direct_IO, }; // clang-format on From b7c88cf9c2c26ce6745a4f5badfff7e4fb4375f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 16 Apr 2025 14:44:19 +0200 Subject: [PATCH 2182/2924] kunit/usercopy: Disable u64 test on 32-bit SPARC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0d6efa20e384a41a7f4afdcd8a0aec442c19d33e ] usercopy of 64 bit values does not work on 32-bit SPARC: # usercopy_test_valid: EXPECTATION FAILED at lib/tests/usercopy_kunit.c:209 Expected val_u64 == 0x5a5b5c5d6a6b6c6d, but val_u64 == 1515936861 (0x5a5b5c5d) 0x5a5b5c5d6a6b6c6d == 6510899242581322861 (0x5a5b5c5d6a6b6c6d) Disable the test. Fixes: 4c5d7bc63775 ("usercopy: Add tests for all get_user() sizes") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20250416-kunit-sparc-usercopy-v1-1-a772054db3af@linutronix.de Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- lib/tests/usercopy_kunit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/tests/usercopy_kunit.c b/lib/tests/usercopy_kunit.c index 77fa00a13df775..80f8abe10968c1 100644 --- a/lib/tests/usercopy_kunit.c +++ b/lib/tests/usercopy_kunit.c @@ -27,6 +27,7 @@ !defined(CONFIG_MICROBLAZE) && \ !defined(CONFIG_NIOS2) && \ !defined(CONFIG_PPC32) && \ + !defined(CONFIG_SPARC32) && \ !defined(CONFIG_SUPERH)) # define TEST_U64 #endif From 599facc46884ac876f8cd65aababd669f45ac876 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Apr 2025 15:52:49 -0700 Subject: [PATCH 2183/2924] watchdog: exar: Shorten identity name to fit correctly [ Upstream commit 8e28276a569addb8a2324439ae473848ee52b056 ] The static initializer for struct watchdog_info::identity is too long and gets initialized without a trailing NUL byte. Since the length of "identity" is part of UAPI and tied to ioctls, just shorten the name of the device. Avoids the warning seen with GCC 15's -Wunterminated-string-initialization option: drivers/watchdog/exar_wdt.c:224:27: warning: initializer-string for array of 'unsigned char' truncates NUL terminator but destination lacks 'nonstring' attribute (33 chars into 32 available) [-Wunterminated-string-initialization] 224 | .identity = "Exar/MaxLinear XR28V38x Watchdog", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 81126222bd3a ("watchdog: Exar/MaxLinear XR28V38x driver") Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250415225246.work.458-kees@kernel.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- drivers/watchdog/exar_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/exar_wdt.c b/drivers/watchdog/exar_wdt.c index 7c61ff34327116..c2e3bb08df899a 100644 --- a/drivers/watchdog/exar_wdt.c +++ b/drivers/watchdog/exar_wdt.c @@ -221,7 +221,7 @@ static const struct watchdog_info exar_wdt_info = { .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE, - .identity = "Exar/MaxLinear XR28V38x Watchdog", + .identity = "Exar XR28V38x Watchdog", }; static const struct watchdog_ops exar_wdt_ops = { From de3ef06c553902d27d2916d86553ef7d551e46b0 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 24 Apr 2025 10:07:26 +1000 Subject: [PATCH 2184/2924] m68k: mac: Fix macintosh_config for Mac II [ Upstream commit 52ae3f5da7e5adbe3d1319573b55dac470abb83c ] When booted on my Mac II, the kernel prints this: Detected Macintosh model: 6 Apple Macintosh Unknown The catch-all entry ("Unknown") is mac_data_table[0] which is only needed in the unlikely event that the bootinfo model ID can't be matched. When model ID is 6, the search should begin and end at mac_data_table[1]. Fix the off-by-one error that causes this problem. Cc: Joshua Thompson Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Finn Thain Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/d0f30a551064ca4810b1c48d5a90954be80634a9.1745453246.git.fthain@linux-m68k.org Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/m68k/mac/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index e324410ef239c0..d26c7f4f8c360a 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -793,7 +793,7 @@ static void __init mac_identify(void) } macintosh_config = mac_data_table; - for (m = macintosh_config; m->ident != -1; m++) { + for (m = &mac_data_table[1]; m->ident != -1; m++) { if (m->ident == model) { macintosh_config = m; break; From 205f4f44a7cc305f83a60da961bc2034b0fca980 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 18 Mar 2025 23:17:12 +0800 Subject: [PATCH 2185/2924] firmware: psci: Fix refcount leak in psci_dt_init [ Upstream commit 7ff37d29fd5c27617b9767e1b8946d115cf93a1e ] Fix a reference counter leak in psci_dt_init() where of_node_put(np) was missing after of_find_matching_node_and_match() when np is unavailable. Fixes: d09a0011ec0d ("drivers: psci: Allow PSCI node to be disabled") Signed-off-by: Miaoqian Lin Reviewed-by: Gavin Shan Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20250318151712.28763-1-linmq006@gmail.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/firmware/psci/psci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index a1ebbe9b73b136..38ca190d4a22d6 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -804,8 +804,10 @@ int __init psci_dt_init(void) np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); - if (!np || !of_device_is_available(np)) + if (!np || !of_device_is_available(np)) { + of_node_put(np); return -ENODEV; + } init_fn = (psci_initcall_t)matched_np->data; ret = init_fn(np); From b3b9e722582981039124bfc3251d12d724619e0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kornel=20Dul=C4=99ba?= Date: Thu, 17 Apr 2025 11:47:54 +0000 Subject: [PATCH 2186/2924] arm64: Support ARM64_VA_BITS=52 when setting ARCH_MMAP_RND_BITS_MAX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f101c56447717c595d803894ba0e215f56c6fba4 ] When the 52-bit virtual addressing was introduced the select like ARCH_MMAP_RND_BITS_MAX logic was never updated to account for it. Because of that the rnd max bits knob is set to the default value of 18 when ARM64_VA_BITS=52. Fix this by setting ARCH_MMAP_RND_BITS_MAX to the same value that would be used if 48-bit addressing was used. Higher values can't used here because 52-bit addressing is used only if the caller provides a hint to mmap, with a fallback to 48-bit. The knob in question is an upper bound for what the user can set in /proc/sys/vm/mmap_rnd_bits, which in turn is used to determine how many random bits can be inserted into the base address used for mmap allocations. Since 48-bit allocations are legal with ARM64_VA_BITS=52, we need to make sure that the base address is small enough to facilitate this. Fixes: b6d00d47e81a ("arm64: mm: Introduce 52-bit Kernel VAs") Signed-off-by: Kornel Dulęba Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20250417114754.3238273-1-korneld@google.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a182295e6f08bf..6527d0d5656a13 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -333,9 +333,9 @@ config ARCH_MMAP_RND_BITS_MAX default 24 if ARM64_VA_BITS=39 default 27 if ARM64_VA_BITS=42 default 30 if ARM64_VA_BITS=47 - default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES - default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES - default 33 if ARM64_VA_BITS=48 + default 29 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52) && ARM64_64K_PAGES + default 31 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52) && ARM64_16K_PAGES + default 33 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52) default 14 if ARM64_64K_PAGES default 16 if ARM64_16K_PAGES default 18 From e3d0c562573ad87fec405fd3ad187cea703bc2bd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 30 Apr 2025 18:32:40 +0100 Subject: [PATCH 2187/2924] arm64/fpsimd: Avoid warning when sve_to_fpsimd() is unused [ Upstream commit f699c66691fb7e08a5a631c5baf5f2a19b7a6468 ] Historically fpsimd_to_sve() and sve_to_fpsimd() were (conditionally) called by functions which were defined regardless of CONFIG_ARM64_SVE. Hence it was necessary that both fpsimd_to_sve() and sve_to_fpsimd() were always defined and not guarded by ifdeffery. As a result of the removal of fpsimd_signal_preserve_current_state() in commit: 929fa99b1215966f ("arm64/fpsimd: signal: Always save+flush state early") ... sve_to_fpsimd() has no callers when CONFIG_ARM64_SVE=n, resulting in a build-time warnign that it is unused: | arch/arm64/kernel/fpsimd.c:676:13: warning: unused function 'sve_to_fpsimd' [-Wunused-function] | 676 | static void sve_to_fpsimd(struct task_struct *task) | | ^~~~~~~~~~~~~ | 1 warning generated. In contrast, fpsimd_to_sve() still has callers which are defined when CONFIG_ARM64_SVE=n, and it would be awkward to hide this behind ifdeffery and/or to use stub functions. For now, suppress the warning by marking both fpsimd_to_sve() and sve_to_fpsimd() as 'static inline', as we usually do for stub functions. The compiler will no longer warn if either function is unused. Aside from suppressing the warning, there should be no functional change as a result of this patch. Link: https://lore.kernel.org/linux-arm-kernel/20250429194600.GA26883@willie-the-truck/ Reported-by: Will Deacon Fixes: 929fa99b1215 ("arm64/fpsimd: signal: Always save+flush state early") Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250430173240.4023627-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/kernel/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1edf797d2c7103..0e649d0e59b06e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -651,7 +651,7 @@ static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst, * task->thread.uw.fpsimd_state must be up to date before calling this * function. */ -static void fpsimd_to_sve(struct task_struct *task) +static inline void fpsimd_to_sve(struct task_struct *task) { unsigned int vq; void *sst = task->thread.sve_state; @@ -675,7 +675,7 @@ static void fpsimd_to_sve(struct task_struct *task) * bytes of allocated kernel memory. * task->thread.sve_state must be up to date before calling this function. */ -static void sve_to_fpsimd(struct task_struct *task) +static inline void sve_to_fpsimd(struct task_struct *task) { unsigned int vq, vl; void const *sst = task->thread.sve_state; From 03b71e45846eb864cec8cc278399f23c8d834ee9 Mon Sep 17 00:00:00 2001 From: Neill Kapron Date: Sun, 27 Apr 2025 09:40:58 +0000 Subject: [PATCH 2188/2924] selftests/seccomp: fix syscall_restart test for arm compat [ Upstream commit 797002deed03491215a352ace891749b39741b69 ] The inconsistencies in the systcall ABI between arm and arm-compat can can cause a failure in the syscall_restart test due to the logic attempting to work around the differences. The 'machine' field for an ARM64 device running in compat mode can report 'armv8l' or 'armv8b' which matches with the string 'arm' when only examining the first three characters of the string. This change adds additional validation to the workaround logic to make sure we only take the arm path when running natively, not in arm-compat. Fixes: 256d0afb11d6 ("selftests/seccomp: build and pass on arm64") Signed-off-by: Neill Kapron Link: https://lore.kernel.org/r/20250427094103.3488304-2-nkapron@google.com Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- tools/testing/selftests/seccomp/seccomp_bpf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index b2f76a52215ad2..53bf6a9c801f8f 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3166,12 +3166,15 @@ TEST(syscall_restart) ret = get_syscall(_metadata, child_pid); #if defined(__arm__) /* - * FIXME: * - native ARM registers do NOT expose true syscall. * - compat ARM registers on ARM64 DO expose true syscall. + * - values of utsbuf.machine include 'armv8l' or 'armb8b' + * for ARM64 running in compat mode. */ ASSERT_EQ(0, uname(&utsbuf)); - if (strncmp(utsbuf.machine, "arm", 3) == 0) { + if ((strncmp(utsbuf.machine, "arm", 3) == 0) && + (strncmp(utsbuf.machine, "armv8l", 6) != 0) && + (strncmp(utsbuf.machine, "armv8b", 6) != 0)) { EXPECT_EQ(__NR_nanosleep, ret); } else #endif From a30456c4cb5a5c3281938aff98947b2c759a553e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 25 Apr 2025 22:49:08 +0300 Subject: [PATCH 2189/2924] drm/msm/dpu: enable SmartDMA on SM8150 [ Upstream commit 6a2343de0b6f70a21bf503ac4688dc905cb068e1 ] Reworking of the catalog dropped the SmartDMA feature bit on the SM8150 platform. Renable SmartDMA support on this SoC. Fixes: 460c410f02e4 ("drm/msm/dpu: duplicate sdm845 catalog entries") Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/650418/ Link: https://lore.kernel.org/r/20250425-dpu-rework-vig-masks-v2-1-c71900687d08@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- .../drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index 979527d98fbcb1..8e23dbfeef3543 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -84,7 +84,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -92,7 +92,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -100,7 +100,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 12, .type = SSPP_TYPE_VIG, @@ -108,7 +108,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, @@ -116,7 +116,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, @@ -124,7 +124,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, @@ -132,7 +132,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, From df32f4765b20ed9d33843fae8d2814c809acd282 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 25 Apr 2025 22:49:09 +0300 Subject: [PATCH 2190/2924] drm/msm/dpu: enable SmartDMA on SC8180X [ Upstream commit 8dcccd7a156ffb3157de7f527cc7c6100e9a455a ] Reworking of the catalog dropped the SmartDMA feature bit on the SC8180X platform. Renable SmartDMA support on this SoC. Fixes: 460c410f02e4 ("drm/msm/dpu: duplicate sdm845 catalog entries") Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/650421/ Link: https://lore.kernel.org/r/20250425-dpu-rework-vig-masks-v2-2-c71900687d08@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- .../drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index d76b8992a6c18c..e736eb73a7e615 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 12, .type = SSPP_TYPE_VIG, @@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, @@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, @@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, @@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, From 09fddeed6d173125cbba954292bc547aee2afa65 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 1 Mar 2025 11:24:54 +0200 Subject: [PATCH 2191/2924] drm/msm/dpu: remove DSC feature bit for PINGPONG on MSM8937 [ Upstream commit b43c524134e0b0ae38acecc4e1dc585940ff6f88 ] The MSM8937 platform doesn't have DSC blocks nor does have it DSC registers in the PINGPONG block. Drop the DPU_PINGPONG_DSC feature bit from the PINGPONG's feature mask and, as it is the only remaining bit, drop the .features assignment completely. Fixes: c079680bb0fa ("drm/msm/dpu: Add support for MSM8937") Reported-by: Abhinav Kumar Reviewed-by: Konrad Dybcio Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/640299/ Link: https://lore.kernel.org/r/20250301-dpu-fix-catalog-v2-1-498271be8b50@linaro.org Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h index ad60089f18ea6c..39027a21c6feec 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h @@ -100,14 +100,12 @@ static const struct dpu_pingpong_cfg msm8937_pp[] = { { .name = "pingpong_0", .id = PINGPONG_0, .base = 0x70000, .len = 0xd4, - .features = PINGPONG_MSM8996_MASK, .sblk = &msm8996_pp_sblk, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), .intr_rdptr = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12), }, { .name = "pingpong_1", .id = PINGPONG_1, .base = 0x70800, .len = 0xd4, - .features = PINGPONG_MSM8996_MASK, .sblk = &msm8996_pp_sblk, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9), .intr_rdptr = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13), From 3261bf1486729f788b4d3f50492f8399e6f48d30 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 1 Mar 2025 11:24:55 +0200 Subject: [PATCH 2192/2924] drm/msm/dpu: remove DSC feature bit for PINGPONG on MSM8917 [ Upstream commit 5be98120115c46907921e29344291628cf79912a ] The MSM8917 platform doesn't have DSC blocks nor does have it DSC registers in the PINGPONG block. Drop the DPU_PINGPONG_DSC feature bit from the PINGPONG's feature mask and, as it is the only remaining bit, drop the .features assignment completely. Fixes: 62af6e1cb596 ("drm/msm/dpu: Add support for MSM8917") Reported-by: Abhinav Kumar Reviewed-by: Konrad Dybcio Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/640301/ Link: https://lore.kernel.org/r/20250301-dpu-fix-catalog-v2-2-498271be8b50@linaro.org Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h index a1cf89a0a42d5f..8d1b43ea1663cf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h @@ -93,7 +93,6 @@ static const struct dpu_pingpong_cfg msm8917_pp[] = { { .name = "pingpong_0", .id = PINGPONG_0, .base = 0x70000, .len = 0xd4, - .features = PINGPONG_MSM8996_MASK, .sblk = &msm8996_pp_sblk, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), .intr_rdptr = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12), From 96895c839dacacc47543dd19b0fa5ab5f09e179f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 1 Mar 2025 11:24:56 +0200 Subject: [PATCH 2193/2924] drm/msm/dpu: remove DSC feature bit for PINGPONG on MSM8953 [ Upstream commit 5232a29ebc74df4abf790147a06db451d824cd92 ] The MSM8953 platform doesn't have DSC blocks nor does have it DSC registers in the PINGPONG block. Drop the DPU_PINGPONG_DSC feature bit from the PINGPONG's feature mask and, as it is the only remaining bit, drop the .features assignment completely. Fixes: 7a6109ce1c2c ("drm/msm/dpu: Add support for MSM8953") Reported-by: Abhinav Kumar Reviewed-by: Konrad Dybcio Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/640303/ Link: https://lore.kernel.org/r/20250301-dpu-fix-catalog-v2-3-498271be8b50@linaro.org Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h index eea9b80e2287a8..16c12499b24bb4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h @@ -100,14 +100,12 @@ static const struct dpu_pingpong_cfg msm8953_pp[] = { { .name = "pingpong_0", .id = PINGPONG_0, .base = 0x70000, .len = 0xd4, - .features = PINGPONG_MSM8996_MASK, .sblk = &msm8996_pp_sblk, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), .intr_rdptr = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12), }, { .name = "pingpong_1", .id = PINGPONG_1, .base = 0x70800, .len = 0xd4, - .features = PINGPONG_MSM8996_MASK, .sblk = &msm8996_pp_sblk, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9), .intr_rdptr = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13), From bdfcaa2de6ebb94ef04a0db6c630f929546a1ca7 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Thu, 16 Nov 2023 12:24:24 +0000 Subject: [PATCH 2194/2924] drm: rcar-du: Fix memory leak in rcar_du_vsps_init() [ Upstream commit 91e3bf09a90bb4340c0c3c51396e7531555efda4 ] The rcar_du_vsps_init() doesn't free the np allocated by of_parse_phandle_with_fixed_args() for the non-error case. Fix memory leak for the non-error case. While at it, replace the label 'error'->'done' as it applies to non-error case as well and update the error check condition for rcar_du_vsp_init() to avoid breakage in future, if it returns positive value. Fixes: 3e81374e2014 ("drm: rcar-du: Support multiple sources from the same VSP") Signed-off-by: Biju Das Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20231116122424.80136-1-biju.das.jz@bp.renesas.com Signed-off-by: Tomi Valkeinen Signed-off-by: Sasha Levin --- drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c index 70d8ad065bfa1d..4c8fe83dd6101b 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c @@ -705,7 +705,7 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) ret = of_parse_phandle_with_fixed_args(np, vsps_prop_name, cells, i, &args); if (ret < 0) - goto error; + goto done; /* * Add the VSP to the list or update the corresponding existing @@ -743,13 +743,11 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) vsp->dev = rcdu; ret = rcar_du_vsp_init(vsp, vsps[i].np, vsps[i].crtcs_mask); - if (ret < 0) - goto error; + if (ret) + goto done; } - return 0; - -error: +done: for (i = 0; i < ARRAY_SIZE(vsps); ++i) of_node_put(vsps[i].np); From 89eb6c953f116a62d21780e07985c9ebe055947c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 23:14:32 -0700 Subject: [PATCH 2195/2924] drm/vkms: Adjust vkms_state->active_planes allocation type [ Upstream commit 258aebf100540d36aba910f545d4d5ddf4ecaf0b ] In preparation for making the kmalloc family of allocators type aware, we need to make sure that the returned type from the allocation matches the type of the variable being assigned. (Before, the allocator would always return "void *", which can be implicitly cast to any pointer type.) The assigned type is "struct vkms_plane_state **", but the returned type will be "struct drm_plane **". These are the same size (pointer size), but the types don't match. Adjust the allocation type to match the assignment. Signed-off-by: Kees Cook Reviewed-by: Louis Chauvet Fixes: 8b1865873651 ("drm/vkms: totally reworked crc data tracking") Link: https://lore.kernel.org/r/20250426061431.work.304-kees@kernel.org Signed-off-by: Louis Chauvet Signed-off-by: Sasha Levin --- drivers/gpu/drm/vkms/vkms_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index 12034ec1202990..8c9898b9055d4c 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -194,7 +194,7 @@ static int vkms_crtc_atomic_check(struct drm_crtc *crtc, i++; } - vkms_state->active_planes = kcalloc(i, sizeof(plane), GFP_KERNEL); + vkms_state->active_planes = kcalloc(i, sizeof(*vkms_state->active_planes), GFP_KERNEL); if (!vkms_state->active_planes) return -ENOMEM; vkms_state->num_active_planes = i; From d0e3476889c6a800c0d4504b16b795cd0508527a Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 5 Feb 2025 11:21:35 +0000 Subject: [PATCH 2196/2924] drm/tegra: rgb: Fix the unbound reference count [ Upstream commit 3c3642335065c3bde0742b0edc505b6ea8fdc2b3 ] The of_get_child_by_name() increments the refcount in tegra_dc_rgb_probe, but the driver does not decrement the refcount during unbind. Fix the unbound reference count using devm_add_action_or_reset() helper. Fixes: d8f4a9eda006 ("drm: Add NVIDIA Tegra20 support") Signed-off-by: Biju Das Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20250205112137.36055-1-biju.das.jz@bp.renesas.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/tegra/rgb.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index 1e8ec50b759e46..ff5a749710db3a 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -200,6 +200,11 @@ static const struct drm_encoder_helper_funcs tegra_rgb_encoder_helper_funcs = { .atomic_check = tegra_rgb_encoder_atomic_check, }; +static void tegra_dc_of_node_put(void *data) +{ + of_node_put(data); +} + int tegra_dc_rgb_probe(struct tegra_dc *dc) { struct device_node *np; @@ -207,7 +212,14 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) int err; np = of_get_child_by_name(dc->dev->of_node, "rgb"); - if (!np || !of_device_is_available(np)) + if (!np) + return -ENODEV; + + err = devm_add_action_or_reset(dc->dev, tegra_dc_of_node_put, np); + if (err < 0) + return err; + + if (!of_device_is_available(np)) return -ENODEV; rgb = devm_kzalloc(dc->dev, sizeof(*rgb), GFP_KERNEL); From d2323763148448510c32c1e43eb3843b4215ba43 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Mon, 28 Apr 2025 16:26:20 -0400 Subject: [PATCH 2197/2924] drm/amd/display: Don't check for NULL divisor in fixpt code [ Upstream commit d01ca8708d95a561f6462a15cad94a2c0bec7042 ] [Why] We check for a NULL divisor but don't act on it. This check does nothing other than throw a warning. It does confuse static checkers though: See https://lkml.org/lkml/2025/4/26/371 [How] Drop the ASSERTs in both DC and SPL variants. Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") Fixes: 6efc0ab3b05d ("drm/amd/display: add back quality EASF and ISHARP and dc dependency changes") Signed-off-by: Harry Wentland Cc: Linus Torvalds Cc: Leo Li Cc: Alex Deucher Acked-by: Alex Deucher Reviewed-by: Alex Hung Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c | 5 ----- drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c | 4 ---- 2 files changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 88d3f9d7dd556a..452206b5095eb0 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -51,8 +51,6 @@ static inline unsigned long long complete_integer_division_u64( { unsigned long long result; - ASSERT(divisor); - result = div64_u64_rem(dividend, divisor, remainder); return result; @@ -213,9 +211,6 @@ struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) * @note * Good idea to use Newton's method */ - - ASSERT(arg.value); - return dc_fixpt_from_fraction( dc_fixpt_one.value, arg.value); diff --git a/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c index 52d97918a3bd21..ebf0287417e0eb 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c @@ -29,8 +29,6 @@ static inline unsigned long long spl_complete_integer_division_u64( { unsigned long long result; - SPL_ASSERT(divisor); - result = spl_div64_u64_rem(dividend, divisor, remainder); return result; @@ -196,8 +194,6 @@ struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg) * Good idea to use Newton's method */ - SPL_ASSERT(arg.value); - return spl_fixpt_from_fraction( spl_fixpt_one.value, arg.value); From 894eafea66c32a0dff717793b2537387020e213b Mon Sep 17 00:00:00 2001 From: Huang Yiwei Date: Wed, 7 May 2025 12:57:57 +0800 Subject: [PATCH 2198/2924] firmware: SDEI: Allow sdei initialization without ACPI_APEI_GHES [ Upstream commit 59529bbe642de4eb2191a541d9b4bae7eb73862e ] SDEI usually initialize with the ACPI table, but on platforms where ACPI is not used, the SDEI feature can still be used to handle specific firmware calls or other customized purposes. Therefore, it is not necessary for ARM_SDE_INTERFACE to depend on ACPI_APEI_GHES. In commit dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in acpi_init()"), to make APEI ready earlier, sdei_init was moved into acpi_ghes_init instead of being a standalone initcall, adding ACPI_APEI_GHES dependency to ARM_SDE_INTERFACE. This restricts the flexibility and usability of SDEI. This patch corrects the dependency in Kconfig and splits sdei_init() into two separate functions: sdei_init() and acpi_sdei_init(). sdei_init() will be called by arch_initcall and will only initialize the platform driver, while acpi_sdei_init() will initialize the device from acpi_ghes_init() when ACPI is ready. This allows the initialization of SDEI without ACPI_APEI_GHES enabled. Fixes: dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in apci_init()") Cc: Shuai Xue Signed-off-by: Huang Yiwei Reviewed-by: Shuai Xue Reviewed-by: Gavin Shan Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20250507045757.2658795-1-quic_hyiwei@quicinc.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/acpi/apei/Kconfig | 1 + drivers/acpi/apei/ghes.c | 2 +- drivers/firmware/Kconfig | 1 - drivers/firmware/arm_sdei.c | 11 ++++++++--- include/linux/arm_sdei.h | 4 ++-- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 3cfe7e7475f2fd..070c07d68dfb2f 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -23,6 +23,7 @@ config ACPI_APEI_GHES select ACPI_HED select IRQ_WORK select GENERIC_ALLOCATOR + select ARM_SDE_INTERFACE if ARM64 help Generic Hardware Error Source provides a way to report platform hardware errors (such as that from chipset). It diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 289e365f84b249..0f3c663c1b0a33 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1715,7 +1715,7 @@ void __init acpi_ghes_init(void) { int rc; - sdei_init(); + acpi_sdei_init(); if (acpi_disabled) return; diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index aadc395ee16813..7df19d82aa689e 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -31,7 +31,6 @@ config ARM_SCPI_PROTOCOL config ARM_SDE_INTERFACE bool "ARM Software Delegated Exception Interface (SDEI)" depends on ARM64 - depends on ACPI_APEI_GHES help The Software Delegated Exception Interface (SDEI) is an ARM standard for registering callbacks from the platform firmware diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 3e8051fe829657..71e2a9a89f6ada 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1062,13 +1062,12 @@ static bool __init sdei_present_acpi(void) return true; } -void __init sdei_init(void) +void __init acpi_sdei_init(void) { struct platform_device *pdev; int ret; - ret = platform_driver_register(&sdei_driver); - if (ret || !sdei_present_acpi()) + if (!sdei_present_acpi()) return; pdev = platform_device_register_simple(sdei_driver.driver.name, @@ -1081,6 +1080,12 @@ void __init sdei_init(void) } } +static int __init sdei_init(void) +{ + return platform_driver_register(&sdei_driver); +} +arch_initcall(sdei_init); + int sdei_event_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 255701e1251b4a..f652a5028b5907 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -46,12 +46,12 @@ int sdei_unregister_ghes(struct ghes *ghes); /* For use by arch code when CPU hotplug notifiers are not appropriate. */ int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); -void __init sdei_init(void); +void __init acpi_sdei_init(void); void sdei_handler_abort(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } -static inline void sdei_init(void) { } +static inline void acpi_sdei_init(void) { } static inline void sdei_handler_abort(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */ From 41a3ecfb0526416189cc6208b2b6525556af7118 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 May 2025 14:26:41 +0100 Subject: [PATCH 2199/2924] kselftest/arm64: fp-ptrace: Fix expected FPMR value when PSTATE.SM is changed [ Upstream commit 78b23877dbba7dbcda2b89383d17bed82ce8f663 ] The fp-ptrace test suite expects that FPMR is set to zero when PSTATE.SM is changed via ptrace, but ptrace has never altered FPMR in this way, and the test logic erroneously relies upon (and has concealed) a bug where task_fpsimd_load() would unexpectedly and non-deterministically clobber FPMR. Using ptrace, FPMR can only be altered by writing to the NT_ARM_FPMR regset. The value of PSTATE.SM can be altered by writing to the NT_ARM_SVE or NT_ARM_SSVE regsets, and/or by changing the SME vector length (when writing to the NT_ARM_SVE, NT_ARM_SSVE, or NT_ARM_ZA regsets), but none of these writes will change the value of FPMR. The task_fpsimd_load() bug was introduced with the initial FPMR support in commit: 203f2b95a882 ("arm64/fpsimd: Support FEAT_FPMR") The incorrect FPMR test code was introduced in commit: 7dbd26d0b22d ("kselftest/arm64: Add FPMR coverage to fp-ptrace") Subsequently, the task_fpsimd_load() bug was fixed in commit: e5fa85fce08b ("arm64/fpsimd: Don't corrupt FPMR when streaming mode changes") ... whereupon the fp-ptrace FPMR tests started failing reliably, e.g. | # # Mismatch in saved FPMR: 915058000 != 0 | # not ok 25 SVE write, SVE 64->64, SME 64/0->64/1 Fix this by changing the test to expect that FPMR is *NOT* changed when PSTATE.SM is changed via ptrace, matching the extant behaviour. I've chosen to update the test code rather than modifying ptrace to zero FPMR when PSTATE.SM changes. Not zeroing FPMR is simpler overall, and allows the NT_ARM_FPMR regset to be handled independently from other regsets, leaving less scope for error. Fixes: 7dbd26d0b22d ("kselftest/arm64: Add FPMR coverage to fp-ptrace") Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: David Spickett Cc: Luis Machado Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20250508132644.1395904-22-mark.rutland@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- tools/testing/selftests/arm64/fp/fp-ptrace.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index 4930e03a7b9903..762048eb354ffe 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -891,18 +891,11 @@ static void set_initial_values(struct test_config *config) { int vq = __sve_vq_from_vl(vl_in(config)); int sme_vq = __sve_vq_from_vl(config->sme_vl_in); - bool sm_change; svcr_in = config->svcr_in; svcr_expected = config->svcr_expected; svcr_out = 0; - if (sme_supported() && - (svcr_in & SVCR_SM) != (svcr_expected & SVCR_SM)) - sm_change = true; - else - sm_change = false; - fill_random(&v_in, sizeof(v_in)); memcpy(v_expected, v_in, sizeof(v_in)); memset(v_out, 0, sizeof(v_out)); @@ -953,12 +946,7 @@ static void set_initial_values(struct test_config *config) if (fpmr_supported()) { fill_random(&fpmr_in, sizeof(fpmr_in)); fpmr_in &= FPMR_SAFE_BITS; - - /* Entering or exiting streaming mode clears FPMR */ - if (sm_change) - fpmr_expected = 0; - else - fpmr_expected = fpmr_in; + fpmr_expected = fpmr_in; } else { fpmr_in = 0; fpmr_expected = 0; From 1f523638b7ac3f03ea172bb14f702ab472b2062c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 1 May 2025 18:44:43 -0600 Subject: [PATCH 2200/2924] overflow: Fix direct struct member initialization in _DEFINE_FLEX() [ Upstream commit 47e36ed7840661a9f7fb53554a1b04a5f8daffea ] Currently, to statically initialize the struct members of the `type` object created by _DEFINE_FLEX(), the internal `obj` member must be explicitly referenced at the call site. See: struct flex { int a; int b; struct foo flex_array[]; }; _DEFINE_FLEX(struct flex, instance, flex_array, FIXED_SIZE, = { .obj = { .a = 0, .b = 1, }, }); This leaks _DEFINE_FLEX() internal implementation details and make the helper harder to use and read. Fix this and allow for a more natural and intuitive C99 init-style: _DEFINE_FLEX(struct flex, instance, flex_array, FIXED_SIZE, = { .a = 0, .b = 1, }); Note that before these changes, the `initializer` argument was optional, but now it's required. Also, update "counter" member initialization in DEFINE_FLEX(). Fixes: 26dd68d293fd ("overflow: add DEFINE_FLEX() for on-stack allocs") Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/aBQVeyKfLOkO9Yss@kspp Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- include/linux/overflow.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0c7e3dcfe8670c..823a53cd9a1935 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -396,7 +396,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. - * @initializer: initializer expression (could be empty for no init). + * @initializer: Initializer expression (e.g., pass `= { }` at minimum). */ #define _DEFINE_FLEX(type, name, member, count, initializer...) \ _Static_assert(__builtin_constant_p(count), \ @@ -404,7 +404,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) union { \ u8 bytes[struct_size_t(type, member, count)]; \ type obj; \ - } name##_u initializer; \ + } name##_u = { .obj initializer }; \ type *name = (type *)&name##_u /** @@ -438,6 +438,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Use __struct_size(@NAME) to get compile-time size of it afterwards. */ #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ - _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) + _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .COUNTER = COUNT, }) #endif /* __LINUX_OVERFLOW_H */ From 36d7e404fbbd934b25e4b21afb3c9198a750cd92 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 May 2025 15:41:57 -0700 Subject: [PATCH 2201/2924] scsi: qedf: Use designated initializer for struct qed_fcoe_cb_ops [ Upstream commit d8720235d5b5cad86c1f07f65117ef2a96f8bec7 ] Recent fixes to the randstruct GCC plugin allowed it to notice that this structure is entirely function pointers and is therefore subject to randomization, but doing so requires that it always use designated initializers. Explicitly specify the "common" member as being initialized. Silences: drivers/scsi/qedf/qedf_main.c:702:9: error: positional initialization of field in 'struct' declared with 'designated_init' attribute [-Werror=designated-init] 702 | { | ^ Fixes: 035f7f87b729 ("randstruct: Enable Clang support") Link: https://lore.kernel.org/r/20250502224156.work.617-kees@kernel.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- drivers/scsi/qedf/qedf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 436bd29d5ebae6..6b1ebab36fa35b 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -699,7 +699,7 @@ static u32 qedf_get_login_failures(void *cookie) } static struct qed_fcoe_cb_ops qedf_cb_ops = { - { + .common = { .link_update = qedf_link_update, .bw_update = qedf_bw_update, .schedule_recovery_handler = qedf_schedule_recovery_handler, From faed83b30d5f15d8fa02fd674586eb11d88b9369 Mon Sep 17 00:00:00 2001 From: Nicolas Dufresne Date: Thu, 10 Apr 2025 16:43:46 -0400 Subject: [PATCH 2202/2924] media: synopsys: hdmirx: Renamed frame_idx to sequence [ Upstream commit 0400bee67f49753b878c2576c02c1bc454f091ed ] This variable is used to fill the v4l2_buffer.sequence, let's name it the exact same way to reduce confusion. No functional changes. Fixes: 7b59b132ad439 ("media: platform: synopsys: Add support for HDMI input driver") Signed-off-by: Nicolas Dufresne Reviewed-by: Dmitry Osipenko Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c index 3d2913de9a86c6..f5b3f5010ede55 100644 --- a/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c +++ b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c @@ -114,7 +114,7 @@ struct hdmirx_stream { spinlock_t vbq_lock; /* to lock video buffer queue */ bool stopping; wait_queue_head_t wq_stopped; - u32 frame_idx; + u32 sequence; u32 line_flag_int_cnt; u32 irq_stat; }; @@ -1540,7 +1540,7 @@ static int hdmirx_start_streaming(struct vb2_queue *queue, unsigned int count) int line_flag; mutex_lock(&hdmirx_dev->stream_lock); - stream->frame_idx = 0; + stream->sequence = 0; stream->line_flag_int_cnt = 0; stream->curr_buf = NULL; stream->next_buf = NULL; @@ -1948,7 +1948,7 @@ static void dma_idle_int_handler(struct snps_hdmirx_dev *hdmirx_dev, if (vb_done) { vb_done->vb2_buf.timestamp = ktime_get_ns(); - vb_done->sequence = stream->frame_idx; + vb_done->sequence = stream->sequence; if (bt->interlaced) vb_done->field = V4L2_FIELD_INTERLACED_TB; @@ -1956,8 +1956,8 @@ static void dma_idle_int_handler(struct snps_hdmirx_dev *hdmirx_dev, vb_done->field = V4L2_FIELD_NONE; hdmirx_vb_done(stream, vb_done); - stream->frame_idx++; - if (stream->frame_idx == 30) + stream->sequence++; + if (stream->sequence == 30) v4l2_dbg(1, debug, v4l2_dev, "rcv frames\n"); } From 56c938959f14143979ca7987ee21e56c90c3350f Mon Sep 17 00:00:00 2001 From: Nicolas Dufresne Date: Thu, 10 Apr 2025 16:43:47 -0400 Subject: [PATCH 2203/2924] media: synopsys: hdmirx: Count dropped frames [ Upstream commit 57c8d79adf05244b171964d1d6c7e6fabbe5f5fd ] The sequence number communicate the lost frames to userspace. For this reason, it must be incremented even when a frame is skipped. This allows userspace such as GStreamer to report the loss. Fixes: 7b59b132ad439 ("media: platform: synopsys: Add support for HDMI input driver") Signed-off-by: Nicolas Dufresne Reviewed-by: Dmitry Osipenko Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c index f5b3f5010ede55..7af6765532e332 100644 --- a/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c +++ b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c @@ -1956,10 +1956,6 @@ static void dma_idle_int_handler(struct snps_hdmirx_dev *hdmirx_dev, vb_done->field = V4L2_FIELD_NONE; hdmirx_vb_done(stream, vb_done); - stream->sequence++; - if (stream->sequence == 30) - v4l2_dbg(1, debug, v4l2_dev, - "rcv frames\n"); } stream->curr_buf = NULL; @@ -1971,6 +1967,10 @@ static void dma_idle_int_handler(struct snps_hdmirx_dev *hdmirx_dev, v4l2_dbg(3, debug, v4l2_dev, "%s: next_buf NULL, skip vb_done\n", __func__); } + + stream->sequence++; + if (stream->sequence == 30) + v4l2_dbg(1, debug, v4l2_dev, "rcv frames\n"); } DMA_IDLE_OUT: From 6f5f53048d3b761d694430632d3a03977273e987 Mon Sep 17 00:00:00 2001 From: Anand Moon Date: Mon, 7 Apr 2025 12:02:03 +0530 Subject: [PATCH 2204/2924] perf/amlogic: Replace smp_processor_id() with raw_smp_processor_id() in meson_ddr_pmu_create() [ Upstream commit 097469a2b0f12b91b4f27b9e9e4f2c46484cde30 ] The Amlogic DDR PMU driver meson_ddr_pmu_create() function incorrectly uses smp_processor_id(), which assumes disabled preemption. This leads to kernel warnings during module loading because meson_ddr_pmu_create() can be called in a preemptible context. Following kernel warning and stack trace: [ 31.745138] [ T2289] BUG: using smp_processor_id() in preemptible [00000000] code: (udev-worker)/2289 [ 31.745154] [ T2289] caller is debug_smp_processor_id+0x28/0x38 [ 31.745172] [ T2289] CPU: 4 UID: 0 PID: 2289 Comm: (udev-worker) Tainted: GW 6.14.0-0-MANJARO-ARM #1 59519addcbca6ba8de735e151fd7b9e97aac7ff0 [ 31.745181] [ T2289] Tainted: [W]=WARN [ 31.745183] [ T2289] Hardware name: Hardkernel ODROID-N2Plus (DT) [ 31.745188] [ T2289] Call trace: [ 31.745191] [ T2289] show_stack+0x28/0x40 (C) [ 31.745199] [ T2289] dump_stack_lvl+0x4c/0x198 [ 31.745205] [ T2289] dump_stack+0x20/0x50 [ 31.745209] [ T2289] check_preemption_disabled+0xec/0xf0 [ 31.745213] [ T2289] debug_smp_processor_id+0x28/0x38 [ 31.745216] [ T2289] meson_ddr_pmu_create+0x200/0x560 [meson_ddr_pmu_g12 8095101c49676ad138d9961e3eddaee10acca7bd] [ 31.745237] [ T2289] g12_ddr_pmu_probe+0x20/0x38 [meson_ddr_pmu_g12 8095101c49676ad138d9961e3eddaee10acca7bd] [ 31.745246] [ T2289] platform_probe+0x98/0xe0 [ 31.745254] [ T2289] really_probe+0x144/0x3f8 [ 31.745258] [ T2289] __driver_probe_device+0xb8/0x180 [ 31.745261] [ T2289] driver_probe_device+0x54/0x268 [ 31.745264] [ T2289] __driver_attach+0x11c/0x288 [ 31.745267] [ T2289] bus_for_each_dev+0xfc/0x160 [ 31.745274] [ T2289] driver_attach+0x34/0x50 [ 31.745277] [ T2289] bus_add_driver+0x160/0x2b0 [ 31.745281] [ T2289] driver_register+0x78/0x120 [ 31.745285] [ T2289] __platform_driver_register+0x30/0x48 [ 31.745288] [ T2289] init_module+0x30/0xfe0 [meson_ddr_pmu_g12 8095101c49676ad138d9961e3eddaee10acca7bd] [ 31.745298] [ T2289] do_one_initcall+0x11c/0x438 [ 31.745303] [ T2289] do_init_module+0x68/0x228 [ 31.745311] [ T2289] load_module+0x118c/0x13a8 [ 31.745315] [ T2289] __arm64_sys_finit_module+0x274/0x390 [ 31.745320] [ T2289] invoke_syscall+0x74/0x108 [ 31.745326] [ T2289] el0_svc_common+0x90/0xf8 [ 31.745330] [ T2289] do_el0_svc+0x2c/0x48 [ 31.745333] [ T2289] el0_svc+0x60/0x150 [ 31.745337] [ T2289] el0t_64_sync_handler+0x80/0x118 [ 31.745341] [ T2289] el0t_64_sync+0x1b8/0x1c0 Changes replaces smp_processor_id() with raw_smp_processor_id() to ensure safe CPU ID retrieval in preemptible contexts. Cc: Jiucheng Xu Fixes: 2016e2113d35 ("perf/amlogic: Add support for Amlogic meson G12 SoC DDR PMU driver") Signed-off-by: Anand Moon Link: https://lore.kernel.org/r/20250407063206.5211-1-linux.amoon@gmail.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/amlogic/meson_ddr_pmu_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index 07446d784a1a64..c1e755c356a333 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -511,7 +511,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev) fmt_attr_fill(pmu->info.hw_info->fmt_attr); - pmu->cpu = smp_processor_id(); + pmu->cpu = raw_smp_processor_id(); name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME); if (!name) From f1d93b5584f50756078007679a5def4d8b44ee0c Mon Sep 17 00:00:00 2001 From: Terry Tritton Date: Fri, 9 May 2025 12:56:22 +0100 Subject: [PATCH 2205/2924] selftests/seccomp: fix negative_ENOSYS tracer tests on arm32 [ Upstream commit 73989c998814d82c71d523c104c398925470d59e ] TRACE_syscall.ptrace.negative_ENOSYS and TRACE_syscall.seccomp.negative_ENOSYS on arm32 are being reported as failures instead of skipping. The teardown_trace_fixture function sets the test to KSFT_FAIL in case of a non 0 return value from the tracer process. Due to _metadata now being shared between the forked processes the tracer is returning the KSFT_SKIP value set by the tracee which is non 0. Remove the setting of the _metadata.exit_code in teardown_trace_fixture. Fixes: 24cf65a62266 ("selftests/harness: Share _metadata between forked processes") Signed-off-by: Terry Tritton Link: https://lore.kernel.org/r/20250509115622.64775-1-terry.tritton@linaro.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- tools/testing/selftests/seccomp/seccomp_bpf.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 53bf6a9c801f8f..61acbd45ffaaf8 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1629,14 +1629,8 @@ void teardown_trace_fixture(struct __test_metadata *_metadata, { if (tracer) { int status; - /* - * Extract the exit code from the other process and - * adopt it for ourselves in case its asserts failed. - */ ASSERT_EQ(0, kill(tracer, SIGUSR1)); ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); - if (WEXITSTATUS(status)) - _metadata->exit_code = KSFT_FAIL; } } From 294167f862fc89899805a33e06ba5682f0c6eebb Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 5 May 2025 13:13:40 +0200 Subject: [PATCH 2206/2924] drm/msm/a6xx: Disable rgb565_predicator on Adreno 7c3 [ Upstream commit 5a9c1bea011fb42088ba08ceaa252fb20e695626 ] This feature is supposed to be enabled with UBWC v4 or later. Implementations of this SKU feature an effective UBWC version of 3, so disable it, in line with the BSP kernel. Reported-by: Dmitry Baryshkov Reviewed-by: Akhil P Oommen Fixes: 192f4ee3e408 ("drm/msm/a6xx: Add support for Adreno 7c Gen 3 gpu") Signed-off-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/651759/ Signed-off-by: Rob Clark Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 242d02d48c0cd0..90991ba5a4ae10 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -655,7 +655,6 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) if (adreno_is_7c3(gpu)) { gpu->ubwc_config.highest_bank_bit = 14; gpu->ubwc_config.amsbc = 1; - gpu->ubwc_config.rgb565_predicator = 1; gpu->ubwc_config.uavflagprd_inv = 2; gpu->ubwc_config.macrotile_mode = 1; } From d657bc51fe4507c60a16835a579c1fd66b1c5ec7 Mon Sep 17 00:00:00 2001 From: Aleksandrs Vinarskis Date: Thu, 8 May 2025 00:58:59 +0200 Subject: [PATCH 2207/2924] drm/msm/dp: Fix support of LTTPR initialization [ Upstream commit 9351d3d302060d114de2f0c648579c0aadbd8f72 ] Initialize LTTPR before msm_dp_panel_read_sink_caps, as DPTX shall (re)read DPRX caps after LTTPR detection, as required by DP 2.1a, Section 3.6.7.6.1. Fixes: 72d0af4accd9 ("drm/msm/dp: Add support for LTTPR handling") Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Signed-off-by: Aleksandrs Vinarskis Tested-by: Jessica Zhang # SA8775P Tested-by: Johan Hovold Tested-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/652301/ Link: https://lore.kernel.org/r/20250507230113.14270-2-alex.vinarskis@gmail.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_display.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index bbc47d86ae9e67..fc07cce68382a0 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -367,12 +367,12 @@ static int msm_dp_display_send_hpd_notification(struct msm_dp_display_private *d return 0; } -static void msm_dp_display_lttpr_init(struct msm_dp_display_private *dp) +static void msm_dp_display_lttpr_init(struct msm_dp_display_private *dp, u8 *dpcd) { u8 lttpr_caps[DP_LTTPR_COMMON_CAP_SIZE]; int rc; - if (drm_dp_read_lttpr_common_caps(dp->aux, dp->panel->dpcd, lttpr_caps)) + if (drm_dp_read_lttpr_common_caps(dp->aux, dpcd, lttpr_caps)) return; rc = drm_dp_lttpr_init(dp->aux, drm_dp_lttpr_count(lttpr_caps)); @@ -385,12 +385,17 @@ static int msm_dp_display_process_hpd_high(struct msm_dp_display_private *dp) struct drm_connector *connector = dp->msm_dp_display.connector; const struct drm_display_info *info = &connector->display_info; int rc = 0; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; - rc = msm_dp_panel_read_sink_caps(dp->panel, connector); + rc = drm_dp_read_dpcd_caps(dp->aux, dpcd); if (rc) goto end; - msm_dp_display_lttpr_init(dp); + msm_dp_display_lttpr_init(dp, dpcd); + + rc = msm_dp_panel_read_sink_caps(dp->panel, connector); + if (rc) + goto end; msm_dp_link_process_request(dp->link); From f9b8d26214c1997d40b4eb4a0509770a0e0f9649 Mon Sep 17 00:00:00 2001 From: Aleksandrs Vinarskis Date: Thu, 8 May 2025 00:59:00 +0200 Subject: [PATCH 2208/2924] drm/msm/dp: Account for LTTPRs capabilities [ Upstream commit c156fe2dd46774321c8eaaff9a6f04b64e6b9742 ] Take into account LTTPR capabilities when selecting maximum allowed link rate, number of data lines. Fixes: 72d0af4accd9 ("drm/msm/dp: Add support for LTTPR handling") Reviewed-by: Abel Vesa Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Signed-off-by: Aleksandrs Vinarskis Tested-by: Jessica Zhang # SA8775P Tested-by: Johan Hovold Tested-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/652302/ Link: https://lore.kernel.org/r/20250507230113.14270-3-alex.vinarskis@gmail.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_display.c | 5 ++--- drivers/gpu/drm/msm/dp/dp_link.h | 3 +++ drivers/gpu/drm/msm/dp/dp_panel.c | 12 +++++++++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index fc07cce68382a0..5c57c1d7ac601a 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -369,13 +369,12 @@ static int msm_dp_display_send_hpd_notification(struct msm_dp_display_private *d static void msm_dp_display_lttpr_init(struct msm_dp_display_private *dp, u8 *dpcd) { - u8 lttpr_caps[DP_LTTPR_COMMON_CAP_SIZE]; int rc; - if (drm_dp_read_lttpr_common_caps(dp->aux, dpcd, lttpr_caps)) + if (drm_dp_read_lttpr_common_caps(dp->aux, dpcd, dp->link->lttpr_common_caps)) return; - rc = drm_dp_lttpr_init(dp->aux, drm_dp_lttpr_count(lttpr_caps)); + rc = drm_dp_lttpr_init(dp->aux, drm_dp_lttpr_count(dp->link->lttpr_common_caps)); if (rc) DRM_ERROR("failed to set LTTPRs transparency mode, rc=%d\n", rc); } diff --git a/drivers/gpu/drm/msm/dp/dp_link.h b/drivers/gpu/drm/msm/dp/dp_link.h index 8db5d5698a97cf..c47d75cfc720c9 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.h +++ b/drivers/gpu/drm/msm/dp/dp_link.h @@ -7,6 +7,7 @@ #define _DP_LINK_H_ #include "dp_aux.h" +#include #define DS_PORT_STATUS_CHANGED 0x200 #define DP_TEST_BIT_DEPTH_UNKNOWN 0xFFFFFFFF @@ -60,6 +61,8 @@ struct msm_dp_link_phy_params { }; struct msm_dp_link { + u8 lttpr_common_caps[DP_LTTPR_COMMON_CAP_SIZE]; + u32 sink_request; u32 test_response; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 92415bf8aa1665..4e8ab75c771b1e 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -47,7 +47,7 @@ static void msm_dp_panel_read_psr_cap(struct msm_dp_panel_private *panel) static int msm_dp_panel_read_dpcd(struct msm_dp_panel *msm_dp_panel) { - int rc; + int rc, max_lttpr_lanes, max_lttpr_rate; struct msm_dp_panel_private *panel; struct msm_dp_link_info *link_info; u8 *dpcd, major, minor; @@ -75,6 +75,16 @@ static int msm_dp_panel_read_dpcd(struct msm_dp_panel *msm_dp_panel) if (link_info->rate > msm_dp_panel->max_dp_link_rate) link_info->rate = msm_dp_panel->max_dp_link_rate; + /* Limit data lanes from LTTPR capabilities, if any */ + max_lttpr_lanes = drm_dp_lttpr_max_lane_count(panel->link->lttpr_common_caps); + if (max_lttpr_lanes && max_lttpr_lanes < link_info->num_lanes) + link_info->num_lanes = max_lttpr_lanes; + + /* Limit link rate from LTTPR capabilities, if any */ + max_lttpr_rate = drm_dp_lttpr_max_link_rate(panel->link->lttpr_common_caps); + if (max_lttpr_rate && max_lttpr_rate < link_info->rate) + link_info->rate = max_lttpr_rate; + drm_dbg_dp(panel->drm_dev, "version: %d.%d\n", major, minor); drm_dbg_dp(panel->drm_dev, "link_rate=%d\n", link_info->rate); drm_dbg_dp(panel->drm_dev, "lane_count=%d\n", link_info->num_lanes); From 26c730d17564686329cbbd4caed6cfbe8a9f0156 Mon Sep 17 00:00:00 2001 From: Aleksandrs Vinarskis Date: Thu, 8 May 2025 00:59:01 +0200 Subject: [PATCH 2209/2924] drm/msm/dp: Prepare for link training per-segment for LTTPRs [ Upstream commit 7513ccb8840b54e35579f3c2cce08c3443a6e2d4 ] Per-segment link training requires knowing the number of LTTPRs (if any) present. Store the count during LTTPRs' initialization. Fixes: 72d0af4accd9 ("drm/msm/dp: Add support for LTTPR handling") Reviewed-by: Abel Vesa Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Signed-off-by: Aleksandrs Vinarskis Tested-by: Jessica Zhang # SA8775P Tested-by: Johan Hovold Tested-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/652306/ Link: https://lore.kernel.org/r/20250507230113.14270-4-alex.vinarskis@gmail.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_display.c | 17 +++++++++++------ drivers/gpu/drm/msm/dp/dp_link.h | 1 + 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 5c57c1d7ac601a..ab8c1f19dcb42d 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -367,16 +367,21 @@ static int msm_dp_display_send_hpd_notification(struct msm_dp_display_private *d return 0; } -static void msm_dp_display_lttpr_init(struct msm_dp_display_private *dp, u8 *dpcd) +static int msm_dp_display_lttpr_init(struct msm_dp_display_private *dp, u8 *dpcd) { - int rc; + int rc, lttpr_count; if (drm_dp_read_lttpr_common_caps(dp->aux, dpcd, dp->link->lttpr_common_caps)) - return; + return 0; - rc = drm_dp_lttpr_init(dp->aux, drm_dp_lttpr_count(dp->link->lttpr_common_caps)); - if (rc) + lttpr_count = drm_dp_lttpr_count(dp->link->lttpr_common_caps); + rc = drm_dp_lttpr_init(dp->aux, lttpr_count); + if (rc) { DRM_ERROR("failed to set LTTPRs transparency mode, rc=%d\n", rc); + return 0; + } + + return lttpr_count; } static int msm_dp_display_process_hpd_high(struct msm_dp_display_private *dp) @@ -390,7 +395,7 @@ static int msm_dp_display_process_hpd_high(struct msm_dp_display_private *dp) if (rc) goto end; - msm_dp_display_lttpr_init(dp, dpcd); + dp->link->lttpr_count = msm_dp_display_lttpr_init(dp, dpcd); rc = msm_dp_panel_read_sink_caps(dp->panel, connector); if (rc) diff --git a/drivers/gpu/drm/msm/dp/dp_link.h b/drivers/gpu/drm/msm/dp/dp_link.h index c47d75cfc720c9..ba47c6d19fbfac 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.h +++ b/drivers/gpu/drm/msm/dp/dp_link.h @@ -62,6 +62,7 @@ struct msm_dp_link_phy_params { struct msm_dp_link { u8 lttpr_common_caps[DP_LTTPR_COMMON_CAP_SIZE]; + int lttpr_count; u32 sink_request; u32 test_response; From 4881c5fc4ac2a76479ebe1f5625d3253e5de6841 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 9 May 2025 21:03:28 +0300 Subject: [PATCH 2210/2924] drm/i915/dp_mst: Use the correct connector while computing the link BPP limit on MST [ Upstream commit a92e390e0d438e021de0e52065121484b6cca675 ] Atm, on an MST link in DSC mode intel_dp_compute_config_link_bpp_limits() calculates the maximum link bpp limit using the MST root connector's DSC capabilities. That's not correct in general: the decompression could be performed by a branch device downstream of the root branch device or the sink itself. Fix the above by passing to intel_dp_compute_config_link_bpp_limits() the actual connector being modeset, containing the correct DSC capabilities. Cc: Ankit Nautiyal Fixes: 1c5b72daff46 ("drm/i915/dp: Set the DSC link limits in intel_dp_compute_config_link_bpp_limits") Reviewed-by: Ankit Nautiyal Reviewed-by: Luca Coelho Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250509180340.554867-2-imre.deak@intel.com (cherry picked from commit 266e2fcfe2ea0d062ea392cd22f6250ae0d11c04) Signed-off-by: Joonas Lahtinen Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/intel_dp.c | 7 ++++--- drivers/gpu/drm/i915/display/intel_dp.h | 1 + drivers/gpu/drm/i915/display/intel_dp_mst.c | 5 +++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 392c3653d0d738..cd8f728d5fddc4 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2523,6 +2523,7 @@ intel_dp_dsc_compute_pipe_bpp_limits(struct intel_dp *intel_dp, bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, + struct intel_connector *connector, struct intel_crtc_state *crtc_state, bool respect_downstream_limits, bool dsc, @@ -2576,7 +2577,7 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp, intel_dp_test_compute_config(intel_dp, crtc_state, limits); return intel_dp_compute_config_link_bpp_limits(intel_dp, - intel_dp->attached_connector, + connector, crtc_state, dsc, limits); @@ -2637,7 +2638,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, joiner_needs_dsc = intel_dp_joiner_needs_dsc(display, num_joined_pipes); dsc_needed = joiner_needs_dsc || intel_dp->force_dsc_en || - !intel_dp_compute_config_limits(intel_dp, pipe_config, + !intel_dp_compute_config_limits(intel_dp, connector, pipe_config, respect_downstream_limits, false, &limits); @@ -2671,7 +2672,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, str_yes_no(ret), str_yes_no(joiner_needs_dsc), str_yes_no(intel_dp->force_dsc_en)); - if (!intel_dp_compute_config_limits(intel_dp, pipe_config, + if (!intel_dp_compute_config_limits(intel_dp, connector, pipe_config, respect_downstream_limits, true, &limits)) diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 9189db4c25946a..98f90955fdb1db 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -194,6 +194,7 @@ void intel_dp_wait_source_oui(struct intel_dp *intel_dp); int intel_dp_output_bpp(enum intel_output_format output_format, int bpp); bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, + struct intel_connector *connector, struct intel_crtc_state *crtc_state, bool respect_downstream_limits, bool dsc, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 6dc2d31ccb5a53..fe685f098ba9a2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -590,12 +590,13 @@ adjust_limits_for_dsc_hblank_expansion_quirk(struct intel_dp *intel_dp, static bool mst_stream_compute_config_limits(struct intel_dp *intel_dp, - const struct intel_connector *connector, + struct intel_connector *connector, struct intel_crtc_state *crtc_state, bool dsc, struct link_config_limits *limits) { - if (!intel_dp_compute_config_limits(intel_dp, crtc_state, false, dsc, + if (!intel_dp_compute_config_limits(intel_dp, connector, + crtc_state, false, dsc, limits)) return false; From fa04e6f37ac9ceb6a018fbd719d596a8b3f74b94 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 3 Apr 2025 12:47:37 +0200 Subject: [PATCH 2211/2924] drm/mediatek: mtk_drm_drv: Fix kobject put for mtk_mutex device ptr [ Upstream commit 22918591fb747a6d16801e74a170cf98e886f83b ] This driver is taking a kobject for mtk_mutex only once per mmsys device for each drm-mediatek driver instance, differently from the behavior with other components, but it is decrementing the kobj's refcount in a loop and once per mmsys: this is not right and will result in a refcount_t underflow warning when mediatek-drm returns multiple probe deferrals in one boot (or when manually bound and unbound). Besides that, the refcount for mutex_dev was not decremented for error cases in mtk_drm_bind(), causing another refcount_t warning but this time for overflow, when the failure happens not during driver bind but during component bind. In order to fix one of the reasons why this is happening, remove the put_device(xx->mutex_dev) loop from the mtk_drm_kms_init()'s put_mutex_dev label (and drop the label) and add a single call to correctly free the single incremented refcount of mutex_dev to the mtk_drm_unbind() function to fix the refcount_t underflow. Moreover, add the same call to the error cases in mtk_drm_bind() to fix the refcount_t overflow. Fixes: 1ef7ed48356c ("drm/mediatek: Modify mediatek-drm for mt8195 multi mmsys support") Reviewed-by: Chen-Yu Tsai Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250403104741.71045-2-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 74158b9d65035b..5994e2a97dc135 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -470,7 +470,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) ret = drmm_mode_config_init(drm); if (ret) - goto put_mutex_dev; + return ret; drm->mode_config.min_width = 64; drm->mode_config.min_height = 64; @@ -489,7 +489,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) drm->dev_private = private->all_drm_private[i]; ret = component_bind_all(private->all_drm_private[i]->dev, drm); if (ret) - goto put_mutex_dev; + return ret; } /* @@ -582,9 +582,6 @@ static int mtk_drm_kms_init(struct drm_device *drm) err_component_unbind: for (i = 0; i < private->data->mmsys_dev_num; i++) component_unbind_all(private->all_drm_private[i]->dev, drm); -put_mutex_dev: - for (i = 0; i < private->data->mmsys_dev_num; i++) - put_device(private->all_drm_private[i]->mutex_dev); return ret; } @@ -655,8 +652,10 @@ static int mtk_drm_bind(struct device *dev) return 0; drm = drm_dev_alloc(&mtk_drm_driver, dev); - if (IS_ERR(drm)) - return PTR_ERR(drm); + if (IS_ERR(drm)) { + ret = PTR_ERR(drm); + goto err_put_dev; + } private->drm_master = true; drm->dev_private = private; @@ -682,6 +681,8 @@ static int mtk_drm_bind(struct device *dev) drm_dev_put(drm); for (i = 0; i < private->data->mmsys_dev_num; i++) private->all_drm_private[i]->drm = NULL; +err_put_dev: + put_device(private->mutex_dev); return ret; } @@ -694,6 +695,8 @@ static void mtk_drm_unbind(struct device *dev) drm_dev_unregister(private->drm); mtk_drm_kms_deinit(private->drm); drm_dev_put(private->drm); + + put_device(private->mutex_dev); } private->mtk_drm_bound = false; private->drm_master = false; From 4f52f695ca1a42cc39edba9d422b326ca642968e Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 3 Apr 2025 12:47:38 +0200 Subject: [PATCH 2212/2924] drm/mediatek: Fix kobject put for component sub-drivers [ Upstream commit 80805b62ea5b95eda54c225b989f929ca0691ab0 ] In function mtk_drm_get_all_drm_priv(), this driver is incrementing the refcount for the sub-drivers of mediatek-drm with a call to device_find_child() when taking a reference to all of those child devices. When the component bind fails multiple times this results in a refcount_t overflow, as the reference count is never decremented: fix that by adding a call to put_device() for all of the mmsys devices in a loop, in error cases of mtk_drm_bind() and in the mtk_drm_unbind() callback. Fixes: 1ef7ed48356c ("drm/mediatek: Modify mediatek-drm for mt8195 multi mmsys support") Reviewed-by: Chen-Yu Tsai Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250403104741.71045-3-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 5994e2a97dc135..593193555c07ef 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -682,6 +682,10 @@ static int mtk_drm_bind(struct device *dev) for (i = 0; i < private->data->mmsys_dev_num; i++) private->all_drm_private[i]->drm = NULL; err_put_dev: + for (i = 0; i < private->data->mmsys_dev_num; i++) { + /* For device_find_child in mtk_drm_get_all_priv() */ + put_device(private->all_drm_private[i]->dev); + } put_device(private->mutex_dev); return ret; } @@ -689,6 +693,7 @@ static int mtk_drm_bind(struct device *dev) static void mtk_drm_unbind(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); + int i; /* for multi mmsys dev, unregister drm dev in mmsys master */ if (private->drm_master) { @@ -696,6 +701,10 @@ static void mtk_drm_unbind(struct device *dev) mtk_drm_kms_deinit(private->drm); drm_dev_put(private->drm); + for (i = 0; i < private->data->mmsys_dev_num; i++) { + /* For device_find_child in mtk_drm_get_all_priv() */ + put_device(private->all_drm_private[i]->dev); + } put_device(private->mutex_dev); } private->mtk_drm_bound = false; From dbc6aa697897f90b3db85526f19b94cca967bb1a Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 3 Apr 2025 12:47:39 +0200 Subject: [PATCH 2213/2924] drm/mediatek: mtk_drm_drv: Unbind secondary mmsys components on err [ Upstream commit 94c933716567084bfb9e79dcd81eb2b2308e84e1 ] When calling component_bind_all(), if a component that is included in the list fails, all of those that have been successfully bound will be unbound, but this driver has two components lists for two actual devices, as in, each mmsys instance has its own components list. In case mmsys0 (or actually vdosys0) is able to bind all of its components, but the secondary one fails, all of the components of the first are kept bound, while the ones of mmsys1/vdosys1 are correctly cleaned up. This is not right because, in case of a failure, the components are re-bound for all of the mmsys/vdosys instances without caring about the ones that were previously left in a bound state. Fix that by calling component_unbind_all() on all of the previous component masters that succeeded binding all subdevices when any of the other masters errors out. Fixes: 1ef7ed48356c ("drm/mediatek: Modify mediatek-drm for mt8195 multi mmsys support") Reviewed-by: Chen-Yu Tsai Signed-off-by: AngeloGioacchino Del Regno Link: https://patchwork.kernel.org/project/dri-devel/patch/20250403104741.71045-4-angelogioacchino.delregno@collabora.com/ Signed-off-by: Chun-Kuang Hu Signed-off-by: Sasha Levin --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 593193555c07ef..7c0c12dde48859 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -488,8 +488,11 @@ static int mtk_drm_kms_init(struct drm_device *drm) for (i = 0; i < private->data->mmsys_dev_num; i++) { drm->dev_private = private->all_drm_private[i]; ret = component_bind_all(private->all_drm_private[i]->dev, drm); - if (ret) + if (ret) { + while (--i >= 0) + component_unbind_all(private->all_drm_private[i]->dev, drm); return ret; + } } /* From 87b6745c5c85ac4de083ab1756f86d12f31c7d14 Mon Sep 17 00:00:00 2001 From: Detlev Casanova Date: Fri, 25 Apr 2025 15:24:47 -0400 Subject: [PATCH 2214/2924] media: verisilicon: Free post processor buffers on error [ Upstream commit 11beb0fc346e00c412b3bfd19013206f6b655604 ] During initialization, the post processor allocates the same number of buffers as the buf queue. As the init function is called in streamon(), if an allocation fails, streamon will return an error and streamoff() will not be called, keeping all post processor buffers allocated. To avoid that, all post proc buffers are freed in case of an allocation error. Fixes: 26711491a807 ("media: verisilicon: Refactor postprocessor to store more buffers") Signed-off-by: Detlev Casanova Reviewed-by: Nicolas Dufresne Signed-off-by: Nicolas Dufresne Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/platform/verisilicon/hantro_postproc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/verisilicon/hantro_postproc.c b/drivers/media/platform/verisilicon/hantro_postproc.c index c435a393e0cb70..9f559a13d409bb 100644 --- a/drivers/media/platform/verisilicon/hantro_postproc.c +++ b/drivers/media/platform/verisilicon/hantro_postproc.c @@ -250,8 +250,10 @@ int hantro_postproc_init(struct hantro_ctx *ctx) for (i = 0; i < num_buffers; i++) { ret = hantro_postproc_alloc(ctx, i); - if (ret) + if (ret) { + hantro_postproc_free(ctx); return ret; + } } return 0; From 9d394de375c37cf86d9e74139d342465876e4145 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:49 -0400 Subject: [PATCH 2215/2924] svcrdma: Reduce the number of rdma_rw contexts per-QP [ Upstream commit 59243315890578a040a2d50ae9e001a2ef2fcb62 ] There is an upper bound on the number of rdma_rw contexts that can be created per QP. This invisible upper bound is because rdma_create_qp() adds one or more additional SQEs for each ctxt that the ULP requests via qp_attr.cap.max_rdma_ctxs. The QP's actual Send Queue length is on the order of the sum of qp_attr.cap.max_send_wr and a factor times qp_attr.cap.max_rdma_ctxs. The factor can be up to three, depending on whether MR operations are required before RDMA Reads. This limit is not visible to RDMA consumers via dev->attrs. When the limit is surpassed, QP creation fails with -ENOMEM. For example: svcrdma's estimate of the number of rdma_rw contexts it needs is three times the number of pages in RPCSVC_MAXPAGES. When MAXPAGES is about 260, the internally-computed SQ length should be: 64 credits + 10 backlog + 3 * (3 * 260) = 2414 Which is well below the advertised qp_max_wr of 32768. If RPCSVC_MAXPAGES is increased to 4MB, that's 1040 pages: 64 credits + 10 backlog + 3 * (3 * 1040) = 9434 However, QP creation fails. Dynamic printk for mlx5 shows: calc_sq_size:618:(pid 1514): send queue size (9326 * 256 / 64 -> 65536) exceeds limits(32768) Although 9326 is still far below qp_max_wr, QP creation still fails. Because the total SQ length calculation is opaque to RDMA consumers, there doesn't seem to be much that can be done about this except for consumers to try to keep the requested rdma_rw ctxt count low. Fixes: 2da0f610e733 ("svcrdma: Increase the per-transport rw_ctx count") Reviewed-by: NeilBrown Reviewed-by: Christoph Hellwig Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index aca8bdf65d729f..ca6172822b68ae 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -406,12 +406,12 @@ static void svc_rdma_xprt_done(struct rpcrdma_notification *rn) */ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) { + unsigned int ctxts, rq_depth, maxpayload; struct svcxprt_rdma *listen_rdma; struct svcxprt_rdma *newxprt = NULL; struct rdma_conn_param conn_param; struct rpcrdma_connect_private pmsg; struct ib_qp_init_attr qp_attr; - unsigned int ctxts, rq_depth; struct ib_device *dev; int ret = 0; RPC_IFDEBUG(struct sockaddr *sap); @@ -462,12 +462,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_bc_requests = 2; } - /* Arbitrarily estimate the number of rw_ctxs needed for - * this transport. This is enough rw_ctxs to make forward - * progress even if the client is using one rkey per page - * in each Read chunk. + /* Arbitrary estimate of the needed number of rdma_rw contexts. */ - ctxts = 3 * RPCSVC_MAXPAGES; + maxpayload = min(xprt->xpt_server->sv_max_payload, + RPCSVC_MAXPAYLOAD_RDMA); + ctxts = newxprt->sc_max_requests * 3 * + rdma_rw_mr_factor(dev, newxprt->sc_port_num, + maxpayload >> PAGE_SHIFT); + newxprt->sc_sq_depth = rq_depth + ctxts; if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) newxprt->sc_sq_depth = dev->attrs.max_qp_wr; From 420d4544f73c1ddbd0abaf4e22b9e380ceffb912 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 14 May 2025 10:04:26 +0200 Subject: [PATCH 2216/2924] xen/x86: fix initial memory balloon target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 74287971dbb3fe322bb316afd9e7fb5807e23bee ] When adding extra memory regions as ballooned pages also adjust the balloon target, otherwise when the balloon driver is started it will populate memory to match the target value and consume all the extra memory regions added. This made the usage of the Xen `dom0_mem=,max:` command line parameter for dom0 not work as expected, as the target won't be adjusted and when the balloon is started it will populate memory straight to the 'max:' value. It would equally affect domUs that have memory != maxmem. Kernels built with CONFIG_XEN_UNPOPULATED_ALLOC are not affected, because the extra memory regions are consumed by the unpopulated allocation driver, and then balloon_add_regions() becomes a no-op. Reported-by: John Fixes: 87af633689ce ('x86/xen: fix balloon target initialization for PVH dom0') Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Tested-by: Marek Marczykowski-Górecki Message-ID: <20250514080427.28129-1-roger.pau@citrix.com> Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/balloon.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 8c852807ba1c10..2de37dcd75566f 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -704,15 +704,18 @@ static int __init balloon_add_regions(void) /* * Extra regions are accounted for in the physmap, but need - * decreasing from current_pages to balloon down the initial - * allocation, because they are already accounted for in - * total_pages. + * decreasing from current_pages and target_pages to balloon + * down the initial allocation, because they are already + * accounted for in total_pages. */ - if (extra_pfn_end - start_pfn >= balloon_stats.current_pages) { + pages = extra_pfn_end - start_pfn; + if (pages >= balloon_stats.current_pages || + pages >= balloon_stats.target_pages) { WARN(1, "Extra pages underflow current target"); return -ERANGE; } - balloon_stats.current_pages -= extra_pfn_end - start_pfn; + balloon_stats.current_pages -= pages; + balloon_stats.target_pages -= pages; } return 0; From a2a03a255904746f0914ff1d46a2a2a06a6c452b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 3 Apr 2025 16:26:33 +0200 Subject: [PATCH 2217/2924] drm/xe/guc: Refactor GuC debugfs initialization [ Upstream commit e15826bb3c2c5377eedc757f2adec8dcaa5255f7 ] We don't have to drmm_kmalloc() local copy of debugfs_list to write there our pointer to the struct xe_guc as we can extract pointer to the struct xe_gt from the grandparent debugfs entry, in similar way to what we did for GT debugfs files. Note that there is no change in file/directory structure, just refactored how files are created and how functions are called. Signed-off-by: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250403142635.1821-2-michal.wajdeczko@intel.com Stable-dep-of: e22d7acf9f47 ("drm/xe/guc: Make creation of SLPC debugfs files conditional") Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/xe_guc_debugfs.c | 130 ++++++++++++++-------------- 1 file changed, 67 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index c569ff456e7416..9a1c78b89f457c 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -17,101 +17,105 @@ #include "xe_macros.h" #include "xe_pm.h" -static struct xe_guc *node_to_guc(struct drm_info_node *node) -{ - return node->info_ent->data; -} - -static int guc_info(struct seq_file *m, void *data) +/* + * guc_debugfs_show - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * This callback can be used in struct drm_info_list to describe debugfs + * files that are &xe_guc specific in similar way how we handle &xe_gt + * specific files using &xe_gt_debugfs_simple_show. + * + * It is assumed that those debugfs files will be created on directory entry + * which grandparent struct dentry d_inode->i_private points to &xe_gt. + * + * /sys/kernel/debug/dri/0/ + * ├── gt0 # dent->d_parent->d_parent (d_inode->i_private == gt) + * │   ├── uc # dent->d_parent + * │   │   ├── guc_info # dent + * │   │   ├── guc_... + * + * This function assumes that &m->private will be set to the &struct + * drm_info_node corresponding to the instance of the info on a given &struct + * drm_minor (see struct drm_info_list.show for details). + * + * This function also assumes that struct drm_info_list.data will point to the + * function code that will actually print a file content:: + * + * int (*print)(struct xe_guc *, struct drm_printer *) + * + * Example:: + * + * int foo(struct xe_guc *guc, struct drm_printer *p) + * { + * drm_printf(p, "enabled %d\n", guc->submission_state.enabled); + * return 0; + * } + * + * static const struct drm_info_list bar[] = { + * { name = "foo", .show = guc_debugfs_show, .data = foo }, + * }; + * + * parent = debugfs_create_dir("uc", gtdir); + * drm_debugfs_create_files(bar, ARRAY_SIZE(bar), parent, minor); + * + * Return: 0 on success or a negative error code on failure. + */ +static int guc_debugfs_show(struct seq_file *m, void *data) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct dentry *grandparent = parent->d_parent; + struct xe_gt *gt = grandparent->d_inode->i_private; + struct xe_device *xe = gt_to_xe(gt); + int (*print)(struct xe_guc *, struct drm_printer *) = node->info_ent->data; + int ret; xe_pm_runtime_get(xe); - xe_guc_print_info(guc, &p); + ret = print(>->uc.guc, &p); xe_pm_runtime_put(xe); - return 0; + return ret; } -static int guc_log(struct seq_file *m, void *data) +static int guc_log(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_log_print(&guc->log, &p); - xe_pm_runtime_put(xe); - + xe_guc_log_print(&guc->log, p); return 0; } -static int guc_log_dmesg(struct seq_file *m, void *data) +static int guc_log_dmesg(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - - xe_pm_runtime_get(xe); xe_guc_log_print_dmesg(&guc->log); - xe_pm_runtime_put(xe); - return 0; } -static int guc_ctb(struct seq_file *m, void *data) +static int guc_ctb(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_ct_print(&guc->ct, &p, true); - xe_pm_runtime_put(xe); - + xe_guc_ct_print(&guc->ct, p, true); return 0; } -static int guc_pc(struct seq_file *m, void *data) +static int guc_pc(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_pc_print(&guc->pc, &p); - xe_pm_runtime_put(xe); - + xe_guc_pc_print(&guc->pc, p); return 0; } static const struct drm_info_list debugfs_list[] = { - {"guc_info", guc_info, 0}, - {"guc_log", guc_log, 0}, - {"guc_log_dmesg", guc_log_dmesg, 0}, - {"guc_ctb", guc_ctb, 0}, - {"guc_pc", guc_pc, 0}, + { "guc_info", .show = guc_debugfs_show, .data = xe_guc_print_info }, + { "guc_log", .show = guc_debugfs_show, .data = guc_log }, + { "guc_log_dmesg", .show = guc_debugfs_show, .data = guc_log_dmesg }, + { "guc_ctb", .show = guc_debugfs_show, .data = guc_ctb }, + { "guc_pc", .show = guc_debugfs_show, .data = guc_pc }, }; void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) { struct drm_minor *minor = guc_to_xe(guc)->drm.primary; - struct drm_info_list *local; - int i; - -#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) - local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL); - if (!local) - return; - - memcpy(local, debugfs_list, DEBUGFS_SIZE); -#undef DEBUGFS_SIZE - - for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) - local[i].data = guc; - drm_debugfs_create_files(local, + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), parent, minor); } From 1b9dc8d2525544b7a88d31a748878d71dc6f7288 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 3 Apr 2025 16:26:34 +0200 Subject: [PATCH 2218/2924] drm/xe/guc: Don't expose GuC privileged debugfs files if VF [ Upstream commit 387444984d7b53dbaee263887cad4ea7c8e57b34 ] Some of the GuC debugfs files require access to the data that is not available on the VFs. Don't expose those files on the VF driver. Signed-off-by: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250403142635.1821-3-michal.wajdeczko@intel.com Stable-dep-of: e22d7acf9f47 ("drm/xe/guc: Make creation of SLPC debugfs files conditional") Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/xe_guc_debugfs.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index 9a1c78b89f457c..f33013f8a0f38a 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -103,11 +103,20 @@ static int guc_pc(struct xe_guc *guc, struct drm_printer *p) return 0; } -static const struct drm_info_list debugfs_list[] = { +/* + * only for GuC debugfs files which can be safely used on the VF as well: + * - without access to the GuC privileged registers + * - without access to the PF specific GuC objects + */ +static const struct drm_info_list vf_safe_debugfs_list[] = { { "guc_info", .show = guc_debugfs_show, .data = xe_guc_print_info }, + { "guc_ctb", .show = guc_debugfs_show, .data = guc_ctb }, +}; + +/* everything else should be added here */ +static const struct drm_info_list pf_only_debugfs_list[] = { { "guc_log", .show = guc_debugfs_show, .data = guc_log }, { "guc_log_dmesg", .show = guc_debugfs_show, .data = guc_log_dmesg }, - { "guc_ctb", .show = guc_debugfs_show, .data = guc_ctb }, { "guc_pc", .show = guc_debugfs_show, .data = guc_pc }, }; @@ -115,7 +124,12 @@ void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) { struct drm_minor *minor = guc_to_xe(guc)->drm.primary; - drm_debugfs_create_files(debugfs_list, - ARRAY_SIZE(debugfs_list), + drm_debugfs_create_files(vf_safe_debugfs_list, + ARRAY_SIZE(vf_safe_debugfs_list), parent, minor); + + if (!IS_SRIOV_VF(guc_to_xe(guc))) + drm_debugfs_create_files(pf_only_debugfs_list, + ARRAY_SIZE(pf_only_debugfs_list), + parent, minor); } From 3f5d6a0317db61a7660575ea82f4c6f8571b2e46 Mon Sep 17 00:00:00 2001 From: Aradhya Bhatia Date: Fri, 16 May 2025 14:19:02 +0000 Subject: [PATCH 2219/2924] drm/xe/guc: Make creation of SLPC debugfs files conditional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e22d7acf9f47b01c9a538f3dac5c8e8d46fbca96 ] Platforms that do not support SLPC are exempted from the GuC PC support. The GuC PC does not get initialized, and neither do its BOs get created. This causes a problem because the GuC PC debugfs file is still being created. Whenever the file is attempted to read, it causes a NULL pointer dereference on the supposed BO of the GuC PC. So, make the creation of SLPC debugfs files conditional to when SLPC features are supported. Fixes: aaab5404b16f ("drm/xe: Introduce GuC PC debugfs") Suggested-by: Matt Roper Reviewed-by: Tejas Upadhyay Reviewed-by: Stuart Summers Signed-off-by: Aradhya Bhatia Link: https://lore.kernel.org/r/20250516141902.5614-1-aradhya.bhatia@intel.com Signed-off-by: Matt Roper (cherry picked from commit 17486cf3df5320752cc67ee8bcb2379d1b9de76c) Signed-off-by: Thomas Hellström Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/xe_guc_debugfs.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index f33013f8a0f38a..0b102ab46c4df2 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -113,23 +113,34 @@ static const struct drm_info_list vf_safe_debugfs_list[] = { { "guc_ctb", .show = guc_debugfs_show, .data = guc_ctb }, }; +/* For GuC debugfs files that require the SLPC support */ +static const struct drm_info_list slpc_debugfs_list[] = { + { "guc_pc", .show = guc_debugfs_show, .data = guc_pc }, +}; + /* everything else should be added here */ static const struct drm_info_list pf_only_debugfs_list[] = { { "guc_log", .show = guc_debugfs_show, .data = guc_log }, { "guc_log_dmesg", .show = guc_debugfs_show, .data = guc_log_dmesg }, - { "guc_pc", .show = guc_debugfs_show, .data = guc_pc }, }; void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) { - struct drm_minor *minor = guc_to_xe(guc)->drm.primary; + struct xe_device *xe = guc_to_xe(guc); + struct drm_minor *minor = xe->drm.primary; drm_debugfs_create_files(vf_safe_debugfs_list, ARRAY_SIZE(vf_safe_debugfs_list), parent, minor); - if (!IS_SRIOV_VF(guc_to_xe(guc))) + if (!IS_SRIOV_VF(xe)) { drm_debugfs_create_files(pf_only_debugfs_list, ARRAY_SIZE(pf_only_debugfs_list), parent, minor); + + if (!xe->info.skip_guc_pc) + drm_debugfs_create_files(slpc_debugfs_list, + ARRAY_SIZE(slpc_debugfs_list), + parent, minor); + } } From fc71fa9e080a445e5a26692740df2b63387ce96b Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 3 Mar 2025 10:32:42 +0100 Subject: [PATCH 2220/2924] drm/panic: clean Clippy warning [ Upstream commit 57145afa3326947154c3a890b1118774b55212a0 ] Clippy warns: error: manual implementation of an assign operation --> drivers/gpu/drm/drm_panic_qr.rs:418:25 | 418 | self.carry = self.carry % pow; | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: replace it with: `self.carry %= pow` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#assign_op_pattern Thus clean it up. Fixes: dbed4a797e00 ("drm/panic: Better binary encoding in QR code") Signed-off-by: Miguel Ojeda Reviewed-by: Alice Ryhl Reviewed-by: Jocelyn Falempe Signed-off-by: Jocelyn Falempe Link: https://patchwork.freedesktop.org/patch/msgid/20250303093242.1011790-1-ojeda@kernel.org Stable-dep-of: 675008f196ca ("drm/panic: Use a decimal fifo to avoid u64 by u64 divide") Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panic_qr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index f2a99681b99858..ba6724aed51c95 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -415,7 +415,7 @@ impl Iterator for SegmentIterator<'_> { self.carry_len -= out_len; let pow = u64::pow(10, self.carry_len as u32); let out = (self.carry / pow) as u16; - self.carry = self.carry % pow; + self.carry %= pow; Some((out, NUM_CHARS_BITS[out_len])) } } From 2d07a98c5f70c3b61abc86dfacdb86a0a78089fe Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Fri, 18 Apr 2025 18:48:16 +0200 Subject: [PATCH 2221/2924] drm/panic: Use a decimal fifo to avoid u64 by u64 divide [ Upstream commit 675008f196ca5c8d8413204e861cc2a2238581aa ] On 32bits ARM, u64/u64 is not supported [1], so change the algorithm to use a simple fifo with decimal digits as u8 instead. This is slower but should compile on all architecture. Link: https://lore.kernel.org/dri-devel/CANiq72ke45eOwckMhWHvmwxc03dxr4rnxxKvx+HvWdBLopZfrQ@mail.gmail.com/ [1] Reported-by: Miguel Ojeda Closes: https://lore.kernel.org/dri-devel/CANiq72ke45eOwckMhWHvmwxc03dxr4rnxxKvx+HvWdBLopZfrQ@mail.gmail.com/ Fixes: ccb8ce526807 ("ARM: 9441/1: rust: Enable Rust support for ARMv7") Signed-off-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20250418165059.560503-1-jfalempe@redhat.com Acked-by: Javier Martinez Canillas Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panic_qr.rs | 71 ++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index ba6724aed51c95..de2ddf5dbbd3f1 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -366,8 +366,48 @@ impl Segment<'_> { SegmentIterator { segment: self, offset: 0, - carry: 0, - carry_len: 0, + decfifo: Default::default(), + } + } +} + +/// Max fifo size is 17 (max push) + 2 (max remaining) +const MAX_FIFO_SIZE: usize = 19; + +/// A simple Decimal digit FIFO +#[derive(Default)] +struct DecFifo { + decimals: [u8; MAX_FIFO_SIZE], + len: usize, +} + +impl DecFifo { + fn push(&mut self, data: u64, len: usize) { + let mut chunk = data; + for i in (0..self.len).rev() { + self.decimals[i + len] = self.decimals[i]; + } + for i in 0..len { + self.decimals[i] = (chunk % 10) as u8; + chunk /= 10; + } + self.len += len; + } + + /// Pop 3 decimal digits from the FIFO + fn pop3(&mut self) -> Option<(u16, usize)> { + if self.len == 0 { + None + } else { + let poplen = 3.min(self.len); + self.len -= poplen; + let mut out = 0; + let mut exp = 1; + for i in 0..poplen { + out += self.decimals[self.len + i] as u16 * exp; + exp *= 10; + } + Some((out, NUM_CHARS_BITS[poplen])) } } } @@ -375,8 +415,7 @@ impl Segment<'_> { struct SegmentIterator<'a> { segment: &'a Segment<'a>, offset: usize, - carry: u64, - carry_len: usize, + decfifo: DecFifo, } impl Iterator for SegmentIterator<'_> { @@ -394,31 +433,17 @@ impl Iterator for SegmentIterator<'_> { } } Segment::Numeric(data) => { - if self.carry_len < 3 && self.offset < data.len() { - // If there are less than 3 decimal digits in the carry, - // take the next 7 bytes of input, and add them to the carry. + if self.decfifo.len < 3 && self.offset < data.len() { + // If there are less than 3 decimal digits in the fifo, + // take the next 7 bytes of input, and push them to the fifo. let mut buf = [0u8; 8]; let len = 7.min(data.len() - self.offset); buf[..len].copy_from_slice(&data[self.offset..self.offset + len]); let chunk = u64::from_le_bytes(buf); - let pow = u64::pow(10, BYTES_TO_DIGITS[len] as u32); - self.carry = chunk + self.carry * pow; + self.decfifo.push(chunk, BYTES_TO_DIGITS[len]); self.offset += len; - self.carry_len += BYTES_TO_DIGITS[len]; - } - match self.carry_len { - 0 => None, - len => { - // take the next 3 decimal digits of the carry - // and return 10bits of numeric data. - let out_len = 3.min(len); - self.carry_len -= out_len; - let pow = u64::pow(10, self.carry_len as u32); - let out = (self.carry / pow) as u16; - self.carry %= pow; - Some((out, NUM_CHARS_BITS[out_len])) - } } + self.decfifo.pop3() } } } From 3f919f76893069ec3c7475acaeb611eb31fca22d Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Mon, 24 Mar 2025 11:55:15 +0530 Subject: [PATCH 2222/2924] wifi: ath12k: fix NULL access in assign channel context handler [ Upstream commit ea24531d00f782f4e659e8c74578b7ac144720ca ] Currently, when ath12k_mac_assign_vif_to_vdev() fails, the radio handle (ar) gets accessed from the link VIF handle (arvif) for debug logging, This is incorrect. In the fail scenario, radio handle is NULL. Fix the NULL access, avoid radio handle access by moving to the hardware debug logging helper function (ath12k_hw_warn). Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: 90570ba4610b ("wifi: ath12k: do not return invalid link id for scan link") Reviewed-by: Vasanthakumar Thiagarajan Signed-off-by: Karthikeyan Periyasamy Link: https://patch.msgid.link/20250324062518.2752822-8-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/mac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index dfa05f0ee6c9f7..c20fd92000dfe3 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -9462,8 +9462,8 @@ ath12k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, ar = ath12k_mac_assign_vif_to_vdev(hw, arvif, ctx); if (!ar) { - ath12k_warn(arvif->ar->ab, "failed to assign chanctx for vif %pM link id %u link vif is already started", - vif->addr, link_id); + ath12k_hw_warn(ah, "failed to assign chanctx for vif %pM link id %u link vif is already started", + vif->addr, link_id); return -EINVAL; } From 6d6cb27fe146061f2512e904618f5e005bb7bb6a Mon Sep 17 00:00:00 2001 From: Stone Zhang Date: Thu, 20 Mar 2025 13:31:45 +0800 Subject: [PATCH 2223/2924] wifi: ath11k: fix node corruption in ar->arvifs list [ Upstream commit 31e98e277ae47f56632e4d663b1d4fd12ba33ea8 ] In current WLAN recovery code flow, ath11k_core_halt() only reinitializes the "arvifs" list head. This will cause the list node immediately following the list head to become an invalid list node. Because the prev of that node still points to the list head "arvifs", but the next of the list head "arvifs" no longer points to that list node. When a WLAN recovery occurs during the execution of a vif removal, and it happens before the spin_lock_bh(&ar->data_lock) in ath11k_mac_op_remove_interface(), list_del() will detect the previously mentioned situation, thereby triggering a kernel panic. The fix is to remove and reinitialize all vif list nodes from the list head "arvifs" during WLAN halt. The reinitialization is to make the list nodes valid, ensuring that the list_del() in ath11k_mac_op_remove_interface() can execute normally. Call trace: __list_del_entry_valid_or_report+0xb8/0xd0 ath11k_mac_op_remove_interface+0xb0/0x27c [ath11k] drv_remove_interface+0x48/0x194 [mac80211] ieee80211_do_stop+0x6e0/0x844 [mac80211] ieee80211_stop+0x44/0x17c [mac80211] __dev_close_many+0xac/0x150 __dev_change_flags+0x194/0x234 dev_change_flags+0x24/0x6c devinet_ioctl+0x3a0/0x670 inet_ioctl+0x200/0x248 sock_do_ioctl+0x60/0x118 sock_ioctl+0x274/0x35c __arm64_sys_ioctl+0xac/0xf0 invoke_syscall+0x48/0x114 ... Tested-on: QCA6698AQ hw2.1 PCI WLAN.HSP.1.1-04591-QCAHSPSWPL_V1_V2_SILICONZ_IOE-1 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Stone Zhang Link: https://patch.msgid.link/20250320053145.3445187-1-quic_stonez@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 3d39ff85ba94ad..8d08dd47bde9c7 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -2050,6 +2050,7 @@ static int ath11k_core_reconfigure_on_crash(struct ath11k_base *ab) void ath11k_core_halt(struct ath11k *ar) { struct ath11k_base *ab = ar->ab; + struct list_head *pos, *n; lockdep_assert_held(&ar->conf_mutex); @@ -2065,7 +2066,12 @@ void ath11k_core_halt(struct ath11k *ar) rcu_assign_pointer(ab->pdevs_active[ar->pdev_idx], NULL); synchronize_rcu(); - INIT_LIST_HEAD(&ar->arvifs); + + spin_lock_bh(&ar->data_lock); + list_for_each_safe(pos, n, &ar->arvifs) + list_del_init(pos); + spin_unlock_bh(&ar->data_lock); + idr_init(&ar->txmgmt_idr); } From 94c2cccad82b870b87a6c9735cc85a4f1818706a Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Sun, 30 Mar 2025 21:13:23 +0000 Subject: [PATCH 2224/2924] libbpf: Fix implicit memfd_create() for bionic [ Upstream commit 75011ad69bc54396703867b5434f1622343a848e ] Since memfd_create() is not consistently available across different bionic libc implementations, using memfd_create() directly can break some Android builds: tools/lib/bpf/linker.c:576:7: error: implicit declaration of function 'memfd_create' [-Werror,-Wimplicit-function-declaration] 576 | fd = memfd_create(filename, 0); | ^ To fix this, relocate and inline the sys_memfd_create() helper so that it can be used in "linker.c". Similar issues were previously fixed by commit 9fa5e1a180aa ("libbpf: Call memfd_create() syscall directly"). Fixes: 6d5e5e5d7ce1 ("libbpf: Extend linker API to support in-memory ELF files") Signed-off-by: Carlos Llamas Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250330211325.530677-1-cmllamas@google.com Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf.c | 9 --------- tools/lib/bpf/libbpf_internal.h | 9 +++++++++ tools/lib/bpf/linker.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6b85060f07b3b4..37d563e1405156 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1725,15 +1725,6 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam return ERR_PTR(-ENOENT); } -/* Some versions of Android don't provide memfd_create() in their libc - * implementation, so avoid complications and just go straight to Linux - * syscall. - */ -static int sys_memfd_create(const char *name, unsigned flags) -{ - return syscall(__NR_memfd_create, name, flags); -} - #ifndef MFD_CLOEXEC #define MFD_CLOEXEC 0x0001U #endif diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 76669c73dcd162..477a3b3389a091 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -667,6 +667,15 @@ static inline int sys_dup3(int oldfd, int newfd, int flags) return syscall(__NR_dup3, oldfd, newfd, flags); } +/* Some versions of Android don't provide memfd_create() in their libc + * implementation, so avoid complications and just go straight to Linux + * syscall. + */ +static inline int sys_memfd_create(const char *name, unsigned flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + /* Point *fixed_fd* to the same file that *tmp_fd* points to. * Regardless of success, *tmp_fd* is closed. * Whatever *fixed_fd* pointed to is closed silently. diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 800e0ef09c3787..56f5068e2ebab0 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -573,7 +573,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz); - fd = memfd_create(filename, 0); + fd = sys_memfd_create(filename, 0); if (fd < 0) { ret = -errno; pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret)); From b47b5236599aad43a488f82b3aeb369f1084dfe4 Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Wed, 2 Apr 2025 23:10:32 +0530 Subject: [PATCH 2225/2924] wifi: ath12k: Fix memory leak during vdev_id mismatch [ Upstream commit 75ec94db880b1e4b4f9182885d60db0db6e2ee56 ] Currently driver enables vdev_id check as part of the bank configuration in ath12k_dp_tx_get_vdev_bank_config(). This check ensures that the vdev_id configured in the bank register aligns with the vdev_id in the packet's address search table within the firmware. If there is a mismatch, the firmware forwards the packet with the HTT status HAL_WBM_REL_HTT_TX_COMP_STATUS_VDEVID_MISMATCH. Since driver does not handle this vdev_id mismatch HTT status, the corresponding buffers are not freed properly, causing a memory leak. Fix this issue by adding handling to free the buffers when a vdev_id mismatch HTT status is encountered. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ- Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: P Praneesh Link: https://patch.msgid.link/20250402174032.2651221-1-praneesh.p@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_tx.c | 1 + drivers/net/wireless/ath/ath12k/hal_desc.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c index ced232bf4aed01..1e2788df476c28 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.c +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c @@ -585,6 +585,7 @@ ath12k_dp_tx_process_htt_tx_complete(struct ath12k_base *ab, case HAL_WBM_REL_HTT_TX_COMP_STATUS_TTL: case HAL_WBM_REL_HTT_TX_COMP_STATUS_REINJ: case HAL_WBM_REL_HTT_TX_COMP_STATUS_INSPECT: + case HAL_WBM_REL_HTT_TX_COMP_STATUS_VDEVID_MISMATCH: ath12k_dp_tx_free_txbuf(ab, msdu, mac_id, tx_ring); break; case HAL_WBM_REL_HTT_TX_COMP_STATUS_MEC_NOTIFY: diff --git a/drivers/net/wireless/ath/ath12k/hal_desc.h b/drivers/net/wireless/ath/ath12k/hal_desc.h index 3e8983b85de863..63d279fab32249 100644 --- a/drivers/net/wireless/ath/ath12k/hal_desc.h +++ b/drivers/net/wireless/ath/ath12k/hal_desc.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include "core.h" @@ -1298,6 +1298,7 @@ enum hal_wbm_htt_tx_comp_status { HAL_WBM_REL_HTT_TX_COMP_STATUS_REINJ, HAL_WBM_REL_HTT_TX_COMP_STATUS_INSPECT, HAL_WBM_REL_HTT_TX_COMP_STATUS_MEC_NOTIFY, + HAL_WBM_REL_HTT_TX_COMP_STATUS_VDEVID_MISMATCH, HAL_WBM_REL_HTT_TX_COMP_STATUS_MAX, }; From f7758cce853fcad005f433009b2d0efba3e7dfe2 Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Wed, 2 Apr 2025 23:27:14 +0530 Subject: [PATCH 2226/2924] wifi: ath12k: Fix memory corruption during MLO multicast tx [ Upstream commit 6f8a27a584b23e9dedefd6cb110dd2587b84a6d4 ] The struct sk_buff's control buffer is shared by mac80211's struct ieee80211_tx_info and ath12k's struct ath12k_skb_cb. When the driver wants to transmit an skb, it caches all the mac80211-specific information from struct ieee80211_tx_info, then performs a memset on the control buffer before writing the ath12k-specific information using struct ath12k_skb_cb. However, during multicast tx, the key is being filled into the driver data, which overwrites some crucial members like link_id and flags in struct ath12k_skb_cb. This causes invalid information retrieval when the driver accesses these fields during ath12k_dp_tx(). Fix this issue by removing the key filling logic during MLO multicast tx, as it is not used anywhere in the tx path. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: 2f50de725677 ("wifi: ath12k: Add support for MLO Multicast handling in driver") Signed-off-by: P Praneesh Link: https://patch.msgid.link/20250402175714.2667270-1-praneesh.p@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/mac.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index c20fd92000dfe3..a1fad297ca3578 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -7429,7 +7429,6 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, info_flags); skb_cb = ATH12K_SKB_CB(msdu_copied); - info = IEEE80211_SKB_CB(msdu_copied); skb_cb->link_id = link_id; /* For open mode, skip peer find logic */ @@ -7452,7 +7451,6 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, if (key) { skb_cb->cipher = key->cipher; skb_cb->flags |= ATH12K_SKB_CIPHER_SET; - info->control.hw_key = key; hdr = (struct ieee80211_hdr *)msdu_copied->data; if (!ieee80211_has_protected(hdr->frame_control)) From cd29608db6c1a6ddb75c2069d00f5caba5e5aa2b Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Wed, 2 Apr 2025 23:35:43 +0530 Subject: [PATCH 2227/2924] wifi: ath12k: Fix invalid memory access while forming 802.11 header [ Upstream commit be908d2360341f8bbc982fff5a5e4f8030c17f74 ] While forming the 802.11 header from the rx descriptor, skb_push() is performed for the 802.11 header length and then calls ath12k_dp_rx_desc_get_dot11_hdr(). Since skb_push() moves the skb->data pointer backwards by the 802.11 header length, the rx descriptor points to a different memory area than intended, causing invalid information to be fetched from the rx descriptor. Also, when IV and ICV are not stripped from the given MSDU, mac80211 performs PN validation for these MSDUs, which requires the crypto header. Before forming the crypto header from the given rx descriptor, skb_push() is performed for the crypto header length, which overwrites the memory pointed to by the rx descriptor, causing invalid information to form the 802.11 header. Fix these issues by moving all rx descriptor accesses before the skb_push() operation which ensures the proper 802.11 headers are generated from the given rx descriptor and removing ath12k_dp_rxdesc_get_mpdu_frame_ctrl() for filling frame control, as this information is already fetched by ath12k_dp_rx_desc_get_dot11_hdr(). Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Co-developed-by: Karthikeyan Periyasamy Signed-off-by: Karthikeyan Periyasamy Signed-off-by: P Praneesh Link: https://patch.msgid.link/20250402180543.2670947-1-praneesh.p@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_rx.c | 27 +++++++++---------------- drivers/net/wireless/ath/ath12k/hal.c | 19 ----------------- drivers/net/wireless/ath/ath12k/hal.h | 1 - 3 files changed, 9 insertions(+), 38 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 75bf4211ad4227..21d240cc3aee46 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -228,12 +228,6 @@ static void ath12k_dp_rx_desc_get_crypto_header(struct ath12k_base *ab, ab->hal_rx_ops->rx_desc_get_crypto_header(desc, crypto_hdr, enctype); } -static u16 ath12k_dp_rxdesc_get_mpdu_frame_ctrl(struct ath12k_base *ab, - struct hal_rx_desc *desc) -{ - return ab->hal_rx_ops->rx_desc_get_mpdu_frame_ctl(desc); -} - static inline u8 ath12k_dp_rx_get_msdu_src_link(struct ath12k_base *ab, struct hal_rx_desc *desc) { @@ -2122,10 +2116,13 @@ static void ath12k_get_dot11_hdr_from_rx_desc(struct ath12k *ar, struct hal_rx_desc *rx_desc = rxcb->rx_desc; struct ath12k_base *ab = ar->ab; size_t hdr_len, crypto_len; - struct ieee80211_hdr *hdr; + struct ieee80211_hdr hdr; u16 qos_ctl; - __le16 fc; - u8 *crypto_hdr; + u8 *crypto_hdr, mesh_ctrl; + + ath12k_dp_rx_desc_get_dot11_hdr(ab, rx_desc, &hdr); + hdr_len = ieee80211_hdrlen(hdr.frame_control); + mesh_ctrl = ath12k_dp_rx_h_mesh_ctl_present(ab, rx_desc); if (!(status->flag & RX_FLAG_IV_STRIPPED)) { crypto_len = ath12k_dp_rx_crypto_param_len(ar, enctype); @@ -2133,22 +2130,16 @@ static void ath12k_get_dot11_hdr_from_rx_desc(struct ath12k *ar, ath12k_dp_rx_desc_get_crypto_header(ab, rx_desc, crypto_hdr, enctype); } - fc = cpu_to_le16(ath12k_dp_rxdesc_get_mpdu_frame_ctrl(ab, rx_desc)); - hdr_len = ieee80211_hdrlen(fc); skb_push(msdu, hdr_len); - hdr = (struct ieee80211_hdr *)msdu->data; - hdr->frame_control = fc; - - /* Get wifi header from rx_desc */ - ath12k_dp_rx_desc_get_dot11_hdr(ab, rx_desc, hdr); + memcpy(msdu->data, &hdr, min(hdr_len, sizeof(hdr))); if (rxcb->is_mcbc) status->flag &= ~RX_FLAG_PN_VALIDATED; /* Add QOS header */ - if (ieee80211_is_data_qos(hdr->frame_control)) { + if (ieee80211_is_data_qos(hdr.frame_control)) { qos_ctl = rxcb->tid; - if (ath12k_dp_rx_h_mesh_ctl_present(ab, rx_desc)) + if (mesh_ctrl) qos_ctl |= IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT; /* TODO: Add other QoS ctl fields when required */ diff --git a/drivers/net/wireless/ath/ath12k/hal.c b/drivers/net/wireless/ath/ath12k/hal.c index cd59ff8e6c7b0c..178c242a840e3e 100644 --- a/drivers/net/wireless/ath/ath12k/hal.c +++ b/drivers/net/wireless/ath/ath12k/hal.c @@ -511,11 +511,6 @@ static void ath12k_hw_qcn9274_rx_desc_get_crypto_hdr(struct hal_rx_desc *desc, crypto_hdr[7] = HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.qcn9274.mpdu_start.pn[1]); } -static u16 ath12k_hw_qcn9274_rx_desc_get_mpdu_frame_ctl(struct hal_rx_desc *desc) -{ - return __le16_to_cpu(desc->u.qcn9274.mpdu_start.frame_ctrl); -} - static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab) { struct ath12k_hal *hal = &ab->hal; @@ -736,7 +731,6 @@ const struct hal_rx_ops hal_rx_qcn9274_ops = { .rx_desc_is_da_mcbc = ath12k_hw_qcn9274_rx_desc_is_da_mcbc, .rx_desc_get_dot11_hdr = ath12k_hw_qcn9274_rx_desc_get_dot11_hdr, .rx_desc_get_crypto_header = ath12k_hw_qcn9274_rx_desc_get_crypto_hdr, - .rx_desc_get_mpdu_frame_ctl = ath12k_hw_qcn9274_rx_desc_get_mpdu_frame_ctl, .dp_rx_h_msdu_done = ath12k_hw_qcn9274_dp_rx_h_msdu_done, .dp_rx_h_l4_cksum_fail = ath12k_hw_qcn9274_dp_rx_h_l4_cksum_fail, .dp_rx_h_ip_cksum_fail = ath12k_hw_qcn9274_dp_rx_h_ip_cksum_fail, @@ -975,11 +969,6 @@ ath12k_hw_qcn9274_compact_rx_desc_get_crypto_hdr(struct hal_rx_desc *desc, HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.qcn9274_compact.mpdu_start.pn[1]); } -static u16 ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_frame_ctl(struct hal_rx_desc *desc) -{ - return __le16_to_cpu(desc->u.qcn9274_compact.mpdu_start.frame_ctrl); -} - static bool ath12k_hw_qcn9274_compact_dp_rx_h_msdu_done(struct hal_rx_desc *desc) { return !!le32_get_bits(desc->u.qcn9274_compact.msdu_end.info14, @@ -1080,8 +1069,6 @@ const struct hal_rx_ops hal_rx_qcn9274_compact_ops = { .rx_desc_is_da_mcbc = ath12k_hw_qcn9274_compact_rx_desc_is_da_mcbc, .rx_desc_get_dot11_hdr = ath12k_hw_qcn9274_compact_rx_desc_get_dot11_hdr, .rx_desc_get_crypto_header = ath12k_hw_qcn9274_compact_rx_desc_get_crypto_hdr, - .rx_desc_get_mpdu_frame_ctl = - ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_frame_ctl, .dp_rx_h_msdu_done = ath12k_hw_qcn9274_compact_dp_rx_h_msdu_done, .dp_rx_h_l4_cksum_fail = ath12k_hw_qcn9274_compact_dp_rx_h_l4_cksum_fail, .dp_rx_h_ip_cksum_fail = ath12k_hw_qcn9274_compact_dp_rx_h_ip_cksum_fail, @@ -1330,11 +1317,6 @@ static void ath12k_hw_wcn7850_rx_desc_get_crypto_hdr(struct hal_rx_desc *desc, crypto_hdr[7] = HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.wcn7850.mpdu_start.pn[1]); } -static u16 ath12k_hw_wcn7850_rx_desc_get_mpdu_frame_ctl(struct hal_rx_desc *desc) -{ - return __le16_to_cpu(desc->u.wcn7850.mpdu_start.frame_ctrl); -} - static int ath12k_hal_srng_create_config_wcn7850(struct ath12k_base *ab) { struct ath12k_hal *hal = &ab->hal; @@ -1555,7 +1537,6 @@ const struct hal_rx_ops hal_rx_wcn7850_ops = { .rx_desc_is_da_mcbc = ath12k_hw_wcn7850_rx_desc_is_da_mcbc, .rx_desc_get_dot11_hdr = ath12k_hw_wcn7850_rx_desc_get_dot11_hdr, .rx_desc_get_crypto_header = ath12k_hw_wcn7850_rx_desc_get_crypto_hdr, - .rx_desc_get_mpdu_frame_ctl = ath12k_hw_wcn7850_rx_desc_get_mpdu_frame_ctl, .dp_rx_h_msdu_done = ath12k_hw_wcn7850_dp_rx_h_msdu_done, .dp_rx_h_l4_cksum_fail = ath12k_hw_wcn7850_dp_rx_h_l4_cksum_fail, .dp_rx_h_ip_cksum_fail = ath12k_hw_wcn7850_dp_rx_h_ip_cksum_fail, diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h index 94e2e873595831..3156563c77e5b4 100644 --- a/drivers/net/wireless/ath/ath12k/hal.h +++ b/drivers/net/wireless/ath/ath12k/hal.h @@ -1068,7 +1068,6 @@ struct hal_rx_ops { bool (*rx_desc_is_da_mcbc)(struct hal_rx_desc *desc); void (*rx_desc_get_dot11_hdr)(struct hal_rx_desc *desc, struct ieee80211_hdr *hdr); - u16 (*rx_desc_get_mpdu_frame_ctl)(struct hal_rx_desc *desc); void (*rx_desc_get_crypto_header)(struct hal_rx_desc *desc, u8 *crypto_hdr, enum hal_encrypt_type enctype); From 857b240047628ade6cf24cd28813a7b86bc52d9c Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Thu, 20 Feb 2025 17:56:12 +0000 Subject: [PATCH 2228/2924] IB/cm: use rwlock for MAD agent lock [ Upstream commit 4dab26bed543584577b64b36aadb8b5b165bf44f ] In workloads where there are many processes establishing connections using RDMA CM in parallel (large scale MPI), there can be heavy contention for mad_agent_lock in cm_alloc_msg. This contention can occur while inside of a spin_lock_irq region, leading to interrupts being disabled for extended durations on many cores. Furthermore, it leads to the serialization of rdma_create_ah calls, which has negative performance impacts for NICs which are capable of processing multiple address handle creations in parallel. The end result is the machine becoming unresponsive, hung task warnings, netdev TX timeouts, etc. Since the lock appears to be only for protection from cm_remove_one, it can be changed to a rwlock to resolve these issues. Reproducer: Server: for i in $(seq 1 512); do ucmatose -c 32 -p $((i + 5000)) & done Client: for i in $(seq 1 512); do ucmatose -c 32 -p $((i + 5000)) -s 10.2.0.52 & done Fixes: 76039ac9095f ("IB/cm: Protect cm_dev, cm_ports and mad_agent with kref and lock") Link: https://patch.msgid.link/r/20250220175612.2763122-1-jmoroni@google.com Signed-off-by: Jacob Moroni Acked-by: Eric Dumazet Reviewed-by: Zhu Yanjun Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 142170473e7536..effa53dd680027 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -167,7 +167,7 @@ struct cm_port { struct cm_device { struct kref kref; struct list_head list; - spinlock_t mad_agent_lock; + rwlock_t mad_agent_lock; struct ib_device *ib_device; u8 ack_delay; int going_down; @@ -285,7 +285,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) if (!cm_id_priv->av.port) return ERR_PTR(-EINVAL); - spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); mad_agent = cm_id_priv->av.port->mad_agent; if (!mad_agent) { m = ERR_PTR(-EINVAL); @@ -311,7 +311,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) m->ah = ah; out: - spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); return m; } @@ -1297,10 +1297,10 @@ static __be64 cm_form_tid(struct cm_id_private *cm_id_priv) if (!cm_id_priv->av.port) return cpu_to_be64(low_tid); - spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); if (cm_id_priv->av.port->mad_agent) hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32; - spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); return cpu_to_be64(hi_tid | low_tid); } @@ -4378,7 +4378,7 @@ static int cm_add_one(struct ib_device *ib_device) return -ENOMEM; kref_init(&cm_dev->kref); - spin_lock_init(&cm_dev->mad_agent_lock); + rwlock_init(&cm_dev->mad_agent_lock); cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; cm_dev->going_down = 0; @@ -4494,9 +4494,9 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) * The above ensures no call paths from the work are running, * the remaining paths all take the mad_agent_lock. */ - spin_lock(&cm_dev->mad_agent_lock); + write_lock(&cm_dev->mad_agent_lock); port->mad_agent = NULL; - spin_unlock(&cm_dev->mad_agent_lock); + write_unlock(&cm_dev->mad_agent_lock); ib_unregister_mad_agent(mad_agent); ib_port_unregister_client_groups(ib_device, i, cm_counter_groups); From 4966cd0c097d182d130aaa508be2209a32db247f Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Mon, 7 Apr 2025 11:57:51 +0800 Subject: [PATCH 2229/2924] bpf: Check link_create.flags parameter for multi_kprobe [ Upstream commit 243911982aa9faf4361aa952f879331ad66933fe ] The link_create.flags are currently not used for multi-kprobes, so return -EINVAL if it is set, same as for other attach APIs. We allow target_fd, on the other hand, to have an arbitrary value for multi-kprobe, as there are existing users (libbpf) relying on this. Fixes: 0dcac2725406 ("bpf: Add multi kprobe link") Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250407035752.1108927-1-chen.dylane@linux.dev Signed-off-by: Sasha Levin --- kernel/trace/bpf_trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 187dc37d61d4a3..ec19942321e6d2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2987,6 +2987,9 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; + if (attr->link_create.flags) + return -EINVAL; + if (!is_kprobe_multi(prog)) return -EINVAL; From 2e4a42b33ac6be8deb9e9dccf75d1a2f53959ca1 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Mon, 7 Apr 2025 11:57:52 +0800 Subject: [PATCH 2230/2924] bpf: Check link_create.flags parameter for multi_uprobe [ Upstream commit a76116f422c442ab691b4dcabb25613486d34360 ] The link_create.flags are currently not used for multi-uprobes, so return -EINVAL if it is set, same as for other attach APIs. We allow target_fd to have an arbitrary value for multi-uprobe, though, as there are existing users (libbpf) relying on this. Fixes: 89ae89f53d20 ("bpf: Add multi uprobe link") Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250407035752.1108927-2-chen.dylane@linux.dev Signed-off-by: Sasha Levin --- kernel/trace/bpf_trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ec19942321e6d2..0f5906f43d7cac 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3379,6 +3379,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; + if (attr->link_create.flags) + return -EINVAL; + if (!is_uprobe_multi(prog)) return -EINVAL; From 0bab1415fcb6c0aa1032cbc7ee1d3fb140998f05 Mon Sep 17 00:00:00 2001 From: Saket Kumar Bhaskar Date: Wed, 9 Apr 2025 15:26:33 +0530 Subject: [PATCH 2231/2924] selftests/bpf: Fix bpf_nf selftest failure [ Upstream commit 967e8def1100cb4b08c28a54d27ce69563fdf281 ] For systems with missing iptables-legacy tool this selftest fails. Add check to find if iptables-legacy tool is available and skip the test if the tool is missing. Fixes: de9c8d848d90 ("selftests/bpf: S/iptables/iptables-legacy/ in the bpf_nf and xdp_synproxy test") Signed-off-by: Saket Kumar Bhaskar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250409095633.33653-1-skb99@linux.ibm.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index dbd13f8e42a7aa..dd6512fa652be0 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -63,6 +63,12 @@ static void test_bpf_nf_ct(int mode) .repeat = 1, ); + if (SYS_NOFAIL("iptables-legacy --version")) { + fprintf(stdout, "Missing required iptables-legacy tool\n"); + test__skip(); + return; + } + skel = test_bpf_nf__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) return; From 603943f022a7fe5cc83ca7005faf34798fb7853f Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Wed, 19 Feb 2025 13:20:14 +0800 Subject: [PATCH 2232/2924] bpf: fix ktls panic with sockmap [ Upstream commit 54a3ecaeeeae8176da8badbd7d72af1017032c39 ] [ 2172.936997] ------------[ cut here ]------------ [ 2172.936999] kernel BUG at lib/iov_iter.c:629! ...... [ 2172.944996] PKRU: 55555554 [ 2172.945155] Call Trace: [ 2172.945299] [ 2172.945428] ? die+0x36/0x90 [ 2172.945601] ? do_trap+0xdd/0x100 [ 2172.945795] ? iov_iter_revert+0x178/0x180 [ 2172.946031] ? iov_iter_revert+0x178/0x180 [ 2172.946267] ? do_error_trap+0x7d/0x110 [ 2172.946499] ? iov_iter_revert+0x178/0x180 [ 2172.946736] ? exc_invalid_op+0x50/0x70 [ 2172.946961] ? iov_iter_revert+0x178/0x180 [ 2172.947197] ? asm_exc_invalid_op+0x1a/0x20 [ 2172.947446] ? iov_iter_revert+0x178/0x180 [ 2172.947683] ? iov_iter_revert+0x5c/0x180 [ 2172.947913] tls_sw_sendmsg_locked.isra.0+0x794/0x840 [ 2172.948206] tls_sw_sendmsg+0x52/0x80 [ 2172.948420] ? inet_sendmsg+0x1f/0x70 [ 2172.948634] __sys_sendto+0x1cd/0x200 [ 2172.948848] ? find_held_lock+0x2b/0x80 [ 2172.949072] ? syscall_trace_enter+0x140/0x270 [ 2172.949330] ? __lock_release.isra.0+0x5e/0x170 [ 2172.949595] ? find_held_lock+0x2b/0x80 [ 2172.949817] ? syscall_trace_enter+0x140/0x270 [ 2172.950211] ? lockdep_hardirqs_on_prepare+0xda/0x190 [ 2172.950632] ? ktime_get_coarse_real_ts64+0xc2/0xd0 [ 2172.951036] __x64_sys_sendto+0x24/0x30 [ 2172.951382] do_syscall_64+0x90/0x170 ...... After calling bpf_exec_tx_verdict(), the size of msg_pl->sg may increase, e.g., when the BPF program executes bpf_msg_push_data(). If the BPF program sets cork_bytes and sg.size is smaller than cork_bytes, it will return -ENOSPC and attempt to roll back to the non-zero copy logic. However, during rollback, msg->msg_iter is reset, but since msg_pl->sg.size has been increased, subsequent executions will exceed the actual size of msg_iter. ''' iov_iter_revert(&msg->msg_iter, msg_pl->sg.size - orig_size); ''' The changes in this commit are based on the following considerations: 1. When cork_bytes is set, rolling back to non-zero copy logic is pointless and can directly go to zero-copy logic. 2. We can not calculate the correct number of bytes to revert msg_iter. Assume the original data is "abcdefgh" (8 bytes), and after 3 pushes by the BPF program, it becomes 11-byte data: "abc?de?fgh?". Then, we set cork_bytes to 6, which means the first 6 bytes have been processed, and the remaining 5 bytes "?fgh?" will be cached until the length meets the cork_bytes requirement. However, some data in "?fgh?" is not within 'sg->msg_iter' (but in msg_pl instead), especially the data "?" we pushed. So it doesn't seem as simple as just reverting through an offset of msg_iter. 3. For non-TLS sockets in tcp_bpf_sendmsg, when a "cork" situation occurs, the user-space send() doesn't return an error, and the returned length is the same as the input length parameter, even if some data is cached. Additionally, I saw that the current non-zero-copy logic for handling corking is written as: ''' line 1177 else if (ret != -EAGAIN) { if (ret == -ENOSPC) ret = 0; goto send_end; ''' So it's ok to just return 'copied' without error when a "cork" situation occurs. Fixes: fcb14cb1bdac ("new iov_iter flavour - ITER_UBUF") Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling") Signed-off-by: Jiayuan Chen Acked-by: John Fastabend Link: https://lore.kernel.org/r/20250219052015.274405-2-jiayuan.chen@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 914d4e1516a3cd..f3d7d19482da9a 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1120,9 +1120,13 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, num_async++; else if (ret == -ENOMEM) goto wait_for_memory; - else if (ctx->open_rec && ret == -ENOSPC) + else if (ctx->open_rec && ret == -ENOSPC) { + if (msg_pl->cork_bytes) { + ret = 0; + goto send_end; + } goto rollback_iter; - else if (ret != -EAGAIN) + } else if (ret != -EAGAIN) goto send_end; } continue; From f585747905c0ae8d02bf63932fa95daa19c44a64 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Mon, 7 Apr 2025 22:21:21 +0800 Subject: [PATCH 2233/2924] bpf, sockmap: fix duplicated data transmission [ Upstream commit 3b4f14b794287be137ea2c6158765d1ea1e018a4 ] In the !ingress path under sk_psock_handle_skb(), when sending data to the remote under snd_buf limitations, partial skb data might be transmitted. Although we preserved the partial transmission state (offset/length), the state wasn't properly consumed during retries. This caused the retry path to resend the entire skb data instead of continuing from the previous offset, resulting in data overlap at the receiver side. Fixes: 405df89dd52c ("bpf, sockmap: Improved check for empty queue") Signed-off-by: Jiayuan Chen Link: https://lore.kernel.org/r/20250407142234.47591-3-jiayuan.chen@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- net/core/skmsg.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 0ddc4c7188332a..dc136603c42ce8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -656,11 +656,6 @@ static void sk_psock_backlog(struct work_struct *work) int ret; mutex_lock(&psock->work_mutex); - if (unlikely(state->len)) { - len = state->len; - off = state->off; - } - while ((skb = skb_peek(&psock->ingress_skb))) { len = skb->len; off = 0; @@ -670,6 +665,13 @@ static void sk_psock_backlog(struct work_struct *work) off = stm->offset; len = stm->full_len; } + + /* Resume processing from previous partial state */ + if (unlikely(state->len)) { + len = state->len; + off = state->off; + } + ingress = skb_bpf_ingress(skb); skb_bpf_redirect_clear(skb); do { @@ -697,6 +699,8 @@ static void sk_psock_backlog(struct work_struct *work) len -= ret; } while (len); + /* The entire skb sent, clear state */ + sk_psock_skb_state(psock, state, 0, 0); skb = skb_dequeue(&psock->ingress_skb); kfree_skb(skb); } From e9c1299d813fc04668042690f2c3cc76d013959a Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Mon, 7 Apr 2025 22:21:22 +0800 Subject: [PATCH 2234/2924] bpf, sockmap: Fix panic when calling skb_linearize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5ca2e29f6834c64c0e5a9ccf1278c21fb49b827e ] The panic can be reproduced by executing the command: ./bench sockmap -c 2 -p 1 -a --rx-verdict-ingress --rx-strp 100000 Then a kernel panic was captured: ''' [ 657.460555] kernel BUG at net/core/skbuff.c:2178! [ 657.462680] Tainted: [W]=WARN [ 657.463287] Workqueue: events sk_psock_backlog ... [ 657.469610] [ 657.469738] ? die+0x36/0x90 [ 657.469916] ? do_trap+0x1d0/0x270 [ 657.470118] ? pskb_expand_head+0x612/0xf40 [ 657.470376] ? pskb_expand_head+0x612/0xf40 [ 657.470620] ? do_error_trap+0xa3/0x170 [ 657.470846] ? pskb_expand_head+0x612/0xf40 [ 657.471092] ? handle_invalid_op+0x2c/0x40 [ 657.471335] ? pskb_expand_head+0x612/0xf40 [ 657.471579] ? exc_invalid_op+0x2d/0x40 [ 657.471805] ? asm_exc_invalid_op+0x1a/0x20 [ 657.472052] ? pskb_expand_head+0xd1/0xf40 [ 657.472292] ? pskb_expand_head+0x612/0xf40 [ 657.472540] ? lock_acquire+0x18f/0x4e0 [ 657.472766] ? find_held_lock+0x2d/0x110 [ 657.472999] ? __pfx_pskb_expand_head+0x10/0x10 [ 657.473263] ? __kmalloc_cache_noprof+0x5b/0x470 [ 657.473537] ? __pfx___lock_release.isra.0+0x10/0x10 [ 657.473826] __pskb_pull_tail+0xfd/0x1d20 [ 657.474062] ? __kasan_slab_alloc+0x4e/0x90 [ 657.474707] sk_psock_skb_ingress_enqueue+0x3bf/0x510 [ 657.475392] ? __kasan_kmalloc+0xaa/0xb0 [ 657.476010] sk_psock_backlog+0x5cf/0xd70 [ 657.476637] process_one_work+0x858/0x1a20 ''' The panic originates from the assertion BUG_ON(skb_shared(skb)) in skb_linearize(). A previous commit(see Fixes tag) introduced skb_get() to avoid race conditions between skb operations in the backlog and skb release in the recvmsg path. However, this caused the panic to always occur when skb_linearize is executed. The "--rx-strp 100000" parameter forces the RX path to use the strparser module which aggregates data until it reaches 100KB before calling sockmap logic. The 100KB payload exceeds MAX_MSG_FRAGS, triggering skb_linearize. To fix this issue, just move skb_get into sk_psock_skb_ingress_enqueue. ''' sk_psock_backlog: sk_psock_handle_skb skb_get(skb) <== we move it into 'sk_psock_skb_ingress_enqueue' sk_psock_skb_ingress____________ ↓ | | → sk_psock_skb_ingress_self | sk_psock_skb_ingress_enqueue sk_psock_verdict_apply_________________↑ skb_linearize ''' Note that for verdict_apply path, the skb_get operation is unnecessary so we add 'take_ref' param to control it's behavior. Fixes: a454d84ee20b ("bpf, sockmap: Fix skb refcnt race after locking changes") Signed-off-by: Jiayuan Chen Link: https://lore.kernel.org/r/20250407142234.47591-4-jiayuan.chen@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- net/core/skmsg.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index dc136603c42ce8..061f1409bd5a9e 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -530,16 +530,22 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, u32 off, u32 len, struct sk_psock *psock, struct sock *sk, - struct sk_msg *msg) + struct sk_msg *msg, + bool take_ref) { int num_sge, copied; + /* skb_to_sgvec will fail when the total number of fragments in + * frag_list and frags exceeds MAX_MSG_FRAGS. For example, the + * caller may aggregate multiple skbs. + */ num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); if (num_sge < 0) { /* skb linearize may fail with ENOMEM, but lets simply try again * later if this happens. Under memory pressure we don't want to * drop the skb. We need to linearize the skb so that the mapping * in skb_to_sgvec can not error. + * Note that skb_linearize requires the skb not to be shared. */ if (skb_linearize(skb)) return -EAGAIN; @@ -556,7 +562,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, msg->sg.start = 0; msg->sg.size = copied; msg->sg.end = num_sge; - msg->skb = skb; + msg->skb = take_ref ? skb_get(skb) : skb; sk_psock_queue_msg(psock, msg); sk_psock_data_ready(sk, psock); @@ -564,7 +570,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, } static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, - u32 off, u32 len); + u32 off, u32 len, bool take_ref); static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len) @@ -578,7 +584,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, * correctly. */ if (unlikely(skb->sk == sk)) - return sk_psock_skb_ingress_self(psock, skb, off, len); + return sk_psock_skb_ingress_self(psock, skb, off, len, true); msg = sk_psock_create_ingress_msg(sk, skb); if (!msg) return -EAGAIN; @@ -590,7 +596,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, * into user buffers. */ skb_set_owner_r(skb, sk); - err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, true); if (err < 0) kfree(msg); return err; @@ -601,7 +607,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, * because the skb is already accounted for here. */ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, - u32 off, u32 len) + u32 off, u32 len, bool take_ref) { struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC); struct sock *sk = psock->sk; @@ -610,7 +616,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb if (unlikely(!msg)) return -EAGAIN; skb_set_owner_r(skb, sk); - err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref); if (err < 0) kfree(msg); return err; @@ -619,18 +625,13 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len, bool ingress) { - int err = 0; - if (!ingress) { if (!sock_writeable(psock->sk)) return -EAGAIN; return skb_send_sock(psock->sk, skb, off, len); } - skb_get(skb); - err = sk_psock_skb_ingress(psock, skb, off, len); - if (err < 0) - kfree_skb(skb); - return err; + + return sk_psock_skb_ingress(psock, skb, off, len); } static void sk_psock_skb_state(struct sk_psock *psock, @@ -1018,7 +1019,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, off = stm->offset; len = stm->full_len; } - err = sk_psock_skb_ingress_self(psock, skb, off, len); + err = sk_psock_skb_ingress_self(psock, skb, off, len, false); } if (err < 0) { spin_lock_bh(&psock->ingress_lock); From 8d9431b0d11a5030aa1ce477defee455b3821701 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Mar 2025 16:06:46 +0800 Subject: [PATCH 2235/2924] f2fs: zone: fix to avoid inconsistence in between SIT and SSA [ Upstream commit 773704c1ef96a8b70d0d186ab725f50548de82c4 ] w/ below testcase, it will cause inconsistence in between SIT and SSA. create_null_blk 512 2 1024 1024 mkfs.f2fs -m /dev/nullb0 mount /dev/nullb0 /mnt/f2fs/ touch /mnt/f2fs/file f2fs_io pinfile set /mnt/f2fs/file fallocate -l 4GiB /mnt/f2fs/file F2FS-fs (nullb0): Inconsistent segment (0) type [1, 0] in SSA and SIT CPU: 5 UID: 0 PID: 2398 Comm: fallocate Tainted: G O 6.13.0-rc1 #84 Tainted: [O]=OOT_MODULE Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 Call Trace: dump_stack_lvl+0xb3/0xd0 dump_stack+0x14/0x20 f2fs_handle_critical_error+0x18c/0x220 [f2fs] f2fs_stop_checkpoint+0x38/0x50 [f2fs] do_garbage_collect+0x674/0x6e0 [f2fs] f2fs_gc_range+0x12b/0x230 [f2fs] f2fs_allocate_pinning_section+0x5c/0x150 [f2fs] f2fs_expand_inode_data+0x1cc/0x3c0 [f2fs] f2fs_fallocate+0x3c3/0x410 [f2fs] vfs_fallocate+0x15f/0x4b0 __x64_sys_fallocate+0x4a/0x80 x64_sys_call+0x15e8/0x1b80 do_syscall_64+0x68/0x130 entry_SYSCALL_64_after_hwframe+0x67/0x6f RIP: 0033:0x7f9dba5197ca F2FS-fs (nullb0): Stopped filesystem due to reason: 4 The reason is f2fs_gc_range() may try to migrate block in curseg, however, its SSA block is not uptodate due to the last summary block data is still in cache of curseg. In this patch, we add a condition in f2fs_gc_range() to check whether section is opened or not, and skip block migration for opened section. Fixes: 9703d69d9d15 ("f2fs: support file pinning for zoned devices") Reviewed-by: Daeho Jeong Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/gc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2b8f9239bede7c..8b5a55b72264dd 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2066,6 +2066,9 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; + if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno))) + continue; + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); put_gc_inode(&gc_list); From b9c9f16a00bf956d6cc4908f6c4f4fbaee0beb6b Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 10 Apr 2025 12:04:03 +0200 Subject: [PATCH 2236/2924] net: phy: mediatek: permit to compile test GE SOC PHY driver [ Upstream commit e5566162af8b9690e096d2e6089e4ed955a0d13d ] When commit 462a3daad679 ("net: phy: mediatek: fix compile-test dependencies") fixed the dependency, it should have also introduced an or on COMPILE_TEST to permit this driver to be compile-tested even if NVMEM_MTK_EFUSE wasn't selected. The driver makes use of NVMEM API that are always compiled (return error) so the driver can actually be compiled even without that config. Fix and simplify the dependency condition of this kernel config. Fixes: 462a3daad679 ("net: phy: mediatek: fix compile-test dependencies") Acked-by: Daniel Golle Reviewed-by: Andrew Lunn Signed-off-by: Christian Marangi Acked-by: Arnd Bergmann Link: https://patch.msgid.link/20250410100410.348-1-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/mediatek/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/mediatek/Kconfig b/drivers/net/phy/mediatek/Kconfig index 2a8ac5aed0f893..6a4c2b328c4183 100644 --- a/drivers/net/phy/mediatek/Kconfig +++ b/drivers/net/phy/mediatek/Kconfig @@ -15,8 +15,7 @@ config MEDIATEK_GE_PHY config MEDIATEK_GE_SOC_PHY tristate "MediaTek SoC Ethernet PHYs" - depends on (ARM64 && ARCH_MEDIATEK) || COMPILE_TEST - depends on NVMEM_MTK_EFUSE + depends on (ARM64 && ARCH_MEDIATEK && NVMEM_MTK_EFUSE) || COMPILE_TEST select MTK_NET_PHYLIB help Supports MediaTek SoC built-in Gigabit Ethernet PHYs. From f44bcf5204af1632275cf57050691fd24c676f49 Mon Sep 17 00:00:00 2001 From: Raj Kumar Bhagat Date: Thu, 3 Apr 2025 15:34:29 +0530 Subject: [PATCH 2237/2924] wifi: ath12k: fix cleanup path after mhi init [ Upstream commit 6177c97fb6f05bf0473a2806e3bece7e77693209 ] Currently, the 'err_pci_msi_free' label is misplaced, causing the cleanup sequence to be incorrect. Fix this by moving the 'err_pci_msi_free' label to the correct position after 'err_irq_affinity_cleanup'. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00209-QCAHKSWPL_SILICONZ-1 Fixes: a3012f206d07 ("wifi: ath12k: set IRQ affinity to CPU0 in case of one MSI vector") Signed-off-by: Raj Kumar Bhagat Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250403-ath12k-cleanup-v1-1-ad8f67b0e9cf@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index b474696ac6d8c9..99b2c7927ec818 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -1710,12 +1710,12 @@ static int ath12k_pci_probe(struct pci_dev *pdev, err_mhi_unregister: ath12k_mhi_unregister(ab_pci); -err_pci_msi_free: - ath12k_pci_msi_free(ab_pci); - err_irq_affinity_cleanup: ath12k_pci_set_irq_affinity_hint(ab_pci, NULL); +err_pci_msi_free: + ath12k_pci_msi_free(ab_pci); + err_pci_free_region: ath12k_pci_free_region(ab_pci); From 43995b028c1c327041ef22ad7e0837cb2c23aecf Mon Sep 17 00:00:00 2001 From: Aaradhana Sahu Date: Thu, 3 Apr 2025 13:52:06 +0530 Subject: [PATCH 2238/2924] wifi: ath12k: Resolve multicast packet drop by populating key_cipher in ath12k_install_key() [ Upstream commit d61c0b3c63462d17e63e5a2b4815e0f1ad17f57e ] Currently, the key_cipher in the ath12k_vif structure, which represents the group cipher of the MLD AP, is populated when the link address matches the ieee80211_vif address within ath12k_install_key(). However, in MLD AP, the link address and ieee80211_vif address can differ. Due to this key_cipher is not populated and multicast packets don't get the correct cipher information and resulting multicast packets drop. To fix this, compare the link address with the arvif->bssid instead of the ieee80211_vif address. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00209-QCAHKSWPL_SILICONZ-1 Fixes: 3dd2c68f206ef ("wifi: ath12k: prepare vif data structure for MLO handling") Signed-off-by: Aaradhana Sahu Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250403082207.3323938-2-aaradhana.sahu@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/mac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index a1fad297ca3578..3883dab6771b86 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4605,7 +4605,6 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, .macaddr = macaddr, }; struct ath12k_vif *ahvif = arvif->ahvif; - struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -4658,7 +4657,7 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, if (!wait_for_completion_timeout(&ar->install_key_done, 1 * HZ)) return -ETIMEDOUT; - if (ether_addr_equal(macaddr, vif->addr)) + if (ether_addr_equal(macaddr, arvif->bssid)) ahvif->key_cipher = key->cipher; return ar->install_key_status ? -EINVAL : 0; From 53c6682da17969b8bed4c1954480362e3800d25b Mon Sep 17 00:00:00 2001 From: Ramya Gnanasekar Date: Wed, 9 Apr 2025 20:53:41 +0530 Subject: [PATCH 2239/2924] wifi: ath12k: Fix WMI tag for EHT rate in peer assoc [ Upstream commit 1a0e65750b55d2cf5de4a9bf7d6d55718784bdb7 ] Incorrect WMI tag is used for EHT rate update from host to firmware while encoding peer assoc WMI. Correct the WMI tag used for EHT rate update from WMI_TAG_HE_RATE_SET to the proper tag. This ensures firmware does not mistakenly update HE rate during parsing. Found during code review. Compile tested only. Fixes: 5b70ec6036c1 ("wifi: ath12k: add WMI support for EHT peer") Signed-off-by: Ramya Gnanasekar Link: https://patch.msgid.link/20250409152341.944628-1-ramya.gnanasekar@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 6d1ea5f3a791b0..6374e86f89f3d5 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -2351,7 +2351,7 @@ int ath12k_wmi_send_peer_assoc_cmd(struct ath12k *ar, for (i = 0; i < arg->peer_eht_mcs_count; i++) { eht_mcs = ptr; - eht_mcs->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_HE_RATE_SET, + eht_mcs->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_EHT_RATE_SET, sizeof(*eht_mcs)); eht_mcs->rx_mcs_set = cpu_to_le32(arg->peer_eht_rx_mcs_set[i]); From 8c4a200d03574bfcbf54fdb7ba5968b58ad2e0b3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 9 Apr 2025 14:01:25 +0300 Subject: [PATCH 2240/2924] wifi: ath12k: Fix buffer overflow in debugfs [ Upstream commit 8c7a5031a6b0d42e640fbd2d5d05f61f74e32dce ] If the user tries to write more than 32 bytes then it results in memory corruption. Fortunately, this is debugfs so it's limited to root users. Fixes: 3f73c24f28b3 ("wifi: ath12k: Add support to enable debugfs_htt_stats") Signed-off-by: Dan Carpenter Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/35daefbd-d493-41d9-b192-96177d521b40@stanley.mountain Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index 1c0d5fa39a8dcb..aeaf970339d4dc 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -5377,6 +5377,9 @@ static ssize_t ath12k_write_htt_stats_type(struct file *file, const int size = 32; int num_args; + if (count > size) + return -EINVAL; + char *buf __free(kfree) = kzalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; From 497ac4b33b968798fdbae410f88c3382f3cd8a0a Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 8 Apr 2025 11:36:29 +0530 Subject: [PATCH 2241/2924] wifi: ath12k: fix SLUB BUG - Object already free in ath12k_reg_free() [ Upstream commit 6d019abc402f58b25a7cab30b2d9af2f3173e4df ] During rmmod of ath12k module with SLUB debug enabled, following print is seen - ============================================================================= BUG kmalloc-1k (Not tainted): Object already free ----------------------------------------------------------------------------- Allocated in ath12k_reg_build_regd+0x94/0xa20 [ath12k] age=10470 cpu=0 pid=0 __kmalloc_noprof+0xf4/0x368 ath12k_reg_build_regd+0x94/0xa20 [ath12k] ath12k_wmi_op_rx+0x199c/0x2c14 [ath12k] ath12k_htc_rx_completion_handler+0x398/0x554 [ath12k] ath12k_ce_per_engine_service+0x248/0x368 [ath12k] ath12k_pci_ce_workqueue+0x28/0x50 [ath12k] process_one_work+0x14c/0x28c bh_worker+0x22c/0x27c workqueue_softirq_action+0x80/0x90 tasklet_action+0x14/0x3c handle_softirqs+0x108/0x240 __do_softirq+0x14/0x20 Freed in ath12k_reg_free+0x40/0x74 [ath12k] age=136 cpu=2 pid=166 kfree+0x148/0x248 ath12k_reg_free+0x40/0x74 [ath12k] ath12k_core_hw_group_destroy+0x68/0xac [ath12k] ath12k_core_deinit+0xd8/0x124 [ath12k] ath12k_pci_remove+0x6c/0x130 [ath12k] pci_device_remove+0x44/0xe8 device_remove+0x4c/0x80 device_release_driver_internal+0x1d0/0x22c driver_detach+0x50/0x98 bus_remove_driver+0x70/0xf4 driver_unregister+0x30/0x60 pci_unregister_driver+0x24/0x9c ath12k_pci_exit+0x18/0x24 [ath12k] __arm64_sys_delete_module+0x1a0/0x2a8 invoke_syscall+0x48/0x110 el0_svc_common.constprop.0+0x40/0xe0 Slab 0xfffffdffc0033600 objects=10 used=6 fp=0xffff000000cdcc00 flags=0x3fffe0000000240(workingset|head|node=0|zone=0|lastcpupid=0x1ffff) Object 0xffff000000cdcc00 @offset=19456 fp=0xffff000000cde400 [...] This issue arises because in ath12k_core_hw_group_destroy(), each device calls ath12k_core_soc_destroy() for itself and all its partners within the same group. Since ath12k_core_hw_group_destroy() is invoked for each device, this results in a double free condition, eventually causing the SLUB bug. To resolve this, set the freed pointers to NULL. And since there could be a race condition to read these pointers, guard these with the available mutex lock. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: 6f245ea0ec6c ("wifi: ath12k: introduce device group abstraction") Signed-off-by: Aditya Kumar Singh Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250408-fix_reboot_issues_with_hw_grouping-v4-1-95e7bf048595@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/reg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/reg.c b/drivers/net/wireless/ath/ath12k/reg.c index 439d61f284d892..7fa7cd301b7579 100644 --- a/drivers/net/wireless/ath/ath12k/reg.c +++ b/drivers/net/wireless/ath/ath12k/reg.c @@ -777,8 +777,12 @@ void ath12k_reg_free(struct ath12k_base *ab) { int i; + mutex_lock(&ab->core_lock); for (i = 0; i < ab->hw_params->max_radios; i++) { kfree(ab->default_regd[i]); kfree(ab->new_regd[i]); + ab->default_regd[i] = NULL; + ab->new_regd[i] = NULL; } + mutex_unlock(&ab->core_lock); } From a16866506a9d2e35b593c9862ae207f08199f6a4 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 8 Apr 2025 11:36:32 +0530 Subject: [PATCH 2242/2924] wifi: ath12k: fix ATH12K_FLAG_REGISTERED flag handling [ Upstream commit 6af396942bf132a1a49523e8fe2f816dc1ebd913 ] Commit a5686ae820fa ("wifi: ath12k: move ATH12K_FLAG_REGISTERED handling to ath12k_mac_register()") relocated the setting of the ATH12K_FLAG_REGISTERED flag to the ath12k_mac_register() function. However, this function only accesses the first device (ab) via ag->ab[0], resulting in the flag being set only for the first device in the group. Similarly, ath12k_mac_unregister() only unsets the flag for the first device. The flag should actually be set for all devices in the group to avoid issues during recovery. Hence, move setting and clearing of this flag in the function ath12k_core_hw_group_start() and ath12k_core_hw_group_stop() respectively. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: a5686ae820fa ("wifi: ath12k: move ATH12K_FLAG_REGISTERED handling to ath12k_mac_register()") Signed-off-by: Aditya Kumar Singh Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250408-fix_reboot_issues_with_hw_grouping-v4-4-95e7bf048595@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/core.c | 5 +++++ drivers/net/wireless/ath/ath12k/mac.c | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 0b2dec081c6ee8..34eaa6b5bf772c 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -891,6 +891,9 @@ static void ath12k_core_hw_group_stop(struct ath12k_hw_group *ag) ab = ag->ab[i]; if (!ab) continue; + + clear_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); + ath12k_core_device_cleanup(ab); } @@ -1026,6 +1029,8 @@ static int ath12k_core_hw_group_start(struct ath12k_hw_group *ag) mutex_lock(&ab->core_lock); + set_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); + ret = ath12k_core_pdev_create(ab); if (ret) { ath12k_err(ab, "failed to create pdev core %d\n", ret); diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 3883dab6771b86..bd67140688290f 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -11569,7 +11569,6 @@ void ath12k_mac_mlo_teardown(struct ath12k_hw_group *ag) int ath12k_mac_register(struct ath12k_hw_group *ag) { - struct ath12k_base *ab = ag->ab[0]; struct ath12k_hw *ah; int i; int ret; @@ -11582,8 +11581,6 @@ int ath12k_mac_register(struct ath12k_hw_group *ag) goto err; } - set_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); - return 0; err: @@ -11600,12 +11597,9 @@ int ath12k_mac_register(struct ath12k_hw_group *ag) void ath12k_mac_unregister(struct ath12k_hw_group *ag) { - struct ath12k_base *ab = ag->ab[0]; struct ath12k_hw *ah; int i; - clear_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); - for (i = ag->num_hw - 1; i >= 0; i--) { ah = ath12k_ag_to_ah(ag, i); if (!ah) From 75d98902f353a50ad7691e80e0f5e93baa5b9c72 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 4 Apr 2025 19:03:03 +0000 Subject: [PATCH 2243/2924] f2fs: clean up unnecessary indentation [ Upstream commit 05d3273ad03fa5ea1177b4f3dfeeb6de4899b504 ] No functional change. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: d26fecb03e1f ("f2fs: prevent the current section from being selected as a victim during GC") Signed-off-by: Sasha Levin --- fs/f2fs/segment.h | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 0465dc00b349d2..5fcb1f92d506f3 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -429,7 +429,6 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); @@ -437,7 +436,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) next = find_next_bit(free_i->free_segmap, start_segno + SEGS_PER_SEC(sbi), start_segno); - if (next >= start_segno + usable_segs) { + if (next >= start_segno + f2fs_usable_segs_in_sec(sbi)) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; } @@ -463,22 +462,31 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); + bool ret; spin_lock(&free_i->segmap_lock); - if (test_and_clear_bit(segno, free_i->free_segmap)) { - free_i->free_segments++; - - if (!inmem && IS_CURSEC(sbi, secno)) - goto skip_free; - next = find_next_bit(free_i->free_segmap, - start_segno + SEGS_PER_SEC(sbi), start_segno); - if (next >= start_segno + usable_segs) { - if (test_and_clear_bit(secno, free_i->free_secmap)) - free_i->free_sections++; - } - } -skip_free: + ret = test_and_clear_bit(segno, free_i->free_segmap); + if (!ret) + goto unlock_out; + + free_i->free_segments++; + + if (!inmem && IS_CURSEC(sbi, secno)) + goto unlock_out; + + /* check large section */ + next = find_next_bit(free_i->free_segmap, + start_segno + SEGS_PER_SEC(sbi), start_segno); + if (next < start_segno + f2fs_usable_segs_in_sec(sbi)) + goto unlock_out; + + ret = test_and_clear_bit(secno, free_i->free_secmap); + if (!ret) + goto unlock_out; + + free_i->free_sections++; + +unlock_out: spin_unlock(&free_i->segmap_lock); } From b282923c4ff411083cc048b3e742f9be05f1a3f5 Mon Sep 17 00:00:00 2001 From: "yohan.joung" Date: Fri, 4 Apr 2025 08:21:06 +0900 Subject: [PATCH 2244/2924] f2fs: prevent the current section from being selected as a victim during GC [ Upstream commit d26fecb03e1f1069480d41fa2a6cea87ebbb89b8 ] When selecting a victim using next_victim_seg in a large section, the selected section might already have been cleared and designated as the new current section, making it actively in use. This behavior causes inconsistency between the SIT and SSA. F2FS-fs (dm-54): Inconsistent segment (70961) type [0, 1] in SSA and SIT Call trace: dump_backtrace+0xe8/0x10c show_stack+0x18/0x28 dump_stack_lvl+0x50/0x6c dump_stack+0x18/0x28 f2fs_stop_checkpoint+0x1c/0x3c do_garbage_collect+0x41c/0x271c f2fs_gc+0x27c/0x828 gc_thread_func+0x290/0x88c kthread+0x11c/0x164 ret_from_fork+0x10/0x20 issue scenario segs_per_sec=2 - seg#0 and seg#1 are all dirty - all valid blocks are removed in seg#1 - gc select this sec and next_victim_seg=seg#0 - migrate seg#0, next_victim_seg=seg#1 - checkpoint -> sec(seg#0, seg#1) becomes free - allocator assigns sec(seg#0, seg#1) to curseg - gc tries to migrate seg#1 Fixes: e3080b0120a1 ("f2fs: support subsectional garbage collection") Signed-off-by: yohan.joung Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/segment.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5fcb1f92d506f3..503f6df690bf2b 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -486,6 +486,11 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, free_i->free_sections++; + if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[BG_GC]) == secno) + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[FG_GC]) == secno) + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; + unlock_out: spin_unlock(&free_i->segmap_lock); } From ccc28c0397f75a3ec9539cceed9db014d7b73869 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Apr 2025 20:22:08 +0800 Subject: [PATCH 2245/2924] f2fs: fix to do sanity check on sbi->total_valid_block_count [ Upstream commit 05872a167c2cab80ef186ef23cc34a6776a1a30c ] syzbot reported a f2fs bug as below: ------------[ cut here ]------------ kernel BUG at fs/f2fs/f2fs.h:2521! RIP: 0010:dec_valid_block_count+0x3b2/0x3c0 fs/f2fs/f2fs.h:2521 Call Trace: f2fs_truncate_data_blocks_range+0xc8c/0x11a0 fs/f2fs/file.c:695 truncate_dnode+0x417/0x740 fs/f2fs/node.c:973 truncate_nodes+0x3ec/0xf50 fs/f2fs/node.c:1014 f2fs_truncate_inode_blocks+0x8e3/0x1370 fs/f2fs/node.c:1197 f2fs_do_truncate_blocks+0x840/0x12b0 fs/f2fs/file.c:810 f2fs_truncate_blocks+0x10d/0x300 fs/f2fs/file.c:838 f2fs_truncate+0x417/0x720 fs/f2fs/file.c:888 f2fs_setattr+0xc4f/0x12f0 fs/f2fs/file.c:1112 notify_change+0xbca/0xe90 fs/attr.c:552 do_truncate+0x222/0x310 fs/open.c:65 handle_truncate fs/namei.c:3466 [inline] do_open fs/namei.c:3849 [inline] path_openat+0x2e4f/0x35d0 fs/namei.c:4004 do_filp_open+0x284/0x4e0 fs/namei.c:4031 do_sys_openat2+0x12b/0x1d0 fs/open.c:1429 do_sys_open fs/open.c:1444 [inline] __do_sys_creat fs/open.c:1522 [inline] __se_sys_creat fs/open.c:1516 [inline] __x64_sys_creat+0x124/0x170 fs/open.c:1516 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/syscall_64.c:94 The reason is: in fuzzed image, sbi->total_valid_block_count is inconsistent w/ mapped blocks indexed by inode, so, we should not trigger panic for such case, instead, let's print log and set fsck flag. Fixes: 39a53e0ce0df ("f2fs: add superblock and major in-memory structure") Reported-by: syzbot+8b376a77b2f364097fbe@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/67f3c0b2.050a0220.396535.0547.GAE@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/f2fs.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f1576dc6ec6797..6dcac5ab041c80 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2518,8 +2518,14 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK; spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - sbi->total_valid_block_count -= (block_t)count; + if (unlikely(sbi->total_valid_block_count < count)) { + f2fs_warn(sbi, "Inconsistent total_valid_block_count:%u, ino:%lu, count:%u", + sbi->total_valid_block_count, inode->i_ino, count); + sbi->total_valid_block_count = 0; + set_sbi_flag(sbi, SBI_NEED_FSCK); + } else { + sbi->total_valid_block_count -= count; + } if (sbi->reserved_blocks && sbi->current_reserved_blocks < sbi->reserved_blocks) sbi->current_reserved_blocks = min(sbi->reserved_blocks, From cfddb2467265e12e575359e35d1d7062d89b022b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Apr 2025 12:41:36 +0200 Subject: [PATCH 2246/2924] page_pool: Move pp_magic check into helper functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cd3c93167da0e760b5819246eae7a4ea30fd014b ] Since we are about to stash some more information into the pp_magic field, let's move the magic signature checks into a pair of helper functions so it can be changed in one place. Reviewed-by: Mina Almasry Tested-by: Yonglong Liu Acked-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-1-6a9ef2e0cba8@redhat.com Signed-off-by: Jakub Kicinski Stable-dep-of: ee62ce7a1d90 ("page_pool: Track DMA-mapped pages and unmap them when destroying the pool") Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 ++-- include/linux/mm.h | 20 +++++++++++++++++++ mm/page_alloc.c | 8 ++------ net/core/netmem_priv.h | 5 +++++ net/core/skbuff.c | 16 ++------------- net/core/xdp.c | 4 ++-- 6 files changed, 33 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index f803e1c9359006..5ce1b463b7a8dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -707,8 +707,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); page = xdpi.page.page; - /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) - * as we know this is a page_pool page. + /* No need to check page_pool_page_is_pp() as we + * know this is a page_pool page. */ page_pool_recycle_direct(page->pp, page); } while (++n < num); diff --git a/include/linux/mm.h b/include/linux/mm.h index fdda6b16263b35..af2f551668e9e8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4265,4 +4265,24 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); #define VM_SEALED_SYSMAP VM_NONE #endif +/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is + * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for + * the head page of compound page and bit 1 for pfmemalloc page. + * page_is_pfmemalloc() is checked in __page_pool_put_page() to avoid recycling + * the pfmemalloc page. + */ +#define PP_MAGIC_MASK ~0x3UL + +#ifdef CONFIG_PAGE_POOL +static inline bool page_pool_page_is_pp(struct page *page) +{ + return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE; +} +#else +static inline bool page_pool_page_is_pp(struct page *page) +{ + return false; +} +#endif + #endif /* _LINUX_MM_H */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 47fa713ccb4d89..4f29e393f6af1c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -898,9 +898,7 @@ static inline bool page_expected_state(struct page *page, #ifdef CONFIG_MEMCG page->memcg_data | #endif -#ifdef CONFIG_PAGE_POOL - ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) | -#endif + page_pool_page_is_pp(page) | (page->flags & check_flags))) return false; @@ -927,10 +925,8 @@ static const char *page_bad_reason(struct page *page, unsigned long flags) if (unlikely(page->memcg_data)) bad_reason = "page still charged to cgroup"; #endif -#ifdef CONFIG_PAGE_POOL - if (unlikely((page->pp_magic & ~0x3UL) == PP_SIGNATURE)) + if (unlikely(page_pool_page_is_pp(page))) bad_reason = "page_pool leak"; -#endif return bad_reason; } diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h index 7eadb8393e002f..f33162fd281c23 100644 --- a/net/core/netmem_priv.h +++ b/net/core/netmem_priv.h @@ -18,6 +18,11 @@ static inline void netmem_clear_pp_magic(netmem_ref netmem) __netmem_clear_lsb(netmem)->pp_magic = 0; } +static inline bool netmem_is_pp(netmem_ref netmem) +{ + return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE; +} + static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool) { __netmem_clear_lsb(netmem)->pp = pool; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6cbf77bc61fce7..74a2d886a35b51 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -893,11 +893,6 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } -static bool is_pp_netmem(netmem_ref netmem) -{ - return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE; -} - int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom) { @@ -995,14 +990,7 @@ bool napi_pp_put_page(netmem_ref netmem) { netmem = netmem_compound_head(netmem); - /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation - * in order to preserve any existing bits, such as bit 0 for the - * head page of compound page and bit 1 for pfmemalloc page, so - * mask those bits for freeing side when doing below checking, - * and page_is_pfmemalloc() is checked in __page_pool_put_page() - * to avoid recycling the pfmemalloc page. - */ - if (unlikely(!is_pp_netmem(netmem))) + if (unlikely(!netmem_is_pp(netmem))) return false; page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false); @@ -1042,7 +1030,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb) for (i = 0; i < shinfo->nr_frags; i++) { head_netmem = netmem_compound_head(shinfo->frags[i].netmem); - if (likely(is_pp_netmem(head_netmem))) + if (likely(netmem_is_pp(head_netmem))) page_pool_ref_netmem(head_netmem); else page_ref_inc(netmem_to_page(head_netmem)); diff --git a/net/core/xdp.c b/net/core/xdp.c index f86eedad586a77..0ba73943c6eed8 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -437,8 +437,8 @@ void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type, netmem = netmem_compound_head(netmem); if (napi_direct && xdp_return_frame_no_direct()) napi_direct = false; - /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) - * as mem->type knows this a page_pool page + /* No need to check netmem_is_pp() as mem->type knows this a + * page_pool page */ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, napi_direct); From c30ae60f41f9edd6e1b5cad41cf28ce04dae39e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 9 Apr 2025 12:41:37 +0200 Subject: [PATCH 2247/2924] page_pool: Track DMA-mapped pages and unmap them when destroying the pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ee62ce7a1d909ccba0399680a03c2dee83bcae95 ] When enabling DMA mapping in page_pool, pages are kept DMA mapped until they are released from the pool, to avoid the overhead of re-mapping the pages every time they are used. This causes resource leaks and/or crashes when there are pages still outstanding while the device is torn down, because page_pool will attempt an unmap through a non-existent DMA device on the subsequent page return. To fix this, implement a simple tracking of outstanding DMA-mapped pages in page pool using an xarray. This was first suggested by Mina[0], and turns out to be fairly straight forward: We simply store pointers to pages directly in the xarray with xa_alloc() when they are first DMA mapped, and remove them from the array on unmap. Then, when a page pool is torn down, it can simply walk the xarray and unmap all pages still present there before returning, which also allows us to get rid of the get/put_device() calls in page_pool. Using xa_cmpxchg(), no additional synchronisation is needed, as a page will only ever be unmapped once. To avoid having to walk the entire xarray on unmap to find the page reference, we stash the ID assigned by xa_alloc() into the page structure itself, using the upper bits of the pp_magic field. This requires a couple of defines to avoid conflicting with the POINTER_POISON_DELTA define, but this is all evaluated at compile-time, so does not affect run-time performance. The bitmap calculations in this patch gives the following number of bits for different architectures: - 23 bits on 32-bit architectures - 21 bits on PPC64 (because of the definition of ILLEGAL_POINTER_VALUE) - 32 bits on other 64-bit architectures Stashing a value into the unused bits of pp_magic does have the effect that it can make the value stored there lie outside the unmappable range (as governed by the mmap_min_addr sysctl), for architectures that don't define ILLEGAL_POINTER_VALUE. This means that if one of the pointers that is aliased to the pp_magic field (such as page->lru.next) is dereferenced while the page is owned by page_pool, that could lead to a dereference into userspace, which is a security concern. The risk of this is mitigated by the fact that (a) we always clear pp_magic before releasing a page from page_pool, and (b) this would need a use-after-free bug for struct page, which can have many other risks since page->lru.next is used as a generic list pointer in multiple places in the kernel. As such, with this patch we take the position that this risk is negligible in practice. For more discussion, see[1]. Since all the tracking added in this patch is performed on DMA map/unmap, no additional code is needed in the fast path, meaning the performance overhead of this tracking is negligible there. A micro-benchmark shows that the total overhead of the tracking itself is about 400 ns (39 cycles(tsc) 395.218 ns; sum for both map and unmap[2]). Since this cost is only paid on DMA map and unmap, it seems like an acceptable cost to fix the late unmap issue. Further optimisation can narrow the cases where this cost is paid (for instance by eliding the tracking when DMA map/unmap is a no-op). The extra memory needed to track the pages is neatly encapsulated inside xarray, which uses the 'struct xa_node' structure to track items. This structure is 576 bytes long, with slots for 64 items, meaning that a full node occurs only 9 bytes of overhead per slot it tracks (in practice, it probably won't be this efficient, but in any case it should be an acceptable overhead). [0] https://lore.kernel.org/all/CAHS8izPg7B5DwKfSuzz-iOop_YRbk3Sd6Y4rX7KBG9DcVJcyWg@mail.gmail.com/ [1] https://lore.kernel.org/r/20250320023202.GA25514@openwall.com [2] https://lore.kernel.org/r/ae07144c-9295-4c9d-a400-153bb689fe9e@huawei.com Reported-by: Yonglong Liu Closes: https://lore.kernel.org/r/8743264a-9700-4227-a556-5f931c720211@huawei.com Fixes: ff7d6b27f894 ("page_pool: refurbish version of page_pool code") Suggested-by: Mina Almasry Reviewed-by: Mina Almasry Reviewed-by: Jesper Dangaard Brouer Tested-by: Jesper Dangaard Brouer Tested-by: Qiuling Ren Tested-by: Yuying Ma Tested-by: Yonglong Liu Acked-by: Jesper Dangaard Brouer Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250409-page-pool-track-dma-v9-2-6a9ef2e0cba8@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/mm.h | 46 ++++++++++++++++++-- include/linux/poison.h | 4 ++ include/net/page_pool/types.h | 6 +++ net/core/netmem_priv.h | 28 +++++++++++- net/core/page_pool.c | 81 +++++++++++++++++++++++++++++------ 5 files changed, 147 insertions(+), 18 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index af2f551668e9e8..e51dba8398f747 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4265,13 +4265,51 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); #define VM_SEALED_SYSMAP VM_NONE #endif +/* + * DMA mapping IDs for page_pool + * + * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and + * stashes it in the upper bits of page->pp_magic. We always want to be able to + * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP + * pages can have arbitrary kernel pointers stored in the same field as pp_magic + * (since it overlaps with page->lru.next), so we must ensure that we cannot + * mistake a valid kernel pointer with any of the values we write into this + * field. + * + * On architectures that set POISON_POINTER_DELTA, this is already ensured, + * since this value becomes part of PP_SIGNATURE; meaning we can just use the + * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the + * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is + * 0, we make sure that we leave the two topmost bits empty, as that guarantees + * we won't mistake a valid kernel pointer for a value we set, regardless of the + * VMSPLIT setting. + * + * Altogether, this means that the number of bits available is constrained by + * the size of an unsigned long (at the upper end, subtracting two bits per the + * above), and the definition of PP_SIGNATURE (with or without + * POISON_POINTER_DELTA). + */ +#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) +#if POISON_POINTER_DELTA > 0 +/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA + * index to not overlap with that if set + */ +#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) +#else +/* Always leave out the topmost two; see above. */ +#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2) +#endif + +#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ + PP_DMA_INDEX_SHIFT) + /* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for - * the head page of compound page and bit 1 for pfmemalloc page. - * page_is_pfmemalloc() is checked in __page_pool_put_page() to avoid recycling - * the pfmemalloc page. + * the head page of compound page and bit 1 for pfmemalloc page, as well as the + * bits used for the DMA index. page_is_pfmemalloc() is checked in + * __page_pool_put_page() to avoid recycling the pfmemalloc page. */ -#define PP_MAGIC_MASK ~0x3UL +#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL) #ifdef CONFIG_PAGE_POOL static inline bool page_pool_page_is_pp(struct page *page) diff --git a/include/linux/poison.h b/include/linux/poison.h index 331a9a996fa874..8ca2235f78d5d9 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -70,6 +70,10 @@ #define KEY_DESTROY 0xbd /********** net/core/page_pool.c **********/ +/* + * page_pool uses additional free bits within this value to store data, see the + * definition of PP_DMA_INDEX_MASK in mm.h + */ #define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) /********** net/core/skbuff.c **********/ diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 36eb57d73abc6c..431b593de70937 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA @@ -33,6 +34,9 @@ #define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \ PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM) +/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */ +#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1) + /* * Fast allocation side cache array/stack * @@ -221,6 +225,8 @@ struct page_pool { void *mp_priv; const struct memory_provider_ops *mp_ops; + struct xarray dma_mapped; + #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ struct page_pool_recycle_stats __percpu *recycle_stats; diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h index f33162fd281c23..cd95394399b40c 100644 --- a/net/core/netmem_priv.h +++ b/net/core/netmem_priv.h @@ -5,7 +5,7 @@ static inline unsigned long netmem_get_pp_magic(netmem_ref netmem) { - return __netmem_clear_lsb(netmem)->pp_magic; + return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK; } static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic) @@ -15,6 +15,8 @@ static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic) static inline void netmem_clear_pp_magic(netmem_ref netmem) { + WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK); + __netmem_clear_lsb(netmem)->pp_magic = 0; } @@ -33,4 +35,28 @@ static inline void netmem_set_dma_addr(netmem_ref netmem, { __netmem_clear_lsb(netmem)->dma_addr = dma_addr; } + +static inline unsigned long netmem_get_dma_index(netmem_ref netmem) +{ + unsigned long magic; + + if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) + return 0; + + magic = __netmem_clear_lsb(netmem)->pp_magic; + + return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT; +} + +static inline void netmem_set_dma_index(netmem_ref netmem, + unsigned long id) +{ + unsigned long magic; + + if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) + return; + + magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT); + __netmem_clear_lsb(netmem)->pp_magic = magic; +} #endif diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 7745ad924ae2d8..2b768486594185 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -276,8 +276,7 @@ static int page_pool_init(struct page_pool *pool, /* Driver calling page_pool_create() also call page_pool_destroy() */ refcount_set(&pool->user_cnt, 1); - if (pool->dma_map) - get_device(pool->p.dev); + xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1); if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) { netdev_assert_locked(pool->slow.netdev); @@ -320,9 +319,7 @@ static int page_pool_init(struct page_pool *pool, static void page_pool_uninit(struct page_pool *pool) { ptr_ring_cleanup(&pool->ring, NULL); - - if (pool->dma_map) - put_device(pool->p.dev); + xa_destroy(&pool->dma_mapped); #ifdef CONFIG_PAGE_POOL_STATS if (!pool->system) @@ -463,13 +460,21 @@ page_pool_dma_sync_for_device(const struct page_pool *pool, netmem_ref netmem, u32 dma_sync_size) { - if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) - __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); + if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) { + rcu_read_lock(); + /* re-check under rcu_read_lock() to sync with page_pool_scrub() */ + if (pool->dma_sync) + __page_pool_dma_sync_for_device(pool, netmem, + dma_sync_size); + rcu_read_unlock(); + } } -static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) +static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp) { dma_addr_t dma; + int err; + u32 id; /* Setup DMA mapping: use 'struct page' area for storing DMA-addr * since dma_addr_t can be either 32 or 64 bits and does not always fit @@ -483,15 +488,30 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) if (dma_mapping_error(pool->p.dev, dma)) return false; - if (page_pool_set_dma_addr_netmem(netmem, dma)) + if (page_pool_set_dma_addr_netmem(netmem, dma)) { + WARN_ONCE(1, "unexpected DMA address, please report to netdev@"); goto unmap_failed; + } + + if (in_softirq()) + err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + else + err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + if (err) { + WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@"); + goto unset_failed; + } + netmem_set_dma_index(netmem, id); page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len); return true; +unset_failed: + page_pool_set_dma_addr_netmem(netmem, 0); unmap_failed: - WARN_ONCE(1, "unexpected DMA address, please report to netdev@"); dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); @@ -508,7 +528,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, if (unlikely(!page)) return NULL; - if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) { + if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page), gfp))) { put_page(page); return NULL; } @@ -554,7 +574,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, */ for (i = 0; i < nr_pages; i++) { netmem = pool->alloc.cache[i]; - if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) { + if (dma_map && unlikely(!page_pool_dma_map(pool, netmem, gfp))) { put_page(netmem_to_page(netmem)); continue; } @@ -656,6 +676,8 @@ void page_pool_clear_pp_info(netmem_ref netmem) static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, netmem_ref netmem) { + struct page *old, *page = netmem_to_page(netmem); + unsigned long id; dma_addr_t dma; if (!pool->dma_map) @@ -664,6 +686,17 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, */ return; + id = netmem_get_dma_index(netmem); + if (!id) + return; + + if (in_softirq()) + old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0); + else + old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0); + if (old != page) + return; + dma = page_pool_get_dma_addr_netmem(netmem); /* When page is unmapped, it cannot be returned to our pool */ @@ -671,6 +704,7 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); page_pool_set_dma_addr_netmem(netmem, 0); + netmem_set_dma_index(netmem, 0); } /* Disconnects a page (from a page_pool). API users can have a need @@ -1080,8 +1114,29 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool) static void page_pool_scrub(struct page_pool *pool) { + unsigned long id; + void *ptr; + page_pool_empty_alloc_cache_once(pool); - pool->destroy_cnt++; + if (!pool->destroy_cnt++ && pool->dma_map) { + if (pool->dma_sync) { + /* Disable page_pool_dma_sync_for_device() */ + pool->dma_sync = false; + + /* Make sure all concurrent returns that may see the old + * value of dma_sync (and thus perform a sync) have + * finished before doing the unmapping below. Skip the + * wait if the device doesn't actually need syncing, or + * if there are no outstanding mapped pages. + */ + if (dma_dev_need_sync(pool->p.dev) && + !xa_empty(&pool->dma_mapped)) + synchronize_net(); + } + + xa_for_each(&pool->dma_mapped, id, ptr) + __page_pool_release_page_dma(pool, page_to_netmem(ptr)); + } /* No more consumers should exist, but producers could still * be in-flight. From 001f9fd029a61714618eadb09960fa6f79e508dc Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Thu, 10 Apr 2025 22:17:31 +0300 Subject: [PATCH 2248/2924] net/mlx5: HWS, Fix matcher action template attach [ Upstream commit 36ef2575e78d1a3c699dc3f1c9dee9be742c9bdd ] The procedure of attaching an action template to an existing matcher had a few issues: 1. Attaching accidentally overran the `at` array in bwc_matcher, which would result in memory corruption. This bug wasn't triggered, but it is possible to trigger it by attaching action templates beyond the initial buffer size of 8. Fix this by converting to a dynamically sized buffer and reallocating if needed. 2. Similarly, the `at` array inside the native matcher was never reallocated. Fix this the same as above. 3. The bwc layer treated any error in action template attach as a signal that the matcher should be rehashed to account for a larger number of action STEs. In reality, there are other unrelated errors that can arise and they should be propagated upstack. Fix this by adding a `need_rehash` output parameter that's orthogonal to error codes. Fixes: 2111bb970c78 ("net/mlx5: HWS, added backward-compatible API handling") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Michal Kubiak Link: https://patch.msgid.link/1744312662-356571-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/steering/hws/bwc.c | 55 ++++++++++++++++--- .../mellanox/mlx5/core/steering/hws/bwc.h | 9 ++- .../mellanox/mlx5/core/steering/hws/matcher.c | 48 +++++++++++++--- .../mellanox/mlx5/core/steering/hws/matcher.h | 4 ++ .../mellanox/mlx5/core/steering/hws/mlx5hws.h | 5 +- 5 files changed, 97 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 19dce1ba512d42..32de8bfc7644f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -90,13 +90,19 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, bwc_matcher->priority = priority; bwc_matcher->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + bwc_matcher->size_of_at_array = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; + bwc_matcher->at = kcalloc(bwc_matcher->size_of_at_array, + sizeof(*bwc_matcher->at), GFP_KERNEL); + if (!bwc_matcher->at) + goto free_bwc_matcher_rules; + /* create dummy action template */ bwc_matcher->at[0] = mlx5hws_action_template_create(action_types ? action_types : init_action_types); if (!bwc_matcher->at[0]) { mlx5hws_err(table->ctx, "BWC matcher: failed creating action template\n"); - goto free_bwc_matcher_rules; + goto free_bwc_matcher_at_array; } bwc_matcher->num_of_at = 1; @@ -126,6 +132,8 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, mlx5hws_match_template_destroy(bwc_matcher->mt); free_at: mlx5hws_action_template_destroy(bwc_matcher->at[0]); +free_bwc_matcher_at_array: + kfree(bwc_matcher->at); free_bwc_matcher_rules: kfree(bwc_matcher->rules); err: @@ -192,6 +200,7 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher) for (i = 0; i < bwc_matcher->num_of_at; i++) mlx5hws_action_template_destroy(bwc_matcher->at[i]); + kfree(bwc_matcher->at); mlx5hws_match_template_destroy(bwc_matcher->mt); kfree(bwc_matcher->rules); @@ -520,6 +529,23 @@ hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher, struct mlx5hws_rule_action rule_actions[]) { enum mlx5hws_action_type action_types[MLX5HWS_BWC_MAX_ACTS]; + void *p; + + if (unlikely(bwc_matcher->num_of_at >= bwc_matcher->size_of_at_array)) { + if (bwc_matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT) + return -ENOMEM; + bwc_matcher->size_of_at_array *= 2; + p = krealloc(bwc_matcher->at, + bwc_matcher->size_of_at_array * + sizeof(*bwc_matcher->at), + __GFP_ZERO | GFP_KERNEL); + if (!p) { + bwc_matcher->size_of_at_array /= 2; + return -ENOMEM; + } + + bwc_matcher->at = p; + } hws_bwc_rule_actions_to_action_types(rule_actions, action_types); @@ -777,6 +803,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_rule_attr rule_attr; struct mutex *queue_lock; /* Protect the queue */ u32 num_of_rules; + bool need_rehash; int ret = 0; int at_idx; @@ -803,10 +830,14 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, at_idx = bwc_matcher->num_of_at - 1; ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, - bwc_matcher->at[at_idx]); + bwc_matcher->at[at_idx], + &need_rehash); if (unlikely(ret)) { - /* Action template attach failed, possibly due to - * requiring more action STEs. + hws_bwc_unlock_all_queues(ctx); + return ret; + } + if (unlikely(need_rehash)) { + /* The new action template requires more action STEs. * Need to attempt creating new matcher with all * the action templates, including the new one. */ @@ -942,6 +973,7 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_rule_attr rule_attr; struct mutex *queue_lock; /* Protect the queue */ + bool need_rehash; int at_idx, ret; u16 idx; @@ -973,12 +1005,17 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, at_idx = bwc_matcher->num_of_at - 1; ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, - bwc_matcher->at[at_idx]); + bwc_matcher->at[at_idx], + &need_rehash); if (unlikely(ret)) { - /* Action template attach failed, possibly due to - * requiring more action STEs. - * Need to attempt creating new matcher with all - * the action templates, including the new one. + hws_bwc_unlock_all_queues(ctx); + return ret; + } + if (unlikely(need_rehash)) { + /* The new action template requires more action + * STEs. Need to attempt creating new matcher + * with all the action templates, including the + * new one. */ ret = hws_bwc_matcher_rehash_at(bwc_matcher); if (unlikely(ret)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index 47f7ed1415535f..bb0cf4b922ceba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -10,9 +10,7 @@ #define MLX5HWS_BWC_MATCHER_REHASH_BURST_TH 32 /* Max number of AT attach operations for the same matcher. - * When the limit is reached, next attempt to attach new AT - * will result in creation of a new matcher and moving all - * the rules to this matcher. + * When the limit is reached, a larger buffer is allocated for the ATs. */ #define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 8 @@ -23,10 +21,11 @@ struct mlx5hws_bwc_matcher { struct mlx5hws_matcher *matcher; struct mlx5hws_match_template *mt; - struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM]; - u32 priority; + struct mlx5hws_action_template **at; u8 num_of_at; + u8 size_of_at_array; u8 size_log; + u32 priority; atomic_t num_of_rules; struct list_head *rules; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index b61864b320536d..37a4497048a6fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -905,18 +905,48 @@ static int hws_matcher_uninit(struct mlx5hws_matcher *matcher) return 0; } +static int hws_matcher_grow_at_array(struct mlx5hws_matcher *matcher) +{ + void *p; + + if (matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT) + return -ENOMEM; + + matcher->size_of_at_array *= 2; + p = krealloc(matcher->at, + matcher->size_of_at_array * sizeof(*matcher->at), + __GFP_ZERO | GFP_KERNEL); + if (!p) { + matcher->size_of_at_array /= 2; + return -ENOMEM; + } + + matcher->at = p; + + return 0; +} + int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, - struct mlx5hws_action_template *at) + struct mlx5hws_action_template *at, + bool *need_rehash) { bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt); struct mlx5hws_context *ctx = matcher->tbl->ctx; u32 required_stes; int ret; - if (!matcher->attr.max_num_of_at_attach) { - mlx5hws_dbg(ctx, "Num of current at (%d) exceed allowed value\n", - matcher->num_of_at); - return -EOPNOTSUPP; + *need_rehash = false; + + if (unlikely(matcher->num_of_at >= matcher->size_of_at_array)) { + ret = hws_matcher_grow_at_array(matcher); + if (ret) + return ret; + + if (matcher->col_matcher) { + ret = hws_matcher_grow_at_array(matcher->col_matcher); + if (ret) + return ret; + } } ret = hws_matcher_check_and_process_at(matcher, at); @@ -927,12 +957,11 @@ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, if (matcher->action_ste.max_stes < required_stes) { mlx5hws_dbg(ctx, "Required STEs [%d] exceeds initial action template STE [%d]\n", required_stes, matcher->action_ste.max_stes); - return -ENOMEM; + *need_rehash = true; } matcher->at[matcher->num_of_at] = *at; matcher->num_of_at += 1; - matcher->attr.max_num_of_at_attach -= 1; if (matcher->col_matcher) matcher->col_matcher->num_of_at = matcher->num_of_at; @@ -960,8 +989,9 @@ hws_matcher_set_templates(struct mlx5hws_matcher *matcher, if (!matcher->mt) return -ENOMEM; - matcher->at = kvcalloc(num_of_at + matcher->attr.max_num_of_at_attach, - sizeof(*matcher->at), + matcher->size_of_at_array = + num_of_at + matcher->attr.max_num_of_at_attach; + matcher->at = kvcalloc(matcher->size_of_at_array, sizeof(*matcher->at), GFP_KERNEL); if (!matcher->at) { mlx5hws_err(ctx, "Failed to allocate action template array\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h index 020de70270c501..20b32012c418be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h @@ -23,6 +23,9 @@ */ #define MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT 1 +/* Maximum number of action templates that can be attached to a matcher. */ +#define MLX5HWS_MATCHER_MAX_AT 128 + enum mlx5hws_matcher_offset { MLX5HWS_MATCHER_OFFSET_TAG_DW1 = 12, MLX5HWS_MATCHER_OFFSET_TAG_DW0 = 13, @@ -72,6 +75,7 @@ struct mlx5hws_matcher { struct mlx5hws_match_template *mt; struct mlx5hws_action_template *at; u8 num_of_at; + u8 size_of_at_array; u8 num_of_mt; /* enum mlx5hws_matcher_flags */ u8 flags; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 5121951f2778a8..8ed8a715a2eb26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -399,11 +399,14 @@ int mlx5hws_matcher_destroy(struct mlx5hws_matcher *matcher); * * @matcher: Matcher to attach the action template to. * @at: Action template to be attached to the matcher. + * @need_rehash: Output parameter that tells callers if the matcher needs to be + * rehashed. * * Return: Zero on success, non-zero otherwise. */ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, - struct mlx5hws_action_template *at); + struct mlx5hws_action_template *at, + bool *need_rehash); /** * mlx5hws_matcher_resize_set_target - Link two matchers and enable moving rules. From 5cfa1b65b372fea30c30009990a46d19b420785f Mon Sep 17 00:00:00 2001 From: Charles Han Date: Tue, 25 Mar 2025 17:49:32 +0800 Subject: [PATCH 2249/2924] pinctrl: qcom: tlmm-test: Fix potential null dereference in tlmm kunit test [ Upstream commit 1938be9fbad1bd87a1dcd9c3ca88e454565f0609 ] kunit_kzalloc() may return a NULL pointer, dereferencing it without NULL check may lead to NULL dereference. Add a NULL check for grp. Fixes: c7984dc0a2b9 ("pinctrl: qcom: Add test case for TLMM interrupt handling") Signed-off-by: Charles Han Link: https://lore.kernel.org/20250325094932.4733-1-hanchunchao@inspur.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/qcom/tlmm-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/qcom/tlmm-test.c b/drivers/pinctrl/qcom/tlmm-test.c index fd02bf3a76cbcc..7b99e89e0f6703 100644 --- a/drivers/pinctrl/qcom/tlmm-test.c +++ b/drivers/pinctrl/qcom/tlmm-test.c @@ -547,6 +547,7 @@ static int tlmm_test_init(struct kunit *test) struct tlmm_test_priv *priv; priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv); atomic_set(&priv->intr_count, 0); atomic_set(&priv->thread_count, 0); From feec357c102205fee8b3e9d99170c1ef5cfae908 Mon Sep 17 00:00:00 2001 From: Hari Kalavakunta Date: Wed, 9 Apr 2025 18:23:08 -0700 Subject: [PATCH 2250/2924] net: ncsi: Fix GCPS 64-bit member variables [ Upstream commit e8a1bd8344054ce27bebf59f48e3f6bc10bc419b ] Correct Get Controller Packet Statistics (GCPS) 64-bit wide member variables, as per DSP0222 v1.0.0 and forward specs. The Driver currently collects these stats, but they are yet to be exposed to the user. Therefore, no user impact. Statistics fixes: Total Bytes Received (byte range 28..35) Total Bytes Transmitted (byte range 36..43) Total Unicast Packets Received (byte range 44..51) Total Multicast Packets Received (byte range 52..59) Total Broadcast Packets Received (byte range 60..67) Total Unicast Packets Transmitted (byte range 68..75) Total Multicast Packets Transmitted (byte range 76..83) Total Broadcast Packets Transmitted (byte range 84..91) Valid Bytes Received (byte range 204..11) Signed-off-by: Hari Kalavakunta Reviewed-by: Paul Fertser Link: https://patch.msgid.link/20250410012309.1343-1-kalavakunta.hari.prasad@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ncsi/internal.h | 21 ++++++++++----------- net/ncsi/ncsi-pkt.h | 23 +++++++++++------------ net/ncsi/ncsi-rsp.c | 21 ++++++++++----------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 4e0842df5234ea..2c260f33b55cc5 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -143,16 +143,15 @@ struct ncsi_channel_vlan_filter { }; struct ncsi_channel_stats { - u32 hnc_cnt_hi; /* Counter cleared */ - u32 hnc_cnt_lo; /* Counter cleared */ - u32 hnc_rx_bytes; /* Rx bytes */ - u32 hnc_tx_bytes; /* Tx bytes */ - u32 hnc_rx_uc_pkts; /* Rx UC packets */ - u32 hnc_rx_mc_pkts; /* Rx MC packets */ - u32 hnc_rx_bc_pkts; /* Rx BC packets */ - u32 hnc_tx_uc_pkts; /* Tx UC packets */ - u32 hnc_tx_mc_pkts; /* Tx MC packets */ - u32 hnc_tx_bc_pkts; /* Tx BC packets */ + u64 hnc_cnt; /* Counter cleared */ + u64 hnc_rx_bytes; /* Rx bytes */ + u64 hnc_tx_bytes; /* Tx bytes */ + u64 hnc_rx_uc_pkts; /* Rx UC packets */ + u64 hnc_rx_mc_pkts; /* Rx MC packets */ + u64 hnc_rx_bc_pkts; /* Rx BC packets */ + u64 hnc_tx_uc_pkts; /* Tx UC packets */ + u64 hnc_tx_mc_pkts; /* Tx MC packets */ + u64 hnc_tx_bc_pkts; /* Tx BC packets */ u32 hnc_fcs_err; /* FCS errors */ u32 hnc_align_err; /* Alignment errors */ u32 hnc_false_carrier; /* False carrier detection */ @@ -181,7 +180,7 @@ struct ncsi_channel_stats { u32 hnc_tx_1023_frames; /* Tx 512-1023 bytes frames */ u32 hnc_tx_1522_frames; /* Tx 1024-1522 bytes frames */ u32 hnc_tx_9022_frames; /* Tx 1523-9022 bytes frames */ - u32 hnc_rx_valid_bytes; /* Rx valid bytes */ + u64 hnc_rx_valid_bytes; /* Rx valid bytes */ u32 hnc_rx_runt_pkts; /* Rx error runt packets */ u32 hnc_rx_jabber_pkts; /* Rx error jabber packets */ u32 ncsi_rx_cmds; /* Rx NCSI commands */ diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index f2f3b5c1b94126..24edb273797240 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -252,16 +252,15 @@ struct ncsi_rsp_gp_pkt { /* Get Controller Packet Statistics */ struct ncsi_rsp_gcps_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ - __be32 cnt_hi; /* Counter cleared */ - __be32 cnt_lo; /* Counter cleared */ - __be32 rx_bytes; /* Rx bytes */ - __be32 tx_bytes; /* Tx bytes */ - __be32 rx_uc_pkts; /* Rx UC packets */ - __be32 rx_mc_pkts; /* Rx MC packets */ - __be32 rx_bc_pkts; /* Rx BC packets */ - __be32 tx_uc_pkts; /* Tx UC packets */ - __be32 tx_mc_pkts; /* Tx MC packets */ - __be32 tx_bc_pkts; /* Tx BC packets */ + __be64 cnt; /* Counter cleared */ + __be64 rx_bytes; /* Rx bytes */ + __be64 tx_bytes; /* Tx bytes */ + __be64 rx_uc_pkts; /* Rx UC packets */ + __be64 rx_mc_pkts; /* Rx MC packets */ + __be64 rx_bc_pkts; /* Rx BC packets */ + __be64 tx_uc_pkts; /* Tx UC packets */ + __be64 tx_mc_pkts; /* Tx MC packets */ + __be64 tx_bc_pkts; /* Tx BC packets */ __be32 fcs_err; /* FCS errors */ __be32 align_err; /* Alignment errors */ __be32 false_carrier; /* False carrier detection */ @@ -290,11 +289,11 @@ struct ncsi_rsp_gcps_pkt { __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ - __be32 rx_valid_bytes; /* Rx valid bytes */ + __be64 rx_valid_bytes; /* Rx valid bytes */ __be32 rx_runt_pkts; /* Rx error runt packets */ __be32 rx_jabber_pkts; /* Rx error jabber packets */ __be32 checksum; /* Checksum */ -}; +} __packed __aligned(4); /* Get NCSI Statistics */ struct ncsi_rsp_gns_pkt { diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 4a8ce2949faeac..8668888c5a2f99 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -926,16 +926,15 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr) /* Update HNC's statistics */ ncs = &nc->stats; - ncs->hnc_cnt_hi = ntohl(rsp->cnt_hi); - ncs->hnc_cnt_lo = ntohl(rsp->cnt_lo); - ncs->hnc_rx_bytes = ntohl(rsp->rx_bytes); - ncs->hnc_tx_bytes = ntohl(rsp->tx_bytes); - ncs->hnc_rx_uc_pkts = ntohl(rsp->rx_uc_pkts); - ncs->hnc_rx_mc_pkts = ntohl(rsp->rx_mc_pkts); - ncs->hnc_rx_bc_pkts = ntohl(rsp->rx_bc_pkts); - ncs->hnc_tx_uc_pkts = ntohl(rsp->tx_uc_pkts); - ncs->hnc_tx_mc_pkts = ntohl(rsp->tx_mc_pkts); - ncs->hnc_tx_bc_pkts = ntohl(rsp->tx_bc_pkts); + ncs->hnc_cnt = be64_to_cpu(rsp->cnt); + ncs->hnc_rx_bytes = be64_to_cpu(rsp->rx_bytes); + ncs->hnc_tx_bytes = be64_to_cpu(rsp->tx_bytes); + ncs->hnc_rx_uc_pkts = be64_to_cpu(rsp->rx_uc_pkts); + ncs->hnc_rx_mc_pkts = be64_to_cpu(rsp->rx_mc_pkts); + ncs->hnc_rx_bc_pkts = be64_to_cpu(rsp->rx_bc_pkts); + ncs->hnc_tx_uc_pkts = be64_to_cpu(rsp->tx_uc_pkts); + ncs->hnc_tx_mc_pkts = be64_to_cpu(rsp->tx_mc_pkts); + ncs->hnc_tx_bc_pkts = be64_to_cpu(rsp->tx_bc_pkts); ncs->hnc_fcs_err = ntohl(rsp->fcs_err); ncs->hnc_align_err = ntohl(rsp->align_err); ncs->hnc_false_carrier = ntohl(rsp->false_carrier); @@ -964,7 +963,7 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr) ncs->hnc_tx_1023_frames = ntohl(rsp->tx_1023_frames); ncs->hnc_tx_1522_frames = ntohl(rsp->tx_1522_frames); ncs->hnc_tx_9022_frames = ntohl(rsp->tx_9022_frames); - ncs->hnc_rx_valid_bytes = ntohl(rsp->rx_valid_bytes); + ncs->hnc_rx_valid_bytes = be64_to_cpu(rsp->rx_valid_bytes); ncs->hnc_rx_runt_pkts = ntohl(rsp->rx_runt_pkts); ncs->hnc_rx_jabber_pkts = ntohl(rsp->rx_jabber_pkts); From 0e84dc981b3b84553d7a1616e912e03153d82ac3 Mon Sep 17 00:00:00 2001 From: Viktor Malik Date: Tue, 15 Apr 2025 17:50:14 +0200 Subject: [PATCH 2251/2924] libbpf: Fix buffer overflow in bpf_object__init_prog [ Upstream commit ee684de5c1b0ac01821320826baec7da93f3615b ] As shown in [1], it is possible to corrupt a BPF ELF file such that arbitrary BPF instructions are loaded by libbpf. This can be done by setting a symbol (BPF program) section offset to a large (unsigned) number such that

overflows and points before the section data in the memory. Consider the situation below where: - prog_start = sec_start + symbol_offset <-- size_t overflow here - prog_end = prog_start + prog_size prog_start sec_start prog_end sec_end | | | | v v v v .....................|################################|............ The report in [1] also provides a corrupted BPF ELF which can be used as a reproducer: $ readelf -S crash Section Headers: [Nr] Name Type Address Offset Size EntSize Flags Link Info Align ... [ 2] uretprobe.mu[...] PROGBITS 0000000000000000 00000040 0000000000000068 0000000000000000 AX 0 0 8 $ readelf -s crash Symbol table '.symtab' contains 8 entries: Num: Value Size Type Bind Vis Ndx Name ... 6: ffffffffffffffb8 104 FUNC GLOBAL DEFAULT 2 handle_tp Here, the handle_tp prog has section offset ffffffffffffffb8, i.e. will point before the actual memory where section 2 is allocated. This is also reported by AddressSanitizer: ================================================================= ==1232==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x7c7302fe0000 at pc 0x7fc3046e4b77 bp 0x7ffe64677cd0 sp 0x7ffe64677490 READ of size 104 at 0x7c7302fe0000 thread T0 #0 0x7fc3046e4b76 in memcpy (/lib64/libasan.so.8+0xe4b76) #1 0x00000040df3e in bpf_object__init_prog /src/libbpf/src/libbpf.c:856 #2 0x00000040df3e in bpf_object__add_programs /src/libbpf/src/libbpf.c:928 #3 0x00000040df3e in bpf_object__elf_collect /src/libbpf/src/libbpf.c:3930 #4 0x00000040df3e in bpf_object_open /src/libbpf/src/libbpf.c:8067 #5 0x00000040f176 in bpf_object__open_file /src/libbpf/src/libbpf.c:8090 #6 0x000000400c16 in main /poc/poc.c:8 #7 0x7fc3043d25b4 in __libc_start_call_main (/lib64/libc.so.6+0x35b4) #8 0x7fc3043d2667 in __libc_start_main@@GLIBC_2.34 (/lib64/libc.so.6+0x3667) #9 0x000000400b34 in _start (/poc/poc+0x400b34) 0x7c7302fe0000 is located 64 bytes before 104-byte region [0x7c7302fe0040,0x7c7302fe00a8) allocated by thread T0 here: #0 0x7fc3046e716b in malloc (/lib64/libasan.so.8+0xe716b) #1 0x7fc3045ee600 in __libelf_set_rawdata_wrlock (/lib64/libelf.so.1+0xb600) #2 0x7fc3045ef018 in __elf_getdata_rdlock (/lib64/libelf.so.1+0xc018) #3 0x00000040642f in elf_sec_data /src/libbpf/src/libbpf.c:3740 The problem here is that currently, libbpf only checks that the program end is within the section bounds. There used to be a check `while (sec_off < sec_sz)` in bpf_object__add_programs, however, it was removed by commit 6245947c1b3c ("libbpf: Allow gaps in BPF program sections to support overriden weak functions"). Add a check for detecting the overflow of `sec_off + prog_sz` to bpf_object__init_prog to fix this issue. [1] https://github.com/lmarch2/poc/blob/main/libbpf/libbpf.md Fixes: 6245947c1b3c ("libbpf: Allow gaps in BPF program sections to support overriden weak functions") Reported-by: lmarch2 <2524158037@qq.com> Signed-off-by: Viktor Malik Signed-off-by: Andrii Nakryiko Reviewed-by: Shung-Hsi Yu Link: https://github.com/lmarch2/poc/blob/main/libbpf/libbpf.md Link: https://lore.kernel.org/bpf/20250415155014.397603-1-vmalik@redhat.com Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 37d563e1405156..cfb952d7002f61 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -896,7 +896,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_off + prog_sz > sec_sz) { + if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) { pr_warn("sec '%s': program at offset %zu crosses section boundary\n", sec_name, sec_off); return -LIBBPF_ERRNO__FORMAT; From 8fa26547de4f1a01fec07b2ce4d334856d27811d Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 11 Apr 2025 10:49:53 +0300 Subject: [PATCH 2252/2924] net/mlx5: Avoid using xso.real_dev unnecessarily [ Upstream commit d79444e8c3d40b11f5e155e5591d53bd1e512e1f ] xso.real_dev is the active device of an offloaded xfrm state and is managed by bonding. As such, it's subject to change when states are migrated to a new device. Using it in places other than offloading/unoffloading the states is risky. This commit saves the device into the driver-specific struct mlx5e_ipsec_sa_entry and switches mlx5e_ipsec_init_macs() and mlx5e_ipsec_netevent_event() to make use of it. Additionally, mlx5e_xfrm_update_stats() used xso.real_dev to validate that correct net locks are held. But in a bonding config, the net of the master device is the same as the underlying devices, and the net is already a local var, so use that instead. The only remaining references to xso.real_dev are now in the .xdo_dev_state_add() / .xdo_dev_state_delete() path. Signed-off-by: Cosmin Ratiu Reviewed-by: Leon Romanovsky Reviewed-by: Nikolay Aleksandrov Signed-off-by: Steffen Klassert Stable-dep-of: fd4e41ebf66c ("bonding: Mark active offloaded xfrm_states") Signed-off-by: Sasha Levin --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 16 +++++----------- .../ethernet/mellanox/mlx5/core/en_accel/ipsec.h | 1 + 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 2dd842aac6fc47..626e525c0f0d7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -259,8 +259,7 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); - struct xfrm_state *x = sa_entry->x; - struct net_device *netdev; + struct net_device *netdev = sa_entry->dev; struct neighbour *n; u8 addr[ETH_ALEN]; const void *pkey; @@ -270,8 +269,6 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->type != XFRM_DEV_OFFLOAD_PACKET) return; - netdev = x->xso.real_dev; - mlx5_query_mac_address(mdev, addr); switch (attrs->dir) { case XFRM_DEV_OFFLOAD_IN: @@ -713,6 +710,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, return -ENOMEM; sa_entry->x = x; + sa_entry->dev = netdev; sa_entry->ipsec = ipsec; /* Check if this SA is originated from acquire flow temporary SA */ if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) @@ -855,8 +853,6 @@ static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, struct mlx5e_ipsec_sa_entry *sa_entry; struct mlx5e_ipsec *ipsec; struct neighbour *n = ptr; - struct net_device *netdev; - struct xfrm_state *x; unsigned long idx; if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID)) @@ -876,11 +872,9 @@ static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, continue; } - x = sa_entry->x; - netdev = x->xso.real_dev; data = sa_entry->work->data; - neigh_ha_snapshot(data->addr, n, netdev); + neigh_ha_snapshot(data->addr, n, sa_entry->dev); queue_work(ipsec->wq, &sa_entry->work->work); } @@ -996,8 +990,8 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) size_t headers; lockdep_assert(lockdep_is_held(&x->lock) || - lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) || - lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_state_lock)); + lockdep_is_held(&net->xfrm.xfrm_cfg_mutex) || + lockdep_is_held(&net->xfrm.xfrm_state_lock)); if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index a63c2289f8af92..ffcd0cdeb77544 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -274,6 +274,7 @@ struct mlx5e_ipsec_limits { struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_esn_state esn_state; struct xfrm_state *x; + struct net_device *dev; struct mlx5e_ipsec *ipsec; struct mlx5_accel_esp_xfrm_attrs attrs; void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x, From baab28f90ddca048e72af8013a50da9b63af03b7 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 11 Apr 2025 10:49:54 +0300 Subject: [PATCH 2253/2924] xfrm: Use xdo.dev instead of xdo.real_dev [ Upstream commit 25ac138f58e7d5c8bffa31e8891418d2819180c4 ] The policy offload struct was reused from the state offload and real_dev was copied from dev, but it was never set to anything else. Simplify the code by always using xdo.dev for policies. Signed-off-by: Cosmin Ratiu Reviewed-by: Leon Romanovsky Reviewed-by: Nikolay Aleksandrov Signed-off-by: Steffen Klassert Stable-dep-of: fd4e41ebf66c ("bonding: Mark active offloaded xfrm_states") Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 2 +- net/xfrm/xfrm_device.c | 2 -- net/xfrm/xfrm_state.c | 2 -- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 626e525c0f0d7e..0dfbbe21936f31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -1164,7 +1164,7 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, struct netlink_ext_ack *extack) { - struct net_device *netdev = x->xdo.real_dev; + struct net_device *netdev = x->xdo.dev; struct mlx5e_ipsec_pol_entry *pol_entry; struct mlx5e_priv *priv; int err; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index d62f76161d83e2..4f4165ff738d2a 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -378,7 +378,6 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, xdo->dev = dev; netdev_tracker_alloc(dev, &xdo->dev_tracker, GFP_ATOMIC); - xdo->real_dev = dev; xdo->type = XFRM_DEV_OFFLOAD_PACKET; switch (dir) { case XFRM_POLICY_IN: @@ -400,7 +399,6 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack); if (err) { xdo->dev = NULL; - xdo->real_dev = NULL; xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; xdo->dir = 0; netdev_put(dev, &xdo->dev_tracker); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 07fe8e5daa32b0..a98e193b55a3eb 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1552,7 +1552,6 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, xso->type = XFRM_DEV_OFFLOAD_PACKET; xso->dir = xdo->dir; xso->dev = xdo->dev; - xso->real_dev = xdo->real_dev; xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ; netdev_hold(xso->dev, &xso->dev_tracker, GFP_ATOMIC); error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL); @@ -1560,7 +1559,6 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, xso->dir = 0; netdev_put(xso->dev, &xso->dev_tracker); xso->dev = NULL; - xso->real_dev = NULL; xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; x->km.state = XFRM_STATE_DEAD; to_put = x; From 877e87a450cfe655c7bf6175368631d76eedbe14 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 11 Apr 2025 10:49:56 +0300 Subject: [PATCH 2254/2924] xfrm: Add explicit dev to .xdo_dev_state_{add,delete,free} [ Upstream commit 43eca05b6a3b917c600e10cc6b06bfa57fa57401 ] Previously, device driver IPSec offload implementations would fall into two categories: 1. Those that used xso.dev to determine the offload device. 2. Those that used xso.real_dev to determine the offload device. The first category didn't work with bonding while the second did. In a non-bonding setup the two pointers are the same. This commit adds explicit pointers for the offload netdevice to .xdo_dev_state_add() / .xdo_dev_state_delete() / .xdo_dev_state_free() which eliminates the confusion and allows drivers from the first category to work with bonding. xso.real_dev now becomes a private pointer managed by the bonding driver. Signed-off-by: Cosmin Ratiu Reviewed-by: Leon Romanovsky Reviewed-by: Nikolay Aleksandrov Signed-off-by: Steffen Klassert Stable-dep-of: fd4e41ebf66c ("bonding: Mark active offloaded xfrm_states") Signed-off-by: Sasha Levin --- Documentation/networking/xfrm_device.rst | 10 +++-- drivers/net/bonding/bond_main.c | 33 ++++++++------- .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 20 +++++---- .../inline_crypto/ch_ipsec/chcr_ipsec.c | 18 +++++--- .../net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 41 +++++++++++-------- drivers/net/ethernet/intel/ixgbevf/ipsec.c | 21 ++++++---- .../marvell/octeontx2/nic/cn10k_ipsec.c | 18 ++++---- .../mellanox/mlx5/core/en_accel/ipsec.c | 12 +++--- .../net/ethernet/netronome/nfp/crypto/ipsec.c | 11 +++-- drivers/net/netdevsim/ipsec.c | 15 ++++--- include/linux/netdevice.h | 10 +++-- include/net/xfrm.h | 8 ++++ net/xfrm/xfrm_device.c | 4 +- net/xfrm/xfrm_state.c | 14 ++++--- 14 files changed, 136 insertions(+), 99 deletions(-) diff --git a/Documentation/networking/xfrm_device.rst b/Documentation/networking/xfrm_device.rst index 7f24c09f269431..122204da0fff69 100644 --- a/Documentation/networking/xfrm_device.rst +++ b/Documentation/networking/xfrm_device.rst @@ -65,9 +65,13 @@ Callbacks to implement /* from include/linux/netdevice.h */ struct xfrmdev_ops { /* Crypto and Packet offload callbacks */ - int (*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack); - void (*xdo_dev_state_delete) (struct xfrm_state *x); - void (*xdo_dev_state_free) (struct xfrm_state *x); + int (*xdo_dev_state_add)(struct net_device *dev, + struct xfrm_state *x, + struct netlink_ext_ack *extack); + void (*xdo_dev_state_delete)(struct net_device *dev, + struct xfrm_state *x); + void (*xdo_dev_state_free)(struct net_device *dev, + struct xfrm_state *x); bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8ea183da8d5398..b183a3c99f627a 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -453,13 +453,14 @@ static struct net_device *bond_ipsec_dev(struct xfrm_state *xs) /** * bond_ipsec_add_sa - program device with a security association + * @bond_dev: pointer to the bond net device * @xs: pointer to transformer state struct * @extack: extack point to fill failure reason **/ -static int bond_ipsec_add_sa(struct xfrm_state *xs, +static int bond_ipsec_add_sa(struct net_device *bond_dev, + struct xfrm_state *xs, struct netlink_ext_ack *extack) { - struct net_device *bond_dev = xs->xso.dev; struct net_device *real_dev; netdevice_tracker tracker; struct bond_ipsec *ipsec; @@ -496,7 +497,7 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs, } xs->xso.real_dev = real_dev; - err = real_dev->xfrmdev_ops->xdo_dev_state_add(xs, extack); + err = real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev, xs, extack); if (!err) { ipsec->xs = xs; INIT_LIST_HEAD(&ipsec->list); @@ -540,7 +541,8 @@ static void bond_ipsec_add_sa_all(struct bonding *bond) continue; ipsec->xs->xso.real_dev = real_dev; - if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) { + if (real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev, + ipsec->xs, NULL)) { slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__); ipsec->xs->xso.real_dev = NULL; } @@ -551,11 +553,12 @@ static void bond_ipsec_add_sa_all(struct bonding *bond) /** * bond_ipsec_del_sa - clear out this specific SA + * @bond_dev: pointer to the bond net device * @xs: pointer to transformer state struct **/ -static void bond_ipsec_del_sa(struct xfrm_state *xs) +static void bond_ipsec_del_sa(struct net_device *bond_dev, + struct xfrm_state *xs) { - struct net_device *bond_dev = xs->xso.dev; struct net_device *real_dev; netdevice_tracker tracker; struct bond_ipsec *ipsec; @@ -587,7 +590,7 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs) goto out; } - real_dev->xfrmdev_ops->xdo_dev_state_delete(xs); + real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, xs); out: netdev_put(real_dev, &tracker); mutex_lock(&bond->ipsec_lock); @@ -624,18 +627,20 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__); - } else { - real_dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); - if (real_dev->xfrmdev_ops->xdo_dev_state_free) - real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs); + continue; } + real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, + ipsec->xs); + if (real_dev->xfrmdev_ops->xdo_dev_state_free) + real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, + ipsec->xs); } mutex_unlock(&bond->ipsec_lock); } -static void bond_ipsec_free_sa(struct xfrm_state *xs) +static void bond_ipsec_free_sa(struct net_device *bond_dev, + struct xfrm_state *xs) { - struct net_device *bond_dev = xs->xso.dev; struct net_device *real_dev; netdevice_tracker tracker; struct bonding *bond; @@ -661,7 +666,7 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs) if (real_dev && real_dev->xfrmdev_ops && real_dev->xfrmdev_ops->xdo_dev_state_free) - real_dev->xfrmdev_ops->xdo_dev_state_free(xs); + real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, xs); out: netdev_put(real_dev, &tracker); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 551c279dc14bed..51395c96b2e994 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -6480,10 +6480,11 @@ static const struct tlsdev_ops cxgb4_ktls_ops = { #if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) -static int cxgb4_xfrm_add_state(struct xfrm_state *x, +static int cxgb4_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { - struct adapter *adap = netdev2adap(x->xso.dev); + struct adapter *adap = netdev2adap(dev); int ret; if (!mutex_trylock(&uld_mutex)) { @@ -6494,7 +6495,8 @@ static int cxgb4_xfrm_add_state(struct xfrm_state *x, if (ret) goto out_unlock; - ret = adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_add(x, extack); + ret = adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_add(dev, x, + extack); out_unlock: mutex_unlock(&uld_mutex); @@ -6502,9 +6504,9 @@ static int cxgb4_xfrm_add_state(struct xfrm_state *x, return ret; } -static void cxgb4_xfrm_del_state(struct xfrm_state *x) +static void cxgb4_xfrm_del_state(struct net_device *dev, struct xfrm_state *x) { - struct adapter *adap = netdev2adap(x->xso.dev); + struct adapter *adap = netdev2adap(dev); if (!mutex_trylock(&uld_mutex)) { dev_dbg(adap->pdev_dev, @@ -6514,15 +6516,15 @@ static void cxgb4_xfrm_del_state(struct xfrm_state *x) if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) goto out_unlock; - adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_delete(x); + adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_delete(dev, x); out_unlock: mutex_unlock(&uld_mutex); } -static void cxgb4_xfrm_free_state(struct xfrm_state *x) +static void cxgb4_xfrm_free_state(struct net_device *dev, struct xfrm_state *x) { - struct adapter *adap = netdev2adap(x->xso.dev); + struct adapter *adap = netdev2adap(dev); if (!mutex_trylock(&uld_mutex)) { dev_dbg(adap->pdev_dev, @@ -6532,7 +6534,7 @@ static void cxgb4_xfrm_free_state(struct xfrm_state *x) if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) goto out_unlock; - adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_free(x); + adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_free(dev, x); out_unlock: mutex_unlock(&uld_mutex); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c index baba96883f48b5..ecd9a0bd5e1822 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c @@ -75,9 +75,12 @@ static int ch_ipsec_uld_state_change(void *handle, enum cxgb4_state new_state); static int ch_ipsec_xmit(struct sk_buff *skb, struct net_device *dev); static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop); static void ch_ipsec_advance_esn_state(struct xfrm_state *x); -static void ch_ipsec_xfrm_free_state(struct xfrm_state *x); -static void ch_ipsec_xfrm_del_state(struct xfrm_state *x); -static int ch_ipsec_xfrm_add_state(struct xfrm_state *x, +static void ch_ipsec_xfrm_free_state(struct net_device *dev, + struct xfrm_state *x); +static void ch_ipsec_xfrm_del_state(struct net_device *dev, + struct xfrm_state *x); +static int ch_ipsec_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack); static const struct xfrmdev_ops ch_ipsec_xfrmdev_ops = { @@ -223,7 +226,8 @@ static int ch_ipsec_setkey(struct xfrm_state *x, * returns 0 on success, negative error if failed to send message to FPGA * positive error if FPGA returned a bad response */ -static int ch_ipsec_xfrm_add_state(struct xfrm_state *x, +static int ch_ipsec_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { struct ipsec_sa_entry *sa_entry; @@ -302,14 +306,16 @@ static int ch_ipsec_xfrm_add_state(struct xfrm_state *x, return res; } -static void ch_ipsec_xfrm_del_state(struct xfrm_state *x) +static void ch_ipsec_xfrm_del_state(struct net_device *dev, + struct xfrm_state *x) { /* do nothing */ if (!x->xso.offload_handle) return; } -static void ch_ipsec_xfrm_free_state(struct xfrm_state *x) +static void ch_ipsec_xfrm_free_state(struct net_device *dev, + struct xfrm_state *x) { struct ipsec_sa_entry *sa_entry; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 07ea1954a276ed..796e90d741f022 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -9,7 +9,7 @@ #define IXGBE_IPSEC_KEY_BITS 160 static const char aes_gcm_name[] = "rfc4106(gcm(aes))"; -static void ixgbe_ipsec_del_sa(struct xfrm_state *xs); +static void ixgbe_ipsec_del_sa(struct net_device *dev, struct xfrm_state *xs); /** * ixgbe_ipsec_set_tx_sa - set the Tx SA registers @@ -321,7 +321,7 @@ void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) if (r->used) { if (r->mode & IXGBE_RXTXMOD_VF) - ixgbe_ipsec_del_sa(r->xs); + ixgbe_ipsec_del_sa(adapter->netdev, r->xs); else ixgbe_ipsec_set_rx_sa(hw, i, r->xs->id.spi, r->key, r->salt, @@ -330,7 +330,7 @@ void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) if (t->used) { if (t->mode & IXGBE_RXTXMOD_VF) - ixgbe_ipsec_del_sa(t->xs); + ixgbe_ipsec_del_sa(adapter->netdev, t->xs); else ixgbe_ipsec_set_tx_sa(hw, i, t->key, t->salt); } @@ -417,6 +417,7 @@ static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec, /** * ixgbe_ipsec_parse_proto_keys - find the key and salt based on the protocol + * @dev: pointer to net device * @xs: pointer to xfrm_state struct * @mykey: pointer to key array to populate * @mysalt: pointer to salt value to populate @@ -424,10 +425,10 @@ static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec, * This copies the protocol keys and salt to our own data tables. The * 82599 family only supports the one algorithm. **/ -static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs, +static int ixgbe_ipsec_parse_proto_keys(struct net_device *dev, + struct xfrm_state *xs, u32 *mykey, u32 *mysalt) { - struct net_device *dev = xs->xso.real_dev; unsigned char *key_data; char *alg_name = NULL; int key_len; @@ -473,11 +474,12 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs, /** * ixgbe_ipsec_check_mgmt_ip - make sure there is no clash with mgmt IP filters + * @dev: pointer to net device * @xs: pointer to transformer state struct **/ -static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs) +static int ixgbe_ipsec_check_mgmt_ip(struct net_device *dev, + struct xfrm_state *xs) { - struct net_device *dev = xs->xso.real_dev; struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; u32 mfval, manc, reg; @@ -556,13 +558,14 @@ static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs) /** * ixgbe_ipsec_add_sa - program device with a security association + * @dev: pointer to device to program * @xs: pointer to transformer state struct * @extack: extack point to fill failure reason **/ -static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, +static int ixgbe_ipsec_add_sa(struct net_device *dev, + struct xfrm_state *xs, struct netlink_ext_ack *extack) { - struct net_device *dev = xs->xso.real_dev; struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ipsec *ipsec = adapter->ipsec; struct ixgbe_hw *hw = &adapter->hw; @@ -581,7 +584,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, return -EINVAL; } - if (ixgbe_ipsec_check_mgmt_ip(xs)) { + if (ixgbe_ipsec_check_mgmt_ip(dev, xs)) { NL_SET_ERR_MSG_MOD(extack, "IPsec IP addr clash with mgmt filters"); return -EINVAL; } @@ -615,7 +618,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, rsa.decrypt = xs->ealg || xs->aead; /* get the key and salt */ - ret = ixgbe_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt); + ret = ixgbe_ipsec_parse_proto_keys(dev, xs, rsa.key, &rsa.salt); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Rx SA table"); return ret; @@ -724,7 +727,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, if (xs->id.proto & IPPROTO_ESP) tsa.encrypt = xs->ealg || xs->aead; - ret = ixgbe_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt); + ret = ixgbe_ipsec_parse_proto_keys(dev, xs, tsa.key, &tsa.salt); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Tx SA table"); memset(&tsa, 0, sizeof(tsa)); @@ -752,11 +755,11 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, /** * ixgbe_ipsec_del_sa - clear out this specific SA + * @dev: pointer to device to program * @xs: pointer to transformer state struct **/ -static void ixgbe_ipsec_del_sa(struct xfrm_state *xs) +static void ixgbe_ipsec_del_sa(struct net_device *dev, struct xfrm_state *xs) { - struct net_device *dev = xs->xso.real_dev; struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ipsec *ipsec = adapter->ipsec; struct ixgbe_hw *hw = &adapter->hw; @@ -841,7 +844,8 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf) continue; if (ipsec->rx_tbl[i].mode & IXGBE_RXTXMOD_VF && ipsec->rx_tbl[i].vf == vf) - ixgbe_ipsec_del_sa(ipsec->rx_tbl[i].xs); + ixgbe_ipsec_del_sa(adapter->netdev, + ipsec->rx_tbl[i].xs); } /* search tx sa table */ @@ -850,7 +854,8 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf) continue; if (ipsec->tx_tbl[i].mode & IXGBE_RXTXMOD_VF && ipsec->tx_tbl[i].vf == vf) - ixgbe_ipsec_del_sa(ipsec->tx_tbl[i].xs); + ixgbe_ipsec_del_sa(adapter->netdev, + ipsec->tx_tbl[i].xs); } } @@ -930,7 +935,7 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) memcpy(xs->aead->alg_name, aes_gcm_name, sizeof(aes_gcm_name)); /* set up the HW offload */ - err = ixgbe_ipsec_add_sa(xs, NULL); + err = ixgbe_ipsec_add_sa(adapter->netdev, xs, NULL); if (err) goto err_aead; @@ -1034,7 +1039,7 @@ int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) xs = ipsec->tx_tbl[sa_idx].xs; } - ixgbe_ipsec_del_sa(xs); + ixgbe_ipsec_del_sa(adapter->netdev, xs); /* remove the xs that was made-up in the add request */ kfree_sensitive(xs); diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index 8ba037e3d9c270..65580b9cb06f21 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -201,6 +201,7 @@ struct xfrm_state *ixgbevf_ipsec_find_rx_state(struct ixgbevf_ipsec *ipsec, /** * ixgbevf_ipsec_parse_proto_keys - find the key and salt based on the protocol + * @dev: pointer to net device to program * @xs: pointer to xfrm_state struct * @mykey: pointer to key array to populate * @mysalt: pointer to salt value to populate @@ -208,10 +209,10 @@ struct xfrm_state *ixgbevf_ipsec_find_rx_state(struct ixgbevf_ipsec *ipsec, * This copies the protocol keys and salt to our own data tables. The * 82599 family only supports the one algorithm. **/ -static int ixgbevf_ipsec_parse_proto_keys(struct xfrm_state *xs, +static int ixgbevf_ipsec_parse_proto_keys(struct net_device *dev, + struct xfrm_state *xs, u32 *mykey, u32 *mysalt) { - struct net_device *dev = xs->xso.real_dev; unsigned char *key_data; char *alg_name = NULL; int key_len; @@ -256,13 +257,14 @@ static int ixgbevf_ipsec_parse_proto_keys(struct xfrm_state *xs, /** * ixgbevf_ipsec_add_sa - program device with a security association + * @dev: pointer to net device to program * @xs: pointer to transformer state struct * @extack: extack point to fill failure reason **/ -static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs, +static int ixgbevf_ipsec_add_sa(struct net_device *dev, + struct xfrm_state *xs, struct netlink_ext_ack *extack) { - struct net_device *dev = xs->xso.real_dev; struct ixgbevf_adapter *adapter; struct ixgbevf_ipsec *ipsec; u16 sa_idx; @@ -310,7 +312,8 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs, rsa.decrypt = xs->ealg || xs->aead; /* get the key and salt */ - ret = ixgbevf_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt); + ret = ixgbevf_ipsec_parse_proto_keys(dev, xs, rsa.key, + &rsa.salt); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Rx SA table"); return ret; @@ -363,7 +366,8 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs, if (xs->id.proto & IPPROTO_ESP) tsa.encrypt = xs->ealg || xs->aead; - ret = ixgbevf_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt); + ret = ixgbevf_ipsec_parse_proto_keys(dev, xs, tsa.key, + &tsa.salt); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Tx SA table"); memset(&tsa, 0, sizeof(tsa)); @@ -388,11 +392,12 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs, /** * ixgbevf_ipsec_del_sa - clear out this specific SA + * @dev: pointer to net device to program * @xs: pointer to transformer state struct **/ -static void ixgbevf_ipsec_del_sa(struct xfrm_state *xs) +static void ixgbevf_ipsec_del_sa(struct net_device *dev, + struct xfrm_state *xs) { - struct net_device *dev = xs->xso.real_dev; struct ixgbevf_adapter *adapter; struct ixgbevf_ipsec *ipsec; u16 sa_idx; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c index fc59e50bafce66..a6500e3673f248 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c @@ -663,10 +663,10 @@ static int cn10k_ipsec_inb_add_state(struct xfrm_state *x, return -EOPNOTSUPP; } -static int cn10k_ipsec_outb_add_state(struct xfrm_state *x, +static int cn10k_ipsec_outb_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { - struct net_device *netdev = x->xso.dev; struct cn10k_tx_sa_s *sa_entry; struct qmem *sa_info; struct otx2_nic *pf; @@ -676,7 +676,7 @@ static int cn10k_ipsec_outb_add_state(struct xfrm_state *x, if (err) return err; - pf = netdev_priv(netdev); + pf = netdev_priv(dev); err = qmem_alloc(pf->dev, &sa_info, pf->ipsec.sa_size, OTX2_ALIGN); if (err) @@ -700,18 +700,18 @@ static int cn10k_ipsec_outb_add_state(struct xfrm_state *x, return 0; } -static int cn10k_ipsec_add_state(struct xfrm_state *x, +static int cn10k_ipsec_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) return cn10k_ipsec_inb_add_state(x, extack); else - return cn10k_ipsec_outb_add_state(x, extack); + return cn10k_ipsec_outb_add_state(dev, x, extack); } -static void cn10k_ipsec_del_state(struct xfrm_state *x) +static void cn10k_ipsec_del_state(struct net_device *dev, struct xfrm_state *x) { - struct net_device *netdev = x->xso.dev; struct cn10k_tx_sa_s *sa_entry; struct qmem *sa_info; struct otx2_nic *pf; @@ -720,7 +720,7 @@ static void cn10k_ipsec_del_state(struct xfrm_state *x) if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) return; - pf = netdev_priv(netdev); + pf = netdev_priv(dev); sa_info = (struct qmem *)x->xso.offload_handle; sa_entry = (struct cn10k_tx_sa_s *)sa_info->base; @@ -732,7 +732,7 @@ static void cn10k_ipsec_del_state(struct xfrm_state *x) err = cn10k_outb_write_sa(pf, sa_info); if (err) - netdev_err(netdev, "Error (%d) deleting SA\n", err); + netdev_err(dev, "Error (%d) deleting SA\n", err); x->xso.offload_handle = 0; qmem_free(pf->dev, sa_info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 0dfbbe21936f31..77f61cd28a7993 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -689,17 +689,17 @@ static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry) return 0; } -static int mlx5e_xfrm_add_state(struct xfrm_state *x, +static int mlx5e_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { struct mlx5e_ipsec_sa_entry *sa_entry = NULL; - struct net_device *netdev = x->xso.real_dev; struct mlx5e_ipsec *ipsec; struct mlx5e_priv *priv; gfp_t gfp; int err; - priv = netdev_priv(netdev); + priv = netdev_priv(dev); if (!priv->ipsec) return -EOPNOTSUPP; @@ -710,7 +710,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, return -ENOMEM; sa_entry->x = x; - sa_entry->dev = netdev; + sa_entry->dev = dev; sa_entry->ipsec = ipsec; /* Check if this SA is originated from acquire flow temporary SA */ if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) @@ -807,7 +807,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, return err; } -static void mlx5e_xfrm_del_state(struct xfrm_state *x) +static void mlx5e_xfrm_del_state(struct net_device *dev, struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_ipsec *ipsec = sa_entry->ipsec; @@ -820,7 +820,7 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x) WARN_ON(old != sa_entry); } -static void mlx5e_xfrm_free_state(struct xfrm_state *x) +static void mlx5e_xfrm_free_state(struct net_device *dev, struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_ipsec *ipsec = sa_entry->ipsec; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c index 671af5d4c5d25c..9e7c285eaa6bca 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c @@ -266,17 +266,17 @@ static void set_sha2_512hmac(struct nfp_ipsec_cfg_add_sa *cfg, int *trunc_len) } } -static int nfp_net_xfrm_add_state(struct xfrm_state *x, +static int nfp_net_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { - struct net_device *netdev = x->xso.real_dev; struct nfp_ipsec_cfg_mssg msg = {}; int i, key_len, trunc_len, err = 0; struct nfp_ipsec_cfg_add_sa *cfg; struct nfp_net *nn; unsigned int saidx; - nn = netdev_priv(netdev); + nn = netdev_priv(dev); cfg = &msg.cfg_add_sa; /* General */ @@ -546,17 +546,16 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x, return 0; } -static void nfp_net_xfrm_del_state(struct xfrm_state *x) +static void nfp_net_xfrm_del_state(struct net_device *dev, struct xfrm_state *x) { struct nfp_ipsec_cfg_mssg msg = { .cmd = NFP_IPSEC_CFG_MSSG_INV_SA, .sa_idx = x->xso.offload_handle - 1, }; - struct net_device *netdev = x->xso.real_dev; struct nfp_net *nn; int err; - nn = netdev_priv(netdev); + nn = netdev_priv(dev); err = nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_IPSEC, &msg, sizeof(msg), nfp_net_ipsec_cfg); if (err) diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c index d88bdb9a17176a..47cdee5577d461 100644 --- a/drivers/net/netdevsim/ipsec.c +++ b/drivers/net/netdevsim/ipsec.c @@ -85,11 +85,11 @@ static int nsim_ipsec_find_empty_idx(struct nsim_ipsec *ipsec) return -ENOSPC; } -static int nsim_ipsec_parse_proto_keys(struct xfrm_state *xs, +static int nsim_ipsec_parse_proto_keys(struct net_device *dev, + struct xfrm_state *xs, u32 *mykey, u32 *mysalt) { const char aes_gcm_name[] = "rfc4106(gcm(aes))"; - struct net_device *dev = xs->xso.real_dev; unsigned char *key_data; char *alg_name = NULL; int key_len; @@ -129,17 +129,16 @@ static int nsim_ipsec_parse_proto_keys(struct xfrm_state *xs, return 0; } -static int nsim_ipsec_add_sa(struct xfrm_state *xs, +static int nsim_ipsec_add_sa(struct net_device *dev, + struct xfrm_state *xs, struct netlink_ext_ack *extack) { struct nsim_ipsec *ipsec; - struct net_device *dev; struct netdevsim *ns; struct nsim_sa sa; u16 sa_idx; int ret; - dev = xs->xso.real_dev; ns = netdev_priv(dev); ipsec = &ns->ipsec; @@ -174,7 +173,7 @@ static int nsim_ipsec_add_sa(struct xfrm_state *xs, sa.crypt = xs->ealg || xs->aead; /* get the key and salt */ - ret = nsim_ipsec_parse_proto_keys(xs, sa.key, &sa.salt); + ret = nsim_ipsec_parse_proto_keys(dev, xs, sa.key, &sa.salt); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for SA table"); return ret; @@ -200,9 +199,9 @@ static int nsim_ipsec_add_sa(struct xfrm_state *xs, return 0; } -static void nsim_ipsec_del_sa(struct xfrm_state *xs) +static void nsim_ipsec_del_sa(struct net_device *dev, struct xfrm_state *xs) { - struct netdevsim *ns = netdev_priv(xs->xso.real_dev); + struct netdevsim *ns = netdev_priv(dev); struct nsim_ipsec *ipsec = &ns->ipsec; u16 sa_idx; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ea022750e4e09..33338a233cc724 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1012,9 +1012,13 @@ struct netdev_bpf { #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { - int (*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack); - void (*xdo_dev_state_delete) (struct xfrm_state *x); - void (*xdo_dev_state_free) (struct xfrm_state *x); + int (*xdo_dev_state_add)(struct net_device *dev, + struct xfrm_state *x, + struct netlink_ext_ack *extack); + void (*xdo_dev_state_delete)(struct net_device *dev, + struct xfrm_state *x); + void (*xdo_dev_state_free)(struct net_device *dev, + struct xfrm_state *x); bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 06ab2a3d2ebd10..01783dc3d0e324 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -147,8 +147,16 @@ enum { }; struct xfrm_dev_offload { + /* The device for this offload. + * Device drivers should not use this directly, as that will prevent + * them from working with bonding device. Instead, the device passed + * to the add/delete callbacks should be used. + */ struct net_device *dev; netdevice_tracker dev_tracker; + /* This is a private pointer used by the bonding driver. + * Device drivers should not use it. + */ struct net_device *real_dev; unsigned long offload_handle; u8 dir : 2; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 4f4165ff738d2a..f46a9e5764f014 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -314,7 +314,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = dev; netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC); - xso->real_dev = dev; if (xuo->flags & XFRM_OFFLOAD_INBOUND) xso->dir = XFRM_DEV_OFFLOAD_IN; @@ -326,11 +325,10 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, else xso->type = XFRM_DEV_OFFLOAD_CRYPTO; - err = dev->xfrmdev_ops->xdo_dev_state_add(x, extack); + err = dev->xfrmdev_ops->xdo_dev_state_add(dev, x, extack); if (err) { xso->dev = NULL; xso->dir = 0; - xso->real_dev = NULL; netdev_put(dev, &xso->dev_tracker); xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a98e193b55a3eb..5ece039846e201 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -767,7 +767,7 @@ void xfrm_dev_state_delete(struct xfrm_state *x) struct net_device *dev = READ_ONCE(xso->dev); if (dev) { - dev->xfrmdev_ops->xdo_dev_state_delete(x); + dev->xfrmdev_ops->xdo_dev_state_delete(dev, x); spin_lock_bh(&xfrm_state_dev_gc_lock); hlist_add_head(&x->dev_gclist, &xfrm_state_dev_gc_list); spin_unlock_bh(&xfrm_state_dev_gc_lock); @@ -789,7 +789,7 @@ void xfrm_dev_state_free(struct xfrm_state *x) spin_unlock_bh(&xfrm_state_dev_gc_lock); if (dev->xfrmdev_ops->xdo_dev_state_free) - dev->xfrmdev_ops->xdo_dev_state_free(x); + dev->xfrmdev_ops->xdo_dev_state_free(dev, x); WRITE_ONCE(xso->dev, NULL); xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; netdev_put(dev, &xso->dev_tracker); @@ -1548,16 +1548,18 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { struct xfrm_dev_offload *xdo = &pol->xdo; struct xfrm_dev_offload *xso = &x->xso; + struct net_device *dev = xdo->dev; xso->type = XFRM_DEV_OFFLOAD_PACKET; xso->dir = xdo->dir; - xso->dev = xdo->dev; + xso->dev = dev; xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ; - netdev_hold(xso->dev, &xso->dev_tracker, GFP_ATOMIC); - error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL); + netdev_hold(dev, &xso->dev_tracker, GFP_ATOMIC); + error = dev->xfrmdev_ops->xdo_dev_state_add(dev, x, + NULL); if (error) { xso->dir = 0; - netdev_put(xso->dev, &xso->dev_tracker); + netdev_put(dev, &xso->dev_tracker); xso->dev = NULL; xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; x->km.state = XFRM_STATE_DEAD; From d50cc6afb2a88ca2a4366f79c7ff8068a966fc6e Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 11 Apr 2025 10:49:57 +0300 Subject: [PATCH 2255/2924] bonding: Mark active offloaded xfrm_states [ Upstream commit fd4e41ebf66cb8b43de2f640b97314c4ee3b4499 ] When the active link is changed for a bond device, the existing xfrm states need to be migrated over to the new link. This is done with: - bond_ipsec_del_sa_all() goes through the offloaded states list and removes all of them from hw. - bond_ipsec_add_sa_all() re-offloads all states to the new device. But because the offload status of xfrm states isn't marked in any way, there can be bugs. When all bond links are down, bond_ipsec_del_sa_all() unoffloads everything from the previous active link. If the same link then comes back up, nothing gets reoffloaded by bond_ipsec_add_sa_all(). This results in a stack trace like this a bit later when user space removes the offloaded rules, because mlx5e_xfrm_del_state() is asked to remove a rule that's no longer offloaded: [] Call Trace: [] [] ? __warn+0x7d/0x110 [] ? mlx5e_xfrm_del_state+0x90/0xa0 [mlx5_core] [] ? report_bug+0x16d/0x180 [] ? handle_bug+0x4f/0x90 [] ? exc_invalid_op+0x14/0x70 [] ? asm_exc_invalid_op+0x16/0x20 [] ? mlx5e_xfrm_del_state+0x73/0xa0 [mlx5_core] [] ? mlx5e_xfrm_del_state+0x90/0xa0 [mlx5_core] [] bond_ipsec_del_sa+0x1ab/0x200 [bonding] [] xfrm_dev_state_delete+0x1f/0x60 [] __xfrm_state_delete+0x196/0x200 [] xfrm_state_delete+0x21/0x40 [] xfrm_del_sa+0x69/0x110 [] xfrm_user_rcv_msg+0x11d/0x300 [] ? release_pages+0xca/0x140 [] ? copy_to_user_tmpl.part.0+0x110/0x110 [] netlink_rcv_skb+0x54/0x100 [] xfrm_netlink_rcv+0x31/0x40 [] netlink_unicast+0x1fc/0x2d0 [] netlink_sendmsg+0x1e4/0x410 [] __sock_sendmsg+0x38/0x60 [] sock_write_iter+0x94/0xf0 [] vfs_write+0x338/0x3f0 [] ksys_write+0xba/0xd0 [] do_syscall_64+0x4c/0x100 [] entry_SYSCALL_64_after_hwframe+0x4b/0x53 There's also another theoretical bug: Calling bond_ipsec_del_sa_all() multiple times can result in corruption in the driver implementation if the double-free isn't tolerated. This isn't nice. Before the "Fixes" commit, xs->xso.real_dev was set to NULL when an xfrm state was unoffloaded from a device, but a race with netdevsim's .xdo_dev_offload_ok() accessing real_dev was considered a sufficient reason to not set real_dev to NULL anymore. This unfortunately introduced the new bugs. Since .xdo_dev_offload_ok() was significantly refactored by [1] and there are no more users in the stack of xso.real_dev, that race is now gone and xs->xso.real_dev can now once again be used to represent which device (if any) currently holds the offloaded rule. Go one step further and set real_dev after add/before delete calls, to catch any future driver misuses of real_dev. [1] https://lore.kernel.org/netdev/cover.1739972570.git.leon@kernel.org/ Fixes: f8cde9805981 ("bonding: fix xfrm real_dev null pointer dereference") Signed-off-by: Cosmin Ratiu Reviewed-by: Leon Romanovsky Reviewed-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Tested-by: Hangbin Liu Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b183a3c99f627a..14ebbe82a2c5b7 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -496,9 +496,9 @@ static int bond_ipsec_add_sa(struct net_device *bond_dev, goto out; } - xs->xso.real_dev = real_dev; err = real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev, xs, extack); if (!err) { + xs->xso.real_dev = real_dev; ipsec->xs = xs; INIT_LIST_HEAD(&ipsec->list); mutex_lock(&bond->ipsec_lock); @@ -540,12 +540,12 @@ static void bond_ipsec_add_sa_all(struct bonding *bond) if (ipsec->xs->xso.real_dev == real_dev) continue; - ipsec->xs->xso.real_dev = real_dev; if (real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev, ipsec->xs, NULL)) { slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__); - ipsec->xs->xso.real_dev = NULL; + continue; } + ipsec->xs->xso.real_dev = real_dev; } out: mutex_unlock(&bond->ipsec_lock); @@ -629,6 +629,7 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) __func__); continue; } + ipsec->xs->xso.real_dev = NULL; real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, ipsec->xs); if (real_dev->xfrmdev_ops->xdo_dev_state_free) @@ -664,6 +665,7 @@ static void bond_ipsec_free_sa(struct net_device *bond_dev, WARN_ON(xs->xso.real_dev != real_dev); + xs->xso.real_dev = NULL; if (real_dev && real_dev->xfrmdev_ops && real_dev->xfrmdev_ops->xdo_dev_state_free) real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, xs); From d679fb0876e448db8b51f666563fcbde2d373d12 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 11 Apr 2025 10:49:58 +0300 Subject: [PATCH 2256/2924] bonding: Fix multiple long standing offload races [ Upstream commit d2fddbd3479928e52061e1c8dd302006b6283ce8 ] Refactor the bonding ipsec offload operations to fix a number of long-standing control plane races between state migration and user deletion and a few other issues. xfrm state deletion can happen concurrently with bond_change_active_slave() operation. This manifests itself as a bond_ipsec_del_sa() call with x->lock held, followed by a bond_ipsec_free_sa() a bit later from a wq. The alternate path of these calls coming from xfrm_dev_state_flush() can't happen, as that needs the RTNL lock and bond_change_active_slave() already holds it. 1. bond_ipsec_del_sa_all() might call xdo_dev_state_delete() a second time on an xfrm state that was concurrently killed. This is bad. 2. bond_ipsec_add_sa_all() can add a state on the new device, but pending bond_ipsec_free_sa() calls from the old device will then hit the WARN_ON() and then, worse, call xdo_dev_state_free() on the new device without a corresponding xdo_dev_state_delete(). 3. Resolve a sleeping in atomic context introduced by the mentioned "Fixes" commit. bond_ipsec_del_sa_all() and bond_ipsec_add_sa_all() now acquire x->lock and check for x->km.state to help with problems 1 and 2. And since xso.real_dev is now a private pointer managed by the bonding driver in xfrm state, make better use of it to fully fix problems 1 and 2. In bond_ipsec_del_sa_all(), set xso.real_dev to NULL while holding both the mutex and x->lock, which makes sure that neither bond_ipsec_del_sa() nor bond_ipsec_free_sa() could run concurrently. Fix problem 3 by moving the list cleanup (which requires the mutex) from bond_ipsec_del_sa() (called from atomic context) to bond_ipsec_free_sa() Finally, simplify bond_ipsec_del_sa() and bond_ipsec_free_sa() by using xso->real_dev directly, since it's now protected by locks and can be trusted to always reflect the offload device. Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to mutex") Signed-off-by: Cosmin Ratiu Reviewed-by: Leon Romanovsky Reviewed-by: Nikolay Aleksandrov Reviewed-by: Hangbin Liu Tested-by: Hangbin Liu Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 82 +++++++++++++++------------------ include/net/xfrm.h | 7 ++- 2 files changed, 41 insertions(+), 48 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 14ebbe82a2c5b7..4461220bcd58d8 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -545,7 +545,20 @@ static void bond_ipsec_add_sa_all(struct bonding *bond) slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__); continue; } + + spin_lock_bh(&ipsec->xs->lock); + /* xs might have been killed by the user during the migration + * to the new dev, but bond_ipsec_del_sa() should have done + * nothing, as xso.real_dev is NULL. + * Delete it from the device we just added it to. The pending + * bond_ipsec_free_sa() call will do the rest of the cleanup. + */ + if (ipsec->xs->km.state == XFRM_STATE_DEAD && + real_dev->xfrmdev_ops->xdo_dev_state_delete) + real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, + ipsec->xs); ipsec->xs->xso.real_dev = real_dev; + spin_unlock_bh(&ipsec->xs->lock); } out: mutex_unlock(&bond->ipsec_lock); @@ -560,48 +573,20 @@ static void bond_ipsec_del_sa(struct net_device *bond_dev, struct xfrm_state *xs) { struct net_device *real_dev; - netdevice_tracker tracker; - struct bond_ipsec *ipsec; - struct bonding *bond; - struct slave *slave; - if (!bond_dev) + if (!bond_dev || !xs->xso.real_dev) return; - rcu_read_lock(); - bond = netdev_priv(bond_dev); - slave = rcu_dereference(bond->curr_active_slave); - real_dev = slave ? slave->dev : NULL; - netdev_hold(real_dev, &tracker, GFP_ATOMIC); - rcu_read_unlock(); - - if (!slave) - goto out; - - if (!xs->xso.real_dev) - goto out; - - WARN_ON(xs->xso.real_dev != real_dev); + real_dev = xs->xso.real_dev; if (!real_dev->xfrmdev_ops || !real_dev->xfrmdev_ops->xdo_dev_state_delete || netif_is_bond_master(real_dev)) { slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__); - goto out; + return; } real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, xs); -out: - netdev_put(real_dev, &tracker); - mutex_lock(&bond->ipsec_lock); - list_for_each_entry(ipsec, &bond->ipsec_list, list) { - if (ipsec->xs == xs) { - list_del(&ipsec->list); - kfree(ipsec); - break; - } - } - mutex_unlock(&bond->ipsec_lock); } static void bond_ipsec_del_sa_all(struct bonding *bond) @@ -629,9 +614,15 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) __func__); continue; } + + spin_lock_bh(&ipsec->xs->lock); ipsec->xs->xso.real_dev = NULL; - real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, - ipsec->xs); + /* Don't double delete states killed by the user. */ + if (ipsec->xs->km.state != XFRM_STATE_DEAD) + real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, + ipsec->xs); + spin_unlock_bh(&ipsec->xs->lock); + if (real_dev->xfrmdev_ops->xdo_dev_state_free) real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, ipsec->xs); @@ -643,34 +634,33 @@ static void bond_ipsec_free_sa(struct net_device *bond_dev, struct xfrm_state *xs) { struct net_device *real_dev; - netdevice_tracker tracker; + struct bond_ipsec *ipsec; struct bonding *bond; - struct slave *slave; if (!bond_dev) return; - rcu_read_lock(); bond = netdev_priv(bond_dev); - slave = rcu_dereference(bond->curr_active_slave); - real_dev = slave ? slave->dev : NULL; - netdev_hold(real_dev, &tracker, GFP_ATOMIC); - rcu_read_unlock(); - - if (!slave) - goto out; + mutex_lock(&bond->ipsec_lock); if (!xs->xso.real_dev) goto out; - WARN_ON(xs->xso.real_dev != real_dev); + real_dev = xs->xso.real_dev; xs->xso.real_dev = NULL; - if (real_dev && real_dev->xfrmdev_ops && + if (real_dev->xfrmdev_ops && real_dev->xfrmdev_ops->xdo_dev_state_free) real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, xs); out: - netdev_put(real_dev, &tracker); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + if (ipsec->xs == xs) { + list_del(&ipsec->list); + kfree(ipsec); + break; + } + } + mutex_unlock(&bond->ipsec_lock); } /** diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 01783dc3d0e324..1f1861c57e2ad0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -154,8 +154,11 @@ struct xfrm_dev_offload { */ struct net_device *dev; netdevice_tracker dev_tracker; - /* This is a private pointer used by the bonding driver. - * Device drivers should not use it. + /* This is a private pointer used by the bonding driver (and eventually + * should be moved there). Device drivers should not use it. + * Protected by xfrm_state.lock AND bond.ipsec_lock in most cases, + * except in the .xdo_dev_state_del() flow, where only xfrm_state.lock + * is held. */ struct net_device *real_dev; unsigned long offload_handle; From 7a0ba7b2c2cb4af8a9b1fe2bdb7f2da82e3cec0a Mon Sep 17 00:00:00 2001 From: Zhen XIN Date: Thu, 10 Apr 2025 15:42:17 +0000 Subject: [PATCH 2257/2924] wifi: rtw88: sdio: map mgmt frames to queue TX_DESC_QSEL_MGMT [ Upstream commit b2effcdc237979dcc533d446a792fc54fd0e1213 ] The rtw88-sdio do not work in AP mode due to the lack of TX status report for management frames. Map the management frames to queue TX_DESC_QSEL_MGMT, which enables the chip to generate TX reports for these frames Tested-on: rtl8723ds Fixes: 65371a3f14e7 ("wifi: rtw88: sdio: Add HCI implementation for SDIO based chipsets") Signed-off-by: Zhen XIN Reviewed-by: Martin Blumenstingl Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20250410154217.1849977-3-zhen.xin@nokia-sbell.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/sdio.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/sdio.c b/drivers/net/wireless/realtek/rtw88/sdio.c index 6209a49312f176..ba600d40bba463 100644 --- a/drivers/net/wireless/realtek/rtw88/sdio.c +++ b/drivers/net/wireless/realtek/rtw88/sdio.c @@ -718,10 +718,7 @@ static u8 rtw_sdio_get_tx_qsel(struct rtw_dev *rtwdev, struct sk_buff *skb, case RTW_TX_QUEUE_H2C: return TX_DESC_QSEL_H2C; case RTW_TX_QUEUE_MGMT: - if (rtw_chip_wcpu_11n(rtwdev)) - return TX_DESC_QSEL_HIGH; - else - return TX_DESC_QSEL_MGMT; + return TX_DESC_QSEL_MGMT; case RTW_TX_QUEUE_HI0: return TX_DESC_QSEL_HIGH; default: From c2a306f51fa3ad8f7b0a6af0e1f4b25471044369 Mon Sep 17 00:00:00 2001 From: Zhen XIN Date: Thu, 10 Apr 2025 15:42:16 +0000 Subject: [PATCH 2258/2924] wifi: rtw88: sdio: call rtw_sdio_indicate_tx_status unconditionally [ Upstream commit fc5f5a0ec463ae6a07850428bd3082947e01d276 ] The rtw88-sdio do not work in AP mode due to the lack of TX status report for management frames. Make the invocation of rtw_sdio_indicate_tx_status unconditional and cover all packet queues Tested-on: rtl8723ds Fixes: 65371a3f14e7 ("wifi: rtw88: sdio: Add HCI implementation for SDIO based chipsets") Signed-off-by: Zhen XIN Reviewed-by: Martin Blumenstingl Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20250410154217.1849977-2-zhen.xin@nokia-sbell.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/sdio.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/sdio.c b/drivers/net/wireless/realtek/rtw88/sdio.c index ba600d40bba463..410f637b1add58 100644 --- a/drivers/net/wireless/realtek/rtw88/sdio.c +++ b/drivers/net/wireless/realtek/rtw88/sdio.c @@ -1224,10 +1224,7 @@ static void rtw_sdio_process_tx_queue(struct rtw_dev *rtwdev, return; } - if (queue <= RTW_TX_QUEUE_VO) - rtw_sdio_indicate_tx_status(rtwdev, skb); - else - dev_kfree_skb_any(skb); + rtw_sdio_indicate_tx_status(rtwdev, skb); } static void rtw_sdio_tx_handler(struct work_struct *work) From fc48ca82ff15d4bf7940460a2b271178eaf6be20 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 15 Apr 2025 12:07:20 +0300 Subject: [PATCH 2259/2924] wifi: rtw88: do not ignore hardware read error during DPK [ Upstream commit 20d3c19bd8f9b498173c198eadf54580c8caa336 ] In 'rtw8822c_dpk_cal_coef1()', do not ignore error returned by 'check_hw_ready()' but issue a warning to denote possible DPK issue. Compile tested only. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 5227c2ee453d ("rtw88: 8822c: add SW DPK support") Suggested-by: Ping-Ke Shih Signed-off-by: Dmitry Antipov Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20250415090720.194048-1-dmantipov@yandex.ru Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/rtw8822c.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c index 5e53e0db177efe..8937a7b656edb1 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c @@ -3951,7 +3951,8 @@ static void rtw8822c_dpk_cal_coef1(struct rtw_dev *rtwdev) rtw_write32(rtwdev, REG_NCTL0, 0x00001148); rtw_write32(rtwdev, REG_NCTL0, 0x00001149); - check_hw_ready(rtwdev, 0x2d9c, MASKBYTE0, 0x55); + if (!check_hw_ready(rtwdev, 0x2d9c, MASKBYTE0, 0x55)) + rtw_warn(rtwdev, "DPK stuck, performance may be suboptimal"); rtw_write8(rtwdev, 0x1b10, 0x0); rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SUBPAGE, 0x0000000c); From 1111e612c5f8b6e1a21091bbfcd26776c4318554 Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Fri, 11 Apr 2025 11:31:51 +0530 Subject: [PATCH 2260/2924] wifi: ath12k: Handle error cases during extended skb allocation [ Upstream commit 37a068fc9dc4feb8d76e8896bb33883d06c11a6b ] Currently, in the case of extended skb allocation, the buffer is freed before the DMA unmap operation. This premature deletion can result in skb->data corruption, as the memory region could be re-allocated for other purposes. Fix this issue by reordering the failure cases by calling dma_unmap_single() first, then followed by the corresponding kfree_skb(). This helps avoid data corruption in case of failures in dp_tx(). Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: P Praneesh Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250411060154.1388159-2-praneesh.p@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_tx.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c index 1e2788df476c28..f82d2c58eff3f6 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.c +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c @@ -229,7 +229,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, struct ath12k_skb_cb *skb_cb = ATH12K_SKB_CB(skb); struct hal_tcl_data_cmd *hal_tcl_desc; struct hal_tx_msdu_ext_desc *msg; - struct sk_buff *skb_ext_desc; + struct sk_buff *skb_ext_desc = NULL; struct hal_srng *tcl_ring; struct ieee80211_hdr *hdr = (void *)skb->data; struct ath12k_vif *ahvif = arvif->ahvif; @@ -415,18 +415,15 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, if (ret < 0) { ath12k_dbg(ab, ATH12K_DBG_DP_TX, "Failed to add HTT meta data, dropping packet\n"); - kfree_skb(skb_ext_desc); - goto fail_unmap_dma; + goto fail_free_ext_skb; } } ti.paddr = dma_map_single(ab->dev, skb_ext_desc->data, skb_ext_desc->len, DMA_TO_DEVICE); ret = dma_mapping_error(ab->dev, ti.paddr); - if (ret) { - kfree_skb(skb_ext_desc); - goto fail_unmap_dma; - } + if (ret) + goto fail_free_ext_skb; ti.data_len = skb_ext_desc->len; ti.type = HAL_TCL_DESC_TYPE_EXT_DESC; @@ -462,7 +459,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, ring_selector++; } - goto fail_unmap_dma; + goto fail_unmap_dma_ext; } ath12k_hal_tx_cmd_desc_setup(ab, hal_tcl_desc, &ti); @@ -478,13 +475,16 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, return 0; -fail_unmap_dma: - dma_unmap_single(ab->dev, ti.paddr, ti.data_len, DMA_TO_DEVICE); - +fail_unmap_dma_ext: if (skb_cb->paddr_ext_desc) dma_unmap_single(ab->dev, skb_cb->paddr_ext_desc, sizeof(struct hal_tx_msdu_ext_desc), DMA_TO_DEVICE); +fail_free_ext_skb: + kfree_skb(skb_ext_desc); + +fail_unmap_dma: + dma_unmap_single(ab->dev, ti.paddr, ti.data_len, DMA_TO_DEVICE); fail_remove_tx_buf: ath12k_dp_tx_release_txbuf(dp, tx_desc, pool_id); From 5f09d16cd57764c95c8548fe5b70672c9ac01127 Mon Sep 17 00:00:00 2001 From: Sarika Sharma Date: Tue, 8 Apr 2025 10:23:27 +0530 Subject: [PATCH 2261/2924] wifi: ath12k: fix invalid access to memory [ Upstream commit 9f17747fbda6fca934854463873c4abf8061491d ] In ath12k_dp_rx_msdu_coalesce(), rxcb is fetched from skb and boolean is_continuation is part of rxcb. Currently, after freeing the skb, the rxcb->is_continuation accessed again which is wrong since the memory is already freed. This might lead use-after-free error. Hence, fix by locally defining bool is_continuation from rxcb, so that after freeing skb, is_continuation can be used. Compile tested only. Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Sarika Sharma Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250408045327.1632222-1-quic_sarishar@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_rx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 21d240cc3aee46..70afb3bd392029 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1817,6 +1817,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, struct hal_rx_desc *ldesc; int space_extra, rem_len, buf_len; u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz; + bool is_continuation; /* As the msdu is spread across multiple rx buffers, * find the offset to the start of msdu for computing @@ -1865,7 +1866,8 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, rem_len = msdu_len - buf_first_len; while ((skb = __skb_dequeue(msdu_list)) != NULL && rem_len > 0) { rxcb = ATH12K_SKB_RXCB(skb); - if (rxcb->is_continuation) + is_continuation = rxcb->is_continuation; + if (is_continuation) buf_len = DP_RX_BUFFER_SIZE - hal_rx_desc_sz; else buf_len = rem_len; @@ -1883,7 +1885,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, dev_kfree_skb_any(skb); rem_len -= buf_len; - if (!rxcb->is_continuation) + if (!is_continuation) break; } From c77ea4e216440f21ffbf293b28c4d46ded9ccf98 Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Wed, 16 Apr 2025 07:49:03 +0530 Subject: [PATCH 2262/2924] wifi: ath12k: Add MSDU length validation for TKIP MIC error [ Upstream commit 763216fe6c5df95d122c71ef34c342427c987820 ] In the WBM error path, while processing TKIP MIC errors, MSDU length is fetched from the hal_rx_desc's msdu_end. This MSDU length is directly passed to skb_put() without validation. In stress test scenarios, the WBM error ring may receive invalid descriptors, which could lead to an invalid MSDU length. To fix this, add a check to drop the skb when the calculated MSDU length is greater than the skb size. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: P Praneesh Signed-off-by: Nithyanantham Paramasivam Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250416021903.3178962-1-nithyanantham.paramasivam@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_rx.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 70afb3bd392029..317cc6e6e1f5b5 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -3824,6 +3824,15 @@ static bool ath12k_dp_rx_h_tkip_mic_err(struct ath12k *ar, struct sk_buff *msdu, l3pad_bytes = ath12k_dp_rx_h_l3pad(ab, desc); msdu_len = ath12k_dp_rx_h_msdu_len(ab, desc); + + if ((hal_rx_desc_sz + l3pad_bytes + msdu_len) > DP_RX_BUFFER_SIZE) { + ath12k_dbg(ab, ATH12K_DBG_DATA, + "invalid msdu len in tkip mic err %u\n", msdu_len); + ath12k_dbg_dump(ab, ATH12K_DBG_DATA, NULL, "", desc, + sizeof(*desc)); + return true; + } + skb_put(msdu, hal_rx_desc_sz + l3pad_bytes + msdu_len); skb_pull(msdu, hal_rx_desc_sz + l3pad_bytes); From 3add30f43b0137f91ef5104c3aa7427e36b007d8 Mon Sep 17 00:00:00 2001 From: Ramasamy Kaliappan Date: Wed, 16 Apr 2025 00:11:02 +0530 Subject: [PATCH 2263/2924] wifi: ath12k: Fix the QoS control field offset to build QoS header [ Upstream commit 8599d4cc4191c8c1af34207a8b9414acca4afb59 ] Currently, in the mac80211 layer, received EAPOL packets are dropped when the HT control field is present in the QoS header. This issue arises due to an incorrect QoS control field offset used to build the QoS header in the MSDU data, leading to a corrupted header in the mac80211 layer. This issue also applies to other frames that contain the QoS control field, such as QoS data or Null frames. To resolve this, use ieee80211_get_qos_ctl() to obtain the correct QoS control offset from the MSDU data. Additionally, ensure the QoS control header is copied in little-endian format within the MSDU data. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Ramasamy Kaliappan Signed-off-by: Nithyanantham Paramasivam Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250415184102.2707300-1-nithyanantham.paramasivam@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_rx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 317cc6e6e1f5b5..125ccbd0532cee 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -2119,7 +2119,7 @@ static void ath12k_get_dot11_hdr_from_rx_desc(struct ath12k *ar, struct ath12k_base *ab = ar->ab; size_t hdr_len, crypto_len; struct ieee80211_hdr hdr; - u16 qos_ctl; + __le16 qos_ctl; u8 *crypto_hdr, mesh_ctrl; ath12k_dp_rx_desc_get_dot11_hdr(ab, rx_desc, &hdr); @@ -2140,13 +2140,13 @@ static void ath12k_get_dot11_hdr_from_rx_desc(struct ath12k *ar, /* Add QOS header */ if (ieee80211_is_data_qos(hdr.frame_control)) { - qos_ctl = rxcb->tid; + struct ieee80211_hdr *qos_ptr = (struct ieee80211_hdr *)msdu->data; + + qos_ctl = cpu_to_le16(rxcb->tid & IEEE80211_QOS_CTL_TID_MASK); if (mesh_ctrl) - qos_ctl |= IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT; + qos_ctl |= cpu_to_le16(IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT); - /* TODO: Add other QoS ctl fields when required */ - memcpy(msdu->data + (hdr_len - IEEE80211_QOS_CTL_LEN), - &qos_ctl, IEEE80211_QOS_CTL_LEN); + memcpy(ieee80211_get_qos_ctl(qos_ptr), &qos_ctl, IEEE80211_QOS_CTL_LEN); } } From 648f34a8d3c0b547004c17628ccde3abeb6b9f99 Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Mon, 24 Mar 2025 11:55:10 +0530 Subject: [PATCH 2264/2924] wifi: ath12k: Add extra TLV tag parsing support in monitor Rx path [ Upstream commit 3973cda5ef496fd412fdec2c7c3403ac90e391b8 ] Currently, the monitor Rx parser handler is inherited from the ath11k. However, the ath12k 802.11be hardware does not report the Rx TLV header in the MSDU data. Instead, the hardware reports those TLVs under the following TLV tags: 1. Buffer address 2. MPDU start 3. MPDU end 4. MSDU end Therefore, add support for parsing the above TLVs in the Rx monitor path and use this information for MSDU buffer and status updates. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: P Praneesh Tested-by: Nicolas Escande Reviewed-by: Vasanthakumar Thiagarajan Signed-off-by: Karthikeyan Periyasamy Link: https://patch.msgid.link/20250324062518.2752822-3-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson Stable-dep-of: f5d6b15d9503 ("wifi: ath12k: fix wrong handling of CCMP256 and GCMP ciphers") Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp.h | 2 + drivers/net/wireless/ath/ath12k/dp_mon.c | 157 ++++++++++++++++++++++- drivers/net/wireless/ath/ath12k/dp_mon.h | 4 +- drivers/net/wireless/ath/ath12k/dp_rx.c | 2 + drivers/net/wireless/ath/ath12k/hal_rx.h | 12 +- 5 files changed, 170 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp.h b/drivers/net/wireless/ath/ath12k/dp.h index 75435a931548c9..427a87b63dec3b 100644 --- a/drivers/net/wireless/ath/ath12k/dp.h +++ b/drivers/net/wireless/ath/ath12k/dp.h @@ -106,6 +106,8 @@ struct dp_mon_mpdu { struct list_head list; struct sk_buff *head; struct sk_buff *tail; + u32 err_bitmap; + u8 decap_format; }; #define DP_MON_MAX_STATUS_BUF 32 diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index d22800e894850d..8c78f601f70aa1 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -1647,7 +1647,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k *ar, u32_get_bits(info[0], HAL_RX_MPDU_START_INFO0_PPDU_ID); } - break; + return HAL_RX_MON_STATUS_MPDU_START; } case HAL_RX_MSDU_START: /* TODO: add msdu start parsing logic */ @@ -2088,6 +2088,144 @@ static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, return -EINVAL; } +static int ath12k_dp_pkt_set_pktlen(struct sk_buff *skb, u32 len) +{ + if (skb->len > len) { + skb_trim(skb, len); + } else { + if (skb_tailroom(skb) < len - skb->len) { + if ((pskb_expand_head(skb, 0, + len - skb->len - skb_tailroom(skb), + GFP_ATOMIC))) { + return -ENOMEM; + } + } + skb_put(skb, (len - skb->len)); + } + + return 0; +} + +static void ath12k_dp_mon_parse_rx_msdu_end_err(u32 info, u32 *errmap) +{ + if (info & RX_MSDU_END_INFO13_FCS_ERR) + *errmap |= HAL_RX_MPDU_ERR_FCS; + + if (info & RX_MSDU_END_INFO13_DECRYPT_ERR) + *errmap |= HAL_RX_MPDU_ERR_DECRYPT; + + if (info & RX_MSDU_END_INFO13_TKIP_MIC_ERR) + *errmap |= HAL_RX_MPDU_ERR_TKIP_MIC; + + if (info & RX_MSDU_END_INFO13_A_MSDU_ERROR) + *errmap |= HAL_RX_MPDU_ERR_AMSDU_ERR; + + if (info & RX_MSDU_END_INFO13_OVERFLOW_ERR) + *errmap |= HAL_RX_MPDU_ERR_OVERFLOW; + + if (info & RX_MSDU_END_INFO13_MSDU_LEN_ERR) + *errmap |= HAL_RX_MPDU_ERR_MSDU_LEN; + + if (info & RX_MSDU_END_INFO13_MPDU_LEN_ERR) + *errmap |= HAL_RX_MPDU_ERR_MPDU_LEN; +} + +static int +ath12k_dp_mon_parse_status_msdu_end(struct ath12k_mon_data *pmon, + const struct hal_rx_msdu_end *msdu_end) +{ + struct dp_mon_mpdu *mon_mpdu = pmon->mon_mpdu; + + ath12k_dp_mon_parse_rx_msdu_end_err(__le32_to_cpu(msdu_end->info2), + &mon_mpdu->err_bitmap); + + mon_mpdu->decap_format = le32_get_bits(msdu_end->info1, + RX_MSDU_END_INFO11_DECAP_FORMAT); + + return 0; +} + +static int +ath12k_dp_mon_parse_status_buf(struct ath12k *ar, + struct ath12k_mon_data *pmon, + const struct dp_mon_packet_info *packet_info) +{ + struct ath12k_base *ab = ar->ab; + struct dp_rxdma_mon_ring *buf_ring = &ab->dp.rxdma_mon_buf_ring; + struct sk_buff *msdu; + int buf_id; + u32 offset; + + buf_id = u32_get_bits(packet_info->cookie, DP_RXDMA_BUF_COOKIE_BUF_ID); + + spin_lock_bh(&buf_ring->idr_lock); + msdu = idr_remove(&buf_ring->bufs_idr, buf_id); + spin_unlock_bh(&buf_ring->idr_lock); + + if (unlikely(!msdu)) { + ath12k_warn(ab, "mon dest desc with inval buf_id %d\n", buf_id); + return 0; + } + + dma_unmap_single(ab->dev, ATH12K_SKB_RXCB(msdu)->paddr, + msdu->len + skb_tailroom(msdu), + DMA_FROM_DEVICE); + + offset = packet_info->dma_length + ATH12K_MON_RX_DOT11_OFFSET; + if (ath12k_dp_pkt_set_pktlen(msdu, offset)) { + dev_kfree_skb_any(msdu); + goto dest_replenish; + } + + if (!pmon->mon_mpdu->head) + pmon->mon_mpdu->head = msdu; + else + pmon->mon_mpdu->tail->next = msdu; + + pmon->mon_mpdu->tail = msdu; + +dest_replenish: + ath12k_dp_mon_buf_replenish(ab, buf_ring, 1); + + return 0; +} + +static int +ath12k_dp_mon_parse_rx_dest_tlv(struct ath12k *ar, + struct ath12k_mon_data *pmon, + enum hal_rx_mon_status hal_status, + const void *tlv_data) +{ + switch (hal_status) { + case HAL_RX_MON_STATUS_MPDU_START: + if (WARN_ON_ONCE(pmon->mon_mpdu)) + break; + + pmon->mon_mpdu = kzalloc(sizeof(*pmon->mon_mpdu), GFP_ATOMIC); + if (!pmon->mon_mpdu) + return -ENOMEM; + break; + case HAL_RX_MON_STATUS_BUF_ADDR: + return ath12k_dp_mon_parse_status_buf(ar, pmon, tlv_data); + case HAL_RX_MON_STATUS_MPDU_END: + /* If no MSDU then free empty MPDU */ + if (pmon->mon_mpdu->tail) { + pmon->mon_mpdu->tail->next = NULL; + list_add_tail(&pmon->mon_mpdu->list, &pmon->dp_rx_mon_mpdu_list); + } else { + kfree(pmon->mon_mpdu); + } + pmon->mon_mpdu = NULL; + break; + case HAL_RX_MON_STATUS_MSDU_END: + return ath12k_dp_mon_parse_status_msdu_end(pmon, tlv_data); + default: + break; + } + + return 0; +} + static enum hal_rx_mon_status ath12k_dp_mon_parse_rx_dest(struct ath12k *ar, struct ath12k_mon_data *pmon, struct sk_buff *skb) @@ -2114,14 +2252,20 @@ ath12k_dp_mon_parse_rx_dest(struct ath12k *ar, struct ath12k_mon_data *pmon, tlv_len = le64_get_bits(tlv->tl, HAL_TLV_64_HDR_LEN); hal_status = ath12k_dp_mon_rx_parse_status_tlv(ar, pmon, tlv); + + if (ar->monitor_started && + ath12k_dp_mon_parse_rx_dest_tlv(ar, pmon, hal_status, tlv->value)) + return HAL_RX_MON_STATUS_PPDU_DONE; + ptr += sizeof(*tlv) + tlv_len; ptr = PTR_ALIGN(ptr, HAL_TLV_64_ALIGN); - if ((ptr - skb->data) >= DP_RX_BUFFER_SIZE) + if ((ptr - skb->data) > skb->len) break; } while ((hal_status == HAL_RX_MON_STATUS_PPDU_NOT_DONE) || (hal_status == HAL_RX_MON_STATUS_BUF_ADDR) || + (hal_status == HAL_RX_MON_STATUS_MPDU_START) || (hal_status == HAL_RX_MON_STATUS_MPDU_END) || (hal_status == HAL_RX_MON_STATUS_MSDU_END)); @@ -2142,9 +2286,11 @@ ath12k_dp_mon_rx_parse_mon_status(struct ath12k *ar, struct dp_mon_mpdu *tmp; struct dp_mon_mpdu *mon_mpdu = pmon->mon_mpdu; struct sk_buff *head_msdu, *tail_msdu; - enum hal_rx_mon_status hal_status = HAL_RX_MON_STATUS_BUF_DONE; + enum hal_rx_mon_status hal_status; - ath12k_dp_mon_parse_rx_dest(ar, pmon, skb); + hal_status = ath12k_dp_mon_parse_rx_dest(ar, pmon, skb); + if (hal_status != HAL_RX_MON_STATUS_PPDU_DONE) + return hal_status; list_for_each_entry_safe(mon_mpdu, tmp, &pmon->dp_rx_mon_mpdu_list, list) { list_del(&mon_mpdu->list); @@ -2158,6 +2304,7 @@ ath12k_dp_mon_rx_parse_mon_status(struct ath12k *ar, kfree(mon_mpdu); } + return hal_status; } @@ -3346,7 +3493,7 @@ int ath12k_dp_mon_srng_process(struct ath12k *ar, int *budget, ath12k_dp_mon_rx_memset_ppdu_info(ppdu_info); while ((skb = __skb_dequeue(&skb_list))) { - hal_status = ath12k_dp_mon_parse_rx_dest(ar, pmon, skb); + hal_status = ath12k_dp_mon_rx_parse_mon_status(ar, pmon, skb, napi); if (hal_status != HAL_RX_MON_STATUS_PPDU_DONE) { ppdu_info->ppdu_continuation = true; dev_kfree_skb_any(skb); diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.h b/drivers/net/wireless/ath/ath12k/dp_mon.h index e4368eb42aca83..b039f6b9277c69 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.h +++ b/drivers/net/wireless/ath/ath12k/dp_mon.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2019-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH12K_DP_MON_H @@ -9,6 +9,8 @@ #include "core.h" +#define ATH12K_MON_RX_DOT11_OFFSET 5 + enum dp_monitor_mode { ATH12K_DP_TX_MONITOR_MODE, ATH12K_DP_RX_MONITOR_MODE diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 125ccbd0532cee..d6794c753f3ae9 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -4482,6 +4482,8 @@ int ath12k_dp_rx_pdev_mon_attach(struct ath12k *ar) pmon->mon_last_linkdesc_paddr = 0; pmon->mon_last_buf_cookie = DP_RX_DESC_COOKIE_MAX + 1; + INIT_LIST_HEAD(&pmon->dp_rx_mon_mpdu_list); + pmon->mon_mpdu = NULL; spin_lock_init(&pmon->mon_lock); return 0; diff --git a/drivers/net/wireless/ath/ath12k/hal_rx.h b/drivers/net/wireless/ath/ath12k/hal_rx.h index 6bdcd0867d86e3..6f10e4222ba6e6 100644 --- a/drivers/net/wireless/ath/ath12k/hal_rx.h +++ b/drivers/net/wireless/ath/ath12k/hal_rx.h @@ -108,11 +108,12 @@ enum hal_rx_mon_status { HAL_RX_MON_STATUS_PPDU_DONE, HAL_RX_MON_STATUS_BUF_DONE, HAL_RX_MON_STATUS_BUF_ADDR, + HAL_RX_MON_STATUS_MPDU_START, HAL_RX_MON_STATUS_MPDU_END, HAL_RX_MON_STATUS_MSDU_END, }; -#define HAL_RX_MAX_MPDU 256 +#define HAL_RX_MAX_MPDU 1024 #define HAL_RX_NUM_WORDS_PER_PPDU_BITMAP (HAL_RX_MAX_MPDU >> 5) struct hal_rx_user_status { @@ -506,6 +507,15 @@ struct hal_rx_mpdu_start { __le32 rsvd2[16]; } __packed; +struct hal_rx_msdu_end { + __le32 info0; + __le32 rsvd0[18]; + __le32 info1; + __le32 rsvd1[10]; + __le32 info2; + __le32 rsvd2; +} __packed; + #define HAL_RX_PPDU_END_DURATION GENMASK(23, 0) struct hal_rx_ppdu_end_duration { __le32 rsvd0[9]; From f3de1a1a799dca01dc61538a2426ef2d9405309f Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Mon, 24 Mar 2025 11:55:11 +0530 Subject: [PATCH 2265/2924] wifi: ath12k: Avoid fetch Error bitmap and decap format from Rx TLV [ Upstream commit a6621bf6397ae6981b5041ba0a127e7dbeade746 ] Currently, error bitmap and decap format information are fetched from the MSDU Rx TLV data. This logic is inherited from ath11k. However, for ath12k 802.11be hardware, the Rx TLV will not be present in the MSDU data. Instead, this information is reported separately under the MSDU END TLV tag. Therefore, remove the existing fetch code, handle the MSDU END TLV tag and fetch the above information to store it in the mon_mpdu data structure for use in the merge MSDU procedure. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: P Praneesh Tested-by: Nicolas Escande Reviewed-by: Vasanthakumar Thiagarajan Signed-off-by: Karthikeyan Periyasamy Link: https://patch.msgid.link/20250324062518.2752822-4-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson Stable-dep-of: f5d6b15d9503 ("wifi: ath12k: fix wrong handling of CCMP256 and GCMP ciphers") Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_mon.c | 58 ++++++++++-------------- 1 file changed, 23 insertions(+), 35 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index 8c78f601f70aa1..6fc3b921176561 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -1702,30 +1702,26 @@ static void ath12k_dp_mon_rx_msdus_set_payload(struct ath12k *ar, static struct sk_buff * ath12k_dp_mon_rx_merg_msdus(struct ath12k *ar, - struct sk_buff *head_msdu, struct sk_buff *tail_msdu, - struct ieee80211_rx_status *rxs, bool *fcs_err) + struct dp_mon_mpdu *mon_mpdu, + struct ieee80211_rx_status *rxs) { struct ath12k_base *ab = ar->ab; struct sk_buff *msdu, *mpdu_buf, *prev_buf, *head_frag_list; + struct sk_buff *head_msdu, *tail_msdu; struct hal_rx_desc *rx_desc, *tail_rx_desc; - u8 *hdr_desc, *dest, decap_format; + u8 *hdr_desc, *dest, decap_format = mon_mpdu->decap_format; struct ieee80211_hdr_3addr *wh; - u32 err_bitmap, frag_list_sum_len = 0; + u32 frag_list_sum_len = 0; mpdu_buf = NULL; + head_msdu = mon_mpdu->head; + tail_msdu = mon_mpdu->tail; if (!head_msdu) goto err_merge_fail; - rx_desc = (struct hal_rx_desc *)head_msdu->data; tail_rx_desc = (struct hal_rx_desc *)tail_msdu->data; - err_bitmap = ath12k_dp_rx_h_mpdu_err(ab, tail_rx_desc); - if (err_bitmap & HAL_RX_MPDU_ERR_FCS) - *fcs_err = true; - - decap_format = ath12k_dp_rx_h_decap_type(ab, tail_rx_desc); - ath12k_dp_rx_h_ppdu(ar, tail_rx_desc, rxs); if (decap_format == DP_RX_DECAP_TYPE_RAW) { @@ -1954,7 +1950,8 @@ static void ath12k_dp_mon_update_radiotap(struct ath12k *ar, static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *napi, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ieee80211_rx_status *status, + u8 decap) { static const struct ieee80211_radiotap_he known = { .data1 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN | @@ -1966,7 +1963,6 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct struct ieee80211_sta *pubsta = NULL; struct ath12k_peer *peer; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); - u8 decap = DP_RX_DECAP_TYPE_RAW; bool is_mcbc = rxcb->is_mcbc; bool is_eapol_tkip = rxcb->is_eapol; @@ -1977,8 +1973,6 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct status->flag |= RX_FLAG_RADIOTAP_HE; } - if (!(status->flag & RX_FLAG_ONLY_MONITOR)) - decap = ath12k_dp_rx_h_decap_type(ar->ab, rxcb->rx_desc); spin_lock_bh(&ar->ab->base_lock); peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu); if (peer && peer->sta) { @@ -2035,25 +2029,23 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct } static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, - struct sk_buff *head_msdu, struct sk_buff *tail_msdu, + struct dp_mon_mpdu *mon_mpdu, struct hal_rx_mon_ppdu_info *ppduinfo, struct napi_struct *napi) { struct ath12k_pdev_dp *dp = &ar->dp; struct sk_buff *mon_skb, *skb_next, *header; struct ieee80211_rx_status *rxs = &dp->rx_status; - bool fcs_err = false; + u8 decap = DP_RX_DECAP_TYPE_RAW; - mon_skb = ath12k_dp_mon_rx_merg_msdus(ar, - head_msdu, tail_msdu, - rxs, &fcs_err); + mon_skb = ath12k_dp_mon_rx_merg_msdus(ar, mon_mpdu, rxs); if (!mon_skb) goto mon_deliver_fail; header = mon_skb; rxs->flag = 0; - if (fcs_err) + if (mon_mpdu->err_bitmap & HAL_RX_MPDU_ERR_FCS) rxs->flag = RX_FLAG_FAILED_FCS_CRC; do { @@ -2070,8 +2062,12 @@ static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, rxs->flag |= RX_FLAG_ALLOW_SAME_PN; } rxs->flag |= RX_FLAG_ONLY_MONITOR; + + if (!(rxs->flag & RX_FLAG_ONLY_MONITOR)) + decap = mon_mpdu->decap_format; + ath12k_dp_mon_update_radiotap(ar, ppduinfo, mon_skb, rxs); - ath12k_dp_mon_rx_deliver_msdu(ar, napi, mon_skb, rxs); + ath12k_dp_mon_rx_deliver_msdu(ar, napi, mon_skb, rxs, decap); mon_skb = skb_next; } while (mon_skb); rxs->flag = 0; @@ -2079,7 +2075,7 @@ static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, return 0; mon_deliver_fail: - mon_skb = head_msdu; + mon_skb = mon_mpdu->head; while (mon_skb) { skb_next = mon_skb->next; dev_kfree_skb_any(mon_skb); @@ -2285,7 +2281,6 @@ ath12k_dp_mon_rx_parse_mon_status(struct ath12k *ar, struct hal_rx_mon_ppdu_info *ppdu_info = &pmon->mon_ppdu_info; struct dp_mon_mpdu *tmp; struct dp_mon_mpdu *mon_mpdu = pmon->mon_mpdu; - struct sk_buff *head_msdu, *tail_msdu; enum hal_rx_mon_status hal_status; hal_status = ath12k_dp_mon_parse_rx_dest(ar, pmon, skb); @@ -2294,13 +2289,9 @@ ath12k_dp_mon_rx_parse_mon_status(struct ath12k *ar, list_for_each_entry_safe(mon_mpdu, tmp, &pmon->dp_rx_mon_mpdu_list, list) { list_del(&mon_mpdu->list); - head_msdu = mon_mpdu->head; - tail_msdu = mon_mpdu->tail; - if (head_msdu && tail_msdu) { - ath12k_dp_mon_rx_deliver(ar, head_msdu, - tail_msdu, ppdu_info, napi); - } + if (mon_mpdu->head && mon_mpdu->tail) + ath12k_dp_mon_rx_deliver(ar, mon_mpdu, ppdu_info, napi); kfree(mon_mpdu); } @@ -2985,16 +2976,13 @@ ath12k_dp_mon_tx_process_ppdu_info(struct ath12k *ar, struct dp_mon_tx_ppdu_info *tx_ppdu_info) { struct dp_mon_mpdu *tmp, *mon_mpdu; - struct sk_buff *head_msdu, *tail_msdu; list_for_each_entry_safe(mon_mpdu, tmp, &tx_ppdu_info->dp_tx_mon_mpdu_list, list) { list_del(&mon_mpdu->list); - head_msdu = mon_mpdu->head; - tail_msdu = mon_mpdu->tail; - if (head_msdu) - ath12k_dp_mon_rx_deliver(ar, head_msdu, tail_msdu, + if (mon_mpdu->head) + ath12k_dp_mon_rx_deliver(ar, mon_mpdu, &tx_ppdu_info->rx_status, napi); kfree(mon_mpdu); From 5d743c3d89ce73172722079379f04cbbb02731b5 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Mon, 24 Mar 2025 11:55:12 +0530 Subject: [PATCH 2266/2924] wifi: ath12k: Replace band define G with GHZ where appropriate [ Upstream commit 6a88093f79ea0b131e5feab9cdc045a007fad26e ] Currently, band define and enum are with the word 'G'. Replace it with more appropriate 'GHZ' for clarity and correctness. No functional changes. Only compile tested. Reviewed-by: Vasanthakumar Thiagarajan Signed-off-by: Karthikeyan Periyasamy Link: https://patch.msgid.link/20250324062518.2752822-5-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson Stable-dep-of: f5d6b15d9503 ("wifi: ath12k: fix wrong handling of CCMP256 and GCMP ciphers") Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/core.h | 8 ++-- drivers/net/wireless/ath/ath12k/debugfs.c | 4 +- drivers/net/wireless/ath/ath12k/dp_rx.c | 4 +- drivers/net/wireless/ath/ath12k/mac.c | 52 +++++++++++------------ drivers/net/wireless/ath/ath12k/wmi.c | 36 ++++++++-------- drivers/net/wireless/ath/ath12k/wmi.h | 16 +++---- 6 files changed, 60 insertions(+), 60 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 3fac4f00d3832c..2d91a6a4750ca0 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -533,11 +533,11 @@ struct ath12k_sta { enum ieee80211_sta_state state; }; -#define ATH12K_MIN_5G_FREQ 4150 -#define ATH12K_MIN_6G_FREQ 5925 -#define ATH12K_MAX_6G_FREQ 7115 +#define ATH12K_MIN_5GHZ_FREQ 4150 +#define ATH12K_MIN_6GHZ_FREQ 5925 +#define ATH12K_MAX_6GHZ_FREQ 7115 #define ATH12K_NUM_CHANS 101 -#define ATH12K_MAX_5G_CHAN 173 +#define ATH12K_MAX_5GHZ_CHAN 173 enum ath12k_hw_state { ATH12K_HW_STATE_OFF, diff --git a/drivers/net/wireless/ath/ath12k/debugfs.c b/drivers/net/wireless/ath/ath12k/debugfs.c index 57002215ddf168..5efe30cf77470a 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.c +++ b/drivers/net/wireless/ath/ath12k/debugfs.c @@ -88,8 +88,8 @@ static int ath12k_get_tpc_ctl_mode_idx(struct wmi_tpc_stats_arg *tpc_stats, u32 chan_freq = le32_to_cpu(tpc_stats->tpc_config.chan_freq); u8 band; - band = ((chan_freq > ATH12K_MIN_6G_FREQ) ? NL80211_BAND_6GHZ : - ((chan_freq > ATH12K_MIN_5G_FREQ) ? NL80211_BAND_5GHZ : + band = ((chan_freq > ATH12K_MIN_6GHZ_FREQ) ? NL80211_BAND_6GHZ : + ((chan_freq > ATH12K_MIN_5GHZ_FREQ) ? NL80211_BAND_5GHZ : NL80211_BAND_2GHZ)); if (band == NL80211_BAND_5GHZ || band == NL80211_BAND_6GHZ) { diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index d6794c753f3ae9..eaba6982676851 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -2426,8 +2426,8 @@ void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, channel_num = meta_data; center_freq = meta_data >> 16; - if (center_freq >= ATH12K_MIN_6G_FREQ && - center_freq <= ATH12K_MAX_6G_FREQ) { + if (center_freq >= ATH12K_MIN_6GHZ_FREQ && + center_freq <= ATH12K_MAX_6GHZ_FREQ) { rx_status->band = NL80211_BAND_6GHZ; rx_status->freq = center_freq; } else if (channel_num >= 1 && channel_num <= 14) { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index bd67140688290f..4134f9512d0386 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -874,12 +874,12 @@ static bool ath12k_mac_band_match(enum nl80211_band band1, enum WMI_HOST_WLAN_BA { switch (band1) { case NL80211_BAND_2GHZ: - if (band2 & WMI_HOST_WLAN_2G_CAP) + if (band2 & WMI_HOST_WLAN_2GHZ_CAP) return true; break; case NL80211_BAND_5GHZ: case NL80211_BAND_6GHZ: - if (band2 & WMI_HOST_WLAN_5G_CAP) + if (band2 & WMI_HOST_WLAN_5GHZ_CAP) return true; break; default: @@ -980,7 +980,7 @@ static int ath12k_mac_txpower_recalc(struct ath12k *ar) ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "txpower to set in hw %d\n", txpower / 2); - if ((pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) && + if ((pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) && ar->txpower_limit_2g != txpower) { param = WMI_PDEV_PARAM_TXPOWER_LIMIT2G; ret = ath12k_wmi_pdev_set_param(ar, param, @@ -990,7 +990,7 @@ static int ath12k_mac_txpower_recalc(struct ath12k *ar) ar->txpower_limit_2g = txpower; } - if ((pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) && + if ((pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) && ar->txpower_limit_5g != txpower) { param = WMI_PDEV_PARAM_TXPOWER_LIMIT5G; ret = ath12k_wmi_pdev_set_param(ar, param, @@ -1272,12 +1272,12 @@ static int ath12k_mac_monitor_vdev_create(struct ath12k *ar) arg.pdev_id = pdev->pdev_id; arg.if_stats_id = ATH12K_INVAL_VDEV_STATS_ID; - if (pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { arg.chains[NL80211_BAND_2GHZ].tx = ar->num_tx_chains; arg.chains[NL80211_BAND_2GHZ].rx = ar->num_rx_chains; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) { + if (pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { arg.chains[NL80211_BAND_5GHZ].tx = ar->num_tx_chains; arg.chains[NL80211_BAND_5GHZ].rx = ar->num_rx_chains; } @@ -3988,7 +3988,7 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, else rateidx = ffs(info->basic_rates) - 1; - if (ar->pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) + if (ar->pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) rateidx += ATH12K_MAC_FIRST_OFDM_RATE_IDX; bitrate = ath12k_legacy_rates[rateidx].bitrate; @@ -4162,9 +4162,9 @@ ath12k_mac_select_scan_device(struct ieee80211_hw *hw, * split the hw request and perform multiple scans */ - if (center_freq < ATH12K_MIN_5G_FREQ) + if (center_freq < ATH12K_MIN_5GHZ_FREQ) band = NL80211_BAND_2GHZ; - else if (center_freq < ATH12K_MIN_6G_FREQ) + else if (center_freq < ATH12K_MIN_6GHZ_FREQ) band = NL80211_BAND_5GHZ; else band = NL80211_BAND_6GHZ; @@ -6474,7 +6474,7 @@ static void ath12k_mac_setup_ht_vht_cap(struct ath12k *ar, rate_cap_tx_chainmask = ar->cfg_tx_chainmask >> cap->tx_chain_mask_shift; rate_cap_rx_chainmask = ar->cfg_rx_chainmask >> cap->rx_chain_mask_shift; - if (cap->supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (cap->supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { band = &ar->mac.sbands[NL80211_BAND_2GHZ]; ht_cap = cap->band[NL80211_BAND_2GHZ].ht_cap_info; if (ht_cap_info) @@ -6483,7 +6483,7 @@ static void ath12k_mac_setup_ht_vht_cap(struct ath12k *ar, rate_cap_rx_chainmask); } - if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP && + if (cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP && (ar->ab->hw_params->single_pdev_only || !ar->supports_6ghz)) { band = &ar->mac.sbands[NL80211_BAND_5GHZ]; @@ -6892,7 +6892,7 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, enum nl80211_band band; int count; - if (cap->supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (cap->supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { band = NL80211_BAND_2GHZ; count = ath12k_mac_copy_sband_iftype_data(ar, cap, ar->mac.iftype[band], @@ -6902,7 +6902,7 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, count); } - if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP) { + if (cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { band = NL80211_BAND_5GHZ; count = ath12k_mac_copy_sband_iftype_data(ar, cap, ar->mac.iftype[band], @@ -6912,7 +6912,7 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, count); } - if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP && + if (cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP && ar->supports_6ghz) { band = NL80211_BAND_6GHZ; count = ath12k_mac_copy_sband_iftype_data(ar, cap, @@ -7900,15 +7900,15 @@ static int ath12k_mac_setup_vdev_create_arg(struct ath12k_link_vif *arvif, return ret; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { arg->chains[NL80211_BAND_2GHZ].tx = ar->num_tx_chains; arg->chains[NL80211_BAND_2GHZ].rx = ar->num_rx_chains; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) { + if (pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { arg->chains[NL80211_BAND_5GHZ].tx = ar->num_tx_chains; arg->chains[NL80211_BAND_5GHZ].rx = ar->num_rx_chains; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP && + if (pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP && ar->supports_6ghz) { arg->chains[NL80211_BAND_6GHZ].tx = ar->num_tx_chains; arg->chains[NL80211_BAND_6GHZ].rx = ar->num_rx_chains; @@ -7937,7 +7937,7 @@ ath12k_mac_prepare_he_mode(struct ath12k_pdev *pdev, u32 viftype) u32 *hecap_phy_ptr = NULL; u32 hemode; - if (pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) + if (pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) cap_band = &pdev_cap->band[NL80211_BAND_2GHZ]; else cap_band = &pdev_cap->band[NL80211_BAND_5GHZ]; @@ -10637,10 +10637,10 @@ static u32 ath12k_get_phy_id(struct ath12k *ar, u32 band) struct ath12k_pdev *pdev = ar->pdev; struct ath12k_pdev_cap *pdev_cap = &pdev->cap; - if (band == WMI_HOST_WLAN_2G_CAP) + if (band == WMI_HOST_WLAN_2GHZ_CAP) return pdev_cap->band[NL80211_BAND_2GHZ].phy_id; - if (band == WMI_HOST_WLAN_5G_CAP) + if (band == WMI_HOST_WLAN_5GHZ_CAP) return pdev_cap->band[NL80211_BAND_5GHZ].phy_id; ath12k_warn(ar->ab, "unsupported phy cap:%d\n", band); @@ -10665,7 +10665,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, reg_cap = &ar->ab->hal_reg_cap[ar->pdev_idx]; - if (supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { channels = kmemdup(ath12k_2ghz_channels, sizeof(ath12k_2ghz_channels), GFP_KERNEL); @@ -10681,7 +10681,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, bands[NL80211_BAND_2GHZ] = band; if (ar->ab->hw_params->single_pdev_only) { - phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_2G_CAP); + phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_2GHZ_CAP); reg_cap = &ar->ab->hal_reg_cap[phy_id]; } ath12k_mac_update_ch_list(ar, band, @@ -10689,8 +10689,8 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, reg_cap->high_2ghz_chan); } - if (supported_bands & WMI_HOST_WLAN_5G_CAP) { - if (reg_cap->high_5ghz_chan >= ATH12K_MIN_6G_FREQ) { + if (supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { + if (reg_cap->high_5ghz_chan >= ATH12K_MIN_6GHZ_FREQ) { channels = kmemdup(ath12k_6ghz_channels, sizeof(ath12k_6ghz_channels), GFP_KERNEL); if (!channels) { @@ -10712,7 +10712,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, ah->use_6ghz_regd = true; } - if (reg_cap->low_5ghz_chan < ATH12K_MIN_6G_FREQ) { + if (reg_cap->low_5ghz_chan < ATH12K_MIN_6GHZ_FREQ) { channels = kmemdup(ath12k_5ghz_channels, sizeof(ath12k_5ghz_channels), GFP_KERNEL); @@ -10731,7 +10731,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, bands[NL80211_BAND_5GHZ] = band; if (ar->ab->hw_params->single_pdev_only) { - phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_5G_CAP); + phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_5GHZ_CAP); reg_cap = &ar->ab->hal_reg_cap[phy_id]; } diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 6374e86f89f3d5..d0705880cd3c9e 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -520,10 +520,10 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle, * band to band for a single radio, need to see how this should be * handled. */ - if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2G_CAP) { + if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2GHZ_CAP) { pdev_cap->tx_chain_mask = le32_to_cpu(mac_caps->tx_chain_mask_2g); pdev_cap->rx_chain_mask = le32_to_cpu(mac_caps->rx_chain_mask_2g); - } else if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5G_CAP) { + } else if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5GHZ_CAP) { pdev_cap->vht_cap = le32_to_cpu(mac_caps->vht_cap_info_5g); pdev_cap->vht_mcs = le32_to_cpu(mac_caps->vht_supp_mcs_5g); pdev_cap->he_mcs = le32_to_cpu(mac_caps->he_supp_mcs_5g); @@ -546,7 +546,7 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle, pdev_cap->rx_chain_mask_shift = find_first_bit((unsigned long *)&pdev_cap->rx_chain_mask, 32); - if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2G_CAP) { + if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2GHZ_CAP) { cap_band = &pdev_cap->band[NL80211_BAND_2GHZ]; cap_band->phy_id = le32_to_cpu(mac_caps->phy_id); cap_band->max_bw_supported = le32_to_cpu(mac_caps->max_bw_supported_2g); @@ -566,7 +566,7 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle, le32_to_cpu(mac_caps->he_ppet2g.ppet16_ppet8_ru3_ru0[i]); } - if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5G_CAP) { + if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5GHZ_CAP) { cap_band = &pdev_cap->band[NL80211_BAND_5GHZ]; cap_band->phy_id = le32_to_cpu(mac_caps->phy_id); cap_band->max_bw_supported = @@ -3646,15 +3646,15 @@ ath12k_fill_band_to_mac_param(struct ath12k_base *soc, arg[i].pdev_id = pdev->pdev_id; switch (pdev->cap.supported_bands) { - case WMI_HOST_WLAN_2G_5G_CAP: + case WMI_HOST_WLAN_2GHZ_5GHZ_CAP: arg[i].start_freq = hal_reg_cap->low_2ghz_chan; arg[i].end_freq = hal_reg_cap->high_5ghz_chan; break; - case WMI_HOST_WLAN_2G_CAP: + case WMI_HOST_WLAN_2GHZ_CAP: arg[i].start_freq = hal_reg_cap->low_2ghz_chan; arg[i].end_freq = hal_reg_cap->high_2ghz_chan; break; - case WMI_HOST_WLAN_5G_CAP: + case WMI_HOST_WLAN_5GHZ_CAP: arg[i].start_freq = hal_reg_cap->low_5ghz_chan; arg[i].end_freq = hal_reg_cap->high_5ghz_chan; break; @@ -4699,7 +4699,7 @@ ath12k_wmi_tlv_mac_phy_caps_ext_parse(struct ath12k_base *ab, bands = pdev->cap.supported_bands; } - if (bands & WMI_HOST_WLAN_2G_CAP) { + if (bands & WMI_HOST_WLAN_2GHZ_CAP) { ath12k_wmi_eht_caps_parse(pdev, NL80211_BAND_2GHZ, caps->eht_cap_mac_info_2ghz, caps->eht_cap_phy_info_2ghz, @@ -4708,7 +4708,7 @@ ath12k_wmi_tlv_mac_phy_caps_ext_parse(struct ath12k_base *ab, caps->eht_cap_info_internal); } - if (bands & WMI_HOST_WLAN_5G_CAP) { + if (bands & WMI_HOST_WLAN_5GHZ_CAP) { ath12k_wmi_eht_caps_parse(pdev, NL80211_BAND_5GHZ, caps->eht_cap_mac_info_5ghz, caps->eht_cap_phy_info_5ghz, @@ -4922,7 +4922,7 @@ static u8 ath12k_wmi_ignore_num_extra_rules(struct ath12k_wmi_reg_rule_ext_param for (count = 0; count < num_reg_rules; count++) { start_freq = le32_get_bits(rule[count].freq_info, REG_RULE_START_FREQ); - if (start_freq >= ATH12K_MIN_6G_FREQ) + if (start_freq >= ATH12K_MIN_6GHZ_FREQ) num_invalid_5ghz_rules++; } @@ -4992,9 +4992,9 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, for (i = 0; i < WMI_REG_CURRENT_MAX_AP_TYPE; i++) { num_6g_reg_rules_ap[i] = reg_info->num_6g_reg_rules_ap[i]; - if (num_6g_reg_rules_ap[i] > MAX_6G_REG_RULES) { + if (num_6g_reg_rules_ap[i] > MAX_6GHZ_REG_RULES) { ath12k_warn(ab, "Num 6G reg rules for AP mode(%d) exceeds max limit (num_6g_reg_rules_ap: %d, max_rules: %d)\n", - i, num_6g_reg_rules_ap[i], MAX_6G_REG_RULES); + i, num_6g_reg_rules_ap[i], MAX_6GHZ_REG_RULES); kfree(tb); return -EINVAL; } @@ -5015,9 +5015,9 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, reg_info->num_6g_reg_rules_cl[WMI_REG_VLP_AP][i]; total_reg_rules += num_6g_reg_rules_cl[WMI_REG_VLP_AP][i]; - if (num_6g_reg_rules_cl[WMI_REG_INDOOR_AP][i] > MAX_6G_REG_RULES || - num_6g_reg_rules_cl[WMI_REG_STD_POWER_AP][i] > MAX_6G_REG_RULES || - num_6g_reg_rules_cl[WMI_REG_VLP_AP][i] > MAX_6G_REG_RULES) { + if (num_6g_reg_rules_cl[WMI_REG_INDOOR_AP][i] > MAX_6GHZ_REG_RULES || + num_6g_reg_rules_cl[WMI_REG_STD_POWER_AP][i] > MAX_6GHZ_REG_RULES || + num_6g_reg_rules_cl[WMI_REG_VLP_AP][i] > MAX_6GHZ_REG_RULES) { ath12k_warn(ab, "Num 6g client reg rules exceeds max limit, for client(type: %d)\n", i); kfree(tb); @@ -6317,13 +6317,13 @@ static void ath12k_mgmt_rx_event(struct ath12k_base *ab, struct sk_buff *skb) if (rx_ev.status & WMI_RX_STATUS_ERR_MIC) status->flag |= RX_FLAG_MMIC_ERROR; - if (rx_ev.chan_freq >= ATH12K_MIN_6G_FREQ && - rx_ev.chan_freq <= ATH12K_MAX_6G_FREQ) { + if (rx_ev.chan_freq >= ATH12K_MIN_6GHZ_FREQ && + rx_ev.chan_freq <= ATH12K_MAX_6GHZ_FREQ) { status->band = NL80211_BAND_6GHZ; status->freq = rx_ev.chan_freq; } else if (rx_ev.channel >= 1 && rx_ev.channel <= 14) { status->band = NL80211_BAND_2GHZ; - } else if (rx_ev.channel >= 36 && rx_ev.channel <= ATH12K_MAX_5G_CHAN) { + } else if (rx_ev.channel >= 36 && rx_ev.channel <= ATH12K_MAX_5GHZ_CHAN) { status->band = NL80211_BAND_5GHZ; } else { /* Shouldn't happen unless list of advertised channels to diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 1ba33e30ddd279..be4ac91dd34f50 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -216,9 +216,9 @@ enum wmi_host_hw_mode_priority { }; enum WMI_HOST_WLAN_BAND { - WMI_HOST_WLAN_2G_CAP = 1, - WMI_HOST_WLAN_5G_CAP = 2, - WMI_HOST_WLAN_2G_5G_CAP = 3, + WMI_HOST_WLAN_2GHZ_CAP = 1, + WMI_HOST_WLAN_5GHZ_CAP = 2, + WMI_HOST_WLAN_2GHZ_5GHZ_CAP = 3, }; enum wmi_cmd_group { @@ -2690,8 +2690,8 @@ enum wmi_channel_width { * 2 - index for 160 MHz, first 3 bytes valid * 3 - index for 320 MHz, first 3 bytes valid */ -#define WMI_MAX_EHT_SUPP_MCS_2G_SIZE 2 -#define WMI_MAX_EHT_SUPP_MCS_5G_SIZE 4 +#define WMI_MAX_EHT_SUPP_MCS_2GHZ_SIZE 2 +#define WMI_MAX_EHT_SUPP_MCS_5GHZ_SIZE 4 #define WMI_EHTCAP_TXRX_MCS_NSS_IDX_80 0 #define WMI_EHTCAP_TXRX_MCS_NSS_IDX_160 1 @@ -2730,8 +2730,8 @@ struct ath12k_wmi_caps_ext_params { struct ath12k_wmi_ppe_threshold_params eht_ppet_2ghz; struct ath12k_wmi_ppe_threshold_params eht_ppet_5ghz; __le32 eht_cap_info_internal; - __le32 eht_supp_mcs_ext_2ghz[WMI_MAX_EHT_SUPP_MCS_2G_SIZE]; - __le32 eht_supp_mcs_ext_5ghz[WMI_MAX_EHT_SUPP_MCS_5G_SIZE]; + __le32 eht_supp_mcs_ext_2ghz[WMI_MAX_EHT_SUPP_MCS_2GHZ_SIZE]; + __le32 eht_supp_mcs_ext_5ghz[WMI_MAX_EHT_SUPP_MCS_5GHZ_SIZE]; __le32 eml_capability; __le32 mld_capability; } __packed; @@ -4108,7 +4108,7 @@ struct ath12k_wmi_eht_rate_set_params { #define MAX_REG_RULES 10 #define REG_ALPHA2_LEN 2 -#define MAX_6G_REG_RULES 5 +#define MAX_6GHZ_REG_RULES 5 enum wmi_start_event_param { WMI_VDEV_START_RESP_EVENT = 0, From b05e7e6ada85c233be49ad65affe621068a1f9fa Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Mon, 24 Mar 2025 11:55:13 +0530 Subject: [PATCH 2267/2924] wifi: ath12k: change the status update in the monitor Rx [ Upstream commit 5393dcb4520911f2b4a980e7e3c2c0de2bbf9ec7 ] Currently, in the monitor Rx path, status is filled from the RX TLV header present in the MSDU data. This logic is inherited from ath11k. However, in the ath12k 802.11be hardware, the Rx TLV header is not present in the MSDU data. This information is reported under various TLV tags. Therefore, avoid the existing status filling by accumulating the needed information in the PPDU information structure and fill the status. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: P Praneesh Tested-by: Nicolas Escande Reviewed-by: Vasanthakumar Thiagarajan Signed-off-by: Karthikeyan Periyasamy Link: https://patch.msgid.link/20250324062518.2752822-6-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson Stable-dep-of: f5d6b15d9503 ("wifi: ath12k: fix wrong handling of CCMP256 and GCMP ciphers") Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/core.h | 16 ++- drivers/net/wireless/ath/ath12k/dp_mon.c | 138 ++++++++++++++++++++++- drivers/net/wireless/ath/ath12k/hal_rx.h | 5 +- 3 files changed, 151 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 2d91a6a4750ca0..2ee83517eadc72 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -533,9 +533,19 @@ struct ath12k_sta { enum ieee80211_sta_state state; }; -#define ATH12K_MIN_5GHZ_FREQ 4150 -#define ATH12K_MIN_6GHZ_FREQ 5925 -#define ATH12K_MAX_6GHZ_FREQ 7115 +#define ATH12K_HALF_20MHZ_BW 10 +#define ATH12K_2GHZ_MIN_CENTER 2412 +#define ATH12K_2GHZ_MAX_CENTER 2484 +#define ATH12K_5GHZ_MIN_CENTER 4900 +#define ATH12K_5GHZ_MAX_CENTER 5920 +#define ATH12K_6GHZ_MIN_CENTER 5935 +#define ATH12K_6GHZ_MAX_CENTER 7115 +#define ATH12K_MIN_2GHZ_FREQ (ATH12K_2GHZ_MIN_CENTER - ATH12K_HALF_20MHZ_BW - 1) +#define ATH12K_MAX_2GHZ_FREQ (ATH12K_2GHZ_MAX_CENTER + ATH12K_HALF_20MHZ_BW + 1) +#define ATH12K_MIN_5GHZ_FREQ (ATH12K_5GHZ_MIN_CENTER - ATH12K_HALF_20MHZ_BW) +#define ATH12K_MAX_5GHZ_FREQ (ATH12K_5GHZ_MAX_CENTER + ATH12K_HALF_20MHZ_BW) +#define ATH12K_MIN_6GHZ_FREQ (ATH12K_6GHZ_MIN_CENTER - ATH12K_HALF_20MHZ_BW) +#define ATH12K_MAX_6GHZ_FREQ (ATH12K_6GHZ_MAX_CENTER + ATH12K_HALF_20MHZ_BW) #define ATH12K_NUM_CHANS 101 #define ATH12K_MAX_5GHZ_CHAN 173 diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index 6fc3b921176561..9013c6ce942576 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -1700,18 +1700,128 @@ static void ath12k_dp_mon_rx_msdus_set_payload(struct ath12k *ar, skb_pull(head_msdu, rx_pkt_offset + l2_hdr_offset); } +static void +ath12k_dp_mon_fill_rx_stats_info(struct ath12k *ar, + struct hal_rx_mon_ppdu_info *ppdu_info, + struct ieee80211_rx_status *rx_status) +{ + u32 center_freq = ppdu_info->freq; + + rx_status->freq = center_freq; + rx_status->bw = ath12k_mac_bw_to_mac80211_bw(ppdu_info->bw); + rx_status->nss = ppdu_info->nss; + rx_status->rate_idx = 0; + rx_status->encoding = RX_ENC_LEGACY; + rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; + + if (center_freq >= ATH12K_MIN_6GHZ_FREQ && + center_freq <= ATH12K_MAX_6GHZ_FREQ) { + rx_status->band = NL80211_BAND_6GHZ; + } else if (center_freq >= ATH12K_MIN_2GHZ_FREQ && + center_freq <= ATH12K_MAX_2GHZ_FREQ) { + rx_status->band = NL80211_BAND_2GHZ; + } else if (center_freq >= ATH12K_MIN_5GHZ_FREQ && + center_freq <= ATH12K_MAX_5GHZ_FREQ) { + rx_status->band = NL80211_BAND_5GHZ; + } else { + rx_status->band = NUM_NL80211_BANDS; + } +} + +static void +ath12k_dp_mon_fill_rx_rate(struct ath12k *ar, + struct hal_rx_mon_ppdu_info *ppdu_info, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_supported_band *sband; + enum rx_msdu_start_pkt_type pkt_type; + u8 rate_mcs, nss, sgi; + bool is_cck; + + pkt_type = ppdu_info->preamble_type; + rate_mcs = ppdu_info->rate; + nss = ppdu_info->nss; + sgi = ppdu_info->gi; + + switch (pkt_type) { + case RX_MSDU_START_PKT_TYPE_11A: + case RX_MSDU_START_PKT_TYPE_11B: + is_cck = (pkt_type == RX_MSDU_START_PKT_TYPE_11B); + if (rx_status->band < NUM_NL80211_BANDS) { + sband = &ar->mac.sbands[rx_status->band]; + rx_status->rate_idx = ath12k_mac_hw_rate_to_idx(sband, rate_mcs, + is_cck); + } + break; + case RX_MSDU_START_PKT_TYPE_11N: + rx_status->encoding = RX_ENC_HT; + if (rate_mcs > ATH12K_HT_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in HT mode %d\n", + rate_mcs); + break; + } + rx_status->rate_idx = rate_mcs + (8 * (nss - 1)); + if (sgi) + rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; + break; + case RX_MSDU_START_PKT_TYPE_11AC: + rx_status->encoding = RX_ENC_VHT; + rx_status->rate_idx = rate_mcs; + if (rate_mcs > ATH12K_VHT_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in VHT mode %d\n", + rate_mcs); + break; + } + if (sgi) + rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; + break; + case RX_MSDU_START_PKT_TYPE_11AX: + rx_status->rate_idx = rate_mcs; + if (rate_mcs > ATH12K_HE_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in HE mode %d\n", + rate_mcs); + break; + } + rx_status->encoding = RX_ENC_HE; + rx_status->he_gi = ath12k_he_gi_to_nl80211_he_gi(sgi); + break; + case RX_MSDU_START_PKT_TYPE_11BE: + rx_status->rate_idx = rate_mcs; + if (rate_mcs > ATH12K_EHT_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in EHT mode %d\n", + rate_mcs); + break; + } + rx_status->encoding = RX_ENC_EHT; + rx_status->he_gi = ath12k_he_gi_to_nl80211_he_gi(sgi); + break; + default: + ath12k_dbg(ar->ab, ATH12K_DBG_DATA, + "monitor receives invalid preamble type %d", + pkt_type); + break; + } +} + static struct sk_buff * ath12k_dp_mon_rx_merg_msdus(struct ath12k *ar, struct dp_mon_mpdu *mon_mpdu, + struct hal_rx_mon_ppdu_info *ppdu_info, struct ieee80211_rx_status *rxs) { struct ath12k_base *ab = ar->ab; struct sk_buff *msdu, *mpdu_buf, *prev_buf, *head_frag_list; struct sk_buff *head_msdu, *tail_msdu; - struct hal_rx_desc *rx_desc, *tail_rx_desc; + struct hal_rx_desc *rx_desc; u8 *hdr_desc, *dest, decap_format = mon_mpdu->decap_format; struct ieee80211_hdr_3addr *wh; + struct ieee80211_channel *channel; u32 frag_list_sum_len = 0; + u8 channel_num = ppdu_info->chan_num; mpdu_buf = NULL; head_msdu = mon_mpdu->head; @@ -1720,9 +1830,29 @@ ath12k_dp_mon_rx_merg_msdus(struct ath12k *ar, if (!head_msdu) goto err_merge_fail; - tail_rx_desc = (struct hal_rx_desc *)tail_msdu->data; + ath12k_dp_mon_fill_rx_stats_info(ar, ppdu_info, rxs); + + if (unlikely(rxs->band == NUM_NL80211_BANDS || + !ath12k_ar_to_hw(ar)->wiphy->bands[rxs->band])) { + ath12k_dbg(ar->ab, ATH12K_DBG_DATA, + "sband is NULL for status band %d channel_num %d center_freq %d pdev_id %d\n", + rxs->band, channel_num, ppdu_info->freq, ar->pdev_idx); + + spin_lock_bh(&ar->data_lock); + channel = ar->rx_channel; + if (channel) { + rxs->band = channel->band; + channel_num = + ieee80211_frequency_to_channel(channel->center_freq); + } + spin_unlock_bh(&ar->data_lock); + } + + if (rxs->band < NUM_NL80211_BANDS) + rxs->freq = ieee80211_channel_to_frequency(channel_num, + rxs->band); - ath12k_dp_rx_h_ppdu(ar, tail_rx_desc, rxs); + ath12k_dp_mon_fill_rx_rate(ar, ppdu_info, rxs); if (decap_format == DP_RX_DECAP_TYPE_RAW) { ath12k_dp_mon_rx_msdus_set_payload(ar, head_msdu, tail_msdu); @@ -2038,7 +2168,7 @@ static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, struct ieee80211_rx_status *rxs = &dp->rx_status; u8 decap = DP_RX_DECAP_TYPE_RAW; - mon_skb = ath12k_dp_mon_rx_merg_msdus(ar, mon_mpdu, rxs); + mon_skb = ath12k_dp_mon_rx_merg_msdus(ar, mon_mpdu, ppduinfo, rxs); if (!mon_skb) goto mon_deliver_fail; diff --git a/drivers/net/wireless/ath/ath12k/hal_rx.h b/drivers/net/wireless/ath/ath12k/hal_rx.h index 6f10e4222ba6e6..c753eb2a03ad24 100644 --- a/drivers/net/wireless/ath/ath12k/hal_rx.h +++ b/drivers/net/wireless/ath/ath12k/hal_rx.h @@ -509,7 +509,10 @@ struct hal_rx_mpdu_start { struct hal_rx_msdu_end { __le32 info0; - __le32 rsvd0[18]; + __le32 rsvd0[9]; + __le16 info00; + __le16 info01; + __le32 rsvd00[8]; __le32 info1; __le32 rsvd1[10]; __le32 info2; From 3ff947186f8fef9e9dfc97625ee23eda6a768b0e Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Wed, 2 Apr 2025 23:59:16 +0530 Subject: [PATCH 2268/2924] wifi: ath12k: add rx_info to capture required field from rx descriptor [ Upstream commit e88e6e3c9ada84ceed3fa223ce11af94fcaf3ad3 ] In ath12k_dp_rx_h_mpdu(), as part of undecap to native wifi mode, the rx descriptor memory is getting overwritten. After this function call, all the rx descriptor related memory accesses give invalid values. To handle this scenario, introduce a new structure ath12k_dp_rx_info which pre-caches all the required fields from the rx descriptor before calling ath12k_dp_rx_h_ppdu(). This rx_info structure will be used in the next patch. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Co-developed-by: Karthikeyan Periyasamy Signed-off-by: Karthikeyan Periyasamy Signed-off-by: P Praneesh Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250402182917.2715596-2-praneesh.p@oss.qualcomm.com Signed-off-by: Jeff Johnson Stable-dep-of: f5d6b15d9503 ("wifi: ath12k: fix wrong handling of CCMP256 and GCMP ciphers") Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_rx.c | 80 ++++++++++++++++++------- drivers/net/wireless/ath/ath12k/dp_rx.h | 18 ++++++ 2 files changed, 78 insertions(+), 20 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index eaba6982676851..dcbd97975f642b 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -2405,6 +2405,30 @@ static void ath12k_dp_rx_h_rate(struct ath12k *ar, struct hal_rx_desc *rx_desc, } } +static void ath12k_dp_rx_h_fetch_info(struct ath12k_base *ab, + struct hal_rx_desc *rx_desc, + struct ath12k_dp_rx_info *rx_info) +{ + rx_info->ip_csum_fail = ath12k_dp_rx_h_ip_cksum_fail(ab, rx_desc); + rx_info->l4_csum_fail = ath12k_dp_rx_h_l4_cksum_fail(ab, rx_desc); + rx_info->is_mcbc = ath12k_dp_rx_h_is_da_mcbc(ab, rx_desc); + rx_info->decap_type = ath12k_dp_rx_h_decap_type(ab, rx_desc); + rx_info->pkt_type = ath12k_dp_rx_h_pkt_type(ab, rx_desc); + rx_info->sgi = ath12k_dp_rx_h_sgi(ab, rx_desc); + rx_info->rate_mcs = ath12k_dp_rx_h_rate_mcs(ab, rx_desc); + rx_info->bw = ath12k_dp_rx_h_rx_bw(ab, rx_desc); + rx_info->nss = ath12k_dp_rx_h_nss(ab, rx_desc); + rx_info->tid = ath12k_dp_rx_h_tid(ab, rx_desc); + rx_info->peer_id = ath12k_dp_rx_h_peer_id(ab, rx_desc); + rx_info->phy_meta_data = ath12k_dp_rx_h_freq(ab, rx_desc); + + if (ath12k_dp_rxdesc_mac_addr2_valid(ab, rx_desc)) { + ether_addr_copy(rx_info->addr2, + ath12k_dp_rxdesc_get_mpdu_start_addr2(ab, rx_desc)); + rx_info->addr2_present = true; + } +} + void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, struct ieee80211_rx_status *rx_status) { @@ -2567,7 +2591,7 @@ static bool ath12k_dp_rx_check_nwifi_hdr_len_valid(struct ath12k_base *ab, static int ath12k_dp_rx_process_msdu(struct ath12k *ar, struct sk_buff *msdu, struct sk_buff_head *msdu_list, - struct ieee80211_rx_status *rx_status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; struct hal_rx_desc *rx_desc, *lrx_desc; @@ -2627,10 +2651,11 @@ static int ath12k_dp_rx_process_msdu(struct ath12k *ar, goto free_out; } - ath12k_dp_rx_h_ppdu(ar, rx_desc, rx_status); - ath12k_dp_rx_h_mpdu(ar, msdu, rx_desc, rx_status); + ath12k_dp_rx_h_fetch_info(ab, rx_desc, rx_info); + ath12k_dp_rx_h_ppdu(ar, rx_desc, rx_info->rx_status); + ath12k_dp_rx_h_mpdu(ar, msdu, rx_desc, rx_info->rx_status); - rx_status->flag |= RX_FLAG_SKIP_MONITOR | RX_FLAG_DUP_VALIDATED; + rx_info->rx_status->flag |= RX_FLAG_SKIP_MONITOR | RX_FLAG_DUP_VALIDATED; return 0; @@ -2650,12 +2675,16 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, struct ath12k *ar; struct ath12k_hw_link *hw_links = ag->hw_links; struct ath12k_base *partner_ab; + struct ath12k_dp_rx_info rx_info; u8 hw_link_id, pdev_id; int ret; if (skb_queue_empty(msdu_list)) return; + rx_info.addr2_present = false; + rx_info.rx_status = &rx_status; + rcu_read_lock(); while ((msdu = __skb_dequeue(msdu_list))) { @@ -2676,7 +2705,7 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, continue; } - ret = ath12k_dp_rx_process_msdu(ar, msdu, msdu_list, &rx_status); + ret = ath12k_dp_rx_process_msdu(ar, msdu, msdu_list, &rx_info); if (ret) { ath12k_dbg(ab, ATH12K_DBG_DATA, "Unable to process msdu %d", ret); @@ -2977,6 +3006,7 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer struct ieee80211_rx_status *rxs = IEEE80211_SKB_RXCB(msdu); struct ieee80211_key_conf *key_conf; struct ieee80211_hdr *hdr; + struct ath12k_dp_rx_info rx_info; u8 mic[IEEE80211_CCMP_MIC_LEN]; int head_len, tail_len, ret; size_t data_len; @@ -2987,6 +3017,9 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer if (ath12k_dp_rx_h_enctype(ab, rx_desc) != HAL_ENCRYPT_TYPE_TKIP_MIC) return 0; + rx_info.addr2_present = false; + rx_info.rx_status = rxs; + hdr = (struct ieee80211_hdr *)(msdu->data + hal_rx_desc_sz); hdr_len = ieee80211_hdrlen(hdr->frame_control); head_len = hdr_len + hal_rx_desc_sz + IEEE80211_TKIP_IV_LEN; @@ -3013,6 +3046,8 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer (ATH12K_SKB_RXCB(msdu))->is_first_msdu = true; (ATH12K_SKB_RXCB(msdu))->is_last_msdu = true; + ath12k_dp_rx_h_fetch_info(ab, rx_desc, &rx_info); + rxs->flag |= RX_FLAG_MMIC_ERROR | RX_FLAG_MMIC_STRIPPED | RX_FLAG_IV_STRIPPED | RX_FLAG_DECRYPTED; skb_pull(msdu, hal_rx_desc_sz); @@ -3709,7 +3744,7 @@ static void ath12k_dp_rx_null_q_desc_sg_drop(struct ath12k *ar, } static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status, + struct ath12k_dp_rx_info *rx_info, struct sk_buff_head *msdu_list) { struct ath12k_base *ab = ar->ab; @@ -3765,9 +3800,9 @@ static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(ab, desc, msdu))) return -EINVAL; - ath12k_dp_rx_h_ppdu(ar, desc, status); - - ath12k_dp_rx_h_mpdu(ar, msdu, desc, status); + ath12k_dp_rx_h_fetch_info(ab, desc, rx_info); + ath12k_dp_rx_h_ppdu(ar, desc, rx_info->rx_status); + ath12k_dp_rx_h_mpdu(ar, msdu, desc, rx_info->rx_status); rxcb->tid = ath12k_dp_rx_h_tid(ab, desc); @@ -3779,7 +3814,7 @@ static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, } static bool ath12k_dp_rx_h_reo_err(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status, + struct ath12k_dp_rx_info *rx_info, struct sk_buff_head *msdu_list) { struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); @@ -3789,7 +3824,7 @@ static bool ath12k_dp_rx_h_reo_err(struct ath12k *ar, struct sk_buff *msdu, switch (rxcb->err_code) { case HAL_REO_DEST_RING_ERROR_CODE_DESC_ADDR_ZERO: - if (ath12k_dp_rx_h_null_q_desc(ar, msdu, status, msdu_list)) + if (ath12k_dp_rx_h_null_q_desc(ar, msdu, rx_info, msdu_list)) drop = true; break; case HAL_REO_DEST_RING_ERROR_CODE_PN_CHECK_FAILED: @@ -3810,7 +3845,7 @@ static bool ath12k_dp_rx_h_reo_err(struct ath12k *ar, struct sk_buff *msdu, } static bool ath12k_dp_rx_h_tkip_mic_err(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; u16 msdu_len; @@ -3839,18 +3874,18 @@ static bool ath12k_dp_rx_h_tkip_mic_err(struct ath12k *ar, struct sk_buff *msdu, if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(ab, desc, msdu))) return true; - ath12k_dp_rx_h_ppdu(ar, desc, status); + ath12k_dp_rx_h_ppdu(ar, desc, rx_info->rx_status); - status->flag |= (RX_FLAG_MMIC_STRIPPED | RX_FLAG_MMIC_ERROR | - RX_FLAG_DECRYPTED); + rx_info->rx_status->flag |= (RX_FLAG_MMIC_STRIPPED | RX_FLAG_MMIC_ERROR | + RX_FLAG_DECRYPTED); ath12k_dp_rx_h_undecap(ar, msdu, desc, - HAL_ENCRYPT_TYPE_TKIP_MIC, status, false); + HAL_ENCRYPT_TYPE_TKIP_MIC, rx_info->rx_status, false); return false; } static bool ath12k_dp_rx_h_rxdma_err(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); @@ -3865,7 +3900,8 @@ static bool ath12k_dp_rx_h_rxdma_err(struct ath12k *ar, struct sk_buff *msdu, case HAL_REO_ENTR_RING_RXDMA_ECODE_TKIP_MIC_ERR: err_bitmap = ath12k_dp_rx_h_mpdu_err(ab, rx_desc); if (err_bitmap & HAL_RX_MPDU_ERR_TKIP_MIC) { - drop = ath12k_dp_rx_h_tkip_mic_err(ar, msdu, status); + ath12k_dp_rx_h_fetch_info(ab, rx_desc, rx_info); + drop = ath12k_dp_rx_h_tkip_mic_err(ar, msdu, rx_info); break; } fallthrough; @@ -3887,14 +3923,18 @@ static void ath12k_dp_rx_wbm_err(struct ath12k *ar, { struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); struct ieee80211_rx_status rxs = {0}; + struct ath12k_dp_rx_info rx_info; bool drop = true; + rx_info.addr2_present = false; + rx_info.rx_status = &rxs; + switch (rxcb->err_rel_src) { case HAL_WBM_REL_SRC_MODULE_REO: - drop = ath12k_dp_rx_h_reo_err(ar, msdu, &rxs, msdu_list); + drop = ath12k_dp_rx_h_reo_err(ar, msdu, &rx_info, msdu_list); break; case HAL_WBM_REL_SRC_MODULE_RXDMA: - drop = ath12k_dp_rx_h_rxdma_err(ar, msdu, &rxs); + drop = ath12k_dp_rx_h_rxdma_err(ar, msdu, &rx_info); break; default: /* msdu will get freed */ diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.h b/drivers/net/wireless/ath/ath12k/dp_rx.h index 88e42365a9d8bc..0e7cec42a8d13b 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.h +++ b/drivers/net/wireless/ath/ath12k/dp_rx.h @@ -65,6 +65,24 @@ struct ath12k_dp_rx_rfc1042_hdr { __be16 snap_type; } __packed; +struct ath12k_dp_rx_info { + struct ieee80211_rx_status *rx_status; + u32 phy_meta_data; + u16 peer_id; + u8 decap_type; + u8 pkt_type; + u8 sgi; + u8 rate_mcs; + u8 bw; + u8 nss; + u8 addr2[ETH_ALEN]; + u8 tid; + bool ip_csum_fail; + bool l4_csum_fail; + bool is_mcbc; + bool addr2_present; +}; + static inline u32 ath12k_he_gi_to_nl80211_he_gi(u8 sgi) { u32 ret = 0; From 124bd8cea02395a1a140f1dcc5e57c65cdd428af Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Wed, 2 Apr 2025 23:59:17 +0530 Subject: [PATCH 2269/2924] wifi: ath12k: replace the usage of rx desc with rx_info [ Upstream commit bd00cc7e8a4c1048d14c9a9e9790c582119785fb ] In ath12k_dp_rx_h_mpdu(), during the undecap to native wifi mode, the rx descriptor memory is overwritten. After this function call, any subsequent accesses to rx descriptor related memory yield invalid values. Fix this by replacing instances where rx_desc was used with the pre-cached information in rx_info. This ensures that the values populated from the rx descriptor are accurate and prevents invalid memory access. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Co-developed-by: Karthikeyan Periyasamy Signed-off-by: Karthikeyan Periyasamy Signed-off-by: P Praneesh Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250402182917.2715596-3-praneesh.p@oss.qualcomm.com Signed-off-by: Jeff Johnson Stable-dep-of: f5d6b15d9503 ("wifi: ath12k: fix wrong handling of CCMP256 and GCMP ciphers") Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_mon.c | 4 +- drivers/net/wireless/ath/ath12k/dp_rx.c | 107 ++++++++++------------- drivers/net/wireless/ath/ath12k/dp_rx.h | 8 +- 3 files changed, 52 insertions(+), 67 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index 9013c6ce942576..600d97169f241a 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -2093,6 +2093,7 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct struct ieee80211_sta *pubsta = NULL; struct ath12k_peer *peer; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); + struct ath12k_dp_rx_info rx_info; bool is_mcbc = rxcb->is_mcbc; bool is_eapol_tkip = rxcb->is_eapol; @@ -2104,7 +2105,8 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct } spin_lock_bh(&ar->ab->base_lock); - peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu); + rx_info.addr2_present = false; + peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu, &rx_info); if (peer && peer->sta) { pubsta = peer->sta; if (pubsta->valid_links) { diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index dcbd97975f642b..f7e176e9c01a66 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1910,17 +1910,11 @@ static struct sk_buff *ath12k_dp_rx_get_msdu_last_buf(struct sk_buff_head *msdu_ return NULL; } -static void ath12k_dp_rx_h_csum_offload(struct ath12k *ar, struct sk_buff *msdu) +static void ath12k_dp_rx_h_csum_offload(struct sk_buff *msdu, + struct ath12k_dp_rx_info *rx_info) { - struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); - struct ath12k_base *ab = ar->ab; - bool ip_csum_fail, l4_csum_fail; - - ip_csum_fail = ath12k_dp_rx_h_ip_cksum_fail(ab, rxcb->rx_desc); - l4_csum_fail = ath12k_dp_rx_h_l4_cksum_fail(ab, rxcb->rx_desc); - - msdu->ip_summed = (ip_csum_fail || l4_csum_fail) ? - CHECKSUM_NONE : CHECKSUM_UNNECESSARY; + msdu->ip_summed = (rx_info->ip_csum_fail || rx_info->l4_csum_fail) ? + CHECKSUM_NONE : CHECKSUM_UNNECESSARY; } static int ath12k_dp_rx_crypto_mic_len(struct ath12k *ar, @@ -2222,10 +2216,10 @@ static void ath12k_dp_rx_h_undecap(struct ath12k *ar, struct sk_buff *msdu, } struct ath12k_peer * -ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu) +ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu, + struct ath12k_dp_rx_info *rx_info) { struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); - struct hal_rx_desc *rx_desc = rxcb->rx_desc; struct ath12k_peer *peer = NULL; lockdep_assert_held(&ab->base_lock); @@ -2236,39 +2230,35 @@ ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu) if (peer) return peer; - if (!rx_desc || !(ath12k_dp_rxdesc_mac_addr2_valid(ab, rx_desc))) - return NULL; + if (rx_info->addr2_present) + peer = ath12k_peer_find_by_addr(ab, rx_info->addr2); - peer = ath12k_peer_find_by_addr(ab, - ath12k_dp_rxdesc_get_mpdu_start_addr2(ab, - rx_desc)); return peer; } static void ath12k_dp_rx_h_mpdu(struct ath12k *ar, struct sk_buff *msdu, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status) + struct ath12k_dp_rx_info *rx_info) { - bool fill_crypto_hdr; struct ath12k_base *ab = ar->ab; struct ath12k_skb_rxcb *rxcb; enum hal_encrypt_type enctype; bool is_decrypted = false; struct ieee80211_hdr *hdr; struct ath12k_peer *peer; + struct ieee80211_rx_status *rx_status = rx_info->rx_status; u32 err_bitmap; /* PN for multicast packets will be checked in mac80211 */ rxcb = ATH12K_SKB_RXCB(msdu); - fill_crypto_hdr = ath12k_dp_rx_h_is_da_mcbc(ar->ab, rx_desc); - rxcb->is_mcbc = fill_crypto_hdr; + rxcb->is_mcbc = rx_info->is_mcbc; if (rxcb->is_mcbc) - rxcb->peer_id = ath12k_dp_rx_h_peer_id(ar->ab, rx_desc); + rxcb->peer_id = rx_info->peer_id; spin_lock_bh(&ar->ab->base_lock); - peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu); + peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu, rx_info); if (peer) { if (rxcb->is_mcbc) enctype = peer->sec_type_grp; @@ -2298,7 +2288,7 @@ static void ath12k_dp_rx_h_mpdu(struct ath12k *ar, if (is_decrypted) { rx_status->flag |= RX_FLAG_DECRYPTED | RX_FLAG_MMIC_STRIPPED; - if (fill_crypto_hdr) + if (rx_info->is_mcbc) rx_status->flag |= RX_FLAG_MIC_STRIPPED | RX_FLAG_ICV_STRIPPED; else @@ -2306,37 +2296,28 @@ static void ath12k_dp_rx_h_mpdu(struct ath12k *ar, RX_FLAG_PN_VALIDATED; } - ath12k_dp_rx_h_csum_offload(ar, msdu); + ath12k_dp_rx_h_csum_offload(msdu, rx_info); ath12k_dp_rx_h_undecap(ar, msdu, rx_desc, enctype, rx_status, is_decrypted); - if (!is_decrypted || fill_crypto_hdr) + if (!is_decrypted || rx_info->is_mcbc) return; - if (ath12k_dp_rx_h_decap_type(ar->ab, rx_desc) != - DP_RX_DECAP_TYPE_ETHERNET2_DIX) { + if (rx_info->decap_type != DP_RX_DECAP_TYPE_ETHERNET2_DIX) { hdr = (void *)msdu->data; hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED); } } -static void ath12k_dp_rx_h_rate(struct ath12k *ar, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status) +static void ath12k_dp_rx_h_rate(struct ath12k *ar, struct ath12k_dp_rx_info *rx_info) { - struct ath12k_base *ab = ar->ab; struct ieee80211_supported_band *sband; - enum rx_msdu_start_pkt_type pkt_type; - u8 bw; - u8 rate_mcs, nss; - u8 sgi; + struct ieee80211_rx_status *rx_status = rx_info->rx_status; + enum rx_msdu_start_pkt_type pkt_type = rx_info->pkt_type; + u8 bw = rx_info->bw, sgi = rx_info->sgi; + u8 rate_mcs = rx_info->rate_mcs, nss = rx_info->nss; bool is_cck; - pkt_type = ath12k_dp_rx_h_pkt_type(ab, rx_desc); - bw = ath12k_dp_rx_h_rx_bw(ab, rx_desc); - rate_mcs = ath12k_dp_rx_h_rate_mcs(ab, rx_desc); - nss = ath12k_dp_rx_h_nss(ab, rx_desc); - sgi = ath12k_dp_rx_h_sgi(ab, rx_desc); - switch (pkt_type) { case RX_MSDU_START_PKT_TYPE_11A: case RX_MSDU_START_PKT_TYPE_11B: @@ -2405,9 +2386,8 @@ static void ath12k_dp_rx_h_rate(struct ath12k *ar, struct hal_rx_desc *rx_desc, } } -static void ath12k_dp_rx_h_fetch_info(struct ath12k_base *ab, - struct hal_rx_desc *rx_desc, - struct ath12k_dp_rx_info *rx_info) +void ath12k_dp_rx_h_fetch_info(struct ath12k_base *ab, struct hal_rx_desc *rx_desc, + struct ath12k_dp_rx_info *rx_info) { rx_info->ip_csum_fail = ath12k_dp_rx_h_ip_cksum_fail(ab, rx_desc); rx_info->l4_csum_fail = ath12k_dp_rx_h_l4_cksum_fail(ab, rx_desc); @@ -2427,12 +2407,14 @@ static void ath12k_dp_rx_h_fetch_info(struct ath12k_base *ab, ath12k_dp_rxdesc_get_mpdu_start_addr2(ab, rx_desc)); rx_info->addr2_present = true; } + + ath12k_dbg_dump(ab, ATH12K_DBG_DATA, NULL, "rx_desc: ", + rx_desc, sizeof(*rx_desc)); } -void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status) +void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct ath12k_dp_rx_info *rx_info) { - struct ath12k_base *ab = ar->ab; + struct ieee80211_rx_status *rx_status = rx_info->rx_status; u8 channel_num; u32 center_freq, meta_data; struct ieee80211_channel *channel; @@ -2446,7 +2428,7 @@ void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; - meta_data = ath12k_dp_rx_h_freq(ab, rx_desc); + meta_data = rx_info->phy_meta_data; channel_num = meta_data; center_freq = meta_data >> 16; @@ -2467,20 +2449,18 @@ void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, ieee80211_frequency_to_channel(channel->center_freq); } spin_unlock_bh(&ar->data_lock); - ath12k_dbg_dump(ar->ab, ATH12K_DBG_DATA, NULL, "rx_desc: ", - rx_desc, sizeof(*rx_desc)); } if (rx_status->band != NL80211_BAND_6GHZ) rx_status->freq = ieee80211_channel_to_frequency(channel_num, rx_status->band); - ath12k_dp_rx_h_rate(ar, rx_desc, rx_status); + ath12k_dp_rx_h_rate(ar, rx_info); } static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *napi, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; static const struct ieee80211_radiotap_he known = { @@ -2493,6 +2473,7 @@ static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *nap struct ieee80211_sta *pubsta; struct ath12k_peer *peer; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); + struct ieee80211_rx_status *status = rx_info->rx_status; u8 decap = DP_RX_DECAP_TYPE_RAW; bool is_mcbc = rxcb->is_mcbc; bool is_eapol = rxcb->is_eapol; @@ -2505,10 +2486,10 @@ static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *nap } if (!(status->flag & RX_FLAG_ONLY_MONITOR)) - decap = ath12k_dp_rx_h_decap_type(ab, rxcb->rx_desc); + decap = rx_info->decap_type; spin_lock_bh(&ab->base_lock); - peer = ath12k_dp_rx_h_find_peer(ab, msdu); + peer = ath12k_dp_rx_h_find_peer(ab, msdu, rx_info); pubsta = peer ? peer->sta : NULL; @@ -2652,8 +2633,8 @@ static int ath12k_dp_rx_process_msdu(struct ath12k *ar, } ath12k_dp_rx_h_fetch_info(ab, rx_desc, rx_info); - ath12k_dp_rx_h_ppdu(ar, rx_desc, rx_info->rx_status); - ath12k_dp_rx_h_mpdu(ar, msdu, rx_desc, rx_info->rx_status); + ath12k_dp_rx_h_ppdu(ar, rx_info); + ath12k_dp_rx_h_mpdu(ar, msdu, rx_desc, rx_info); rx_info->rx_status->flag |= RX_FLAG_SKIP_MONITOR | RX_FLAG_DUP_VALIDATED; @@ -2713,7 +2694,7 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, continue; } - ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rx_status); + ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rx_info); } rcu_read_unlock(); @@ -3055,7 +3036,7 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(ab, rx_desc, msdu))) return -EINVAL; - ath12k_dp_rx_h_ppdu(ar, rx_desc, rxs); + ath12k_dp_rx_h_ppdu(ar, &rx_info); ath12k_dp_rx_h_undecap(ar, msdu, rx_desc, HAL_ENCRYPT_TYPE_TKIP_MIC, rxs, true); ieee80211_rx(ath12k_ar_to_hw(ar), msdu); @@ -3801,10 +3782,10 @@ static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, return -EINVAL; ath12k_dp_rx_h_fetch_info(ab, desc, rx_info); - ath12k_dp_rx_h_ppdu(ar, desc, rx_info->rx_status); - ath12k_dp_rx_h_mpdu(ar, msdu, desc, rx_info->rx_status); + ath12k_dp_rx_h_ppdu(ar, rx_info); + ath12k_dp_rx_h_mpdu(ar, msdu, desc, rx_info); - rxcb->tid = ath12k_dp_rx_h_tid(ab, desc); + rxcb->tid = rx_info->tid; /* Please note that caller will having the access to msdu and completing * rx with mac80211. Need not worry about cleaning up amsdu_list. @@ -3874,7 +3855,7 @@ static bool ath12k_dp_rx_h_tkip_mic_err(struct ath12k *ar, struct sk_buff *msdu, if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(ab, desc, msdu))) return true; - ath12k_dp_rx_h_ppdu(ar, desc, rx_info->rx_status); + ath12k_dp_rx_h_ppdu(ar, rx_info); rx_info->rx_status->flag |= (RX_FLAG_MMIC_STRIPPED | RX_FLAG_MMIC_ERROR | RX_FLAG_DECRYPTED); @@ -3946,7 +3927,7 @@ static void ath12k_dp_rx_wbm_err(struct ath12k *ar, return; } - ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rxs); + ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rx_info); } int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.h b/drivers/net/wireless/ath/ath12k/dp_rx.h index 0e7cec42a8d13b..eb4d2b60a035e4 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.h +++ b/drivers/net/wireless/ath/ath12k/dp_rx.h @@ -149,13 +149,13 @@ int ath12k_dp_rx_peer_frag_setup(struct ath12k *ar, const u8 *peer_mac, int vdev u8 ath12k_dp_rx_h_l3pad(struct ath12k_base *ab, struct hal_rx_desc *desc); struct ath12k_peer * -ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu); +ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu, + struct ath12k_dp_rx_info *rx_info); u8 ath12k_dp_rx_h_decap_type(struct ath12k_base *ab, struct hal_rx_desc *desc); u32 ath12k_dp_rx_h_mpdu_err(struct ath12k_base *ab, struct hal_rx_desc *desc); -void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status); +void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct ath12k_dp_rx_info *rx_info); int ath12k_dp_rxdma_ring_sel_config_qcn9274(struct ath12k_base *ab); int ath12k_dp_rxdma_ring_sel_config_wcn7850(struct ath12k_base *ab); @@ -163,4 +163,6 @@ int ath12k_dp_htt_tlv_iter(struct ath12k_base *ab, const void *ptr, size_t len, int (*iter)(struct ath12k_base *ar, u16 tag, u16 len, const void *ptr, void *data), void *data); +void ath12k_dp_rx_h_fetch_info(struct ath12k_base *ab, struct hal_rx_desc *rx_desc, + struct ath12k_dp_rx_info *rx_info); #endif /* ATH12K_DP_RX_H */ From 98bf5da4bc06adce2fb86dc9aeadae3272a82cee Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Wed, 16 Apr 2025 01:28:11 +0530 Subject: [PATCH 2270/2924] wifi: ath12k: fix wrong handling of CCMP256 and GCMP ciphers [ Upstream commit f5d6b15d9503263d9425dcde9cc2fd401a32b0f2 ] Currently for CCMP256, GCMP128 and GCMP256 ciphers, in ath12k_install_key() IEEE80211_KEY_FLAG_GENERATE_IV_MGMT is not set and in ath12k_mac_mgmt_tx_wmi() a length of IEEE80211_CCMP_MIC_LEN is reserved for all ciphers. This results in unexpected drop of protected management frames in case either of above 3 ciphers is used. The reason is, without IEEE80211_KEY_FLAG_GENERATE_IV_MGMT set, mac80211 will not generate CCMP/GCMP headers in TX frame for ath12k. Also MIC length reserved is wrong and such frames are dropped by hardware. Fix this by setting IEEE80211_KEY_FLAG_GENERATE_IV_MGMT flag for above ciphers and by reserving proper MIC length for those ciphers. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Rameshkumar Sundaram Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250415195812.2633923-2-rameshkumar.sundaram@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/dp_rx.c | 3 +-- drivers/net/wireless/ath/ath12k/dp_rx.h | 3 +++ drivers/net/wireless/ath/ath12k/mac.c | 16 ++++++++++------ 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index f7e176e9c01a66..7fadd366ec13de 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1917,8 +1917,7 @@ static void ath12k_dp_rx_h_csum_offload(struct sk_buff *msdu, CHECKSUM_NONE : CHECKSUM_UNNECESSARY; } -static int ath12k_dp_rx_crypto_mic_len(struct ath12k *ar, - enum hal_encrypt_type enctype) +int ath12k_dp_rx_crypto_mic_len(struct ath12k *ar, enum hal_encrypt_type enctype) { switch (enctype) { case HAL_ENCRYPT_TYPE_OPEN: diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.h b/drivers/net/wireless/ath/ath12k/dp_rx.h index eb4d2b60a035e4..a4e179c6f2664f 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.h +++ b/drivers/net/wireless/ath/ath12k/dp_rx.h @@ -165,4 +165,7 @@ int ath12k_dp_htt_tlv_iter(struct ath12k_base *ab, const void *ptr, size_t len, void *data); void ath12k_dp_rx_h_fetch_info(struct ath12k_base *ab, struct hal_rx_desc *rx_desc, struct ath12k_dp_rx_info *rx_info); + +int ath12k_dp_rx_crypto_mic_len(struct ath12k *ar, enum hal_encrypt_type enctype); + #endif /* ATH12K_DP_RX_H */ diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 4134f9512d0386..65eb50bf03b24b 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4623,8 +4623,8 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, switch (key->cipher) { case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: arg.key_cipher = WMI_CIPHER_AES_CCM; - /* TODO: Re-check if flag is valid */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT; break; case WLAN_CIPHER_SUITE_TKIP: @@ -4632,12 +4632,10 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, arg.key_txmic_len = 8; arg.key_rxmic_len = 8; break; - case WLAN_CIPHER_SUITE_CCMP_256: - arg.key_cipher = WMI_CIPHER_AES_CCM; - break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: arg.key_cipher = WMI_CIPHER_AES_GCM; + key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT; break; default: ath12k_warn(ar->ab, "cipher %d is not supported\n", key->cipher); @@ -7041,6 +7039,8 @@ static int ath12k_mac_mgmt_tx_wmi(struct ath12k *ar, struct ath12k_link_vif *arv struct ath12k_base *ab = ar->ab; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ieee80211_tx_info *info; + enum hal_encrypt_type enctype; + unsigned int mic_len; dma_addr_t paddr; int buf_id; int ret; @@ -7056,12 +7056,16 @@ static int ath12k_mac_mgmt_tx_wmi(struct ath12k *ar, struct ath12k_link_vif *arv return -ENOSPC; info = IEEE80211_SKB_CB(skb); - if (!(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) { + if ((ATH12K_SKB_CB(skb)->flags & ATH12K_SKB_CIPHER_SET) && + !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) { if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { - skb_put(skb, IEEE80211_CCMP_MIC_LEN); + enctype = + ath12k_dp_tx_get_encrypt_type(ATH12K_SKB_CB(skb)->cipher); + mic_len = ath12k_dp_rx_crypto_mic_len(ar, enctype); + skb_put(skb, mic_len); } } From 2563069baf243cadc76dc64d9085606742c4b282 Mon Sep 17 00:00:00 2001 From: Maharaja Kennadyrajan Date: Wed, 16 Apr 2025 07:34:14 +0530 Subject: [PATCH 2271/2924] wifi: ath12k: Prevent sending WMI commands to firmware during firmware crash [ Upstream commit e9e094a9734ea3bd4d4d117c915ccf129ac61ba1 ] Currently, we encounter the following kernel call trace when a firmware crash occurs. This happens because the host sends WMI commands to the firmware while it is in recovery, causing the commands to fail and resulting in the kernel call trace. Set the ATH12K_FLAG_CRASH_FLUSH and ATH12K_FLAG_RECOVERY flags when the host driver receives the firmware crash notification from MHI. This prevents sending WMI commands to the firmware during recovery. Call Trace: dump_stack_lvl+0x75/0xc0 register_lock_class+0x6be/0x7a0 ? __lock_acquire+0x644/0x19a0 __lock_acquire+0x95/0x19a0 lock_acquire+0x265/0x310 ? ath12k_ce_send+0xa2/0x210 [ath12k] ? find_held_lock+0x34/0xa0 ? ath12k_ce_send+0x56/0x210 [ath12k] _raw_spin_lock_bh+0x33/0x70 ? ath12k_ce_send+0xa2/0x210 [ath12k] ath12k_ce_send+0xa2/0x210 [ath12k] ath12k_htc_send+0x178/0x390 [ath12k] ath12k_wmi_cmd_send_nowait+0x76/0xa0 [ath12k] ath12k_wmi_cmd_send+0x62/0x190 [ath12k] ath12k_wmi_pdev_bss_chan_info_request+0x62/0xc0 [ath1 ath12k_mac_op_get_survey+0x2be/0x310 [ath12k] ieee80211_dump_survey+0x99/0x240 [mac80211] nl80211_dump_survey+0xe7/0x470 [cfg80211] ? kmalloc_reserve+0x59/0xf0 genl_dumpit+0x24/0x70 netlink_dump+0x177/0x360 __netlink_dump_start+0x206/0x280 genl_family_rcv_msg_dumpit.isra.22+0x8a/0xe0 ? genl_family_rcv_msg_attrs_parse.isra.23+0xe0/0xe0 ? genl_op_lock.part.12+0x10/0x10 ? genl_dumpit+0x70/0x70 genl_rcv_msg+0x1d0/0x290 ? nl80211_del_station+0x330/0x330 [cfg80211] ? genl_get_cmd_both+0x50/0x50 netlink_rcv_skb+0x4f/0x100 genl_rcv+0x1f/0x30 netlink_unicast+0x1b6/0x260 netlink_sendmsg+0x31a/0x450 __sock_sendmsg+0xa8/0xb0 ____sys_sendmsg+0x1e4/0x260 ___sys_sendmsg+0x89/0xe0 ? local_clock_noinstr+0xb/0xc0 ? rcu_is_watching+0xd/0x40 ? kfree+0x1de/0x370 ? __sys_sendmsg+0x7a/0xc0 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: a9b46dd2e483 ("wifi: ath12k: Add firmware coredump collection support") Signed-off-by: Maharaja Kennadyrajan Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250416020414.2161545-1-maharaja.kennadyrajan@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/mhi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mhi.c b/drivers/net/wireless/ath/ath12k/mhi.c index 2f6d14382ed70c..4d40c4ec4b8110 100644 --- a/drivers/net/wireless/ath/ath12k/mhi.c +++ b/drivers/net/wireless/ath/ath12k/mhi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2020-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -285,8 +285,11 @@ static void ath12k_mhi_op_status_cb(struct mhi_controller *mhi_cntrl, break; } - if (!(test_bit(ATH12K_FLAG_UNREGISTERING, &ab->dev_flags))) + if (!(test_bit(ATH12K_FLAG_UNREGISTERING, &ab->dev_flags))) { + set_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags); + set_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags); queue_work(ab->workqueue_aux, &ab->reset_work); + } break; default: break; From 6bfe7ae9bbd9734751b853e2d2e1c13e8b46fd2d Mon Sep 17 00:00:00 2001 From: Maharaja Kennadyrajan Date: Wed, 16 Apr 2025 07:47:24 +0530 Subject: [PATCH 2272/2924] wifi: ath12k: fix node corruption in ar->arvifs list [ Upstream commit 823435bd23108d6f8be89ea2d025c0e2e3769c51 ] In current WLAN recovery code flow, ath12k_core_halt() only reinitializes the "arvifs" list head. This will cause the list node immediately following the list head to become an invalid list node. Because the prev of that node still points to the list head "arvifs", but the next of the list head "arvifs" no longer points to that list node. When a WLAN recovery occurs during the execution of a vif removal, and it happens before the spin_lock_bh(&ar->data_lock) in ath12k_mac_vdev_delete(), list_del() will detect the previously mentioned situation, thereby triggering a kernel panic. The fix is to remove and reinitialize all vif list nodes from the list head "arvifs" during WLAN halt. The reinitialization is to make the list nodes valid, ensuring that the list_del() in ath12k_mac_vdev_delete() can execute normally. Call trace: __list_del_entry_valid_or_report+0xd4/0x100 (P) ath12k_mac_remove_link_interface.isra.0+0xf8/0x2e4 [ath12k] ath12k_scan_vdev_clean_work+0x40/0x164 [ath12k] cfg80211_wiphy_work+0xfc/0x100 process_one_work+0x164/0x2d0 worker_thread+0x254/0x380 kthread+0xfc/0x100 ret_from_fork+0x10/0x20 The change is mostly copied from the ath11k patch: https://lore.kernel.org/all/20250320053145.3445187-1-quic_stonez@quicinc.com/ Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Maharaja Kennadyrajan Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250416021724.2162519-1-maharaja.kennadyrajan@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 34eaa6b5bf772c..6b0c719be5434b 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1251,6 +1251,7 @@ static void ath12k_rfkill_work(struct work_struct *work) void ath12k_core_halt(struct ath12k *ar) { + struct list_head *pos, *n; struct ath12k_base *ab = ar->ab; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -1266,7 +1267,12 @@ void ath12k_core_halt(struct ath12k *ar) rcu_assign_pointer(ab->pdevs_active[ar->pdev_idx], NULL); synchronize_rcu(); - INIT_LIST_HEAD(&ar->arvifs); + + spin_lock_bh(&ar->data_lock); + list_for_each_safe(pos, n, &ar->arvifs) + list_del_init(pos); + spin_unlock_bh(&ar->data_lock); + idr_init(&ar->txmgmt_idr); } From 294bf7985b2b714b990b69b3804b389aae96dd7b Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sat, 19 Apr 2025 10:07:41 +0200 Subject: [PATCH 2273/2924] RDMA/rxe: Fix "trying to register non-static key in rxe_qp_do_cleanup" bug [ Upstream commit 1c7eec4d5f3b39cdea2153abaebf1b7229a47072 ] Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 assign_lock_key kernel/locking/lockdep.c:986 [inline] register_lock_class+0x4a3/0x4c0 kernel/locking/lockdep.c:1300 __lock_acquire+0x99/0x1ba0 kernel/locking/lockdep.c:5110 lock_acquire kernel/locking/lockdep.c:5866 [inline] lock_acquire+0x179/0x350 kernel/locking/lockdep.c:5823 __timer_delete_sync+0x152/0x1b0 kernel/time/timer.c:1644 rxe_qp_do_cleanup+0x5c3/0x7e0 drivers/infiniband/sw/rxe/rxe_qp.c:815 execute_in_process_context+0x3a/0x160 kernel/workqueue.c:4596 __rxe_cleanup+0x267/0x3c0 drivers/infiniband/sw/rxe/rxe_pool.c:232 rxe_create_qp+0x3f7/0x5f0 drivers/infiniband/sw/rxe/rxe_verbs.c:604 create_qp+0x62d/0xa80 drivers/infiniband/core/verbs.c:1250 ib_create_qp_kernel+0x9f/0x310 drivers/infiniband/core/verbs.c:1361 ib_create_qp include/rdma/ib_verbs.h:3803 [inline] rdma_create_qp+0x10c/0x340 drivers/infiniband/core/cma.c:1144 rds_ib_setup_qp+0xc86/0x19a0 net/rds/ib_cm.c:600 rds_ib_cm_initiate_connect+0x1e8/0x3d0 net/rds/ib_cm.c:944 rds_rdma_cm_event_handler_cmn+0x61f/0x8c0 net/rds/rdma_transport.c:109 cma_cm_event_handler+0x94/0x300 drivers/infiniband/core/cma.c:2184 cma_work_handler+0x15b/0x230 drivers/infiniband/core/cma.c:3042 process_one_work+0x9cc/0x1b70 kernel/workqueue.c:3238 process_scheduled_works kernel/workqueue.c:3319 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3400 kthread+0x3c2/0x780 kernel/kthread.c:464 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:153 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 The root cause is as below: In the function rxe_create_qp, the function rxe_qp_from_init is called to create qp, if this function rxe_qp_from_init fails, rxe_cleanup will be called to handle all the allocated resources, including the timers: retrans_timer and rnr_nak_timer. The function rxe_qp_from_init calls the function rxe_qp_init_req to initialize the timers: retrans_timer and rnr_nak_timer. But these timers are initialized in the end of rxe_qp_init_req. If some errors occur before the initialization of these timers, this problem will occur. The solution is to check whether these timers are initialized or not. If these timers are not initialized, ignore these timers. Fixes: 8700e3e7c485 ("Soft RoCE driver") Reported-by: syzbot+4edb496c3cad6e953a31@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=4edb496c3cad6e953a31 Signed-off-by: Zhu Yanjun Link: https://patch.msgid.link/20250419080741.1515231-1-yanjun.zhu@linux.dev Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/rxe_qp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 7975fb0e2782f0..f2af3e0aef35b5 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -811,7 +811,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work) spin_unlock_irqrestore(&qp->state_lock, flags); qp->qp_timeout_jiffies = 0; - if (qp_type(qp) == IB_QPT_RC) { + /* In the function timer_setup, .function is initialized. If .function + * is NULL, it indicates the function timer_setup is not called, the + * timer is not initialized. Or else, the timer is initialized. + */ + if (qp_type(qp) == IB_QPT_RC && qp->retrans_timer.function && + qp->rnr_nak_timer.function) { timer_delete_sync(&qp->retrans_timer); timer_delete_sync(&qp->rnr_nak_timer); } From ffb6487ed83fb0e5795ef46c127b1ab1d072fbae Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Mon, 21 Apr 2025 21:27:49 +0800 Subject: [PATCH 2274/2924] RDMA/hns: Include hnae3.h in hns_roce_hw_v2.h [ Upstream commit 2b11d33de23262cb20d1dcb24b586dbb8f54d463 ] hns_roce_hw_v2.h has a direct dependency on hnae3.h due to the inline function hns_roce_write64(), but it doesn't include this header currently. This leads to that files including hns_roce_hw_v2.h must also include hnae3.h to avoid compilation errors, even if they themselves don't really rely on hnae3.h. This doesn't make sense, hns_roce_hw_v2.h should include hnae3.h directly. Fixes: d3743fa94ccd ("RDMA/hns: Fix the chip hanging caused by sending doorbell during reset") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20250421132750.1363348-6-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_ah.c | 1 - drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 - drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + drivers/infiniband/hw/hns/hns_roce_main.c | 1 - drivers/infiniband/hw/hns/hns_roce_restrack.c | 1 - 5 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 4fc5b9d5fea87e..307c35888b3003 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -33,7 +33,6 @@ #include #include #include -#include "hnae3.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 160e8927d364e1..59352d1b62099f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -43,7 +43,6 @@ #include #include -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_cmd.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 91a5665465ffba..bc7466830eaf9d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_HW_V2_H #include +#include "hnae3.h" #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 8d0b63d4b50a6c..e7a497cc125cc3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -37,7 +37,6 @@ #include #include #include -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 356d9881694973..f637b73b946e44 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -4,7 +4,6 @@ #include #include #include -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" From 124df6eef122a580052a0bf3bbc362886bd283e1 Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Mon, 14 Apr 2025 16:08:44 +0800 Subject: [PATCH 2275/2924] scsi: hisi_sas: Call I_T_nexus after soft reset for SATA disk [ Upstream commit e4d953ca557e02edd3aed7390043e1b8ad1c9723 ] In commit 21c7e972475e ("scsi: hisi_sas: Disable SATA disk phy for severe I_T nexus reset failure"), if the softreset fails upon certain conditions, the PHY connected to the disk is disabled directly. Manual recovery is required, which is inconvenient for users in actual use. In addition, SATA disks do not support simultaneous connection of multiple hosts. Therefore, when multiple controllers are connected to a SATA disk at the same time, the controller which is connected later failed to issue an ATA softreset to the SATA disk. As a result, the PHY associated with the disk is disabled and cannot be automatically recovered. Now that, we will not focus on the execution result of softreset. No matter whether the execution is successful or not, we will directly carry out I_T_nexus_reset. Fixes: 21c7e972475e ("scsi: hisi_sas: Disable SATA disk phy for severe I_T nexus reset failure") Signed-off-by: Yihang Li Link: https://lore.kernel.org/r/20250414080845.1220997-4-liyihang9@huawei.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hisi_sas/hisi_sas_main.c | 29 +++++---------------------- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 944cf2fb05617d..d3981b6779316b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1885,33 +1885,14 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device) } hisi_sas_dereg_device(hisi_hba, device); - rc = hisi_sas_debug_I_T_nexus_reset(device); - if (rc == TMF_RESP_FUNC_COMPLETE && dev_is_sata(device)) { - struct sas_phy *local_phy; - + if (dev_is_sata(device)) { rc = hisi_sas_softreset_ata_disk(device); - switch (rc) { - case -ECOMM: - rc = -ENODEV; - break; - case TMF_RESP_FUNC_FAILED: - case -EMSGSIZE: - case -EIO: - local_phy = sas_get_local_phy(device); - rc = sas_phy_enable(local_phy, 0); - if (!rc) { - local_phy->enabled = 0; - dev_err(dev, "Disabled local phy of ATA disk %016llx due to softreset fail (%d)\n", - SAS_ADDR(device->sas_addr), rc); - rc = -ENODEV; - } - sas_put_local_phy(local_phy); - break; - default: - break; - } + if (rc == TMF_RESP_FUNC_FAILED) + dev_err(dev, "ata disk %016llx reset (%d)\n", + SAS_ADDR(device->sas_addr), rc); } + rc = hisi_sas_debug_I_T_nexus_reset(device); if ((rc == TMF_RESP_FUNC_COMPLETE) || (rc == -ENODEV)) hisi_sas_release_task(hisi_hba, device); From 220735551a40109222f2f900dc90290f8aca0143 Mon Sep 17 00:00:00 2001 From: Feng Yang Date: Thu, 17 Apr 2025 09:48:46 +0800 Subject: [PATCH 2276/2924] libbpf: Fix event name too long error [ Upstream commit 4dde20b1aa85d69c4281eaac9a7cfa7d2b62ecf0 ] When the binary path is excessively long, the generated probe_name in libbpf exceeds the kernel's MAX_EVENT_NAME_LEN limit (64 bytes). This causes legacy uprobe event attachment to fail with error code -22. The fix reorders the fields to place the unique ID before the name. This ensures that even if truncation occurs via snprintf, the unique ID remains intact, preserving event name uniqueness. Additionally, explicit checks with MAX_EVENT_NAME_LEN are added to enforce length constraints. Before Fix: ./test_progs -t attach_probe/kprobe-long_name ...... libbpf: failed to add legacy kprobe event for 'bpf_testmod_looooooooooooooooooooooooooooooong_name+0x0': -EINVAL libbpf: prog 'handle_kprobe': failed to create kprobe 'bpf_testmod_looooooooooooooooooooooooooooooong_name+0x0' perf event: -EINVAL test_attach_kprobe_long_event_name:FAIL:attach_kprobe_long_event_name unexpected error: -22 test_attach_probe:PASS:uprobe_ref_ctr_cleanup 0 nsec #13/11 attach_probe/kprobe-long_name:FAIL #13 attach_probe:FAIL ./test_progs -t attach_probe/uprobe-long_name ...... libbpf: failed to add legacy uprobe event for /root/linux-bpf/bpf-next/tools/testing/selftests/bpf/test_progs:0x13efd9: -EINVAL libbpf: prog 'handle_uprobe': failed to create uprobe '/root/linux-bpf/bpf-next/tools/testing/selftests/bpf/test_progs:0x13efd9' perf event: -EINVAL test_attach_uprobe_long_event_name:FAIL:attach_uprobe_long_event_name unexpected error: -22 #13/10 attach_probe/uprobe-long_name:FAIL #13 attach_probe:FAIL After Fix: ./test_progs -t attach_probe/uprobe-long_name #13/10 attach_probe/uprobe-long_name:OK #13 attach_probe:OK Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED ./test_progs -t attach_probe/kprobe-long_name #13/11 attach_probe/kprobe-long_name:OK #13 attach_probe:OK Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED Fixes: 46ed5fc33db9 ("libbpf: Refactor and simplify legacy kprobe code") Fixes: cc10623c6810 ("libbpf: Add legacy uprobe attaching support") Signed-off-by: Hengqi Chen Signed-off-by: Feng Yang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250417014848.59321-2-yangfeng59949@163.com Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf.c | 43 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cfb952d7002f61..087bb8daa46b5f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -60,6 +60,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define MAX_EVENT_NAME_LEN 64 + #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" #define BPF_INSN_SZ (sizeof(struct bpf_insn)) @@ -11112,16 +11114,16 @@ static const char *tracefs_available_filter_functions_addrs(void) : TRACEFS"/available_filter_functions_addrs"; } -static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *kfunc_name, size_t offset) +static void gen_probe_legacy_event_name(char *buf, size_t buf_sz, + const char *name, size_t offset) { static int index = 0; int i; - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, - __sync_fetch_and_add(&index, 1)); + snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(), + __sync_fetch_and_add(&index, 1), name, offset); - /* sanitize binary_path in the probe name */ + /* sanitize name in the probe name */ for (i = 0; buf[i]; i++) { if (!isalnum(buf[i])) buf[i] = '_'; @@ -11246,9 +11248,9 @@ int probe_kern_syscall_wrapper(int token_fd) return pfd >= 0 ? 1 : 0; } else { /* legacy mode */ - char probe_name[128]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) return 0; @@ -11304,10 +11306,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, func_name, offset, -1 /* pid */, 0 /* ref_ctr_off */); } else { - char probe_name[256]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), - func_name, offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -11851,20 +11853,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru return ret; } -static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *binary_path, uint64_t offset) -{ - int i; - - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); - - /* sanitize binary_path in the probe name */ - for (i = 0; buf[i]; i++) { - if (!isalnum(buf[i])) - buf[i] = '_'; - } -} - static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { @@ -12288,13 +12276,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[PATH_MAX + 64]; + char probe_name[MAX_EVENT_NAME_LEN]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); - gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), - binary_path, func_offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + strrchr(binary_path, '/') ? : binary_path, + func_offset); legacy_probe = strdup(probe_name); if (!legacy_probe) From a02347dbd0874bff5ad33dfad34ae4ccfa4c76c3 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 23 Apr 2025 09:16:29 +0300 Subject: [PATCH 2277/2924] wifi: iwlwifi: re-add IWL_AMSDU_8K case [ Upstream commit bdd6d93d7a109ba1080336fe07d0b4eb815efaf9 ] This case in iwl_trans_get_rb_size_order was accidently combined with the IWL_AMSDU_12K case. Fix this. Fixes: 7391b2a4f7db ("wifi: iwlwifi: rework firmware error handling") Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250423091408.ef19205aa358.Ifbf89e7b7391cd7070267b7360c53230b3b2c57c@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index ce4954b0d52462..44a249a753ecf6 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -328,6 +328,7 @@ iwl_trans_get_rb_size_order(enum iwl_amsdu_size rb_size) case IWL_AMSDU_4K: return get_order(4 * 1024); case IWL_AMSDU_8K: + return get_order(8 * 1024); case IWL_AMSDU_12K: return get_order(16 * 1024); default: From c5a5553e3b5261cc68fd25d4ce1510f959ab2f1f Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Thu, 24 Apr 2025 00:39:01 +0800 Subject: [PATCH 2278/2924] libbpf: Remove sample_period init in perf_buffer [ Upstream commit 64821d25f05ac468d435e61669ae745ce5a633ea ] It seems that sample_period is not used in perf buffer. Actually, only wakeup_events are meaningful to enable events aggregation for wakeup notification. Remove sample_period setting code to avoid confusion. Fixes: fb84b8224655 ("libbpf: add perf buffer API") Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Acked-by: Namhyung Kim Link: https://lore.kernel.org/bpf/20250423163901.2983689-1-chen.dylane@linux.dev Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 087bb8daa46b5f..2bc0e3bfdd491e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -13351,7 +13351,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = sample_period; attr.wakeup_events = sample_period; p.attr = &attr; From 07d74c1c976d496bc3b1f0c70e157795e8a7d9d8 Mon Sep 17 00:00:00 2001 From: Jonathan Wiepert Date: Thu, 24 Apr 2025 18:14:57 -0400 Subject: [PATCH 2279/2924] Use thread-safe function pointer in libbpf_print [ Upstream commit 91dbac4076537b464639953c055c460d2bdfc7ea ] This patch fixes a thread safety bug where libbpf_print uses the global variable storing the print function pointer rather than the local variable that had the print function set via __atomic_load_n. Fixes: f1cb927cdb62 ("libbpf: Ensure print callback usage is thread-safe") Signed-off-by: Jonathan Wiepert Signed-off-by: Andrii Nakryiko Acked-by: Mykyta Yatsenko Link: https://lore.kernel.org/bpf/20250424221457.793068-1-jonathan.wiepert@gmail.com Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2bc0e3bfdd491e..147964bb64c8f4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -286,7 +286,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) old_errno = errno; va_start(args, format); - __libbpf_pr(level, format, args); + print_fn(level, format, args); va_end(args); errno = old_errno; From 0971b82fb2803ee5468c721586912c5c95eee4ba Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 23 Apr 2025 18:40:02 +0200 Subject: [PATCH 2280/2924] iommu: ipmmu-vmsa: avoid Wformat-security warning [ Upstream commit 33647d0be323f9e2f27cd1e86de5cfd965cec654 ] iommu_device_sysfs_add() requires a constant format string, otherwise a W=1 build produces a warning: drivers/iommu/ipmmu-vmsa.c:1093:62: error: format string is not a string literal (potentially insecure) [-Werror,-Wformat-security] 1093 | ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); | ^~~~~~~~~~~~~~~~~~~~ drivers/iommu/ipmmu-vmsa.c:1093:62: note: treat the string as an argument to avoid this 1093 | ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); | ^ | "%s", This was an old bug but I saw it now because the code was changed as part of commit d9d3cede4167 ("iommu/ipmmu-vmsa: Register in a sensible order"). Fixes: 7af9a5fdb9e0 ("iommu/ipmmu-vmsa: Use iommu_device_sysfs_add()/remove()") Signed-off-by: Arnd Bergmann Reviewed-by: Geert Uytterhoeven Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250423164006.2661372-1-arnd@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/ipmmu-vmsa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index e424b279a8cd9b..90341b24a81155 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1090,7 +1090,8 @@ static int ipmmu_probe(struct platform_device *pdev) if (mmu->features->has_cache_leaf_nodes && ipmmu_is_root(mmu)) return 0; - ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); + ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, "%s", + dev_name(&pdev->dev)); if (ret) return ret; From b4dcc631a6e478ac038cd35738028e3753fb504e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 23 Apr 2025 18:48:16 +0200 Subject: [PATCH 2281/2924] iommu/io-pgtable-arm: dynamically allocate selftest device struct [ Upstream commit fa26198d30f3cdd7627ce47362057848219de765 ] In general a 'struct device' is way too large to be put on the kernel stack. Apparently something just caused it to grow a slightly larger, which pushed the arm_lpae_do_selftests() function over the warning limit in some configurations: drivers/iommu/io-pgtable-arm.c:1423:19: error: stack frame size (1032) exceeds limit (1024) in 'arm_lpae_do_selftests' [-Werror,-Wframe-larger-than] 1423 | static int __init arm_lpae_do_selftests(void) | ^ Change the function to use a dynamically allocated faux_device instead of the on-stack device structure. Fixes: ca25ec247aad ("iommu/io-pgtable-arm: Remove iommu_dev==NULL special case") Link: https://lore.kernel.org/all/ab75a444-22a1-47f5-b3c0-253660395b5a@arm.com/ Signed-off-by: Arnd Bergmann Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20250423164826.2931382-1-arnd@kernel.org Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/io-pgtable-arm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 7632c80edea63a..396c4f6f5a5bd9 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1433,15 +1434,17 @@ static int __init arm_lpae_do_selftests(void) }; int i, j, k, pass = 0, fail = 0; - struct device dev; + struct faux_device *dev; struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .coherent_walk = true, - .iommu_dev = &dev, }; - /* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */ - set_dev_node(&dev, NUMA_NO_NODE); + dev = faux_device_create("io-pgtable-test", NULL, 0); + if (!dev) + return -ENOMEM; + + cfg.iommu_dev = &dev->dev; for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { for (j = 0; j < ARRAY_SIZE(address_size); ++j) { @@ -1461,6 +1464,8 @@ static int __init arm_lpae_do_selftests(void) } pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail); + faux_device_destroy(dev); + return fail ? -EFAULT : 0; } subsys_initcall(arm_lpae_do_selftests); From 231123516968a09f2be95b635e6ddd0aa9bf4d44 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 25 Apr 2025 10:08:37 -0300 Subject: [PATCH 2282/2924] iommu: Protect against overflow in iommu_pgsize() [ Upstream commit e586e22974d2b7acbef3c6c3e01b2d5ce69efe33 ] On a 32 bit system calling: iommu_map(0, 0x40000000) When using the AMD V1 page table type with a domain->pgsize of 0xfffff000 causes iommu_pgsize() to miscalculate a result of: size=0x40000000 count=2 count should be 1. This completely corrupts the mapping process. This is because the final test to adjust the pagesize malfunctions when the addition overflows. Use check_add_overflow() to prevent this. Fixes: b1d99dc5f983 ("iommu: Hook up '->unmap_pages' driver callback") Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-3ad28fc2e3a3+163327-iommu_overflow_pgsize_jgg@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 5bc2fc969494f5..e4628d96216102 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2399,6 +2399,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, unsigned int pgsize_idx, pgsize_idx_next; unsigned long pgsizes; size_t offset, pgsize, pgsize_next; + size_t offset_end; unsigned long addr_merge = paddr | iova; /* Page sizes supported by the hardware and small enough for @size */ @@ -2439,7 +2440,8 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, * If size is big enough to accommodate the larger page, reduce * the number of smaller pages. */ - if (offset + pgsize_next <= size) + if (!check_add_overflow(offset, pgsize_next, &offset_end) && + offset_end <= size) size = offset; out_set_count: From f1e573d7dafd9cc03b7fb8988f841716fecc8bb9 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 24 Apr 2025 04:22:38 +0000 Subject: [PATCH 2283/2924] bonding: assign random address if device address is same as bond [ Upstream commit 5c3bf6cba7911f470afd748606be5c03a9512fcc ] This change addresses a MAC address conflict issue in failover scenarios, similar to the problem described in commit a951bc1e6ba5 ("bonding: correct the MAC address for 'follow' fail_over_mac policy"). In fail_over_mac=follow mode, the bonding driver expects the formerly active slave to swap MAC addresses with the newly active slave during failover. However, under certain conditions, two slaves may end up with the same MAC address, which breaks this policy: 1) ip link set eth0 master bond0 -> bond0 adopts eth0's MAC address (MAC0). 2) ip link set eth1 master bond0 -> eth1 is added as a backup with its own MAC (MAC1). 3) ip link set eth0 nomaster -> eth0 is released and restores its MAC (MAC0). -> eth1 becomes the active slave, and bond0 assigns MAC0 to eth1. 4) ip link set eth0 master bond0 -> eth0 is re-added to bond0, now both eth0 and eth1 have MAC0. This results in a MAC address conflict and violates the expected behavior of the failover policy. To fix this, we assign a random MAC address to any newly added slave if its current MAC address matches that of the bond. The original (permanent) MAC address is saved and will be restored when the device is released from the bond. This ensures that each slave has a unique MAC address during failover transitions, preserving the integrity of the fail_over_mac=follow policy. Fixes: 3915c1e8634a ("bonding: Add "follow" option to fail_over_mac") Signed-off-by: Hangbin Liu Acked-by: Jay Vosburgh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 4461220bcd58d8..17ae4b819a5977 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2115,15 +2115,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, * set the master's mac address to that of the first slave */ memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len); - ss.ss_family = slave_dev->type; - res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, - extack); - if (res) { - slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res); - goto err_restore_mtu; - } + } else if (bond->params.fail_over_mac == BOND_FOM_FOLLOW && + BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && + memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) { + /* Set slave to random address to avoid duplicate mac + * address in later fail over. + */ + eth_random_addr(ss.__data); + } else { + goto skip_mac_set; } + ss.ss_family = slave_dev->type; + res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, extack); + if (res) { + slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res); + goto err_restore_mtu; + } + +skip_mac_set: + /* set no_addrconf flag before open to prevent IPv6 addrconf */ slave_dev->priv_flags |= IFF_NO_ADDRCONF; From d93198513eada13158d1498aa9f8d0627490c49b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 14 Apr 2025 18:52:36 +0800 Subject: [PATCH 2284/2924] f2fs: clean up w/ fscrypt_is_bounce_page() [ Upstream commit 0c708e35cf26449ca317fcbfc274704660b6d269 ] Just cleanup, no logic changes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: aa1be8dd6416 ("f2fs: fix to detect gcing page in f2fs_is_cp_guaranteed()") Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 54f89f0ee69b18..5a7888a5923ca0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -53,7 +53,7 @@ bool f2fs_is_cp_guaranteed(struct page *page) struct inode *inode; struct f2fs_sb_info *sbi; - if (!mapping) + if (fscrypt_is_bounce_page(page)) return false; inode = mapping->host; From 21f3f2b982f2f2ab99bf2770cf0b67363b659d3b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 14 Apr 2025 18:52:37 +0800 Subject: [PATCH 2285/2924] f2fs: fix to detect gcing page in f2fs_is_cp_guaranteed() [ Upstream commit aa1be8dd64163eca4dde7fd2557eb19927a06a47 ] Jan Prusakowski reported a f2fs bug as below: f2fs/007 will hang kernel during testing w/ below configs: kernel 6.12.18 (from pixel-kernel/android16-6.12) export MKFS_OPTIONS="-O encrypt -O extra_attr -O project_quota -O quota" export F2FS_MOUNT_OPTIONS="test_dummy_encryption,discard,fsync_mode=nobarrier,reserve_root=32768,checkpoint_merge,atgc" cat /proc//stack f2fs_wait_on_all_pages+0xa3/0x130 do_checkpoint+0x40c/0x5d0 f2fs_write_checkpoint+0x258/0x550 kill_f2fs_super+0x14f/0x190 deactivate_locked_super+0x30/0xb0 cleanup_mnt+0xba/0x150 task_work_run+0x59/0xa0 syscall_exit_to_user_mode+0x12d/0x130 do_syscall_64+0x57/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e cat /sys/kernel/debug/f2fs/status - IO_W (CP: -256, Data: 256, Flush: ( 0 0 1), Discard: ( 0 0)) cmd: 0 undiscard: 0 CP IOs reference count becomes negative. The root cause is: After 4961acdd65c9 ("f2fs: fix to tag gcing flag on page during block migration"), we will tag page w/ gcing flag for raw page of cluster during its migration. However, if the inode is both encrypted and compressed, during ioc_decompress(), it will tag page w/ gcing flag, and it increase F2FS_WB_DATA reference count: - f2fs_write_multi_page - f2fs_write_raw_page - f2fs_write_single_page - do_write_page - f2fs_submit_page_write - WB_DATA_TYPE(bio_page, fio->compressed_page) : bio_page is encrypted, so mapping is NULL, and fio->compressed_page is NULL, it returns F2FS_WB_DATA - inc_page_count(.., F2FS_WB_DATA) Then, during end_io(), it decrease F2FS_WB_CP_DATA reference count: - f2fs_write_end_io - f2fs_compress_write_end_io - fscrypt_pagecache_folio : get raw page from encrypted page - WB_DATA_TYPE(&folio->page, false) : raw page has gcing flag, it returns F2FS_WB_CP_DATA - dec_page_count(.., F2FS_WB_CP_DATA) In order to fix this issue, we need to detect gcing flag in raw page in f2fs_is_cp_guaranteed(). Fixes: 4961acdd65c9 ("f2fs: fix to tag gcing flag on page during block migration") Reported-by: Jan Prusakowski Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5a7888a5923ca0..81b25dd15a0b9b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -54,7 +54,7 @@ bool f2fs_is_cp_guaranteed(struct page *page) struct f2fs_sb_info *sbi; if (fscrypt_is_bounce_page(page)) - return false; + return page_private_gcing(fscrypt_pagecache_page(page)); inode = mapping->host; sbi = F2FS_I_SB(inode); From a2b4ddc8d83d78159e3778352cf515971b34169a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 10 Apr 2025 11:10:19 +0800 Subject: [PATCH 2286/2924] f2fs: zone: fix to calculate first_zoned_segno correctly [ Upstream commit dc6d9ef57fcf42fac1b3be4bff5ac5b3f1e8f9f3 ] A zoned device can has both conventional zones and sequential zones, so we should not treat first segment of zoned device as first_zoned_segno, instead, we need to check zone type for each zone during traversing zoned device to find first_zoned_segno. Otherwise, for below case, first_zoned_segno will be 0, which could be wrong. create_null_blk 512 2 1024 1024 mkfs.f2fs -m /dev/nullb0 Testcase: export SCRIPTS_PATH=/share/git/scripts test multiple devices w/ zoned device for ((i=0;i<8;i++)) do { zonesize=$((2<<$i)) conzone=$((4096/$zonesize)) seqzone=$((4096/$zonesize)) $SCRIPTS_PATH/nullblk_create.sh 512 $zonesize $conzone $seqzone mkfs.f2fs -f -m /dev/vdb -c /dev/nullb0 mount /dev/vdb /mnt/f2fs touch /mnt/f2fs/file f2fs_io pinfile set /mnt/f2fs/file $((8589934592*2)) stat /mnt/f2fs/file df cat /proc/fs/f2fs/vdb/segment_info umount /mnt/f2fs $SCRIPTS_PATH/nullblk_remove.sh 0 } done test single zoned device for ((i=0;i<8;i++)) do { zonesize=$((2<<$i)) conzone=$((4096/$zonesize)) seqzone=$((4096/$zonesize)) $SCRIPTS_PATH/nullblk_create.sh 512 $zonesize $conzone $seqzone mkfs.f2fs -f -m /dev/nullb0 mount /dev/nullb0 /mnt/f2fs touch /mnt/f2fs/file f2fs_io pinfile set /mnt/f2fs/file $((8589934592*2)) stat /mnt/f2fs/file df cat /proc/fs/f2fs/nullb0/segment_info umount /mnt/f2fs $SCRIPTS_PATH/nullblk_remove.sh 0 } done Fixes: 9703d69d9d15 ("f2fs: support file pinning for zoned devices") Cc: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 36 ++++++++++++++++++++++++++++-------- fs/f2fs/segment.c | 10 +++++----- fs/f2fs/super.c | 41 +++++++++++++++++++++++++++++++++++------ 4 files changed, 69 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 81b25dd15a0b9b..b0b8748ae287f4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3966,7 +3966,7 @@ static int check_swap_activate(struct swap_info_struct *sis, if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec || nr_pblocks % blks_per_sec || - !f2fs_valid_pinned_area(sbi, pblock)) { + f2fs_is_sequential_zone_area(sbi, pblock)) { bool last_extent = false; not_aligned++; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6dcac5ab041c80..4f34a7d9760a10 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1780,7 +1780,7 @@ struct f2fs_sb_info { unsigned int dirty_device; /* for checkpoint data flush */ spinlock_t dev_lock; /* protect dirty_device */ bool aligned_blksize; /* all devices has the same logical blksize */ - unsigned int first_zoned_segno; /* first zoned segno */ + unsigned int first_seq_zone_segno; /* first segno in sequential zone */ /* For write statistics */ u64 sectors_written_start; @@ -4628,12 +4628,16 @@ F2FS_FEATURE_FUNCS(readonly, RO); F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); #ifdef CONFIG_BLK_DEV_ZONED -static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, - block_t blkaddr) +static inline bool f2fs_zone_is_seq(struct f2fs_sb_info *sbi, int devi, + unsigned int zone) { - unsigned int zno = blkaddr / sbi->blocks_per_blkz; + return test_bit(zone, FDEV(devi).blkz_seq); +} - return test_bit(zno, FDEV(devi).blkz_seq); +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, + block_t blkaddr) +{ + return f2fs_zone_is_seq(sbi, devi, blkaddr / sbi->blocks_per_blkz); } #endif @@ -4705,15 +4709,31 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } -static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, +static inline bool f2fs_is_sequential_zone_area(struct f2fs_sb_info *sbi, block_t blkaddr) { if (f2fs_sb_has_blkzoned(sbi)) { +#ifdef CONFIG_BLK_DEV_ZONED int devi = f2fs_target_device_index(sbi, blkaddr); - return !bdev_is_zoned(FDEV(devi).bdev); + if (!bdev_is_zoned(FDEV(devi).bdev)) + return false; + + if (f2fs_is_multi_device(sbi)) { + if (blkaddr < FDEV(devi).start_blk || + blkaddr > FDEV(devi).end_blk) { + f2fs_err(sbi, "Invalid block %x", blkaddr); + return false; + } + blkaddr -= FDEV(devi).start_blk; + } + + return f2fs_blkz_is_seq(sbi, devi, blkaddr); +#else + return false; +#endif } - return true; + return false; } static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 396ef71f41e359..2a71e70c9ac977 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2777,7 +2777,7 @@ static int get_new_segment(struct f2fs_sb_info *sbi, if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning) segno = 0; else - segno = max(sbi->first_zoned_segno, *newseg); + segno = max(sbi->first_seq_zone_segno, *newseg); hint = GET_SEC_FROM_SEG(sbi, segno); } #endif @@ -2789,7 +2789,7 @@ static int get_new_segment(struct f2fs_sb_info *sbi, if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) { /* Write only to sequential zones */ if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) { - hint = GET_SEC_FROM_SEG(sbi, sbi->first_zoned_segno); + hint = GET_SEC_FROM_SEG(sbi, sbi->first_seq_zone_segno); secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); } else secno = find_first_zero_bit(free_i->free_secmap, @@ -2838,9 +2838,9 @@ static int get_new_segment(struct f2fs_sb_info *sbi, /* set it as dirty segment in free segmap */ f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); - /* no free section in conventional zone */ + /* no free section in conventional device or conventional zone */ if (new_sec && pinning && - !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { + f2fs_is_sequential_zone_area(sbi, START_BLOCK(sbi, segno))) { ret = -EAGAIN; goto out_unlock; } @@ -3311,7 +3311,7 @@ int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) { f2fs_down_write(&sbi->gc_lock); - err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), + err = f2fs_gc_range(sbi, 0, sbi->first_seq_zone_segno - 1, true, ZONED_PIN_SEC_REQUIRED_COUNT); f2fs_up_write(&sbi->gc_lock); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f087b2b71c8987..7a3bc85df6a7a9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4304,14 +4304,35 @@ static void f2fs_record_error_work(struct work_struct *work) f2fs_record_stop_reason(sbi); } -static inline unsigned int get_first_zoned_segno(struct f2fs_sb_info *sbi) +static inline unsigned int get_first_seq_zone_segno(struct f2fs_sb_info *sbi) { +#ifdef CONFIG_BLK_DEV_ZONED + unsigned int zoneno, total_zones; int devi; - for (devi = 0; devi < sbi->s_ndevs; devi++) - if (bdev_is_zoned(FDEV(devi).bdev)) - return GET_SEGNO(sbi, FDEV(devi).start_blk); - return 0; + if (!f2fs_sb_has_blkzoned(sbi)) + return NULL_SEGNO; + + for (devi = 0; devi < sbi->s_ndevs; devi++) { + if (!bdev_is_zoned(FDEV(devi).bdev)) + continue; + + total_zones = GET_ZONE_FROM_SEG(sbi, FDEV(devi).total_segments); + + for (zoneno = 0; zoneno < total_zones; zoneno++) { + unsigned int segs, blks; + + if (!f2fs_zone_is_seq(sbi, devi, zoneno)) + continue; + + segs = GET_SEG_FROM_SEC(sbi, + zoneno * sbi->secs_per_zone); + blks = SEGS_TO_BLKS(sbi, segs); + return GET_SEGNO(sbi, FDEV(devi).start_blk + blks); + } + } +#endif + return NULL_SEGNO; } static int f2fs_scan_devices(struct f2fs_sb_info *sbi) @@ -4348,6 +4369,14 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #endif for (i = 0; i < max_devices; i++) { + if (max_devices == 1) { + FDEV(i).total_segments = + le32_to_cpu(raw_super->segment_count_main); + FDEV(i).start_blk = 0; + FDEV(i).end_blk = FDEV(i).total_segments * + BLKS_PER_SEG(sbi); + } + if (i == 0) FDEV(0).bdev_file = sbi->sb->s_bdev_file; else if (!RDEV(i).path[0]) @@ -4718,7 +4747,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->sectors_written_start = f2fs_get_sectors_written(sbi); /* get segno of first zoned block device */ - sbi->first_zoned_segno = get_first_zoned_segno(sbi); + sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi); /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); From 4f09940b5581e44069eb31a66cf7f05c3c35ed04 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Fri, 25 Apr 2025 12:48:03 -0700 Subject: [PATCH 2287/2924] scsi: lpfc: Avoid potential ndlp use-after-free in dev_loss_tmo_callbk [ Upstream commit b5162bb6aa1ec04dff4509b025883524b6d7e7ca ] Smatch detected a potential use-after-free of an ndlp oject in dev_loss_tmo_callbk during driver unload or fatal error handling. Fix by reordering code to avoid potential use-after-free if initial nodelist reference has been previously removed. Fixes: 4281f44ea8bf ("scsi: lpfc: Prevent NDLP reference count underflow in dev_loss_tmo callback") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-scsi/41c1d855-9eb5-416f-ac12-8b61929201a3@stanley.mountain/ Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20250425194806.3585-6-justintee8345@gmail.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/lpfc/lpfc_hbadisc.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 179be6c5a43e07..c2ec4db6728697 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -161,7 +161,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) struct lpfc_hba *phba; struct lpfc_work_evt *evtp; unsigned long iflags; - bool nvme_reg = false; + bool drop_initial_node_ref = false; ndlp = ((struct lpfc_rport_data *)rport->dd_data)->pnode; if (!ndlp) @@ -188,8 +188,13 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) spin_lock_irqsave(&ndlp->lock, iflags); ndlp->rport = NULL; - if (ndlp->fc4_xpt_flags & NVME_XPT_REGD) - nvme_reg = true; + /* Only 1 thread can drop the initial node reference. + * If not registered for NVME and NLP_DROPPED flag is + * clear, remove the initial reference. + */ + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) + if (!test_and_set_bit(NLP_DROPPED, &ndlp->nlp_flag)) + drop_initial_node_ref = true; /* The scsi_transport is done with the rport so lpfc cannot * call to unregister. @@ -200,13 +205,16 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) /* If NLP_XPT_REGD was cleared in lpfc_nlp_unreg_node, * unregister calls were made to the scsi and nvme * transports and refcnt was already decremented. Clear - * the NLP_XPT_REGD flag only if the NVME Rport is + * the NLP_XPT_REGD flag only if the NVME nrport is * confirmed unregistered. */ - if (!nvme_reg && ndlp->fc4_xpt_flags & NLP_XPT_REGD) { - ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD; + if (ndlp->fc4_xpt_flags & NLP_XPT_REGD) { + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) + ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD; spin_unlock_irqrestore(&ndlp->lock, iflags); - lpfc_nlp_put(ndlp); /* may free ndlp */ + + /* Release scsi transport reference */ + lpfc_nlp_put(ndlp); } else { spin_unlock_irqrestore(&ndlp->lock, iflags); } @@ -214,14 +222,8 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) spin_unlock_irqrestore(&ndlp->lock, iflags); } - /* Only 1 thread can drop the initial node reference. If - * another thread has set NLP_DROPPED, this thread is done. - */ - if (nvme_reg || test_bit(NLP_DROPPED, &ndlp->nlp_flag)) - return; - - set_bit(NLP_DROPPED, &ndlp->nlp_flag); - lpfc_nlp_put(ndlp); + if (drop_initial_node_ref) + lpfc_nlp_put(ndlp); return; } From 31224199316f488c8385c5dcdccf7bd80f675b81 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Wed, 23 Apr 2025 13:32:28 -0500 Subject: [PATCH 2288/2924] scsi: smartpqi: Fix smp_processor_id() call trace for preemptible kernels [ Upstream commit 42d033cf4b517e91c187ad2fbd7b30fdc6d2d62c ] Correct kernel call trace when calling smp_processor_id() when called in preemptible kernels by using raw_smp_processor_id(). smp_processor_id() checks to see if preemption is disabled and if not, issue an error message followed by a call to dump_stack(). Brief example of call trace: kernel: check_preemption_disabled: 436 callbacks suppressed kernel: BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u1025:0/2354 kernel: caller is pqi_scsi_queue_command+0x183/0x310 [smartpqi] kernel: CPU: 129 PID: 2354 Comm: kworker/u1025:0 kernel: ... kernel: Workqueue: writeback wb_workfn (flush-253:0) kernel: Call Trace: kernel: kernel: dump_stack_lvl+0x34/0x48 kernel: check_preemption_disabled+0xdd/0xe0 kernel: pqi_scsi_queue_command+0x183/0x310 [smartpqi] kernel: ... Fixes: 283dcc1b142e ("scsi: smartpqi: add counter for parity write stream requests") Reviewed-by: Scott Benesh Reviewed-by: Mike McGowen Tested-by: Don Brace Signed-off-by: Yi Zhang Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20250423183229.538572-5-don.brace@microchip.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/smartpqi/smartpqi_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 8a26eca4fdc9b8..6c9dec7e3128fd 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -5990,7 +5990,7 @@ static bool pqi_is_parity_write_stream(struct pqi_ctrl_info *ctrl_info, pqi_stream_data->next_lba = rmd.first_block + rmd.block_cnt; pqi_stream_data->last_accessed = jiffies; - per_cpu_ptr(device->raid_io_stats, smp_processor_id())->write_stream_cnt++; + per_cpu_ptr(device->raid_io_stats, raw_smp_processor_id())->write_stream_cnt++; return true; } @@ -6069,7 +6069,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm rc = pqi_raid_bypass_submit_scsi_cmd(ctrl_info, device, scmd, queue_group); if (rc == 0 || rc == SCSI_MLQUEUE_HOST_BUSY) { raid_bypassed = true; - per_cpu_ptr(device->raid_io_stats, smp_processor_id())->raid_bypass_cnt++; + per_cpu_ptr(device->raid_io_stats, raw_smp_processor_id())->raid_bypass_cnt++; } } if (!raid_bypassed) From f22ead0ca9ddee28be8f072e748c4dd1f5653b53 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 28 Apr 2025 11:22:06 +0100 Subject: [PATCH 2289/2924] crypto/krb5: Fix change to use SG miter to use offset [ Upstream commit eed848871c96d4b5a7b06307755b75abd0cc7a06 ] The recent patch to make the rfc3961 simplified code use sg_miter rather than manually walking the scatterlist to hash the contents of a buffer described by that scatterlist failed to take the starting offset into account. This is indicated by the selftests reporting: krb5: Running aes128-cts-hmac-sha256-128 mic krb5: !!! TESTFAIL crypto/krb5/selftest.c:446 krb5: MIC mismatch Fix this by calling sg_miter_skip() before doing the loop to advance by the offset. This only affects packet signing modes and not full encryption in RxGK because, for full encryption, the message digest is handled inside the authenc and krb5enc drivers. Note: Nothing in linus/master uses the krb5lib, though the bug is there. It is used by AF_RXRPC's RxGK implementation in -next, no need to backport. Fixes: da6f9bf40ac2 ("crypto: krb5 - Use SG miter instead of doing it by hand") Reported-by: Marc Dionne Signed-off-by: David Howells cc: Chuck Lever cc: Simon Horman cc: linux-afs@lists.infradead.org Acked-by: Herbert Xu Link: https://patch.msgid.link/3824017.1745835726@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- crypto/krb5/rfc3961_simplified.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/krb5/rfc3961_simplified.c b/crypto/krb5/rfc3961_simplified.c index 79180d28baa9fb..e49cbdec7c404d 100644 --- a/crypto/krb5/rfc3961_simplified.c +++ b/crypto/krb5/rfc3961_simplified.c @@ -89,6 +89,7 @@ int crypto_shash_update_sg(struct shash_desc *desc, struct scatterlist *sg, sg_miter_start(&miter, sg, sg_nents(sg), SG_MITER_FROM_SG | SG_MITER_LOCAL); + sg_miter_skip(&miter, offset); for (i = 0; i < len; i += n) { sg_miter_next(&miter); n = min(miter.length, len - i); From a12b3932683a960ffdde1f4f1332eda349d0b04b Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Mon, 28 Apr 2025 18:02:54 +0000 Subject: [PATCH 2290/2924] selftests/bpf: Fix kmem_cache iterator draining [ Upstream commit 38d976c32d85ef12dcd2b8a231196f7049548477 ] The closing parentheses around the read syscall is misplaced, causing single byte reads from the iterator instead of buf sized reads. While the end result is the same, many more read calls than necessary are performed. $ tools/testing/selftests/bpf/vmtest.sh "./test_progs -t kmem_cache_iter" 145/1 kmem_cache_iter/check_task_struct:OK 145/2 kmem_cache_iter/check_slabinfo:OK 145/3 kmem_cache_iter/open_coded_iter:OK 145 kmem_cache_iter:OK Summary: 1/3 PASSED, 0 SKIPPED, 0 FAILED Fixes: a496d0cdc84d ("selftests/bpf: Add a test for kmem_cache_iter") Signed-off-by: T.J. Mercier Signed-off-by: Martin KaFai Lau Acked-by: Song Liu Acked-by: Namhyung Kim Link: https://patch.msgid.link/20250428180256.1482899-1-tjmercier@google.com Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c index 8e13a3416a21d2..1de14b111931aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c @@ -104,7 +104,7 @@ void test_kmem_cache_iter(void) goto destroy; memset(buf, 0, sizeof(buf)); - while (read(iter_fd, buf, sizeof(buf) > 0)) { + while (read(iter_fd, buf, sizeof(buf)) > 0) { /* Read out all contents */ printf("%s", buf); } From d19aa5c648eebb0e243f9f8e58d30a56da91e186 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Wed, 30 Apr 2025 12:08:20 +0000 Subject: [PATCH 2291/2924] libbpf: Use proper errno value in linker [ Upstream commit 358b1c0f56ebb6996fcec7dcdcf6bae5dcbc8b6c ] Return values of the linker_append_sec_data() and the linker_append_elf_relos() functions are propagated all the way up to users of libbpf API. In some error cases these functions return -1 which will be seen as -EPERM from user's point of view. Instead, return a more reasonable -EINVAL. Fixes: faf6ed321cf6 ("libbpf: Add BPF static linker APIs") Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250430120820.2262053-1-a.s.protopopov@gmail.com Signed-off-by: Sasha Levin --- tools/lib/bpf/linker.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 56f5068e2ebab0..a469e5d4fee70e 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -1376,7 +1376,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj } else { if (!secs_match(dst_sec, src_sec)) { pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* "license" and "version" sections are deduped */ @@ -2223,7 +2223,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob } } else if (!secs_match(dst_sec, src_sec)) { pr_warn("sections %s are not compatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* shdr->sh_link points to SYMTAB */ From a6b02ec0a44da529fbf75f13590d9275d1fc0b61 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 28 Apr 2025 17:44:02 +0200 Subject: [PATCH 2292/2924] bpf: Allow XDP dev-bound programs to perform XDP_REDIRECT into maps [ Upstream commit 714070c4cb7a10ff57450a618a936775f3036245 ] In the current implementation if the program is dev-bound to a specific device, it will not be possible to perform XDP_REDIRECT into a DEVMAP or CPUMAP even if the program is running in the driver NAPI context and it is not attached to any map entry. This seems in contrast with the explanation available in bpf_prog_map_compatible routine. Fix the issue introducing __bpf_prog_map_compatible utility routine in order to avoid bpf_prog_is_dev_bound() check running bpf_check_tail_call() at program load time (bpf_prog_select_runtime()). Continue forbidding to attach a dev-bound program to XDP maps (BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_DEVMAP and BPF_MAP_TYPE_CPUMAP). Fixes: 3d76a4d3d4e59 ("bpf: XDP metadata RX kfuncs") Signed-off-by: Lorenzo Bianconi Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev Signed-off-by: Sasha Levin --- kernel/bpf/core.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ba6b6118cf5040..a3e57168842119 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2358,8 +2358,8 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, return 0; } -bool bpf_prog_map_compatible(struct bpf_map *map, - const struct bpf_prog *fp) +static bool __bpf_prog_map_compatible(struct bpf_map *map, + const struct bpf_prog *fp) { enum bpf_prog_type prog_type = resolve_prog_type(fp); bool ret; @@ -2368,14 +2368,6 @@ bool bpf_prog_map_compatible(struct bpf_map *map, if (fp->kprobe_override) return false; - /* XDP programs inserted into maps are not guaranteed to run on - * a particular netdev (and can run outside driver context entirely - * in the case of devmap and cpumap). Until device checks - * are implemented, prohibit adding dev-bound programs to program maps. - */ - if (bpf_prog_is_dev_bound(aux)) - return false; - spin_lock(&map->owner.lock); if (!map->owner.type) { /* There's no owner yet where we could check for @@ -2409,6 +2401,19 @@ bool bpf_prog_map_compatible(struct bpf_map *map, return ret; } +bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp) +{ + /* XDP programs inserted into maps are not guaranteed to run on + * a particular netdev (and can run outside driver context entirely + * in the case of devmap and cpumap). Until device checks + * are implemented, prohibit adding dev-bound programs to program maps. + */ + if (bpf_prog_is_dev_bound(fp->aux)) + return false; + + return __bpf_prog_map_compatible(map, fp); +} + static int bpf_check_tail_call(const struct bpf_prog *fp) { struct bpf_prog_aux *aux = fp->aux; @@ -2421,7 +2426,7 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) if (!map_type_contains_progs(map)) continue; - if (!bpf_prog_map_compatible(map, fp)) { + if (!__bpf_prog_map_compatible(map, fp)) { ret = -EINVAL; goto out; } From 43d262c2ab19f8b1f28ea2e47f71613a02744e0e Mon Sep 17 00:00:00 2001 From: Huajian Yang Date: Thu, 17 Apr 2025 17:29:53 +0800 Subject: [PATCH 2293/2924] netfilter: bridge: Move specific fragmented packet to slow_path instead of dropping it [ Upstream commit aa04c6f45b9224b949aa35d4fa5f8d0ba07b23d4 ] The config NF_CONNTRACK_BRIDGE will change the bridge forwarding for fragmented packets. The original bridge does not know that it is a fragmented packet and forwards it directly, after NF_CONNTRACK_BRIDGE is enabled, function nf_br_ip_fragment and br_ip6_fragment will check the headroom. In original br_forward, insufficient headroom of skb may indeed exist, but there's still a way to save the skb in the device driver after dev_queue_xmit.So droping the skb will change the original bridge forwarding in some cases. Fixes: 3c171f496ef5 ("netfilter: bridge: add connection tracking system") Signed-off-by: Huajian Yang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/bridge/netfilter/nf_conntrack_bridge.c | 12 ++++++------ net/ipv6/netfilter.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 816bb0fde718ed..6482de4d875092 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -60,19 +60,19 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, struct ip_fraglist_iter iter; struct sk_buff *frag; - if (first_len - hlen > mtu || - skb_headroom(skb) < ll_rs) + if (first_len - hlen > mtu) goto blackhole; - if (skb_cloned(skb)) + if (skb_cloned(skb) || + skb_headroom(skb) < ll_rs) goto slow_path; skb_walk_frags(skb, frag) { - if (frag->len > mtu || - skb_headroom(frag) < hlen + ll_rs) + if (frag->len > mtu) goto blackhole; - if (skb_shared(frag)) + if (skb_shared(frag) || + skb_headroom(frag) < hlen + ll_rs) goto slow_path; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 581ce055bf520f..4541836ee3da20 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -164,20 +164,20 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct ip6_fraglist_iter iter; struct sk_buff *frag2; - if (first_len - hlen > mtu || - skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) + if (first_len - hlen > mtu) goto blackhole; - if (skb_cloned(skb)) + if (skb_cloned(skb) || + skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) goto slow_path; skb_walk_frags(skb, frag2) { - if (frag2->len > mtu || - skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) + if (frag2->len > mtu) goto blackhole; /* Partially cloned skb? */ - if (skb_shared(frag2)) + if (skb_shared(frag2) || + skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) goto slow_path; } From 4d6ae1842cb29719e2866470aadf4742382acfea Mon Sep 17 00:00:00 2001 From: Zhongqiu Duan Date: Thu, 17 Apr 2025 15:49:30 +0000 Subject: [PATCH 2294/2924] netfilter: nft_quota: match correctly when the quota just depleted [ Upstream commit bfe7cfb65c753952735c3eed703eba9a8b96a18d ] The xt_quota compares skb length with remaining quota, but the nft_quota compares it with consumed bytes. The xt_quota can match consumed bytes up to quota at maximum. But the nft_quota break match when consumed bytes equal to quota. i.e., nft_quota match consumed bytes in [0, quota - 1], not [0, quota]. Fixes: 795595f68d6c ("netfilter: nft_quota: dump consumed quota") Signed-off-by: Zhongqiu Duan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_quota.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 9b2d7463d3d326..df0798da2329b9 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -19,10 +19,16 @@ struct nft_quota { }; static inline bool nft_overquota(struct nft_quota *priv, - const struct sk_buff *skb) + const struct sk_buff *skb, + bool *report) { - return atomic64_add_return(skb->len, priv->consumed) >= - atomic64_read(&priv->quota); + u64 consumed = atomic64_add_return(skb->len, priv->consumed); + u64 quota = atomic64_read(&priv->quota); + + if (report) + *report = consumed >= quota; + + return consumed > quota; } static inline bool nft_quota_invert(struct nft_quota *priv) @@ -34,7 +40,7 @@ static inline void nft_quota_do_eval(struct nft_quota *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv)) + if (nft_overquota(priv, pkt->skb, NULL) ^ nft_quota_invert(priv)) regs->verdict.code = NFT_BREAK; } @@ -51,13 +57,13 @@ static void nft_quota_obj_eval(struct nft_object *obj, const struct nft_pktinfo *pkt) { struct nft_quota *priv = nft_obj_data(obj); - bool overquota; + bool overquota, report; - overquota = nft_overquota(priv, pkt->skb); + overquota = nft_overquota(priv, pkt->skb, &report); if (overquota ^ nft_quota_invert(priv)) regs->verdict.code = NFT_BREAK; - if (overquota && + if (report && !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0, NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC); From 43fe1181f738295624696ae9ff611790edb65b5e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 22 Apr 2025 21:52:43 +0200 Subject: [PATCH 2295/2924] netfilter: nft_set_pipapo: prevent overflow in lookup table allocation [ Upstream commit 4c5c6aa9967dbe55bd017bb509885928d0f31206 ] When calculating the lookup table size, ensure the following multiplication does not overflow: - desc->field_len[] maximum value is U8_MAX multiplied by NFT_PIPAPO_GROUPS_PER_BYTE(f) that can be 2, worst case. - NFT_PIPAPO_BUCKETS(f->bb) is 2^8, worst case. - sizeof(unsigned long), from sizeof(*f->lt), lt in struct nft_pipapo_field. Then, use check_mul_overflow() to multiply by bucket size and then use check_add_overflow() to the alignment for avx2 (if needed). Finally, add lt_size_check_overflow() helper and use it to consolidate this. While at it, replace leftover allocation using the GFP_KERNEL to GFP_KERNEL_ACCOUNT for consistency, in pipapo_resize(). Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Pablo Neira Ayuso Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo.c | 58 ++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 7be342b495f5f7..0529e4ef752070 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -683,6 +683,30 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f, return 0; } + +/** + * lt_calculate_size() - Get storage size for lookup table with overflow check + * @groups: Amount of bit groups + * @bb: Number of bits grouped together in lookup table buckets + * @bsize: Size of each bucket in lookup table, in longs + * + * Return: allocation size including alignment overhead, negative on overflow + */ +static ssize_t lt_calculate_size(unsigned int groups, unsigned int bb, + unsigned int bsize) +{ + ssize_t ret = groups * NFT_PIPAPO_BUCKETS(bb) * sizeof(long); + + if (check_mul_overflow(ret, bsize, &ret)) + return -1; + if (check_add_overflow(ret, NFT_PIPAPO_ALIGN_HEADROOM, &ret)) + return -1; + if (ret > INT_MAX) + return -1; + + return ret; +} + /** * pipapo_resize() - Resize lookup or mapping table, or both * @f: Field containing lookup and mapping tables @@ -701,6 +725,7 @@ static int pipapo_resize(struct nft_pipapo_field *f, long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p; unsigned int new_bucket_size, copy; int group, bucket, err; + ssize_t lt_size; if (rules >= NFT_PIPAPO_RULE0_MAX) return -ENOSPC; @@ -719,10 +744,11 @@ static int pipapo_resize(struct nft_pipapo_field *f, else copy = new_bucket_size; - new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) * - new_bucket_size * sizeof(*new_lt) + - NFT_PIPAPO_ALIGN_HEADROOM, - GFP_KERNEL); + lt_size = lt_calculate_size(f->groups, f->bb, new_bucket_size); + if (lt_size < 0) + return -ENOMEM; + + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); if (!new_lt) return -ENOMEM; @@ -907,7 +933,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) { unsigned int groups, bb; unsigned long *new_lt; - size_t lt_size; + ssize_t lt_size; lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize * sizeof(*f->lt); @@ -917,15 +943,17 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) groups = f->groups * 2; bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET; - lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * - sizeof(*f->lt); + lt_size = lt_calculate_size(groups, bb, f->bsize); + if (lt_size < 0) + return; } else if (f->bb == NFT_PIPAPO_GROUP_BITS_LARGE_SET && lt_size < NFT_PIPAPO_LT_SIZE_LOW) { groups = f->groups / 2; bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET; - lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * - sizeof(*f->lt); + lt_size = lt_calculate_size(groups, bb, f->bsize); + if (lt_size < 0) + return; /* Don't increase group width if the resulting lookup table size * would exceed the upper size threshold for a "small" set. @@ -936,7 +964,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) return; } - new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT); + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); if (!new_lt) return; @@ -1451,13 +1479,15 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) for (i = 0; i < old->field_count; i++) { unsigned long *new_lt; + ssize_t lt_size; memcpy(dst, src, offsetof(struct nft_pipapo_field, lt)); - new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) * - src->bsize * sizeof(*dst->lt) + - NFT_PIPAPO_ALIGN_HEADROOM, - GFP_KERNEL_ACCOUNT); + lt_size = lt_calculate_size(src->groups, src->bb, src->bsize); + if (lt_size < 0) + goto out_lt; + + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); if (!new_lt) goto out_lt; From fc096a0cd2017cb0aa1e7fb83131410af9283910 Mon Sep 17 00:00:00 2001 From: Vlad Dumitrescu Date: Mon, 28 Apr 2025 14:30:18 +0300 Subject: [PATCH 2296/2924] IB/cm: Drop lockdep assert and WARN when freeing old msg [ Upstream commit 7590649ee7af381a9d1153143026dec124c5798e ] The send completion handler can run after cm_id has advanced to another message. The cm_id lock is not needed in this case, but a recent change re-used cm_free_priv_msg(), which asserts that the lock is held and WARNs if the cm_id's currently outstanding msg is different than the one being freed. Fixes: 1e5159219076 ("IB/cm: Do not hold reference on cm_id unless needed") Signed-off-by: Vlad Dumitrescu Reviewed-by: Sean Hefty Link: https://patch.msgid.link/0c364c29142f72b7875fdeba51f3c9bd6ca863ee.1745839788.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index effa53dd680027..e64cbd034a2a19 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3786,7 +3786,8 @@ static void cm_process_send_error(struct cm_id_private *cm_id_priv, spin_lock_irq(&cm_id_priv->lock); if (msg != cm_id_priv->msg) { spin_unlock_irq(&cm_id_priv->lock); - cm_free_priv_msg(msg); + cm_free_msg(msg); + cm_deref_id(cm_id_priv); return; } cm_free_priv_msg(msg); From 0a7790cbba654e925243571cf2f24d61603d3ed3 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 28 Apr 2025 14:34:07 +0300 Subject: [PATCH 2297/2924] RDMA/mlx5: Fix error flow upon firmware failure for RQ destruction [ Upstream commit 5d2ea5aebbb2f3ebde4403f9c55b2b057e5dd2d6 ] Upon RQ destruction if the firmware command fails which is the last resource to be destroyed some SW resources were already cleaned regardless of the failure. Now properly rollback the object to its original state upon such failure. In order to avoid a use-after free in case someone tries to destroy the object again, which results in the following kernel trace: refcount_t: underflow; use-after-free. WARNING: CPU: 0 PID: 37589 at lib/refcount.c:28 refcount_warn_saturate+0xf4/0x148 Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) rfkill mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) psample mlxfw(OE) mlx_compat(OE) macsec tls pci_hyperv_intf sunrpc vfat fat virtio_net net_failover failover fuse loop nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vmw_vmci vsock xfs crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce virtio_console virtio_gpu virtio_blk virtio_dma_buf virtio_mmio dm_mirror dm_region_hash dm_log dm_mod xpmem(OE) CPU: 0 UID: 0 PID: 37589 Comm: python3 Kdump: loaded Tainted: G OE ------- --- 6.12.0-54.el10.aarch64 #1 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : refcount_warn_saturate+0xf4/0x148 lr : refcount_warn_saturate+0xf4/0x148 sp : ffff80008b81b7e0 x29: ffff80008b81b7e0 x28: ffff000133d51600 x27: 0000000000000001 x26: 0000000000000000 x25: 00000000ffffffea x24: ffff00010ae80f00 x23: ffff00010ae80f80 x22: ffff0000c66e5d08 x21: 0000000000000000 x20: ffff0000c66e0000 x19: ffff00010ae80340 x18: 0000000000000006 x17: 0000000000000000 x16: 0000000000000020 x15: ffff80008b81b37f x14: 0000000000000000 x13: 2e656572662d7265 x12: ffff80008283ef78 x11: ffff80008257efd0 x10: ffff80008283efd0 x9 : ffff80008021ed90 x8 : 0000000000000001 x7 : 00000000000bffe8 x6 : c0000000ffff7fff x5 : ffff0001fb8e3408 x4 : 0000000000000000 x3 : ffff800179993000 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000133d51600 Call trace: refcount_warn_saturate+0xf4/0x148 mlx5_core_put_rsc+0x88/0xa0 [mlx5_ib] mlx5_core_destroy_rq_tracked+0x64/0x98 [mlx5_ib] mlx5_ib_destroy_wq+0x34/0x80 [mlx5_ib] ib_destroy_wq_user+0x30/0xc0 [ib_core] uverbs_free_wq+0x28/0x58 [ib_uverbs] destroy_hw_idr_uobject+0x34/0x78 [ib_uverbs] uverbs_destroy_uobject+0x48/0x240 [ib_uverbs] __uverbs_cleanup_ufile+0xd4/0x1a8 [ib_uverbs] uverbs_destroy_ufile_hw+0x48/0x120 [ib_uverbs] ib_uverbs_close+0x2c/0x100 [ib_uverbs] __fput+0xd8/0x2f0 __fput_sync+0x50/0x70 __arm64_sys_close+0x40/0x90 invoke_syscall.constprop.0+0x74/0xd0 do_el0_svc+0x48/0xe8 el0_svc+0x44/0x1d0 el0t_64_sync_handler+0x120/0x130 el0t_64_sync+0x1a4/0x1a8 Fixes: e2013b212f9f ("net/mlx5_core: Add RQ and SQ event handling") Signed-off-by: Patrisious Haddad Link: https://patch.msgid.link/3181433ccdd695c63560eeeb3f0c990961732101.1745839855.git.leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/qpc.c | 30 ++++++++++++++++++++++++++++-- include/linux/mlx5/driver.h | 1 + 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index d3dcc272200afa..146d03ae40bd9f 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -21,8 +21,10 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) spin_lock_irqsave(&table->lock, flags); common = radix_tree_lookup(&table->tree, rsn); - if (common) + if (common && !common->invalid) refcount_inc(&common->refcount); + else + common = NULL; spin_unlock_irqrestore(&table->lock, flags); @@ -178,6 +180,18 @@ static int create_resource_common(struct mlx5_ib_dev *dev, return 0; } +static void modify_resource_common_state(struct mlx5_ib_dev *dev, + struct mlx5_core_qp *qp, + bool invalid) +{ + struct mlx5_qp_table *table = &dev->qp_table; + unsigned long flags; + + spin_lock_irqsave(&table->lock, flags); + qp->common.invalid = invalid; + spin_unlock_irqrestore(&table->lock, flags); +} + static void destroy_resource_common(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp) { @@ -609,8 +623,20 @@ int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen, int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, struct mlx5_core_qp *rq) { + int ret; + + /* The rq destruction can be called again in case it fails, hence we + * mark the common resource as invalid and only once FW destruction + * is completed successfully we actually destroy the resources. + */ + modify_resource_common_state(dev, rq, true); + ret = destroy_rq_tracked(dev, rq->qpn, rq->uid); + if (ret) { + modify_resource_common_state(dev, rq, false); + return ret; + } destroy_resource_common(dev, rq); - return destroy_rq_tracked(dev, rq->qpn, rq->uid); + return 0; } static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d1dfbad9a44730..e6ba8f4f4bd1f4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -398,6 +398,7 @@ struct mlx5_core_rsc_common { enum mlx5_res_type res; refcount_t refcount; struct completion free; + bool invalid; }; struct mlx5_uars_page { From 1340a95ec972a45272cfda2a5856372cdf0cf00a Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Fri, 2 May 2025 19:30:31 +0000 Subject: [PATCH 2298/2924] bpf: Fix uninitialized values in BPF_{CORE,PROBE}_READ [ Upstream commit 41d4ce6df3f4945341ec509a840cc002a413b6cc ] With the latest LLVM bpf selftests build will fail with the following error message: progs/profiler.inc.h:710:31: error: default initialization of an object of type 'typeof ((parent_task)->real_cred->uid.val)' (aka 'const unsigned int') leaves the object uninitialized and is incompatible with C++ [-Werror,-Wdefault-const-init-unsafe] 710 | proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val); | ^ tools/testing/selftests/bpf/tools/include/bpf/bpf_core_read.h:520:35: note: expanded from macro 'BPF_CORE_READ' 520 | ___type((src), a, ##__VA_ARGS__) __r; \ | ^ This happens because BPF_CORE_READ (and other macro) declare the variable __r using the ___type macro which can inherit const modifier from intermediate types. Fix this by using __typeof_unqual__, when supported. (And when it is not supported, the problem shouldn't appear, as older compilers haven't complained.) Fixes: 792001f4f7aa ("libbpf: Add user-space variants of BPF_CORE_READ() family of macros") Fixes: a4b09a9ef945 ("libbpf: Add non-CO-RE variants of BPF_CORE_READ() macro family") Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250502193031.3522715-1-a.s.protopopov@gmail.com Signed-off-by: Sasha Levin --- tools/lib/bpf/bpf_core_read.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index c0e13cdf966077..b997c68bd94536 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -388,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; #define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j #define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) +#if defined(__clang__) && (__clang_major__ >= 19) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#elif defined(__GNUC__) && (__GNUC__ >= 14) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#else #define ___type(...) typeof(___arrow(__VA_ARGS__)) +#endif #define ___read(read_fn, dst, src_type, src, accessor) \ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) From f15264821d02ad80864f72ffc3df847167b72199 Mon Sep 17 00:00:00 2001 From: Qinxin Xia Date: Tue, 22 Apr 2025 19:29:51 +0800 Subject: [PATCH 2299/2924] iommu/arm-smmu-v3: Fix incorrect return in arm_smmu_attach_dev [ Upstream commit be5a2d3f8f9738ea1426e7d2243707164ed5bac2 ] After commit 48e7b8e284e5 ("iommu/arm-smmu-v3: Remove arm_smmu_domain_finalise() during attach"), an error code is not returned on the attach path when the smmu does not match with the domain. This causes problems with VFIO because vfio_iommu_type1_attach_group() relies on this check to determine domain compatability. Re-instate the -EINVAL return value when the SMMU doesn't match on the device attach path. Fixes: 48e7b8e284e5 ("iommu/arm-smmu-v3: Remove arm_smmu_domain_finalise() during attach") Signed-off-by: Qinxin Xia Link: https://lore.kernel.org/r/20250422112951.2027969-1-xiaqinxin@huawei.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 48d910399a1ba6..be8d0f7db617d0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2953,7 +2953,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) smmu = master->smmu; if (smmu_domain->smmu != smmu) - return ret; + return -EINVAL; if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); From 740dab2338f70d93aa8235db5a9541d846d7b21c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 7 Apr 2025 12:38:51 -0400 Subject: [PATCH 2300/2924] tracing: Move histogram trigger variables from stack to per CPU structure [ Upstream commit 7ab0fc61ce73040f89b12d76a8279995ec283541 ] The histogram trigger has three somewhat large arrays on the kernel stack: unsigned long entries[HIST_STACKTRACE_DEPTH]; u64 var_ref_vals[TRACING_MAP_VARS_MAX]; char compound_key[HIST_KEY_SIZE_MAX]; Checking the function event_hist_trigger() stack frame size, it currently uses 816 bytes for its stack frame due to these variables! Instead, allocate a per CPU structure that holds these arrays for each context level (normal, softirq, irq and NMI). That is, each CPU will have 4 of these structures. This will be allocated when the first histogram trigger is enabled and freed when the last is disabled. When the histogram callback triggers, it will request this structure. The request will disable preemption, get the per CPU structure at the index of the per CPU variable, and increment that variable. The callback will use the arrays in this structure to perform its work and then release the structure. That in turn will simply decrement the per CPU index and enable preemption. Moving the variables from the kernel stack to the per CPU structure brings the stack frame of event_hist_trigger() down to just 112 bytes. Cc: Mathieu Desnoyers Cc: Tom Zanussi Link: https://lore.kernel.org/20250407123851.74ea8d58@gandalf.local.home Fixes: 067fe038e70f6 ("tracing: Add variable reference handling to hist triggers") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_events_hist.c | 120 +++++++++++++++++++++++++++---- 1 file changed, 105 insertions(+), 15 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1260c23cfa5fc4..0ec692f80aefcc 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5246,17 +5246,94 @@ hist_trigger_actions(struct hist_trigger_data *hist_data, } } +/* + * The hist_pad structure is used to save information to create + * a histogram from the histogram trigger. It's too big to store + * on the stack, so when the histogram trigger is initialized + * a percpu array of 4 hist_pad structures is allocated. + * This will cover every context from normal, softirq, irq and NMI + * in the very unlikely event that a tigger happens at each of + * these contexts and interrupts a currently active trigger. + */ +struct hist_pad { + unsigned long entries[HIST_STACKTRACE_DEPTH]; + u64 var_ref_vals[TRACING_MAP_VARS_MAX]; + char compound_key[HIST_KEY_SIZE_MAX]; +}; + +static struct hist_pad __percpu *hist_pads; +static DEFINE_PER_CPU(int, hist_pad_cnt); +static refcount_t hist_pad_ref; + +/* One hist_pad for every context (normal, softirq, irq, NMI) */ +#define MAX_HIST_CNT 4 + +static int alloc_hist_pad(void) +{ + lockdep_assert_held(&event_mutex); + + if (refcount_read(&hist_pad_ref)) { + refcount_inc(&hist_pad_ref); + return 0; + } + + hist_pads = __alloc_percpu(sizeof(struct hist_pad) * MAX_HIST_CNT, + __alignof__(struct hist_pad)); + if (!hist_pads) + return -ENOMEM; + + refcount_set(&hist_pad_ref, 1); + return 0; +} + +static void free_hist_pad(void) +{ + lockdep_assert_held(&event_mutex); + + if (!refcount_dec_and_test(&hist_pad_ref)) + return; + + free_percpu(hist_pads); + hist_pads = NULL; +} + +static struct hist_pad *get_hist_pad(void) +{ + struct hist_pad *hist_pad; + int cnt; + + if (WARN_ON_ONCE(!hist_pads)) + return NULL; + + preempt_disable(); + + hist_pad = per_cpu_ptr(hist_pads, smp_processor_id()); + + if (this_cpu_read(hist_pad_cnt) == MAX_HIST_CNT) { + preempt_enable(); + return NULL; + } + + cnt = this_cpu_inc_return(hist_pad_cnt) - 1; + + return &hist_pad[cnt]; +} + +static void put_hist_pad(void) +{ + this_cpu_dec(hist_pad_cnt); + preempt_enable(); +} + static void event_hist_trigger(struct event_trigger_data *data, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe) { struct hist_trigger_data *hist_data = data->private_data; bool use_compound_key = (hist_data->n_keys > 1); - unsigned long entries[HIST_STACKTRACE_DEPTH]; - u64 var_ref_vals[TRACING_MAP_VARS_MAX]; - char compound_key[HIST_KEY_SIZE_MAX]; struct tracing_map_elt *elt = NULL; struct hist_field *key_field; + struct hist_pad *hist_pad; u64 field_contents; void *key = NULL; unsigned int i; @@ -5264,12 +5341,18 @@ static void event_hist_trigger(struct event_trigger_data *data, if (unlikely(!rbe)) return; - memset(compound_key, 0, hist_data->key_size); + hist_pad = get_hist_pad(); + if (!hist_pad) + return; + + memset(hist_pad->compound_key, 0, hist_data->key_size); for_each_hist_key_field(i, hist_data) { key_field = hist_data->fields[i]; if (key_field->flags & HIST_FIELD_FL_STACKTRACE) { + unsigned long *entries = hist_pad->entries; + memset(entries, 0, HIST_STACKTRACE_SIZE); if (key_field->field) { unsigned long *stack, n_entries; @@ -5293,26 +5376,31 @@ static void event_hist_trigger(struct event_trigger_data *data, } if (use_compound_key) - add_to_key(compound_key, key, key_field, rec); + add_to_key(hist_pad->compound_key, key, key_field, rec); } if (use_compound_key) - key = compound_key; + key = hist_pad->compound_key; if (hist_data->n_var_refs && - !resolve_var_refs(hist_data, key, var_ref_vals, false)) - return; + !resolve_var_refs(hist_data, key, hist_pad->var_ref_vals, false)) + goto out; elt = tracing_map_insert(hist_data->map, key); if (!elt) - return; + goto out; - hist_trigger_elt_update(hist_data, elt, buffer, rec, rbe, var_ref_vals); + hist_trigger_elt_update(hist_data, elt, buffer, rec, rbe, hist_pad->var_ref_vals); - if (resolve_var_refs(hist_data, key, var_ref_vals, true)) - hist_trigger_actions(hist_data, elt, buffer, rec, rbe, key, var_ref_vals); + if (resolve_var_refs(hist_data, key, hist_pad->var_ref_vals, true)) { + hist_trigger_actions(hist_data, elt, buffer, rec, rbe, + key, hist_pad->var_ref_vals); + } hist_poll_wakeup(); + + out: + put_hist_pad(); } static void hist_trigger_stacktrace_print(struct seq_file *m, @@ -6157,6 +6245,9 @@ static int event_hist_trigger_init(struct event_trigger_data *data) { struct hist_trigger_data *hist_data = data->private_data; + if (alloc_hist_pad() < 0) + return -ENOMEM; + if (!data->ref && hist_data->attrs->name) save_named_trigger(hist_data->attrs->name, data); @@ -6201,6 +6292,7 @@ static void event_hist_trigger_free(struct event_trigger_data *data) destroy_hist_data(hist_data); } + free_hist_pad(); } static const struct event_trigger_ops event_hist_trigger_ops = { @@ -6216,9 +6308,7 @@ static int event_hist_trigger_named_init(struct event_trigger_data *data) save_named_trigger(data->named_data->name, data); - event_hist_trigger_init(data->named_data); - - return 0; + return event_hist_trigger_init(data->named_data); } static void event_hist_trigger_named_free(struct event_trigger_data *data) From d29df0c1307dd937d404ebeecec02a6c68f4c89e Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 25 Apr 2025 14:12:55 +0200 Subject: [PATCH 2301/2924] clk: qcom: camcc-sm6350: Add *_wait_val values for GDSCs [ Upstream commit e7b1c13280ad866f3b935f6c658713c41db61635 ] Compared to the msm-4.19 driver the mainline GDSC driver always sets the bits for en_rest, en_few & clk_dis, and if those values are not set per-GDSC in the respective driver then the default value from the GDSC driver is used. The downstream driver only conditionally sets clk_dis_wait_val if qcom,clk-dis-wait-val is given in devicetree. Correct this situation by explicitly setting those values. For all GDSCs the reset value of those bits are used. Fixes: 80f5451d9a7c ("clk: qcom: Add camera clock controller driver for SM6350") Signed-off-by: Luca Weiss Reviewed-by: Taniya Das Link: https://lore.kernel.org/r/20250425-sm6350-gdsc-val-v1-1-1f252d9c5e4e@fairphone.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/camcc-sm6350.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/clk/qcom/camcc-sm6350.c b/drivers/clk/qcom/camcc-sm6350.c index 1871970fb046d7..8aac97d29ce3ff 100644 --- a/drivers/clk/qcom/camcc-sm6350.c +++ b/drivers/clk/qcom/camcc-sm6350.c @@ -1695,6 +1695,9 @@ static struct clk_branch camcc_sys_tmr_clk = { static struct gdsc bps_gdsc = { .gdscr = 0x6004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "bps_gdsc", }, @@ -1704,6 +1707,9 @@ static struct gdsc bps_gdsc = { static struct gdsc ipe_0_gdsc = { .gdscr = 0x7004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "ipe_0_gdsc", }, @@ -1713,6 +1719,9 @@ static struct gdsc ipe_0_gdsc = { static struct gdsc ife_0_gdsc = { .gdscr = 0x9004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "ife_0_gdsc", }, @@ -1721,6 +1730,9 @@ static struct gdsc ife_0_gdsc = { static struct gdsc ife_1_gdsc = { .gdscr = 0xa004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "ife_1_gdsc", }, @@ -1729,6 +1741,9 @@ static struct gdsc ife_1_gdsc = { static struct gdsc ife_2_gdsc = { .gdscr = 0xb004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "ife_2_gdsc", }, @@ -1737,6 +1752,9 @@ static struct gdsc ife_2_gdsc = { static struct gdsc titan_top_gdsc = { .gdscr = 0x14004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "titan_top_gdsc", }, From 77e73917867b631bfbb5e31a8d108c299c1ef368 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 25 Apr 2025 14:12:56 +0200 Subject: [PATCH 2302/2924] clk: qcom: dispcc-sm6350: Add *_wait_val values for GDSCs [ Upstream commit 673989d27123618afab56df1143a75454178b4ae ] Compared to the msm-4.19 driver the mainline GDSC driver always sets the bits for en_rest, en_few & clk_dis, and if those values are not set per-GDSC in the respective driver then the default value from the GDSC driver is used. The downstream driver only conditionally sets clk_dis_wait_val if qcom,clk-dis-wait-val is given in devicetree. Correct this situation by explicitly setting those values. For all GDSCs the reset value of those bits are used. Fixes: 837519775f1d ("clk: qcom: Add display clock controller driver for SM6350") Signed-off-by: Luca Weiss Reviewed-by: Taniya Das Link: https://lore.kernel.org/r/20250425-sm6350-gdsc-val-v1-2-1f252d9c5e4e@fairphone.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/dispcc-sm6350.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clk/qcom/dispcc-sm6350.c b/drivers/clk/qcom/dispcc-sm6350.c index e703ecf00e4404..b0bd163a449ccd 100644 --- a/drivers/clk/qcom/dispcc-sm6350.c +++ b/drivers/clk/qcom/dispcc-sm6350.c @@ -681,6 +681,9 @@ static struct clk_branch disp_cc_xo_clk = { static struct gdsc mdss_gdsc = { .gdscr = 0x1004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "mdss_gdsc", }, From 01bcd3eb3ef744f0804951445e9961fdf5772306 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 25 Apr 2025 14:12:57 +0200 Subject: [PATCH 2303/2924] clk: qcom: gcc-sm6350: Add *_wait_val values for GDSCs [ Upstream commit afdfd829a99e467869e3ca1955fb6c6e337c340a ] Compared to the msm-4.19 driver the mainline GDSC driver always sets the bits for en_rest, en_few & clk_dis, and if those values are not set per-GDSC in the respective driver then the default value from the GDSC driver is used. The downstream driver only conditionally sets clk_dis_wait_val if qcom,clk-dis-wait-val is given in devicetree. Correct this situation by explicitly setting those values. For all GDSCs the reset value of those bits are used. Fixes: 131abae905df ("clk: qcom: Add SM6350 GCC driver") Signed-off-by: Luca Weiss Reviewed-by: Taniya Das Link: https://lore.kernel.org/r/20250425-sm6350-gdsc-val-v1-3-1f252d9c5e4e@fairphone.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-sm6350.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c index 74346dc026068a..a4d6dff9d0f7f1 100644 --- a/drivers/clk/qcom/gcc-sm6350.c +++ b/drivers/clk/qcom/gcc-sm6350.c @@ -2320,6 +2320,9 @@ static struct clk_branch gcc_video_xo_clk = { static struct gdsc usb30_prim_gdsc = { .gdscr = 0x1a004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "usb30_prim_gdsc", }, @@ -2328,6 +2331,9 @@ static struct gdsc usb30_prim_gdsc = { static struct gdsc ufs_phy_gdsc = { .gdscr = 0x3a004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "ufs_phy_gdsc", }, From 17cfd35d4794872e5d1a93bf1c6dc9f3f0c61acb Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 25 Apr 2025 14:12:58 +0200 Subject: [PATCH 2304/2924] clk: qcom: gpucc-sm6350: Add *_wait_val values for GDSCs [ Upstream commit d988b0b866c2aeb23aa74022b5bbd463165a7a33 ] Compared to the msm-4.19 driver the mainline GDSC driver always sets the bits for en_rest, en_few & clk_dis, and if those values are not set per-GDSC in the respective driver then the default value from the GDSC driver is used. The downstream driver only conditionally sets clk_dis_wait_val if qcom,clk-dis-wait-val is given in devicetree. Correct this situation by explicitly setting those values. For all GDSCs the reset value of those bits are used, with the exception of gpu_cx_gdsc which has an explicit value (qcom,clk-dis-wait-val = <8>). Fixes: 013804a727a0 ("clk: qcom: Add GPU clock controller driver for SM6350") Signed-off-by: Luca Weiss Reviewed-by: Taniya Das Link: https://lore.kernel.org/r/20250425-sm6350-gdsc-val-v1-4-1f252d9c5e4e@fairphone.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gpucc-sm6350.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clk/qcom/gpucc-sm6350.c b/drivers/clk/qcom/gpucc-sm6350.c index 35ed0500bc5931..ee89c42413f885 100644 --- a/drivers/clk/qcom/gpucc-sm6350.c +++ b/drivers/clk/qcom/gpucc-sm6350.c @@ -413,6 +413,9 @@ static struct clk_branch gpu_cc_gx_vsense_clk = { static struct gdsc gpu_cx_gdsc = { .gdscr = 0x106c, .gds_hw_ctrl = 0x1540, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0x8, .pd = { .name = "gpu_cx_gdsc", }, @@ -423,6 +426,9 @@ static struct gdsc gpu_cx_gdsc = { static struct gdsc gpu_gx_gdsc = { .gdscr = 0x100c, .clamp_io_ctrl = 0x1508, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0x2, .pd = { .name = "gpu_gx_gdsc", .power_on = gdsc_gx_do_nothing_enable, From 2b67d70847060d556abc6b11f62b8b7c5ac31575 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Mon, 28 Apr 2025 21:15:36 +0000 Subject: [PATCH 2305/2924] bpftool: Fix regression of "bpftool cgroup tree" EINVAL on older kernels [ Upstream commit 43745d11bfd9683abdf08ad7a5cc403d6a9ffd15 ] If cgroup_has_attached_progs queries an attach type not supported by the running kernel, due to the kernel being older than the bpftool build, it would encounter an -EINVAL from BPF_PROG_QUERY syscall. Prior to commit 98b303c9bf05 ("bpftool: Query only cgroup-related attach types"), this EINVAL would be ignored by the function, allowing the function to only consider supported attach types. The commit changed so that, instead of querying all attach types, only attach types from the array `cgroup_attach_types` is queried. The assumption is that because these are only cgroup attach types, they should all be supported. Unfortunately this assumption may be false when the kernel is older than the bpftool build, where the attach types queried by bpftool is not yet implemented in the kernel. This would result in errors such as: $ bpftool cgroup tree CgroupPath ID AttachType AttachFlags Name Error: can't query bpf programs attached to /sys/fs/cgroup: Invalid argument This patch restores the logic of ignoring EINVAL from prior to that patch. Fixes: 98b303c9bf05 ("bpftool: Query only cgroup-related attach types") Reported-by: Sagarika Sharma Reported-by: Minh-Anh Nguyen Signed-off-by: YiFei Zhu Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20250428211536.1651456-1-zhuyifei@google.com Signed-off-by: Sasha Levin --- tools/bpf/bpftool/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 93b139bfb9880a..3f1d6be512151d 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -221,7 +221,7 @@ static int cgroup_has_attached_progs(int cgroup_fd) for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) { int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]); - if (count < 0) + if (count < 0 && errno != EINVAL) return -1; if (count > 0) { From 1b69a5299f28ce8e6afa37c3690dbc14c3a1f53f Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Wed, 2 Apr 2025 10:05:13 +0800 Subject: [PATCH 2306/2924] clk: bcm: rpi: Add NULL check in raspberrypi_clk_register() [ Upstream commit 73c46d9a93d071ca69858dea3f569111b03e549e ] devm_kasprintf() returns NULL when memory allocation fails. Currently, raspberrypi_clk_register() does not check for this case, which results in a NULL pointer dereference. Add NULL check after devm_kasprintf() to prevent this issue. Fixes: 93d2725affd6 ("clk: bcm: rpi: Discover the firmware clocks") Signed-off-by: Henry Martin Reviewed-by: Dave Stevenson Link: https://lore.kernel.org/r/20250402020513.42628-1-bsdhenrymartin@gmail.com Reviewed-by: Stefan Wahren Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/bcm/clk-raspberrypi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/bcm/clk-raspberrypi.c b/drivers/clk/bcm/clk-raspberrypi.c index 0e1fe3759530a4..720acc10f8aa45 100644 --- a/drivers/clk/bcm/clk-raspberrypi.c +++ b/drivers/clk/bcm/clk-raspberrypi.c @@ -286,6 +286,8 @@ static struct clk_hw *raspberrypi_clk_register(struct raspberrypi_clk *rpi, init.name = devm_kasprintf(rpi->dev, GFP_KERNEL, "fw-clk-%s", rpi_firmware_clk_names[id]); + if (!init.name) + return ERR_PTR(-ENOMEM); init.ops = &raspberrypi_firmware_clk_ops; init.flags = CLK_GET_RATE_NOCACHE; From 3f30dc4fa6f777f0938eade438a065cd6f12de17 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 27 Mar 2025 12:52:14 +0000 Subject: [PATCH 2307/2924] clk: test: Forward-declare struct of_phandle_args in kunit/clk.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7a0c1872ee7db25d711ac29a55f36844a7108308 ] Add a forward-declare of struct of_phandle_args to prevent the compiler warning: ../include/kunit/clk.h:29:63: warning: ‘struct of_phandle_args’ declared inside parameter list will not be visible outside of this definition or declaration struct clk_hw *(*get)(struct of_phandle_args *clkspec, void *data), Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20250327125214.82598-1-rf@opensource.cirrus.com Fixes: a82fcb16d977 ("clk: test: Add test managed of_clk_add_hw_provider()") Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- include/kunit/clk.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/kunit/clk.h b/include/kunit/clk.h index 0afae7688157b8..f226044cc78d11 100644 --- a/include/kunit/clk.h +++ b/include/kunit/clk.h @@ -6,6 +6,7 @@ struct clk; struct clk_hw; struct device; struct device_node; +struct of_phandle_args; struct kunit; struct clk * From d2549b5f84cddae85a5f817979f18a5e839f938e Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Sat, 3 May 2025 22:44:32 +0300 Subject: [PATCH 2308/2924] wifi: iwlfiwi: mvm: Fix the rate reporting [ Upstream commit 8f7561209eda7d6998708f06376e8dd2dc52f3b8 ] The rate validation in mac80211 considers a rate to be valid iff both the rate index and the count are positive. When the rate scaling is managed in the driver and not enough traffic passed to set the actual rate, the driver set the rate to be the optimal rate. However, the rate count is not set and thus the rate is considered not valid. Fix it by setting the count to 1. Fixes: 3e99b4d28219 ("wifi: mac80211: Sanity check tx bitrate if not provided by driver") Signed-off-by: Ilan Peer Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250503224232.0d1d1e022d63.I76833c14ba1d66f9bea5c32b25a54d8b36f229ba@changeid Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 068c58e9c1eb4e..c2729dab8e79e5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -2,6 +2,7 @@ /****************************************************************************** * * Copyright(c) 2005 - 2014, 2018 - 2023 Intel Corporation. All rights reserved. + * Copyright(c) 2025 Intel Corporation * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH *****************************************************************************/ @@ -2709,6 +2710,7 @@ static void rs_drv_get_rate(void *mvm_r, struct ieee80211_sta *sta, optimal_rate); iwl_mvm_hwrate_to_tx_rate_v1(last_ucode_rate, info->band, &txrc->reported_rate); + txrc->reported_rate.count = 1; } spin_unlock_bh(&lq_sta->pers.lock); } From 20d7644798f8bbb399f0b67071a9fe73d7ef8e62 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Wed, 30 Apr 2025 16:46:51 +0200 Subject: [PATCH 2309/2924] rtla: Define _GNU_SOURCE in timerlat_bpf.c [ Upstream commit 8020361d51eea5145402e450d91b083bccdcd874 ] Newer versions of glibc include a definition of struct sched_attr in bits/sched.h (included through sched.h which is included by rtla). Commit 0eecee340672 ("tools/rtla: fix collision with glibc sched_attr/sched_set_attr") has modified the definition of struct sched_attr in utils.h, so that it is only applied with older versions of glibc that do not define it, in order to prevent build failure. The definition in bits/sched.h depends on _GNU_SOURCE. timerlat_bpf.c does not define _GNU_SOURCE, making it fall back to the definition in utils.h. The latter has two fields less, leading to shifted offsets of struct timerlat_params in timerlat_bpf_init. Because of the shift, timerlat_bpf_init incorrectly reads params->entries as 0 for timerlat-hist and disables the creation of histogram maps, causing breakage in BPF sample collection mode: $ rtla timerlat hist -d 1s Error pulling BPF data Fix the issue by also defining _GNU_SOURCE in timerlat_bpf.c. Cc: John Kacur Cc: Luis Goncalves Link: https://lore.kernel.org/20250430144651.621766-1-tglozar@redhat.com Fixes: e34293ddcebd ("rtla/timerlat: Add BPF skeleton to collect samples") Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- tools/tracing/rtla/src/timerlat_bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/tracing/rtla/src/timerlat_bpf.c b/tools/tracing/rtla/src/timerlat_bpf.c index 5abee884037aeb..0bc44ce5d69bd9 100644 --- a/tools/tracing/rtla/src/timerlat_bpf.c +++ b/tools/tracing/rtla/src/timerlat_bpf.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #ifdef HAVE_BPF_SKEL +#define _GNU_SOURCE #include "timerlat.h" #include "timerlat_bpf.h" #include "timerlat.skel.h" From 49993db7103c70d9f4cf5d060431b96fe5011118 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Wed, 7 May 2025 00:31:11 +0800 Subject: [PATCH 2310/2924] efi/libstub: Describe missing 'out' parameter in efi_load_initrd [ Upstream commit c8e1927e7f7d63721e32ec41d27ccb0eb1a1b0fc ] The function efi_load_initrd() had a documentation warning due to the missing description for the 'out' parameter. Add the parameter description to the kernel-doc comment to resolve the warning and improve API documentation. Fixes the following compiler warning: drivers/firmware/efi/libstub/efi-stub-helper.c:611: warning: Function parameter or struct member 'out' not described in 'efi_load_initrd' Fixes: f4dc7fffa987 ("efi: libstub: unify initrd loading between architectures") Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/libstub/efi-stub-helper.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index fd6dc790c5a89d..7aa2f9ad293562 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -601,6 +601,7 @@ efi_status_t efi_load_initrd_cmdline(efi_loaded_image_t *image, * @image: EFI loaded image protocol * @soft_limit: preferred address for loading the initrd * @hard_limit: upper limit address for loading the initrd + * @out: pointer to store the address of the initrd table * * Return: status code */ From 74bee33834d0c90bed208baad01a018402b11e17 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Wed, 7 May 2025 03:43:19 +0000 Subject: [PATCH 2311/2924] selftests/bpf: Avoid passing out-of-range values to __retval() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6e492ffcab6064cd7b317dc1f49fb9f92407aaa7 ] Currently, we pass 0x1234567890abcdef to __retval() for the following two tests: verifier_load_acquire/load_acquire_64 verifier_store_release/store_release_64 However, the upper 32 bits of that value are being ignored, since __retval() expects an int. Actually, the tests would still pass even if I change '__retval(0x1234567890abcdef)' to e.g. '__retval(0x90abcdef)'. Restructure the tests a bit to test the entire 64-bit values properly. Do the same to their 8-, 16- and 32-bit variants as well to keep the style consistent. Fixes: ff3afe5da998 ("selftests/bpf: Add selftests for load-acquire and store-release instructions") Acked-by: Björn Töpel Reviewed-by: Pu Lehui Tested-by: Björn Töpel # QEMU/RVA23 Signed-off-by: Peilin Ye Link: https://lore.kernel.org/r/d67f4c6f6ee0d0388cbce1f4892ec4176ee2d604.1746588351.git.yepeilin@google.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- .../bpf/progs/verifier_load_acquire.c | 40 +++++++++++++------ .../bpf/progs/verifier_store_release.c | 32 +++++++++++---- 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c index 77698d5a19e446..a696ab84bfd662 100644 --- a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c +++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c @@ -10,65 +10,81 @@ SEC("socket") __description("load-acquire, 8-bit") -__success __success_unpriv __retval(0x12) +__success __success_unpriv __retval(0) __naked void load_acquire_8(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12;" "*(u8 *)(r10 - 1) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u8 *)(r10 - 1)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1)) + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -1)) : __clobber_all); } SEC("socket") __description("load-acquire, 16-bit") -__success __success_unpriv __retval(0x1234) +__success __success_unpriv __retval(0) __naked void load_acquire_16(void) { asm volatile ( + "r0 = 0;" "w1 = 0x1234;" "*(u16 *)(r10 - 2) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u16 *)(r10 - 2)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2)) + BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -2)) : __clobber_all); } SEC("socket") __description("load-acquire, 32-bit") -__success __success_unpriv __retval(0x12345678) +__success __success_unpriv __retval(0) __naked void load_acquire_32(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12345678;" "*(u32 *)(r10 - 4) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u32 *)(r10 - 4)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4)) + BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -4)) : __clobber_all); } SEC("socket") __description("load-acquire, 64-bit") -__success __success_unpriv __retval(0x1234567890abcdef) +__success __success_unpriv __retval(0) __naked void load_acquire_64(void) { asm volatile ( + "r0 = 0;" "r1 = 0x1234567890abcdef ll;" "*(u64 *)(r10 - 8) = r1;" - ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8)); + ".8byte %[load_acquire_insn];" // r2 = load_acquire((u64 *)(r10 - 8)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8)) + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8)) : __clobber_all); } diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c index c0442d5bb049d8..022d03d9835957 100644 --- a/tools/testing/selftests/bpf/progs/verifier_store_release.c +++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c @@ -11,13 +11,17 @@ SEC("socket") __description("store-release, 8-bit") -__success __success_unpriv __retval(0x12) +__success __success_unpriv __retval(0) __naked void store_release_8(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12;" ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1); - "w0 = *(u8 *)(r10 - 1);" + "w2 = *(u8 *)(r10 - 1);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -27,13 +31,17 @@ __naked void store_release_8(void) SEC("socket") __description("store-release, 16-bit") -__success __success_unpriv __retval(0x1234) +__success __success_unpriv __retval(0) __naked void store_release_16(void) { asm volatile ( + "r0 = 0;" "w1 = 0x1234;" ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1); - "w0 = *(u16 *)(r10 - 2);" + "w2 = *(u16 *)(r10 - 2);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -43,13 +51,17 @@ __naked void store_release_16(void) SEC("socket") __description("store-release, 32-bit") -__success __success_unpriv __retval(0x12345678) +__success __success_unpriv __retval(0) __naked void store_release_32(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12345678;" ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1); - "w0 = *(u32 *)(r10 - 4);" + "w2 = *(u32 *)(r10 - 4);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -59,13 +71,17 @@ __naked void store_release_32(void) SEC("socket") __description("store-release, 64-bit") -__success __success_unpriv __retval(0x1234567890abcdef) +__success __success_unpriv __retval(0) __naked void store_release_64(void) { asm volatile ( + "r0 = 0;" "r1 = 0x1234567890abcdef ll;" ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1); - "r0 = *(u64 *)(r10 - 8);" + "r2 = *(u64 *)(r10 - 8);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, From 72089e524b77715538d8a22c9a5629ae0dad0d5f Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Thu, 1 May 2025 09:35:52 +0200 Subject: [PATCH 2312/2924] selftests/bpf: Fix caps for __xlated/jited_unpriv [ Upstream commit cf15cdc0f0f39a5c6315200808ec3e3995b0c2d2 ] Currently, __xlated_unpriv and __jited_unpriv do not work because the BPF syscall will overwrite info.jited_prog_len and info.xlated_prog_len with 0 if the process is not bpf_capable(). This bug was not noticed before, because there is no test that actually uses __xlated_unpriv/__jited_unpriv. To resolve this, simply restore the capabilities earlier (but still after loading the program). Adding this here unconditionally is fine because the function first checks that the capabilities were initialized before attempting to restore them. This will be important later when we add tests that check whether a speculation barrier was inserted in the correct location. Signed-off-by: Luis Gerhorst Fixes: 9c9f73391310 ("selftests/bpf: allow checking xlated programs in verifier_* tests") Fixes: 7d743e4c759c ("selftests/bpf: __jited test tag to check disassembly after jit") Acked-by: Kumar Kartikeya Dwivedi Tested-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250501073603.1402960-2-luis.gerhorst@fau.de Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/testing/selftests/bpf/test_loader.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 49f2fc61061f5d..9551d8d5f8f9f8 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -1042,6 +1042,14 @@ void run_subtest(struct test_loader *tester, emit_verifier_log(tester->log_buf, false /*force*/); validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log); + /* Restore capabilities because the kernel will silently ignore requests + * for program info (such as xlated program text) if we are not + * bpf-capable. Also, for some reason test_verifier executes programs + * with all capabilities restored. Do the same here. + */ + if (restore_capabilities(&caps)) + goto tobj_cleanup; + if (subspec->expect_xlated.cnt) { err = get_xlated_program_text(bpf_program__fd(tprog), tester->log_buf, tester->log_buf_sz); @@ -1067,12 +1075,6 @@ void run_subtest(struct test_loader *tester, } if (should_do_test_run(spec, subspec)) { - /* For some reason test_verifier executes programs - * with all capabilities restored. Do the same here. - */ - if (restore_capabilities(&caps)) - goto tobj_cleanup; - /* Do bpf_map__attach_struct_ops() for each struct_ops map. * This should trigger bpf_struct_ops->reg callback on kernel side. */ From ad031f83d7209e0504cd71ccd92e449501e05495 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 7 May 2025 10:53:06 -0400 Subject: [PATCH 2313/2924] tracing: Rename event_trigger_alloc() to trigger_data_alloc() [ Upstream commit f2947c4b7d0f235621c5daf78aecfbd6e22c05e5 ] The function event_trigger_alloc() creates an event_trigger_data descriptor and states that it needs to be freed via event_trigger_free(). This is incorrect, it needs to be freed by trigger_data_free() as event_trigger_free() adds ref counting. Rename event_trigger_alloc() to trigger_data_alloc() and state that it needs to be freed via trigger_data_free(). This naming convention was introducing bugs. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Tom Zanussi Link: https://lore.kernel.org/20250507145455.776436410@goodmis.org Fixes: 86599dbe2c527 ("tracing: Add helper functions to simplify event_command.parse() callback handling") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace.h | 8 +++----- kernel/trace/trace_events_hist.c | 2 +- kernel/trace/trace_events_trigger.c | 16 ++++++++-------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 79be1995db44c4..10ee434a9b755f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1772,6 +1772,9 @@ extern int event_enable_register_trigger(char *glob, extern void event_enable_unregister_trigger(char *glob, struct event_trigger_data *test, struct trace_event_file *file); +extern struct event_trigger_data * +trigger_data_alloc(struct event_command *cmd_ops, char *cmd, char *param, + void *private_data); extern void trigger_data_free(struct event_trigger_data *data); extern int event_trigger_init(struct event_trigger_data *data); extern int trace_event_trigger_enable_disable(struct trace_event_file *file, @@ -1798,11 +1801,6 @@ extern bool event_trigger_check_remove(const char *glob); extern bool event_trigger_empty_param(const char *param); extern int event_trigger_separate_filter(char *param_and_filter, char **param, char **filter, bool param_required); -extern struct event_trigger_data * -event_trigger_alloc(struct event_command *cmd_ops, - char *cmd, - char *param, - void *private_data); extern int event_trigger_parse_num(char *trigger, struct event_trigger_data *trigger_data); extern int event_trigger_set_filter(struct event_command *cmd_ops, diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0ec692f80aefcc..86fd06812cdab4 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6795,7 +6795,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, return PTR_ERR(hist_data); } - trigger_data = event_trigger_alloc(cmd_ops, cmd, param, hist_data); + trigger_data = trigger_data_alloc(cmd_ops, cmd, param, hist_data); if (!trigger_data) { ret = -ENOMEM; goto out_free; diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 6e87ae2a1a66bf..9f3f76405a28c0 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -804,7 +804,7 @@ int event_trigger_separate_filter(char *param_and_filter, char **param, } /** - * event_trigger_alloc - allocate and init event_trigger_data for a trigger + * trigger_data_alloc - allocate and init event_trigger_data for a trigger * @cmd_ops: The event_command operations for the trigger * @cmd: The cmd string * @param: The param string @@ -815,14 +815,14 @@ int event_trigger_separate_filter(char *param_and_filter, char **param, * trigger_ops to assign to the event_trigger_data. @private_data can * also be passed in and associated with the event_trigger_data. * - * Use event_trigger_free() to free an event_trigger_data object. + * Use trigger_data_free() to free an event_trigger_data object. * * Return: The trigger_data object success, NULL otherwise */ -struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops, - char *cmd, - char *param, - void *private_data) +struct event_trigger_data *trigger_data_alloc(struct event_command *cmd_ops, + char *cmd, + char *param, + void *private_data) { struct event_trigger_data *trigger_data; const struct event_trigger_ops *trigger_ops; @@ -989,7 +989,7 @@ event_trigger_parse(struct event_command *cmd_ops, return ret; ret = -ENOMEM; - trigger_data = event_trigger_alloc(cmd_ops, cmd, param, file); + trigger_data = trigger_data_alloc(cmd_ops, cmd, param, file); if (!trigger_data) goto out; @@ -1793,7 +1793,7 @@ int event_enable_trigger_parse(struct event_command *cmd_ops, enable_data->enable = enable; enable_data->file = event_enable_file; - trigger_data = event_trigger_alloc(cmd_ops, cmd, param, enable_data); + trigger_data = trigger_data_alloc(cmd_ops, cmd, param, enable_data); if (!trigger_data) { kfree(enable_data); goto out; From 2e0c0292593793be6b62d86d3e2f1d9beea13193 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 7 May 2025 10:53:07 -0400 Subject: [PATCH 2314/2924] tracing: Fix error handling in event_trigger_parse() [ Upstream commit c5dd28e7fb4f63475b50df4f58311df92939d011 ] According to trigger_data_alloc() doc, trigger_data_free() should be used to free an event_trigger_data object. This fixes a mismatch introduced when kzalloc was replaced with trigger_data_alloc without updating the corresponding deallocation calls. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Mathieu Desnoyers Cc: Tom Zanussi Link: https://lore.kernel.org/20250507145455.944453325@goodmis.org Link: https://lore.kernel.org/20250318112737.4174-1-linmq006@gmail.com Fixes: e1f187d09e11 ("tracing: Have existing event_command.parse() implementations use helpers") Signed-off-by: Miaoqian Lin [ SDR: Changed event_trigger_alloc/free() to trigger_data_alloc/free() ] Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_events_trigger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 9f3f76405a28c0..c443ed7649a896 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -995,7 +995,7 @@ event_trigger_parse(struct event_command *cmd_ops, if (remove) { event_trigger_unregister(cmd_ops, file, glob+1, trigger_data); - kfree(trigger_data); + trigger_data_free(trigger_data); ret = 0; goto out; } @@ -1022,7 +1022,7 @@ event_trigger_parse(struct event_command *cmd_ops, out_free: event_trigger_reset_filter(cmd_ops, trigger_data); - kfree(trigger_data); + trigger_data_free(trigger_data); goto out; } From 85193dc3595e5ca6eaf4ed65f7f6163abcba3d8f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 30 Apr 2025 11:05:40 +0300 Subject: [PATCH 2315/2924] of: unittest: Unlock on error in unittest_data_add() [ Upstream commit 493e6cb63a21e9f009dc4c209fd311f2bb777656 ] The of_overlay_mutex_unlock() was accidentally deleted if "of_root" is NULL. Change this to a goto unlock. Fixes: d1eabd218ede ("of: unittest: treat missing of_root as error instead of fixing up") Signed-off-by: Dan Carpenter Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/aBHZ1DvXiBcZkWmk@stanley.mountain Signed-off-by: Rob Herring (Arm) Signed-off-by: Sasha Levin --- drivers/of/unittest.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 64d301893af7b8..eeb370e0f50777 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -2029,15 +2029,16 @@ static int __init unittest_data_add(void) rc = of_resolve_phandles(unittest_data_node); if (rc) { pr_err("%s: Failed to resolve phandles (rc=%i)\n", __func__, rc); - of_overlay_mutex_unlock(); - return -EINVAL; + rc = -EINVAL; + goto unlock; } /* attach the sub-tree to live tree */ if (!of_root) { pr_warn("%s: no live tree to attach sub-tree\n", __func__); kfree(unittest_data); - return -ENODEV; + rc = -ENODEV; + goto unlock; } EXPECT_BEGIN(KERN_INFO, @@ -2056,9 +2057,10 @@ static int __init unittest_data_add(void) EXPECT_END(KERN_INFO, "Duplicate name in testcase-data, renamed to \"duplicate-name#1\""); +unlock: of_overlay_mutex_unlock(); - return 0; + return rc; } #ifdef CONFIG_OF_OVERLAY From 52d12296c5d5df940fc2e8bfb8e0fcd5d4fae3d6 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Fri, 25 Apr 2025 13:59:57 +0800 Subject: [PATCH 2316/2924] ktls, sockmap: Fix missing uncharge operation [ Upstream commit 79f0c39ae7d3dc628c01b02f23ca5d01f9875040 ] When we specify apply_bytes, we divide the msg into multiple segments, each with a length of 'send', and every time we send this part of the data using tcp_bpf_sendmsg_redir(), we use sk_msg_return_zero() to uncharge the memory of the specified 'send' size. However, if the first segment of data fails to send, for example, the peer's buffer is full, we need to release all of the msg. When releasing the msg, we haven't uncharged the memory of the subsequent segments. This modification does not make significant logical changes, but only fills in the missing uncharge places. This issue has existed all along, until it was exposed after we added the apply test in test_sockmap: commit 3448ad23b34e ("selftests/bpf: Add apply_bytes test to test_txmsg_redir_wait_sndmem in test_sockmap") Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling") Reported-by: Cong Wang Closes: https://lore.kernel.org/bpf/aAmIi0vlycHtbXeb@pop-os.localdomain/T/#t Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Acked-by: John Fastabend Reviewed-by: Cong Wang Link: https://lore.kernel.org/r/20250425060015.6968-2-jiayuan.chen@linux.dev Signed-off-by: Sasha Levin --- net/tls/tls_sw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f3d7d19482da9a..fc88e34b7f33fe 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -908,6 +908,13 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, &msg_redir, send, flags); lock_sock(sk); if (err < 0) { + /* Regardless of whether the data represented by + * msg_redir is sent successfully, we have already + * uncharged it via sk_msg_return_zero(). The + * msg->sg.size represents the remaining unprocessed + * data, which needs to be uncharged here. + */ + sk_mem_uncharge(sk, msg->sg.size); *copied -= sk_msg_free_nocharge(sk, &msg_redir); msg->sg.size = 0; } From 879beb3f03f8f1340f4921eb9e65fa6931bf22d2 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Sat, 10 May 2025 18:20:11 +0000 Subject: [PATCH 2317/2924] libbpf: Use proper errno value in nlattr [ Upstream commit fd5fd538a1f4b34cee6823ba0ddda2f7a55aca96 ] Return value of the validate_nla() function can be propagated all the way up to users of libbpf API. In case of error this libbpf version of validate_nla returns -1 which will be seen as -EPERM from user's point of view. Instead, return a more reasonable -EINVAL. Fixes: bbf48c18ee0c ("libbpf: add error reporting in XDP") Suggested-by: Andrii Nakryiko Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250510182011.2246631-1-a.s.protopopov@gmail.com Signed-off-by: Sasha Levin --- tools/lib/bpf/nlattr.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 975e265eab3bfe..06663f9ea581f9 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype, minlen = nla_attr_minlen[pt->type]; if (libbpf_nla_len(nla) < minlen) - return -1; + return -EINVAL; if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) - return -1; + return -EINVAL; if (pt->type == LIBBPF_NLA_STRING) { char *data = libbpf_nla_data(nla); if (data[libbpf_nla_len(nla) - 1] != '\0') - return -1; + return -EINVAL; } return 0; @@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, if (policy) { err = validate_nla(nla, maxtype, policy); if (err < 0) - goto errout; + return err; } - if (tb[type]) + if (tb[type]) { pr_warn("Attribute of type %#x found multiple times in message, " "previous attribute is being ignored.\n", type); + } tb[type] = nla; } - err = 0; -errout: - return err; + return 0; } /** From 7d2c69bd3fe691d9d647086012b9c28fcd335bfa Mon Sep 17 00:00:00 2001 From: Lijuan Gao Date: Tue, 6 May 2025 14:23:00 +0800 Subject: [PATCH 2318/2924] pinctrl: qcom: correct the ngpios entry for QCS615 [ Upstream commit d18cdb975ba8aad7046ff82df1a963fa21082a45 ] Correct the ngpios entry to account for the UFS_RESET pin being exported as a GPIO in addition to the real GPIOs, allowing the UFS driver to toggle it. Fixes: b698f36a9d40 ("pinctrl: qcom: add the tlmm driver for QCS615 platform") Reviewed-by: Konrad Dybcio Signed-off-by: Lijuan Gao Link: https://lore.kernel.org/20250506-correct_gpio_ranges-v3-3-49a7d292befa@quicinc.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/qcom/pinctrl-qcs615.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c index 23015b055f6a92..17ca743c2210fc 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs615.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c @@ -1062,7 +1062,7 @@ static const struct msm_pinctrl_soc_data qcs615_tlmm = { .nfunctions = ARRAY_SIZE(qcs615_functions), .groups = qcs615_groups, .ngroups = ARRAY_SIZE(qcs615_groups), - .ngpios = 123, + .ngpios = 124, .tiles = qcs615_tiles, .ntiles = ARRAY_SIZE(qcs615_tiles), .wakeirq_map = qcs615_pdc_map, From c38ecf4075a6ebda47d0dae0b2f445573b201f7f Mon Sep 17 00:00:00 2001 From: Lijuan Gao Date: Tue, 6 May 2025 14:23:01 +0800 Subject: [PATCH 2319/2924] pinctrl: qcom: correct the ngpios entry for QCS8300 [ Upstream commit 32b5361a0d10627343b2ded76f05e5d591627fd6 ] Correct the ngpios entry to account for the UFS_RESET pin, which is expected to be wired to the reset pin of the primary UFS memory and is exported as GPIOs in addition to the real GPIOs, allowing the UFS driver to toggle it. Fixes: 0c4cd2cc87c8 ("pinctrl: qcom: add the tlmm driver for QCS8300 platforms") Reviewed-by: Konrad Dybcio Signed-off-by: Lijuan Gao Link: https://lore.kernel.org/20250506-correct_gpio_ranges-v3-4-49a7d292befa@quicinc.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/qcom/pinctrl-qcs8300.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs8300.c b/drivers/pinctrl/qcom/pinctrl-qcs8300.c index ba6de944a859a0..5f5f7c4ac644c4 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs8300.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs8300.c @@ -1204,7 +1204,7 @@ static const struct msm_pinctrl_soc_data qcs8300_pinctrl = { .nfunctions = ARRAY_SIZE(qcs8300_functions), .groups = qcs8300_groups, .ngroups = ARRAY_SIZE(qcs8300_groups), - .ngpios = 133, + .ngpios = 134, .wakeirq_map = qcs8300_pdc_map, .nwakeirq_map = ARRAY_SIZE(qcs8300_pdc_map), .egpio_func = 11, From 288c39286f759314ee8fb3a80a858179b4f306da Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 8 May 2025 23:08:07 +0300 Subject: [PATCH 2320/2924] pinctrl: at91: Fix possible out-of-boundary access [ Upstream commit 762ef7d1e6eefad9896560bfcb9bcf7f1b6df9c1 ] at91_gpio_probe() doesn't check that given OF alias is not available or something went wrong when trying to get it. This might have consequences when accessing gpio_chips array with that value as an index. Note, that BUG() can be compiled out and hence won't actually perform the required checks. Fixes: 6732ae5cb47c ("ARM: at91: add pinctrl support") Signed-off-by: Andy Shevchenko Closes: https://lore.kernel.org/r/202505052343.UHF1Zo93-lkp@intel.com/ Link: https://lore.kernel.org/20250508200807.1384558-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-at91.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 93ab277d9943cf..fbe74e4ef320c1 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1819,12 +1819,16 @@ static int at91_gpio_probe(struct platform_device *pdev) struct at91_gpio_chip *at91_chip = NULL; struct gpio_chip *chip; struct pinctrl_gpio_range *range; + int alias_idx; int ret = 0; int irq, i; - int alias_idx = of_alias_get_id(np, "gpio"); uint32_t ngpio; char **names; + alias_idx = of_alias_get_id(np, "gpio"); + if (alias_idx < 0) + return alias_idx; + BUG_ON(alias_idx >= ARRAY_SIZE(gpio_chips)); if (gpio_chips[alias_idx]) return dev_err_probe(dev, -EBUSY, "%d slot is occupied.\n", alias_idx); From 18e8cbbae79cb35bdce8a01c889827b9799c762e Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Tue, 13 May 2025 12:27:47 +0800 Subject: [PATCH 2321/2924] bpf: Fix WARN() in get_bpf_raw_tp_regs [ Upstream commit 3880cdbed1c4607e378f58fa924c5d6df900d1d3 ] syzkaller reported an issue: WARNING: CPU: 3 PID: 5971 at kernel/trace/bpf_trace.c:1861 get_bpf_raw_tp_regs+0xa4/0x100 kernel/trace/bpf_trace.c:1861 Modules linked in: CPU: 3 UID: 0 PID: 5971 Comm: syz-executor205 Not tainted 6.15.0-rc5-syzkaller-00038-g707df3375124 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 RIP: 0010:get_bpf_raw_tp_regs+0xa4/0x100 kernel/trace/bpf_trace.c:1861 RSP: 0018:ffffc90003636fa8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000003 RCX: ffffffff81c6bc4c RDX: ffff888032efc880 RSI: ffffffff81c6bc83 RDI: 0000000000000005 RBP: ffff88806a730860 R08: 0000000000000005 R09: 0000000000000003 R10: 0000000000000004 R11: 0000000000000000 R12: 0000000000000004 R13: 0000000000000001 R14: ffffc90003637008 R15: 0000000000000900 FS: 0000000000000000(0000) GS:ffff8880d6cdf000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7baee09130 CR3: 0000000029f5a000 CR4: 0000000000352ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ____bpf_get_stack_raw_tp kernel/trace/bpf_trace.c:1934 [inline] bpf_get_stack_raw_tp+0x24/0x160 kernel/trace/bpf_trace.c:1931 bpf_prog_ec3b2eefa702d8d3+0x43/0x47 bpf_dispatcher_nop_func include/linux/bpf.h:1316 [inline] __bpf_prog_run include/linux/filter.h:718 [inline] bpf_prog_run include/linux/filter.h:725 [inline] __bpf_trace_run kernel/trace/bpf_trace.c:2363 [inline] bpf_trace_run3+0x23f/0x5a0 kernel/trace/bpf_trace.c:2405 __bpf_trace_mmap_lock_acquire_returned+0xfc/0x140 include/trace/events/mmap_lock.h:47 __traceiter_mmap_lock_acquire_returned+0x79/0xc0 include/trace/events/mmap_lock.h:47 __do_trace_mmap_lock_acquire_returned include/trace/events/mmap_lock.h:47 [inline] trace_mmap_lock_acquire_returned include/trace/events/mmap_lock.h:47 [inline] __mmap_lock_do_trace_acquire_returned+0x138/0x1f0 mm/mmap_lock.c:35 __mmap_lock_trace_acquire_returned include/linux/mmap_lock.h:36 [inline] mmap_read_trylock include/linux/mmap_lock.h:204 [inline] stack_map_get_build_id_offset+0x535/0x6f0 kernel/bpf/stackmap.c:157 __bpf_get_stack+0x307/0xa10 kernel/bpf/stackmap.c:483 ____bpf_get_stack kernel/bpf/stackmap.c:499 [inline] bpf_get_stack+0x32/0x40 kernel/bpf/stackmap.c:496 ____bpf_get_stack_raw_tp kernel/trace/bpf_trace.c:1941 [inline] bpf_get_stack_raw_tp+0x124/0x160 kernel/trace/bpf_trace.c:1931 bpf_prog_ec3b2eefa702d8d3+0x43/0x47 Tracepoint like trace_mmap_lock_acquire_returned may cause nested call as the corner case show above, which will be resolved with more general method in the future. As a result, WARN_ON_ONCE will be triggered. As Alexei suggested, remove the WARN_ON_ONCE first. Fixes: 9594dc3c7e71 ("bpf: fix nested bpf tracepoints with per-cpu data") Reported-by: syzbot+45b0c89a0fc7ae8dbadc@syzkaller.appspotmail.com Suggested-by: Alexei Starovoitov Signed-off-by: Tao Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250513042747.757042-1-chen.dylane@linux.dev Closes: https://lore.kernel.org/bpf/8bc2554d-1052-4922-8832-e0078a033e1d@gmail.com Signed-off-by: Sasha Levin --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0f5906f43d7cac..e1bf9c06007fb7 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1858,7 +1858,7 @@ static struct pt_regs *get_bpf_raw_tp_regs(void) struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs); int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level); - if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) { + if (nest_level > ARRAY_SIZE(tp_regs->regs)) { this_cpu_dec(bpf_raw_tp_nest_level); return ERR_PTR(-EBUSY); } From 90842d6a5bd94741a0ed435cdfeea975a739654a Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 7 May 2025 10:42:31 -0500 Subject: [PATCH 2322/2924] dt-bindings: soc: fsl,qman-fqd: Fix reserved-memory.yaml reference [ Upstream commit 1090c38bbfd9ab7f22830c0e8a5c605e7d4ef084 ] The reserved-memory.yaml reference needs the full path. No warnings were generated because the example has the wrong compatible string, so fix that too. Fixes: 304a90c4f75d ("dt-bindings: soc: fsl: Convert q(b)man-* to yaml format") Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20250507154231.1590634-1-robh@kernel.org Signed-off-by: Rob Herring (Arm) Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml index de0b4ae740ff23..a975bce599750e 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml @@ -50,7 +50,7 @@ required: - compatible allOf: - - $ref: reserved-memory.yaml + - $ref: /schemas/reserved-memory/reserved-memory.yaml unevaluatedProperties: false @@ -61,7 +61,7 @@ examples: #size-cells = <2>; qman-fqd { - compatible = "shared-dma-pool"; + compatible = "fsl,qman-fqd"; size = <0 0x400000>; alignment = <0 0x400000>; no-map; From 5482e58bc1f131a84c2cd993429e2a71778e7a58 Mon Sep 17 00:00:00 2001 From: Vincent Knecht Date: Mon, 14 Apr 2025 18:45:12 +0200 Subject: [PATCH 2323/2924] clk: qcom: gcc-msm8939: Fix mclk0 & mclk1 for 24 MHz [ Upstream commit 9e7acf70cf6aa7b22f67d911f50a8cd510e8fb00 ] Fix mclk0 & mclk1 parent map to use correct GPLL6 configuration and freq_tbl to use GPLL6 instead of GPLL0 so that they tick at 24 MHz. Fixes: 1664014e4679 ("clk: qcom: gcc-msm8939: Add MSM8939 Generic Clock Controller") Suggested-by: Stephan Gerhold Reviewed-by: Konrad Dybcio Reviewed-by: Bryan O'Donoghue Signed-off-by: Vincent Knecht Link: https://lore.kernel.org/r/20250414-gcc-msm8939-fixes-mclk-v2-resend2-v2-1-5ddcf572a6de@mailoo.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-msm8939.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/gcc-msm8939.c b/drivers/clk/qcom/gcc-msm8939.c index 7431c9a65044f8..45193b3d714bab 100644 --- a/drivers/clk/qcom/gcc-msm8939.c +++ b/drivers/clk/qcom/gcc-msm8939.c @@ -432,7 +432,7 @@ static const struct parent_map gcc_xo_gpll0_gpll1a_gpll6_sleep_map[] = { { P_XO, 0 }, { P_GPLL0, 1 }, { P_GPLL1_AUX, 2 }, - { P_GPLL6, 2 }, + { P_GPLL6, 3 }, { P_SLEEP_CLK, 6 }, }; @@ -1113,7 +1113,7 @@ static struct clk_rcg2 jpeg0_clk_src = { }; static const struct freq_tbl ftbl_gcc_camss_mclk0_1_clk[] = { - F(24000000, P_GPLL0, 1, 1, 45), + F(24000000, P_GPLL6, 1, 1, 45), F(66670000, P_GPLL0, 12, 0, 0), { } }; From 6e5e2f46c50e7d8cca7d21fd00599f0481699e24 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 12 May 2025 14:26:15 +0200 Subject: [PATCH 2324/2924] s390/bpf: Store backchain even for leaf progs [ Upstream commit 5f55f2168432298f5a55294831ab6a76a10cb3c3 ] Currently a crash in a leaf prog (caused by a bug) produces the following call trace: [<000003ff600ebf00>] bpf_prog_6df0139e1fbf2789_fentry+0x20/0x78 [<0000000000000000>] 0x0 This is because leaf progs do not store backchain. Fix by making all progs do it. This is what GCC and Clang-generated code does as well. Now the call trace looks like this: [<000003ff600eb0f2>] bpf_prog_6df0139e1fbf2789_fentry+0x2a/0x80 [<000003ff600ed096>] bpf_trampoline_201863462940+0x96/0xf4 [<000003ff600e3a40>] bpf_prog_05f379658fdd72f2_classifier_0+0x58/0xc0 [<000003ffe0aef070>] bpf_test_run+0x210/0x390 [<000003ffe0af0dc2>] bpf_prog_test_run_skb+0x25a/0x668 [<000003ffe038a90e>] __sys_bpf+0xa46/0xdb0 [<000003ffe038ad0c>] __s390x_sys_bpf+0x44/0x50 [<000003ffe0defea8>] __do_syscall+0x150/0x280 [<000003ffe0e01d5c>] system_call+0x74/0x98 Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20250512122717.54878-1-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- arch/s390/net/bpf_jit_comp.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 0776dfde2dba9c..945106b5562db0 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -605,17 +605,15 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, } /* Setup stack and backchain */ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) { - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - /* lgr %w1,%r15 (backchain) */ - EMIT4(0xb9040000, REG_W1, REG_15); + /* lgr %w1,%r15 (backchain) */ + EMIT4(0xb9040000, REG_W1, REG_15); /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); /* aghi %r15,-STK_OFF */ EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth)); - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - /* stg %w1,152(%r15) (backchain) */ - EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, - REG_15, 152); + /* stg %w1,152(%r15) (backchain) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, + REG_15, 152); } } From e1e0f046041474004dc6ebce5ce1d3e86556291d Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Tue, 6 May 2025 09:53:56 +0800 Subject: [PATCH 2325/2924] wifi: rtw89: pci: configure manual DAC mode via PCI config API only [ Upstream commit a70cf04b08f44f41bce14659aa7012674b15d9de ] To support 36-bit DMA, configure chip proprietary bit via PCI config API or chip DBI interface. However, the PCI device mmap isn't set yet and the DBI is also inaccessible via mmap, so only if the bit can be accessible via PCI config API, chip can support 36-bit DMA. Otherwise, fallback to 32-bit DMA. With NULL mmap address, kernel throws trace: BUG: unable to handle page fault for address: 0000000000001090 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: Oops: 0002 [#1] PREEMPT SMP PTI CPU: 1 UID: 0 PID: 71 Comm: irq/26-pciehp Tainted: G OE 6.14.2-061402-generic #202504101348 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE RIP: 0010:rtw89_pci_ops_write16+0x12/0x30 [rtw89_pci] RSP: 0018:ffffb0ffc0acf9d8 EFLAGS: 00010206 RAX: ffffffffc158f9c0 RBX: ffff94865e702020 RCX: 0000000000000000 RDX: 0000000000000718 RSI: 0000000000001090 RDI: ffff94865e702020 RBP: ffffb0ffc0acf9d8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000015 R13: 0000000000000719 R14: ffffb0ffc0acfa1f R15: ffffffffc1813060 FS: 0000000000000000(0000) GS:ffff9486f3480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000001090 CR3: 0000000090440001 CR4: 00000000000626f0 Call Trace: rtw89_pci_read_config_byte+0x6d/0x120 [rtw89_pci] rtw89_pci_cfg_dac+0x5b/0xb0 [rtw89_pci] rtw89_pci_probe+0xa96/0xbd0 [rtw89_pci] ? __pfx___device_attach_driver+0x10/0x10 ? __pfx___device_attach_driver+0x10/0x10 local_pci_probe+0x47/0xa0 pci_call_probe+0x5d/0x190 pci_device_probe+0xa7/0x160 really_probe+0xf9/0x370 ? pm_runtime_barrier+0x55/0xa0 __driver_probe_device+0x8c/0x140 driver_probe_device+0x24/0xd0 __device_attach_driver+0xcd/0x170 bus_for_each_drv+0x99/0x100 __device_attach+0xb4/0x1d0 device_attach+0x10/0x20 pci_bus_add_device+0x59/0x90 pci_bus_add_devices+0x31/0x80 pciehp_configure_device+0xaa/0x170 pciehp_enable_slot+0xd6/0x240 pciehp_handle_presence_or_link_change+0xf1/0x180 pciehp_ist+0x162/0x1c0 irq_thread_fn+0x24/0x70 irq_thread+0xef/0x1c0 ? __pfx_irq_thread_fn+0x10/0x10 ? __pfx_irq_thread_dtor+0x10/0x10 ? __pfx_irq_thread+0x10/0x10 kthread+0xfc/0x230 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x47/0x70 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Fixes: 1fd4b3fe52ef ("wifi: rtw89: pci: support 36-bit PCI DMA address") Reported-by: Bitterblue Smith Closes: https://lore.kernel.org/linux-wireless/ccaf49b6-ff41-4917-90f1-f53cadaaa0da@gmail.com/T/#u Closes: https://github.com/openwrt/openwrt/issues/17025 Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20250506015356.7995-1-pkshih@realtek.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw89/pci.c | 34 ++++++++++++++++-------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/pci.c b/drivers/net/wireless/realtek/rtw89/pci.c index c2fe5a898dc717..b5cdc18f802a96 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.c +++ b/drivers/net/wireless/realtek/rtw89/pci.c @@ -3105,17 +3105,26 @@ static bool rtw89_pci_is_dac_compatible_bridge(struct rtw89_dev *rtwdev) return false; } -static void rtw89_pci_cfg_dac(struct rtw89_dev *rtwdev) +static int rtw89_pci_cfg_dac(struct rtw89_dev *rtwdev, bool force) { struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv; + struct pci_dev *pdev = rtwpci->pdev; + int ret; + u8 val; - if (!rtwpci->enable_dac) - return; + if (!rtwpci->enable_dac && !force) + return 0; if (!rtw89_pci_chip_is_manual_dac(rtwdev)) - return; + return 0; - rtw89_pci_config_byte_set(rtwdev, RTW89_PCIE_L1_CTRL, RTW89_PCIE_BIT_EN_64BITS); + /* Configure DAC only via PCI config API, not DBI interfaces */ + ret = pci_read_config_byte(pdev, RTW89_PCIE_L1_CTRL, &val); + if (ret) + return ret; + + val |= RTW89_PCIE_BIT_EN_64BITS; + return pci_write_config_byte(pdev, RTW89_PCIE_L1_CTRL, val); } static int rtw89_pci_setup_mapping(struct rtw89_dev *rtwdev, @@ -3133,13 +3142,16 @@ static int rtw89_pci_setup_mapping(struct rtw89_dev *rtwdev, } if (!rtw89_pci_is_dac_compatible_bridge(rtwdev)) - goto no_dac; + goto try_dac_done; ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36)); if (!ret) { - rtwpci->enable_dac = true; - rtw89_pci_cfg_dac(rtwdev); - } else { + ret = rtw89_pci_cfg_dac(rtwdev, true); + if (!ret) { + rtwpci->enable_dac = true; + goto try_dac_done; + } + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { rtw89_err(rtwdev, @@ -3147,7 +3159,7 @@ static int rtw89_pci_setup_mapping(struct rtw89_dev *rtwdev, goto err_release_regions; } } -no_dac: +try_dac_done: resource_len = pci_resource_len(pdev, bar_id); rtwpci->mmap = pci_iomap(pdev, bar_id, resource_len); @@ -4302,7 +4314,7 @@ static void rtw89_pci_l2_hci_ldo(struct rtw89_dev *rtwdev) void rtw89_pci_basic_cfg(struct rtw89_dev *rtwdev, bool resume) { if (resume) - rtw89_pci_cfg_dac(rtwdev); + rtw89_pci_cfg_dac(rtwdev, false); rtw89_pci_disable_eq(rtwdev); rtw89_pci_filter_out(rtwdev); From e3a7cb7542c142a17e51c5216f58189466e158ae Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 9 May 2025 09:34:33 +0800 Subject: [PATCH 2326/2924] wifi: rtw89: pci: enlarge retry times of RX tag to 1000 [ Upstream commit dda27a47c036d981ec664ac57e044a21035ffe12 ] RX tag is sequence number to ensure RX DMA is complete. On platform Gigabyte X870 AORUS ELITE WIFI7, sometimes it needs longer retry times to complete RX DMA, or driver throws warnings and connection drops: rtw89_8922ae 0000:07:00.0: failed to update 162 RXBD info: -11 rtw89_8922ae 0000:07:00.0: failed to update 163 RXBD info: -11 rtw89_8922ae 0000:07:00.0: failed to update 32 RXBD info: -11 rtw89_8922ae 0000:07:00.0: failed to release TX skbs Fixes: 0bc7d1d4e63c ("wifi: rtw89: pci: validate RX tag for RXQ and RPQ") Reported-by: Samuel Reyes Closes: https://lore.kernel.org/linux-wireless/f4355539f3ac46bbaf9c586d059a8cbb@realtek.com/T/#t Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20250509013433.7573-1-pkshih@realtek.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw89/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/pci.c b/drivers/net/wireless/realtek/rtw89/pci.c index b5cdc18f802a96..064f6a94010731 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.c +++ b/drivers/net/wireless/realtek/rtw89/pci.c @@ -228,7 +228,7 @@ int rtw89_pci_sync_skb_for_device_and_validate_rx_info(struct rtw89_dev *rtwdev, struct sk_buff *skb) { struct rtw89_pci_rx_info *rx_info = RTW89_PCI_RX_SKB_CB(skb); - int rx_tag_retry = 100; + int rx_tag_retry = 1000; int ret; do { From 9febcc8bded8be0d7efd8237fcef599b6d93b788 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Tue, 13 May 2025 12:13:04 +0000 Subject: [PATCH 2327/2924] wifi: rtw88: fix the 'para' buffer size to avoid reading out of bounds [ Upstream commit 4c2c372de2e108319236203cce6de44d70ae15cd ] Set the size to 6 instead of 2, since 'para' array is passed to 'rtw_fw_bt_wifi_control(rtwdev, para[0], ¶[1])', which reads 5 bytes: void rtw_fw_bt_wifi_control(struct rtw_dev *rtwdev, u8 op_code, u8 *data) { ... SET_BT_WIFI_CONTROL_DATA1(h2c_pkt, *data); SET_BT_WIFI_CONTROL_DATA2(h2c_pkt, *(data + 1)); ... SET_BT_WIFI_CONTROL_DATA5(h2c_pkt, *(data + 4)); Detected using the static analysis tool - Svace. Fixes: 4136214f7c46 ("rtw88: add BT co-existence support") Signed-off-by: Alexey Kodanev Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20250513121304.124141-1-aleksei.kodanev@bell-sw.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/coex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c index c929db1e53ca63..64904278ddad7d 100644 --- a/drivers/net/wireless/realtek/rtw88/coex.c +++ b/drivers/net/wireless/realtek/rtw88/coex.c @@ -309,7 +309,7 @@ static void rtw_coex_tdma_timer_base(struct rtw_dev *rtwdev, u8 type) { struct rtw_coex *coex = &rtwdev->coex; struct rtw_coex_stat *coex_stat = &coex->stat; - u8 para[2] = {0}; + u8 para[6] = {}; u8 times; u16 tbtt_interval = coex_stat->wl_beacon_interval; From a7d2fce4c0ea38dbfcb0c30b8bfc3c5991bfd758 Mon Sep 17 00:00:00 2001 From: Chin-Yen Lee Date: Tue, 13 May 2025 20:52:03 +0800 Subject: [PATCH 2328/2924] wifi: rtw89: fix firmware scan delay unit for WiFi 6 chips [ Upstream commit 3cc35394fac15d533639c9c9e42f28d28936a4a0 ] The scan delay unit of firmware command for WiFi 6 chips is microsecond, but is wrong set now and lead to abnormal work for net-detect. Correct the unit to avoid the error. Fixes: e99dd80c8a18 ("wifi: rtw89: wow: add delay option for net-detect") Signed-off-by: Chin-Yen Lee Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20250513125203.6858-1-pkshih@realtek.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw89/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 8643b17866f897..6c52b0425f2ea9 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -5477,7 +5477,7 @@ int rtw89_fw_h2c_scan_list_offload_be(struct rtw89_dev *rtwdev, int ch_num, return 0; } -#define RTW89_SCAN_DELAY_TSF_UNIT 104800 +#define RTW89_SCAN_DELAY_TSF_UNIT 1000000 int rtw89_fw_h2c_scan_offload_ax(struct rtw89_dev *rtwdev, struct rtw89_scan_option *option, struct rtw89_vif_link *rtwvif_link, From 1006c939df3840a59a743b1d7b50b5ed81bfca28 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Mon, 12 May 2025 15:10:44 +0200 Subject: [PATCH 2329/2924] iommu: remove duplicate selection of DMAR_TABLE [ Upstream commit 9548feff840a05d61783e6316d08ed37e115f3b1 ] This is already done in intel/Kconfig. Fixes: 70bad345e622 ("iommu: Fix compilation without CONFIG_IOMMU_INTEL") Signed-off-by: Rolf Eike Beer Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/2232605.Mh6RI2rZIc@devpool92.emlix.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index cd750f512deee2..bad585b45a31df 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -199,7 +199,6 @@ source "drivers/iommu/riscv/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI - select DMAR_TABLE if INTEL_IOMMU help Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or From 161c4ee0977de27b5654aa92b791196d4c661c8d Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Thu, 24 Apr 2025 11:21:04 +0530 Subject: [PATCH 2330/2924] wifi: ath12k: Fix invalid RSSI values in station dump [ Upstream commit 3126f1c52af5bc8d40d2c984907daeb501f6b739 ] When processing a "station dump" command, the driver retrieves RSSI values from the HAL_PHYRX_RSSI_LEGACY TLV received from the monitor destination ring, and reports them to userspace. Currently, the RSSI values reported are improper because the hardware has not been configured to update them properly. To fix this, enable the HTT_RX_FILTER_TLV_FLAGS_PPDU_START_USER_INFO in the filter setup to ensure the correct RSSI values are returned in the HAL_PHYRX_RSSI_LEGACY TLV, resulting in correct RSSI values being reported to userspace. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00218-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: P Praneesh Signed-off-by: Sowjanya vardhineni Reviewed-by: Mahendran P Link: https://patch.msgid.link/20250424055104.2503723-1-quic_svardhin@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/mac.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 65eb50bf03b24b..331bcf5e6c4cce 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -229,7 +229,8 @@ ath12k_phymodes[NUM_NL80211_BANDS][ATH12K_CHAN_WIDTH_NUM] = { const struct htt_rx_ring_tlv_filter ath12k_mac_mon_status_filter_default = { .rx_filter = HTT_RX_FILTER_TLV_FLAGS_MPDU_START | HTT_RX_FILTER_TLV_FLAGS_PPDU_END | - HTT_RX_FILTER_TLV_FLAGS_PPDU_END_STATUS_DONE, + HTT_RX_FILTER_TLV_FLAGS_PPDU_END_STATUS_DONE | + HTT_RX_FILTER_TLV_FLAGS_PPDU_START_USER_INFO, .pkt_filter_flags0 = HTT_RX_FP_MGMT_FILTER_FLAGS0, .pkt_filter_flags1 = HTT_RX_FP_MGMT_FILTER_FLAGS1, .pkt_filter_flags2 = HTT_RX_FP_CTRL_FILTER_FLASG2, From 5d0cb07da3e22906a25722d14301a360b31f13be Mon Sep 17 00:00:00 2001 From: Yingying Tang Date: Wed, 23 Apr 2025 11:26:49 +0530 Subject: [PATCH 2331/2924] wifi: ath12k: Reorder and relocate the release of resources in ath12k_core_deinit() [ Upstream commit aabd3be90579ed088aa34f0584ab6836c735c76f ] Ath12k panic notifier is registered in driver loading process. But it is not unregistered if ATH12K_FLAG_QMI_FAIL is set(e.g. load BDF failed) and unload driver. It causes a dirty node in panic notifier list since ath12k panic notifier is not unregistered from list but the buffer of this node is freed in driver unloading process. If load driver again there will be a page fault error due to this dirty node in panic notifier list. This issue is caused by asymmetry between ath12k_core_init() and ath12k_core_deinit(). Reorder and relocate the release of resources in ath12k_core_deinit() to avoid this asymmetry issue. Call Trace: ? show_regs+0x67/0x70 ? __die_body+0x20/0x70 ? __die+0x2b/0x40 ? page_fault_oops+0x15d/0x500 ? search_bpf_extables+0x63/0x90 ? notifier_chain_register+0x21/0xe0 ? search_exception_tables+0x5f/0x70 ? kernelmode_fixup_or_oops.isra.0+0x61/0x80 ? __bad_area_nosemaphore+0x179/0x240 ? bad_area_nosemaphore+0x16/0x20 ? do_user_addr_fault+0x312/0x7f0 ? prb_read_valid+0x1c/0x30 ? exc_page_fault+0x78/0x180 ? asm_exc_page_fault+0x27/0x30 ? notifier_chain_register+0x21/0xe0 ? notifier_chain_register+0x55/0xe0 atomic_notifier_chain_register+0x2c/0x50 ath12k_core_init+0x7e/0x110 [ath12k] ath12k_pci_probe+0xaba/0xba0 [ath12k] Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0-02903-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: 809055628bce8 ("wifi: ath12k: add panic handler") Signed-off-by: Yingying Tang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250423055650.16230-2-quic_yintang@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/core.c | 5 ++--- drivers/net/wireless/ath/ath12k/core.h | 1 + drivers/net/wireless/ath/ath12k/pci.c | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 6b0c719be5434b..770156347ffadc 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1785,7 +1785,7 @@ static void ath12k_core_hw_group_destroy(struct ath12k_hw_group *ag) } } -static void ath12k_core_hw_group_cleanup(struct ath12k_hw_group *ag) +void ath12k_core_hw_group_cleanup(struct ath12k_hw_group *ag) { struct ath12k_base *ab; int i; @@ -1933,10 +1933,9 @@ int ath12k_core_init(struct ath12k_base *ab) void ath12k_core_deinit(struct ath12k_base *ab) { - ath12k_core_panic_notifier_unregister(ab); - ath12k_core_hw_group_cleanup(ab->ag); ath12k_core_hw_group_destroy(ab->ag); ath12k_core_hw_group_unassign(ab); + ath12k_core_panic_notifier_unregister(ab); } void ath12k_core_free(struct ath12k_base *ab) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 2ee83517eadc72..f5f1ec796f7c55 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -1195,6 +1195,7 @@ struct ath12k_fw_stats_pdev { }; int ath12k_core_qmi_firmware_ready(struct ath12k_base *ab); +void ath12k_core_hw_group_cleanup(struct ath12k_hw_group *ag); int ath12k_core_pre_init(struct ath12k_base *ab); int ath12k_core_init(struct ath12k_base *ath12k); void ath12k_core_deinit(struct ath12k_base *ath12k); diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 99b2c7927ec818..273f4bc260bfed 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -1734,8 +1734,6 @@ static void ath12k_pci_remove(struct pci_dev *pdev) if (test_bit(ATH12K_FLAG_QMI_FAIL, &ab->dev_flags)) { ath12k_pci_power_down(ab, false); - ath12k_qmi_deinit_service(ab); - ath12k_core_hw_group_unassign(ab); goto qmi_fail; } @@ -1743,9 +1741,10 @@ static void ath12k_pci_remove(struct pci_dev *pdev) cancel_work_sync(&ab->reset_work); cancel_work_sync(&ab->dump_work); - ath12k_core_deinit(ab); + ath12k_core_hw_group_cleanup(ab->ag); qmi_fail: + ath12k_core_deinit(ab); ath12k_fw_unmap(ab); ath12k_mhi_unregister(ab_pci); From 1089f65b2de78c7837ef6b4f26146a5a5b0b9749 Mon Sep 17 00:00:00 2001 From: Rajat Soni Date: Wed, 30 Apr 2025 10:25:38 +0530 Subject: [PATCH 2332/2924] wifi: ath12k: fix memory leak in ath12k_service_ready_ext_event [ Upstream commit 89142d34d5602c7447827beb181fa06eb08b9d5c ] Currently, in ath12k_service_ready_ext_event(), svc_rdy_ext.mac_phy_caps is not freed in the failure case, causing a memory leak. The following trace is observed in kmemleak: unreferenced object 0xffff8b3eb5789c00 (size 1024): comm "softirq", pid 0, jiffies 4294942577 hex dump (first 32 bytes): 00 00 00 00 01 00 00 00 00 00 00 00 7b 00 00 10 ............{... 01 00 00 00 00 00 00 00 01 00 00 00 1f 38 00 00 .............8.. backtrace (crc 44e1c357): __kmalloc_noprof+0x30b/0x410 ath12k_wmi_mac_phy_caps_parse+0x84/0x100 [ath12k] ath12k_wmi_tlv_iter+0x5e/0x140 [ath12k] ath12k_wmi_svc_rdy_ext_parse+0x308/0x4c0 [ath12k] ath12k_wmi_tlv_iter+0x5e/0x140 [ath12k] ath12k_service_ready_ext_event.isra.0+0x44/0xd0 [ath12k] ath12k_wmi_op_rx+0x2eb/0xd70 [ath12k] ath12k_htc_rx_completion_handler+0x1f4/0x330 [ath12k] ath12k_ce_recv_process_cb+0x218/0x300 [ath12k] ath12k_pci_ce_workqueue+0x1b/0x30 [ath12k] process_one_work+0x219/0x680 bh_worker+0x198/0x1f0 tasklet_action+0x13/0x30 handle_softirqs+0xca/0x460 __irq_exit_rcu+0xbe/0x110 irq_exit_rcu+0x9/0x30 Free svc_rdy_ext.mac_phy_caps in the error case to fix this memory leak. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Rajat Soni Signed-off-by: Raj Kumar Bhagat Link: https://patch.msgid.link/20250430-wmi-mem-leak-v1-1-fcc9b49c2ddc@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index d0705880cd3c9e..fe50c3d3cb8201 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -4601,6 +4601,7 @@ static int ath12k_service_ready_ext_event(struct ath12k_base *ab, return 0; err: + kfree(svc_rdy_ext.mac_phy_caps); ath12k_wmi_free_dbring_caps(ab); return ret; } From 7710c883eb8cb5cf510ca47ec0e26c6cb7e94a4f Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Sat, 10 May 2025 16:11:50 +0800 Subject: [PATCH 2333/2924] hisi_acc_vfio_pci: fix XQE dma address error [ Upstream commit 8bb7170c5a055ea17c6857c256ee73c10ff872eb ] The dma addresses of EQE and AEQE are wrong after migration and results in guest kernel-mode encryption services failure. Comparing the definition of hardware registers, we found that there was an error when the data read from the register was combined into an address. Therefore, the address combination sequence needs to be corrected. Even after fixing the above problem, we still have an issue where the Guest from an old kernel can get migrated to new kernel and may result in wrong data. In order to ensure that the address is correct after migration, if an old magic number is detected, the dma address needs to be updated. Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration") Signed-off-by: Longfang Liu Reviewed-by: Shameer Kolothum Link: https://lore.kernel.org/r/20250510081155.55840-2-liulongfang@huawei.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 41 ++++++++++++++++--- .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 14 ++++++- 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 451c639299eb3b..304dbdfa0e95a7 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -350,6 +350,32 @@ static int vf_qm_func_stop(struct hisi_qm *qm) return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0); } +static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev) +{ + switch (vf_data->acc_magic) { + case ACC_DEV_MAGIC_V2: + if (vf_data->major_ver != ACC_DRV_MAJOR_VER) { + dev_info(dev, "migration driver version<%u.%u> not match!\n", + vf_data->major_ver, vf_data->minor_ver); + return -EINVAL; + } + break; + case ACC_DEV_MAGIC_V1: + /* Correct dma address */ + vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH]; + vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET; + vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW]; + vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH]; + vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET; + vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW]; + break; + default: + return -EINVAL; + } + + return 0; +} + static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, struct hisi_acc_vf_migration_file *migf) { @@ -363,7 +389,8 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done) return 0; - if (vf_data->acc_magic != ACC_DEV_MAGIC) { + ret = vf_qm_version_check(vf_data, dev); + if (ret) { dev_err(dev, "failed to match ACC_DEV_MAGIC\n"); return -EINVAL; } @@ -418,7 +445,9 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev, int vf_id = hisi_acc_vdev->vf_id; int ret; - vf_data->acc_magic = ACC_DEV_MAGIC; + vf_data->acc_magic = ACC_DEV_MAGIC_V2; + vf_data->major_ver = ACC_DRV_MAJOR_VER; + vf_data->minor_ver = ACC_DRV_MINOR_VER; /* Save device id */ vf_data->dev_id = hisi_acc_vdev->vf_dev->device; @@ -496,12 +525,12 @@ static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data) return -EINVAL; /* Every reg is 32 bit, the dma address is 64 bit. */ - vf_data->eqe_dma = vf_data->qm_eqc_dw[1]; + vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH]; vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET; - vf_data->eqe_dma |= vf_data->qm_eqc_dw[0]; - vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1]; + vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW]; + vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH]; vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET; - vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0]; + vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW]; /* Through SQC_BT/CQC_BT to get sqc and cqc address */ ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma); diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h index 245d7537b2bcd4..91002ceeebc18a 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h @@ -39,6 +39,9 @@ #define QM_REG_ADDR_OFFSET 0x0004 #define QM_XQC_ADDR_OFFSET 32U +#define QM_XQC_ADDR_LOW 0x1 +#define QM_XQC_ADDR_HIGH 0x2 + #define QM_VF_AEQ_INT_MASK 0x0004 #define QM_VF_EQ_INT_MASK 0x000c #define QM_IFC_INT_SOURCE_V 0x0020 @@ -50,10 +53,15 @@ #define QM_EQC_DW0 0X8000 #define QM_AEQC_DW0 0X8020 +#define ACC_DRV_MAJOR_VER 1 +#define ACC_DRV_MINOR_VER 0 + +#define ACC_DEV_MAGIC_V1 0XCDCDCDCDFEEDAACC +#define ACC_DEV_MAGIC_V2 0xAACCFEEDDECADEDE + struct acc_vf_data { #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state) /* QM match information */ -#define ACC_DEV_MAGIC 0XCDCDCDCDFEEDAACC u64 acc_magic; u32 qp_num; u32 dev_id; @@ -61,7 +69,9 @@ struct acc_vf_data { u32 qp_base; u32 vf_qm_state; /* QM reserved match information */ - u32 qm_rsv_state[3]; + u16 major_ver; + u16 minor_ver; + u32 qm_rsv_state[2]; /* QM RW regs */ u32 aeq_int_mask; From a01f82deb1e15bf0fafb3afb045a21169d9b5ede Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Sat, 10 May 2025 16:11:51 +0800 Subject: [PATCH 2334/2924] hisi_acc_vfio_pci: add eq and aeq interruption restore [ Upstream commit 3495cec0787721ba7a9d5c19d0bbb66d182de584 ] In order to ensure that the task packets of the accelerator device are not lost during the migration process, it is necessary to send an EQ and AEQ command to the device after the live migration is completed and to update the completion position of the task queue. Let the device recheck the completed tasks data and if there are uncollected packets, device resend a task completion interrupt to the software. Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration") Signed-off-by: Longfang Liu Reviewed-by: Shameer Kolothum Link: https://lore.kernel.org/r/20250510081155.55840-3-liulongfang@huawei.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 304dbdfa0e95a7..80217aea54755b 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -470,6 +470,19 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev, return 0; } +static void vf_qm_xeqc_save(struct hisi_qm *qm, + struct hisi_acc_vf_migration_file *migf) +{ + struct acc_vf_data *vf_data = &migf->vf_data; + u16 eq_head, aeq_head; + + eq_head = vf_data->qm_eqc_dw[0] & 0xFFFF; + qm_db(qm, 0, QM_DOORBELL_CMD_EQ, eq_head, 0); + + aeq_head = vf_data->qm_aeqc_dw[0] & 0xFFFF; + qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, aeq_head, 0); +} + static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev, struct hisi_acc_vf_migration_file *migf) { @@ -578,6 +591,9 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev, return -EINVAL; migf->total_length = sizeof(struct acc_vf_data); + /* Save eqc and aeqc interrupt information */ + vf_qm_xeqc_save(vf_qm, migf); + return 0; } From f3cb7a7ba4324cd344f701b83eb87f3e0a397eb1 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Sat, 10 May 2025 16:11:52 +0800 Subject: [PATCH 2335/2924] hisi_acc_vfio_pci: bugfix cache write-back issue [ Upstream commit e63c466398731bb7867f42f44b76fa984de59db2 ] At present, cache write-back is placed in the device data copy stage after stopping the device operation. Writing back to the cache at this stage will cause the data obtained by the cache to be written back to be empty. In order to ensure that the cache data is written back successfully, the data needs to be written back into the stop device stage. Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration") Signed-off-by: Longfang Liu Reviewed-by: Shameer Kolothum Link: https://lore.kernel.org/r/20250510081155.55840-4-liulongfang@huawei.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 80217aea54755b..d96446f499ed01 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -566,7 +566,6 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev, { struct acc_vf_data *vf_data = &migf->vf_data; struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; - struct device *dev = &vf_qm->pdev->dev; int ret; if (unlikely(qm_wait_dev_not_ready(vf_qm))) { @@ -580,12 +579,6 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev, vf_data->vf_qm_state = QM_READY; hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; - ret = vf_qm_cache_wb(vf_qm); - if (ret) { - dev_err(dev, "failed to writeback QM Cache!\n"); - return ret; - } - ret = vf_qm_read_data(vf_qm, vf_data); if (ret) return -EINVAL; @@ -1012,6 +1005,13 @@ static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev dev_err(dev, "failed to check QM INT state!\n"); return ret; } + + ret = vf_qm_cache_wb(vf_qm); + if (ret) { + dev_err(dev, "failed to writeback QM cache!\n"); + return ret; + } + return 0; } From a1779a96378dca7960eb13058a035a267dd39976 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Sat, 10 May 2025 16:11:53 +0800 Subject: [PATCH 2336/2924] hisi_acc_vfio_pci: bugfix the problem of uninstalling driver [ Upstream commit db6525a8573957faea28850392f4744e5f8f7a53 ] In a live migration scenario. If the number of VFs at the destination is greater than the source, the recovery operation will fail and qemu will not be able to complete the process and exit after shutting down the device FD. This will cause the driver to be unable to be unloaded normally due to abnormal reference counting of the live migration driver caused by the abnormal closing operation of fd. Therefore, make sure the migration file descriptor references are always released when the device is closed. Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration") Signed-off-by: Longfang Liu Reviewed-by: Shameer Kolothum Link: https://lore.kernel.org/r/20250510081155.55840-5-liulongfang@huawei.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index d96446f499ed01..cadc82419dca29 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -1508,6 +1508,7 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev) struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev); struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; + hisi_acc_vf_disable_fds(hisi_acc_vdev); mutex_lock(&hisi_acc_vdev->open_mutex); hisi_acc_vdev->dev_opened = false; iounmap(vf_qm->io_base); From 53e8e8e909f7c3a77857d09d2b733a42547f57ee Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Sat, 10 May 2025 16:11:54 +0800 Subject: [PATCH 2337/2924] hisi_acc_vfio_pci: bugfix live migration function without VF device driver [ Upstream commit 2777a40998deb36f96b6afc48bd397cf58a4edf0 ] If the VF device driver is not loaded in the Guest OS and we attempt to perform device data migration, the address of the migrated data will be NULL. The live migration recovery operation on the destination side will access a null address value, which will cause access errors. Therefore, live migration of VMs without added VF device drivers does not require device data migration. In addition, when the queue address data obtained by the destination is empty, device queue recovery processing will not be performed. Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration") Signed-off-by: Longfang Liu Reviewed-by: Shameer Kolothum Link: https://lore.kernel.org/r/20250510081155.55840-6-liulongfang@huawei.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index cadc82419dca29..d12a350440d3ca 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -426,13 +426,6 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, return -EINVAL; } - ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1); - if (ret) { - dev_err(dev, "failed to write QM_VF_STATE\n"); - return ret; - } - - hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; hisi_acc_vdev->match_done = true; return 0; } @@ -498,6 +491,20 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev, if (migf->total_length < sizeof(struct acc_vf_data)) return -EINVAL; + if (!vf_data->eqe_dma || !vf_data->aeqe_dma || + !vf_data->sqc_dma || !vf_data->cqc_dma) { + dev_info(dev, "resume dma addr is NULL!\n"); + hisi_acc_vdev->vf_qm_state = QM_NOT_READY; + return 0; + } + + ret = qm_write_regs(qm, QM_VF_STATE, &vf_data->vf_qm_state, 1); + if (ret) { + dev_err(dev, "failed to write QM_VF_STATE\n"); + return -EINVAL; + } + hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; + qm->eqe_dma = vf_data->eqe_dma; qm->aeqe_dma = vf_data->aeqe_dma; qm->sqc_dma = vf_data->sqc_dma; @@ -1531,6 +1538,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev) hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1; hisi_acc_vdev->pf_qm = pf_qm; hisi_acc_vdev->vf_dev = pdev; + hisi_acc_vdev->vf_qm_state = QM_NOT_READY; mutex_init(&hisi_acc_vdev->state_mutex); mutex_init(&hisi_acc_vdev->open_mutex); From 5a85c21f812e02cb00ca07007d88acdd42d08c46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 2 Apr 2025 13:22:16 +0200 Subject: [PATCH 2338/2924] wifi: ath9k_htc: Abort software beacon handling if disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ac4e317a95a1092b5da5b9918b7118759342641c ] A malicious USB device can send a WMI_SWBA_EVENTID event from an ath9k_htc-managed device before beaconing has been enabled. This causes a device-by-zero error in the driver, leading to either a crash or an out of bounds read. Prevent this by aborting the handling in ath9k_htc_swba() if beacons are not enabled. Reported-by: Robert Morris Closes: https://lore.kernel.org/r/88967.1743099372@localhost Fixes: 832f6a18fc2a ("ath9k_htc: Add beacon slots") Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250402112217.58533-1-toke@toke.dk Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/htc_drv_beacon.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c index 547634f82183d6..81fa7cbad89213 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c @@ -290,6 +290,9 @@ void ath9k_htc_swba(struct ath9k_htc_priv *priv, struct ath_common *common = ath9k_hw_common(priv->ah); int slot; + if (!priv->cur_beacon_conf.enable_beacon) + return; + if (swba->beacon_pending != 0) { priv->beacon.bmisscnt++; if (priv->beacon.bmisscnt > BSTUCK_THRESHOLD) { From 416f37da8df37a5b67ff7d0d9fb5d781e692f64a Mon Sep 17 00:00:00 2001 From: Hao Chang Date: Sat, 29 Mar 2025 10:40:29 +0800 Subject: [PATCH 2339/2924] pinctrl: mediatek: Fix the invalid conditions [ Upstream commit 86dee87f4b2e6ac119b03810e58723d0b27787a4 ] The variable count_reg_names is defined as an int type and cannot be directly compared to an unsigned int. To resolve this issue, first verify the correctness of count_reg_names. Link: https://lore.kernel.org/all/5ae93d42e4c4e70fb33bf35dcc37caebf324c8d3.camel@mediatek.com/T/ Signed-off-by: Hao Chang Signed-off-by: Qingliang Li Link: https://lore.kernel.org/20250329024533.5279-1-ot_chhao.chang@mediatek.com Signed-off-by: Linus Walleij Stable-dep-of: 1c9977b26347 ("pinctrl: mediatek: eint: Fix invalid pointer dereference for v1 platforms") Signed-off-by: Sasha Levin --- drivers/pinctrl/mediatek/mtk-eint.c | 4 ++-- drivers/pinctrl/mediatek/mtk-eint.h | 2 +- drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 7 +++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index c516c34aaaf603..e235a98ae7ee58 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -506,7 +506,7 @@ EXPORT_SYMBOL_GPL(mtk_eint_find_irq); int mtk_eint_do_init(struct mtk_eint *eint, struct mtk_eint_pin *eint_pin) { - unsigned int size, i, port, inst = 0; + unsigned int size, i, port, virq, inst = 0; /* If clients don't assign a specific regs, let's use generic one */ if (!eint->regs) @@ -580,7 +580,7 @@ int mtk_eint_do_init(struct mtk_eint *eint, struct mtk_eint_pin *eint_pin) if (inst >= eint->nbase) continue; eint->pin_list[inst][eint->pins[i].index] = i; - int virq = irq_create_mapping(eint->domain, i); + virq = irq_create_mapping(eint->domain, i); irq_set_chip_and_handler(virq, &mtk_eint_irq_chip, handle_level_irq); irq_set_chip_data(virq, eint); diff --git a/drivers/pinctrl/mediatek/mtk-eint.h b/drivers/pinctrl/mediatek/mtk-eint.h index 23801d4b636f62..fc31a4c0c77bf2 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.h +++ b/drivers/pinctrl/mediatek/mtk-eint.h @@ -66,7 +66,7 @@ struct mtk_eint_xt { struct mtk_eint { struct device *dev; void __iomem **base; - u8 nbase; + int nbase; u16 *base_pin_num; struct irq_domain *domain; int irq; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index ba13558bfcd7bb..4918d38abfc29d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -381,10 +381,13 @@ int mtk_build_eint(struct mtk_pinctrl *hw, struct platform_device *pdev) return -ENOMEM; count_reg_names = of_property_count_strings(np, "reg-names"); - if (count_reg_names < hw->soc->nbase_names) + if (count_reg_names < 0) + return -EINVAL; + + hw->eint->nbase = count_reg_names - (int)hw->soc->nbase_names; + if (hw->eint->nbase <= 0) return -EINVAL; - hw->eint->nbase = count_reg_names - hw->soc->nbase_names; hw->eint->base = devm_kmalloc_array(&pdev->dev, hw->eint->nbase, sizeof(*hw->eint->base), GFP_KERNEL | __GFP_ZERO); if (!hw->eint->base) { From 42982d90f2752eda78408b8d96e0de96acbd9a53 Mon Sep 17 00:00:00 2001 From: "ping.gao" Date: Fri, 16 May 2025 16:38:12 +0800 Subject: [PATCH 2340/2924] scsi: ufs: mcq: Delete ufshcd_release_scsi_cmd() in ufshcd_mcq_abort() [ Upstream commit 53755903b9357e69b2dd6a02fafbb1e30c741895 ] After UFS_ABORT_TASK has been processed successfully, the host will generate MCQ IRQ for ABORT TAG with response OCS_ABORTED. This results in ufshcd_compl_one_cqe() calling ufshcd_release_scsi_cmd(). But ufshcd_mcq_abort() already calls ufshcd_release_scsi_cmd(), resulting in __ufshcd_release() being called twice. This means hba->clk_gating.active_reqs will be decreased twice, making it go negative. Delete ufshcd_release_scsi_cmd() in ufshcd_mcq_abort(). Fixes: f1304d442077 ("scsi: ufs: mcq: Added ufshcd_mcq_abort()") Signed-off-by: ping.gao Link: https://lore.kernel.org/r/20250516083812.3894396-1-ping.gao@samsung.com Reviewed-by: Peter Wang Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufs-mcq.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index f1294c29f48491..1e50675772febb 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -674,7 +674,6 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) int tag = scsi_cmd_to_rq(cmd)->tag; struct ufshcd_lrb *lrbp = &hba->lrb[tag]; struct ufs_hw_queue *hwq; - unsigned long flags; int err; /* Skip task abort in case previous aborts failed and report failure */ @@ -713,10 +712,5 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) return FAILED; } - spin_lock_irqsave(&hwq->cq_lock, flags); - if (ufshcd_cmd_inflight(lrbp->cmd)) - ufshcd_release_scsi_cmd(hba, lrbp); - spin_unlock_irqrestore(&hwq->cq_lock, flags); - return SUCCESS; } From d383ae6a95143e0d9b5c022da01a7be71d708549 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Wed, 14 May 2025 02:09:41 -0700 Subject: [PATCH 2341/2924] scsi: mpt3sas: Fix _ctl_get_mpt_mctp_passthru_adapter() to return IOC pointer [ Upstream commit 9ad5249b37b59baaf2da1014f397c2e23b605075 ] Fix _ctl_get_mpt_mctp_passthru_adapter() to return the correct IOC pointer to caller based on dev_index. Signed-off-by: Shivasharan S Link: https://lore.kernel.org/r/1747213781-31545-1-git-send-email-shivasharan.srikanteshwara@broadcom.com Fixes: c72be4b5bb7c ("scsi: mpt3sas: Add support for MCTP Passthrough commands") Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpt3sas/mpt3sas_ctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 063b10dd82514e..02fc204b9bf7b2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -2869,8 +2869,9 @@ _ctl_get_mpt_mctp_passthru_adapter(int dev_index) if (ioc->facts.IOCCapabilities & MPI26_IOCFACTS_CAPABILITY_MCTP_PASSTHRU) { if (count == dev_index) { spin_unlock(&gioc_lock); - return 0; + return ioc; } + count++; } } spin_unlock(&gioc_lock); From 10c30e005c5fe885545f6fb22f38f6605d2d28a0 Mon Sep 17 00:00:00 2001 From: Gautam R A Date: Tue, 20 May 2025 09:29:07 +0530 Subject: [PATCH 2342/2924] RDMA/bnxt_re: Fix incorrect display of inactivity_cp in debugfs output [ Upstream commit 58d7a965bb2b014d467445d38cdb07099b1f0f77 ] The inactivity_cp parameter in debugfs was not being read or written correctly, resulting in "Invalid argument" errors. Fixed this by ensuring proper mapping of inactivity_cp in both the map_cc_config_offset_gen0_ext0 and bnxt_re_fill_gen0_ext0() functions. Fixes: 656dff55da19 ("RDMA/bnxt_re: Congestion control settings using debugfs hook") Signed-off-by: Gautam R A Link: https://patch.msgid.link/20250520035910.1061918-2-kalesh-anakkur.purayil@broadcom.com Reviewed-by: Kalesh AP Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/debugfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c index af91d16c3c77f5..a3aad6c3dbec1f 100644 --- a/drivers/infiniband/hw/bnxt_re/debugfs.c +++ b/drivers/infiniband/hw/bnxt_re/debugfs.c @@ -170,6 +170,9 @@ static int map_cc_config_offset_gen0_ext0(u32 offset, struct bnxt_qplib_cc_param case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP: *val = ccparam->tcp_cp; break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP: + *val = ccparam->inact_th; + break; default: return -EINVAL; } @@ -247,7 +250,9 @@ static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offs ccparam->tcp_cp = val; break; case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE: + break; case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP: + ccparam->inact_th = val; break; case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE: ccparam->time_pph = val; From 47ec7f85591d5b5e696e4a69f968c86e027f688b Mon Sep 17 00:00:00 2001 From: Gautam R A Date: Tue, 20 May 2025 09:29:08 +0530 Subject: [PATCH 2343/2924] RDMA/bnxt_re: Fix missing error handling for tx_queue [ Upstream commit e3d57a00d4d1f36689e9eab80b60d5024361efec ] bnxt_re_fill_gen0_ext0() did not return an error when attempting to modify CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE, leading to silent failures. Fixed this by returning -EOPNOTSUPP for tx_queue modifications and ensuring proper error propagation in bnxt_re_configure_cc(). Fixes: 656dff55da19 ("RDMA/bnxt_re: Congestion control settings using debugfs hook") Signed-off-by: Gautam R A Link: https://patch.msgid.link/20250520035910.1061918-3-kalesh-anakkur.purayil@broadcom.com Reviewed-by: Kalesh AP Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/debugfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c index a3aad6c3dbec1f..9f6392155d9157 100644 --- a/drivers/infiniband/hw/bnxt_re/debugfs.c +++ b/drivers/infiniband/hw/bnxt_re/debugfs.c @@ -206,7 +206,7 @@ static ssize_t bnxt_re_cc_config_get(struct file *filp, char __user *buffer, return simple_read_from_buffer(buffer, usr_buf_len, ppos, (u8 *)(buf), rc); } -static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val) +static int bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val) { u32 modify_mask; @@ -250,7 +250,7 @@ static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offs ccparam->tcp_cp = val; break; case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE: - break; + return -EOPNOTSUPP; case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP: ccparam->inact_th = val; break; @@ -263,18 +263,21 @@ static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offs } ccparam->mask = modify_mask; + return 0; } static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offset, u32 val) { struct bnxt_qplib_cc_param ccparam = { }; + int rc; - /* Supporting only Gen 0 now */ - if (gen_ext == CC_CONFIG_GEN0_EXT0) - bnxt_re_fill_gen0_ext0(&ccparam, offset, val); - else + if (gen_ext != CC_CONFIG_GEN0_EXT0) return -EINVAL; + rc = bnxt_re_fill_gen0_ext0(&ccparam, offset, val); + if (rc) + return rc; + bnxt_qplib_modify_cc(&rdev->qplib_res, &ccparam); return 0; } From 2772b6726f01a3c2d69e9eacfb6789ec34d800c7 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 20 May 2025 09:29:09 +0530 Subject: [PATCH 2344/2924] RDMA/bnxt_re: Fix return code of bnxt_re_configure_cc [ Upstream commit 990b5c07f677a0b633b41130a70771337c18343e ] Driver currently supports modifying GEN0_EXT0 CC parameters through debugfs hook. Fixed to return -EOPNOTSUPP instead of -EINVAL in bnxt_re_configure_cc() when the user tries to modify any other CC parameters. Fixes: 656dff55da19 ("RDMA/bnxt_re: Congestion control settings using debugfs hook") Signed-off-by: Kalesh AP Link: https://patch.msgid.link/20250520035910.1061918-4-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c index 9f6392155d9157..e632f1661b9295 100644 --- a/drivers/infiniband/hw/bnxt_re/debugfs.c +++ b/drivers/infiniband/hw/bnxt_re/debugfs.c @@ -272,7 +272,7 @@ static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offse int rc; if (gen_ext != CC_CONFIG_GEN0_EXT0) - return -EINVAL; + return -EOPNOTSUPP; rc = bnxt_re_fill_gen0_ext0(&ccparam, offset, val); if (rc) From 72275c888f8962b406ee9c6885c79bf68cca5a63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Mon, 5 May 2025 14:12:00 +0200 Subject: [PATCH 2345/2924] kernfs: Relax constraint in draining guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 071d8e4c2a3b0999a9b822e2eb8854784a350f8a ] The active reference lifecycle provides the break/unbreak mechanism but the active reference is not truly active after unbreak -- callers don't use it afterwards but it's important for proper pairing of kn->active counting. Assuming this mechanism is in place, the WARN check in kernfs_should_drain_open_files() is too sensitive -- it may transiently catch those (rightful) callers between kernfs_unbreak_active_protection() and kernfs_put_active() as found out by Chen Ridong: kernfs_remove_by_name_ns kernfs_get_active // active=1 __kernfs_remove // active=0x80000002 kernfs_drain ... wait_event //waiting (active == 0x80000001) kernfs_break_active_protection // active = 0x80000001 // continue kernfs_unbreak_active_protection // active = 0x80000002 ... kernfs_should_drain_open_files // warning occurs kernfs_put_active To avoid the false positives (mind panic_on_warn) remove the check altogether. (This is meant as quick fix, I think active reference break/unbreak may be simplified with larger rework.) Fixes: bdb2fd7fc56e1 ("kernfs: Skip kernfs_drain_open_files() more aggressively") Link: https://lore.kernel.org/r/kmmrseckjctb4gxcx2rdminrjnq2b4ipf7562nvfd432ld5v5m@2byj5eedkb2o/ Cc: Chen Ridong Signed-off-by: Michal Koutný Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20250505121201.879823-1-mkoutny@suse.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- fs/kernfs/dir.c | 5 +++-- fs/kernfs/file.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index fc70d72c3fe805..43487fa83eaea1 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1580,8 +1580,9 @@ void kernfs_break_active_protection(struct kernfs_node *kn) * invoked before finishing the kernfs operation. Note that while this * function restores the active reference, it doesn't and can't actually * restore the active protection - @kn may already or be in the process of - * being removed. Once kernfs_break_active_protection() is invoked, that - * protection is irreversibly gone for the kernfs operation instance. + * being drained and removed. Once kernfs_break_active_protection() is + * invoked, that protection is irreversibly gone for the kernfs operation + * instance. * * While this function may be called at any point after * kernfs_break_active_protection() is invoked, its most useful location diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 66fe8fe41f0605..a6c692cac61659 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -778,8 +778,9 @@ bool kernfs_should_drain_open_files(struct kernfs_node *kn) /* * @kn being deactivated guarantees that @kn->attr.open can't change * beneath us making the lockless test below safe. + * Callers post kernfs_unbreak_active_protection may be counted in + * kn->active by now, do not WARN_ON because of them. */ - WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); rcu_read_lock(); on = rcu_dereference(kn->attr.open); From 3c68444b2a1e0b2cb5c5d4e4230ea0c38a3cf147 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Mar 2025 17:35:40 +0300 Subject: [PATCH 2346/2924] wifi: mt76: mt7925: Fix logical vs bitwise typo [ Upstream commit 88224119863c39fa67581874e1ba218fa56113b4 ] This was supposed to be & instead of &&. Fixes: f0317215b367 ("wifi: mt76: mt7925: add EHT control support based on the CLC data") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/d323a443-4e81-4064-8563-b62274b53ef4@stanley.mountain Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7925/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/init.c b/drivers/net/wireless/mediatek/mt76/mt7925/init.c index 63cb08f4d87cc4..79639be0d29aca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/init.c @@ -89,7 +89,7 @@ void mt7925_regd_be_ctrl(struct mt792x_dev *dev, u8 *alpha2) } /* Check the last one */ - if (rule->flag && BIT(0)) + if (rule->flag & BIT(0)) break; pos += sizeof(*rule); From 8340cb173750c1ea99cd643006b72f8b0e6c21f2 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Mon, 7 Apr 2025 17:55:51 +0800 Subject: [PATCH 2347/2924] wifi: mt76: mt7996: Add NULL check in mt7996_thermal_init [ Upstream commit caf4b347c5dc40fdd125793b5e82ba9fc4de5275 ] devm_kasprintf() can return a NULL pointer on failure,but this returned value in mt7996_thermal_init() is not checked. Add NULL check in mt7996_thermal_init(), to handle kernel NULL pointer dereference error. Fixes: 69d54ce7491d ("wifi: mt76: mt7996: switch to single multi-radio wiphy") Signed-off-by: Charles Han Link: https://patch.msgid.link/20250407095551.32127-1-hanchunchao@inspur.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 6b660424aedc31..5af52bd1f1f122 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -217,6 +217,9 @@ static int mt7996_thermal_init(struct mt7996_phy *phy) name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7996_%s.%d", wiphy_name(wiphy), phy->mt76->band_idx); + if (!name) + return -ENOMEM; + snprintf(cname, sizeof(cname), "cooling_device%d", phy->mt76->band_idx); cdev = thermal_cooling_device_register(name, phy, &mt7996_thermal_ops); From b623ad5972d3e264dd7dae389dec32f0dc70dd90 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Mon, 21 Apr 2025 12:05:50 +0100 Subject: [PATCH 2348/2924] wifi: mt76: mt7996: prevent uninit return in mt7996_mac_sta_add_links [ Upstream commit d9bc625861d490cb76ae8af86fac6f8ab0655a18 ] If link_conf_dereference_protected() or mt7996_vif_link() or link_sta_dereference_protected() fail the code jumps to the error_unlink label and returns ret which is uninitialised. Fix this by setting err before jumping to error_unlink. Fixes: c7e4fc362443 ("wifi: mt76: mt7996: Update mt7996_mcu_add_sta to MLO support") Fixes: dd82a9e02c05 ("wifi: mt76: mt7996: Rely on mt7996_sta_link in sta_add/sta_remove callbacks") Signed-off-by: Qasim Ijaz Link: https://patch.msgid.link/20250421110550.9839-1-qasdev00@gmail.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 91c64e3a0860ff..70823bbb165c7e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -998,16 +998,22 @@ mt7996_mac_sta_add_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, continue; link_conf = link_conf_dereference_protected(vif, link_id); - if (!link_conf) + if (!link_conf) { + err = -EINVAL; goto error_unlink; + } link = mt7996_vif_link(dev, vif, link_id); - if (!link) + if (!link) { + err = -EINVAL; goto error_unlink; + } link_sta = link_sta_dereference_protected(sta, link_id); - if (!link_sta) + if (!link_sta) { + err = -EINVAL; goto error_unlink; + } err = mt7996_mac_sta_init_link(dev, link_conf, link_sta, link, link_id); From 83a422c8169eef95aef57f7ddc467bb126d9ae81 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Mon, 21 Apr 2025 12:25:44 +0100 Subject: [PATCH 2349/2924] wifi: mt76: mt7996: avoid NULL pointer dereference in mt7996_set_monitor() [ Upstream commit cb423ddad0f6e6f55b1700422ab777b25597cc83 ] The function mt7996_set_monitor() dereferences phy before the NULL sanity check. Fix this to avoid NULL pointer dereference by moving the dereference after the check. Fixes: 69d54ce7491d ("wifi: mt76: mt7996: switch to single multi-radio wiphy") Signed-off-by: Qasim Ijaz Link: https://patch.msgid.link/20250421112544.13430-1-qasdev00@gmail.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 70823bbb165c7e..5ec4f979328653 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -414,11 +414,13 @@ static void mt7996_phy_set_rxfilter(struct mt7996_phy *phy) static void mt7996_set_monitor(struct mt7996_phy *phy, bool enabled) { - struct mt7996_dev *dev = phy->dev; + struct mt7996_dev *dev; if (!phy) return; + dev = phy->dev; + if (enabled == !(phy->rxfilter & MT_WF_RFCR_DROP_OTHER_UC)) return; From 63145c84d6b9110f68775728d4469bf07a451470 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Mon, 21 Apr 2025 12:13:44 +0100 Subject: [PATCH 2350/2924] wifi: mt76: mt7996: avoid null deref in mt7996_stop_phy() [ Upstream commit a0bdd3d1b94d22dd9d255fd148eb9183ea0a822f ] In mt7996_stop_phy() the mt7996_phy structure is dereferenced before the null sanity check which could lead to a null deref. Fix by moving the dereference operation after the sanity check. Fixes: 69d54ce7491d ("wifi: mt76: mt7996: switch to single multi-radio wiphy") Signed-off-by: Qasim Ijaz Link: https://patch.msgid.link/20250421111344.11484-1-qasdev00@gmail.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 5ec4f979328653..8698c4345af0c8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -68,11 +68,13 @@ static int mt7996_start(struct ieee80211_hw *hw) static void mt7996_stop_phy(struct mt7996_phy *phy) { - struct mt7996_dev *dev = phy->dev; + struct mt7996_dev *dev; if (!phy || !test_bit(MT76_STATE_RUNNING, &phy->mt76->state)) return; + dev = phy->dev; + cancel_delayed_work_sync(&phy->mt76->mac_work); mutex_lock(&dev->mt76.mutex); From 932d9f87a2a4da2f846b043e2d26aff7db69e325 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 11 Apr 2025 12:37:50 -0400 Subject: [PATCH 2351/2924] Bluetooth: ISO: Fix not using SID from adv report [ Upstream commit e2d471b7806b09744d65a64bcf41337468f2443b ] Up until now it has been assumed that the application would be able to enter the advertising SID in sockaddr_iso_bc.bc_sid, but userspace has no access to SID since the likes of MGMT_EV_DEVICE_FOUND cannot carry it, so it was left unset (0x00) which means it would be unable to synchronize if the broadcast source is using a different SID e.g. 0x04: > HCI Event: LE Meta Event (0x3e) plen 57 LE Extended Advertising Report (0x0d) Num reports: 1 Entry 0 Event type: 0x0000 Props: 0x0000 Data status: Complete Address type: Random (0x01) Address: 0B:82:E8:50:6D:C8 (Non-Resolvable) Primary PHY: LE 1M Secondary PHY: LE 2M SID: 0x04 TX power: 127 dBm RSSI: -55 dBm (0xc9) Periodic advertising interval: 180.00 msec (0x0090) Direct address type: Public (0x00) Direct address: 00:00:00:00:00:00 (OUI 00-00-00) Data length: 0x1f 06 16 52 18 5b 0b e1 05 16 56 18 04 00 11 30 4c ..R.[....V....0L 75 69 7a 27 73 20 53 32 33 20 55 6c 74 72 61 uiz's S23 Ultra Service Data: Broadcast Audio Announcement (0x1852) Broadcast ID: 14748507 (0xe10b5b) Service Data: Public Broadcast Announcement (0x1856) Data[2]: 0400 Unknown EIR field 0x30[16]: 4c75697a27732053323320556c747261 < HCI Command: LE Periodic Advertising Create Sync (0x08|0x0044) plen 14 Options: 0x0000 Use advertising SID, Advertiser Address Type and address Reporting initially enabled SID: 0x00 (<- Invalid) Adv address type: Random (0x01) Adv address: 0B:82:E8:50:6D:C8 (Non-Resolvable) Skip: 0x0000 Sync timeout: 20000 msec (0x07d0) Sync CTE type: 0x0000 So instead this changes now allow application to set HCI_SID_INVALID which will make hci_le_pa_create_sync to wait for a report, update the conn->sid using the report SID and only then issue PA create sync command: < HCI Command: LE Periodic Advertising Create Sync Options: 0x0000 Use advertising SID, Advertiser Address Type and address Reporting initially enabled SID: 0x04 Adv address type: Random (0x01) Adv address: 0B:82:E8:50:6D:C8 (Non-Resolvable) Skip: 0x0000 Sync timeout: 20000 msec (0x07d0) Sync CTE type: 0x0000 > HCI Event: LE Meta Event (0x3e) plen 16 LE Periodic Advertising Sync Established (0x0e) Status: Success (0x00) Sync handle: 64 Advertising SID: 0x04 Advertiser address type: Random (0x01) Advertiser address: 0B:82:E8:50:6D:C8 (Non-Resolvable) Advertiser PHY: LE 2M (0x02) Periodic advertising interval: 180.00 msec (0x0090) Advertiser clock accuracy: 0x05 Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 23205562ffc8 ("Bluetooth: separate CIS_LINK and BIS_LINK link types") Signed-off-by: Sasha Levin --- net/bluetooth/hci_conn.c | 2 ++ net/bluetooth/hci_core.c | 13 ++++++----- net/bluetooth/hci_event.c | 16 ++++++++++++- net/bluetooth/hci_sync.c | 49 +++++++++++++++++++++++++++++++++++---- net/bluetooth/iso.c | 9 +++++-- 5 files changed, 75 insertions(+), 14 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 946d2ae551f86c..c0207812f43282 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2070,6 +2070,8 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, { struct hci_conn *conn; + bt_dev_dbg(hdev, "dst %pMR type %d sid %d", dst, dst_type, sid); + conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE); if (IS_ERR(conn)) return conn; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5eb0600bbd03cc..75da6f6e39c9ed 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -4057,10 +4057,13 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) return; } - err = hci_send_frame(hdev, skb); - if (err < 0) { - hci_cmd_sync_cancel_sync(hdev, -err); - return; + if (hci_skb_opcode(skb) != HCI_OP_NOP) { + err = hci_send_frame(hdev, skb); + if (err < 0) { + hci_cmd_sync_cancel_sync(hdev, -err); + return; + } + atomic_dec(&hdev->cmd_cnt); } if (hdev->req_status == HCI_REQ_PEND && @@ -4068,8 +4071,6 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(hdev->req_skb); hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); } - - atomic_dec(&hdev->cmd_cnt); } static void hci_cmd_work(struct work_struct *work) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index c38ada69c3d7f2..4183560582a3ad 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6351,6 +6351,17 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, info->secondary_phy &= 0x1f; } + /* Check if PA Sync is pending and if the hci_conn SID has not + * been set update it. + */ + if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_create_pa_sync(hdev); + if (conn && conn->sid == HCI_SID_INVALID) + conn->sid = info->sid; + } + if (legacy_evt_type != LE_ADV_INVALID) { process_adv_report(hdev, legacy_evt_type, &info->bdaddr, info->bdaddr_type, NULL, 0, @@ -7155,7 +7166,8 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data, /* Only match event if command OGF is for LE */ if (hdev->req_skb && - hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 && + (hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 || + hci_skb_opcode(hdev->req_skb) == HCI_OP_NOP) && hci_skb_event(hdev->req_skb) == ev->subevent) { *opcode = hci_skb_opcode(hdev->req_skb); hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete, @@ -7511,8 +7523,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) goto done; } + hci_dev_lock(hdev); kfree_skb(hdev->recv_event); hdev->recv_event = skb_clone(skb, GFP_KERNEL); + hci_dev_unlock(hdev); event = hdr->evt; if (!event) { diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index e56b1cbedab908..d00ff18f3be0dc 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6898,20 +6898,37 @@ int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, static void create_pa_complete(struct hci_dev *hdev, void *data, int err) { + struct hci_conn *conn = data; + struct hci_conn *pa_sync; + bt_dev_dbg(hdev, "err %d", err); - if (!err) + if (err == -ECANCELED) return; + hci_dev_lock(hdev); + hci_dev_clear_flag(hdev, HCI_PA_SYNC); - if (err == -ECANCELED) - return; + if (!hci_conn_valid(hdev, conn)) + clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); - hci_dev_lock(hdev); + if (!err) + goto unlock; - hci_update_passive_scan_sync(hdev); + /* Add connection to indicate PA sync error */ + pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, + HCI_ROLE_SLAVE); + if (IS_ERR(pa_sync)) + goto unlock; + + set_bit(HCI_CONN_PA_SYNC_FAILED, &pa_sync->flags); + + /* Notify iso layer */ + hci_connect_cfm(pa_sync, bt_status(err)); + +unlock: hci_dev_unlock(hdev); } @@ -6925,9 +6942,23 @@ static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) if (!hci_conn_valid(hdev, conn)) return -ECANCELED; + if (conn->sync_handle != HCI_SYNC_HANDLE_INVALID) + return -EINVAL; + if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC)) return -EBUSY; + /* Stop scanning if SID has not been set and active scanning is enabled + * so we use passive scanning which will be scanning using the allow + * list programmed to contain only the connection address. + */ + if (conn->sid == HCI_SID_INVALID && + hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + hci_scan_disable_sync(hdev); + hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + } + /* Mark HCI_CONN_CREATE_PA_SYNC so hci_update_passive_scan_sync can * program the address in the allow list so PA advertisements can be * received. @@ -6936,6 +6967,14 @@ static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) hci_update_passive_scan_sync(hdev); + /* SID has not been set listen for HCI_EV_LE_EXT_ADV_REPORT to update + * it. + */ + if (conn->sid == HCI_SID_INVALID) + __hci_cmd_sync_status_sk(hdev, HCI_OP_NOP, 0, NULL, + HCI_EV_LE_EXT_ADV_REPORT, + conn->conn_timeout, NULL); + memset(&cp, 0, sizeof(cp)); cp.options = qos->bcast.options; cp.sid = conn->sid; diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 2819cda616bce8..7c0012ce1b890e 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -941,7 +941,7 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr, iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type; - if (sa->iso_bc->bc_sid > 0x0f) + if (sa->iso_bc->bc_sid > 0x0f && sa->iso_bc->bc_sid != HCI_SID_INVALID) return -EINVAL; iso_pi(sk)->bc_sid = sa->iso_bc->bc_sid; @@ -2029,6 +2029,9 @@ static bool iso_match_sid(struct sock *sk, void *data) { struct hci_ev_le_pa_sync_established *ev = data; + if (iso_pi(sk)->bc_sid == HCI_SID_INVALID) + return true; + return ev->sid == iso_pi(sk)->bc_sid; } @@ -2075,8 +2078,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (ev1) { sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN, iso_match_sid, ev1); - if (sk && !ev1->status) + if (sk && !ev1->status) { iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle); + iso_pi(sk)->bc_sid = ev1->sid; + } goto done; } From 4e83956ad117c160317f9bd7f53c66503d716d2b Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 3 May 2025 17:08:21 +0300 Subject: [PATCH 2352/2924] Bluetooth: separate CIS_LINK and BIS_LINK link types [ Upstream commit 23205562ffc8de20f57afdd984858cab29e77968 ] Use separate link type id for unicast and broadcast ISO connections. These connection types are handled with separate HCI commands, socket API is different, and hci_conn has union fields that are different in the two cases, so they shall not be mixed up. Currently in most places it is attempted to distinguish ucast by bacmp(&c->dst, BDADDR_ANY) but it is wrong as dst is set for bcast sink hci_conn in iso_conn_ready(). Additionally checking sync_handle might be OK, but depends on details of bcast conn configuration flow. To avoid complicating it, use separate link types. Fixes: f764a6c2c1e4 ("Bluetooth: ISO: Add broadcast support") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/hci.h | 3 +- include/net/bluetooth/hci_core.h | 48 ++++++++++++++------------------ net/bluetooth/hci_conn.c | 44 +++++++++++++++++------------ net/bluetooth/hci_core.c | 21 ++++++++------ net/bluetooth/hci_event.c | 24 ++++++++-------- net/bluetooth/hci_sync.c | 16 +++++++---- net/bluetooth/iso.c | 4 +-- net/bluetooth/mgmt.c | 3 +- 8 files changed, 89 insertions(+), 74 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 797992019f9ee5..521a9d0acac692 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -557,7 +557,8 @@ enum { #define ESCO_LINK 0x02 /* Low Energy links do not have defined link type. Use invented one */ #define LE_LINK 0x80 -#define ISO_LINK 0x82 +#define CIS_LINK 0x82 +#define BIS_LINK 0x83 #define INVALID_LINK 0xff /* LMP features */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 54bfeeaa09959b..fc66d9b61198e6 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -996,7 +996,8 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) case ESCO_LINK: h->sco_num++; break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: h->iso_num++; break; } @@ -1022,7 +1023,8 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) case ESCO_LINK: h->sco_num--; break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: h->iso_num--; break; } @@ -1039,7 +1041,8 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) case SCO_LINK: case ESCO_LINK: return h->sco_num; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: return h->iso_num; default: return 0; @@ -1100,7 +1103,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (bacmp(&c->dst, ba) || c->type != ISO_LINK) + if (bacmp(&c->dst, ba) || c->type != BIS_LINK) continue; if (c->iso_qos.bcast.bis == bis) { @@ -1122,7 +1125,7 @@ hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) + if (c->type != BIS_LINK) continue; if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags)) @@ -1148,8 +1151,8 @@ hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (bacmp(&c->dst, ba) || c->type != ISO_LINK || - !test_bit(HCI_CONN_PER_ADV, &c->flags)) + if (bacmp(&c->dst, ba) || c->type != BIS_LINK || + !test_bit(HCI_CONN_PER_ADV, &c->flags)) continue; if (c->iso_qos.bcast.big == big && @@ -1238,7 +1241,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY)) + if (c->type != CIS_LINK) continue; /* Match CIG ID if set */ @@ -1270,7 +1273,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY)) + if (c->type != CIS_LINK) continue; if (handle == c->iso_qos.ucast.cig) { @@ -1293,17 +1296,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) - continue; - - /* An ISO_LINK hcon with BDADDR_ANY as destination - * address is a Broadcast connection. A Broadcast - * slave connection is associated with a PA train, - * so the sync_handle can be used to differentiate - * from unicast. - */ - if (bacmp(&c->dst, BDADDR_ANY) && - c->sync_handle == HCI_SYNC_HANDLE_INVALID) + if (c->type != BIS_LINK) continue; if (handle == c->iso_qos.bcast.big) { @@ -1327,7 +1320,7 @@ hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) + if (c->type != BIS_LINK) continue; if (handle == c->iso_qos.bcast.big && num_bis == c->num_bis) { @@ -1350,8 +1343,8 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK || - c->state != state) + if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) || + c->state != state) continue; if (handle == c->iso_qos.bcast.big) { @@ -1374,8 +1367,8 @@ hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || - !test_bit(HCI_CONN_PA_SYNC, &c->flags)) + if (c->type != BIS_LINK || + !test_bit(HCI_CONN_PA_SYNC, &c->flags)) continue; if (c->iso_qos.bcast.big == big) { @@ -1397,7 +1390,7 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) + if (c->type != BIS_LINK) continue; /* Ignore the listen hcon, we are looking @@ -2012,7 +2005,8 @@ static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, case ESCO_LINK: return sco_connect_ind(hdev, bdaddr, flags); - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: return iso_connect_ind(hdev, bdaddr, flags); default: diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index c0207812f43282..fccdb864af7264 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -785,7 +785,7 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c d->sync_handle = conn->sync_handle; if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { - hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK, + hci_conn_hash_list_flag(hdev, find_bis, BIS_LINK, HCI_CONN_PA_SYNC, d); if (!d->count) @@ -795,7 +795,7 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c } if (test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags)) { - hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK, + hci_conn_hash_list_flag(hdev, find_bis, BIS_LINK, HCI_CONN_BIG_SYNC, d); if (!d->count) @@ -885,9 +885,11 @@ static void cis_cleanup(struct hci_conn *conn) /* Check if ISO connection is a CIS and remove CIG if there are * no other connections using it. */ - hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_BOUND, &d); - hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECT, &d); - hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &d); + hci_conn_hash_list_state(hdev, find_cis, CIS_LINK, BT_BOUND, &d); + hci_conn_hash_list_state(hdev, find_cis, CIS_LINK, BT_CONNECT, + &d); + hci_conn_hash_list_state(hdev, find_cis, CIS_LINK, BT_CONNECTED, + &d); if (d.count) return; @@ -910,7 +912,8 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t if (!hdev->acl_mtu) return ERR_PTR(-ECONNREFUSED); break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: if (hdev->iso_mtu) /* Dedicated ISO Buffer exists */ break; @@ -974,7 +977,8 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t hci_copy_identity_address(hdev, &conn->src, &conn->src_type); conn->mtu = hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: /* conn->src should reflect the local identity address */ hci_copy_identity_address(hdev, &conn->src, &conn->src_type); @@ -1071,7 +1075,8 @@ static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason) if (HCI_CONN_HANDLE_UNSET(conn->handle)) hci_conn_failed(conn, reason); break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: if ((conn->state != BT_CONNECTED && !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) || test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) @@ -1146,7 +1151,8 @@ void hci_conn_del(struct hci_conn *conn) hdev->acl_cnt += conn->sent; } else { /* Unacked ISO frames */ - if (conn->type == ISO_LINK) { + if (conn->type == CIS_LINK || + conn->type == BIS_LINK) { if (hdev->iso_pkts) hdev->iso_cnt += conn->sent; else if (hdev->le_pkts) @@ -1532,7 +1538,7 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, memcmp(conn->le_per_adv_data, base, base_len))) return ERR_PTR(-EADDRINUSE); - conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); + conn = hci_conn_add_unset(hdev, BIS_LINK, dst, HCI_ROLE_MASTER); if (IS_ERR(conn)) return conn; @@ -1740,7 +1746,7 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos) data.count = 0; /* Create a BIS for each bound connection */ - hci_conn_hash_list_state(hdev, bis_list, ISO_LINK, + hci_conn_hash_list_state(hdev, bis_list, BIS_LINK, BT_BOUND, &data); cp.handle = qos->bcast.big; @@ -1829,12 +1835,12 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) for (data.cig = 0x00; data.cig < 0xf0; data.cig++) { data.count = 0; - hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, + hci_conn_hash_list_state(hdev, find_cis, CIS_LINK, BT_CONNECT, &data); if (data.count) continue; - hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, + hci_conn_hash_list_state(hdev, find_cis, CIS_LINK, BT_CONNECTED, &data); if (!data.count) break; @@ -1884,7 +1890,8 @@ struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, cis = hci_conn_hash_lookup_cis(hdev, dst, dst_type, qos->ucast.cig, qos->ucast.cis); if (!cis) { - cis = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); + cis = hci_conn_add_unset(hdev, CIS_LINK, dst, + HCI_ROLE_MASTER); if (IS_ERR(cis)) return cis; cis->cleanup = cis_cleanup; @@ -1976,7 +1983,7 @@ bool hci_iso_setup_path(struct hci_conn *conn) int hci_conn_check_create_cis(struct hci_conn *conn) { - if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY)) + if (conn->type != CIS_LINK) return -EINVAL; if (!conn->parent || conn->parent->state != BT_CONNECTED || @@ -2072,7 +2079,7 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, bt_dev_dbg(hdev, "dst %pMR type %d sid %d", dst, dst_type, sid); - conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE); + conn = hci_conn_add_unset(hdev, BIS_LINK, dst, HCI_ROLE_SLAVE); if (IS_ERR(conn)) return conn; @@ -2221,7 +2228,7 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, * the start periodic advertising and create BIG commands have * been queued */ - hci_conn_hash_list_state(hdev, bis_mark_per_adv, ISO_LINK, + hci_conn_hash_list_state(hdev, bis_mark_per_adv, BIS_LINK, BT_BOUND, &data); /* Queue start periodic advertising and create BIG */ @@ -2953,7 +2960,8 @@ void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) * TODO: SCO support without flowctl (needs to be done in drivers) */ switch (conn->type) { - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: case ACL_LINK: case LE_LINK: break; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 75da6f6e39c9ed..2668e0e563c43f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2898,12 +2898,13 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) break; case HCI_ACLDATA_PKT: /* Detect if ISO packet has been sent as ACL */ - if (hci_conn_num(hdev, ISO_LINK)) { + if (hci_conn_num(hdev, CIS_LINK) || + hci_conn_num(hdev, BIS_LINK)) { __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); __u8 type; type = hci_conn_lookup_type(hdev, hci_handle(handle)); - if (type == ISO_LINK) + if (type == CIS_LINK || type == BIS_LINK) hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; } break; @@ -3345,7 +3346,8 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote) case LE_LINK: cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: cnt = hdev->iso_mtu ? hdev->iso_cnt : hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; break; @@ -3359,7 +3361,7 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote) } static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, - int *quote) + __u8 type2, int *quote) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *conn = NULL, *c; @@ -3371,7 +3373,8 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != type || skb_queue_empty(&c->data_q)) + if ((c->type != type && c->type != type2) || + skb_queue_empty(&c->data_q)) continue; if (c->state != BT_CONNECTED && c->state != BT_CONFIG) @@ -3579,7 +3582,7 @@ static void hci_sched_sco(struct hci_dev *hdev, __u8 type) else cnt = &hdev->sco_cnt; - while (*cnt && (conn = hci_low_sent(hdev, type, "e))) { + while (*cnt && (conn = hci_low_sent(hdev, type, type, "e))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); hci_send_conn_frame(hdev, conn, skb); @@ -3707,12 +3710,14 @@ static void hci_sched_iso(struct hci_dev *hdev) BT_DBG("%s", hdev->name); - if (!hci_conn_num(hdev, ISO_LINK)) + if (!hci_conn_num(hdev, CIS_LINK) && + !hci_conn_num(hdev, BIS_LINK)) return; cnt = hdev->iso_pkts ? &hdev->iso_cnt : hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt; - while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, "e))) { + while (*cnt && (conn = hci_low_sent(hdev, CIS_LINK, BIS_LINK, + "e))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); hci_send_conn_frame(hdev, conn, skb); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4183560582a3ad..66052d6aaa1d50 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3804,7 +3804,7 @@ static void hci_unbound_cis_failed(struct hci_dev *hdev, u8 cig, u8 status) lockdep_assert_held(&hdev->lock); list_for_each_entry_safe(conn, tmp, &hdev->conn_hash.list, list) { - if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY) || + if (conn->type != CIS_LINK || conn->state == BT_OPEN || conn->iso_qos.ucast.cig != cig) continue; @@ -4467,7 +4467,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: if (hdev->iso_pkts) { hdev->iso_cnt += count; if (hdev->iso_cnt > hdev->iso_pkts) @@ -6413,7 +6414,8 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, conn->sync_handle = le16_to_cpu(ev->handle); conn->sid = HCI_SID_INVALID; - mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ISO_LINK, &flags); + mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, BIS_LINK, + &flags); if (!(mask & HCI_LM_ACCEPT)) { hci_le_pa_term_sync(hdev, ev->handle); goto unlock; @@ -6423,7 +6425,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, goto unlock; /* Add connection to indicate PA sync event */ - pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, + pa_sync = hci_conn_add_unset(hdev, BIS_LINK, BDADDR_ANY, HCI_ROLE_SLAVE); if (IS_ERR(pa_sync)) @@ -6454,7 +6456,7 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); - mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags); + mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, BIS_LINK, &flags); if (!(mask & HCI_LM_ACCEPT)) goto unlock; @@ -6738,7 +6740,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, goto unlock; } - if (conn->type != ISO_LINK) { + if (conn->type != CIS_LINK) { bt_dev_err(hdev, "Invalid connection link type handle 0x%4.4x", handle); @@ -6856,7 +6858,7 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data, if (!acl) goto unlock; - mask = hci_proto_connect_ind(hdev, &acl->dst, ISO_LINK, &flags); + mask = hci_proto_connect_ind(hdev, &acl->dst, CIS_LINK, &flags); if (!(mask & HCI_LM_ACCEPT)) { hci_le_reject_cis(hdev, ev->cis_handle); goto unlock; @@ -6864,8 +6866,8 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data, cis = hci_conn_hash_lookup_handle(hdev, cis_handle); if (!cis) { - cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE, - cis_handle); + cis = hci_conn_add(hdev, CIS_LINK, &acl->dst, + HCI_ROLE_SLAVE, cis_handle); if (IS_ERR(cis)) { hci_le_reject_cis(hdev, ev->cis_handle); goto unlock; @@ -6980,7 +6982,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "ignore too large handle %u", handle); continue; } - bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, + bis = hci_conn_add(hdev, BIS_LINK, BDADDR_ANY, HCI_ROLE_SLAVE, handle); if (IS_ERR(bis)) continue; @@ -7036,7 +7038,7 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); - mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags); + mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, BIS_LINK, &flags); if (!(mask & HCI_LM_ACCEPT)) goto unlock; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d00ff18f3be0dc..62d1ff951ebe60 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2860,7 +2860,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type, if (sent) { struct hci_conn *conn; - conn = hci_conn_hash_lookup_ba(hdev, ISO_LINK, + conn = hci_conn_hash_lookup_ba(hdev, BIS_LINK, &sent->bdaddr); if (conn) { struct bt_iso_qos *qos = &conn->iso_qos; @@ -5477,7 +5477,7 @@ static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, if (conn->type == LE_LINK) return hci_le_connect_cancel_sync(hdev, conn, reason); - if (conn->type == ISO_LINK) { + if (conn->type == CIS_LINK) { /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E * page 1857: * @@ -5490,9 +5490,10 @@ static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, return hci_disconnect_sync(hdev, conn, reason); /* CIS with no Create CIS sent have nothing to cancel */ - if (bacmp(&conn->dst, BDADDR_ANY)) - return HCI_ERROR_LOCAL_HOST_TERM; + return HCI_ERROR_LOCAL_HOST_TERM; + } + if (conn->type == BIS_LINK) { /* There is no way to cancel a BIS without terminating the BIG * which is done later on connection cleanup. */ @@ -5554,9 +5555,12 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, { struct hci_cp_reject_conn_req cp; - if (conn->type == ISO_LINK) + if (conn->type == CIS_LINK) return hci_le_reject_cis_sync(hdev, conn, reason); + if (conn->type == BIS_LINK) + return -EINVAL; + if (conn->type == SCO_LINK || conn->type == ESCO_LINK) return hci_reject_sco_sync(hdev, conn, reason); @@ -6917,7 +6921,7 @@ static void create_pa_complete(struct hci_dev *hdev, void *data, int err) goto unlock; /* Add connection to indicate PA sync error */ - pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, + pa_sync = hci_conn_add_unset(hdev, BIS_LINK, BDADDR_ANY, HCI_ROLE_SLAVE); if (IS_ERR(pa_sync)) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 7c0012ce1b890e..5389af86bdae4f 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2208,7 +2208,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) { - if (hcon->type != ISO_LINK) { + if (hcon->type != CIS_LINK && hcon->type != BIS_LINK) { if (hcon->type != LE_LINK) return; @@ -2249,7 +2249,7 @@ static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) static void iso_disconn_cfm(struct hci_conn *hcon, __u8 reason) { - if (hcon->type != ISO_LINK) + if (hcon->type != CIS_LINK && hcon->type != BIS_LINK) return; BT_DBG("hcon %p reason %d", hcon, reason); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 46b22708dfbd2d..261926dccc7e8e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3221,7 +3221,8 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, static u8 link_to_bdaddr(u8 link_type, u8 addr_type) { switch (link_type) { - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: case LE_LINK: switch (addr_type) { case ADDR_LE_DEV_PUBLIC: From f9b8bd88620d94b058b9583580d1cd89c4041323 Mon Sep 17 00:00:00 2001 From: Feng Jiang Date: Wed, 2 Apr 2025 14:24:15 +0800 Subject: [PATCH 2353/2924] wifi: mt76: scan: Fix 'mlink' dereferenced before IS_ERR_OR_NULL check [ Upstream commit 7e1fcf687c2fb22ad25cf3fae322a37452f5f560 ] Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202504011739.HvUKtUUe-lkp@intel.com/ Fixes: 3ba20af886d1 ("wifi: mt76: scan: set vif offchannel link for scanning/roc") Signed-off-by: Feng Jiang Link: https://patch.msgid.link/20250402062415.25434-1-jiangfeng@kylinos.cn Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/channel.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index e7b839e7429034..cc2d888e3f17a5 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -302,11 +302,13 @@ void mt76_put_vif_phy_link(struct mt76_phy *phy, struct ieee80211_vif *vif, struct mt76_vif_link *mlink) { struct mt76_dev *dev = phy->dev; - struct mt76_vif_data *mvif = mlink->mvif; + struct mt76_vif_data *mvif; if (IS_ERR_OR_NULL(mlink) || !mlink->offchannel) return; + mvif = mlink->mvif; + rcu_assign_pointer(mvif->offchannel_link, NULL); dev->drv->vif_link_remove(phy, vif, &vif->bss_conf, mlink); kfree(mlink); From af861c6dea2ef06845a5c7672999a06c06099735 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Mon, 7 Apr 2025 11:23:49 +0800 Subject: [PATCH 2354/2924] wifi: mt76: mt7996: Fix null-ptr-deref in mt7996_mmio_wed_init() [ Upstream commit 8f30e2b059757d8711a823e4c9c023db62a1d171 ] devm_ioremap() returns NULL on error. Currently, mt7996_mmio_wed_init() does not check for this case, which results in a NULL pointer dereference. Prevent null pointer dereference in mt7996_mmio_wed_init() Fixes: 83eafc9251d6 ("wifi: mt76: mt7996: add wed tx support") Signed-off-by: Henry Martin Link: https://patch.msgid.link/20250407032349.83360-1-bsdhenrymartin@gmail.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/mmio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c index 13b188e281bdb9..af9169030bad99 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c @@ -323,6 +323,9 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr, wed->wlan.base = devm_ioremap(dev->mt76.dev, pci_resource_start(pci_dev, 0), pci_resource_len(pci_dev, 0)); + if (!wed->wlan.base) + return -ENOMEM; + wed->wlan.phy_base = pci_resource_start(pci_dev, 0); if (hif2) { From d825ed9fd768be10d52beba6f57a4b50c0c154aa Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Mon, 7 Apr 2025 14:19:00 +0800 Subject: [PATCH 2355/2924] wifi: mt76: mt7915: Fix null-ptr-deref in mt7915_mmio_wed_init() [ Upstream commit efb95439c1477bbc955cacd0179c35e7861b437c ] devm_ioremap() returns NULL on error. Currently, mt7915_mmio_wed_init() does not check for this case, which results in a NULL pointer dereference. Prevent null pointer dereference in mt7915_mmio_wed_init(). Fixes: 4f831d18d12d ("wifi: mt76: mt7915: enable WED RX support") Signed-off-by: Henry Martin Link: https://patch.msgid.link/20250407061900.85317-1-bsdhenrymartin@gmail.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7915/mmio.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c index 876f0692850a2e..9c4d5cea0c42e9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c @@ -651,6 +651,9 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr, wed->wlan.base = devm_ioremap(dev->mt76.dev, pci_resource_start(pci_dev, 0), pci_resource_len(pci_dev, 0)); + if (!wed->wlan.base) + return -ENOMEM; + wed->wlan.phy_base = pci_resource_start(pci_dev, 0); wed->wlan.wpdma_int = pci_resource_start(pci_dev, 0) + MT_INT_WED_SOURCE_CSR; @@ -678,6 +681,9 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr, wed->wlan.bus_type = MTK_WED_BUS_AXI; wed->wlan.base = devm_ioremap(dev->mt76.dev, res->start, resource_size(res)); + if (!wed->wlan.base) + return -ENOMEM; + wed->wlan.phy_base = res->start; wed->wlan.wpdma_int = res->start + MT_INT_SOURCE_CSR; wed->wlan.wpdma_mask = res->start + MT_INT_MASK_CSR; From 63b450ea55a4e2ed44beba89eb14978abd060a62 Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 14 Apr 2025 09:39:52 +0800 Subject: [PATCH 2356/2924] wifi: mt76: mt7925: prevent multiple scan commands [ Upstream commit 122f270aca2c86d7de264ab67161c845e0691d73 ] Add a check to ensure only one scan command is active at a time by testing the MT76_HW_SCANNING state. Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250414013954.1151774-1-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 14b1f603fb6224..8eaba8c07a62ab 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -2790,6 +2790,9 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, struct tlv *tlv; int max_len; + if (test_bit(MT76_HW_SCANNING, &phy->state)) + return -EBUSY; + max_len = sizeof(*hdr) + sizeof(*req) + sizeof(*ssid) + sizeof(*bssid) + sizeof(*chan_info) + sizeof(*misc) + sizeof(*ie); From dca019fa8f59220bb642b517ae0801bb7687f3cc Mon Sep 17 00:00:00 2001 From: Ming Yen Hsieh Date: Mon, 14 Apr 2025 09:39:53 +0800 Subject: [PATCH 2357/2924] wifi: mt76: mt7925: refine the sniffer commnad [ Upstream commit bd02eebfc0b3502fe8322cf229b4c801416d1007 ] Remove a duplicate call to `mt76_mcu_send_msg` to fix redundant operations in the sniffer command handling. Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250414013954.1151774-2-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 8eaba8c07a62ab..2bd9fc38a27f41 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -2046,8 +2046,6 @@ int mt7925_mcu_set_sniffer(struct mt792x_dev *dev, struct ieee80211_vif *vif, }, }; - mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD(SNIFFER), &req, sizeof(req), true); - return mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD(SNIFFER), &req, sizeof(req), true); } From 7a1c36a70eb6d3901fc048a07abec1e4a8f9a195 Mon Sep 17 00:00:00 2001 From: Michael Lo Date: Mon, 14 Apr 2025 09:39:54 +0800 Subject: [PATCH 2358/2924] wifi: mt76: mt7925: ensure all MCU commands wait for response [ Upstream commit aa97ff5782cf01cf2163593e1f57bbde63a06047 ] Modify MCU command sending functions to wait for a response, ensuring consistent behavior across all commands and improves reliability by confirming that each command is processed successfully. Fixes: c948b5da6bbe ("wifi: mt76: mt7925: add Mediatek Wi-Fi7 driver for mt7925 chips") Signed-off-by: Michael Lo Signed-off-by: Ming Yen Hsieh Link: https://patch.msgid.link/20250414013954.1151774-3-mingyen.hsieh@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7925/mcu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index 2bd9fc38a27f41..dea5b9bcb3fdfb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -783,7 +783,7 @@ int mt7925_mcu_fw_log_2_host(struct mt792x_dev *dev, u8 ctrl) int ret; ret = mt76_mcu_send_and_get_msg(&dev->mt76, MCU_UNI_CMD(WSYS_CONFIG), - &req, sizeof(req), false, NULL); + &req, sizeof(req), true, NULL); return ret; } @@ -1424,7 +1424,7 @@ int mt7925_mcu_set_eeprom(struct mt792x_dev *dev) }; return mt76_mcu_send_and_get_msg(&dev->mt76, MCU_UNI_CMD(EFUSE_CTRL), - &req, sizeof(req), false, NULL); + &req, sizeof(req), true, NULL); } EXPORT_SYMBOL_GPL(mt7925_mcu_set_eeprom); @@ -2762,7 +2762,7 @@ int mt7925_mcu_set_dbdc(struct mt76_phy *phy, bool enable) conf->band = 0; /* unused */ err = mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SET_DBDC_PARMS), - false); + true); return err; } @@ -2870,7 +2870,7 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, } err = mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), - false); + true); if (err < 0) clear_bit(MT76_HW_SCANNING, &phy->state); @@ -2976,7 +2976,7 @@ int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, } return mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), - false); + true); } EXPORT_SYMBOL_GPL(mt7925_mcu_sched_scan_req); @@ -3012,7 +3012,7 @@ mt7925_mcu_sched_scan_enable(struct mt76_phy *phy, clear_bit(MT76_HW_SCHED_SCANNING, &phy->state); return mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), - false); + true); } int mt7925_mcu_cancel_hw_scan(struct mt76_phy *phy, @@ -3051,7 +3051,7 @@ int mt7925_mcu_cancel_hw_scan(struct mt76_phy *phy, } return mt76_mcu_send_msg(phy->dev, MCU_UNI_CMD(SCAN_REQ), - &req, sizeof(req), false); + &req, sizeof(req), true); } EXPORT_SYMBOL_GPL(mt7925_mcu_cancel_hw_scan); @@ -3156,7 +3156,7 @@ int mt7925_mcu_set_channel_domain(struct mt76_phy *phy) memcpy(__skb_push(skb, sizeof(req)), &req, sizeof(req)); return mt76_mcu_skb_send_msg(dev, skb, MCU_UNI_CMD(SET_DOMAIN_INFO), - false); + true); } EXPORT_SYMBOL_GPL(mt7925_mcu_set_channel_domain); From 1b0585a84ff55464a4a727af5fddccae319f5c1b Mon Sep 17 00:00:00 2001 From: Howard Hsu Date: Thu, 15 May 2025 11:29:45 +0800 Subject: [PATCH 2359/2924] wifi: mt76: mt7996: fix beamformee SS field [ Upstream commit 5c78949fc7cd772d483a8abe126fe90937c0f002 ] Fix the beamformee SS field for the mt7996, mt7992 and mt7990 chipsets. For the mt7992, this value shall be set to 0x4, while the others shall be set to 0x3. Fixes: 5b20557593d4 ("wifi: mt76: connac: adjust phy capabilities based on band constraints") Signed-off-by: Howard Hsu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20250515032952.1653494-2-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 5af52bd1f1f122..e99dfd1771d528 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -1116,12 +1116,12 @@ mt7996_set_stream_he_txbf_caps(struct mt7996_phy *phy, c = IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE; - if (is_mt7996(phy->mt76->dev)) - c |= IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4 | - (IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4 * non_2g); - else + if (is_mt7992(phy->mt76->dev)) c |= IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_5 | (IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_5 * non_2g); + else + c |= IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4 | + (IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4 * non_2g); elem->phy_cap_info[4] |= c; From 3263de3744de447a745e9870640d476be3675764 Mon Sep 17 00:00:00 2001 From: Peter Chiu Date: Thu, 15 May 2025 11:29:46 +0800 Subject: [PATCH 2360/2924] wifi: mt76: mt7996: set EHT max ampdu length capability [ Upstream commit 8b2f574845e33d02e7fbad2d3192a8b717567afa ] Set the max AMPDU length in the EHT MAC CAP. Without this patch, the peer station cannot obtain the correct capability, which prevents achieving peak throughput on the 2 GHz band. Fixes: 1816ad9381e0 ("wifi: mt76: mt7996: add max mpdu len capability") Signed-off-by: Peter Chiu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20250515032952.1653494-3-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index e99dfd1771d528..4906b0ecc73e02 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -1321,6 +1321,9 @@ mt7996_init_eht_caps(struct mt7996_phy *phy, enum nl80211_band band, u8_encode_bits(IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454, IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK); + eht_cap_elem->mac_cap_info[1] |= + IEEE80211_EHT_MAC_CAP1_MAX_AMPDU_LEN_MASK; + eht_cap_elem->phy_cap_info[0] = IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI | IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER | From 6cb11d753204a5ce234b554e93562d46e9622e4f Mon Sep 17 00:00:00 2001 From: Peter Chiu Date: Thu, 15 May 2025 11:29:48 +0800 Subject: [PATCH 2361/2924] wifi: mt76: mt7996: fix invalid NSS setting when TX path differs from NSS [ Upstream commit d5012734fc4bd5371e2e8dea8c2f3d28287573b8 ] The maximum TX path and NSS may differ on a band. For example, one variant of the MT7992 has 5 TX paths and 4 NSS on the 5 GHz band. To address this, add orig_antenna_mask to record the maximum NSS and prevent setting an invalid NSS in mt7996_set_antenna(). Fixes: 69d54ce7491d ("wifi: mt76: mt7996: switch to single multi-radio wiphy") Signed-off-by: Peter Chiu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20250515032952.1653494-5-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c | 1 + drivers/net/wireless/mediatek/mt76/mt7996/main.c | 3 ++- drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c index 53dfac02f8af0b..f0c76aac175dff 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c @@ -304,6 +304,7 @@ int mt7996_eeprom_parse_hw_cap(struct mt7996_dev *dev, struct mt7996_phy *phy) phy->has_aux_rx = true; mphy->antenna_mask = BIT(nss) - 1; + phy->orig_antenna_mask = mphy->antenna_mask; mphy->chainmask = (BIT(path) - 1) << dev->chainshift[band_idx]; phy->orig_chainmask = mphy->chainmask; dev->chainmask |= mphy->chainmask; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 8698c4345af0c8..a3295b22523a61 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -1528,7 +1528,8 @@ mt7996_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant) u8 shift = dev->chainshift[band_idx]; phy->mt76->chainmask = tx_ant & phy->orig_chainmask; - phy->mt76->antenna_mask = phy->mt76->chainmask >> shift; + phy->mt76->antenna_mask = (phy->mt76->chainmask >> shift) & + phy->orig_antenna_mask; mt76_set_stream_caps(phy->mt76, true); mt7996_set_stream_vht_txbf_caps(phy); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 43e646ed6094cb..c393784436d4c3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -293,6 +293,7 @@ struct mt7996_phy { struct mt76_channel_state state_ts; u16 orig_chainmask; + u16 orig_antenna_mask; bool has_aux_rx; bool counter_reset; From 07e6672128e25303fb352ecfad70331ef0efb487 Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Thu, 15 May 2025 11:29:50 +0800 Subject: [PATCH 2362/2924] wifi: mt76: mt7996: fix RX buffer size of MCU event [ Upstream commit 42cb27af34de4acf680606fad2c1f2932110591f ] Some management frames are first processed by the firmware and then passed to the driver through the MCU event rings. In CONNAC3, event rings do not support scatter-gather and have a size limitation of 2048 bytes. If a packet sized between 1728 and 2048 bytes arrives from an event ring, the ring will hang because the driver attempts to use scatter-gather to process it. To fix this, include the size of struct skb_shared_info in the MCU RX buffer size to prevent scatter-gather from being used for event skb in mt76_dma_rx_fill_buf(). Fixes: 98686cd21624 ("wifi: mt76: mt7996: add driver for MediaTek Wi-Fi 7 (802.11be) devices") Co-developed-by: Peter Chiu Signed-off-by: Peter Chiu Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20250515032952.1653494-7-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mt7996/dma.c | 4 ++-- drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c index 69a7d9b2e38bd7..4b68d2fc5e0949 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c @@ -493,7 +493,7 @@ int mt7996_dma_init(struct mt7996_dev *dev) ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU], MT_RXQ_ID(MT_RXQ_MCU), MT7996_RX_MCU_RING_SIZE, - MT_RX_BUF_SIZE, + MT7996_RX_MCU_BUF_SIZE, MT_RXQ_RING_BASE(MT_RXQ_MCU)); if (ret) return ret; @@ -502,7 +502,7 @@ int mt7996_dma_init(struct mt7996_dev *dev) ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU_WA], MT_RXQ_ID(MT_RXQ_MCU_WA), MT7996_RX_MCU_RING_SIZE_WA, - MT_RX_BUF_SIZE, + MT7996_RX_MCU_BUF_SIZE, MT_RXQ_RING_BASE(MT_RXQ_MCU_WA)); if (ret) return ret; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index c393784436d4c3..77605403b39661 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -29,6 +29,9 @@ #define MT7996_RX_RING_SIZE 1536 #define MT7996_RX_MCU_RING_SIZE 512 #define MT7996_RX_MCU_RING_SIZE_WA 1024 +/* scatter-gather of mcu event is not supported in connac3 */ +#define MT7996_RX_MCU_BUF_SIZE (2048 + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define MT7996_FIRMWARE_WA "mediatek/mt7996/mt7996_wa.bin" #define MT7996_FIRMWARE_WM "mediatek/mt7996/mt7996_wm.bin" From 62d5bfd0b441eb0ec341a3624ed212bacca25859 Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Thu, 15 May 2025 11:29:51 +0800 Subject: [PATCH 2363/2924] wifi: mt76: fix available_antennas setting [ Upstream commit 249173e94dd5edef9e704f89513de7d9715ddf22 ] Check if available_antennas_tx and available_antennas_rx are already set during the per-chip initialization phase; otherwise, they could be overwritten with incorrect values. Fixes: 69d54ce7491d ("wifi: mt76: mt7996: switch to single multi-radio wiphy") Signed-off-by: Shayne Chen Link: https://patch.msgid.link/20250515032952.1653494-8-shayne.chen@mediatek.com Signed-off-by: Felix Fietkau Signed-off-by: Sasha Levin --- drivers/net/wireless/mediatek/mt76/mac80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index b88d7e10742ee6..e9605dc222910f 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -449,8 +449,10 @@ mt76_phy_init(struct mt76_phy *phy, struct ieee80211_hw *hw) wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_AQL); - wiphy->available_antennas_tx = phy->antenna_mask; - wiphy->available_antennas_rx = phy->antenna_mask; + if (!wiphy->available_antennas_tx) + wiphy->available_antennas_tx = phy->antenna_mask; + if (!wiphy->available_antennas_rx) + wiphy->available_antennas_rx = phy->antenna_mask; wiphy->sar_capa = &mt76_sar_capa; phy->frp = devm_kcalloc(dev->dev, wiphy->sar_capa->num_freq_ranges, From b397b309456b7671dbb0a18d9d32dfb8d93fb8f0 Mon Sep 17 00:00:00 2001 From: Di Shen Date: Tue, 20 May 2025 13:49:43 +0800 Subject: [PATCH 2364/2924] bpf: Revert "bpf: remove unnecessary rcu_read_{lock,unlock}() in multi-uprobe attach logic" [ Upstream commit 4e2e6841ff761cc15a54e8bebcf35d7325ec78a2 ] This reverts commit 4a8f635a6054. Althought get_pid_task() internally already calls rcu_read_lock() and rcu_read_unlock(), the find_vpid() was not. The documentation for find_vpid() clearly states: "Must be called with the tasklist_lock or rcu_read_lock() held." Add proper rcu_read_lock/unlock() to protect the find_vpid(). Fixes: 4a8f635a6054 ("bpf: remove unnecessary rcu_read_{lock,unlock}() in multi-uprobe attach logic") Reported-by: Xuewen Yan Signed-off-by: Di Shen Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20250520054943.5002-1-xuewen.yan@unisoc.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/trace/bpf_trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e1bf9c06007fb7..090cdab38f0ccd 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3423,7 +3423,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr } if (pid) { + rcu_read_lock(); task = get_pid_task(find_vpid(pid), PIDTYPE_TGID); + rcu_read_unlock(); if (!task) { err = -ESRCH; goto error_path_put; From 2c2e34fa81c22f245d7c32867377b807be0223ed Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 16 May 2025 16:12:13 +0200 Subject: [PATCH 2365/2924] netfilter: xtables: support arpt_mark and ipv6 optstrip for iptables-nft only builds [ Upstream commit c38eb2973c18d34a8081d173a6ad298461f4a37c ] Its now possible to build a kernel that has no support for the classic xtables get/setsockopt interfaces and builtin tables. In this case, we have CONFIG_IP6_NF_MANGLE=n and CONFIG_IP_NF_ARPTABLES=n. For optstript, the ipv6 code is so small that we can enable it if netfilter ipv6 support exists. For mark, check if either classic arptables or NFT_ARP_COMPAT is set. Fixes: a9525c7f6219 ("netfilter: xtables: allow xtables-nft only builds") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/xt_TCPOPTSTRIP.c | 4 ++-- net/netfilter/xt_mark.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 30e99464171b7b..93f064306901c0 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -91,7 +91,7 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par) return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb)); } -#if IS_ENABLED(CONFIG_IP6_NF_MANGLE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static unsigned int tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par) { @@ -119,7 +119,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = { .targetsize = sizeof(struct xt_tcpoptstrip_target_info), .me = THIS_MODULE, }, -#if IS_ENABLED(CONFIG_IP6_NF_MANGLE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "TCPOPTSTRIP", .family = NFPROTO_IPV6, diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 65b965ca40ea7e..59b9d04400cac2 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -48,7 +48,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = { .targetsize = sizeof(struct xt_mark_tginfo2), .me = THIS_MODULE, }, -#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES) +#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES) || IS_ENABLED(CONFIG_NFT_COMPAT_ARP) { .name = "MARK", .revision = 2, From edc2fe00c6dfe811771adbbf09d36b8a3f8fd5dd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 May 2025 11:38:47 +0200 Subject: [PATCH 2366/2924] netfilter: nf_tables: nft_fib_ipv6: fix VRF ipv4/ipv6 result discrepancy [ Upstream commit 8b53f46eb430fe5b42d485873b85331d2de2c469 ] With a VRF, ipv4 and ipv6 FIB expression behave differently. fib daddr . iif oif Will return the input interface name for ipv4, but the real device for ipv6. Example: If VRF device name is tvrf and real (incoming) device is veth0. First round is ok, both ipv4 and ipv6 will yield 'veth0'. But in the second round (incoming device will be set to "tvrf"), ipv4 will yield "tvrf" whereas ipv6 returns "veth0" for the second round too. This makes ipv6 behave like ipv4. A followup patch will add a test case for this, without this change it will fail with: get element inet t fibif6iif { tvrf . dead:1::99 . tvrf } ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FAIL: did not find tvrf . dead:1::99 . tvrf in fibif6iif Alternatively we could either not do anything at all or change ipv4 to also return the lower/real device, however, nft (userspace) doc says "iif: if fib lookup provides a route then check its output interface is identical to the packets input interface." which is what the nft fib ipv4 behaviour is. Fixes: f6d0cbcf09c5 ("netfilter: nf_tables: add fib expression") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/ipv6/netfilter/nft_fib_ipv6.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 7fd9d7b21cd42d..f1f5640da67285 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -158,6 +158,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, { const struct nft_fib *priv = nft_expr_priv(expr); int noff = skb_network_offset(pkt->skb); + const struct net_device *found = NULL; const struct net_device *oif = NULL; u32 *dest = ®s->data[priv->dreg]; struct ipv6hdr *iph, _iph; @@ -203,11 +204,15 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) goto put_rt_err; - if (oif && oif != rt->rt6i_idev->dev && - l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex) - goto put_rt_err; + if (!oif) { + found = rt->rt6i_idev->dev; + } else { + if (oif == rt->rt6i_idev->dev || + l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == oif->ifindex) + found = oif; + } - nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); + nft_fib_store_result(dest, priv, found); put_rt_err: ip6_rt_put(rt); } From e81d71c235d1ec5c693d7138d7bbcd485a232ca5 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 21 May 2025 11:46:47 +0800 Subject: [PATCH 2367/2924] vfio/type1: Fix error unwind in migration dirty bitmap allocation [ Upstream commit 4518e5a60c7fbf0cdff393c2681db39d77b4f87e ] When setting up dirty page tracking at the vfio IOMMU backend for device migration, if an error is encountered allocating a tracking bitmap, the unwind loop fails to free previously allocated tracking bitmaps. This occurs because the wrong loop index is used to generate the tracking object. This results in unintended memory usage for the life of the current DMA mappings where bitmaps were successfully allocated. Use the correct loop index to derive the tracking object for freeing during unwind. Fixes: d6a4c185660c ("vfio iommu: Implementation of ioctl for dirty pages tracking") Signed-off-by: Li RongQing Link: https://lore.kernel.org/r/20250521034647.2877-1-lirongqing@baidu.com Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/vfio_iommu_type1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0ac56072af9f23..ba5d91e576af16 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -293,7 +293,7 @@ static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize) struct rb_node *p; for (p = rb_prev(n); p; p = rb_prev(p)) { - struct vfio_dma *dma = rb_entry(n, + struct vfio_dma *dma = rb_entry(p, struct vfio_dma, node); vfio_dma_bitmap_free(dma); From c6cbc5c12b185a16e871662042c9d2169f94f18c Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 20 May 2025 11:42:30 +0300 Subject: [PATCH 2368/2924] Bluetooth: MGMT: iterate over mesh commands in mgmt_mesh_foreach() [ Upstream commit 3bb88524b7d030160bb3c9b35f928b2778092111 ] In 'mgmt_mesh_foreach()', iterate over mesh commands rather than generic mgmt ones. Compile tested only. Fixes: b338d91703fa ("Bluetooth: Implement support for Mesh") Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index e5ff65e424b5b4..3713ff490c65d5 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -304,7 +304,7 @@ void mgmt_mesh_foreach(struct hci_dev *hdev, { struct mgmt_mesh_tx *mesh_tx, *tmp; - list_for_each_entry_safe(mesh_tx, tmp, &hdev->mgmt_pending, list) { + list_for_each_entry_safe(mesh_tx, tmp, &hdev->mesh_pending, list) { if (!sk || mesh_tx->sk == sk) cb(mesh_tx, data); } From 7b8526bb489780ccc0caffc446ecabec83cfe568 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 20 May 2025 09:31:35 -0700 Subject: [PATCH 2369/2924] Bluetooth: btintel: Check dsbr size from EFI variable [ Upstream commit 3aa1dc3c9060e335e82e9c182bf3d1db29220b1b ] Since the size of struct btintel_dsbr is already known, we can just start there instead of querying the EFI variable size. If the final result doesn't match what we expect also fail. This fixes a stack buffer overflow when the EFI variable is larger than struct btintel_dsbr. Reported-by: zepta Closes: https://lore.kernel.org/all/CAPBS6KoaWV9=dtjTESZiU6KK__OZX0KpDk-=JEH8jCHFLUYv3Q@mail.gmail.com Fixes: eb9e749c0182 ("Bluetooth: btintel: Allow configuring drive strength of BRI") Signed-off-by: Kees Cook Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btintel.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 48e2f400957bc9..46d9bbd8e411b3 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2719,7 +2719,7 @@ static int btintel_uefi_get_dsbr(u32 *dsbr_var) } __packed data; efi_status_t status; - unsigned long data_size = 0; + unsigned long data_size = sizeof(data); efi_guid_t guid = EFI_GUID(0xe65d8884, 0xd4af, 0x4b20, 0x8d, 0x03, 0x77, 0x2e, 0xcc, 0x3d, 0xa5, 0x31); @@ -2729,16 +2729,10 @@ static int btintel_uefi_get_dsbr(u32 *dsbr_var) if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) return -EOPNOTSUPP; - status = efi.get_variable(BTINTEL_EFI_DSBR, &guid, NULL, &data_size, - NULL); - - if (status != EFI_BUFFER_TOO_SMALL || !data_size) - return -EIO; - status = efi.get_variable(BTINTEL_EFI_DSBR, &guid, NULL, &data_size, &data); - if (status != EFI_SUCCESS) + if (status != EFI_SUCCESS || data_size != sizeof(data)) return -ENXIO; *dsbr_var = data.dsbr; From 7c0a16f6ea2b1c82a03bccd5d1bdb4a7bbd4d987 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Fri, 16 May 2025 22:17:12 +0800 Subject: [PATCH 2370/2924] bpf, sockmap: Avoid using sk_socket after free when sending [ Upstream commit 8259eb0e06d8f64c700f5fbdb28a5c18e10de291 ] The sk->sk_socket is not locked or referenced in backlog thread, and during the call to skb_send_sock(), there is a race condition with the release of sk_socket. All types of sockets(tcp/udp/unix/vsock) will be affected. Race conditions: ''' CPU0 CPU1 backlog::skb_send_sock sendmsg_unlocked sock_sendmsg sock_sendmsg_nosec close(fd): ... ops->release() -> sock_map_close() sk_socket->ops = NULL free(socket) sock->ops->sendmsg ^ panic here ''' The ref of psock become 0 after sock_map_close() executed. ''' void sock_map_close() { ... if (likely(psock)) { ... // !! here we remove psock and the ref of psock become 0 sock_map_remove_links(sk, psock) psock = sk_psock_get(sk); if (unlikely(!psock)) goto no_psock; <=== Control jumps here via goto ... cancel_delayed_work_sync(&psock->work); <=== not executed sk_psock_put(sk, psock); ... } ''' Based on the fact that we already wait for the workqueue to finish in sock_map_close() if psock is held, we simply increase the psock reference count to avoid race conditions. With this patch, if the backlog thread is running, sock_map_close() will wait for the backlog thread to complete and cancel all pending work. If no backlog running, any pending work that hasn't started by then will fail when invoked by sk_psock_get(), as the psock reference count have been zeroed, and sk_psock_drop() will cancel all jobs via cancel_delayed_work_sync(). In summary, we require synchronization to coordinate the backlog thread and close() thread. The panic I catched: ''' Workqueue: events sk_psock_backlog RIP: 0010:sock_sendmsg+0x21d/0x440 RAX: 0000000000000000 RBX: ffffc9000521fad8 RCX: 0000000000000001 ... Call Trace: ? die_addr+0x40/0xa0 ? exc_general_protection+0x14c/0x230 ? asm_exc_general_protection+0x26/0x30 ? sock_sendmsg+0x21d/0x440 ? sock_sendmsg+0x3e0/0x440 ? __pfx_sock_sendmsg+0x10/0x10 __skb_send_sock+0x543/0xb70 sk_psock_backlog+0x247/0xb80 ... ''' Fixes: 4b4647add7d3 ("sock_map: avoid race between sock_map_close and sk_psock_put") Reported-by: Michal Luczaj Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Reviewed-by: John Fastabend Link: https://lore.kernel.org/r/20250516141713.291150-1-jiayuan.chen@linux.dev Signed-off-by: Sasha Levin --- net/core/skmsg.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 061f1409bd5a9e..6d689918c2b390 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -656,6 +656,13 @@ static void sk_psock_backlog(struct work_struct *work) bool ingress; int ret; + /* Increment the psock refcnt to synchronize with close(fd) path in + * sock_map_close(), ensuring we wait for backlog thread completion + * before sk_socket freed. If refcnt increment fails, it indicates + * sock_map_close() completed with sk_socket potentially already freed. + */ + if (!sk_psock_get(psock->sk)) + return; mutex_lock(&psock->work_mutex); while ((skb = skb_peek(&psock->ingress_skb))) { len = skb->len; @@ -707,6 +714,7 @@ static void sk_psock_backlog(struct work_struct *work) } end: mutex_unlock(&psock->work_mutex); + sk_psock_put(psock->sk, psock); } struct sk_psock *sk_psock_init(struct sock *sk, int node) From 011d1c563e8225e3141e18e806a6d59be6d5ff10 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 May 2025 11:38:48 +0200 Subject: [PATCH 2371/2924] netfilter: nf_tables: nft_fib: consistent l3mdev handling [ Upstream commit 9a119669fb1924cd9658c16da39a5a585e129e50 ] fib has two modes: 1. Obtain output device according to source or destination address 2. Obtain the type of the address, e.g. local, unicast, multicast. 'fib daddr type' should return 'local' if the address is configured in this netns or unicast otherwise. 'fib daddr . iif type' should return 'local' if the address is configured on the input interface or unicast otherwise, i.e. more restrictive. However, if the interface is part of a VRF, then 'fib daddr type' returns unicast even if the address is configured on the incoming interface. This is broken for both ipv4 and ipv6. In the ipv4 case, inet_dev_addr_type must only be used if the 'iif' or 'oif' (strict mode) was requested. Else inet_addr_type_dev_table() needs to be used and the correct dev argument must be passed as well so the correct fib (vrf) table is used. In the ipv6 case, the bug is similar, without strict mode, dev is NULL so .flowi6_l3mdev will be set to 0. Add a new 'nft_fib_l3mdev_master_ifindex_rcu()' helper and use that to init the .l3mdev structure member. For ipv6, use it from nft_fib6_flowi_init() which gets called from both the 'type' and the 'route' mode eval functions. This provides consistent behaviour for all modes for both ipv4 and ipv6: If strict matching is requested, the input respectively output device of the netfilter hooks is used. Otherwise, use skb->dev to obtain the l3mdev ifindex. Without this, most type checks in updated nft_fib.sh selftest fail: FAIL: did not find veth0 . 10.9.9.1 . local in fibtype4 FAIL: did not find veth0 . dead:1::1 . local in fibtype6 FAIL: did not find veth0 . dead:9::1 . local in fibtype6 FAIL: did not find tvrf . 10.0.1.1 . local in fibtype4 FAIL: did not find tvrf . 10.9.9.1 . local in fibtype4 FAIL: did not find tvrf . dead:1::1 . local in fibtype6 FAIL: did not find tvrf . dead:9::1 . local in fibtype6 FAIL: fib expression address types match (iif in vrf) (fib errounously returns 'unicast' for all of them, even though all of these addresses are local to the vrf). Fixes: f6d0cbcf09c5 ("netfilter: nf_tables: add fib expression") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nft_fib.h | 9 +++++++++ net/ipv4/netfilter/nft_fib_ipv4.c | 11 +++++++++-- net/ipv6/netfilter/nft_fib_ipv6.c | 4 +--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 6e202ed5e63f3c..7370fba844efcf 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -2,6 +2,7 @@ #ifndef _NFT_FIB_H_ #define _NFT_FIB_H_ +#include #include struct nft_fib { @@ -39,6 +40,14 @@ static inline bool nft_fib_can_skip(const struct nft_pktinfo *pkt) return nft_fib_is_loopback(pkt->skb, indev); } +static inline int nft_fib_l3mdev_master_ifindex_rcu(const struct nft_pktinfo *pkt, + const struct net_device *iif) +{ + const struct net_device *dev = iif ? iif : pkt->skb->dev; + + return l3mdev_master_ifindex_rcu(dev); +} + int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 9082ca17e845cb..7e7c49535e3f56 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -50,7 +50,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, else addr = iph->saddr; - *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); + if (priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) { + *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); + return; + } + + *dst = inet_addr_type_dev_table(nft_net(pkt), pkt->skb->dev, addr); } EXPORT_SYMBOL_GPL(nft_fib4_eval_type); @@ -65,8 +70,8 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, struct flowi4 fl4 = { .flowi4_scope = RT_SCOPE_UNIVERSE, .flowi4_iif = LOOPBACK_IFINDEX, + .flowi4_proto = pkt->tprot, .flowi4_uid = sock_net_uid(nft_net(pkt), NULL), - .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)), }; const struct net_device *oif; const struct net_device *found; @@ -90,6 +95,8 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, else oif = NULL; + fl4.flowi4_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, oif); + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index f1f5640da67285..421036a3605b46 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -50,6 +50,7 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, fl6->flowi6_mark = pkt->skb->mark; fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; + fl6->flowi6_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, dev); return lookup_flags; } @@ -73,8 +74,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); - fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev); - nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) @@ -166,7 +165,6 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), - .flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)), }; struct rt6_info *rt; int lookup_flags; From 6601a4e8d43b7b2015780732484d0a73c7159b49 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Wed, 21 May 2025 11:41:08 +0200 Subject: [PATCH 2372/2924] netfilter: nft_tunnel: fix geneve_opt dump [ Upstream commit 22a9613de4c29d7d0770bfb8a5a9d73eb8df7dad ] When dumping a nft_tunnel with more than one geneve_opt configured the netlink attribute hierarchy should be as follow: NFTA_TUNNEL_KEY_OPTS | |--NFTA_TUNNEL_KEY_OPTS_GENEVE | | | |--NFTA_TUNNEL_KEY_GENEVE_CLASS | |--NFTA_TUNNEL_KEY_GENEVE_TYPE | |--NFTA_TUNNEL_KEY_GENEVE_DATA | |--NFTA_TUNNEL_KEY_OPTS_GENEVE | | | |--NFTA_TUNNEL_KEY_GENEVE_CLASS | |--NFTA_TUNNEL_KEY_GENEVE_TYPE | |--NFTA_TUNNEL_KEY_GENEVE_DATA | |--NFTA_TUNNEL_KEY_OPTS_GENEVE ... Otherwise, userspace tools won't be able to fetch the geneve options configured correctly. Fixes: 925d844696d9 ("netfilter: nft_tunnel: add support for geneve opts") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_tunnel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 0c63d1367cf7a7..a12486ae089d6f 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -621,10 +621,10 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, struct geneve_opt *opt; int offset = 0; - inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE); - if (!inner) - goto failure; while (opts->len > offset) { + inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE); + if (!inner) + goto failure; opt = (struct geneve_opt *)(opts->u.data + offset); if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS, opt->opt_class) || @@ -634,8 +634,8 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, opt->length * 4, opt->opt_data)) goto inner_failure; offset += sizeof(*opt) + opt->length * 4; + nla_nest_end(skb, inner); } - nla_nest_end(skb, inner); } nla_nest_end(skb, nest); return 0; From 5ccf9e2c51f1253a99795df8943a6bba5ef99058 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 23 May 2025 12:47:28 +0200 Subject: [PATCH 2373/2924] RISC-V: KVM: lock the correct mp_state during reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7917be170928189fefad490d1a1237fdfa6b856f ] Currently, the kvm_riscv_vcpu_sbi_system_reset() function locks vcpu->arch.mp_state_lock when updating tmp->arch.mp_state.mp_state which is incorrect hence fix it. Fixes: 2121cadec45a ("RISCV: KVM: Introduce mp_state_lock to avoid lock inversion") Signed-off-by: Radim Krčmář Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20250523104725.2894546-4-rkrcmar@ventanamicro.com Signed-off-by: Anup Patel Signed-off-by: Sasha Levin --- arch/riscv/kvm/vcpu_sbi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index d1c83a77735e05..0000ecf49b188b 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -143,9 +143,9 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, struct kvm_vcpu *tmp; kvm_for_each_vcpu(i, tmp, vcpu->kvm) { - spin_lock(&vcpu->arch.mp_state_lock); + spin_lock(&tmp->arch.mp_state_lock); WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); - spin_unlock(&vcpu->arch.mp_state_lock); + spin_unlock(&tmp->arch.mp_state_lock); } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); From 7c01863b1c47f040d9674171e77789a423b9b128 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Tue, 20 May 2025 14:32:39 +0300 Subject: [PATCH 2374/2924] net: usb: aqc111: fix error handling of usbnet read calls [ Upstream commit 405b0d610745fb5e84fc2961d9b960abb9f3d107 ] Syzkaller, courtesy of syzbot, identified an error (see report [1]) in aqc111 driver, caused by incomplete sanitation of usb read calls' results. This problem is quite similar to the one fixed in commit 920a9fa27e78 ("net: asix: add proper error handling of usb read errors"). For instance, usbnet_read_cmd() may read fewer than 'size' bytes, even if the caller expected the full amount, and aqc111_read_cmd() will not check its result properly. As [1] shows, this may lead to MAC address in aqc111_bind() being only partly initialized, triggering KMSAN warnings. Fix the issue by verifying that the number of bytes read is as expected and not less. [1] Partial syzbot report: BUG: KMSAN: uninit-value in is_valid_ether_addr include/linux/etherdevice.h:208 [inline] BUG: KMSAN: uninit-value in usbnet_probe+0x2e57/0x4390 drivers/net/usb/usbnet.c:1830 is_valid_ether_addr include/linux/etherdevice.h:208 [inline] usbnet_probe+0x2e57/0x4390 drivers/net/usb/usbnet.c:1830 usb_probe_interface+0xd01/0x1310 drivers/usb/core/driver.c:396 call_driver_probe drivers/base/dd.c:-1 [inline] really_probe+0x4d1/0xd90 drivers/base/dd.c:658 __driver_probe_device+0x268/0x380 drivers/base/dd.c:800 ... Uninit was stored to memory at: dev_addr_mod+0xb0/0x550 net/core/dev_addr_lists.c:582 __dev_addr_set include/linux/netdevice.h:4874 [inline] eth_hw_addr_set include/linux/etherdevice.h:325 [inline] aqc111_bind+0x35f/0x1150 drivers/net/usb/aqc111.c:717 usbnet_probe+0xbe6/0x4390 drivers/net/usb/usbnet.c:1772 usb_probe_interface+0xd01/0x1310 drivers/usb/core/driver.c:396 ... Uninit was stored to memory at: ether_addr_copy include/linux/etherdevice.h:305 [inline] aqc111_read_perm_mac drivers/net/usb/aqc111.c:663 [inline] aqc111_bind+0x794/0x1150 drivers/net/usb/aqc111.c:713 usbnet_probe+0xbe6/0x4390 drivers/net/usb/usbnet.c:1772 usb_probe_interface+0xd01/0x1310 drivers/usb/core/driver.c:396 call_driver_probe drivers/base/dd.c:-1 [inline] ... Local variable buf.i created at: aqc111_read_perm_mac drivers/net/usb/aqc111.c:656 [inline] aqc111_bind+0x221/0x1150 drivers/net/usb/aqc111.c:713 usbnet_probe+0xbe6/0x4390 drivers/net/usb/usbnet.c:1772 Reported-by: syzbot+3b6b9ff7b80430020c7b@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=3b6b9ff7b80430020c7b Tested-by: syzbot+3b6b9ff7b80430020c7b@syzkaller.appspotmail.com Fixes: df2d59a2ab6c ("net: usb: aqc111: Add support for getting and setting of MAC address") Signed-off-by: Nikita Zhandarovich Link: https://patch.msgid.link/20250520113240.2369438-1-n.zhandarovich@fintech.ru Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/usb/aqc111.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index ff5be2cbf17b90..453a2cf82753fb 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -30,10 +30,13 @@ static int aqc111_read_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value, ret = usbnet_read_cmd_nopm(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); - if (unlikely(ret < 0)) + if (unlikely(ret < size)) { + ret = ret < 0 ? ret : -ENODATA; + netdev_warn(dev->net, "Failed to read(0x%x) reg index 0x%04x: %d\n", cmd, index, ret); + } return ret; } @@ -46,10 +49,13 @@ static int aqc111_read_cmd(struct usbnet *dev, u8 cmd, u16 value, ret = usbnet_read_cmd(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); - if (unlikely(ret < 0)) + if (unlikely(ret < size)) { + ret = ret < 0 ? ret : -ENODATA; + netdev_warn(dev->net, "Failed to read(0x%x) reg index 0x%04x: %d\n", cmd, index, ret); + } return ret; } From cd8bef963aaff980abca9755433ae783c979cfc6 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 21 May 2025 16:00:43 +0530 Subject: [PATCH 2375/2924] octeontx2-af: Send Link events one by one [ Upstream commit ba5cb47b56e5d89f0a5eb5163ffbfda1e3e7cef0 ] Send link events one after another otherwise new message is overwriting the message which is being processed by PF. Fixes: a88e0f936ba9 ("octeontx2: Detect the mbox up or down message via register") Signed-off-by: Subbaraya Sundeep Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Link: https://patch.msgid.link/1747823443-404-1-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c | 2 ++ drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 ++ drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c index 655dd4726d36ef..0277d226293e9c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c @@ -143,6 +143,8 @@ static int mcs_notify_pfvf(struct mcs_intr_event *event, struct rvu *rvu) otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pf); + otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pf); + mutex_unlock(&rvu->mbox_lock); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 992fa0b82e8d2d..ebb56eb0d18cfd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -272,6 +272,8 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pfid); + otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pfid); + mutex_unlock(&rvu->mbox_lock); } while (pfmap); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c index 052ae5923e3a85..32953cca108c80 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c @@ -60,6 +60,8 @@ static int rvu_rep_up_notify(struct rvu *rvu, struct rep_event *event) otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pf); + otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pf); + mutex_unlock(&rvu->mbox_lock); return 0; } From 17bada47e9ac3621c28e0ffa89af8279fedea00a Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 21 May 2025 14:17:05 +0200 Subject: [PATCH 2376/2924] vsock/virtio: fix `rx_bytes` accounting for stream sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 45ca7e9f0730ae36fc610e675b990e9cc9ca0714 ] In `struct virtio_vsock_sock`, we maintain two counters: - `rx_bytes`: used internally to track how many bytes have been read. This supports mechanisms like .stream_has_data() and sock_rcvlowat(). - `fwd_cnt`: used for the credit mechanism to inform available receive buffer space to the remote peer. These counters are updated via virtio_transport_inc_rx_pkt() and virtio_transport_dec_rx_pkt(). Since the beginning with commit 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko"), we call virtio_transport_dec_rx_pkt() in virtio_transport_stream_do_dequeue() only when we consume the entire packet, so partial reads, do not update `rx_bytes` and `fwd_cnt`. This is fine for `fwd_cnt`, because we still have space used for the entire packet, and we don't want to update the credit for the other peer until we free the space of the entire packet. However, this causes `rx_bytes` to be stale on partial reads. Previously, this didn’t cause issues because `rx_bytes` was used only by .stream_has_data(), and any unread portion of a packet implied data was still available. However, since commit 93b808876682 ("virtio/vsock: fix logic which reduces credit update messages"), we now rely on `rx_bytes` to determine if a credit update should be sent when the data in the RX queue drops below SO_RCVLOWAT value. This patch fixes the accounting by updating `rx_bytes` with the number of bytes actually read, even on partial reads, while leaving `fwd_cnt` untouched until the packet is fully consumed. Also introduce a new `buf_used` counter to check that the remote peer is honoring the given credit; this was previously done via `rx_bytes`. Fixes: 93b808876682 ("virtio/vsock: fix logic which reduces credit update messages") Signed-off-by: Stefano Garzarella Link: https://patch.msgid.link/20250521121705.196379-1-sgarzare@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- include/linux/virtio_vsock.h | 1 + net/vmw_vsock/virtio_transport_common.c | 26 +++++++++++++++---------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 0387d64e2c66c6..36fb3edfa403d9 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -140,6 +140,7 @@ struct virtio_vsock_sock { u32 last_fwd_cnt; u32 rx_bytes; u32 buf_alloc; + u32 buf_used; struct sk_buff_head rx_queue; u32 msg_count; }; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 7f7de6d8809655..2c9b1011cdcc80 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -441,18 +441,20 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len) { - if (vvs->rx_bytes + len > vvs->buf_alloc) + if (vvs->buf_used + len > vvs->buf_alloc) return false; vvs->rx_bytes += len; + vvs->buf_used += len; return true; } static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, - u32 len) + u32 bytes_read, u32 bytes_dequeued) { - vvs->rx_bytes -= len; - vvs->fwd_cnt += len; + vvs->rx_bytes -= bytes_read; + vvs->buf_used -= bytes_dequeued; + vvs->fwd_cnt += bytes_dequeued; } void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb) @@ -581,11 +583,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, size_t len) { struct virtio_vsock_sock *vvs = vsk->trans; - size_t bytes, total = 0; struct sk_buff *skb; u32 fwd_cnt_delta; bool low_rx_bytes; int err = -EFAULT; + size_t total = 0; u32 free_space; spin_lock_bh(&vvs->rx_lock); @@ -597,6 +599,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, } while (total < len && !skb_queue_empty(&vvs->rx_queue)) { + size_t bytes, dequeued = 0; + skb = skb_peek(&vvs->rx_queue); bytes = min_t(size_t, len - total, @@ -620,12 +624,12 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes; if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) { - u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); - - virtio_transport_dec_rx_pkt(vvs, pkt_len); + dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len); __skb_unlink(skb, &vvs->rx_queue); consume_skb(skb); } + + virtio_transport_dec_rx_pkt(vvs, bytes, dequeued); } fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt; @@ -781,7 +785,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, msg->msg_flags |= MSG_EOR; } - virtio_transport_dec_rx_pkt(vvs, pkt_len); + virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len); kfree_skb(skb); } @@ -1735,6 +1739,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto struct sock *sk = sk_vsock(vsk); struct virtio_vsock_hdr *hdr; struct sk_buff *skb; + u32 pkt_len; int off = 0; int err; @@ -1752,7 +1757,8 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) vvs->msg_count--; - virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len)); + pkt_len = le32_to_cpu(hdr->len); + virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len); spin_unlock_bh(&vvs->rx_lock); virtio_transport_send_credit_update(vsk); From 8b05aa3692e45b8249379dc52b14acc6a104d2e5 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 21 May 2025 14:36:02 +0300 Subject: [PATCH 2377/2924] RDMA/cma: Fix hang when cma_netevent_callback fails to queue_work [ Upstream commit 92a251c3df8ea1991cd9fe00f1ab0cfce18d7711 ] The cited commit fixed a crash when cma_netevent_callback was called for a cma_id while work on that id from a previous call had not yet started. The work item was re-initialized in the second call, which corrupted the work item currently in the work queue. However, it left a problem when queue_work fails (because the item is still pending in the work queue from a previous call). In this case, cma_id_put (which is called in the work handler) is therefore not called. This results in a userspace process hang (zombie process). Fix this by calling cma_id_put() if queue_work fails. Fixes: 45f5dcdd0497 ("RDMA/cma: Fix workqueue crash in cma_netevent_work_handler") Link: https://patch.msgid.link/r/4f3640b501e48d0166f312a64fdadf72b059bd04.1747827103.git.leon@kernel.org Signed-off-by: Jack Morgenstein Signed-off-by: Feng Liu Reviewed-by: Vlad Dumitrescu Signed-off-by: Leon Romanovsky Reviewed-by: Sharath Srinivasan Reviewed-by: Kalesh AP Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ab31eefa916b3e..274cfbd5aaba76 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -5245,7 +5245,8 @@ static int cma_netevent_callback(struct notifier_block *self, neigh->ha, ETH_ALEN)) continue; cma_id_get(current_id); - queue_work(cma_wq, ¤t_id->id.net_work); + if (!queue_work(cma_wq, ¤t_id->id.net_work)) + cma_id_put(current_id); } out: spin_unlock_irqrestore(&id_table_lock, flags); From 33b58a27ba69cb33bc247bac9e10da5f9ad9cb64 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 21 May 2025 14:41:59 +0200 Subject: [PATCH 2378/2924] net: lan966x: Fix 1-step timestamping over ipv4 or ipv6 [ Upstream commit 57ee9584fd8606deef66d7b65fa4dcf94f6843aa ] When enabling 1-step timestamping for ptp frames that are over udpv4 or udpv6 then the inserted timestamp is added at the wrong offset in the frame, meaning that will modify the frame at the wrong place, so the frame will be malformed. To fix this, the HW needs to know which kind of frame it is to know where to insert the timestamp. For that there is a field in the IFH that says the PDU_TYPE, which can be NONE which is the default value, IPV4 or IPV6. Therefore make sure to set the PDU_TYPE so the HW knows where to insert the timestamp. Like I mention before the issue is not seen with L2 frames because by default the PDU_TYPE has a value of 0, which represents the L2 frames. Fixes: 77eecf25bd9d2f ("net: lan966x: Update extraction/injection for timestamping") Signed-off-by: Horatiu Vultur Link: https://patch.msgid.link/20250521124159.2713525-1-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/microchip/lan966x/lan966x_main.c | 6 +++ .../ethernet/microchip/lan966x/lan966x_main.h | 5 ++ .../ethernet/microchip/lan966x/lan966x_ptp.c | 49 ++++++++++++++----- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 0af143ec0f8694..427bdc0e4908c6 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -353,6 +353,11 @@ static void lan966x_ifh_set_rew_op(void *ifh, u64 rew_op) lan966x_ifh_set(ifh, rew_op, IFH_POS_REW_CMD, IFH_WID_REW_CMD); } +static void lan966x_ifh_set_oam_type(void *ifh, u64 oam_type) +{ + lan966x_ifh_set(ifh, oam_type, IFH_POS_PDU_TYPE, IFH_WID_PDU_TYPE); +} + static void lan966x_ifh_set_timestamp(void *ifh, u64 timestamp) { lan966x_ifh_set(ifh, timestamp, IFH_POS_TIMESTAMP, IFH_WID_TIMESTAMP); @@ -380,6 +385,7 @@ static netdev_tx_t lan966x_port_xmit(struct sk_buff *skb, return err; lan966x_ifh_set_rew_op(ifh, LAN966X_SKB_CB(skb)->rew_op); + lan966x_ifh_set_oam_type(ifh, LAN966X_SKB_CB(skb)->pdu_type); lan966x_ifh_set_timestamp(ifh, LAN966X_SKB_CB(skb)->ts_id); } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 1efa584e710777..1f9df67f05044b 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -75,6 +75,10 @@ #define IFH_REW_OP_ONE_STEP_PTP 0x3 #define IFH_REW_OP_TWO_STEP_PTP 0x4 +#define IFH_PDU_TYPE_NONE 0 +#define IFH_PDU_TYPE_IPV4 7 +#define IFH_PDU_TYPE_IPV6 8 + #define FDMA_RX_DCB_MAX_DBS 1 #define FDMA_TX_DCB_MAX_DBS 1 @@ -254,6 +258,7 @@ struct lan966x_phc { struct lan966x_skb_cb { u8 rew_op; + u8 pdu_type; u16 ts_id; unsigned long jiffies; }; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index 63905bb5a63a83..87e5e81d40dc68 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -322,34 +322,55 @@ void lan966x_ptp_hwtstamp_get(struct lan966x_port *port, *cfg = phc->hwtstamp_config; } -static int lan966x_ptp_classify(struct lan966x_port *port, struct sk_buff *skb) +static void lan966x_ptp_classify(struct lan966x_port *port, struct sk_buff *skb, + u8 *rew_op, u8 *pdu_type) { struct ptp_header *header; u8 msgtype; int type; - if (port->ptp_tx_cmd == IFH_REW_OP_NOOP) - return IFH_REW_OP_NOOP; + if (port->ptp_tx_cmd == IFH_REW_OP_NOOP) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + return; + } type = ptp_classify_raw(skb); - if (type == PTP_CLASS_NONE) - return IFH_REW_OP_NOOP; + if (type == PTP_CLASS_NONE) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + return; + } header = ptp_parse_header(skb, type); - if (!header) - return IFH_REW_OP_NOOP; + if (!header) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + return; + } - if (port->ptp_tx_cmd == IFH_REW_OP_TWO_STEP_PTP) - return IFH_REW_OP_TWO_STEP_PTP; + if (type & PTP_CLASS_L2) + *pdu_type = IFH_PDU_TYPE_NONE; + if (type & PTP_CLASS_IPV4) + *pdu_type = IFH_PDU_TYPE_IPV4; + if (type & PTP_CLASS_IPV6) + *pdu_type = IFH_PDU_TYPE_IPV6; + + if (port->ptp_tx_cmd == IFH_REW_OP_TWO_STEP_PTP) { + *rew_op = IFH_REW_OP_TWO_STEP_PTP; + return; + } /* If it is sync and run 1 step then set the correct operation, * otherwise run as 2 step */ msgtype = ptp_get_msgtype(header, type); - if ((msgtype & 0xf) == 0) - return IFH_REW_OP_ONE_STEP_PTP; + if ((msgtype & 0xf) == 0) { + *rew_op = IFH_REW_OP_ONE_STEP_PTP; + return; + } - return IFH_REW_OP_TWO_STEP_PTP; + *rew_op = IFH_REW_OP_TWO_STEP_PTP; } static void lan966x_ptp_txtstamp_old_release(struct lan966x_port *port) @@ -374,10 +395,12 @@ int lan966x_ptp_txtstamp_request(struct lan966x_port *port, { struct lan966x *lan966x = port->lan966x; unsigned long flags; + u8 pdu_type; u8 rew_op; - rew_op = lan966x_ptp_classify(port, skb); + lan966x_ptp_classify(port, skb, &rew_op, &pdu_type); LAN966X_SKB_CB(skb)->rew_op = rew_op; + LAN966X_SKB_CB(skb)->pdu_type = pdu_type; if (rew_op != IFH_REW_OP_TWO_STEP_PTP) return 0; From cc94336dc3f3b2ac05ff7b093af71eb5c6e406cd Mon Sep 17 00:00:00 2001 From: Suraj Gupta Date: Wed, 21 May 2025 23:46:08 +0530 Subject: [PATCH 2379/2924] net: xilinx: axienet: Fix Tx skb circular buffer occupancy check in dmaengine xmit [ Upstream commit 32374234ab0101881e7d0c6a8ef7ebce566c46c9 ] In Dmaengine flow, driver maintains struct skbuf_dma_descriptor rings each element of which corresponds to a skb. In Tx datapath, compare available space in skb ring with number of skbs instead of skb fragments. Replace x * (MAX_SKB_FRAGS) in netif_txq_completed_wake() and netif_txq_maybe_stop() with x * (1 skb) to fix the comparison. Fixes: 6a91b846af85 ("net: axienet: Introduce dmaengine support") Signed-off-by: Suraj Gupta Reviewed-by: Sean Anderson Link: https://patch.msgid.link/20250521181608.669554-1-suraj.gupta2@amd.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 054abf283ab33e..5f912b27bfd7fc 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -880,7 +880,7 @@ static void axienet_dma_tx_cb(void *data, const struct dmaengine_result *result) dev_consume_skb_any(skbuf_dma->skb); netif_txq_completed_wake(txq, 1, len, CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX), - 2 * MAX_SKB_FRAGS); + 2); } /** @@ -914,7 +914,7 @@ axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev) dma_dev = lp->tx_chan->device; sg_len = skb_shinfo(skb)->nr_frags + 1; - if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) <= sg_len) { + if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) <= 1) { netif_stop_queue(ndev); if (net_ratelimit()) netdev_warn(ndev, "TX ring unexpectedly full\n"); @@ -964,7 +964,7 @@ axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev) txq = skb_get_tx_queue(lp->ndev, skb); netdev_tx_sent_queue(txq, skb->len); netif_txq_maybe_stop(txq, CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX), - MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS); + 1, 2); dmaengine_submit(dma_tx_desc); dma_async_issue_pending(lp->tx_chan); From 2dd4781c5af99415ebbd2f7cc763feb109863c05 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 21 May 2025 20:11:28 -0700 Subject: [PATCH 2380/2924] af_packet: move notifier's packet_dev_mc out of rcu critical section [ Upstream commit d8d85ef0a631df9127f202e6371bb33a0b589952 ] Syzkaller reports the following issue: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:578 __mutex_lock+0x106/0xe80 kernel/locking/mutex.c:746 team_change_rx_flags+0x38/0x220 drivers/net/team/team_core.c:1781 dev_change_rx_flags net/core/dev.c:9145 [inline] __dev_set_promiscuity+0x3f8/0x590 net/core/dev.c:9189 netif_set_promiscuity+0x50/0xe0 net/core/dev.c:9201 dev_set_promiscuity+0x126/0x260 net/core/dev_api.c:286 packet_dev_mc net/packet/af_packet.c:3698 [inline] packet_dev_mclist_delete net/packet/af_packet.c:3722 [inline] packet_notifier+0x292/0xa60 net/packet/af_packet.c:4247 notifier_call_chain+0x1b3/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2214 [inline] call_netdevice_notifiers net/core/dev.c:2228 [inline] unregister_netdevice_many_notify+0x15d8/0x2330 net/core/dev.c:11972 rtnl_delete_link net/core/rtnetlink.c:3522 [inline] rtnl_dellink+0x488/0x710 net/core/rtnetlink.c:3564 rtnetlink_rcv_msg+0x7cf/0xb70 net/core/rtnetlink.c:6955 netlink_rcv_skb+0x219/0x490 net/netlink/af_netlink.c:2534 Calling `PACKET_ADD_MEMBERSHIP` on an ops-locked device can trigger the `NETDEV_UNREGISTER` notifier, which may require disabling promiscuous and/or allmulti mode. Both of these operations require acquiring the netdev instance lock. Move the call to `packet_dev_mc` outside of the RCU critical section. The `mclist` modifications (add, del, flush, unregister) are protected by the RTNL, not the RCU. The RCU only protects the `sklist` and its associated `sks`. The delayed operation on the `mclist` entry remains within the RTNL. Reported-by: syzbot+b191b5ccad8d7a986286@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b191b5ccad8d7a986286 Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250522031129.3247266-1-stfomichev@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/packet/af_packet.c | 21 ++++++++++++++++----- net/packet/internal.h | 1 + 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d4dba06297c33e..20be2c47cf4191 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3713,15 +3713,15 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, } static void packet_dev_mclist_delete(struct net_device *dev, - struct packet_mclist **mlp) + struct packet_mclist **mlp, + struct list_head *list) { struct packet_mclist *ml; while ((ml = *mlp) != NULL) { if (ml->ifindex == dev->ifindex) { - packet_dev_mc(dev, ml, -1); + list_add(&ml->remove_list, list); *mlp = ml->next; - kfree(ml); } else mlp = &ml->next; } @@ -3769,6 +3769,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) memcpy(i->addr, mreq->mr_address, i->alen); memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; + INIT_LIST_HEAD(&i->remove_list); i->next = po->mclist; po->mclist = i; err = packet_dev_mc(dev, i, 1); @@ -4233,9 +4234,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { - struct sock *sk; struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); + struct packet_mclist *ml, *tmp; + LIST_HEAD(mclist); + struct sock *sk; rcu_read_lock(); sk_for_each_rcu(sk, &net->packet.sklist) { @@ -4244,7 +4247,8 @@ static int packet_notifier(struct notifier_block *this, switch (msg) { case NETDEV_UNREGISTER: if (po->mclist) - packet_dev_mclist_delete(dev, &po->mclist); + packet_dev_mclist_delete(dev, &po->mclist, + &mclist); fallthrough; case NETDEV_DOWN: @@ -4277,6 +4281,13 @@ static int packet_notifier(struct notifier_block *this, } } rcu_read_unlock(); + + /* packet_dev_mc might grab instance locks so can't run under rcu */ + list_for_each_entry_safe(ml, tmp, &mclist, remove_list) { + packet_dev_mc(dev, ml, -1); + kfree(ml); + } + return NOTIFY_DONE; } diff --git a/net/packet/internal.h b/net/packet/internal.h index d5d70712007ad3..1e743d0316fdda 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -11,6 +11,7 @@ struct packet_mclist { unsigned short type; unsigned short alen; unsigned char addr[MAX_ADDR_LEN]; + struct list_head remove_list; }; /* kbdq - kernel block descriptor queue */ From 920b6720bb63893b81516c0c45884a8350f9e4bf Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 22 Apr 2025 13:33:45 +0300 Subject: [PATCH 2381/2924] virtio-pci: Fix result size returned for the admin command completion [ Upstream commit 9ef41ebf787fcbde99ac404ae473f8467641f983 ] The result size returned by virtio_pci_admin_dev_parts_get() is 8 bytes larger than the actual result data size. This occurs because the result_sg_size field of the command is filled with the result length from virtqueue_get_buf(), which includes both the data size and an additional 8 bytes of status. This oversized result size causes two issues: 1. The state transferred to the destination includes 8 bytes of extra data at the end. 2. The allocated buffer in the kernel may be smaller than the returned size, leading to failures when reading beyond the allocated size. The commit fixes this by subtracting the status size from the result of virtqueue_get_buf(). This fix has been tested through live migrations with virtio-net, virtio-net-transitional, and virtio-blk devices. Fixes: 704806ca400e ("virtio: Extend the admin command to include the result size") Signed-off-by: Israel Rukshin Reviewed-by: Parav Pandit Reviewed-by: Max Gurtovoy Message-Id: <1745318025-23103-1-git-send-email-israelr@nvidia.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_pci_modern.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index d50fe030d82534..7182f43ed05515 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -48,6 +48,7 @@ void vp_modern_avq_done(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + unsigned int status_size = sizeof(struct virtio_admin_cmd_status); struct virtio_admin_cmd *cmd; unsigned long flags; unsigned int len; @@ -56,7 +57,17 @@ void vp_modern_avq_done(struct virtqueue *vq) do { virtqueue_disable_cb(vq); while ((cmd = virtqueue_get_buf(vq, &len))) { - cmd->result_sg_size = len; + /* If the number of bytes written by the device is less + * than the size of struct virtio_admin_cmd_status, the + * remaining status bytes will remain zero-initialized, + * since the buffer was zeroed during allocation. + * In this case, set the size of command_specific_result + * to 0. + */ + if (len < status_size) + cmd->result_sg_size = 0; + else + cmd->result_sg_size = len - status_size; complete(&cmd->completion); } } while (!virtqueue_enable_cb(vq)); From 0b9bb52796b239de6792d0d68cdc6eb505ebff96 Mon Sep 17 00:00:00 2001 From: KaFai Wan Date: Mon, 26 May 2025 21:33:58 +0800 Subject: [PATCH 2382/2924] bpf: Avoid __bpf_prog_ret0_warn when jit fails [ Upstream commit 86bc9c742426a16b52a10ef61f5b721aecca2344 ] syzkaller reported an issue: WARNING: CPU: 3 PID: 217 at kernel/bpf/core.c:2357 __bpf_prog_ret0_warn+0xa/0x20 kernel/bpf/core.c:2357 Modules linked in: CPU: 3 UID: 0 PID: 217 Comm: kworker/u32:6 Not tainted 6.15.0-rc4-syzkaller-00040-g8bac8898fe39 RIP: 0010:__bpf_prog_ret0_warn+0xa/0x20 kernel/bpf/core.c:2357 Call Trace: bpf_dispatcher_nop_func include/linux/bpf.h:1316 [inline] __bpf_prog_run include/linux/filter.h:718 [inline] bpf_prog_run include/linux/filter.h:725 [inline] cls_bpf_classify+0x74a/0x1110 net/sched/cls_bpf.c:105 ... When creating bpf program, 'fp->jit_requested' depends on bpf_jit_enable. This issue is triggered because of CONFIG_BPF_JIT_ALWAYS_ON is not set and bpf_jit_enable is set to 1, causing the arch to attempt JIT the prog, but jit failed due to FAULT_INJECTION. As a result, incorrectly treats the program as valid, when the program runs it calls `__bpf_prog_ret0_warn` and triggers the WARN_ON_ONCE(1). Reported-by: syzbot+0903f6d7f285e41cdf10@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/6816e34e.a70a0220.254cdc.002c.GAE@google.com Fixes: fa9dd599b4da ("bpf: get rid of pure_initcall dependency to enable jits") Signed-off-by: KaFai Wan Link: https://lore.kernel.org/r/20250526133358.2594176-1-mannkafai@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index a3e57168842119..c20babbf998f4e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2474,7 +2474,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) /* In case of BPF to BPF calls, verifier did all the prep * work with regards to JITing, etc. */ - bool jit_needed = false; + bool jit_needed = fp->jit_requested; if (fp->bpf_func) goto finalize; From ac49b7560b4b08b1e4043a29214cc7ad77644c00 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 23 May 2025 21:13:35 -0700 Subject: [PATCH 2383/2924] bpf: Do not include stack ptr register in precision backtracking bookkeeping [ Upstream commit e2d2115e56c4a02377189bfc3a9a7933552a7b0f ] Yi Lai reported an issue ([1]) where the following warning appears in kernel dmesg: [ 60.643604] verifier backtracking bug [ 60.643635] WARNING: CPU: 10 PID: 2315 at kernel/bpf/verifier.c:4302 __mark_chain_precision+0x3a6c/0x3e10 [ 60.648428] Modules linked in: bpf_testmod(OE) [ 60.650471] CPU: 10 UID: 0 PID: 2315 Comm: test_progs Tainted: G OE 6.15.0-rc4-gef11287f8289-dirty #327 PREEMPT(full) [ 60.654385] Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE [ 60.656682] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 60.660475] RIP: 0010:__mark_chain_precision+0x3a6c/0x3e10 [ 60.662814] Code: 5a 30 84 89 ea e8 c4 d9 01 00 80 3d 3e 7d d8 04 00 0f 85 60 fa ff ff c6 05 31 7d d8 04 01 48 c7 c7 00 58 30 84 e8 c4 06 a5 ff <0f> 0b e9 46 fa ff ff 48 ... [ 60.668720] RSP: 0018:ffff888116cc7298 EFLAGS: 00010246 [ 60.671075] RAX: 54d70e82dfd31900 RBX: ffff888115b65e20 RCX: 0000000000000000 [ 60.673659] RDX: 0000000000000001 RSI: 0000000000000004 RDI: 00000000ffffffff [ 60.676241] RBP: 0000000000000400 R08: ffff8881f6f23bd3 R09: 1ffff1103ede477a [ 60.678787] R10: dffffc0000000000 R11: ffffed103ede477b R12: ffff888115b60ae8 [ 60.681420] R13: 1ffff11022b6cbc4 R14: 00000000fffffff2 R15: 0000000000000001 [ 60.684030] FS: 00007fc2aedd80c0(0000) GS:ffff88826fa8a000(0000) knlGS:0000000000000000 [ 60.686837] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 60.689027] CR2: 000056325369e000 CR3: 000000011088b002 CR4: 0000000000370ef0 [ 60.691623] Call Trace: [ 60.692821] [ 60.693960] ? __pfx_verbose+0x10/0x10 [ 60.695656] ? __pfx_disasm_kfunc_name+0x10/0x10 [ 60.697495] check_cond_jmp_op+0x16f7/0x39b0 [ 60.699237] do_check+0x58fa/0xab10 ... Further analysis shows the warning is at line 4302 as below: 4294 /* static subprog call instruction, which 4295 * means that we are exiting current subprog, 4296 * so only r1-r5 could be still requested as 4297 * precise, r0 and r6-r10 or any stack slot in 4298 * the current frame should be zero by now 4299 */ 4300 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { 4301 verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); 4302 WARN_ONCE(1, "verifier backtracking bug"); 4303 return -EFAULT; 4304 } With the below test (also in the next patch): __used __naked static void __bpf_jmp_r10(void) { asm volatile ( "r2 = 2314885393468386424 ll;" "goto +0;" "if r2 <= r10 goto +3;" "if r1 >= -1835016 goto +0;" "if r2 <= 8 goto +0;" "if r3 <= 0 goto +0;" "exit;" ::: __clobber_all); } SEC("?raw_tp") __naked void bpf_jmp_r10(void) { asm volatile ( "r3 = 0 ll;" "call __bpf_jmp_r10;" "r0 = 0;" "exit;" ::: __clobber_all); } The following is the verifier failure log: 0: (18) r3 = 0x0 ; R3_w=0 2: (85) call pc+2 caller: R10=fp0 callee: frame1: R1=ctx() R3_w=0 R10=fp0 5: frame1: R1=ctx() R3_w=0 R10=fp0 ; asm volatile (" \ @ verifier_precision.c:184 5: (18) r2 = 0x20202000256c6c78 ; frame1: R2_w=0x20202000256c6c78 7: (05) goto pc+0 8: (bd) if r2 <= r10 goto pc+3 ; frame1: R2_w=0x20202000256c6c78 R10=fp0 9: (35) if r1 >= 0xffe3fff8 goto pc+0 ; frame1: R1=ctx() 10: (b5) if r2 <= 0x8 goto pc+0 mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1 mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0 mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3 mark_precise: frame1: regs=r2,r10 stack= before 7: (05) goto pc+0 mark_precise: frame1: regs=r2,r10 stack= before 5: (18) r2 = 0x20202000256c6c78 mark_precise: frame1: regs=r10 stack= before 2: (85) call pc+2 BUG regs 400 The main failure reason is due to r10 in precision backtracking bookkeeping. Actually r10 is always precise and there is no need to add it for the precision backtracking bookkeeping. One way to fix the issue is to prevent bt_set_reg() if any src/dst reg is r10. Andrii suggested to go with push_insn_history() approach to avoid explicitly checking r10 in backtrack_insn(). This patch added push_insn_history() support for cond_jmp like 'rX rY' operations. In check_cond_jmp_op(), if any of rX or rY is a stack pointer, push_insn_history() will record such information, and later backtrack_insn() will do bt_set_reg() properly for those register(s). [1] https://lore.kernel.org/bpf/Z%2F8q3xzpU59CIYQE@ly-workstation/ Reported by: Yi Lai Fixes: 407958a0e980 ("bpf: encapsulate precision backtracking bookkeeping") Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250524041335.4046126-1-yonghong.song@linux.dev Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 12 ++++++++---- kernel/bpf/verifier.c | 18 ++++++++++++++++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9734544b6957cf..d1f02f8e3e55f4 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -356,7 +356,11 @@ enum { INSN_F_SPI_MASK = 0x3f, /* 6 bits */ INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ - INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ + INSN_F_STACK_ACCESS = BIT(9), + + INSN_F_DST_REG_STACK = BIT(10), /* dst_reg is PTR_TO_STACK */ + INSN_F_SRC_REG_STACK = BIT(11), /* src_reg is PTR_TO_STACK */ + /* total 12 bits are used now. */ }; static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); @@ -365,9 +369,9 @@ static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); struct bpf_insn_hist_entry { u32 idx; /* insn idx can't be bigger than 1 million */ - u32 prev_idx : 22; - /* special flags, e.g., whether insn is doing register stack spill/load */ - u32 flags : 10; + u32 prev_idx : 20; + /* special INSN_F_xxx flags */ + u32 flags : 12; /* additional registers that need precision tracking when this * jump is backtracked, vector of six 10-bit records */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 54c6953a8b84c2..efa70141171290 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4413,8 +4413,10 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * before it would be equally necessary to * propagate it to dreg. */ - bt_set_reg(bt, dreg); - bt_set_reg(bt, sreg); + if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK)) + bt_set_reg(bt, sreg); + if (!hist || !(hist->flags & INSN_F_DST_REG_STACK)) + bt_set_reg(bt, dreg); } else if (BPF_SRC(insn->code) == BPF_K) { /* dreg K * Only dreg still needs precision before @@ -16377,6 +16379,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *eq_branch_regs; struct linked_regs linked_regs = {}; u8 opcode = BPF_OP(insn->code); + int insn_flags = 0; bool is_jmp32; int pred = -1; int err; @@ -16435,6 +16438,9 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, insn->src_reg); return -EACCES; } + + if (src_reg->type == PTR_TO_STACK) + insn_flags |= INSN_F_SRC_REG_STACK; } else { if (insn->src_reg != BPF_REG_0) { verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); @@ -16446,6 +16452,14 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, __mark_reg_known(src_reg, insn->imm); } + if (dst_reg->type == PTR_TO_STACK) + insn_flags |= INSN_F_DST_REG_STACK; + if (insn_flags) { + err = push_insn_history(env, this_branch, insn_flags, 0); + if (err) + return err; + } + is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); if (pred >= 0) { From 034bc4a2a72dea2cfcaf24c6bae03c38ad5a0b87 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 23 May 2025 16:37:59 +0800 Subject: [PATCH 2384/2924] net: phy: clear phydev->devlink when the link is deleted [ Upstream commit 0795b05a59b1371b18ffbf09d385296b12e9f5d5 ] There is a potential crash issue when disabling and re-enabling the network port. When disabling the network port, phy_detach() calls device_link_del() to remove the device link, but it does not clear phydev->devlink, so phydev->devlink is not a NULL pointer. Then the network port is re-enabled, but if phy_attach_direct() fails before calling device_link_add(), the code jumps to the "error" label and calls phy_detach(). Since phydev->devlink retains the old value from the previous attach/detach cycle, device_link_del() uses the old value, which accesses a NULL pointer and causes a crash. The simplified crash log is as follows. [ 24.702421] Call trace: [ 24.704856] device_link_put_kref+0x20/0x120 [ 24.709124] device_link_del+0x30/0x48 [ 24.712864] phy_detach+0x24/0x168 [ 24.716261] phy_attach_direct+0x168/0x3a4 [ 24.720352] phylink_fwnode_phy_connect+0xc8/0x14c [ 24.725140] phylink_of_phy_connect+0x1c/0x34 Therefore, phydev->devlink needs to be cleared when the device link is deleted. Fixes: bc66fa87d4fd ("net: phy: Add link between phy dev and mac dev") Signed-off-by: Wei Fang Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250523083759.3741168-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index cc1bfd22fb8120..7d5e76a3db0e94 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1727,8 +1727,10 @@ void phy_detach(struct phy_device *phydev) struct module *ndev_owner = NULL; struct mii_bus *bus; - if (phydev->devlink) + if (phydev->devlink) { device_link_del(phydev->devlink); + phydev->devlink = NULL; + } if (phydev->sysfs_links) { if (dev) From 4318108368d55ce1bf79341718095aa66a236758 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 24 May 2025 09:29:11 +0200 Subject: [PATCH 2385/2924] net: airoha: Fix an error handling path in airoha_alloc_gdm_port() [ Upstream commit c59783780c8ad66f6076a9a7c74df3e006e29519 ] If register_netdev() fails, the error handling path of the probe will not free the memory allocated by the previous airoha_metadata_dst_alloc() call because port->dev->reg_state will not be NETREG_REGISTERED. So, an explicit airoha_metadata_dst_free() call is needed in this case to avoid a memory leak. Fixes: af3cf757d5c9 ("net: airoha: Move DSA tag in DMA descriptor") Signed-off-by: Christophe JAILLET Acked-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/1b94b91345017429ed653e2f05d25620dc2823f9.1746715755.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/airoha/airoha_eth.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 1e9ab65218ff14..c169b8b411b4f5 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2541,7 +2541,15 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, if (err) return err; - return register_netdev(dev); + err = register_netdev(dev); + if (err) + goto free_metadata_dst; + + return 0; + +free_metadata_dst: + airoha_metadata_dst_free(port); + return err; } static int airoha_probe(struct platform_device *pdev) From 4adeac4a1f3b7a99ce4ef0c18b1ba75539e58ea9 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 26 May 2025 10:44:33 +0800 Subject: [PATCH 2386/2924] net: mctp: start tx queue on netdev open [ Upstream commit 126cd7852a62c6fab11a4a4cb6fa96421929ab69 ] We stop queues in ndo_stop, so they need to be restarted in ndo_open. This allows us to resume tx after a link down/up cycle. Suggested-by: Nitin Singh Fixes: 0791c0327a6e ("net: mctp: Add MCTP USB transport driver") Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20250526-dev-mctp-usb-v1-1-c7bd6cb75aa0@codeconstruct.com.au Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/mctp/mctp-usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index e8d4b01c3f3458..775a386d0aca12 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c @@ -257,6 +257,8 @@ static int mctp_usb_open(struct net_device *dev) WRITE_ONCE(mctp_usb->stopped, false); + netif_start_queue(dev); + return mctp_usb_rx_queue(mctp_usb, GFP_KERNEL); } From 41fcc5b78df9e227e652551fbd598b9e13b187c2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 22 May 2025 13:21:47 +0200 Subject: [PATCH 2387/2924] net: phy: fix up const issues in to_mdio_device() and to_phy_device() [ Upstream commit e9cb929670a1e98b592b30f03f06e9e20110f318 ] Both to_mdio_device() and to_phy_device() "throw away" the const pointer attribute passed to them and return a non-const pointer, which generally is not a good thing overall. Fix this up by using container_of_const() which was designed for this very problem. Cc: Alexander Lobakin Cc: Andrew Lunn Cc: Heiner Kallweit Cc: Russell King Fixes: 7eab14de73a8 ("mdio, phy: fix -Wshadow warnings triggered by nested container_of()") Signed-off-by: Greg Kroah-Hartman Link: https://patch.msgid.link/2025052246-conduit-glory-8fc9@gregkh Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/mdio.h | 5 +---- include/linux/phy.h | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 3c3deac57894ef..e43ff9f980a46b 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -45,10 +45,7 @@ struct mdio_device { unsigned int reset_deassert_delay; }; -static inline struct mdio_device *to_mdio_device(const struct device *dev) -{ - return container_of(dev, struct mdio_device, dev); -} +#define to_mdio_device(__dev) container_of_const(__dev, struct mdio_device, dev) /* struct mdio_driver_common: Common to all MDIO drivers */ struct mdio_driver_common { diff --git a/include/linux/phy.h b/include/linux/phy.h index a2bfae80c44975..bef68f6af99a92 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -744,10 +744,7 @@ struct phy_device { #define PHY_F_NO_IRQ 0x80000000 #define PHY_F_RXC_ALWAYS_ON 0x40000000 -static inline struct phy_device *to_phy_device(const struct device *dev) -{ - return container_of(to_mdio_device(dev), struct phy_device, mdio); -} +#define to_phy_device(__dev) container_of_const(to_mdio_device(__dev), struct phy_device, mdio) /** * struct phy_tdr_config - Configuration of a TDR raw test From 53ae47782c0dd8bbae2ed3b9780a4a9d673a5a49 Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Mon, 26 May 2025 11:00:47 +0530 Subject: [PATCH 2388/2924] net: lan743x: rename lan743x_reset_phy to lan743x_hw_reset_phy [ Upstream commit 68927eb52d0af04863584930db06075d2610e194 ] rename the function to lan743x_hw_reset_phy to better describe it operation. Fixes: 23f0703c125be ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Thangaraj Samynathan Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250526053048.287095-2-thangaraj.s@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan743x_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index a70b88037a208b..da5bd54208dd54 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1330,7 +1330,7 @@ static int lan743x_mac_set_mtu(struct lan743x_adapter *adapter, int new_mtu) } /* PHY */ -static int lan743x_phy_reset(struct lan743x_adapter *adapter) +static int lan743x_hw_reset_phy(struct lan743x_adapter *adapter) { u32 data; @@ -1348,7 +1348,7 @@ static int lan743x_phy_reset(struct lan743x_adapter *adapter) static int lan743x_phy_init(struct lan743x_adapter *adapter) { - return lan743x_phy_reset(adapter); + return lan743x_hw_reset_phy(adapter); } static void lan743x_phy_interface_select(struct lan743x_adapter *adapter) From be0cb02add593d6dc228fc736bfe0426bbcea885 Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Mon, 26 May 2025 11:00:48 +0530 Subject: [PATCH 2389/2924] net: lan743x: Fix PHY reset handling during initialization and WOL [ Upstream commit 82d1096ca8b5dbb3158d707e6fb3ad21c3403a49 ] Remove lan743x_phy_init from lan743x_hardware_init as it resets the PHY registers, causing WOL to fail on subsequent attempts. Add a call to lan743x_hw_reset_phy in the probe function to ensure the PHY is reset during device initialization. Fixes: 23f0703c125be ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Thangaraj Samynathan Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250526053048.287095-3-thangaraj.s@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan743x_main.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index da5bd54208dd54..7f36443832ada3 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1346,11 +1346,6 @@ static int lan743x_hw_reset_phy(struct lan743x_adapter *adapter) 50000, 1000000); } -static int lan743x_phy_init(struct lan743x_adapter *adapter) -{ - return lan743x_hw_reset_phy(adapter); -} - static void lan743x_phy_interface_select(struct lan743x_adapter *adapter) { u32 id_rev; @@ -3534,10 +3529,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, if (ret) return ret; - ret = lan743x_phy_init(adapter); - if (ret) - return ret; - ret = lan743x_ptp_init(adapter); if (ret) return ret; @@ -3674,6 +3665,10 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev, if (ret) goto cleanup_pci; + ret = lan743x_hw_reset_phy(adapter); + if (ret) + goto cleanup_pci; + ret = lan743x_hardware_init(adapter, pdev); if (ret) goto cleanup_pci; From cdbabd316c5a4a9b0fda6aafe491e2db17fbb95d Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 22 May 2025 13:57:22 +0200 Subject: [PATCH 2390/2924] net: phy: mscc: Fix memory leak when using one step timestamping [ Upstream commit 846992645b25ec4253167e3f931e4597eb84af56 ] Fix memory leak when running one-step timestamping. When running one-step sync timestamping, the HW is configured to insert the TX time into the frame, so there is no reason to keep the skb anymore. As in this case the HW will never generate an interrupt to say that the frame was timestamped, then the frame will never released. Fix this by freeing the frame in case of one-step timestamping. Fixes: 7d272e63e0979d ("net: phy: mscc: timestamping and PHC support") Signed-off-by: Horatiu Vultur Link: https://patch.msgid.link/20250522115722.2827199-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/mscc/mscc_ptp.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index ed8fb14a7f215e..6f96f2679f0bf7 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1166,18 +1166,24 @@ static void vsc85xx_txtstamp(struct mii_timestamper *mii_ts, container_of(mii_ts, struct vsc8531_private, mii_ts); if (!vsc8531->ptp->configured) - return; + goto out; - if (vsc8531->ptp->tx_type == HWTSTAMP_TX_OFF) { - kfree_skb(skb); - return; - } + if (vsc8531->ptp->tx_type == HWTSTAMP_TX_OFF) + goto out; + + if (vsc8531->ptp->tx_type == HWTSTAMP_TX_ONESTEP_SYNC) + if (ptp_msg_is_sync(skb, type)) + goto out; skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; mutex_lock(&vsc8531->ts_lock); __skb_queue_tail(&vsc8531->ptp->tx_queue, skb); mutex_unlock(&vsc8531->ts_lock); + return; + +out: + kfree_skb(skb); } static bool vsc85xx_rxtstamp(struct mii_timestamper *mii_ts, From 49ef400435071d14ebdc1577de923cb90e4e7f8c Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Thu, 22 May 2025 15:17:41 +0530 Subject: [PATCH 2391/2924] octeontx2-pf: QOS: Perform cache sync on send queue teardown [ Upstream commit 479c58016099d19686e36f6c50f912360839a7fa ] QOS is designed to create a new send queue whenever a class is created, ensuring proper shaping and scheduling. However, when multiple send queues are created and deleted in a loop, SMMU errors are observed. This patch addresses the issue by performing an data cache sync during the teardown of QOS send queues. Fixes: ab6dddd2a669 ("octeontx2-pf: qos send queues management") Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250522094742.1498295-1-hkelam@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/nic/qos_sq.c | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c index c5dbae0e513b64..58d572ce08eff5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c @@ -256,6 +256,26 @@ int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx) return err; } +static int otx2_qos_nix_npa_ndc_sync(struct otx2_nic *pfvf) +{ + struct ndc_sync_op *req; + int rc; + + mutex_lock(&pfvf->mbox.lock); + + req = otx2_mbox_alloc_msg_ndc_sync_op(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + req->nix_lf_tx_sync = true; + req->npa_lf_sync = true; + rc = otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); + return rc; +} + void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx) { struct otx2_qset *qset = &pfvf->qset; @@ -285,6 +305,8 @@ void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx) otx2_qos_sqb_flush(pfvf, sq_idx); otx2_smq_flush(pfvf, otx2_get_smq_idx(pfvf, sq_idx)); + /* NIX/NPA NDC sync */ + otx2_qos_nix_npa_ndc_sync(pfvf); otx2_cleanup_tx_cqes(pfvf, cq); mutex_lock(&pfvf->mbox.lock); From f1fca0eae5a0573f226f46c6871260278e7dda12 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Thu, 22 May 2025 17:28:42 +0530 Subject: [PATCH 2392/2924] octeontx2-pf: QOS: Refactor TC_HTB_LEAF_DEL_LAST callback [ Upstream commit 67af4ec948e8ce3ea53a9cf614d01fddf172e56d ] This patch addresses below issues, 1. Active traffic on the leaf node must be stopped before its send queue is reassigned to the parent. This patch resolves the issue by marking the node as 'Inner'. 2. During a system reboot, the interface receives TC_HTB_LEAF_DEL and TC_HTB_LEAF_DEL_LAST callbacks to delete its HTB queues. In the case of TC_HTB_LEAF_DEL_LAST, although the same send queue is reassigned to the parent, the current logic still attempts to update the real number of queues, leadning to below warnings New queues can't be registered after device unregistration. WARNING: CPU: 0 PID: 6475 at net/core/net-sysfs.c:1714 netdev_queue_update_kobjects+0x1e4/0x200 Fixes: 5e6808b4c68d ("octeontx2-pf: Add support for HTB offload") Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250522115842.1499666-1-hkelam@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/qos.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c index 35acc07bd96489..5765bac119f0e7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c @@ -1638,6 +1638,7 @@ static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force if (!node->is_static) dwrr_del_node = true; + WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER); /* destroy the leaf node */ otx2_qos_disable_sq(pfvf, qid); otx2_qos_destroy_node(pfvf, node); @@ -1682,9 +1683,6 @@ static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force } kfree(new_cfg); - /* update tx_real_queues */ - otx2_qos_update_tx_netdev_queues(pfvf); - return 0; } From e2ec310c7a50271843c585e27ef14e48c66ce649 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 22 May 2025 15:18:56 -0700 Subject: [PATCH 2393/2924] calipso: Don't call calipso functions for AF_INET sk. [ Upstream commit 6e9f2df1c550ead7cecb3e450af1105735020c92 ] syzkaller reported a null-ptr-deref in txopt_get(). [0] The offset 0x70 was of struct ipv6_txoptions in struct ipv6_pinfo, so struct ipv6_pinfo was NULL there. However, this never happens for IPv6 sockets as inet_sk(sk)->pinet6 is always set in inet6_create(), meaning the socket was not IPv6 one. The root cause is missing validation in netlbl_conn_setattr(). netlbl_conn_setattr() switches branches based on struct sockaddr.sa_family, which is passed from userspace. However, netlbl_conn_setattr() does not check if the address family matches the socket. The syzkaller must have called connect() for an IPv6 address on an IPv4 socket. We have a proper validation in tcp_v[46]_connect(), but security_socket_connect() is called in the earlier stage. Let's copy the validation to netlbl_conn_setattr(). [0]: Oops: general protection fault, probably for non-canonical address 0xdffffc000000000e: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000070-0x0000000000000077] CPU: 2 UID: 0 PID: 12928 Comm: syz.9.1677 Not tainted 6.12.0 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:txopt_get include/net/ipv6.h:390 [inline] RIP: 0010: Code: 02 00 00 49 8b ac 24 f8 02 00 00 e8 84 69 2a fd e8 ff 00 16 fd 48 8d 7d 70 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 53 02 00 00 48 8b 6d 70 48 85 ed 0f 84 ab 01 00 RSP: 0018:ffff88811b8afc48 EFLAGS: 00010212 RAX: dffffc0000000000 RBX: 1ffff11023715f8a RCX: ffffffff841ab00c RDX: 000000000000000e RSI: ffffc90007d9e000 RDI: 0000000000000070 RBP: 0000000000000000 R08: ffffed1023715f9d R09: ffffed1023715f9e R10: ffffed1023715f9d R11: 0000000000000003 R12: ffff888123075f00 R13: ffff88810245bd80 R14: ffff888113646780 R15: ffff888100578a80 FS: 00007f9019bd7640(0000) GS:ffff8882d2d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f901b927bac CR3: 0000000104788003 CR4: 0000000000770ef0 PKRU: 80000000 Call Trace: calipso_sock_setattr+0x56/0x80 net/netlabel/netlabel_calipso.c:557 netlbl_conn_setattr+0x10c/0x280 net/netlabel/netlabel_kapi.c:1177 selinux_netlbl_socket_connect_helper+0xd3/0x1b0 security/selinux/netlabel.c:569 selinux_netlbl_socket_connect_locked security/selinux/netlabel.c:597 [inline] selinux_netlbl_socket_connect+0xb6/0x100 security/selinux/netlabel.c:615 selinux_socket_connect+0x5f/0x80 security/selinux/hooks.c:4931 security_socket_connect+0x50/0xa0 security/security.c:4598 __sys_connect_file+0xa4/0x190 net/socket.c:2067 __sys_connect+0x12c/0x170 net/socket.c:2088 __do_sys_connect net/socket.c:2098 [inline] __se_sys_connect net/socket.c:2095 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:2095 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xaa/0x1b0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f901b61a12d Code: 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f9019bd6fa8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 00007f901b925fa0 RCX: 00007f901b61a12d RDX: 000000000000001c RSI: 0000200000000140 RDI: 0000000000000003 RBP: 00007f901b701505 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f901b5b62a0 R15: 00007f9019bb7000 Modules linked in: Fixes: ceba1832b1b2 ("calipso: Set the calipso socket label to match the secattr.") Reported-by: syzkaller Reported-by: John Cheung Closes: https://lore.kernel.org/netdev/CAP=Rh=M1LzunrcQB1fSGauMrJrhL6GGps5cPAKzHJXj6GQV+-g@mail.gmail.com/ Signed-off-by: Kuniyuki Iwashima Acked-by: Paul Moore Link: https://patch.msgid.link/20250522221858.91240-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netlabel/netlabel_kapi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index cd9160bbc91974..6ea16138582c0b 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -1165,6 +1165,9 @@ int netlbl_conn_setattr(struct sock *sk, break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: + if (sk->sk_family != AF_INET6) + return -EAFNOSUPPORT; + addr6 = (struct sockaddr_in6 *)addr; entry = netlbl_domhsh_getentry_af6(secattr->domain, &addr6->sin6_addr); From 3c1906a3d50cb94fd0a10e97a1c0a40c0f033cb7 Mon Sep 17 00:00:00 2001 From: Faicker Mo Date: Fri, 23 May 2025 03:41:43 +0000 Subject: [PATCH 2394/2924] net: openvswitch: Fix the dead loop of MPLS parse [ Upstream commit 0bdc924bfb319fb10d1113cbf091fc26fb7b1f99 ] The unexpected MPLS packet may not end with the bottom label stack. When there are many stacks, The label count value has wrapped around. A dead loop occurs, soft lockup/CPU stuck finally. stack backtrace: UBSAN: array-index-out-of-bounds in /build/linux-0Pa0xK/linux-5.15.0/net/openvswitch/flow.c:662:26 index -1 is out of range for type '__be32 [3]' CPU: 34 PID: 0 Comm: swapper/34 Kdump: loaded Tainted: G OE 5.15.0-121-generic #131-Ubuntu Hardware name: Dell Inc. PowerEdge C6420/0JP9TF, BIOS 2.12.2 07/14/2021 Call Trace: show_stack+0x52/0x5c dump_stack_lvl+0x4a/0x63 dump_stack+0x10/0x16 ubsan_epilogue+0x9/0x36 __ubsan_handle_out_of_bounds.cold+0x44/0x49 key_extract_l3l4+0x82a/0x840 [openvswitch] ? kfree_skbmem+0x52/0xa0 key_extract+0x9c/0x2b0 [openvswitch] ovs_flow_key_extract+0x124/0x350 [openvswitch] ovs_vport_receive+0x61/0xd0 [openvswitch] ? kernel_init_free_pages.part.0+0x4a/0x70 ? get_page_from_freelist+0x353/0x540 netdev_port_receive+0xc4/0x180 [openvswitch] ? netdev_port_receive+0x180/0x180 [openvswitch] netdev_frame_hook+0x1f/0x40 [openvswitch] __netif_receive_skb_core.constprop.0+0x23a/0xf00 __netif_receive_skb_list_core+0xfa/0x240 netif_receive_skb_list_internal+0x18e/0x2a0 napi_complete_done+0x7a/0x1c0 bnxt_poll+0x155/0x1c0 [bnxt_en] __napi_poll+0x30/0x180 net_rx_action+0x126/0x280 ? bnxt_msix+0x67/0x80 [bnxt_en] handle_softirqs+0xda/0x2d0 irq_exit_rcu+0x96/0xc0 common_interrupt+0x8e/0xa0 Fixes: fbdcdd78da7c ("Change in Openvswitch to support MPLS label depth of 3 in ingress direction") Signed-off-by: Faicker Mo Acked-by: Ilya Maximets Reviewed-by: Aaron Conole Link: https://patch.msgid.link/259D3404-575D-4A6D-B263-1DF59A67CF89@zenlayer.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/openvswitch/flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 8a848ce72e2910..b80bd3a9077397 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -788,7 +788,7 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->ipv4, 0, sizeof(key->ipv4)); } } else if (eth_p_mpls(key->eth.type)) { - u8 label_count = 1; + size_t label_count = 1; memset(&key->mpls, 0, sizeof(key->mpls)); skb_set_inner_network_header(skb, skb->mac_len); From 33fe531cb20ce07f78cf9d8605cf2579d3d75794 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 23 May 2025 10:27:16 +0200 Subject: [PATCH 2395/2924] net: phy: mscc: Stop clearing the the UDPv4 checksum for L2 frames [ Upstream commit 57a92d14659df3e7e7e0052358c8cc68bbbc3b5e ] We have noticed that when PHY timestamping is enabled, L2 frames seems to be modified by changing two 2 bytes with a value of 0. The place were these 2 bytes seems to be random(or I couldn't find a pattern). In most of the cases the userspace can ignore these frames but if for example those 2 bytes are in the correction field there is nothing to do. This seems to happen when configuring the HW for IPv4 even that the flow is not enabled. These 2 bytes correspond to the UDPv4 checksum and once we don't enable clearing the checksum when using L2 frames then the frame doesn't seem to be changed anymore. Fixes: 7d272e63e0979d ("net: phy: mscc: timestamping and PHC support") Signed-off-by: Horatiu Vultur Link: https://patch.msgid.link/20250523082716.2935895-1-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/mscc/mscc_ptp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index 6f96f2679f0bf7..6b800081eed52f 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -946,7 +946,9 @@ static int vsc85xx_ip1_conf(struct phy_device *phydev, enum ts_blk blk, /* UDP checksum offset in IPv4 packet * according to: https://tools.ietf.org/html/rfc768 */ - val |= IP1_NXT_PROT_UDP_CHKSUM_OFF(26) | IP1_NXT_PROT_UDP_CHKSUM_CLEAR; + val |= IP1_NXT_PROT_UDP_CHKSUM_OFF(26); + if (enable) + val |= IP1_NXT_PROT_UDP_CHKSUM_CLEAR; vsc85xx_ts_write_csr(phydev, blk, MSCC_ANA_IP1_NXT_PROT_UDP_CHKSUM, val); From b0802f3bee526c58494170fb64afdf893a0415c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 23 May 2025 11:25:45 +0800 Subject: [PATCH 2396/2924] f2fs: fix to skip f2fs_balance_fs() if checkpoint is disabled [ Upstream commit c836d3b8d94e3dd381e7d4bb918875084f85715e ] Syzbot reports a f2fs bug as below: INFO: task syz-executor328:5856 blocked for more than 144 seconds. Not tainted 6.15.0-rc6-syzkaller-00208-g3c21441eeffc #0 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor328 state:D stack:24392 pid:5856 tgid:5832 ppid:5826 task_flags:0x400040 flags:0x00004006 Call Trace: context_switch kernel/sched/core.c:5382 [inline] __schedule+0x168f/0x4c70 kernel/sched/core.c:6767 __schedule_loop kernel/sched/core.c:6845 [inline] schedule+0x165/0x360 kernel/sched/core.c:6860 io_schedule+0x81/0xe0 kernel/sched/core.c:7742 f2fs_balance_fs+0x4b4/0x780 fs/f2fs/segment.c:444 f2fs_map_blocks+0x3af1/0x43b0 fs/f2fs/data.c:1791 f2fs_expand_inode_data+0x653/0xaf0 fs/f2fs/file.c:1872 f2fs_fallocate+0x4f5/0x990 fs/f2fs/file.c:1975 vfs_fallocate+0x6a0/0x830 fs/open.c:338 ioctl_preallocate fs/ioctl.c:290 [inline] file_ioctl fs/ioctl.c:-1 [inline] do_vfs_ioctl+0x1b8f/0x1eb0 fs/ioctl.c:885 __do_sys_ioctl fs/ioctl.c:904 [inline] __se_sys_ioctl+0x82/0x170 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xf6/0x210 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f The root cause is after commit 84b5bb8bf0f6 ("f2fs: modify f2fs_is_checkpoint_ready logic to allow more data to be written with the CP disable"), we will get chance to allow f2fs_is_checkpoint_ready() to return true once below conditions are all true: 1. checkpoint is disabled 2. there are not enough free segments 3. there are enough free blocks Then it will cause f2fs_balance_fs() to trigger foreground GC. void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) ... if (!f2fs_is_checkpoint_ready(sbi)) return; And the testcase mounts f2fs image w/ gc_merge,checkpoint=disable, so deadloop will happen through below race condition: - f2fs_do_shutdown - vfs_fallocate - gc_thread_func - file_start_write - __sb_start_write(SB_FREEZE_WRITE) - f2fs_fallocate - f2fs_expand_inode_data - f2fs_map_blocks - f2fs_balance_fs - prepare_to_wait - wake_up(gc_wait_queue_head) - io_schedule - bdev_freeze - freeze_super - sb->s_writers.frozen = SB_FREEZE_WRITE; - sb_wait_write(sb, SB_FREEZE_WRITE); - if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) continue; : cause deadloop This patch fix to add check condition in f2fs_balance_fs(), so that if checkpoint is disabled, we will just skip trigger foreground GC to avoid such deadloop issue. Meanwhile let's remove f2fs_is_checkpoint_ready() check condition in f2fs_balance_fs(), since it's redundant, due to the main logic in the function is to check: a) whether checkpoint is disabled b) there is enough free segments f2fs_balance_fs() still has all logics after f2fs_is_checkpoint_ready()'s removal. Reported-by: syzbot+aa5bb5f6860e08a60450@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/682d743a.a00a0220.29bc26.0289.GAE@google.com Fixes: 84b5bb8bf0f6 ("f2fs: modify f2fs_is_checkpoint_ready logic to allow more data to be written with the CP disable") Cc: Qi Han Signed-off-by: Chao Yu Reviewed-by: Zhiguo Niu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2a71e70c9ac977..41ca73622c8d46 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -424,7 +424,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi, false); - if (!f2fs_is_checkpoint_ready(sbi)) + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return; /* From 59471c123498efedc36ae2780f29c1cde13b583b Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Wed, 14 May 2025 16:45:48 +0800 Subject: [PATCH 2397/2924] f2fs: use d_inode(dentry) cleanup dentry->d_inode [ Upstream commit a6c397a31f58a1d577c2c8d04b624e9baa31951c ] no logic changes. Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Stable-dep-of: 9883494c45a1 ("f2fs: fix to correct check conditions in f2fs_cross_rename") Signed-off-by: Sasha Levin --- fs/f2fs/namei.c | 8 ++++---- fs/f2fs/super.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 8f8b9b843bdf4b..faa5191cf4ca6d 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -414,7 +414,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (is_inode_flag_set(dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid))) + F2FS_I(inode)->i_projid))) return -EXDEV; err = f2fs_dquot_initialize(dir); @@ -914,7 +914,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid))) + F2FS_I(old_inode)->i_projid))) return -EXDEV; /* @@ -1107,10 +1107,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid)) || + F2FS_I(old_inode)->i_projid)) || (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(old_dir)->i_projid, - F2FS_I(new_dentry->d_inode)->i_projid))) + F2FS_I(new_inode)->i_projid))) return -EXDEV; err = f2fs_dquot_initialize(old_dir); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7a3bc85df6a7a9..386326f7a440eb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1882,9 +1882,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_fsid = u64_to_fsid(id); #ifdef CONFIG_QUOTA - if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) && + if (is_inode_flag_set(d_inode(dentry), FI_PROJ_INHERIT) && sb_has_quota_limits_enabled(sb, PRJQUOTA)) { - f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf); + f2fs_statfs_project(sb, F2FS_I(d_inode(dentry))->i_projid, buf); } #endif return 0; From 9389c0bee53e084a9b702fa341e4d0da55139c4c Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Wed, 14 May 2025 16:45:49 +0800 Subject: [PATCH 2398/2924] f2fs: fix to correct check conditions in f2fs_cross_rename [ Upstream commit 9883494c45a13dc88d27dde4f988c04823b42a2f ] Should be "old_dir" here. Fixes: 5c57132eaf52 ("f2fs: support project quota") Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index faa5191cf4ca6d..28137d499f8f65 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1108,7 +1108,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, F2FS_I(old_inode)->i_projid)) || - (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && + (is_inode_flag_set(old_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(old_dir)->i_projid, F2FS_I(new_inode)->i_projid))) return -EXDEV; From 3eba8ec6cd0ad62bbaff55f76586f86d25758f26 Mon Sep 17 00:00:00 2001 From: Mark Kettenis Date: Thu, 9 Jan 2025 21:52:31 +0100 Subject: [PATCH 2399/2924] arm64: dts: qcom: x1e80100: Mark usb_2 as dma-coherent [ Upstream commit 45bd6ff900cfe5038e2718a900f153ded3fa5392 ] Make this USB controller consistent with the others on this platform. Fixes: 4af46b7bd66f ("arm64: dts: qcom: x1e80100: Add USB nodes") Signed-off-by: Mark Kettenis Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250109205232.92336-1-kettenis@openbsd.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/x1e80100.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 5aeecf711340d2..607d32f68c3406 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -4815,6 +4815,8 @@ snps,dis-u1-entry-quirk; snps,dis-u2-entry-quirk; + dma-coherent; + ports { #address-cells = <1>; #size-cells = <0>; From 3926326d5b7b50a4c33975d412a6f5e09dfdddbd Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 3 Feb 2025 14:23:18 +0100 Subject: [PATCH 2400/2924] arm64: dts: qcom: sm8650: setup gpu thermal with higher temperatures [ Upstream commit 2250f65b32565eb8b757e89248c75977f370f498 ] On the SM8650, the dynamic clock and voltage scaling (DCVS) for the GPU is done from the HLOS, but the GPU can achieve a much higher temperature before failing according the reference downstream implementation. Set higher temperatures in the GPU trip points corresponding to the temperatures provided by Qualcomm in the dowstream source, much closer to the junction temperature and with a higher critical temperature trip in the case the HLOS DCVS cannot handle the temperature surge. The tsens MAX_THRESHOLD is set to 120C on those platforms, so set the hot to 110C to leave a chance to HLOS to react and critical to 115C to avoid the monitor thermal shutdown. Fixes: 497624ed5506 ("arm64: dts: qcom: sm8650: Throttle the GPU when overheating") Signed-off-by: Neil Armstrong Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250203-topic-sm8650-thermal-cpu-idle-v4-2-65e35f307301@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8650.dtsi | 64 ++++++++++++++-------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi index c8a2a76a98f000..9591c13edbb9d4 100644 --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi @@ -6537,20 +6537,20 @@ trips { gpu0_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6570,20 +6570,20 @@ trips { gpu1_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6603,20 +6603,20 @@ trips { gpu2_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6636,20 +6636,20 @@ trips { gpu3_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6669,20 +6669,20 @@ trips { gpu4_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6702,20 +6702,20 @@ trips { gpu5_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6735,20 +6735,20 @@ trips { gpu6_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6768,20 +6768,20 @@ trips { gpu7_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; From b567568eb7c31e11a39c0169682fa423c68aff32 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 28 Jan 2025 12:53:32 +0100 Subject: [PATCH 2401/2924] arm64: dts: qcom: sa8775p: Partially revert "arm64: dts: qcom: sa8775p: add QCrypto nodes" [ Upstream commit 92979f12a201c54ea94b8b3c9f0737c33bb45e23 ] Partially revert commit 7ff3da43ef44 ("arm64: dts: qcom: sa8775p: add QCrypto nodes") by dropping the untested QCE device node. Devicetree bindings test failures were reported on mailing list on 16th of January and after two weeks still no fixes: sa8775p-ride.dtb: crypto@1dfa000: compatible: 'oneOf' conditional failed, one must be fixed: ... 'qcom,sa8775p-qce' is not one of ['qcom,ipq4019-qce', 'qcom,sm8150-qce'] Reported-by: Rob Herring Closes: https://lore.kernel.org/all/CAL_JsqJG_w9jyWjVR=QnPuJganG4uj9+9cEXZ__UAiCw2ZYZZA@mail.gmail.com/ Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250128115333.95021-1-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sa8775p.dtsi | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 2329460b210381..2010b7988b6cc4 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -2420,17 +2420,6 @@ <&apps_smmu 0x481 0x00>; }; - crypto: crypto@1dfa000 { - compatible = "qcom,sa8775p-qce", "qcom,qce"; - reg = <0x0 0x01dfa000 0x0 0x6000>; - dmas = <&cryptobam 4>, <&cryptobam 5>; - dma-names = "rx", "tx"; - iommus = <&apps_smmu 0x480 0x00>, - <&apps_smmu 0x481 0x00>; - interconnects = <&aggre2_noc MASTER_CRYPTO_CORE0 0 &mc_virt SLAVE_EBI1 0>; - interconnect-names = "memory"; - }; - stm: stm@4002000 { compatible = "arm,coresight-stm", "arm,primecell"; reg = <0x0 0x4002000 0x0 0x1000>, From 7cf1c47bf404e424441d0932aac79ad95e392093 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 28 Jan 2025 12:53:33 +0100 Subject: [PATCH 2402/2924] arm64: dts: qcom: qcs8300: Partially revert "arm64: dts: qcom: qcs8300: add QCrypto nodes" [ Upstream commit cdc117c40537c5babfa7f261360d5a98e434d59e ] Partially revert commit a86d84409947 ("arm64: dts: qcom: qcs8300: add QCrypto nodes") by dropping the untested QCE device node. Devicetree bindings test failures were reported on mailing list on 16th of January and after two weeks still no fixes: qcs8300-ride.dtb: crypto@1dfa000: compatible: 'oneOf' conditional failed, one must be fixed: ... 'qcom,qcs8300-qce' is not one of ['qcom,ipq4019-qce', 'qcom,sm8150-qce'] Reported-by: Rob Herring Closes: https://lore.kernel.org/all/CAL_JsqL0HzzGXnCD+z4GASeXNsBxrdw8-qyfHj8S+C2ucK6EPQ@mail.gmail.com/ Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250128115333.95021-2-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/qcs8300.dtsi | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/qcs8300.dtsi b/arch/arm64/boot/dts/qcom/qcs8300.dtsi index 4a057f7c0d9fae..13b1121cdf175b 100644 --- a/arch/arm64/boot/dts/qcom/qcs8300.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs8300.dtsi @@ -798,18 +798,6 @@ <&apps_smmu 0x481 0x00>; }; - crypto: crypto@1dfa000 { - compatible = "qcom,qcs8300-qce", "qcom,qce"; - reg = <0x0 0x01dfa000 0x0 0x6000>; - dmas = <&cryptobam 4>, <&cryptobam 5>; - dma-names = "rx", "tx"; - iommus = <&apps_smmu 0x480 0x00>, - <&apps_smmu 0x481 0x00>; - interconnects = <&aggre2_noc MASTER_CRYPTO_CORE0 QCOM_ICC_TAG_ALWAYS - &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; - interconnect-names = "memory"; - }; - ice: crypto@1d88000 { compatible = "qcom,qcs8300-inline-crypto-engine", "qcom,inline-crypto-engine"; From cdf4a8eefdd8fd3ac24c4a72ceda85e298775b5a Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 15 Jan 2025 14:43:53 +0100 Subject: [PATCH 2403/2924] arm64: dts: qcom: sm8550: use ICC tag for all interconnect phandles [ Upstream commit 54df5e52777e1126862778a2796c3809df85acd7 ] Use the proper QCOM_ICC_TAG_ define instead of passing 0 in all interconnect paths phandle third argument. Use QCOM_ICC_TAG_ALWAYS which is the fallback mask if 0 is used as third phandle argument. Signed-off-by: Neil Armstrong Acked-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250115-topic-sm8x50-upstream-dt-icc-update-v1-1-eaa8b10e2af7@linaro.org Signed-off-by: Bjorn Andersson Stable-dep-of: 327d489d1eca ("arm64: dts: qcom: sm8550: add missing cpu-cfg interconnect path in the mdss node") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 387 ++++++++++++++++++--------- 1 file changed, 258 insertions(+), 129 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index ac3e00ad417719..9465b00f1e74cd 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -331,7 +331,8 @@ scm: scm { compatible = "qcom,scm-sm8550", "qcom,scm"; qcom,dload-mode = <&tcsr 0x19000>; - interconnects = <&aggre2_noc MASTER_CRYPTO 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&aggre2_noc MASTER_CRYPTO QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; }; }; @@ -850,9 +851,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 0 QCOM_GPI_I2C>, <&gpi_dma2 1 0 QCOM_GPI_I2C>; @@ -868,9 +872,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi8_data_clk>, <&qup_spi8_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 0 QCOM_GPI_SPI>, <&gpi_dma2 1 0 QCOM_GPI_SPI>; @@ -890,9 +897,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 1 QCOM_GPI_I2C>, <&gpi_dma2 1 1 QCOM_GPI_I2C>; @@ -908,9 +918,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi9_data_clk>, <&qup_spi9_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 1 QCOM_GPI_SPI>, <&gpi_dma2 1 1 QCOM_GPI_SPI>; @@ -930,9 +943,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 2 QCOM_GPI_I2C>, <&gpi_dma2 1 2 QCOM_GPI_I2C>; @@ -948,9 +964,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi10_data_clk>, <&qup_spi10_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 2 QCOM_GPI_SPI>, <&gpi_dma2 1 2 QCOM_GPI_SPI>; @@ -970,9 +989,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 3 QCOM_GPI_I2C>, <&gpi_dma2 1 3 QCOM_GPI_I2C>; @@ -988,9 +1010,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi11_data_clk>, <&qup_spi11_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 3 QCOM_GPI_I2C>, <&gpi_dma2 1 3 QCOM_GPI_I2C>; @@ -1010,9 +1035,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 4 QCOM_GPI_I2C>, <&gpi_dma2 1 4 QCOM_GPI_I2C>; @@ -1028,9 +1056,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi12_data_clk>, <&qup_spi12_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 4 QCOM_GPI_I2C>, <&gpi_dma2 1 4 QCOM_GPI_I2C>; @@ -1050,9 +1081,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 5 QCOM_GPI_I2C>, <&gpi_dma2 1 5 QCOM_GPI_I2C>; @@ -1068,9 +1102,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi13_data_clk>, <&qup_spi13_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 5 QCOM_GPI_SPI>, <&gpi_dma2 1 5 QCOM_GPI_SPI>; @@ -1088,8 +1125,10 @@ pinctrl-names = "default"; pinctrl-0 = <&qup_uart14_default>, <&qup_uart14_cts_rts>; interrupts = ; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1104,9 +1143,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 7 QCOM_GPI_I2C>, <&gpi_dma2 1 7 QCOM_GPI_I2C>; @@ -1122,9 +1164,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi15_data_clk>, <&qup_spi15_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 7 QCOM_GPI_SPI>, <&gpi_dma2 1 7 QCOM_GPI_SPI>; @@ -1156,8 +1201,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1173,8 +1220,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1190,8 +1239,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1207,8 +1258,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1224,8 +1277,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1241,8 +1296,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1258,8 +1315,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1275,8 +1334,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1292,8 +1353,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1309,8 +1372,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1347,7 +1412,8 @@ clocks = <&gcc GCC_QUPV3_WRAP_1_M_AHB_CLK>, <&gcc GCC_QUPV3_WRAP_1_S_AHB_CLK>; iommus = <&apps_smmu 0xa3 0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core"; dma-coherent; #address-cells = <2>; @@ -1364,9 +1430,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 0 QCOM_GPI_I2C>, <&gpi_dma1 1 0 QCOM_GPI_I2C>; @@ -1382,9 +1451,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi0_data_clk>, <&qup_spi0_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 0 QCOM_GPI_SPI>, <&gpi_dma1 1 0 QCOM_GPI_SPI>; @@ -1404,9 +1476,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 1 QCOM_GPI_I2C>, <&gpi_dma1 1 1 QCOM_GPI_I2C>; @@ -1422,9 +1497,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi1_data_clk>, <&qup_spi1_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 1 QCOM_GPI_SPI>, <&gpi_dma1 1 1 QCOM_GPI_SPI>; @@ -1444,9 +1522,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 2 QCOM_GPI_I2C>, <&gpi_dma1 1 2 QCOM_GPI_I2C>; @@ -1462,9 +1543,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi2_data_clk>, <&qup_spi2_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 2 QCOM_GPI_SPI>, <&gpi_dma1 1 2 QCOM_GPI_SPI>; @@ -1484,9 +1568,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 3 QCOM_GPI_I2C>, <&gpi_dma1 1 3 QCOM_GPI_I2C>; @@ -1502,9 +1589,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi3_data_clk>, <&qup_spi3_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 3 QCOM_GPI_SPI>, <&gpi_dma1 1 3 QCOM_GPI_SPI>; @@ -1524,9 +1614,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 4 QCOM_GPI_I2C>, <&gpi_dma1 1 4 QCOM_GPI_I2C>; @@ -1542,9 +1635,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi4_data_clk>, <&qup_spi4_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 4 QCOM_GPI_SPI>, <&gpi_dma1 1 4 QCOM_GPI_SPI>; @@ -1562,9 +1658,12 @@ pinctrl-names = "default"; pinctrl-0 = <&qup_i2c5_data_clk>; interrupts = ; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 5 QCOM_GPI_I2C>, <&gpi_dma1 1 5 QCOM_GPI_I2C>; @@ -1582,9 +1681,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi5_data_clk>, <&qup_spi5_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 5 QCOM_GPI_SPI>, <&gpi_dma1 1 5 QCOM_GPI_SPI>; @@ -1602,9 +1704,12 @@ pinctrl-names = "default"; pinctrl-0 = <&qup_i2c6_data_clk>; interrupts = ; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 6 QCOM_GPI_I2C>, <&gpi_dma1 1 6 QCOM_GPI_I2C>; @@ -1622,9 +1727,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi6_data_clk>, <&qup_spi6_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 6 QCOM_GPI_SPI>, <&gpi_dma1 1 6 QCOM_GPI_SPI>; @@ -1643,8 +1751,10 @@ pinctrl-0 = <&qup_uart7_default>; interrupts = ; interconnect-names = "qup-core", "qup-config"; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>; status = "disabled"; }; }; @@ -1768,8 +1878,10 @@ "ddrss_sf_tbu", "noc_aggr"; - interconnects = <&pcie_noc MASTER_PCIE_0 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_0 0>; + interconnects = <&pcie_noc MASTER_PCIE_0 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &cnoc_main SLAVE_PCIE_0 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "pcie-mem", "cpu-pcie"; msi-map = <0x0 &gic_its 0x1400 0x1>, @@ -1891,8 +2003,10 @@ assigned-clocks = <&gcc GCC_PCIE_1_AUX_CLK>; assigned-clock-rates = <19200000>; - interconnects = <&pcie_noc MASTER_PCIE_1 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_1 0>; + interconnects = <&pcie_noc MASTER_PCIE_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &cnoc_main SLAVE_PCIE_1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "pcie-mem", "cpu-pcie"; msi-map = <0x0 &gic_its 0x1480 0x1>, @@ -1971,7 +2085,8 @@ dma-names = "rx", "tx"; iommus = <&apps_smmu 0x480 0x0>, <&apps_smmu 0x481 0x0>; - interconnects = <&aggre2_noc MASTER_CRYPTO 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&aggre2_noc MASTER_CRYPTO QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "memory"; }; @@ -2015,8 +2130,10 @@ dma-coherent; operating-points-v2 = <&ufs_opp_table>; - interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>; + interconnects = <&aggre1_noc MASTER_UFS_MEM QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_UFS_MEM_CFG QCOM_ICC_TAG_ALWAYS>; interconnect-names = "ufs-ddr", "cpu-ufs"; clock-names = "core_clk", @@ -2316,8 +2433,10 @@ clocks = <&rpmhcc RPMH_IPA_CLK>; clock-names = "core"; - interconnects = <&aggre2_noc MASTER_IPA 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_IPA_CFG 0>; + interconnects = <&aggre2_noc MASTER_IPA QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_IPA_CFG QCOM_ICC_TAG_ALWAYS>; interconnect-names = "memory", "config"; @@ -2351,7 +2470,8 @@ <&rpmhpd RPMHPD_MSS>; power-domain-names = "cx", "mss"; - interconnects = <&mc_virt MASTER_LLCC 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&mc_virt MASTER_LLCC QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; memory-region = <&mpss_mem>, <&q6_mpss_dtb_mem>, <&mpss_dsm_mem>; @@ -2392,7 +2512,8 @@ <&rpmhpd RPMHPD_LMX>; power-domain-names = "lcx", "lmx"; - interconnects = <&lpass_lpicx_noc MASTER_LPASS_PROC 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&lpass_lpicx_noc MASTER_LPASS_PROC QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; memory-region = <&adspslpi_mem>, <&q6_adsp_dtb_mem>; @@ -2850,8 +2971,10 @@ power-domains = <&rpmhpd RPMHPD_CX>; operating-points-v2 = <&sdhc2_opp_table>; - interconnects = <&aggre2_noc MASTER_SDCC_2 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_SDCC_2 0>; + interconnects = <&aggre2_noc MASTER_SDCC_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_SDCC_2 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "sdhc-ddr", "cpu-sdhc"; bus-width = <4>; dma-coherent; @@ -3022,7 +3145,8 @@ power-domains = <&dispcc MDSS_GDSC>; - interconnects = <&mmss_noc MASTER_MDP 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&mmss_noc MASTER_MDP QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "mdp0-mem"; iommus = <&apps_smmu 0x1c00 0x2>; @@ -3495,8 +3619,10 @@ resets = <&gcc GCC_USB30_PRIM_BCR>; - interconnects = <&aggre1_noc MASTER_USB3_0 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_0 0>; + interconnects = <&aggre1_noc MASTER_USB3_0 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_USB3_0 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "usb-ddr", "apps-usb"; status = "disabled"; @@ -4619,7 +4745,8 @@ compatible = "qcom,sm8550-llcc-bwmon", "qcom,sc7280-llcc-bwmon"; reg = <0 0x24091000 0 0x1000>; interrupts = ; - interconnects = <&mc_virt MASTER_LLCC 3 &mc_virt SLAVE_EBI1 3>; + interconnects = <&mc_virt MASTER_LLCC QCOM_ICC_TAG_ACTIVE_ONLY + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ACTIVE_ONLY>; operating-points-v2 = <&llcc_bwmon_opp_table>; @@ -4668,7 +4795,8 @@ compatible = "qcom,sm8550-cpu-bwmon", "qcom,sdm845-bwmon"; reg = <0 0x240b6400 0 0x600>; interrupts = ; - interconnects = <&gem_noc MASTER_APPSS_PROC 3 &gem_noc SLAVE_LLCC 3>; + interconnects = <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY + &gem_noc SLAVE_LLCC QCOM_ICC_TAG_ACTIVE_ONLY>; operating-points-v2 = <&cpu_bwmon_opp_table>; @@ -4752,7 +4880,8 @@ <&rpmhpd RPMHPD_NSP>; power-domain-names = "cx", "mxc", "nsp"; - interconnects = <&nsp_noc MASTER_CDSP_PROC 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&nsp_noc MASTER_CDSP_PROC QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; memory-region = <&cdsp_mem>, <&q6_cdsp_dtb_mem>; From 2b5b8fc253049356f118122483bfe31910b0f6d5 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 27 Feb 2025 10:00:32 +0100 Subject: [PATCH 2404/2924] arm64: dts: qcom: sm8550: add missing cpu-cfg interconnect path in the mdss node [ Upstream commit 327d489d1ecaf16182952f079cc21f04cf83f967 ] The bindings requires the mdp0-mem and the cpu-cfg interconnect path, add the missing cpu-cfg path to fix the dtbs check error and also to ensure that MDSS has enough bandwidth to let HLOS write config registers. Fixes: b8591df49cde ("arm64: dts: qcom: sm8550: correct MDSS interconnects") Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250227-topic-sm8x50-mdss-interconnect-bindings-fix-v5-1-bf6233c6ebe5@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 9465b00f1e74cd..65ebddd124e2a8 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -3146,8 +3146,10 @@ power-domains = <&dispcc MDSS_GDSC>; interconnects = <&mmss_noc MASTER_MDP QCOM_ICC_TAG_ALWAYS - &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; - interconnect-names = "mdp0-mem"; + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY + &config_noc SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ACTIVE_ONLY>; + interconnect-names = "mdp0-mem", "cpu-cfg"; iommus = <&apps_smmu 0x1c00 0x2>; From c6b9e9b965e54ea63700747ef1d22c76965b77c7 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 27 Feb 2025 10:00:33 +0100 Subject: [PATCH 2405/2924] arm64: dts: qcom: sm8650: add missing cpu-cfg interconnect path in the mdss node [ Upstream commit f22be5c1dd3e12519e3f3b80c14d10b90be2c2fc ] The bindings requires the mdp0-mem and the cpu-cfg interconnect path, add the missing cpu-cfg path to fix the dtbs check error and also to ensure that MDSS has enough bandwidth to let HLOS write config registers. Fixes: 9fa33cbca3d2 ("arm64: dts: qcom: sm8650: correct MDSS interconnects") Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250227-topic-sm8x50-mdss-interconnect-bindings-fix-v5-2-bf6233c6ebe5@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8650.dtsi | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi index 9591c13edbb9d4..36919efc888c23 100644 --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi @@ -3658,8 +3658,11 @@ resets = <&dispcc DISP_CC_MDSS_CORE_BCR>; interconnects = <&mmss_noc MASTER_MDP QCOM_ICC_TAG_ALWAYS - &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; - interconnect-names = "mdp0-mem"; + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY + &config_noc SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ACTIVE_ONLY>; + interconnect-names = "mdp0-mem", + "cpu-cfg"; power-domains = <&dispcc MDSS_GDSC>; From 3194c159d25f81c1d190d9364322b0ed8ee127a1 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 4 Mar 2025 18:10:46 +0100 Subject: [PATCH 2406/2924] arm64: dts: qcom: x1e80100-romulus: Keep L12B and L15B always on [ Upstream commit 0783c8b3c06b9cf16b5108d558e2faffb8c533b7 ] These regulators power some electronic components onboard. They're most likely kept online by other pieces of firmware, but you can never be sure enough. Fixes: 09d77be56093 ("arm64: dts: qcom: Add support for X1-based Surface Laptop 7 devices") Reported-by: Johan Hovold Signed-off-by: Konrad Dybcio Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20250304-topic-sl7_vregs_aon-v1-1-b2dc706e4157@oss.qualcomm.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi b/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi index 5867953c73564c..6a883fafe3c77a 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi @@ -510,6 +510,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l13b: ldo13 { @@ -531,6 +532,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l16b: ldo16 { From 0dec196ee3968da6a411795370a21ca7556f7d88 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Tue, 25 Feb 2025 19:38:53 +0300 Subject: [PATCH 2407/2924] arm64: dts: qcom: sdm845-starqltechn: remove wifi [ Upstream commit 2d3dd4b237638853b8a99353401ab8d88a6afb6c ] Starqltechn has broadcom chip for wifi, so sdm845 wifi part can be disabled. Fixes: d711b22eee55 ("arm64: dts: qcom: starqltechn: add initial device tree for starqltechn") Reviewed-by: Konrad Dybcio Signed-off-by: Dzmitry Sankouski Fixes: d711b22eee55 ("arm64: dts: qcom: starqltechn: add initial device tree for starqltechn") Reviewed-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20250225-starqltechn_integration_upstream-v9-2-a5d80375cb66@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts b/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts index d37a433130b98f..6fc30fd1262b88 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts @@ -418,14 +418,6 @@ status = "okay"; }; -&wifi { - vdd-0.8-cx-mx-supply = <&vreg_l5a_0p8>; - vdd-1.8-xo-supply = <&vreg_l7a_1p8>; - vdd-1.3-rfa-supply = <&vreg_l17a_1p3>; - vdd-3.3-ch0-supply = <&vreg_l25a_3p3>; - status = "okay"; -}; - &tlmm { gpio-reserved-ranges = <0 4>, <27 4>, <81 4>, <85 4>; From 59cb1654f78378534626c04525abf8290e4dbd22 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Tue, 25 Feb 2025 19:38:54 +0300 Subject: [PATCH 2408/2924] arm64: dts: qcom: sdm845-starqltechn: fix usb regulator mistake [ Upstream commit 242e4126ee007b95765c21a9d74651fdcf221f2b ] Usb regulator was wrongly pointed to vreg_l1a_0p875. However, on starqltechn it's powered from vreg_l5a_0p8. Fixes: d711b22eee55 ("arm64: dts: qcom: starqltechn: add initial device tree for starqltechn") Reviewed-by: Konrad Dybcio Signed-off-by: Dzmitry Sankouski Link: https://lore.kernel.org/r/20250225-starqltechn_integration_upstream-v9-3-a5d80375cb66@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts b/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts index 6fc30fd1262b88..f3f2b25883d812 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts @@ -135,8 +135,6 @@ vdda_sp_sensor: vdda_ufs1_core: vdda_ufs2_core: - vdda_usb1_ss_core: - vdda_usb2_ss_core: vreg_l1a_0p875: ldo1 { regulator-min-microvolt = <880000>; regulator-max-microvolt = <880000>; @@ -157,6 +155,7 @@ regulator-initial-mode = ; }; + vdda_usb1_ss_core: vdd_wcss_cx: vdd_wcss_mx: vdda_wcss_pll: From 7e8142870d35d07bb11821cde6d7cfb5e584e0bf Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Tue, 25 Feb 2025 19:38:55 +0300 Subject: [PATCH 2409/2924] arm64: dts: qcom: sdm845-starqltechn: refactor node order [ Upstream commit cba1dd3d851ebc1b6c5ae4000208a9753320694b ] Fixes: d711b22eee55 ("arm64: dts: qcom: starqltechn: add initial device tree for starqltechn") Signed-off-by: Dzmitry Sankouski Link: https://lore.kernel.org/r/20250225-starqltechn_integration_upstream-v9-4-a5d80375cb66@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts b/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts index f3f2b25883d812..8a0d63bd594b3b 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts @@ -382,8 +382,8 @@ }; &sdhc_2 { - pinctrl-names = "default"; pinctrl-0 = <&sdc2_clk_state &sdc2_cmd_state &sdc2_data_state &sd_card_det_n_state>; + pinctrl-names = "default"; cd-gpios = <&tlmm 126 GPIO_ACTIVE_LOW>; vmmc-supply = <&vreg_l21a_2p95>; vqmmc-supply = <&vddpx_2>; From 5927e89cc40d4e634623c2bbbb79e0581235ac88 Mon Sep 17 00:00:00 2001 From: Dzmitry Sankouski Date: Tue, 25 Feb 2025 19:38:56 +0300 Subject: [PATCH 2410/2924] arm64: dts: qcom: sdm845-starqltechn: remove excess reserved gpios [ Upstream commit fb5fce873b952f8b1c5f7edcabcc8611ef45ea7a ] Starqltechn has 2 reserved gpio ranges <27 4>, <85 4>. <27 4> is spi for eSE(embedded Secure Element). <85 4> is spi for fingerprint. Remove excess reserved gpio regions. Fixes: d711b22eee55 ("arm64: dts: qcom: starqltechn: add initial device tree for starqltechn") Reviewed-by: Konrad Dybcio Signed-off-by: Dzmitry Sankouski Link: https://lore.kernel.org/r/20250225-starqltechn_integration_upstream-v9-5-a5d80375cb66@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts b/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts index 8a0d63bd594b3b..5948b401165ce9 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts @@ -418,7 +418,8 @@ }; &tlmm { - gpio-reserved-ranges = <0 4>, <27 4>, <81 4>, <85 4>; + gpio-reserved-ranges = <27 4>, /* SPI (eSE - embedded Secure Element) */ + <85 4>; /* SPI (fingerprint reader) */ sdc2_clk_state: sdc2-clk-state { pins = "sdc2_clk"; From 4cc706cc9d12ba0887e5220d13d302e247b7f75e Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Wed, 12 Feb 2025 18:03:47 +0100 Subject: [PATCH 2411/2924] arm64: dts: qcom: sm8350: Reenable crypto & cryptobam [ Upstream commit 75eefd474469abf95aa9ef6da8161d69f86b98b4 ] When num-channels and qcom,num-ees is not provided in devicetree, the driver will try to read these values from the registers during probe but this fails if the interconnect is not on and then crashes the system. So we can provide these properties in devicetree (queried after patching BAM driver to enable the necessary interconnect) so we can probe cryptobam without reading registers and then also use the QCE as expected. Fixes: 4d29db204361 ("arm64: dts: qcom: sm8350: fix BAM DMA crash and reboot") Fixes: f1040a7fe8f0 ("arm64: dts: qcom: sm8350: Add Crypto Engine support") Signed-off-by: Luca Weiss Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20250212-bam-dma-fixes-v1-1-f560889e65d8@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8350.dtsi | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index f055600d6cfe5b..a86d0067634e81 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -1806,11 +1806,11 @@ interrupts = ; #dma-cells = <1>; qcom,ee = <0>; + qcom,num-ees = <4>; + num-channels = <16>; qcom,controlled-remotely; iommus = <&apps_smmu 0x594 0x0011>, <&apps_smmu 0x596 0x0011>; - /* FIXME: Probing BAM DMA causes some abort and system hang */ - status = "fail"; }; crypto: crypto@1dfa000 { @@ -1822,8 +1822,6 @@ <&apps_smmu 0x596 0x0011>; interconnects = <&aggre2_noc MASTER_CRYPTO 0 &mc_virt SLAVE_EBI1 0>; interconnect-names = "memory"; - /* FIXME: dependency BAM DMA is disabled */ - status = "disabled"; }; ipa: ipa@1e40000 { From 8e26aecdd5170482ba566532687bb156629e7bb2 Mon Sep 17 00:00:00 2001 From: Manikanta Mylavarapu Date: Thu, 13 Mar 2025 12:44:22 +0530 Subject: [PATCH 2412/2924] arm64: dts: qcom: ipq9574: fix the msi interrupt numbers of pcie3 [ Upstream commit c87d58bc7f831bf3d887e6ec846246cb673c2e50 ] The MSI interrupt numbers of the PCIe3 controller are incorrect. Due to this, the functional bring up of the QDSP6 processor on the PCIe endpoint has failed. Correct the MSI interrupt numbers to properly bring up the QDSP6 processor on the PCIe endpoint. Fixes: d80c7fbfa908 ("arm64: dts: qcom: ipq9574: Add PCIe PHYs and controller nodes") Signed-off-by: Manikanta Mylavarapu Link: https://lore.kernel.org/r/20250313071422.510-1-quic_mmanikan@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/ipq9574.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/ipq9574.dtsi b/arch/arm64/boot/dts/qcom/ipq9574.dtsi index 3c02351fbb156a..b790a6b288abb8 100644 --- a/arch/arm64/boot/dts/qcom/ipq9574.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq9574.dtsi @@ -974,14 +974,14 @@ ranges = <0x01000000 0x0 0x00000000 0x18200000 0x0 0x100000>, <0x02000000 0x0 0x18300000 0x18300000 0x0 0x7d00000>; - interrupts = , - , - , - , - , - , - , - ; + interrupts = , + , + , + , + , + , + , + ; interrupt-names = "msi0", "msi1", "msi2", From 9db98d2ca196b83d5c16d2b8d039aee4b922c478 Mon Sep 17 00:00:00 2001 From: Maulik Shah Date: Wed, 26 Feb 2025 12:21:27 +0530 Subject: [PATCH 2413/2924] arm64: dts: qcom: sm8750: Fix cluster hierarchy for idle states [ Upstream commit 778dc0f876c70b3d781a49981560ec88e1b7083a ] SM8750 have two different clusters. cluster0 have CPU 0-5 as child and cluster1 have CPU 6-7 as child. Each cluster requires its own idle state and power domain in order to achieve complete domain sleep state. However only single cluster idle state is added mapping CPU 0-7 to the same power domain. Fix this by correctly mapping each CPU to respective cluster power domain and make cluster1 power domain use same domain idle state as cluster0 since both use same idle state parameters. Fixes: 068c3d3c83be ("arm64: dts: qcom: Add base SM8750 dtsi") Signed-off-by: Maulik Shah Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250226-sm8750_cluster_idle-v2-1-ef0ac81e242f@oss.qualcomm.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8750.dtsi | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8750.dtsi b/arch/arm64/boot/dts/qcom/sm8750.dtsi index 3bbd7d18598ee0..d08a2dbeb0f792 100644 --- a/arch/arm64/boot/dts/qcom/sm8750.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8750.dtsi @@ -233,53 +233,59 @@ cpu_pd0: power-domain-cpu0 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd1: power-domain-cpu1 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd2: power-domain-cpu2 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd3: power-domain-cpu3 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd4: power-domain-cpu4 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd5: power-domain-cpu5 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd6: power-domain-cpu6 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster1_pd>; domain-idle-states = <&cluster1_c4>; }; cpu_pd7: power-domain-cpu7 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster1_pd>; domain-idle-states = <&cluster1_c4>; }; - cluster_pd: power-domain-cluster { + cluster0_pd: power-domain-cluster0 { + #power-domain-cells = <0>; + domain-idle-states = <&cluster_cl5>; + power-domains = <&system_pd>; + }; + + cluster1_pd: power-domain-cluster1 { #power-domain-cells = <0>; domain-idle-states = <&cluster_cl5>; power-domains = <&system_pd>; From a211058c6022d4a8d0d9e302a2b45c83c6668019 Mon Sep 17 00:00:00 2001 From: Xilin Wu Date: Sat, 8 Mar 2025 18:27:51 +0800 Subject: [PATCH 2414/2924] arm64: dts: qcom: sm8250: Fix CPU7 opp table [ Upstream commit 28f997b89967afdc0855d8aa7538b251fb44f654 ] There is a typo in cpu7_opp9. Fix it to get rid of the following errors. [ 0.198043] cpu cpu7: Voltage update failed freq=1747200 [ 0.198052] cpu cpu7: failed to update OPP for freq=1747200 Fixes: 8e0e8016cb79 ("arm64: dts: qcom: sm8250: Add CPU opp tables") Signed-off-by: Xilin Wu Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250308-fix-sm8250-cpufreq-v1-1-8a0226721399@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8250.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index c2937b4d9f1802..68613ea7146c88 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -606,7 +606,7 @@ }; cpu7_opp9: opp-1747200000 { - opp-hz = /bits/ 64 <1708800000>; + opp-hz = /bits/ 64 <1747200000>; opp-peak-kBps = <5412000 42393600>; }; From 7610e299ffbc4e177e9df096b6951cc4df55c00b Mon Sep 17 00:00:00 2001 From: Jyothi Kumar Seerapu Date: Wed, 12 Mar 2025 16:13:58 +0530 Subject: [PATCH 2415/2924] arm64: dts: qcom: sm8750: Correct clocks property for uart14 node [ Upstream commit 515551e65635b988f2afa9e8683a6b57d6cfba36 ] Correct the clocks property for the uart14 node to fix UART functionality on QUP2_SE6. The current failure is due to an incorrect clocks assignment. Change the clocks property to GCC_QUPV3_WRAP2_S6_CLK to resolve the issue. Signed-off-by: Jyothi Kumar Seerapu Reviewed-by: Konrad Dybcio Fixes: 068c3d3c83be ("arm64: dts: qcom: Add base SM8750 dtsi") Link: https://lore.kernel.org/r/20250312104358.2558-1-quic_jseerapu@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8750.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8750.dtsi b/arch/arm64/boot/dts/qcom/sm8750.dtsi index d08a2dbeb0f792..e8bb587a7813f9 100644 --- a/arch/arm64/boot/dts/qcom/sm8750.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8750.dtsi @@ -993,7 +993,7 @@ interrupts = ; - clocks = <&gcc GCC_QUPV3_WRAP2_S5_CLK>; + clocks = <&gcc GCC_QUPV3_WRAP2_S6_CLK>; clock-names = "se"; interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS From e05848481b734740173c37b73f30926f0f1bae54 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 3 Dec 2024 18:44:02 +0100 Subject: [PATCH 2416/2924] arm64: dts: qcom: sc8280xp-x13s: Drop duplicate DMIC supplies [ Upstream commit a2e617f4e6981aa514a569e927f90b0d39bb31b2 ] The WCD938x codec provides two controls for each of the MIC_BIASn outputs: - "MIC BIASn" enables an internal regulator to generate the output with a configurable voltage (qcom,micbiasN-microvolt). - "VA MIC BIASn" enables "pull-up mode" that bypasses the internal regulator and directly outputs fixed 1.8V from the VDD_PX pin. This is intended for low-power VA (voice activation) use cases. The audio-routing setup for the ThinkPad X13s currently specifies both as power supplies for the DMICs, but only one of them can be active at the same time. In practice, only the internal regulator is used with the current setup because the driver prefers it over pull-up mode. Make this more clear by dropping the redundant routes to the pull-up "VA MIC BIASn" supply. There is no functional difference except that we skip briefly switching to pull-up mode when shutting down the microphone. Fixes: 2e498f35c385 ("arm64: dts: qcom: sc8280xp-x13s: fix va dmic dai links and routing") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20241203-x1e80100-va-mic-bias-v1-1-0dfd4d9b492c@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index f3190f408f4b2c..0f1ebd869ce315 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -1202,9 +1202,6 @@ "VA DMIC0", "MIC BIAS1", "VA DMIC1", "MIC BIAS1", "VA DMIC2", "MIC BIAS3", - "VA DMIC0", "VA MIC BIAS1", - "VA DMIC1", "VA MIC BIAS1", - "VA DMIC2", "VA MIC BIAS3", "TX SWR_ADC1", "ADC2_OUTPUT"; wcd-playback-dai-link { From 94440b35e91c14f211c41fda802c1310a8efa89a Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Fri, 7 Feb 2025 13:05:45 +0530 Subject: [PATCH 2417/2924] arm64: dts: qcom: ipq9574: Fix USB vdd info [ Upstream commit 4f4c905e6a2a4e884f4e9b7326c94fac3500e0f9 ] USB phys in ipq9574 use the 'L5' regulator. The commit ec4f047679d5 ("arm64: dts: qcom: ipq9574: Enable USB") incorrectly specified it as 'L2'. Because of this when the phy module turns off/on its regulators, the wrong regulator is turned off/on resulting in 2 issues, namely the correct regulator related to the USB phy is not turned off/on and the module powered by the incorrect regulator is affected. Fixes: ec4f047679d5 ("arm64: dts: qcom: ipq9574: Enable USB") Signed-off-by: Varadarajan Narayanan Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250207073545.1768990-2-quic_varada@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/ipq9574-rdp-common.dtsi | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/ipq9574-rdp-common.dtsi b/arch/arm64/boot/dts/qcom/ipq9574-rdp-common.dtsi index ae12f069f26fa5..b24b795873d416 100644 --- a/arch/arm64/boot/dts/qcom/ipq9574-rdp-common.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq9574-rdp-common.dtsi @@ -111,6 +111,13 @@ regulator-always-on; regulator-boot-on; }; + + mp5496_l5: l5 { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + regulator-boot-on; + }; }; }; @@ -146,7 +153,7 @@ }; &usb_0_qmpphy { - vdda-pll-supply = <&mp5496_l2>; + vdda-pll-supply = <&mp5496_l5>; vdda-phy-supply = <®ulator_fixed_0p925>; status = "okay"; @@ -154,7 +161,7 @@ &usb_0_qusbphy { vdd-supply = <®ulator_fixed_0p925>; - vdda-pll-supply = <&mp5496_l2>; + vdda-pll-supply = <&mp5496_l5>; vdda-phy-dpdm-supply = <®ulator_fixed_3p3>; status = "okay"; From fba1e4552c84a443ec8a4e55165390aed32f3593 Mon Sep 17 00:00:00 2001 From: Tingguo Cheng Date: Fri, 17 Jan 2025 11:24:31 +0800 Subject: [PATCH 2418/2924] arm64: dts: qcom: qcs615: remove disallowed property in spmi bus node [ Upstream commit 54040a3e3da67ef0e014e5f04f9f3fe680fc4b55 ] Remove the unevaluated 'cell-index' property from qcs615-ride.dtb spmi@c440000 to fix the Devicetree validation error reported by the kernel test robot. Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202412272210.GpGmqcPC-lkp@intel.com/ Fixes: 27554e2bef4d ("arm64: dts: qcom: qcs615: Adds SPMI support") Signed-off-by: Tingguo Cheng Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250117-fix-kernel-test-robot-unexpected-property-issue-v2-1-0b68cf481249@quicinc.com [bjorn: Fixes commit message wording about LKP] Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/qcs615.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/qcs615.dtsi b/arch/arm64/boot/dts/qcom/qcs615.dtsi index f4abfad474ea62..8db06d17eb4746 100644 --- a/arch/arm64/boot/dts/qcom/qcs615.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs615.dtsi @@ -3304,7 +3304,6 @@ #interrupt-cells = <4>; #address-cells = <2>; #size-cells = <0>; - cell-index = <0>; qcom,channel = <0>; qcom,ee = <0>; }; From 4f8763e6cde4324e017af315feec341620aa584c Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Fri, 14 Mar 2025 09:21:16 +0100 Subject: [PATCH 2419/2924] arm64: dts: qcom: sm8650: Fix domain-idle-state for CPU2 [ Upstream commit 9bb5ca464100e7c8f2d740148088f60e04fed8ed ] On SM8650 the CPUs 0-1 are "silver" (Cortex-A520), CPU 2-6 are "gold" (Cortex-A720) and CPU 7 is "gold-plus" (Cortex-X4). So reference the correct "gold" idle-state for CPU core 2. Fixes: d2350377997f ("arm64: dts: qcom: add initial SM8650 dtsi") Signed-off-by: Luca Weiss Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250314-sm8650-cpu2-sleep-v1-1-31d5c7c87a5d@fairphone.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8650.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi index 36919efc888c23..bf6590e09a4c4f 100644 --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi @@ -460,7 +460,7 @@ cpu_pd2: power-domain-cpu2 { #power-domain-cells = <0>; power-domains = <&cluster_pd>; - domain-idle-states = <&silver_cpu_sleep_0>; + domain-idle-states = <&gold_cpu_sleep_0>; }; cpu_pd3: power-domain-cpu3 { From 2f5b6e87ca5b691913fd5b8b34c781212e048250 Mon Sep 17 00:00:00 2001 From: Chukun Pan Date: Tue, 1 Apr 2025 18:00:18 +0800 Subject: [PATCH 2420/2924] arm64: dts: rockchip: Add missing uart3 interrupt for RK3528 [ Upstream commit a37d21a9b45e47ed6bc1f94e738096c07db78a07 ] The interrupt of uart3 node on rk3528 is missing, fix it. Fixes: 7983e6c379a9 ("arm64: dts: rockchip: Add base DT for rk3528 SoC") Reviewed-by: Yao Zi Signed-off-by: Chukun Pan Link: https://lore.kernel.org/r/20250401100020.944658-2-amadeus@jmu.edu.cn Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3528.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3528.dtsi b/arch/arm64/boot/dts/rockchip/rk3528.dtsi index 26c3559d6a6deb..7f1ffd6003f581 100644 --- a/arch/arm64/boot/dts/rockchip/rk3528.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3528.dtsi @@ -404,9 +404,10 @@ uart3: serial@ffa08000 { compatible = "rockchip,rk3528-uart", "snps,dw-apb-uart"; + reg = <0x0 0xffa08000 0x0 0x100>; clocks = <&cru SCLK_UART3>, <&cru PCLK_UART3>; clock-names = "baudclk", "apb_pclk"; - reg = <0x0 0xffa08000 0x0 0x100>; + interrupts = ; reg-io-width = <4>; reg-shift = <2>; status = "disabled"; From cb4f48794f321b19078c982c3e2228e8ca753238 Mon Sep 17 00:00:00 2001 From: Chukun Pan Date: Tue, 1 Apr 2025 17:00:09 +0800 Subject: [PATCH 2421/2924] arm64: dts: rockchip: Move SHMEM memory to reserved memory on rk3588 [ Upstream commit 8ecd096d018be8a6bd3bd930f3a41a85db66a67d ] 0x0 to 0xf0000000 are SDRAM memory areas where 0x10f000 is located. So move the SHMEM memory of arm_scmi to the reserved memory node. Fixes: c9211fa2602b ("arm64: dts: rockchip: Add base DT for rk3588 SoC") Signed-off-by: Chukun Pan Link: https://lore.kernel.org/r/20250401090009.733771-2-amadeus@jmu.edu.cn Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3588-base.dtsi | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi index 1e18ad93ba0ebd..c52af310c7062e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi @@ -439,16 +439,15 @@ #clock-cells = <0>; }; - pmu_sram: sram@10f000 { - compatible = "mmio-sram"; - reg = <0x0 0x0010f000 0x0 0x100>; - ranges = <0 0x0 0x0010f000 0x100>; - #address-cells = <1>; - #size-cells = <1>; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; - scmi_shmem: sram@0 { + scmi_shmem: shmem@10f000 { compatible = "arm,scmi-shmem"; - reg = <0x0 0x100>; + reg = <0x0 0x0010f000 0x0 0x100>; + no-map; }; }; From e733178af2eb00d594cb45e3b0ed8f59d6ef63c5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 4 Apr 2025 13:27:43 +0200 Subject: [PATCH 2422/2924] ARM: dts: at91: usb_a9263: fix GPIO for Dataflash chip select [ Upstream commit 67ba341e57ab158423818ed33bfa1c40eb0e5e7e ] Dataflash did not work on my board. After checking schematics and using the proper GPIO, it works now. Also, make it active low to avoid: flash@0 enforce active low on GPIO handle Fixes: 2432d201468d ("ARM: at91: dt: usb-a9263: add dataflash support") Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250404112742.67416-2-wsa+renesas@sang-engineering.com Signed-off-by: Claudiu Beznea Signed-off-by: Sasha Levin --- arch/arm/boot/dts/microchip/usb_a9263.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/microchip/usb_a9263.dts b/arch/arm/boot/dts/microchip/usb_a9263.dts index 60d7936dc56274..6af450cb387c95 100644 --- a/arch/arm/boot/dts/microchip/usb_a9263.dts +++ b/arch/arm/boot/dts/microchip/usb_a9263.dts @@ -58,7 +58,7 @@ }; spi0: spi@fffa4000 { - cs-gpios = <&pioB 15 GPIO_ACTIVE_HIGH>; + cs-gpios = <&pioA 5 GPIO_ACTIVE_LOW>; status = "okay"; flash@0 { compatible = "atmel,at45", "atmel,dataflash"; From 4e1131a18ffd6b9a862829106d880e1389b3afe3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 2 Apr 2025 23:04:46 +0200 Subject: [PATCH 2423/2924] ARM: dts: at91: at91sam9263: fix NAND chip selects [ Upstream commit c72ede1c24be689733bcd2233a3a56f2478429c8 ] NAND did not work on my USB-A9263. I discovered that the offending commit converted the PIO bank for chip selects wrongly, so all A9263 boards need to be fixed. Fixes: 1004a2977bdc ("ARM: dts: at91: Switch to the new NAND bindings") Signed-off-by: Wolfram Sang Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/20250402210446.5972-2-wsa+renesas@sang-engineering.com Signed-off-by: Claudiu Beznea Signed-off-by: Sasha Levin --- arch/arm/boot/dts/microchip/at91sam9263ek.dts | 2 +- arch/arm/boot/dts/microchip/tny_a9263.dts | 2 +- arch/arm/boot/dts/microchip/usb_a9263.dts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/boot/dts/microchip/at91sam9263ek.dts b/arch/arm/boot/dts/microchip/at91sam9263ek.dts index 471ea25296aa14..93c5268a0845d0 100644 --- a/arch/arm/boot/dts/microchip/at91sam9263ek.dts +++ b/arch/arm/boot/dts/microchip/at91sam9263ek.dts @@ -152,7 +152,7 @@ nand@3 { reg = <0x3 0x0 0x800000>; rb-gpios = <&pioA 22 GPIO_ACTIVE_HIGH>; - cs-gpios = <&pioA 15 GPIO_ACTIVE_HIGH>; + cs-gpios = <&pioD 15 GPIO_ACTIVE_HIGH>; nand-bus-width = <8>; nand-ecc-mode = "soft"; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/microchip/tny_a9263.dts b/arch/arm/boot/dts/microchip/tny_a9263.dts index 3dd48b3e06da57..fd8244b56e0593 100644 --- a/arch/arm/boot/dts/microchip/tny_a9263.dts +++ b/arch/arm/boot/dts/microchip/tny_a9263.dts @@ -64,7 +64,7 @@ nand@3 { reg = <0x3 0x0 0x800000>; rb-gpios = <&pioA 22 GPIO_ACTIVE_HIGH>; - cs-gpios = <&pioA 15 GPIO_ACTIVE_HIGH>; + cs-gpios = <&pioD 15 GPIO_ACTIVE_HIGH>; nand-bus-width = <8>; nand-ecc-mode = "soft"; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/microchip/usb_a9263.dts b/arch/arm/boot/dts/microchip/usb_a9263.dts index 6af450cb387c95..8e1a3fb61087ca 100644 --- a/arch/arm/boot/dts/microchip/usb_a9263.dts +++ b/arch/arm/boot/dts/microchip/usb_a9263.dts @@ -84,7 +84,7 @@ nand@3 { reg = <0x3 0x0 0x800000>; rb-gpios = <&pioA 22 GPIO_ACTIVE_HIGH>; - cs-gpios = <&pioA 15 GPIO_ACTIVE_HIGH>; + cs-gpios = <&pioD 15 GPIO_ACTIVE_HIGH>; nand-bus-width = <8>; nand-ecc-mode = "soft"; nand-on-flash-bbt; From a4f9b05a7ec5c2eb45111ce6380a42616c8c9f86 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 8 Apr 2025 17:23:02 +0800 Subject: [PATCH 2424/2924] arm64: dts: mediatek: mt8188: Fix IOMMU device for rdma0 [ Upstream commit 267623000d11f6d483214be2484555f600393a12 ] Based on the comments in the MT8188 IOMMU binding header, the rdma0 device specifies the wrong IOMMU device for the IOMMU port it is tied to: This SoC have two MM IOMMU HWs, this is the connected information: iommu-vdo: larb0/2/5/9/10/11A/11C/13/16B/17B/19/21 iommu-vpp: larb1/3/4/6/7/11B/12/14/15/16A/17A/23/27 rdma0's endpoint is M4U_PORT_L1_DISP_RDMA0 (on larb1), which should use iommu-vpp, but it is currently tied to iommu-vdo. Somehow this went undetected until recently in Linux v6.15-rc1 with some IOMMU subsystem framework changes that caused the IOMMU to no longer work. The IOMMU would fail to probe if any devices associated with it could not be successfully attached. Prior to these changes, only the end device would be left without an IOMMU attached. Fixes: 7075b21d1a8e ("arm64: dts: mediatek: mt8188: Add display nodes for vdosys0") Signed-off-by: Chen-Yu Tsai Reviewed-by: Jason-JH Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20250408092303.3563231-1-wenst@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8188.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8188.dtsi b/arch/arm64/boot/dts/mediatek/mt8188.dtsi index 69a8423d385890..29d35ca945973c 100644 --- a/arch/arm64/boot/dts/mediatek/mt8188.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8188.dtsi @@ -2579,7 +2579,7 @@ reg = <0 0x1c002000 0 0x1000>; clocks = <&vdosys0 CLK_VDO0_DISP_RDMA0>; interrupts = ; - iommus = <&vdo_iommu M4U_PORT_L1_DISP_RDMA0>; + iommus = <&vpp_iommu M4U_PORT_L1_DISP_RDMA0>; power-domains = <&spm MT8188_POWER_DOMAIN_VDOSYS0>; mediatek,gce-client-reg = <&gce0 SUBSYS_1c00XXXX 0x2000 0x1000>; From d95d216655a15607199605e750c089889d1c020a Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 2 Apr 2025 11:06:15 +0200 Subject: [PATCH 2425/2924] arm64: dts: mediatek: mt8195: Reparent vdec1/2 and venc1 power domains [ Upstream commit 394f29033324e2317bfd6a7ed99b9a60832b36a2 ] By hardware, the first and second core of the video decoder IP need the VDEC_SOC to be powered up in order to be able to be accessed (both internally, by firmware, and externally, by the kernel). Similarly, for the video encoder IP, the second core needs the first core to be powered up in order to be accessible. Fix that by reparenting the VDEC1/2 power domains to be children of VDEC0 (VDEC_SOC), and the VENC1 to be a child of VENC0. Fixes: 2b515194bf0c ("arm64: dts: mt8195: Add power domains controller") Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20250402090615.25871-3-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 50 +++++++++++++----------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 4f2dc0a7556610..1ded4b3f87605f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -617,22 +617,6 @@ #size-cells = <0>; #power-domain-cells = <1>; - power-domain@MT8195_POWER_DOMAIN_VDEC1 { - reg = ; - clocks = <&vdecsys CLK_VDEC_LARB1>; - clock-names = "vdec1-0"; - mediatek,infracfg = <&infracfg_ao>; - #power-domain-cells = <0>; - }; - - power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 { - reg = ; - clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>; - clock-names = "venc1-larb"; - mediatek,infracfg = <&infracfg_ao>; - #power-domain-cells = <0>; - }; - power-domain@MT8195_POWER_DOMAIN_VDOSYS0 { reg = ; clocks = <&topckgen CLK_TOP_CFG_VDO0>, @@ -678,15 +662,25 @@ clocks = <&vdecsys_soc CLK_VDEC_SOC_LARB1>; clock-names = "vdec0-0"; mediatek,infracfg = <&infracfg_ao>; + #address-cells = <1>; + #size-cells = <0>; #power-domain-cells = <0>; - }; - power-domain@MT8195_POWER_DOMAIN_VDEC2 { - reg = ; - clocks = <&vdecsys_core1 CLK_VDEC_CORE1_LARB1>; - clock-names = "vdec2-0"; - mediatek,infracfg = <&infracfg_ao>; - #power-domain-cells = <0>; + power-domain@MT8195_POWER_DOMAIN_VDEC1 { + reg = ; + clocks = <&vdecsys CLK_VDEC_LARB1>; + clock-names = "vdec1-0"; + mediatek,infracfg = <&infracfg_ao>; + #power-domain-cells = <0>; + }; + + power-domain@MT8195_POWER_DOMAIN_VDEC2 { + reg = ; + clocks = <&vdecsys_core1 CLK_VDEC_CORE1_LARB1>; + clock-names = "vdec2-0"; + mediatek,infracfg = <&infracfg_ao>; + #power-domain-cells = <0>; + }; }; power-domain@MT8195_POWER_DOMAIN_VENC { @@ -694,7 +688,17 @@ clocks = <&vencsys CLK_VENC_LARB>; clock-names = "venc0-larb"; mediatek,infracfg = <&infracfg_ao>; + #address-cells = <1>; + #size-cells = <0>; #power-domain-cells = <0>; + + power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 { + reg = ; + clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>; + clock-names = "venc1-larb"; + mediatek,infracfg = <&infracfg_ao>; + #power-domain-cells = <0>; + }; }; power-domain@MT8195_POWER_DOMAIN_VDOSYS1 { From 4c14c9e2c714b1ce494699b8b5cd85ee66714502 Mon Sep 17 00:00:00 2001 From: Alexey Minnekhanov Date: Tue, 15 Apr 2025 16:01:01 +0300 Subject: [PATCH 2426/2924] arm64: dts: qcom: sdm660-xiaomi-lavender: Add missing SD card detect GPIO [ Upstream commit 2eca6af66709de0d1ba14cdf8b6d200a1337a3a2 ] During initial porting these cd-gpios were missed. Having card detect is beneficial because driver does not need to do polling every second and it can just use IRQ. SD card detection in U-Boot is also fixed by this. Fixes: cf85e9aee210 ("arm64: dts: qcom: sdm660-xiaomi-lavender: Add eMMC and SD") Signed-off-by: Alexey Minnekhanov Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250415130101.1429281-1-alexeymin@postmarketos.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts b/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts index 7167f75bced3fd..0b4d71c14a7723 100644 --- a/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts +++ b/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts @@ -404,6 +404,8 @@ &sdhc_2 { status = "okay"; + cd-gpios = <&tlmm 54 GPIO_ACTIVE_HIGH>; + vmmc-supply = <&vreg_l5b_2p95>; vqmmc-supply = <&vreg_l2b_2p95>; }; From c3953771dc2edab676e6481fc780fa906ee2aa9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 19 Mar 2025 05:38:23 +0000 Subject: [PATCH 2427/2924] firmware: exynos-acpm: fix reading longer results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 67af3cd813695fd3e6432b0849c453250c4685aa ] ACPM commands that return more than 8 bytes currently don't work correctly, as this driver ignores any such returned bytes. This is evident in at least acpm_pmic_bulk_read(), where up to 8 registers can be read back and those 8 register values are placed starting at &xfer->rxd[8]. The reason is that xfter->rxlen is initialized with the size of a pointer (8 bytes), rather than the size of the byte array that pointer points to (16 bytes) Update the code such that we set the number of bytes expected to be the size of the rx buffer. Note1: While different commands have different lengths rx buffers, we have to specify the same length for all rx buffers since acpm_get_rx() assumes they're all the same length. Note2: The different commands also have different lengths tx buffers, but before switching the code to use the minimum possible length, some more testing would have to be done to ensure this works correctly in all situations. It seems wiser to just apply this fix here without additional logic changes for now. Fixes: a88927b534ba ("firmware: add Exynos ACPM protocol driver") Reviewed-by: Tudor Ambarus Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250319-acpm-fixes-v2-1-ac2c1bcf322b@linaro.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/firmware/samsung/exynos-acpm-pmic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/samsung/exynos-acpm-pmic.c b/drivers/firmware/samsung/exynos-acpm-pmic.c index 85e90d236da21e..39b33a356ebd24 100644 --- a/drivers/firmware/samsung/exynos-acpm-pmic.c +++ b/drivers/firmware/samsung/exynos-acpm-pmic.c @@ -43,13 +43,13 @@ static inline u32 acpm_pmic_get_bulk(u32 data, unsigned int i) return (data >> (ACPM_PMIC_BULK_SHIFT * i)) & ACPM_PMIC_BULK_MASK; } -static void acpm_pmic_set_xfer(struct acpm_xfer *xfer, u32 *cmd, +static void acpm_pmic_set_xfer(struct acpm_xfer *xfer, u32 *cmd, size_t cmdlen, unsigned int acpm_chan_id) { xfer->txd = cmd; xfer->rxd = cmd; - xfer->txlen = sizeof(cmd); - xfer->rxlen = sizeof(cmd); + xfer->txlen = cmdlen; + xfer->rxlen = cmdlen; xfer->acpm_chan_id = acpm_chan_id; } @@ -71,7 +71,7 @@ int acpm_pmic_read_reg(const struct acpm_handle *handle, int ret; acpm_pmic_init_read_cmd(cmd, type, reg, chan); - acpm_pmic_set_xfer(&xfer, cmd, acpm_chan_id); + acpm_pmic_set_xfer(&xfer, cmd, sizeof(cmd), acpm_chan_id); ret = acpm_do_xfer(handle, &xfer); if (ret) @@ -104,7 +104,7 @@ int acpm_pmic_bulk_read(const struct acpm_handle *handle, return -EINVAL; acpm_pmic_init_bulk_read_cmd(cmd, type, reg, chan, count); - acpm_pmic_set_xfer(&xfer, cmd, acpm_chan_id); + acpm_pmic_set_xfer(&xfer, cmd, sizeof(cmd), acpm_chan_id); ret = acpm_do_xfer(handle, &xfer); if (ret) @@ -144,7 +144,7 @@ int acpm_pmic_write_reg(const struct acpm_handle *handle, int ret; acpm_pmic_init_write_cmd(cmd, type, reg, chan, value); - acpm_pmic_set_xfer(&xfer, cmd, acpm_chan_id); + acpm_pmic_set_xfer(&xfer, cmd, sizeof(cmd), acpm_chan_id); ret = acpm_do_xfer(handle, &xfer); if (ret) @@ -184,7 +184,7 @@ int acpm_pmic_bulk_write(const struct acpm_handle *handle, return -EINVAL; acpm_pmic_init_bulk_write_cmd(cmd, type, reg, chan, count, buf); - acpm_pmic_set_xfer(&xfer, cmd, acpm_chan_id); + acpm_pmic_set_xfer(&xfer, cmd, sizeof(cmd), acpm_chan_id); ret = acpm_do_xfer(handle, &xfer); if (ret) @@ -214,7 +214,7 @@ int acpm_pmic_update_reg(const struct acpm_handle *handle, int ret; acpm_pmic_init_update_cmd(cmd, type, reg, chan, value, mask); - acpm_pmic_set_xfer(&xfer, cmd, acpm_chan_id); + acpm_pmic_set_xfer(&xfer, cmd, sizeof(cmd), acpm_chan_id); ret = acpm_do_xfer(handle, &xfer); if (ret) From f038a682a570888764b238000a723b8a302b99a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Wed, 19 Mar 2025 05:38:24 +0000 Subject: [PATCH 2428/2924] firmware: exynos-acpm: silence EPROBE_DEFER error on boot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 53734383a73888e6d765aa07f4523802fdf1ee10 ] This driver emits error messages when client drivers are trying to get an interface handle to this driver here before this driver has completed _probe(). Given this driver returns -EPROBE_DEFER in that case, this is not an error and shouldn't be emitted to the log, similar to how dev_err_probe() behaves, so just remove them. This change also allows us to simplify the logic around releasing of the acpm_np handle. Fixes: a88927b534ba ("firmware: add Exynos ACPM protocol driver") Signed-off-by: André Draszik Link: https://lore.kernel.org/r/20250319-acpm-fixes-v2-2-ac2c1bcf322b@linaro.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/firmware/samsung/exynos-acpm.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c index 15e991b99f5a38..e80cb7a8da8f23 100644 --- a/drivers/firmware/samsung/exynos-acpm.c +++ b/drivers/firmware/samsung/exynos-acpm.c @@ -696,24 +696,17 @@ static const struct acpm_handle *acpm_get_by_phandle(struct device *dev, return ERR_PTR(-ENODEV); pdev = of_find_device_by_node(acpm_np); - if (!pdev) { - dev_err(dev, "Cannot find device node %s\n", acpm_np->name); - of_node_put(acpm_np); - return ERR_PTR(-EPROBE_DEFER); - } - of_node_put(acpm_np); + if (!pdev) + return ERR_PTR(-EPROBE_DEFER); acpm = platform_get_drvdata(pdev); if (!acpm) { - dev_err(dev, "Cannot get drvdata from %s\n", - dev_name(&pdev->dev)); platform_device_put(pdev); return ERR_PTR(-EPROBE_DEFER); } if (!try_module_get(pdev->dev.driver->owner)) { - dev_err(dev, "Cannot get module reference.\n"); platform_device_put(pdev); return ERR_PTR(-EPROBE_DEFER); } From 4b795ceaebbae821480b68323f60e38adda0eb9e Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Wed, 23 Apr 2025 12:03:39 +0800 Subject: [PATCH 2429/2924] arm64: dts: mt8183: Add port node to mt8183.dtsi [ Upstream commit d15059f7be59f887c1a370037cc2337c2ff2ad56 ] Add the port node to fix the binding schema check. Also update mt8183-kukui to reference the new port node. Fixes: 88ec840270e6 ("arm64: dts: mt8183: Add dsi node") Fixes: 27eaf34df364 ("arm64: dts: mt8183: config dsi node") Signed-off-by: Pin-yen Lin Link: https://lore.kernel.org/r/20250423040354.2847447-1-treapking@chromium.org Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 10 +++------- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 4 ++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index e1495f1900a7b4..f9ca6b3720e915 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -259,14 +259,10 @@ }; }; }; +}; - ports { - port { - dsi_out: endpoint { - remote-endpoint = <&panel_in>; - }; - }; - }; +&dsi_out { + remote-endpoint = <&panel_in>; }; &gic { diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 0aa34e5bbaaa87..3c1fe80e64b9c5 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1836,6 +1836,10 @@ phys = <&mipi_tx0>; phy-names = "dphy"; status = "disabled"; + + port { + dsi_out: endpoint { }; + }; }; dpi0: dpi@14015000 { From 4a730a7598d9c9c7db0a980695203172b977e691 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 15 Apr 2025 20:01:27 -0500 Subject: [PATCH 2430/2924] arm64: dts: imx8mm-beacon: Fix RTC capacitive load [ Upstream commit 2e98d456666d63f897ba153210bcef9d78ba0f3a ] Although not noticeable when used every day, the RTC appears to drift when left to sit over time. This is due to the capacitive load not being properly set. Fix RTC drift by correcting the capacitive load setting from 7000 to 12500, which matches the actual hardware configuration. Fixes: 593816fa2f35 ("arm64: dts: imx: Add Beacon i.MX8m-Mini development kit") Signed-off-by: Adam Ford Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index 62ed64663f4952..9ba0cb89fa24e0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -233,6 +233,7 @@ rtc: rtc@51 { compatible = "nxp,pcf85263"; reg = <0x51>; + quartz-load-femtofarads = <12500>; }; }; From 09856f1dd862a2ce00c589fdf11ed2eb96373027 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 15 Apr 2025 20:01:28 -0500 Subject: [PATCH 2431/2924] arm64: dts: imx8mn-beacon: Fix RTC capacitive load [ Upstream commit c3f03bec30efd5082b55876846d57b5d17dae7b9 ] Although not noticeable when used every day, the RTC appears to drift when left to sit over time. This is due to the capacitive load not being properly set. Fix RTC drift by correcting the capacitive load setting from 7000 to 12500, which matches the actual hardware configuration. Fixes: 36ca3c8ccb53 ("arm64: dts: imx: Add Beacon i.MX8M Nano development kit") Signed-off-by: Adam Ford Reviewed-by: Frank Li Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi index 2a64115eebf1c6..bb11590473a4c7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi @@ -242,6 +242,7 @@ rtc: rtc@51 { compatible = "nxp,pcf85263"; reg = <0x51>; + quartz-load-femtofarads = <12500>; }; }; From e4564e711d8e9fee1aafc924ed4011164792d5d1 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 15 Apr 2025 20:01:29 -0500 Subject: [PATCH 2432/2924] arm64: dts: imx8mp-beacon: Fix RTC capacitive load [ Upstream commit 6821ee17537938e919e8b86a541aae451f73165b ] Although not noticeable when used every day, the RTC appears to drift when left to sit over time. This is due to the capacitive load not being properly set. Fix RTC drift by correcting the capacitive load setting from 7000 to 12500, which matches the actual hardware configuration. Fixes: 25a5ccdce767 ("arm64: dts: freescale: Introduce imx8mp-beacon-kit") Signed-off-by: Adam Ford Reviewed-by: Frank Li Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi index 15f7ab58db36cc..88561df70d03ac 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi @@ -257,6 +257,7 @@ rtc: rtc@51 { compatible = "nxp,pcf85263"; reg = <0x51>; + quartz-load-femtofarads = <12500>; }; }; From 876cbf18c1ad2e81f42ec88f5d710b3f524153b8 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 15 Apr 2025 20:01:30 -0500 Subject: [PATCH 2433/2924] arm64: dts: imx8mm-beacon: Set SAI5 MCLK direction to output for HDMI audio [ Upstream commit 8c716f80dfe8cd6ed9a2696847cea1affeeff6ff ] The HDMI bridge chip fails to generate an audio source due to the SAI5 master clock (MCLK) direction not being set to output. This prevents proper clocking of the HDMI audio interface. Add the `fsl,sai-mclk-direction-output` property to the SAI5 node to ensure the MCLK is driven by the SoC, resolving the HDMI sound issue. Fixes: 8ad7d14d99f3 ("arm64: dts: imx8mm-beacon: Add HDMI video with sound") Signed-off-by: Adam Ford Reviewed-by: Frank Li Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mm-beacon-kit.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mm-beacon-kit.dts index 97ff1ddd631888..734a75198f06e0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-kit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-kit.dts @@ -124,6 +124,7 @@ assigned-clock-parents = <&clk IMX8MM_AUDIO_PLL1_OUT>; assigned-clock-rates = <24576000>; #sound-dai-cells = <0>; + fsl,sai-mclk-direction-output; status = "okay"; }; From 28e1021edea10fa9a7502fb7d0e26812b0d539f9 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 15 Apr 2025 20:01:31 -0500 Subject: [PATCH 2434/2924] arm64: dts: imx8mn-beacon: Set SAI5 MCLK direction to output for HDMI audio [ Upstream commit a747c4dd2a60c4d0179b372032a4b98548135096 ] The HDMI bridge chip fails to generate an audio source due to the SAI5 master clock (MCLK) direction not being set to output. This prevents proper clocking of the HDMI audio interface. Add the `fsl,sai-mclk-direction-output` property to the SAI5 node to ensure the MCLK is driven by the SoC, resolving the HDMI sound issue. Fixes: 1d6880ceef43 ("arm64: dts: imx8mn-beacon: Add HDMI video with sound") Signed-off-by: Adam Ford Reviewed-by: Frank Li Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts index 1df5ceb1138793..37fc5ed98d7f61 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts @@ -124,6 +124,7 @@ assigned-clock-parents = <&clk IMX8MN_AUDIO_PLL1_OUT>; assigned-clock-rates = <24576000>; #sound-dai-cells = <0>; + fsl,sai-mclk-direction-output; status = "okay"; }; From 6b08c4132e27eec68b43d927d4abc984ba593df9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Fri, 2 May 2025 11:32:10 -0400 Subject: [PATCH 2435/2924] arm64: dts: mediatek: mt6357: Drop regulator-fixed compatibles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d77e89b7b03fb945b4353f2dcc4a70b34baa7bcb ] Some of the regulators in the MT6357 PMIC dtsi have compatible set to regulator-fixed, even though they don't serve any purpose: all those regulators are handled as a whole by the mt6357-regulator driver. In fact this is the only dtsi in this family of chips where this is the case: mt6359 and mt6358 don't have any such compatibles. A side-effect caused by this is that the DT kselftest, which is supposed to identify nodes with compatibles that can be probed, but haven't, shows these nodes as failures. Remove the useless compatibles to move the dtsi in line with the others in its family and fix the DT kselftest failures. Fixes: 55749bb478f8 ("arm64: dts: mediatek: add mt6357 device-tree") Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20250502-mt6357-regulator-fixed-compatibles-removal-v1-1-a582c16743fe@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt6357.dtsi | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt6357.dtsi b/arch/arm64/boot/dts/mediatek/mt6357.dtsi index 5fafa842d312f3..dca4e5c3d8e210 100644 --- a/arch/arm64/boot/dts/mediatek/mt6357.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt6357.dtsi @@ -60,7 +60,6 @@ }; mt6357_vfe28_reg: ldo-vfe28 { - compatible = "regulator-fixed"; regulator-name = "vfe28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; @@ -75,7 +74,6 @@ }; mt6357_vrf18_reg: ldo-vrf18 { - compatible = "regulator-fixed"; regulator-name = "vrf18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -83,7 +81,6 @@ }; mt6357_vrf12_reg: ldo-vrf12 { - compatible = "regulator-fixed"; regulator-name = "vrf12"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; @@ -112,7 +109,6 @@ }; mt6357_vcn28_reg: ldo-vcn28 { - compatible = "regulator-fixed"; regulator-name = "vcn28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; @@ -120,7 +116,6 @@ }; mt6357_vcn18_reg: ldo-vcn18 { - compatible = "regulator-fixed"; regulator-name = "vcn18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -142,7 +137,6 @@ }; mt6357_vcamio_reg: ldo-vcamio18 { - compatible = "regulator-fixed"; regulator-name = "vcamio"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -175,7 +169,6 @@ }; mt6357_vaux18_reg: ldo-vaux18 { - compatible = "regulator-fixed"; regulator-name = "vaux18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -183,7 +176,6 @@ }; mt6357_vaud28_reg: ldo-vaud28 { - compatible = "regulator-fixed"; regulator-name = "vaud28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; @@ -191,7 +183,6 @@ }; mt6357_vio28_reg: ldo-vio28 { - compatible = "regulator-fixed"; regulator-name = "vio28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; @@ -199,7 +190,6 @@ }; mt6357_vio18_reg: ldo-vio18 { - compatible = "regulator-fixed"; regulator-name = "vio18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; From 6b0b2645cfb923dec6d6eadafbf0e74b2b85ebdd Mon Sep 17 00:00:00 2001 From: Louis-Alexis Eyraud Date: Fri, 2 May 2025 15:17:19 +0200 Subject: [PATCH 2436/2924] arm64: dts: mediatek: mt8390-genio-common: Set ssusb2 default dual role mode to host [ Upstream commit f9167f15dd4e70b124023a2f7ba2b09401b3b6ff ] On the Mediatek Genio 510-EVK and 700-EVK boards, ssusb2 controller is one but has two ports: one is routed to the M.2 slot, the other is on the RPi header who does support full OTG. Since Mediatek Genio 700-EVK USB support was added, dual role mode property is set to otg for ssusb2. This config prevents the M.2 Wifi/Bluetooth module, present on those boards and exposing Bluetooth as an USB device to be properly detected at startup as the default role is device. To keep the OTG functionality and make the M.2 module be detected at the same time, add role-switch-default-mode property set to host and also fix the polarity of GPIO associated to the USB connector, so the ssusb2 controller role is properly set to host when the other port is unused. Fixes: 1afaeca17238 ("arm64: dts: mediatek: mt8390-genio-700: Add USB, TypeC Controller, MUX") Signed-off-by: Louis-Alexis Eyraud Link: https://lore.kernel.org/r/20250502-mtk-genio-510-700-fix-bt-detection-v2-1-870aa2145480@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- .../arm64/boot/dts/mediatek/mt8390-genio-common.dtsi | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8390-genio-common.dtsi b/arch/arm64/boot/dts/mediatek/mt8390-genio-common.dtsi index 60139e6dffd8e0..6a75b230282eda 100644 --- a/arch/arm64/boot/dts/mediatek/mt8390-genio-common.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8390-genio-common.dtsi @@ -1199,8 +1199,18 @@ }; &ssusb2 { + /* + * the ssusb2 controller is one but we got two ports : one is routed + * to the M.2 slot, the other is on the RPi header who does support + * full OTG. + * As the controller is shared between them, the role switch default + * mode is set to host to make any peripheral inserted in the M.2 + * slot (i.e BT/WIFI module) be detected when the other port is + * unused. + */ dr_mode = "otg"; maximum-speed = "high-speed"; + role-switch-default-mode = "host"; usb-role-switch; vusb33-supply = <&mt6359_vusb_ldo_reg>; wakeup-source; @@ -1211,7 +1221,7 @@ connector { compatible = "gpio-usb-b-connector", "usb-b-connector"; type = "micro"; - id-gpios = <&pio 89 GPIO_ACTIVE_HIGH>; + id-gpios = <&pio 89 GPIO_ACTIVE_LOW>; vbus-supply = <&usb_p2_vbus>; }; }; From 3288529e52e6cc5bc334748d22e4e120b7328cc0 Mon Sep 17 00:00:00 2001 From: Julien Massot Date: Mon, 5 May 2025 15:23:39 +0200 Subject: [PATCH 2437/2924] arm64: dts: mt6359: Add missing 'compatible' property to regulators node [ Upstream commit 1fe38d2a19950fa6dbc384ee8967c057aef9faf4 ] The 'compatible' property is required by the 'mfd/mediatek,mt6397.yaml' binding. Add it to fix the following dtb-check error: mediatek/mt8395-radxa-nio-12l.dtb: pmic: regulators: 'compatible' is a required property Fixes: 3b7d143be4b7 ("arm64: dts: mt6359: add PMIC MT6359 related nodes") Signed-off-by: Julien Massot Link: https://lore.kernel.org/r/20250505-mt8395-dtb-errors-v1-3-9c4714dcdcdb@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt6359.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/mediatek/mt6359.dtsi b/arch/arm64/boot/dts/mediatek/mt6359.dtsi index 7b10f9c59819a9..0c479404b3fe3a 100644 --- a/arch/arm64/boot/dts/mediatek/mt6359.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt6359.dtsi @@ -20,6 +20,8 @@ }; regulators { + compatible = "mediatek,mt6359-regulator"; + mt6359_vs1_buck_reg: buck_vs1 { regulator-name = "vs1"; regulator-min-microvolt = <800000>; From d94e9e8f94ffc32087fe2982bdec4426be914d40 Mon Sep 17 00:00:00 2001 From: Alexey Minnekhanov Date: Sun, 4 May 2025 14:51:19 +0300 Subject: [PATCH 2438/2924] arm64: dts: qcom: sdm660-lavender: Add missing USB phy supply [ Upstream commit dbf62a117a1b7f605a98dd1fd1fd6c85ec324ea0 ] Fixes the following dtbs check error: phy@c012000: 'vdda-pll-supply' is a required property Fixes: e5d3e752b050e ("arm64: dts: qcom: sdm660-xiaomi-lavender: Add USB") Signed-off-by: Alexey Minnekhanov Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250504115120.1432282-3-alexeymin@postmarketos.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts b/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts index 0b4d71c14a7723..a9926ad6c6f9f5 100644 --- a/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts +++ b/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts @@ -107,6 +107,7 @@ status = "okay"; vdd-supply = <&vreg_l1b_0p925>; + vdda-pll-supply = <&vreg_l10a_1p8>; vdda-phy-dpdm-supply = <&vreg_l7b_3p125>; }; From 5a95d17f5928683bda30afc5af2096ad458aa944 Mon Sep 17 00:00:00 2001 From: Alexey Minnekhanov Date: Sun, 4 May 2025 14:51:20 +0300 Subject: [PATCH 2439/2924] arm64: dts: qcom: sda660-ifc6560: Fix dt-validate warning [ Upstream commit f5110806b41eaa0eb0ab1bf2787876a580c6246c ] If you remove clocks property, you should remove clock-names, too. Fixes warning with dtbs check: 'clocks' is a dependency of 'clock-names' Fixes: 34279d6e3f32c ("arm64: dts: qcom: sdm660: Add initial Inforce IFC6560 board support") Signed-off-by: Alexey Minnekhanov Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250504115120.1432282-4-alexeymin@postmarketos.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts b/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts index d402f4c85b11d1..ee696317f78cc3 100644 --- a/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts +++ b/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts @@ -175,6 +175,7 @@ * BAM DMA interconnects support is in place. */ /delete-property/ clocks; + /delete-property/ clock-names; }; &blsp1_uart2 { @@ -187,6 +188,7 @@ * BAM DMA interconnects support is in place. */ /delete-property/ clocks; + /delete-property/ clock-names; }; &blsp2_uart1 { From 8abb25127ffd3757b88773a6b5b193d235722ad7 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Tue, 22 Apr 2025 14:25:22 +0300 Subject: [PATCH 2440/2924] arm64: dts: qcom: x1e001de-devkit: Describe USB retimers resets pin configs [ Upstream commit f76fdcd2550991c854a698a9f881b1579455fc0a ] Currently, on the X Elite Devkit, the pin configuration of the reset gpios for all three PS8830 USB retimers are left configured by the bootloader. Fix that by describing their pin configuration. Fixes: 019e1ee32fec ("arm64: dts: qcom: x1e001de-devkit: Enable external DP support") Reviewed-by: Konrad Dybcio Signed-off-by: Abel Vesa Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20250422-x1e001de-devkit-dts-fix-retimer-gpios-v2-1-0129c4f2b6d7@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/x1e001de-devkit.dts | 32 ++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts b/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts index f5063a0df9fbfa..8f4482ade0f982 100644 --- a/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts +++ b/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts @@ -790,6 +790,9 @@ reset-gpios = <&tlmm 185 GPIO_ACTIVE_HIGH>; + pinctrl-0 = <&rtmr2_default>; + pinctrl-names = "default"; + orientation-switch; retimer-switch; @@ -845,6 +848,9 @@ reset-gpios = <&pm8550_gpios 10 GPIO_ACTIVE_HIGH>; + pinctrl-0 = <&rtmr0_default>; + pinctrl-names = "default"; + retimer-switch; orientation-switch; @@ -900,6 +906,9 @@ reset-gpios = <&tlmm 176 GPIO_ACTIVE_HIGH>; + pinctrl-0 = <&rtmr1_default>; + pinctrl-names = "default"; + retimer-switch; orientation-switch; @@ -1018,6 +1027,15 @@ }; &pm8550_gpios { + rtmr0_default: rtmr0-reset-n-active-state { + pins = "gpio10"; + function = "normal"; + power-source = <1>; /* 1.8 V */ + bias-disable; + input-disable; + output-enable; + }; + usb0_3p3_reg_en: usb0-3p3-reg-en-state { pins = "gpio11"; function = "normal"; @@ -1205,6 +1223,20 @@ }; }; + rtmr1_default: rtmr1-reset-n-active-state { + pins = "gpio176"; + function = "gpio"; + drive-strength = <2>; + bias-disable; + }; + + rtmr2_default: rtmr2-reset-n-active-state { + pins = "gpio185"; + function = "gpio"; + drive-strength = <2>; + bias-disable; + }; + rtmr1_1p15_reg_en: rtmr1-1p15-reg-en-state { pins = "gpio188"; function = "gpio"; From d9219a2f23a7afdf6e1db55fc533d2d999f58556 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Tue, 22 Apr 2025 14:25:23 +0300 Subject: [PATCH 2441/2924] arm64: dts: qcom: x1e001de-devkit: Fix pin config for USB0 retimer vregs [ Upstream commit 635d0c8edf26994dc1dcbc09add9423aa61869b0 ] Describe the missing power source, bias and direction for each of the USB0 retimer gpio-controlled voltage regulators related pin configuration. Fixes: 019e1ee32fec ("arm64: dts: qcom: x1e001de-devkit: Enable external DP support") Reviewed-by: Konrad Dybcio Signed-off-by: Abel Vesa Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20250422-x1e001de-devkit-dts-fix-retimer-gpios-v2-2-0129c4f2b6d7@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/x1e001de-devkit.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts b/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts index 8f4482ade0f982..3cfe42ec089141 100644 --- a/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts +++ b/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts @@ -1039,6 +1039,10 @@ usb0_3p3_reg_en: usb0-3p3-reg-en-state { pins = "gpio11"; function = "normal"; + power-source = <1>; /* 1.8 V */ + bias-disable; + input-disable; + output-enable; }; }; @@ -1046,6 +1050,10 @@ usb0_pwr_1p15_en: usb0-pwr-1p15-en-state { pins = "gpio8"; function = "normal"; + power-source = <1>; /* 1.8 V */ + bias-disable; + input-disable; + output-enable; }; }; @@ -1053,6 +1061,10 @@ usb0_1p8_reg_en: usb0-1p8-reg-en-state { pins = "gpio8"; function = "normal"; + power-source = <1>; /* 1.8 V */ + bias-disable; + input-disable; + output-enable; }; }; From ac21d13b6629b28a7cca3f53939a84137d13a01e Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Tue, 6 May 2025 20:56:55 +0100 Subject: [PATCH 2442/2924] arm64: dts: rockchip: Add vcc-supply to SPI flash on rk3566-rock3c [ Upstream commit a706a593cb19796f31d3a888423ef1a71885ae72 ] As described in the radxa_rock_3c_v1400_schematic.pdf, the SPI Flash's VCC connector is connected to VCCIO_FLASH and according to the that same schematic, that belongs to the VCC_1V8 power source. This fixes the following warning: spi-nor spi4.0: supply vcc not found, using dummy regulator Fixes: ee219017ddb5 ("arm64: dts: rockchip: Add Radxa ROCK 3C") Signed-off-by: Peter Robinson Link: https://lore.kernel.org/r/20250506195702.593044-1-pbrobinson@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3566-rock-3c.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-rock-3c.dts b/arch/arm64/boot/dts/rockchip/rk3566-rock-3c.dts index 53e71528e4c4c7..6224d72813e593 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-rock-3c.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-rock-3c.dts @@ -636,6 +636,7 @@ spi-max-frequency = <104000000>; spi-rx-bus-width = <4>; spi-tx-bus-width = <1>; + vcc-supply = <&vcc_1v8>; }; }; From e31c0c425b70db8cf63c5f43da3a02a02a7de5b0 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 5 May 2025 21:24:16 +0100 Subject: [PATCH 2443/2924] arm64: dts: allwinner: a100: set maximum MMC frequency [ Upstream commit d8f10550448b03d3c5c6d9392119205c65ebfc89 ] The manual for the Allwinner A133 SoC mentions that the maximum supported MMC frequency is 150 MHz, for all of the MMC devices. Describe that in the DT entry, to help drivers setting the right interface frequency. Fixes: fcfbb8d9ec58 ("arm64: allwinner: a100: Add MMC related nodes") Signed-off-by: Andre Przywara Link: https://patch.msgid.link/20250505202416.23753-1-andre.przywara@arm.com Signed-off-by: Chen-Yu Tsai Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi index f9f6fea03b7446..bd366389b2389d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi @@ -252,6 +252,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -267,6 +268,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -282,6 +284,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; From 311c364b196e075491cf760ee8c2cda27534a4a0 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Tue, 6 May 2025 23:25:28 +0100 Subject: [PATCH 2444/2924] arm64: dts: rockchip: Update eMMC for NanoPi R5 series [ Upstream commit 8eca9e979a1efbcc3d090f6eb3f4da621e7c87e0 ] Add the 3.3v and 1.8v regulators that are connected to the eMMC on the R5 series devices, as well as adding the eMMC data strobe, and enable eMMC HS200 mode as the Foresee FEMDNN0xxG-A3A55 modules support it. Fixes: c8ec73b05a95d ("arm64: dts: rockchip: create common dtsi for NanoPi R5 series") Signed-off-by: Peter Robinson Reviewed-by: Diederik de Haas Link: https://lore.kernel.org/r/20250506222531.625157-1-pbrobinson@gmail.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi index 00c479aa18711a..a28b4af10d13a2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi @@ -486,9 +486,12 @@ &sdhci { bus-width = <8>; max-frequency = <200000000>; + mmc-hs200-1_8v; non-removable; pinctrl-names = "default"; - pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>; + pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd &emmc_datastrobe>; + vmmc-supply = <&vcc_3v3>; + vqmmc-supply = <&vcc_1v8>; status = "okay"; }; From cf495b1aae81001f26393cd95c94b06f6d6f4bdc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 7 May 2025 15:31:55 +0200 Subject: [PATCH 2445/2924] arm64: dts: renesas: white-hawk-single: Improve Ethernet TSN description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8ffec7d62c6956199f442dac3b2d5d02231c3977 ] - Add the missing "ethernet3" alias for the Ethernet TSN port, so U-Boot will fill its local-mac-address property based on the "eth3addr" environment variable (if set), avoiding a random MAC address being assigned by the OS, - Rename the numerical Ethernet PHY label to "tsn0_phy", to avoid future conflicts, and for consistency with the "avbN_phy" labels. Fixes: 3d8e475bd7a724a9 ("arm64: dts: renesas: white-hawk-single: Wire-up Ethernet TSN") Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Link: https://lore.kernel.org/367f10a18aa196ff1c96734dd9bd5634b312c421.1746624368.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/white-hawk-single.dtsi | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/white-hawk-single.dtsi b/arch/arm64/boot/dts/renesas/white-hawk-single.dtsi index 20e8232f2f3234..976a3ab44e5a52 100644 --- a/arch/arm64/boot/dts/renesas/white-hawk-single.dtsi +++ b/arch/arm64/boot/dts/renesas/white-hawk-single.dtsi @@ -11,6 +11,10 @@ / { model = "Renesas White Hawk Single board"; compatible = "renesas,white-hawk-single"; + + aliases { + ethernet3 = &tsn0; + }; }; &hscif0 { @@ -53,7 +57,7 @@ pinctrl-0 = <&tsn0_pins>; pinctrl-names = "default"; phy-mode = "rgmii"; - phy-handle = <&phy3>; + phy-handle = <&tsn0_phy>; status = "okay"; mdio { @@ -63,7 +67,7 @@ reset-gpios = <&gpio1 23 GPIO_ACTIVE_LOW>; reset-post-delay-us = <4000>; - phy3: ethernet-phy@0 { + tsn0_phy: ethernet-phy@0 { compatible = "ethernet-phy-id002b.0980", "ethernet-phy-ieee802.3-c22"; reg = <0>; From 43c60d58533367ed9fa6ed944cc1a41bf842c704 Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Mon, 28 Apr 2025 20:51:47 -0500 Subject: [PATCH 2446/2924] arm64: tegra: Drop remaining serial clock-names and reset-names [ Upstream commit 4cd763297c2203c6ba587d7d4a9105f96597b998 ] The referenced commit only removed some of the names, missing all that weren't in use at the time. The commit removes the rest. Fixes: 71de0a054d0e ("arm64: tegra: Drop serial clock-names and reset-names") Signed-off-by: Aaron Kling Link: https://lore.kernel.org/r/20250428-tegra-serial-fixes-v1-1-4f47c5d85bf6@gmail.com Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/nvidia/tegra186.dtsi | 12 ------------ arch/arm64/boot/dts/nvidia/tegra194.dtsi | 12 ------------ 2 files changed, 24 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi index 2b3bb5d0af17bd..f0b7949df92c05 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi @@ -621,9 +621,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTB>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTB>; - reset-names = "serial"; status = "disabled"; }; @@ -633,9 +631,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTD>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTD>; - reset-names = "serial"; status = "disabled"; }; @@ -645,9 +641,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTE>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTE>; - reset-names = "serial"; status = "disabled"; }; @@ -657,9 +651,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTF>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTF>; - reset-names = "serial"; status = "disabled"; }; @@ -1236,9 +1228,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTC>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTC>; - reset-names = "serial"; status = "disabled"; }; @@ -1248,9 +1238,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTG>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTG>; - reset-names = "serial"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 33f92b77cd9d9e..c3695077478514 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -766,9 +766,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTD>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTD>; - reset-names = "serial"; status = "disabled"; }; @@ -778,9 +776,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTE>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTE>; - reset-names = "serial"; status = "disabled"; }; @@ -790,9 +786,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTF>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTF>; - reset-names = "serial"; status = "disabled"; }; @@ -817,9 +811,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTH>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTH>; - reset-names = "serial"; status = "disabled"; }; @@ -1616,9 +1608,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTC>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTC>; - reset-names = "serial"; status = "disabled"; }; @@ -1628,9 +1618,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTG>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTG>; - reset-names = "serial"; status = "disabled"; }; From f9650f682b50efe61cd97b2acb0cc9ffd451b265 Mon Sep 17 00:00:00 2001 From: Aaron Kling Date: Sun, 20 Apr 2025 09:35:37 -0500 Subject: [PATCH 2447/2924] arm64: tegra: Add uartd serial alias for Jetson TX1 module [ Upstream commit dfb25484bd73c8590954ead6fd58a1587ba3bbc5 ] If a serial-tegra interface does not have an alias, the driver fails to probe with an error: serial-tegra 70006300.serial: failed to get alias id, errno -19 This prevents the bluetooth device from being accessible. Fixes: 6eba6471bbb7 ("arm64: tegra: Wire up Bluetooth on Jetson TX1 module") Signed-off-by: Aaron Kling Reviewed-by: Tomasz Maciej Nowak Link: https://lore.kernel.org/r/20250420-tx1-bt-v1-1-153cba105a4e@gmail.com Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi index 9b9d1d15b0c7ea..1bb1f9640a800a 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi @@ -11,6 +11,7 @@ rtc0 = "/i2c@7000d000/pmic@3c"; rtc1 = "/rtc@7000e000"; serial0 = &uarta; + serial3 = &uartd; }; chosen { From 8ac35b8fbe100659543e5939f76954b82cf90dce Mon Sep 17 00:00:00 2001 From: Prasanth Babu Mantena Date: Wed, 7 May 2025 10:37:01 +0530 Subject: [PATCH 2448/2924] arm64: dts: ti: k3-j721e-common-proc-board: Enable OSPI1 on J721E [ Upstream commit 6b8deb2ff0d31848c43a73f6044e69ba9276b3ec ] J721E SoM has MT25QU512AB Serial NOR flash connected to OSPI1 controller. Enable ospi1 node in device tree. Fixes: 73676c480b72 ("arm64: dts: ti: k3-j721e: Enable OSPI nodes at the board level") Signed-off-by: Prasanth Babu Mantena Link: https://lore.kernel.org/r/20250507050701.3007209-1-p-mantena@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts index 4421852161dd65..da4e0cacd6d72d 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts +++ b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts @@ -573,6 +573,7 @@ &ospi1 { pinctrl-names = "default"; pinctrl-0 = <&mcu_fss0_ospi1_pins_default>; + status = "okay"; flash@0 { compatible = "jedec,spi-nor"; From 6b05e92c02c7634d0ac7219354b8f428dbaf9ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20Cz=C3=A9m=C3=A1n?= Date: Mon, 21 Apr 2025 04:04:17 +0200 Subject: [PATCH 2449/2924] soc: qcom: smp2p: Fix fallback to qcom,ipc parse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 421777a02bbd9cdabe0ae05a69ee06253150589d ] mbox_request_channel() returning value was changed in case of error. It uses returning value of of_parse_phandle_with_args(). It is returning with -ENOENT instead of -ENODEV when no mboxes property exists. Fixes: 24fdd5074b20 ("mailbox: use error ret code of of_parse_phandle_with_args()") Signed-off-by: Barnabás Czémán Reviewed-by: Stephan Gerhold Tested-by: Stephan Gerhold # msm8939 Link: https://lore.kernel.org/r/20250421-fix-qcom-smd-v1-2-574d071d3f27@mainlining.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/soc/qcom/smp2p.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c index a3e88ced328a91..c9199d6fbe26ec 100644 --- a/drivers/soc/qcom/smp2p.c +++ b/drivers/soc/qcom/smp2p.c @@ -575,7 +575,7 @@ static int qcom_smp2p_probe(struct platform_device *pdev) smp2p->mbox_client.knows_txdone = true; smp2p->mbox_chan = mbox_request_channel(&smp2p->mbox_client, 0); if (IS_ERR(smp2p->mbox_chan)) { - if (PTR_ERR(smp2p->mbox_chan) != -ENODEV) + if (PTR_ERR(smp2p->mbox_chan) != -ENOENT) return PTR_ERR(smp2p->mbox_chan); smp2p->mbox_chan = NULL; From 295ab18c2dbce8d0ac6ecf7c5187e16e1ac8b282 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 9 Apr 2025 03:47:47 +0100 Subject: [PATCH 2450/2924] Squashfs: check return result of sb_min_blocksize [ Upstream commit 734aa85390ea693bb7eaf2240623d41b03705c84 ] Syzkaller reports an "UBSAN: shift-out-of-bounds in squashfs_bio_read" bug. Syzkaller forks multiple processes which after mounting the Squashfs filesystem, issues an ioctl("/dev/loop0", LOOP_SET_BLOCK_SIZE, 0x8000). Now if this ioctl occurs at the same time another process is in the process of mounting a Squashfs filesystem on /dev/loop0, the failure occurs. When this happens the following code in squashfs_fill_super() fails. ---- msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); msblk->devblksize_log2 = ffz(~msblk->devblksize); ---- sb_min_blocksize() returns 0, which means msblk->devblksize is set to 0. As a result, ffz(~msblk->devblksize) returns 64, and msblk->devblksize_log2 is set to 64. This subsequently causes the UBSAN: shift-out-of-bounds in fs/squashfs/block.c:195:36 shift exponent 64 is too large for 64-bit type 'u64' (aka 'unsigned long long') This commit adds a check for a 0 return by sb_min_blocksize(). Link: https://lkml.kernel.org/r/20250409024747.876480-1-phillip@squashfs.org.uk Fixes: 0aa666190509 ("Squashfs: super block operations") Reported-by: syzbot+65761fc25a137b9c8c6e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67f0dd7a.050a0220.0a13.0230.GAE@google.com/ Signed-off-by: Phillip Lougher Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/squashfs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 67c55fe32ce88d..992ea0e372572f 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -202,6 +202,11 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) msblk->panic_on_errors = (opts->errors == Opt_errors_panic); msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); + if (!msblk->devblksize) { + errorf(fc, "squashfs: unable to set blocksize\n"); + return -EINVAL; + } + msblk->devblksize_log2 = ffz(~msblk->devblksize); mutex_init(&msblk->meta_index_mutex); From 574d654fa5c00ce2bfc3fc7784e531c64fa72153 Mon Sep 17 00:00:00 2001 From: Murad Masimov Date: Wed, 2 Apr 2025 09:56:27 +0300 Subject: [PATCH 2451/2924] ocfs2: fix possible memory leak in ocfs2_finish_quota_recovery [ Upstream commit cdc3ed3035d0fe934aa1d9b78ce256752fd3bb7d ] If ocfs2_finish_quota_recovery() exits due to an error before passing all rc_list elements to ocfs2_recover_local_quota_file() then it can lead to a memory leak as rc_list may still contain elements that have to be freed. Release all memory allocated by ocfs2_add_recovery_chunk() using ocfs2_free_quota_recovery() instead of kfree(). Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Link: https://lkml.kernel.org/r/20250402065628.706359-2-m.masimov@mt-integration.ru Fixes: 2205363dce74 ("ocfs2: Implement quota recovery") Signed-off-by: Murad Masimov Reviewed-by: Jan Kara Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/ocfs2/quota_local.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index e272429da3db34..de7f12858729ac 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -674,7 +674,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, break; } out: - kfree(rec); + ocfs2_free_quota_recovery(rec); return status; } From 46e784bd1d5532d575e4a7a95c35bf6cd2be4ab8 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Tue, 29 Apr 2025 02:37:07 +0900 Subject: [PATCH 2452/2924] nilfs2: add pointer check for nilfs_direct_propagate() [ Upstream commit f43f02429295486059605997bc43803527d69791 ] Patch series "nilfs2: improve sanity checks in dirty state propagation". This fixes one missed check for block mapping anomalies and one improper return of an error code during a preparation step for log writing, thereby improving checking for filesystem corruption on writeback. This patch (of 2): In nilfs_direct_propagate(), the printer get from nilfs_direct_get_ptr() need to be checked to ensure it is not an invalid pointer. If the pointer value obtained by nilfs_direct_get_ptr() is NILFS_BMAP_INVALID_PTR, means that the metadata (in this case, i_bmap in the nilfs_inode_info struct) that should point to the data block at the buffer head of the argument is corrupted and the data block is orphaned, meaning that the file system has lost consistency. Add a value check and return -EINVAL when it is an invalid pointer. Link: https://lkml.kernel.org/r/20250428173808.6452-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20250428173808.6452-2-konishi.ryusuke@gmail.com Fixes: 36a580eb489f ("nilfs2: direct block mapping") Signed-off-by: Wentao Liang Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/nilfs2/direct.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 893ab36824cc2b..2d8dc6b35b5477 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap, dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); ptr = nilfs_direct_get_ptr(bmap, key); + if (ptr == NILFS_BMAP_INVALID_PTR) + return -EINVAL; + if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; newreq.pr_entry_nr = ptr; From 1fb0f6aa3e88bf772fe7df580cef2724fae06ca7 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 29 Apr 2025 02:37:08 +0900 Subject: [PATCH 2453/2924] nilfs2: do not propagate ENOENT error from nilfs_btree_propagate() [ Upstream commit 8e39fbb1edbb4ec9d7c1124f403877fc167fcecd ] In preparation for writing logs, in nilfs_btree_propagate(), which makes parent and ancestor node blocks dirty starting from a modified data block or b-tree node block, if the starting block does not belong to the b-tree, i.e. is isolated, nilfs_btree_do_lookup() called within the function fails with -ENOENT. In this case, even though -ENOENT is an internal code, it is propagated to the log writer via nilfs_bmap_propagate() and may be erroneously returned to system calls such as fsync(). Fix this issue by changing the error code to -EINVAL in this case, and having the bmap layer detect metadata corruption and convert the error code appropriately. Link: https://lkml.kernel.org/r/20250428173808.6452-3-konishi.ryusuke@gmail.com Fixes: 1f5abe7e7dbc ("nilfs2: replace BUG_ON and BUG calls triggerable from ioctl") Signed-off-by: Ryusuke Konishi Cc: Wentao Liang Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/nilfs2/btree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 0d8f7fb15c2e54..dd0c8e560ef6a2 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -2102,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { - if (unlikely(ret == -ENOENT)) + if (unlikely(ret == -ENOENT)) { nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); + ret = -EINVAL; + } goto out; } From 873d47114fd5e5a1cad2018843671537cc71ac84 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 8 Apr 2025 13:58:09 +0300 Subject: [PATCH 2454/2924] bus: fsl-mc: fix double-free on mc_dev [ Upstream commit d694bf8a9acdbd061596f3e7549bc8cb70750a60 ] The blamed commit tried to simplify how the deallocations are done but, in the process, introduced a double-free on the mc_dev variable. In case the MC device is a DPRC, a new mc_bus is allocated and the mc_dev variable is just a reference to one of its fields. In this circumstance, on the error path only the mc_bus should be freed. This commit introduces back the following checkpatch warning which is a false-positive. WARNING: kfree(NULL) is safe and this check is probably not required + if (mc_bus) + kfree(mc_bus); Fixes: a042fbed0290 ("staging: fsl-mc: simplify couple of deallocations") Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20250408105814.2837951-2-ioana.ciornei@nxp.com Signed-off-by: Christophe Leroy Signed-off-by: Sasha Levin --- drivers/bus/fsl-mc/fsl-mc-bus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index a8be8cf246fb6f..0c3a38d7f3358c 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -906,8 +906,10 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc, error_cleanup_dev: kfree(mc_dev->regions); - kfree(mc_bus); - kfree(mc_dev); + if (mc_bus) + kfree(mc_bus); + else + kfree(mc_dev); return error; } From 73ca3e3649947a4602f174178e8d94471b2d9482 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 25 Apr 2025 14:39:28 +0100 Subject: [PATCH 2455/2924] bus: fsl_mc: Fix driver_managed_dma check [ Upstream commit 152f33ee30ee6a7f4c15bedd7529dc5945315547 ] Since it's not currently safe to take device_lock() in the IOMMU probe path, that can race against really_probe() setting dev->driver before attempting to bind. The race itself isn't so bad, since we're only concerned with dereferencing dev->driver itself anyway, but sadly my attempt to implement the check with minimal churn leads to a kind of TOCTOU issue, where dev->driver becomes valid after to_fsl_mc_driver(NULL) is already computed, and thus the check fails to work as intended. Will and I both hit this with the platform bus, but the pattern here is the same, so fix it for correctness too. Reported-by: Will McVicker Fixes: bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path") Signed-off-by: Robin Murphy Reviewed-by: Will McVicker Link: https://lore.kernel.org/r/20250425133929.646493-3-robin.murphy@arm.com Signed-off-by: Christophe Leroy Signed-off-by: Sasha Levin --- drivers/bus/fsl-mc/fsl-mc-bus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 0c3a38d7f3358c..7671bd15854551 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -139,9 +139,9 @@ static int fsl_mc_bus_uevent(const struct device *dev, struct kobj_uevent_env *e static int fsl_mc_dma_configure(struct device *dev) { + const struct device_driver *drv = READ_ONCE(dev->driver); struct device *dma_dev = dev; struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); - struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); u32 input_id = mc_dev->icid; int ret; @@ -153,8 +153,8 @@ static int fsl_mc_dma_configure(struct device *dev) else ret = acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id); - /* @mc_drv may not be valid when we're called from the IOMMU layer */ - if (!ret && dev->driver && !mc_drv->driver_managed_dma) { + /* @drv may not be valid when we're called from the IOMMU layer */ + if (!ret && drv && !to_fsl_mc_driver(drv)->driver_managed_dma) { ret = iommu_device_use_default_domain(dev); if (ret) arch_teardown_dma_ops(dev); From 52888dbb30964a6262adac1a886236e32c34d42f Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 5 May 2025 17:47:27 +0100 Subject: [PATCH 2456/2924] dt-bindings: vendor-prefixes: Add Liontron name [ Upstream commit 9baa27a2e9fc746143ab686b6dbe2d515284a4c5 ] Liontron is a company based in Shenzen, China, making industrial development boards and embedded computers, mostly using Rockchip and Allwinner SoCs. Add their name to the list of vendors. Signed-off-by: Andre Przywara Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20250505164729.18175-2-andre.przywara@arm.com Signed-off-by: Chen-Yu Tsai Signed-off-by: Sasha Levin --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 86f6a19b28ae21..190ab40cf23afc 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -864,6 +864,8 @@ patternProperties: description: Linux-specific binding "^linx,.*": description: Linx Technologies + "^liontron,.*": + description: Shenzhen Liontron Technology Co., Ltd "^liteon,.*": description: LITE-ON Technology Corp. "^litex,.*": From 2320c3dc151608d33f6d63d49ce166c6d5053afe Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 18 Mar 2025 15:21:59 +0200 Subject: [PATCH 2457/2924] ARM: dts: qcom: apq8064: add missing clocks to the timer node [ Upstream commit 4b0eb149df58b6750cd8113e5ee5b3ac7cc51743 ] In order to fix DT schema warning and describe hardware properly, add missing sleep clock to the timer node. Fixes: f335b8af4fd5 ("ARM: dts: qcom: Add initial APQ8064 SoC and IFC6410 board device trees") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250318-fix-nexus-4-v2-6-bcedd1406790@oss.qualcomm.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom/qcom-apq8064.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi index 5f1a6b4b764492..ba99e794dcd223 100644 --- a/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi @@ -326,6 +326,8 @@ ; reg = <0x0200a000 0x100>; clock-frequency = <27000000>; + clocks = <&sleep_clk>; + clock-names = "sleep"; cpu-offset = <0x80000>; }; From e04a59e5b397214c6c8b55b51d2ca8f88cab2188 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 18 Mar 2025 15:22:00 +0200 Subject: [PATCH 2458/2924] ARM: dts: qcom: apq8064 merge hw splinlock into corresponding syscon device [ Upstream commit 325c6a441ae1f8fcb1db9bb945b8bdbd3142141e ] Follow up the expected way of describing the SFPB hwspinlock and merge hwspinlock node into corresponding syscon node, fixing several dt-schema warnings. Fixes: 24a9baf933dc ("ARM: dts: qcom: apq8064: Add hwmutex and SMEM nodes") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250318-fix-nexus-4-v2-7-bcedd1406790@oss.qualcomm.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom/qcom-apq8064.dtsi | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi index ba99e794dcd223..41f8dcde20819b 100644 --- a/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi @@ -213,12 +213,6 @@ }; }; - sfpb_mutex: hwmutex { - compatible = "qcom,sfpb-mutex"; - syscon = <&sfpb_wrapper_mutex 0x604 0x4>; - #hwlock-cells = <1>; - }; - smem { compatible = "qcom,smem"; memory-region = <&smem_region>; @@ -305,9 +299,10 @@ pinctrl-0 = <&ps_hold_default_state>; }; - sfpb_wrapper_mutex: syscon@1200000 { - compatible = "syscon"; - reg = <0x01200000 0x8000>; + sfpb_mutex: hwmutex@1200600 { + compatible = "qcom,sfpb-mutex"; + reg = <0x01200600 0x100>; + #hwlock-cells = <1>; }; intc: interrupt-controller@2000000 { From f0e080573e47144baee228bd2b09a36124c6cd64 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 18 Mar 2025 15:22:03 +0200 Subject: [PATCH 2459/2924] ARM: dts: qcom: apq8064: move replicator out of soc node [ Upstream commit f2420037d90a8354594b3da541e19dcbb60c75e1 ] The CoreSight static replicator device isn't a part of the system MMIO bus, as such it should not be a part of the soc node. Follow the example of other platforms and move it out of the soc bus to the top-level (and reoder ports to follow alphabetic order). Fixes: 7a5c275fd821 ("ARM: dts: qcom: Add apq8064 CoreSight components") Signed-off-by: Dmitry Baryshkov Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250318-fix-nexus-4-v2-10-bcedd1406790@oss.qualcomm.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom/qcom-apq8064.dtsi | 67 ++++++++++++------------ 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi index 41f8dcde20819b..1dad4e4493926f 100644 --- a/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi @@ -278,6 +278,40 @@ }; }; + replicator { + compatible = "arm,coresight-static-replicator"; + + clocks = <&rpmcc RPM_QDSS_CLK>; + clock-names = "apb_pclk"; + + in-ports { + port { + replicator_in: endpoint { + remote-endpoint = <&funnel_out>; + }; + }; + }; + + out-ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + replicator_out0: endpoint { + remote-endpoint = <&etb_in>; + }; + }; + + port@1 { + reg = <1>; + replicator_out1: endpoint { + remote-endpoint = <&tpiu_in>; + }; + }; + }; + }; + soc: soc { #address-cells = <1>; #size-cells = <1>; @@ -1529,39 +1563,6 @@ }; }; - replicator { - compatible = "arm,coresight-static-replicator"; - - clocks = <&rpmcc RPM_QDSS_CLK>; - clock-names = "apb_pclk"; - - out-ports { - #address-cells = <1>; - #size-cells = <0>; - - port@0 { - reg = <0>; - replicator_out0: endpoint { - remote-endpoint = <&etb_in>; - }; - }; - port@1 { - reg = <1>; - replicator_out1: endpoint { - remote-endpoint = <&tpiu_in>; - }; - }; - }; - - in-ports { - port { - replicator_in: endpoint { - remote-endpoint = <&funnel_out>; - }; - }; - }; - }; - funnel@1a04000 { compatible = "arm,coresight-dynamic-funnel", "arm,primecell"; reg = <0x1a04000 0x1000>; From d2b2217a28c475c1c4adbb3cea5dc0fdc5406c60 Mon Sep 17 00:00:00 2001 From: Vignesh Raman Date: Mon, 12 May 2025 18:49:24 +0530 Subject: [PATCH 2460/2924] arm64: defconfig: mediatek: enable PHY drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f52cd248d844f9451858992f924988ac413fdc7e ] The mediatek display driver fails to probe on mt8173-elm-hana and mt8183-kukui-jacuzzi-juniper-sku16 in v6.14-rc4 due to missing PHY configurations. Commit 924d66011f24 ("drm/mediatek: stop selecting foreign drivers") stopped selecting the MediaTek PHY drivers, requiring them to be explicitly enabled in defconfig. Enable the following PHY drivers for MediaTek platforms: CONFIG_PHY_MTK_HDMI=m for HDMI display CONFIG_PHY_MTK_MIPI_DSI=m for DSI display CONFIG_PHY_MTK_DP=m for DP display Fixes: 924d66011f24 ("drm/mediatek: stop selecting foreign drivers") Reviewed-by: Nícolas F. R. A. Prado Signed-off-by: Vignesh Raman Link: https://lore.kernel.org/r/20250512131933.1247830-1-vignesh.raman@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/configs/defconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index c4ce2c67c0e067..8e5d4dbd74e50d 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1587,6 +1587,9 @@ CONFIG_PHY_HISTB_COMBPHY=y CONFIG_PHY_HISI_INNO_USB2=y CONFIG_PHY_MVEBU_CP110_COMPHY=y CONFIG_PHY_MTK_TPHY=y +CONFIG_PHY_MTK_HDMI=m +CONFIG_PHY_MTK_MIPI_DSI=m +CONFIG_PHY_MTK_DP=m CONFIG_PHY_QCOM_EDP=m CONFIG_PHY_QCOM_PCIE2=m CONFIG_PHY_QCOM_QMP=m From e5a87be0575db641d48ddd78f37d3658e52e91a0 Mon Sep 17 00:00:00 2001 From: Pengyu Luo Date: Sat, 5 Apr 2025 18:55:28 +0800 Subject: [PATCH 2461/2924] arm64: dts: qcom: sm8650: add the missing l2 cache node [ Upstream commit 4becd72352b6861de0c24074a8502ca85080fd63 ] Only two little a520s share the same L2, every a720 has their own L2 cache. Fixes: d2350377997f ("arm64: dts: qcom: add initial SM8650 dtsi") Signed-off-by: Pengyu Luo Reviewed-by: Konrad Dybcio Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250405105529.309711-1-mitltlatltl@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/sm8650.dtsi | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi index bf6590e09a4c4f..76acce6754986a 100644 --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi @@ -159,13 +159,20 @@ power-domain-names = "psci"; enable-method = "psci"; - next-level-cache = <&l2_200>; + next-level-cache = <&l2_300>; capacity-dmips-mhz = <1792>; dynamic-power-coefficient = <238>; qcom,freq-domain = <&cpufreq_hw 3>; #cooling-cells = <2>; + + l2_300: l2-cache { + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&l3_0>; + }; }; cpu4: cpu@400 { From e298135b68daed0a62c3419096294d644eda62ef Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 25 Apr 2025 17:18:09 +0200 Subject: [PATCH 2462/2924] arm64: dts: rockchip: disable unrouted USB controllers and PHY on RK3399 Puma [ Upstream commit 3373af1d76bacd054b37f3e10266dd335ce425f8 ] The u2phy1_host port is the part of the USB PHY1 (namely the HOST1_DP/DM lanes) which routes directly to the USB2.0 HOST controller[1]. The other lanes of the PHY are routed to the USB3.0 OTG controller (dwc3), which we do use. The HOST1_DP/DM lanes aren't routed on RK3399 Puma so let's simply disable the USB2.0 controllers and associated part in USB2.0 PHY. No intended functional change. [1] https://rockchip.fr/Rockchip%20RK3399%20TRM%20V1.3%20Part2.pdf Chapter 2 USB2.0 PHY Fixes: 2c66fc34e945 ("arm64: dts: rockchip: add RK3399-Q7 (Puma) SoM") Signed-off-by: Quentin Schulz Signed-off-by: Lukasz Czechowski Link: https://lore.kernel.org/r/20250425-onboard_usb_dev-v2-4-4a76a474a010@thaumatec.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 314d9dfdba5732..587e89d7fc5e42 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -585,10 +585,6 @@ u2phy1_otg: otg-port { status = "okay"; }; - - u2phy1_host: host-port { - status = "okay"; - }; }; &usbdrd3_1 { @@ -622,11 +618,3 @@ vdd2-supply = <&vcc3v3_sys>; }; }; - -&usb_host1_ehci { - status = "okay"; -}; - -&usb_host1_ohci { - status = "okay"; -}; From 1a6dbc7d7b7c14cf97aad4a62eed86e693b4e116 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Fri, 25 Apr 2025 17:18:10 +0200 Subject: [PATCH 2463/2924] arm64: dts: rockchip: disable unrouted USB controllers and PHY on RK3399 Puma with Haikou [ Upstream commit febd8c6ab52c683b447fe22fc740918c86feae43 ] The u2phy0_host port is the part of the USB PHY0 (namely the HOST0_DP/DM lanes) which routes directly to the USB2.0 HOST controller[1]. The other lanes of the PHY are routed to the USB3.0 OTG controller (dwc3), which we do use. The HOST0_DP/DM lanes aren't routed on RK3399 Puma so let's simply disable the USB2.0 controllers. USB3 OTG has been known to be unstable on RK3399 Puma Haikou for a while, one of the recurring issues being that only USB2 is detected and not USB3 in host mode. Reading the justification above and seeing that we are keeping u2phy0_host in the Haikou carrierboard DTS probably may have bothered you since it should be changed to u2phy0_otg. The issue is that if it's switched to that, USB OTG on Haikou is entirely broken. I have checked the routing in the Gerber file, the lanes are going to the expected ball pins (that is, NOT HOST0_DP/DM). u2phy0_host is for sure the wrong part of the PHY to use, but it's the only one that works at the moment for that board so keep it until we figure out what exactly is broken. No intended functional change. [1] https://rockchip.fr/Rockchip%20RK3399%20TRM%20V1.3%20Part2.pdf Chapter 2 USB2.0 PHY Fixes: 2c66fc34e945 ("arm64: dts: rockchip: add RK3399-Q7 (Puma) SoM") Signed-off-by: Quentin Schulz Signed-off-by: Lukasz Czechowski Link: https://lore.kernel.org/r/20250425-onboard_usb_dev-v2-5-4a76a474a010@thaumatec.com Signed-off-by: Heiko Stuebner Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index f2234dabd66411..70979079923c10 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -312,14 +312,6 @@ status = "okay"; }; -&usb_host0_ehci { - status = "okay"; -}; - -&usb_host0_ohci { - status = "okay"; -}; - &vopb { status = "okay"; }; From 82f94fdb918025da9dc1d9c20994defd00ab5076 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Feb 2025 22:41:18 +0200 Subject: [PATCH 2464/2924] arm64: dts: qcom: qcm2290: fix (some) of QUP interconnects [ Upstream commit e07d2d57a1c7254d25597dcdd34f318a91ec9398 ] While adding interconnect support for the QCM2290 platform some of them got the c&p error, rogue MASTER_APPSS_PROC for the config_noc interconnect. Turn that into SLAVE_QUP_0 as expected. Fixes: 5b970ff0193d ("arm64: dts: qcom: qcm2290: Hook up interconnects") Reported-by: Konrad Dybcio Reviewed-by: Konrad Dybcio Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250207-rb1-bt-v4-4-d810fc8c94a9@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/qcm2290.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/qcm2290.dtsi b/arch/arm64/boot/dts/qcom/qcm2290.dtsi index f0746123e594d5..6e3e57dd02612f 100644 --- a/arch/arm64/boot/dts/qcom/qcm2290.dtsi +++ b/arch/arm64/boot/dts/qcom/qcm2290.dtsi @@ -1073,7 +1073,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; @@ -1092,7 +1092,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; @@ -1137,7 +1137,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; @@ -1184,7 +1184,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; @@ -1231,7 +1231,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; @@ -1278,7 +1278,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; @@ -1297,7 +1297,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; @@ -1342,7 +1342,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; From d0c194350c4d65445a74d61fb51bf6e74c615045 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 8 Apr 2025 11:32:06 +0200 Subject: [PATCH 2465/2924] arm64: dts: qcom: msm8998: Use the header with DSI phy clock IDs [ Upstream commit f4220c41decc1944ef319c859840aa5405eee6fa ] Use the header with DSI phy clock IDs to make code more readable. Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20250408-dts-qcom-dsi-phy-clocks-v2-9-73b482a6dd02@linaro.org Signed-off-by: Bjorn Andersson Stable-dep-of: 7ebdb205d4b9 ("arm64: dts: qcom: msm8998: Remove mdss_hdmi_phy phandle argument") Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8998.dtsi | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index c2caad85c668df..7eca38440cd7ea 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -2,6 +2,7 @@ /* Copyright (c) 2016, The Linux Foundation. All rights reserved. */ #include +#include #include #include #include @@ -2790,10 +2791,10 @@ "gpll0_div"; clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>, <&gcc GCC_MMSS_GPLL0_CLK>, - <&mdss_dsi0_phy 1>, - <&mdss_dsi0_phy 0>, - <&mdss_dsi1_phy 1>, - <&mdss_dsi1_phy 0>, + <&mdss_dsi0_phy DSI_PIXEL_PLL_CLK>, + <&mdss_dsi0_phy DSI_BYTE_PLL_CLK>, + <&mdss_dsi1_phy DSI_PIXEL_PLL_CLK>, + <&mdss_dsi1_phy DSI_BYTE_PLL_CLK>, <&mdss_hdmi_phy 0>, <0>, <0>, @@ -2932,8 +2933,8 @@ "bus"; assigned-clocks = <&mmcc BYTE0_CLK_SRC>, <&mmcc PCLK0_CLK_SRC>; - assigned-clock-parents = <&mdss_dsi0_phy 0>, - <&mdss_dsi0_phy 1>; + assigned-clock-parents = <&mdss_dsi0_phy DSI_BYTE_PLL_CLK>, + <&mdss_dsi0_phy DSI_PIXEL_PLL_CLK>; operating-points-v2 = <&dsi_opp_table>; power-domains = <&rpmpd MSM8998_VDDCX>; @@ -3008,8 +3009,8 @@ "bus"; assigned-clocks = <&mmcc BYTE1_CLK_SRC>, <&mmcc PCLK1_CLK_SRC>; - assigned-clock-parents = <&mdss_dsi1_phy 0>, - <&mdss_dsi1_phy 1>; + assigned-clock-parents = <&mdss_dsi1_phy DSI_BYTE_PLL_CLK>, + <&mdss_dsi1_phy DSI_PIXEL_PLL_CLK>; operating-points-v2 = <&dsi_opp_table>; power-domains = <&rpmpd MSM8998_VDDCX>; From 8c3ece5ca8361b7404a396e3e8748e797be79ef7 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 27 Mar 2025 02:47:06 +0100 Subject: [PATCH 2466/2924] arm64: dts: qcom: msm8998: Remove mdss_hdmi_phy phandle argument [ Upstream commit 7ebdb205d4b9dd839a93a9b8975ce8ccf86b882a ] The node has #clock-cells = <0>, as it only provides a single clock output. This leads to a turbo sneaky bug, where the dt checker shows that we have additional clocks in the array: clock-controller@c8c0000: clocks: [[3, 0], [39, 178], [156, 1], [156, 0], [157, 1], [157, 0], [158], [0], [0], [0], [39, 184]] is too long ..which happens due to dtc interpreting <&mdss_hdmi_phy 0> as <&mdss_hdmi_phy>, <0> after taking cells into account. Remove the superfluous argument to both silence the warning and fix the index-based lookup of subsequent entries in "clocks". Fixes: 2150c87db80c ("arm64: dts: qcom: msm8998: add HDMI nodes") Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20250327-topic-more_dt_bindings_fixes-v2-4-b763d958545f@oss.qualcomm.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8998.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index 7eca38440cd7ea..fa6769320a238c 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -2795,7 +2795,7 @@ <&mdss_dsi0_phy DSI_BYTE_PLL_CLK>, <&mdss_dsi1_phy DSI_PIXEL_PLL_CLK>, <&mdss_dsi1_phy DSI_BYTE_PLL_CLK>, - <&mdss_hdmi_phy 0>, + <&mdss_hdmi_phy>, <0>, <0>, <&gcc GCC_MMSS_GPLL0_DIV_CLK>; From 8fe34788c8fc23e9eadf5254eaabf9634d5e9001 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 27 Mar 2025 02:47:14 +0100 Subject: [PATCH 2467/2924] arm64: dts: qcom: qcs615: Fix up UFS clocks [ Upstream commit ea172f61f4fdb17aaaf8def980ee309a3b727eea ] The clocks are out of order with the bindings' expectations. Reorder them to resolve the errors. Fixes: a6a9d10e7969 ("arm64: dts: qcom: qcs615: add UFS node") Signed-off-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250327-topic-more_dt_bindings_fixes-v2-12-b763d958545f@oss.qualcomm.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/qcs615.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/qcs615.dtsi b/arch/arm64/boot/dts/qcom/qcs615.dtsi index 8db06d17eb4746..12065484904380 100644 --- a/arch/arm64/boot/dts/qcom/qcs615.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs615.dtsi @@ -1022,10 +1022,10 @@ "bus_aggr_clk", "iface_clk", "core_clk_unipro", - "core_clk_ice", "ref_clk", "tx_lane0_sync_clk", - "rx_lane0_sync_clk"; + "rx_lane0_sync_clk", + "ice_core_clk"; resets = <&gcc GCC_UFS_PHY_BCR>; reset-names = "rst"; @@ -1060,10 +1060,10 @@ /bits/ 64 <0>, /bits/ 64 <0>, /bits/ 64 <37500000>, - /bits/ 64 <75000000>, /bits/ 64 <0>, /bits/ 64 <0>, - /bits/ 64 <0>; + /bits/ 64 <0>, + /bits/ 64 <75000000>; required-opps = <&rpmhpd_opp_low_svs>; }; @@ -1072,10 +1072,10 @@ /bits/ 64 <0>, /bits/ 64 <0>, /bits/ 64 <75000000>, - /bits/ 64 <150000000>, /bits/ 64 <0>, /bits/ 64 <0>, - /bits/ 64 <0>; + /bits/ 64 <0>, + /bits/ 64 <150000000>; required-opps = <&rpmhpd_opp_svs>; }; @@ -1084,10 +1084,10 @@ /bits/ 64 <0>, /bits/ 64 <0>, /bits/ 64 <150000000>, - /bits/ 64 <300000000>, /bits/ 64 <0>, /bits/ 64 <0>, - /bits/ 64 <0>; + /bits/ 64 <0>, + /bits/ 64 <300000000>; required-opps = <&rpmhpd_opp_nom>; }; }; From a38c42d49334820d5f0bb60263cba1bd899571b3 Mon Sep 17 00:00:00 2001 From: Thuan Nguyen Date: Mon, 19 May 2025 06:43:24 +0000 Subject: [PATCH 2468/2924] arm64: dts: renesas: white-hawk-ard-audio: Fix TPU0 groups [ Upstream commit 652eea251dd852f02cef6223f367220acb3d1867 ] White Hawk ARD audio uses a clock generated by the TPU, but commit 3d144ef10a44 ("pinctrl: renesas: r8a779g0: Fix TPU suffixes") renamed pin group "tpu_to0_a" to "tpu_to0_b". Update DTS accordingly otherwise the sound driver does not receive a clock signal. Fixes: 3d144ef10a448f89 ("pinctrl: renesas: r8a779g0: Fix TPU suffixes") Signed-off-by: Thuan Nguyen Signed-off-by: Duy Nguyen Reviewed-by: Geert Uytterhoeven Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/TYCPR01MB8740608B675365215ADB0374B49CA@TYCPR01MB8740.jpnprd01.prod.outlook.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/white-hawk-ard-audio-da7212.dtso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/renesas/white-hawk-ard-audio-da7212.dtso b/arch/arm64/boot/dts/renesas/white-hawk-ard-audio-da7212.dtso index c27b9b3d4e5f4a..f2d53e958da116 100644 --- a/arch/arm64/boot/dts/renesas/white-hawk-ard-audio-da7212.dtso +++ b/arch/arm64/boot/dts/renesas/white-hawk-ard-audio-da7212.dtso @@ -108,7 +108,7 @@ }; tpu0_pins: tpu0 { - groups = "tpu_to0_a"; + groups = "tpu_to0_b"; function = "tpu"; }; }; From 9b4129780a9d1246e803b6b112c5ee76c4f65a4a Mon Sep 17 00:00:00 2001 From: Julien Massot Date: Wed, 14 May 2025 10:19:58 +0200 Subject: [PATCH 2469/2924] arm64: dts: mt6359: Rename RTC node to match binding expectations [ Upstream commit cfe035d8662cfbd6edff9bd89c4b516bbb34c350 ] Rename the node 'mt6359rtc' to 'rtc', as required by the binding. Fix the following dtb-check error: mediatek/mt8395-radxa-nio-12l.dtb: pmic: 'mt6359rtc' do not match any of the regexes: 'pinctrl-[0-9]+' Fixes: 3b7d143be4b7 ("arm64: dts: mt6359: add PMIC MT6359 related nodes") Signed-off-by: Julien Massot Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20250514-mt8395-dtb-errors-v2-3-d67b9077c59a@collabora.com Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/mediatek/mt6359.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt6359.dtsi b/arch/arm64/boot/dts/mediatek/mt6359.dtsi index 0c479404b3fe3a..467d8a4c2aa7f1 100644 --- a/arch/arm64/boot/dts/mediatek/mt6359.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt6359.dtsi @@ -300,7 +300,7 @@ }; }; - mt6359rtc: mt6359rtc { + mt6359rtc: rtc { compatible = "mediatek,mt6358-rtc"; }; }; From b56ee5d79be1d99000a6424da932c83eeed53b05 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 15 May 2025 16:00:42 +0930 Subject: [PATCH 2470/2924] ARM: aspeed: Don't select SRAM [ Upstream commit e4f59f873c3ffe2a0150e11115a83e2dfb671dbf ] The ASPEED devices have SRAM, but don't require it for basic function (or any function; there's no known users of the driver). Fixes: 8c2ed9bcfbeb ("arm: Add Aspeed machine") Signed-off-by: Joel Stanley Link: https://patch.msgid.link/20250115103942.421429-1-joel@jms.id.au Signed-off-by: Andrew Jeffery Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/mach-aspeed/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mach-aspeed/Kconfig b/arch/arm/mach-aspeed/Kconfig index 080019aa6fcd89..fcf287edd0e5e6 100644 --- a/arch/arm/mach-aspeed/Kconfig +++ b/arch/arm/mach-aspeed/Kconfig @@ -2,7 +2,6 @@ menuconfig ARCH_ASPEED bool "Aspeed BMC architectures" depends on (CPU_LITTLE_ENDIAN && ARCH_MULTI_V5) || ARCH_MULTI_V6 || ARCH_MULTI_V7 - select SRAM select WATCHDOG select ASPEED_WATCHDOG select MFD_SYSCON From e5d0bfd2ee215f382fdd647974b1428b25dc408f Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 15 May 2025 16:00:43 +0930 Subject: [PATCH 2471/2924] soc: aspeed: lpc: Fix impossible judgment condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d9f0a97e859bdcef51f9c187b1eb712eb13fd3ff ] smatch error: drivers/soc/aspeed/aspeed-lpc-snoop.c:169 aspeed_lpc_snoop_config_irq() warn: platform_get_irq() does not return zero platform_get_irq() return non-zero IRQ number or negative error code, change '!lpc_snoop->irq' to 'lpc_snoop->irq < 0' to fix this. Fixes: 9f4f9ae81d0a ("drivers/misc: add Aspeed LPC snoop driver") Signed-off-by: Su Hui Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20231027020703.1231875-1-suhui@nfschina.com Signed-off-by: Andrew Jeffery Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- drivers/soc/aspeed/aspeed-lpc-snoop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/aspeed/aspeed-lpc-snoop.c b/drivers/soc/aspeed/aspeed-lpc-snoop.c index 9ab5ba9cf1d61c..57ba855f375d98 100644 --- a/drivers/soc/aspeed/aspeed-lpc-snoop.c +++ b/drivers/soc/aspeed/aspeed-lpc-snoop.c @@ -166,7 +166,7 @@ static int aspeed_lpc_snoop_config_irq(struct aspeed_lpc_snoop *lpc_snoop, int rc; lpc_snoop->irq = platform_get_irq(pdev, 0); - if (!lpc_snoop->irq) + if (lpc_snoop->irq < 0) return -ENODEV; rc = devm_request_irq(dev, lpc_snoop->irq, From 45b2e8b0fdd280aba04c3cc869e9ae500c44e4b7 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Thu, 15 May 2025 16:00:44 +0930 Subject: [PATCH 2472/2924] soc: aspeed: Add NULL check in aspeed_lpc_enable_snoop() [ Upstream commit f1706e0e1a74b095cbc60375b9b1e6205f5f4c98 ] devm_kasprintf() returns NULL when memory allocation fails. Currently, aspeed_lpc_enable_snoop() does not check for this case, which results in a NULL pointer dereference. Add NULL check after devm_kasprintf() to prevent this issue. Fixes: 3772e5da4454 ("drivers/misc: Aspeed LPC snoop output using misc chardev") Signed-off-by: Henry Martin Link: https://patch.msgid.link/20250401074647.21300-1-bsdhenrymartin@gmail.com [arj: Fix Fixes: tag to use subject from 3772e5da4454] Signed-off-by: Andrew Jeffery Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- drivers/soc/aspeed/aspeed-lpc-snoop.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/soc/aspeed/aspeed-lpc-snoop.c b/drivers/soc/aspeed/aspeed-lpc-snoop.c index 57ba855f375d98..ef8f355589a584 100644 --- a/drivers/soc/aspeed/aspeed-lpc-snoop.c +++ b/drivers/soc/aspeed/aspeed-lpc-snoop.c @@ -200,11 +200,15 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop, lpc_snoop->chan[channel].miscdev.minor = MISC_DYNAMIC_MINOR; lpc_snoop->chan[channel].miscdev.name = devm_kasprintf(dev, GFP_KERNEL, "%s%d", DEVICE_NAME, channel); + if (!lpc_snoop->chan[channel].miscdev.name) { + rc = -ENOMEM; + goto err_free_fifo; + } lpc_snoop->chan[channel].miscdev.fops = &snoop_fops; lpc_snoop->chan[channel].miscdev.parent = dev; rc = misc_register(&lpc_snoop->chan[channel].miscdev); if (rc) - return rc; + goto err_free_fifo; /* Enable LPC snoop channel at requested port */ switch (channel) { @@ -221,7 +225,8 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop, hicrb_en = HICRB_ENSNP1D; break; default: - return -EINVAL; + rc = -EINVAL; + goto err_misc_deregister; } regmap_update_bits(lpc_snoop->regmap, HICR5, hicr5_en, hicr5_en); @@ -231,6 +236,12 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop, regmap_update_bits(lpc_snoop->regmap, HICRB, hicrb_en, hicrb_en); + return 0; + +err_misc_deregister: + misc_deregister(&lpc_snoop->chan[channel].miscdev); +err_free_fifo: + kfifo_free(&lpc_snoop->chan[channel].fifo); return rc; } From b6d171e598307bda5054d9c34da88b015e0f4f77 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 28 May 2025 11:26:22 -0700 Subject: [PATCH 2473/2924] ubsan: integer-overflow: depend on BROKEN to keep this out of CI [ Upstream commit d6a0e0bfecccdcecb08defe75a137c7262352102 ] Depending on !COMPILE_TEST isn't sufficient to keep this feature out of CI because we can't stop it from being included in randconfig builds. This feature is still highly experimental, and is developed in lock-step with Clang's Overflow Behavior Types[1]. Depend on BROKEN to keep it from being enabled by anyone not expecting it. Link: https://discourse.llvm.org/t/rfc-v2-clang-introduce-overflowbehaviortypes-for-wrapping-and-non-wrapping-arithmetic/86507 [1] Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202505281024.f42beaa7-lkp@intel.com Fixes: 557f8c582a9b ("ubsan: Reintroduce signed overflow sanitizer") Acked-by: Eric Biggers Link: https://lore.kernel.org/r/20250528182616.work.296-kees@kernel.org Reviewed-by: Nathan Chancellor Acked-by: Marco Elver Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- lib/Kconfig.ubsan | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index f6ea0c5b5da393..96cd896684676d 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -118,6 +118,8 @@ config UBSAN_UNREACHABLE config UBSAN_INTEGER_WRAP bool "Perform checking for integer arithmetic wrap-around" + # This is very experimental so drop the next line if you really want it + depends on BROKEN depends on !COMPILE_TEST depends on $(cc-option,-fsanitize-undefined-ignore-overflow-pattern=all) depends on $(cc-option,-fsanitize=signed-integer-overflow) From 53784073cbad18f75583fd3da9ffdfc4d1f05405 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Wed, 14 May 2025 23:35:58 +0300 Subject: [PATCH 2474/2924] fbdev: core: fbcvt: avoid division by 0 in fb_cvt_hperiod() [ Upstream commit 3f6dae09fc8c306eb70fdfef70726e1f154e173a ] In fb_find_mode_cvt(), iff mode->refresh somehow happens to be 0x80000000, cvt.f_refresh will become 0 when multiplying it by 2 due to overflow. It's then passed to fb_cvt_hperiod(), where it's used as a divider -- division by 0 will result in kernel oops. Add a sanity check for cvt.f_refresh to avoid such overflow... Found by Linux Verification Center (linuxtesting.org) with the Svace static analysis tool. Fixes: 96fe6a2109db ("[PATCH] fbdev: Add VESA Coordinated Video Timings (CVT) support") Signed-off-by: Sergey Shtylyov Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/core/fbcvt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbcvt.c b/drivers/video/fbdev/core/fbcvt.c index 64843464c66135..cd3821bd82e566 100644 --- a/drivers/video/fbdev/core/fbcvt.c +++ b/drivers/video/fbdev/core/fbcvt.c @@ -312,7 +312,7 @@ int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb) cvt.f_refresh = cvt.refresh; cvt.interlace = 1; - if (!cvt.xres || !cvt.yres || !cvt.refresh) { + if (!cvt.xres || !cvt.yres || !cvt.refresh || cvt.f_refresh > INT_MAX) { printk(KERN_INFO "fbcvt: Invalid input parameters\n"); return 1; } From 57f7a1da0ec06d8579accaf77762d0128d13e4af Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Thu, 24 Apr 2025 15:16:48 +0800 Subject: [PATCH 2475/2924] watchdog: lenovo_se30_wdt: Fix possible devm_ioremap() NULL pointer dereference in lenovo_se30_wdt_probe() [ Upstream commit a4e2401438a26131ecff9be6a3a1d4cbfea66f9a ] devm_ioremap() returns NULL on error. Currently, lenovo_se30_wdt_probe() does not check for this case, which results in a NULL pointer dereference. Add NULL check after devm_ioremap() to prevent this issue. Fixes: c284153a2c55 ("watchdog: lenovo_se30_wdt: Watchdog driver for Lenovo SE30 platform") Signed-off-by: Henry Martin Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20250424071648.89016-1-bsdhenrymartin@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/lenovo_se30_wdt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/watchdog/lenovo_se30_wdt.c b/drivers/watchdog/lenovo_se30_wdt.c index 024b842499b368..1c73bb7eeeeed1 100644 --- a/drivers/watchdog/lenovo_se30_wdt.c +++ b/drivers/watchdog/lenovo_se30_wdt.c @@ -271,6 +271,8 @@ static int lenovo_se30_wdt_probe(struct platform_device *pdev) return -EBUSY; priv->shm_base_addr = devm_ioremap(dev, base_phys, SHM_WIN_SIZE); + if (!priv->shm_base_addr) + return -ENOMEM; priv->wdt_cfg.mod = WDT_MODULE; priv->wdt_cfg.idx = WDT_CFG_INDEX; From 4fb27da8a1508111057ca110d0bde80c396dd5cb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 26 Apr 2025 00:37:52 -0700 Subject: [PATCH 2476/2924] randstruct: gcc-plugin: Remove bogus void member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e136a4062174a9a8d1c1447ca040ea81accfa6a8 ] When building the randomized replacement tree of struct members, the randstruct GCC plugin would insert, as the first member, a 0-sized void member. This appears as though it was done to catch non-designated ("unnamed") static initializers, which wouldn't be stable since they depend on the original struct layout order. This was accomplished by having the side-effect of the "void member" tripping an assert in GCC internals (count_type_elements) if the member list ever needed to be counted (e.g. for figuring out the order of members during a non-designated initialization), which would catch impossible type (void) in the struct: security/landlock/fs.c: In function ‘hook_file_ioctl_common’: security/landlock/fs.c:1745:61: internal compiler error: in count_type_elements, at expr.cc:7075 1745 | .u.op = &(struct lsm_ioctlop_audit) { | ^ static HOST_WIDE_INT count_type_elements (const_tree type, bool for_ctor_p) { switch (TREE_CODE (type)) ... case VOID_TYPE: default: gcc_unreachable (); } } However this is a redundant safety measure since randstruct uses the __designated_initializer attribute both internally and within the __randomized_layout attribute macro so that this would be enforced by the compiler directly even when randstruct was not enabled (via -Wdesignated-init). A recent change in Landlock ended up tripping the same member counting routine when using a full-struct copy initializer as part of an anonymous initializer. This, however, is a false positive as the initializer is copying between identical structs (and hence identical layouts). The "path" member is "struct path", a randomized struct, and is being copied to from another "struct path", the "f_path" member: landlock_log_denial(landlock_cred(file->f_cred), &(struct landlock_request) { .type = LANDLOCK_REQUEST_FS_ACCESS, .audit = { .type = LSM_AUDIT_DATA_IOCTL_OP, .u.op = &(struct lsm_ioctlop_audit) { .path = file->f_path, .cmd = cmd, }, }, ... As can be seen with the coming randstruct KUnit test, there appears to be no behavioral problems with this kind of initialization when the void member is removed from the randstruct GCC plugin, so remove it. Reported-by: "Dr. David Alan Gilbert" Closes: https://lore.kernel.org/lkml/Z_PRaKx7q70MKgCA@gallifrey/ Reported-by: Mark Brown Closes: https://lore.kernel.org/lkml/20250407-kbuild-disable-gcc-plugins-v1-1-5d46ae583f5e@kernel.org/ Reported-by: WangYuli Closes: https://lore.kernel.org/lkml/337D5D4887277B27+3c677db3-a8b9-47f0-93a4-7809355f1381@uniontech.com/ Fixes: 313dd1b62921 ("gcc-plugins: Add the randstruct plugin") Signed-off-by: Kees Cook Stable-dep-of: f39f18f3c353 ("randstruct: gcc-plugin: Fix attribute addition") Signed-off-by: Sasha Levin --- scripts/gcc-plugins/randomize_layout_plugin.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 5694df3da2e95b..971a1908a8cc40 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -344,29 +344,13 @@ static int relayout_struct(tree type) shuffle(type, (tree *)newtree, shuffle_length); - /* - * set up a bogus anonymous struct field designed to error out on unnamed struct initializers - * as gcc provides no other way to detect such code - */ - list = make_node(FIELD_DECL); - TREE_CHAIN(list) = newtree[0]; - TREE_TYPE(list) = void_type_node; - DECL_SIZE(list) = bitsize_zero_node; - DECL_NONADDRESSABLE_P(list) = 1; - DECL_FIELD_BIT_OFFSET(list) = bitsize_zero_node; - DECL_SIZE_UNIT(list) = size_zero_node; - DECL_FIELD_OFFSET(list) = size_zero_node; - DECL_CONTEXT(list) = type; - // to satisfy the constify plugin - TREE_READONLY(list) = 1; - for (i = 0; i < num_fields - 1; i++) TREE_CHAIN(newtree[i]) = newtree[i+1]; TREE_CHAIN(newtree[num_fields - 1]) = NULL_TREE; main_variant = TYPE_MAIN_VARIANT(type); for (variant = main_variant; variant; variant = TYPE_NEXT_VARIANT(variant)) { - TYPE_FIELDS(variant) = list; + TYPE_FIELDS(variant) = newtree[0]; TYPE_ATTRIBUTES(variant) = copy_list(TYPE_ATTRIBUTES(variant)); TYPE_ATTRIBUTES(variant) = tree_cons(get_identifier("randomize_performed"), NULL_TREE, TYPE_ATTRIBUTES(variant)); TYPE_ATTRIBUTES(variant) = tree_cons(get_identifier("designated_init"), NULL_TREE, TYPE_ATTRIBUTES(variant)); From a255510f001905cfef7095aaadcfa3feb78e15c6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 30 May 2025 15:18:28 -0700 Subject: [PATCH 2477/2924] randstruct: gcc-plugin: Fix attribute addition [ Upstream commit f39f18f3c3531aa802b58a20d39d96e82eb96c14 ] Based on changes in the 2021 public version of the randstruct out-of-tree GCC plugin[1], more carefully update the attributes on resulting decls, to avoid tripping checks in GCC 15's comptypes_check_enum_int() when it has been configured with "--enable-checking=misc": arch/arm64/kernel/kexec_image.c:132:14: internal compiler error: in comptypes_check_enum_int, at c/c-typeck.cc:1519 132 | const struct kexec_file_ops kexec_image_ops = { | ^~~~~~~~~~~~~~ internal_error(char const*, ...), at gcc/gcc/diagnostic-global-context.cc:517 fancy_abort(char const*, int, char const*), at gcc/gcc/diagnostic.cc:1803 comptypes_check_enum_int(tree_node*, tree_node*, bool*), at gcc/gcc/c/c-typeck.cc:1519 ... Link: https://archive.org/download/grsecurity/grsecurity-3.1-5.10.41-202105280954.patch.gz [1] Reported-by: Thiago Jung Bauermann Closes: https://github.com/KSPP/linux/issues/367 Closes: https://lore.kernel.org/lkml/20250530000646.104457-1-thiago.bauermann@linaro.org/ Reported-by: Ingo Saitz Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1104745 Fixes: 313dd1b62921 ("gcc-plugins: Add the randstruct plugin") Tested-by: Thiago Jung Bauermann Link: https://lore.kernel.org/r/20250530221824.work.623-kees@kernel.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- scripts/gcc-plugins/gcc-common.h | 32 +++++++++++++++++++ scripts/gcc-plugins/randomize_layout_plugin.c | 22 ++++++------- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 3222c1070444fa..ef12c8f929eda3 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -123,6 +123,38 @@ static inline tree build_const_char_string(int len, const char *str) return cstr; } +static inline void __add_type_attr(tree type, const char *attr, tree args) +{ + tree oldattr; + + if (type == NULL_TREE) + return; + oldattr = lookup_attribute(attr, TYPE_ATTRIBUTES(type)); + if (oldattr != NULL_TREE) { + gcc_assert(TREE_VALUE(oldattr) == args || TREE_VALUE(TREE_VALUE(oldattr)) == TREE_VALUE(args)); + return; + } + + TYPE_ATTRIBUTES(type) = copy_list(TYPE_ATTRIBUTES(type)); + TYPE_ATTRIBUTES(type) = tree_cons(get_identifier(attr), args, TYPE_ATTRIBUTES(type)); +} + +static inline void add_type_attr(tree type, const char *attr, tree args) +{ + tree main_variant = TYPE_MAIN_VARIANT(type); + + __add_type_attr(TYPE_CANONICAL(type), attr, args); + __add_type_attr(TYPE_CANONICAL(main_variant), attr, args); + __add_type_attr(main_variant, attr, args); + + for (type = TYPE_NEXT_VARIANT(main_variant); type; type = TYPE_NEXT_VARIANT(type)) { + if (!lookup_attribute(attr, TYPE_ATTRIBUTES(type))) + TYPE_ATTRIBUTES(type) = TYPE_ATTRIBUTES(main_variant); + + __add_type_attr(TYPE_CANONICAL(type), attr, args); + } +} + #define PASS_INFO(NAME, REF, ID, POS) \ struct register_pass_info NAME##_pass_info = { \ .pass = make_##NAME##_pass(), \ diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 971a1908a8cc40..ff65a4f87f240a 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -73,6 +73,9 @@ static tree handle_randomize_layout_attr(tree *node, tree name, tree args, int f if (TYPE_P(*node)) { type = *node; + } else if (TREE_CODE(*node) == FIELD_DECL) { + *no_add_attrs = false; + return NULL_TREE; } else { gcc_assert(TREE_CODE(*node) == TYPE_DECL); type = TREE_TYPE(*node); @@ -348,15 +351,14 @@ static int relayout_struct(tree type) TREE_CHAIN(newtree[i]) = newtree[i+1]; TREE_CHAIN(newtree[num_fields - 1]) = NULL_TREE; + add_type_attr(type, "randomize_performed", NULL_TREE); + add_type_attr(type, "designated_init", NULL_TREE); + if (has_flexarray) + add_type_attr(type, "has_flexarray", NULL_TREE); + main_variant = TYPE_MAIN_VARIANT(type); - for (variant = main_variant; variant; variant = TYPE_NEXT_VARIANT(variant)) { + for (variant = main_variant; variant; variant = TYPE_NEXT_VARIANT(variant)) TYPE_FIELDS(variant) = newtree[0]; - TYPE_ATTRIBUTES(variant) = copy_list(TYPE_ATTRIBUTES(variant)); - TYPE_ATTRIBUTES(variant) = tree_cons(get_identifier("randomize_performed"), NULL_TREE, TYPE_ATTRIBUTES(variant)); - TYPE_ATTRIBUTES(variant) = tree_cons(get_identifier("designated_init"), NULL_TREE, TYPE_ATTRIBUTES(variant)); - if (has_flexarray) - TYPE_ATTRIBUTES(type) = tree_cons(get_identifier("has_flexarray"), NULL_TREE, TYPE_ATTRIBUTES(type)); - } /* * force a re-layout of the main variant @@ -424,10 +426,8 @@ static void randomize_type(tree type) if (lookup_attribute("randomize_layout", TYPE_ATTRIBUTES(TYPE_MAIN_VARIANT(type))) || is_pure_ops_struct(type)) relayout_struct(type); - for (variant = TYPE_MAIN_VARIANT(type); variant; variant = TYPE_NEXT_VARIANT(variant)) { - TYPE_ATTRIBUTES(type) = copy_list(TYPE_ATTRIBUTES(type)); - TYPE_ATTRIBUTES(type) = tree_cons(get_identifier("randomize_considered"), NULL_TREE, TYPE_ATTRIBUTES(type)); - } + add_type_attr(type, "randomize_considered", NULL_TREE); + #ifdef __DEBUG_PLUGIN fprintf(stderr, "Marking randomize_considered on struct %s\n", ORIG_TYPE_NAME(type)); #ifdef __DEBUG_VERBOSE From 1227324427b55311fa6a70c91616d5e420f6ff93 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 4 Apr 2025 15:37:45 -0300 Subject: [PATCH 2478/2924] tools build: Don't set libunwind as available if test-all.c build succeeds [ Upstream commit 6559b83e4e71ba77461c8d6e6af7b89693c8e677 ] The tools/build/feature/test-all.c file tries to detect the expected, most common set of libraries/features we expect to have available to build perf with. At some point libunwind was deemed not to be part of that set of libries, but the patches making it to be opt-in ended up forgetting some details, fix one more. Testing it: $ rm -rf /tmp/build/$(basename $PWD)/ ; mkdir -p /tmp/build/$(basename $PWD)/ $ rpm -q libunwind-devel libunwind-devel-1.8.0-3.fc40.x86_64 $ make -k LIBUNWIND=1 CORESIGHT=1 O=/tmp/build/$(basename $PWD)/ -C tools/perf install-bin |& grep unwind && ldd ~/bin/perf | grep unwind ... libunwind: [ on ] CC /tmp/build/perf-tools-next/arch/x86/tests/dwarf-unwind.o CC /tmp/build/perf-tools-next/arch/x86/util/unwind-libunwind.o CC /tmp/build/perf-tools-next/util/arm64-frame-pointer-unwind-support.o CC /tmp/build/perf-tools-next/tests/dwarf-unwind.o CC /tmp/build/perf-tools-next/util/unwind-libunwind-local.o CC /tmp/build/perf-tools-next/util/unwind-libunwind.o libunwind-x86_64.so.8 => /lib64/libunwind-x86_64.so.8 (0x00007f615a549000) libunwind.so.8 => /lib64/libunwind.so.8 (0x00007f615a52f000) $ sudo rpm -e libunwind-devel $ rm -rf /tmp/build/$(basename $PWD)/ ; mkdir -p /tmp/build/$(basename $PWD)/ $ make -k LIBUNWIND=1 CORESIGHT=1 O=/tmp/build/$(basename $PWD)/ -C tools/perf install-bin |& grep unwind && ldd ~/bin/perf | grep unwind Makefile.config:653: No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR ... libunwind: [ OFF ] CC /tmp/build/perf-tools-next/arch/x86/tests/dwarf-unwind.o CC /tmp/build/perf-tools-next/arch/x86/util/unwind-libdw.o CC /tmp/build/perf-tools-next/util/arm64-frame-pointer-unwind-support.o CC /tmp/build/perf-tools-next/tests/dwarf-unwind.o CC /tmp/build/perf-tools-next/util/unwind-libdw.o $ Should be in a separate patch, but tired now, so also adding a message about the need to use LIBUNWIND=1 in the output when its not available, so done here as well. So, now when the devel files are not available we get: $ make -k LIBUNWIND=1 CORESIGHT=1 O=/tmp/build/$(basename $PWD)/ -C tools/perf install-bin |& grep unwind && ldd ~/bin/perf | grep unwind Makefile.config:653: No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR and set LIBUNWIND=1 in the make command line as it is opt-in now ... libunwind: [ OFF ] $ Fixes: 13e17c9ff49119aa ("perf build: Make libunwind opt-in rather than opt-out") Reported-by: Ingo Molnar Tested-by: Ingo Molnar Cc: Adrian Hunter Cc: Dmitriy Vyukov Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/Z_AnsW9oJzFbhIFC@x1 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/build/Makefile.feature | 1 - tools/perf/Makefile.config | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 1f44ca677ad3d6..48e3f124b98acb 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -87,7 +87,6 @@ FEATURE_TESTS_BASIC := \ libtracefs \ libcpupower \ libcrypto \ - libunwind \ pthread-attr-setaffinity-np \ pthread-barrier \ reallocarray \ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b7769a22fe1afa..e0c20a5c19cfe2 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -625,6 +625,8 @@ endif ifndef NO_LIBUNWIND have_libunwind := + $(call feature_check,libunwind) + $(call feature_check,libunwind-x86) ifeq ($(feature-libunwind-x86), 1) $(call detected,CONFIG_LIBUNWIND_X86) @@ -649,7 +651,7 @@ ifndef NO_LIBUNWIND endif ifneq ($(feature-libunwind), 1) - $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR) + $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR and set LIBUNWIND=1 in the make command line as it is opt-in now) NO_LOCAL_LIBUNWIND := 1 else have_libunwind := 1 From 4c775e56c0cb07ccb5d88ea0d1beea7f42cb4b27 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Apr 2025 15:03:11 -0300 Subject: [PATCH 2479/2924] tools build: Don't show libunwind build status as it is opt-in [ Upstream commit a3a40391292273cf78a7920c119cdc386d694c38 ] Since 13e17c9ff49119aa ("perf build: Make libunwind opt-in rather than opt-out") doesn't try to build with libunwind, so showing that it is OFF when building causes just distraction, remove it from FEATURES_DISPLAY. For people that for some reason notice that there is always 'perf -vv', a short hand for 'perf version --build-options' and 'perf check feature libunwind' that now explains why it is not built: $ perf check feature libunwind libunwind: [ OFF ] # HAVE_LIBUNWIND_SUPPORT ( tip: Deprecated, use LIBUNWIND=1 and install libunwind-dev[el] to build with it ) $ Fixes: 13e17c9ff49119aa ("perf build: Make libunwind opt-in rather than opt-out") Reported-by: Ingo Molnar Tested-by: Ingo Molnar Cc: Adrian Hunter Cc: Dmitriy Vyukov Cc: Howard Chu Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/Z--pWmTHGb62_83e@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/build/Makefile.feature | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 48e3f124b98acb..357749701239f6 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -155,7 +155,6 @@ FEATURE_DISPLAY ?= \ libperl \ libpython \ libcrypto \ - libunwind \ libcapstone \ llvm-perf \ zlib \ From 9f54a3878959729b15c4a4a10dabafccba634ff7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Apr 2025 11:37:20 -0300 Subject: [PATCH 2480/2924] perf build: Warn when libdebuginfod devel files are not available [ Upstream commit 4fce4b91fd1aabb326c46e237eb4b19ab72598f8 ] While working on 'perf version --build-options' I noticed that: $ perf version --build-options perf version 6.15.rc1.g312a07a00d31 aio: [ on ] # HAVE_AIO_SUPPORT bpf: [ on ] # HAVE_LIBBPF_SUPPORT bpf_skeletons: [ on ] # HAVE_BPF_SKEL debuginfod: [ OFF ] # HAVE_DEBUGINFOD_SUPPORT And looking at tools/perf/Makefile.config I also noticed that it is not opt-in, meaning we will attempt to build with it in all normal cases. So add the usual warning at build time to let the user know that something recommended is missing, now we see: Makefile.config:563: No elfutils/debuginfod.h found, no debuginfo server support, please install elfutils-debuginfod-client-devel or equivalent And after following the recommendation: $ perf check feature debuginfod debuginfod: [ on ] # HAVE_DEBUGINFOD_SUPPORT $ ldd ~/bin/perf | grep debuginfo libdebuginfod.so.1 => /lib64/libdebuginfod.so.1 (0x00007fee5cf5f000) $ With this feature on several perf tools will fetch what is needed and not require all the contents of the debuginfo packages, for instance: # rpm -qa | grep kernel-debuginfo # pahole --running_kernel_vmlinux pahole: couldn't find a vmlinux that matches the running kernel HINT: Maybe you're inside a container or missing a debuginfo package? # # perf trace -e open* perf probe --vars icmp_rcv 0.000 ( 0.005 ms): perf/97391 openat(dfd: CWD, filename: "/etc/ld.so.cache", flags: RDONLY|CLOEXEC) = 3 0.014 ( 0.004 ms): perf/97391 openat(dfd: CWD, filename: "/lib64/libm.so.6", flags: RDONLY|CLOEXEC) = 3 32130.100 ( 0.008 ms): perf/97391 openat(dfd: CWD, filename: "/root/.cache/debuginfod_client/aa3c82b4a13f9c0e0301bebb20fe958c4db6f362/debuginfo") = 3 Available variables at icmp_rcv @ struct sk_buff* skb # # pahole --running_kernel_vmlinux /root/.cache/debuginfod_client/aa3c82b4a13f9c0e0301bebb20fe958c4db6f362/debuginfo # file /root/.cache/debuginfod_client/aa3c82b4a13f9c0e0301bebb20fe958c4db6f362/debuginfo /root/.cache/debuginfod_client/aa3c82b4a13f9c0e0301bebb20fe958c4db6f362/debuginfo: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), statically linked, BuildID[sha1]=aa3c82b4a13f9c0e0301bebb20fe958c4db6f362, with debug_info, not stripped # ls -la /root/.cache/debuginfod_client/aa3c82b4a13f9c0e0301bebb20fe958c4db6f362/debuginfo -r--------. 1 root root 475401512 Mar 27 21:00 /root/.cache/debuginfod_client/aa3c82b4a13f9c0e0301bebb20fe958c4db6f362/debuginfo # Then, cached: # perf stat --null perf probe --vars icmp_rcv Available variables at icmp_rcv @ struct sk_buff* skb Performance counter stats for 'perf probe --vars icmp_rcv': 0.671389041 seconds time elapsed 0.519176000 seconds user 0.150860000 seconds sys Fixes: c7a14fdcb3fa7736 ("perf build-ids: Fall back to debuginfod query if debuginfo not found") Tested-by: Ingo Molnar Cc: Adrian Hunter Cc: Dmitriy Vyukov Cc: Howard Chu Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Frank Ch. Eigler Link: https://lore.kernel.org/r/Z_dkNDj9EPFwPqq1@gmail.com [ Folded patch from Ingo to have the debian/ubuntu devel package added build warning message ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Makefile.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index e0c20a5c19cfe2..d1ea7bf449647e 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -560,6 +560,8 @@ ifndef NO_LIBELF ifeq ($(feature-libdebuginfod), 1) CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT EXTLIBS += -ldebuginfod + else + $(warning No elfutils/debuginfod.h found, no debuginfo server support, please install libdebuginfod-dev/elfutils-debuginfod-client-devel or equivalent) endif endif From 38b7e7c15515c80d5e4d6919798a63ea3cc0b2d9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 8 Apr 2025 15:03:11 -0300 Subject: [PATCH 2481/2924] tools build: Don't show libbfd build status as it is opt-in [ Upstream commit e0eb84cd518084582c0d1db5d904f31b16902fdc ] Since dd317df072071903 ("perf build: Make binutil libraries opt in") doesn't try to build with binutils libraries, so showing that it is OFF when building causes just distraction, remove it from FEATURES_DISPLAY. For people that for some reason notice that there is always 'perf -vv', a short hand for 'perf version --build-options' and 'perf check feature libbfd' that now explains why it is not built: $ perf check feature libbfd libbfd: [ OFF ] # HAVE_LIBBFD_SUPPORT ( tip: Deprecated, license incompatibility, use BUILD_NONDISTRO=1 and install binutils-dev[el] ) $ Fixes: dd317df072071903 ("perf build: Make binutil libraries opt in") Tested-by: Ingo Molnar Cc: Adrian Hunter Cc: Dmitriy Vyukov Cc: Howard Chu Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/Z--pWmTHGb62_83e@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/build/Makefile.feature | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 357749701239f6..57bd995ce6afa3 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -147,8 +147,6 @@ endif FEATURE_DISPLAY ?= \ libdw \ glibc \ - libbfd \ - libbfd-buildid \ libelf \ libnuma \ numa_num_possible_cpus \ From ac1dad53aca4810e3401a14d02c76a7c6398c98d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Apr 2025 21:58:19 -0300 Subject: [PATCH 2482/2924] perf ui browser hists: Set actions->thread before calling do_zoom_thread() [ Upstream commit 1741189d843a1d5ef38538bc52a3760e2e46cb2e ] In 7cecb7fe8388d5c3 ("perf hists: Move sort__has_comm into struct perf_hpp_list") it assumes that act->thread is set prior to calling do_zoom_thread(). This doesn't happen when we use ESC or the Left arrow key to Zoom out of a specific thread, making this operation not to work and we get stuck into the thread zoom. In 6422184b087ff435 ("perf hists browser: Simplify zooming code using pstack_peek()") it says no need to set actions->thread, and at that point that was true, but in 7cecb7fe8388d5c3 a actions->thread == NULL check was added before the zoom out of thread could kick in. We can zoom out using the alternative 't' thread zoom toggle hotkey to finally set actions->thread before calling do_zoom_thread() and zoom out, but lets also fix the ESC/Zoom out of thread case. Fixes: 7cecb7fe8388d5c3 ("perf hists: Move sort__has_comm into struct perf_hpp_list") Reported-by: Ingo Molnar Tested-by: Ingo Molnar Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/r/Z_TYux5fUg2pW-pF@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/ui/browsers/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 35c10509b797f3..992d1c822a97a8 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3274,10 +3274,10 @@ static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *h /* * No need to set actions->dso here since * it's just to remove the current filter. - * Ditto for thread below. */ do_zoom_dso(browser, actions); } else if (top == &browser->hists->thread_filter) { + actions->thread = thread; do_zoom_thread(browser, actions); } else if (top == &browser->hists->socket_filter) { do_zoom_socket(browser, actions); From 5e87a26a7f0a5ef1ed05cd2d78f467fcfecba7ae Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 10 Apr 2025 15:49:38 -0400 Subject: [PATCH 2483/2924] dm: don't change md if dm_table_set_restrictions() fails [ Upstream commit 9eb7109a5bfc5b8226e9517e9f3cc6d414391884 ] __bind was changing the disk capacity, geometry and mempools of the mapped device before calling dm_table_set_restrictions() which could fail, forcing dm to drop the new table. Failing here would leave the device using the old table but with the wrong capacity and mempools. Move dm_table_set_restrictions() earlier in __bind(). Since it needs the capacity to be set, save the old version and restore it on failure. Fixes: bb37d77239af2 ("dm: introduce zone append emulation") Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Sasha Levin --- drivers/md/dm.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5ab7574c0c76ab..f5c5ccb6f8d25a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2421,21 +2421,29 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct queue_limits *limits) { struct dm_table *old_map; - sector_t size; + sector_t size, old_size; int ret; lockdep_assert_held(&md->suspend_lock); size = dm_table_get_size(t); + old_size = dm_get_size(md); + set_capacity(md->disk, size); + + ret = dm_table_set_restrictions(t, md->queue, limits); + if (ret) { + set_capacity(md->disk, old_size); + old_map = ERR_PTR(ret); + goto out; + } + /* * Wipe any geometry if the size of the table changed. */ - if (size != dm_get_size(md)) + if (size != old_size) memset(&md->geometry, 0, sizeof(md->geometry)); - set_capacity(md->disk, size); - dm_table_event_callback(t, event_callback, md); if (dm_table_request_based(t)) { @@ -2468,12 +2476,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, t->mempools = NULL; } - ret = dm_table_set_restrictions(t, md->queue, limits); - if (ret) { - old_map = ERR_PTR(ret); - goto out; - } - old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); rcu_assign_pointer(md->map, (void *)t); md->immutable_target_type = dm_table_get_immutable_target_type(t); From 32e26cc61ed9b882ea4ce74353283c535ab22566 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 10 Apr 2025 15:49:39 -0400 Subject: [PATCH 2484/2924] dm: free table mempools if not used in __bind [ Upstream commit e8819e7f03470c5b468720630d9e4e1d5b99159e ] With request-based dm, the mempools don't need reloading when switching tables, but the unused table mempools are not freed until the active table is finally freed. Free them immediately if they are not needed. Fixes: 29dec90a0f1d9 ("dm: fix bio_set allocation") Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Sasha Levin --- drivers/md/dm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f5c5ccb6f8d25a..292414da871daf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2461,10 +2461,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, * requests in the queue may refer to bio from the old bioset, * so you must walk through the queue to unprep. */ - if (!md->mempools) { + if (!md->mempools) md->mempools = t->mempools; - t->mempools = NULL; - } + else + dm_free_md_mempools(t->mempools); } else { /* * The md may already have mempools that need changing. @@ -2473,8 +2473,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, */ dm_free_md_mempools(md->mempools); md->mempools = t->mempools; - t->mempools = NULL; } + t->mempools = NULL; old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); rcu_assign_pointer(md->map, (void *)t); From 3119faa5ad50e91345fe4ed9517a2c03ba76450b Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 10 Apr 2025 15:49:40 -0400 Subject: [PATCH 2485/2924] dm: handle failures in dm_table_set_restrictions [ Upstream commit 4ea30ec6fb3bb598bd1df04cdfab13b1140074d2 ] If dm_table_set_restrictions() fails while swapping tables, device-mapper will continue using the previous table. It must be sure to leave the mapped_device in it's previous state on failure. Otherwise device-mapper could end up using the old table with settings from the unused table. Do not update the mapped device in dm_set_zones_restrictions(). Wait till after dm_table_set_restrictions() is sure to succeed to update the md zoned settings. Do the same with the dax settings, and if dm_revalidate_zones() fails, restore the original queue limits. Fixes: 7f91ccd8a608d ("dm: Call dm_revalidate_zones() after setting the queue limits") Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Sasha Levin --- drivers/md/dm-table.c | 26 +++++++++++++++++--------- drivers/md/dm-zone.c | 26 ++++++++++++++++++-------- drivers/md/dm.h | 1 + 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 6b23e777e10e7f..9cf82e0513c16e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1834,6 +1834,7 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { int r; + struct queue_limits old_limits; if (!dm_table_supports_nowait(t)) limits->features &= ~BLK_FEAT_NOWAIT; @@ -1860,16 +1861,11 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (dm_table_supports_flush(t)) limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA; - if (dm_table_supports_dax(t, device_not_dax_capable)) { + if (dm_table_supports_dax(t, device_not_dax_capable)) limits->features |= BLK_FEAT_DAX; - if (dm_table_supports_dax(t, device_not_dax_synchronous_capable)) - set_dax_synchronous(t->md->dax_dev); - } else + else limits->features &= ~BLK_FEAT_DAX; - if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) - dax_write_cache(t->md->dax_dev, true); - /* For a zoned table, setup the zone related queue attributes. */ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && (limits->features & BLK_FEAT_ZONED)) { @@ -1881,7 +1877,8 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (dm_table_supports_atomic_writes(t)) limits->features |= BLK_FEAT_ATOMIC_WRITES; - r = queue_limits_set(q, limits); + old_limits = queue_limits_start_update(q); + r = queue_limits_commit_update(q, limits); if (r) return r; @@ -1892,10 +1889,21 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && (limits->features & BLK_FEAT_ZONED)) { r = dm_revalidate_zones(t, q); - if (r) + if (r) { + queue_limits_set(q, &old_limits); return r; + } } + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) + dm_finalize_zone_settings(t, limits); + + if (dm_table_supports_dax(t, device_not_dax_synchronous_capable)) + set_dax_synchronous(t->md->dax_dev); + + if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) + dax_write_cache(t->md->dax_dev, true); + dm_update_crypto_profile(q, t); return 0; } diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 20edd3fabbabfe..681058feb63b5a 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -340,12 +340,8 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, * mapped device queue as needing zone append emulation. */ WARN_ON_ONCE(queue_is_mq(q)); - if (dm_table_supports_zone_append(t)) { - clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); - } else { - set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); + if (!dm_table_supports_zone_append(t)) lim->max_hw_zone_append_sectors = 0; - } /* * Determine the max open and max active zone limits for the mapped @@ -383,9 +379,6 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, lim->zone_write_granularity = 0; lim->chunk_sectors = 0; lim->features &= ~BLK_FEAT_ZONED; - clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); - md->nr_zones = 0; - disk->nr_zones = 0; return 0; } @@ -408,6 +401,23 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, return 0; } +void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim) +{ + struct mapped_device *md = t->md; + + if (lim->features & BLK_FEAT_ZONED) { + if (dm_table_supports_zone_append(t)) + clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); + else + set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); + } else { + clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); + md->nr_zones = 0; + md->disk->nr_zones = 0; + } +} + + /* * IO completion callback called from clone_endio(). */ diff --git a/drivers/md/dm.h b/drivers/md/dm.h index a0a8ff11981580..e5d3a9f46a912c 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -102,6 +102,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t); int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *lim); int dm_revalidate_zones(struct dm_table *t, struct request_queue *q); +void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim); void dm_zone_endio(struct dm_io *io, struct bio *clone); #ifdef CONFIG_BLK_DEV_ZONED int dm_blk_report_zones(struct gendisk *disk, sector_t sector, From 4a715be3fe80b68fa55cb3569af3d294be101626 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Tue, 1 Apr 2025 17:16:47 +0800 Subject: [PATCH 2486/2924] backlight: pm8941: Add NULL check in wled_configure() [ Upstream commit e12d3e1624a02706cdd3628bbf5668827214fa33 ] devm_kasprintf() returns NULL when memory allocation fails. Currently, wled_configure() does not check for this case, which results in a NULL pointer dereference. Add NULL check after devm_kasprintf() to prevent this issue. Fixes: f86b77583d88 ("backlight: pm8941: Convert to using %pOFn instead of device_node.name") Signed-off-by: Henry Martin Reviewed-by: Dmitry Baryshkov Reviewed-by: "Daniel Thompson (RISCstar)" Link: https://lore.kernel.org/r/20250401091647.22784-1-bsdhenrymartin@gmail.com Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/video/backlight/qcom-wled.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/video/backlight/qcom-wled.c b/drivers/video/backlight/qcom-wled.c index 9afe701b2a1b64..a63bb42c8f8b03 100644 --- a/drivers/video/backlight/qcom-wled.c +++ b/drivers/video/backlight/qcom-wled.c @@ -1406,9 +1406,11 @@ static int wled_configure(struct wled *wled) wled->ctrl_addr = be32_to_cpu(*prop_addr); rc = of_property_read_string(dev->of_node, "label", &wled->name); - if (rc) + if (rc) { wled->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFn", dev->of_node); - + if (!wled->name) + return -ENOMEM; + } switch (wled->version) { case 3: u32_opts = wled3_opts; From 9617e02ca67f560e9c106beec6e0ea9afb47179e Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Thu, 17 Apr 2025 21:50:23 +0800 Subject: [PATCH 2487/2924] HID: intel-thc-hid: intel-quicki2c: pass correct arguments to acpi_evaluate_object [ Upstream commit 37d66cf07871927ea682c491b9e9a12dd73e6b5b ] Delete unused argument, pass correct argument to acpi_evaluate_object. Log: intel_quicki2c 0000:00:10.0: enabling device (0000 -> 0002) ACPI: \_SB.PC00.THC0.ICRS: 1 arguments were passed to a non-method ACPI object (Buffer) (20230628/nsarguments-211) ACPI: \_SB.PC00.THC0.ISUB: 1 arguments were passed to a non-method ACPI object (Buffer) (20230628/nsarguments-211) Fixes: 5282e45ccbfa ("HID: intel-thc-hid: intel-quicki2c: Add THC QuickI2C ACPI interfaces") Signed-off-by: Wentao Guan Signed-off-by: WangYuli Reviewed-by: Even Xu Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c index fa51155ebe3937..8a8c4a46f92700 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c @@ -82,15 +82,10 @@ static int quicki2c_acpi_get_dsd_property(struct acpi_device *adev, acpi_string { acpi_handle handle = acpi_device_handle(adev); struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object obj = { .type = type }; - struct acpi_object_list arg_list = { - .count = 1, - .pointer = &obj, - }; union acpi_object *ret_obj; acpi_status status; - status = acpi_evaluate_object(handle, dsd_method_name, &arg_list, &buffer); + status = acpi_evaluate_object(handle, dsd_method_name, NULL, &buffer); if (ACPI_FAILURE(status)) { acpi_handle_err(handle, "Can't evaluate %s method: %d\n", dsd_method_name, status); From 632145e230f10f7ae8f3797b22b20dc0de89e49a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Apr 2025 10:30:56 +0200 Subject: [PATCH 2488/2924] HID: HID_APPLETB_KBD should depend on X86 [ Upstream commit 2a647d400afecdf12ba5905424e1337fbc2d6750 ] The Apple Touch Bar is only present on x86 MacBook Pros. Hence add a dependency on X86, to prevent asking the user about this driver when configuring a kernel for a different architecture. Fixes: 8e9b9152cfbdc2a9 ("HID: hid-appletb-kbd: add driver for the keyboard mode of Apple Touch Bars") Signed-off-by: Geert Uytterhoeven Reviewed-by: Aditya Garg Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index a503252702b7b4..119e5190a2df78 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -163,6 +163,7 @@ config HID_APPLETB_KBD depends on USB_HID depends on BACKLIGHT_CLASS_DEVICE depends on INPUT + depends on X86 || COMPILE_TEST select INPUT_SPARSEKMAP select HID_APPLETB_BL help From 1944078e50a2ea288eb085aca44cbba4feacc90d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 10 Apr 2025 10:30:57 +0200 Subject: [PATCH 2489/2924] HID: HID_APPLETB_BL should depend on X86 [ Upstream commit de7ad66b16b4d397593eafbbb09e9557b558a7e3 ] The Apple Touch Bar is only present on x86 MacBook Pros. Hence add a dependency on X86, to prevent asking the user about this driver when configuring a kernel for a different architecture. Fixes: 1fd41e5e3d7cc556 ("HID: hid-appletb-bl: add driver for the backlight of Apple Touch Bars") Signed-off-by: Geert Uytterhoeven Reviewed-by: Aditya Garg Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 119e5190a2df78..43859fc757470c 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -151,6 +151,7 @@ config HID_APPLEIR config HID_APPLETB_BL tristate "Apple Touch Bar Backlight" depends on BACKLIGHT_CLASS_DEVICE + depends on X86 || COMPILE_TEST help Say Y here if you want support for the backlight of Touch Bars on x86 MacBook Pros. From 0af01ed5d0607a81101d906d2677999b7c4db719 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 1 Apr 2025 09:34:40 -0700 Subject: [PATCH 2490/2924] x86/irq: Ensure initial PIR loads are performed exactly once [ Upstream commit 600e9606046ac3b9b7a3f0500d08a179df84c45e ] Ensure the PIR is read exactly once at the start of handle_pending_pir(), to guarantee that checking for an outstanding posted interrupt in a given chuck doesn't reload the chunk from the "real" PIR. Functionally, a reload is benign, but it would defeat the purpose of pre-loading into a copy. Fixes: 1b03d82ba15e ("x86/irq: Install posted MSI notification handler") Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20250401163447.846608-2-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Sasha Levin --- arch/x86/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 81f9b78e0f7baa..6cd5d2d6c58af6 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -419,7 +419,7 @@ static __always_inline bool handle_pending_pir(u64 *pir, struct pt_regs *regs) bool handled = false; for (i = 0; i < 4; i++) - pir_copy[i] = pir[i]; + pir_copy[i] = READ_ONCE(pir[i]); for (i = 0; i < 4; i++) { if (!pir_copy[i]) From 980f15017894f7a8a2d4217bc6696428d4d1dc6a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 22 Apr 2025 22:03:58 -0700 Subject: [PATCH 2491/2924] perf tool_pmu: Fix aggregation on duration_time [ Upstream commit 68cb1567439fa325ba980f3b5b67f95d3953eafd ] evsel__count_has_error() fails counters when the enabled or running time are 0. The duration_time event reads 0 when the cpu_map_idx != 0 to avoid aggregating time over CPUs. Change the enable and running time to always have a ratio of 100% so that evsel__count_has_error won't fail. Before: ``` $ sudo /tmp/perf/perf stat --per-core -a -M UNCORE_FREQ sleep 1 Performance counter stats for 'system wide': S0-D0-C0 1 2,615,819,485 UNC_CLOCK.SOCKET # 2.61 UNCORE_FREQ S0-D0-C0 2 duration_time 1.002111784 seconds time elapsed ``` After: ``` $ perf stat --per-core -a -M UNCORE_FREQ sleep 1 Performance counter stats for 'system wide': S0-D0-C0 1 758,160,296 UNC_CLOCK.SOCKET # 0.76 UNCORE_FREQ S0-D0-C0 2 1,003,438,246 duration_time 1.002486017 seconds time elapsed ``` Note: the metric reads the value a different way and isn't impacted. Fixes: 240505b2d0adcdc8 ("perf tool_pmu: Factor tool events into their own PMU") Reported-by: Stephane Eranian Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20250423050358.94310-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/tool_pmu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c index 97b327d1ce4a01..727a10e3f99001 100644 --- a/tools/perf/util/tool_pmu.c +++ b/tools/perf/util/tool_pmu.c @@ -486,8 +486,14 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) delta_start *= 1000000000 / ticks_per_sec; } count->val = delta_start; - count->ena = count->run = delta_start; count->lost = 0; + /* + * The values of enabled and running must make a ratio of 100%. The + * exact values don't matter as long as they are non-zero to avoid + * issues with evsel__count_has_error. + */ + count->ena++; + count->run++; return 0; } From 4bfb6060e29376966c0006fdf7b4aaf9ec23d4ac Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 24 Apr 2025 15:33:10 +0200 Subject: [PATCH 2492/2924] perf tests metric-only perf stat: Fix tests 84 and 86 s390 [ Upstream commit ccd4b5cdf00f358acae9f396d13029e7ae78522e ] On s390x KVM and z/VM machines the CPU Measurement Facility is not available. Events cycles and instructions do not exist. Running above tests on s390 KVM and z/VM guests always fail with this error: # ./perf test 84 86 84: perf stat JSON output linter : FAILED! 86: perf stat STD output linter : FAILED! # Root cause is command: # perf stat -j --metric-only -e instructions,cycles -- true {"metric-value" : "none"} # Which fails due to unsupported events and returns "none". Do not execute this test case on s390 KVM and z/VM machines. Output after: # ./perf test 84 86 84: perf stat JSON output linter : Ok 86: perf stat STD output linter : Ok # Fixes: 45a86d017adf4d6c ("perf test: Add --metric-only to perf stat output tests") Suggested-by: Heiko Carstens Suggested-by: Sumanth Korikkar Reviewed-by: Sumanth Korikkar Signed-off-by: Thomas Richter Cc: Alexander Gordeev Cc: Namhyung Kim Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20250424133310.37452-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/shell/lib/stat_output.sh | 5 +++++ tools/perf/tests/shell/stat+json_output.sh | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/tools/perf/tests/shell/lib/stat_output.sh b/tools/perf/tests/shell/lib/stat_output.sh index 4d4aac547f0109..c2ec7881ec1de4 100644 --- a/tools/perf/tests/shell/lib/stat_output.sh +++ b/tools/perf/tests/shell/lib/stat_output.sh @@ -151,6 +151,11 @@ check_per_socket() check_metric_only() { echo -n "Checking $1 output: metric only " + if [ "$(uname -m)" = "s390x" ] && ! grep '^facilities' /proc/cpuinfo | grep -qw 67 + then + echo "[Skip] CPU-measurement counter facility not installed" + return + fi perf stat --metric-only $2 -e instructions,cycles true commachecker --metric-only echo "[Success]" diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh index a4f257ea839e13..98fb65274ac4f7 100755 --- a/tools/perf/tests/shell/stat+json_output.sh +++ b/tools/perf/tests/shell/stat+json_output.sh @@ -176,6 +176,11 @@ check_per_socket() check_metric_only() { echo -n "Checking json output: metric only " + if [ "$(uname -m)" = "s390x" ] && ! grep '^facilities' /proc/cpuinfo | grep -qw 67 + then + echo "[Skip] CPU-measurement counter facility not installed" + return + fi perf stat -j --metric-only -e instructions,cycles -o "${stat_output}" true $PYTHON $pythonchecker --metric-only --file "${stat_output}" echo "[Success]" From 49482f4a39620f6afedcd3f6aa9e0d558b6a460b Mon Sep 17 00:00:00 2001 From: Mikhail Arkhipov Date: Wed, 9 Apr 2025 00:39:06 +0300 Subject: [PATCH 2493/2924] mtd: nand: ecc-mxic: Fix use of uninitialized variable ret [ Upstream commit d95846350aac72303036a70c4cdc69ae314aa26d ] If ctx->steps is zero, the loop processing ECC steps is skipped, and the variable ret remains uninitialized. It is later checked and returned, which leads to undefined behavior and may cause unpredictable results in user space or kernel crashes. This scenario can be triggered in edge cases such as misconfigured geometry, ECC engine misuse, or if ctx->steps is not validated after initialization. Initialize ret to zero before the loop to ensure correct and safe behavior regardless of the ctx->steps value. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 48e6633a9fa2 ("mtd: nand: mxic-ecc: Add Macronix external ECC engine support") Signed-off-by: Mikhail Arkhipov Signed-off-by: Miquel Raynal Signed-off-by: Sasha Levin --- drivers/mtd/nand/ecc-mxic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/ecc-mxic.c b/drivers/mtd/nand/ecc-mxic.c index 56b56f726b9983..1bf9a5a64b87a4 100644 --- a/drivers/mtd/nand/ecc-mxic.c +++ b/drivers/mtd/nand/ecc-mxic.c @@ -614,7 +614,7 @@ static int mxic_ecc_finish_io_req_external(struct nand_device *nand, { struct mxic_ecc_engine *mxic = nand_to_mxic(nand); struct mxic_ecc_ctx *ctx = nand_to_ecc_ctx(nand); - int nents, step, ret; + int nents, step, ret = 0; if (req->mode == MTD_OPS_RAW) return 0; From 7eeb3df6f07a886bdfd52757ede127a59a8784dc Mon Sep 17 00:00:00 2001 From: Alexei Safin Date: Thu, 24 Apr 2025 23:26:54 +0300 Subject: [PATCH 2494/2924] hwmon: (asus-ec-sensors) check sensor index in read_string() [ Upstream commit 25be318324563c63cbd9cb53186203a08d2f83a1 ] Prevent a potential invalid memory access when the requested sensor is not found. find_ec_sensor_index() may return a negative value (e.g. -ENOENT), but its result was used without checking, which could lead to undefined behavior when passed to get_sensor_info(). Add a proper check to return -EINVAL if sensor_index is negative. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d0ddfd241e57 ("hwmon: (asus-ec-sensors) add driver for ASUS EC") Signed-off-by: Alexei Safin Link: https://lore.kernel.org/r/20250424202654.5902-1-a.safin@rosa.ru [groeck: Return error code returned from find_ec_sensor_index] Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/asus-ec-sensors.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 006ced5ab6e6ad..c7c02a1f55d459 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -933,6 +933,10 @@ static int asus_ec_hwmon_read_string(struct device *dev, { struct ec_sensors_data *state = dev_get_drvdata(dev); int sensor_index = find_ec_sensor_index(state, type, channel); + + if (sensor_index < 0) + return sensor_index; + *str = get_sensor_info(state, sensor_index)->label; return 0; From 6a2eb44bbe66e7de2cee3b69a9e609c72bdac5d1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 1 May 2025 00:00:03 -0700 Subject: [PATCH 2495/2924] perf symbol-minimal: Fix double free in filename__read_build_id [ Upstream commit fa9c4977fbfbca182f9e410d57b3f98356a9d917 ] Running the "perf script task-analyzer tests" with address sanitizer showed a double free: ``` FAIL: "test_csv_extended_times" Error message: "Failed to find required string:'Out-Out;'." ================================================================= ==19190==ERROR: AddressSanitizer: attempting double-free on 0x50b000017b10 in thread T0: #0 0x55da9601c78a in free (perf+0x26078a) (BuildId: e7ef50e08970f017a96fde6101c5e2491acc674a) #1 0x55da96640c63 in filename__read_build_id tools/perf/util/symbol-minimal.c:221:2 0x50b000017b10 is located 0 bytes inside of 112-byte region [0x50b000017b10,0x50b000017b80) freed by thread T0 here: #0 0x55da9601ce40 in realloc (perf+0x260e40) (BuildId: e7ef50e08970f017a96fde6101c5e2491acc674a) #1 0x55da96640ad6 in filename__read_build_id tools/perf/util/symbol-minimal.c:204:10 previously allocated by thread T0 here: #0 0x55da9601ca23 in malloc (perf+0x260a23) (BuildId: e7ef50e08970f017a96fde6101c5e2491acc674a) #1 0x55da966407e7 in filename__read_build_id tools/perf/util/symbol-minimal.c:181:9 SUMMARY: AddressSanitizer: double-free (perf+0x26078a) (BuildId: e7ef50e08970f017a96fde6101c5e2491acc674a) in free ==19190==ABORTING FAIL: "invocation of perf script report task-analyzer --csv-summary csvsummary --summary-extended command failed" Error message: "" FAIL: "test_csvsummary_extended" Error message: "Failed to find required string:'Out-Out;'." ---- end(-1) ---- 132: perf script task-analyzer tests : FAILED! ``` The buf_size if always set to phdr->p_filesz, but that may be 0 causing a free and realloc to return NULL. This is treated in filename__read_build_id like a failure and the buffer is freed again. To avoid this problem only grow buf, meaning the buf_size will never be 0. This also reduces the number of memory (re)allocations. Fixes: b691f64360ecec49 ("perf symbols: Implement poor man's ELF parser") Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250501070003.22251-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol-minimal.c | 34 +++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index c6f369b5d893f3..d8da3da01fe6b5 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -147,18 +147,19 @@ int filename__read_build_id(const char *filename, struct build_id *bid) if (phdr->p_type != PT_NOTE) continue; - buf_size = phdr->p_filesz; offset = phdr->p_offset; - tmp = realloc(buf, buf_size); - if (tmp == NULL) - goto out_free; - - buf = tmp; + if (phdr->p_filesz > buf_size) { + buf_size = phdr->p_filesz; + tmp = realloc(buf, buf_size); + if (tmp == NULL) + goto out_free; + buf = tmp; + } fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) + if (fread(buf, phdr->p_filesz, 1, fp) != 1) goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); + ret = read_build_id(buf, phdr->p_filesz, bid, need_swap); if (ret == 0) { ret = bid->size; break; @@ -199,18 +200,19 @@ int filename__read_build_id(const char *filename, struct build_id *bid) if (phdr->p_type != PT_NOTE) continue; - buf_size = phdr->p_filesz; offset = phdr->p_offset; - tmp = realloc(buf, buf_size); - if (tmp == NULL) - goto out_free; - - buf = tmp; + if (phdr->p_filesz > buf_size) { + buf_size = phdr->p_filesz; + tmp = realloc(buf, buf_size); + if (tmp == NULL) + goto out_free; + buf = tmp; + } fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) + if (fread(buf, phdr->p_filesz, 1, fp) != 1) goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); + ret = read_build_id(buf, phdr->p_filesz, bid, need_swap); if (ret == 0) { ret = bid->size; break; From d19bc1b4dd5f322980b1f05f79b2ea4f0db10920 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 10 Apr 2025 15:49:41 -0400 Subject: [PATCH 2496/2924] dm: fix dm_blk_report_zones [ Upstream commit 37f53a2c60d03743e0eacf7a0c01c279776fef4e ] If dm_get_live_table() returned NULL, dm_put_live_table() was never called. Also, it is possible that md->zone_revalidate_map will change while calling this function. Only read it once, so that we are always using the same value. Otherwise we might miss a call to dm_put_live_table(). Finally, while md->zone_revalidate_map is set and a process is calling blk_revalidate_disk_zones() to set up the zone append emulation resources, it is possible that another process, perhaps triggered by blkdev_report_zones_ioctl(), will call dm_blk_report_zones(). If blk_revalidate_disk_zones() fails, these resources can be freed while the other process is still using them, causing a use-after-free error. blk_revalidate_disk_zones() will only ever be called when initially setting up the zone append emulation resources, such as when setting up a zoned dm-crypt table for the first time. Further table swaps will not set md->zone_revalidate_map or call blk_revalidate_disk_zones(). However it must be called using the new table (referenced by md->zone_revalidate_map) and the new queue limits while the DM device is suspended. dm_blk_report_zones() needs some way to distinguish between a call from blk_revalidate_disk_zones(), which must be allowed to use md->zone_revalidate_map to access this not yet activated table, and all other calls to dm_blk_report_zones(), which should not be allowed while the device is suspended and cannot use md->zone_revalidate_map, since the zone resources might be freed by the process currently calling blk_revalidate_disk_zones(). Solve this by tracking the process that sets md->zone_revalidate_map in dm_revalidate_zones() and only allowing that process to make use of it in dm_blk_report_zones(). Fixes: f211268ed1f9b ("dm: Use the block layer zone append emulation") Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Sasha Levin --- drivers/md/dm-core.h | 1 + drivers/md/dm-zone.c | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 3637761f35853c..f3a3f2ef632261 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -141,6 +141,7 @@ struct mapped_device { #ifdef CONFIG_BLK_DEV_ZONED unsigned int nr_zones; void *zone_revalidate_map; + struct task_struct *revalidate_map_task; #endif #ifdef CONFIG_IMA diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 681058feb63b5a..ff9a1a94eea96a 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -56,24 +56,31 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector, { struct mapped_device *md = disk->private_data; struct dm_table *map; - int srcu_idx, ret; + struct dm_table *zone_revalidate_map = md->zone_revalidate_map; + int srcu_idx, ret = -EIO; + bool put_table = false; - if (!md->zone_revalidate_map) { - /* Regular user context */ + if (!zone_revalidate_map || md->revalidate_map_task != current) { + /* + * Regular user context or + * Zone revalidation during __bind() is in progress, but this + * call is from a different process + */ if (dm_suspended_md(md)) return -EAGAIN; map = dm_get_live_table(md, &srcu_idx); - if (!map) - return -EIO; + put_table = true; } else { /* Zone revalidation during __bind() */ - map = md->zone_revalidate_map; + map = zone_revalidate_map; } - ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data); + if (map) + ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, + data); - if (!md->zone_revalidate_map) + if (put_table) dm_put_live_table(md, srcu_idx); return ret; @@ -175,7 +182,9 @@ int dm_revalidate_zones(struct dm_table *t, struct request_queue *q) * our table for dm_blk_report_zones() to use directly. */ md->zone_revalidate_map = t; + md->revalidate_map_task = current; ret = blk_revalidate_disk_zones(disk); + md->revalidate_map_task = NULL; md->zone_revalidate_map = NULL; if (ret) { From ac8acb0bfd98a1c65f3ca9a3e217a766124eebd8 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 10 Apr 2025 15:49:42 -0400 Subject: [PATCH 2497/2924] dm: limit swapping tables for devices with zone write plugs [ Upstream commit 121218bef4c1df165181f5cd8fc3a2246bac817e ] dm_revalidate_zones() only allowed new or previously unzoned devices to call blk_revalidate_disk_zones(). If the device was already zoned, disk->nr_zones would always equal md->nr_zones, so dm_revalidate_zones() returned without doing any work. This would make the zoned settings for the device not match the new table. If the device had zone write plug resources, it could run into errors like bdev_zone_is_seq() reading invalid memory because disk->conv_zones_bitmap was the wrong size. If the device doesn't have any zone write plug resources, calling blk_revalidate_disk_zones() will always correctly update device. If blk_revalidate_disk_zones() fails, it can still overwrite or clear the current disk->nr_zones value. In this case, DM must restore the previous value of disk->nr_zones, so that the zoned settings will continue to match the previous value that it fell back to. If the device already has zone write plug resources, blk_revalidate_disk_zones() will not correctly update them, if it is called for arbitrary zoned device changes. Since there is not much need for this ability, the easiest solution is to disallow any table reloads that change the zoned settings, for devices that already have zone plug resources. Specifically, if a device already has zone plug resources allocated, it can only switch to another zoned table that also emulates zone append. Also, it cannot change the device size or the zone size. A device can switch to an error target. Fixes: bb37d77239af2 ("dm: introduce zone append emulation") Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Sasha Levin --- drivers/md/dm-table.c | 41 ++++++++++++++++++++++++++++++++++++----- drivers/md/dm-zone.c | 35 ++++++++++++++++++++++++++--------- drivers/md/dm.c | 6 ++++++ drivers/md/dm.h | 5 +++++ 4 files changed, 73 insertions(+), 14 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9cf82e0513c16e..e009bba52d4c0c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1490,6 +1490,18 @@ bool dm_table_has_no_data_devices(struct dm_table *t) return true; } +bool dm_table_is_wildcard(struct dm_table *t) +{ + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + + if (!dm_target_is_wildcard(ti->type)) + return false; + } + + return true; +} + static int device_not_zoned(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1830,6 +1842,19 @@ static bool dm_table_supports_atomic_writes(struct dm_table *t) return true; } +bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size, + sector_t new_size) +{ + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && dm_has_zone_plugs(t->md) && + old_size != new_size) { + DMWARN("%s: device has zone write plug resources. " + "Cannot change size", + dm_device_name(t->md)); + return false; + } + return true; +} + int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1867,11 +1892,17 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, limits->features &= ~BLK_FEAT_DAX; /* For a zoned table, setup the zone related queue attributes. */ - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && - (limits->features & BLK_FEAT_ZONED)) { - r = dm_set_zones_restrictions(t, q, limits); - if (r) - return r; + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { + if (limits->features & BLK_FEAT_ZONED) { + r = dm_set_zones_restrictions(t, q, limits); + if (r) + return r; + } else if (dm_has_zone_plugs(t->md)) { + DMWARN("%s: device has zone write plug resources. " + "Cannot switch to non-zoned table.", + dm_device_name(t->md)); + return -EINVAL; + } } if (dm_table_supports_atomic_writes(t)) diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index ff9a1a94eea96a..4af78111d0b4dd 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -160,22 +160,22 @@ int dm_revalidate_zones(struct dm_table *t, struct request_queue *q) { struct mapped_device *md = t->md; struct gendisk *disk = md->disk; + unsigned int nr_zones = disk->nr_zones; int ret; if (!get_capacity(disk)) return 0; - /* Revalidate only if something changed. */ - if (!disk->nr_zones || disk->nr_zones != md->nr_zones) { - DMINFO("%s using %s zone append", - disk->disk_name, - queue_emulates_zone_append(q) ? "emulated" : "native"); - md->nr_zones = 0; - } - - if (md->nr_zones) + /* + * Do not revalidate if zone write plug resources have already + * been allocated. + */ + if (dm_has_zone_plugs(md)) return 0; + DMINFO("%s using %s zone append", disk->disk_name, + queue_emulates_zone_append(q) ? "emulated" : "native"); + /* * Our table is not live yet. So the call to dm_get_live_table() * in dm_blk_report_zones() will fail. Set a temporary pointer to @@ -189,6 +189,7 @@ int dm_revalidate_zones(struct dm_table *t, struct request_queue *q) if (ret) { DMERR("Revalidate zones failed %d", ret); + disk->nr_zones = nr_zones; return ret; } @@ -385,12 +386,28 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, lim->max_open_zones = 0; lim->max_active_zones = 0; lim->max_hw_zone_append_sectors = 0; + lim->max_zone_append_sectors = 0; lim->zone_write_granularity = 0; lim->chunk_sectors = 0; lim->features &= ~BLK_FEAT_ZONED; return 0; } + if (get_capacity(disk) && dm_has_zone_plugs(t->md)) { + if (q->limits.chunk_sectors != lim->chunk_sectors) { + DMWARN("%s: device has zone write plug resources. " + "Cannot change zone size", + disk->disk_name); + return -EINVAL; + } + if (lim->max_hw_zone_append_sectors != 0 && + !dm_table_is_wildcard(t)) { + DMWARN("%s: device has zone write plug resources. " + "New table must emulate zone append", + disk->disk_name); + return -EINVAL; + } + } /* * Warn once (when the capacity is not yet set) if the mapped device is * partially using zone resources of the target devices as that leads to diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 292414da871daf..240f6dab8ddafb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2429,6 +2429,12 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, size = dm_table_get_size(t); old_size = dm_get_size(md); + + if (!dm_table_supports_size_change(t, old_size, size)) { + old_map = ERR_PTR(-EINVAL); + goto out; + } + set_capacity(md->disk, size); ret = dm_table_set_restrictions(t, md->queue, limits); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index e5d3a9f46a912c..245f52b592154d 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -58,6 +58,7 @@ void dm_table_event_callback(struct dm_table *t, void (*fn)(void *), void *context); struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); bool dm_table_has_no_data_devices(struct dm_table *table); +bool dm_table_is_wildcard(struct dm_table *t); int dm_calculate_queue_limits(struct dm_table *table, struct queue_limits *limits); int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, @@ -72,6 +73,8 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); struct dm_target *dm_table_get_immutable_target(struct dm_table *t); struct dm_target *dm_table_get_wildcard_target(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); +bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size, + sector_t new_size); void dm_lock_md_type(struct mapped_device *md); void dm_unlock_md_type(struct mapped_device *md); @@ -111,12 +114,14 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio); int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t, sector_t sector, unsigned int nr_zones, unsigned long *need_reset); +#define dm_has_zone_plugs(md) ((md)->disk->zone_wplugs_hash != NULL) #else #define dm_blk_report_zones NULL static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) { return false; } +#define dm_has_zone_plugs(md) false #endif /* From b1731277cbceba04f27b28cb7e182353dde855df Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 22 Apr 2025 19:47:36 -0400 Subject: [PATCH 2498/2924] dm-flakey: error all IOs when num_features is absent [ Upstream commit 40ed054f39bc99eac09871c33198e501f4acdf24 ] dm-flakey would error all IOs if num_features was 0, but if it was absent, dm-flakey would never error any IO. Fix this so that no num_features works the same as num_features set to 0. Fixes: aa7d7bc99fed7 ("dm flakey: add an "error_reads" option") Reported-by: Kent Overstreet Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Sasha Levin --- drivers/md/dm-flakey.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index b690905ab89ffb..806a80dd3bd9bd 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -53,8 +53,8 @@ struct per_bio_data { static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, struct dm_target *ti) { - int r; - unsigned int argc; + int r = 0; + unsigned int argc = 0; const char *arg_name; static const struct dm_arg _args[] = { @@ -65,14 +65,13 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, {0, PROBABILITY_BASE, "Invalid random corrupt argument"}, }; - /* No feature arguments supplied. */ - if (!as->argc) - return 0; - - r = dm_read_arg_group(_args, as, &argc, &ti->error); - if (r) + if (as->argc && (r = dm_read_arg_group(_args, as, &argc, &ti->error))) return r; + /* No feature arguments supplied. */ + if (!argc) + goto error_all_io; + while (argc) { arg_name = dm_shift_arg(as); argc--; @@ -217,6 +216,7 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, if (!fc->corrupt_bio_byte && !test_bit(ERROR_READS, &fc->flags) && !test_bit(DROP_WRITES, &fc->flags) && !test_bit(ERROR_WRITES, &fc->flags) && !fc->random_read_corrupt && !fc->random_write_corrupt) { +error_all_io: set_bit(ERROR_WRITES, &fc->flags); set_bit(ERROR_READS, &fc->flags); } From b7e80128bbf42c2f254d235f32b0c1cd2c042f38 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 22 Apr 2025 19:47:38 -0400 Subject: [PATCH 2499/2924] dm-flakey: make corrupting read bios work [ Upstream commit 13e79076c89f6e96a6cca8f6df38b40d025907b4 ] dm-flakey corrupts the read bios in the endio function. However, the corrupt_bio_* functions checked bio_has_data() to see if there was data to corrupt. Since this was the endio function, there was no data left to complete, so bio_has_data() was always false. Fix this by saving a copy of the bio's bi_iter in flakey_map(), and using this to initialize the iter for corrupting the read bios. This patch also skips cloning the bio for write bios with no data. Reported-by: Kent Overstreet Fixes: a3998799fb4df ("dm flakey: add corrupt_bio_byte feature") Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Sasha Levin --- drivers/md/dm-flakey.c | 54 ++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 806a80dd3bd9bd..347881f323d5bc 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -47,7 +47,8 @@ enum feature_flag_bits { }; struct per_bio_data { - bool bio_submitted; + bool bio_can_corrupt; + struct bvec_iter saved_iter; }; static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, @@ -339,7 +340,8 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) } static void corrupt_bio_common(struct bio *bio, unsigned int corrupt_bio_byte, - unsigned char corrupt_bio_value) + unsigned char corrupt_bio_value, + struct bvec_iter start) { struct bvec_iter iter; struct bio_vec bvec; @@ -348,7 +350,7 @@ static void corrupt_bio_common(struct bio *bio, unsigned int corrupt_bio_byte, * Overwrite the Nth byte of the bio's data, on whichever page * it falls. */ - bio_for_each_segment(bvec, bio, iter) { + __bio_for_each_segment(bvec, bio, iter, start) { if (bio_iter_len(bio, iter) > corrupt_bio_byte) { unsigned char *segment = bvec_kmap_local(&bvec); segment[corrupt_bio_byte] = corrupt_bio_value; @@ -357,36 +359,31 @@ static void corrupt_bio_common(struct bio *bio, unsigned int corrupt_bio_byte, "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n", bio, corrupt_bio_value, corrupt_bio_byte, (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, - (unsigned long long)bio->bi_iter.bi_sector, - bio->bi_iter.bi_size); + (unsigned long long)start.bi_sector, + start.bi_size); break; } corrupt_bio_byte -= bio_iter_len(bio, iter); } } -static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) +static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc, + struct bvec_iter start) { unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1; - if (!bio_has_data(bio)) - return; - - corrupt_bio_common(bio, corrupt_bio_byte, fc->corrupt_bio_value); + corrupt_bio_common(bio, corrupt_bio_byte, fc->corrupt_bio_value, start); } -static void corrupt_bio_random(struct bio *bio) +static void corrupt_bio_random(struct bio *bio, struct bvec_iter start) { unsigned int corrupt_byte; unsigned char corrupt_value; - if (!bio_has_data(bio)) - return; - - corrupt_byte = get_random_u32() % bio->bi_iter.bi_size; + corrupt_byte = get_random_u32() % start.bi_size; corrupt_value = get_random_u8(); - corrupt_bio_common(bio, corrupt_byte, corrupt_value); + corrupt_bio_common(bio, corrupt_byte, corrupt_value, start); } static void clone_free(struct bio *clone) @@ -481,7 +478,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) unsigned int elapsed; struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); - pb->bio_submitted = false; + pb->bio_can_corrupt = false; if (op_is_zone_mgmt(bio_op(bio))) goto map_bio; @@ -490,10 +487,11 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) elapsed = (jiffies - fc->start_time) / HZ; if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { bool corrupt_fixed, corrupt_random; - /* - * Flag this bio as submitted while down. - */ - pb->bio_submitted = true; + + if (bio_has_data(bio)) { + pb->bio_can_corrupt = true; + pb->saved_iter = bio->bi_iter; + } /* * Error reads if neither corrupt_bio_byte or drop_writes or error_writes are set. @@ -516,6 +514,8 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } + if (!pb->bio_can_corrupt) + goto map_bio; /* * Corrupt matching writes. */ @@ -535,9 +535,11 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) struct bio *clone = clone_bio(ti, fc, bio); if (clone) { if (corrupt_fixed) - corrupt_bio_data(clone, fc); + corrupt_bio_data(clone, fc, + clone->bi_iter); if (corrupt_random) - corrupt_bio_random(clone); + corrupt_bio_random(clone, + clone->bi_iter); submit_bio(clone); return DM_MAPIO_SUBMITTED; } @@ -559,21 +561,21 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, if (op_is_zone_mgmt(bio_op(bio))) return DM_ENDIO_DONE; - if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { + if (!*error && pb->bio_can_corrupt && (bio_data_dir(bio) == READ)) { if (fc->corrupt_bio_byte) { if ((fc->corrupt_bio_rw == READ) && all_corrupt_bio_flags_match(bio, fc)) { /* * Corrupt successful matching READs while in down state. */ - corrupt_bio_data(bio, fc); + corrupt_bio_data(bio, fc, pb->saved_iter); } } if (fc->random_read_corrupt) { u64 rnd = get_random_u64(); u32 rem = do_div(rnd, PROBABILITY_BASE); if (rem < fc->random_read_corrupt) - corrupt_bio_random(bio); + corrupt_bio_random(bio, pb->saved_iter); } if (test_bit(ERROR_READS, &fc->flags)) { /* From 832af8a252d37d7bdf0b34a648d40a82662102e0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Apr 2025 22:42:13 -0700 Subject: [PATCH 2500/2924] perf trace: Fix leaks of 'struct thread' in fprintf_sys_enter() [ Upstream commit bb3de7fa988c1b315ab8e87dc74e0d088284f142 ] I've found some leaks from 'perf trace -a'. It seems there are more leaks but this is what I can find for now. Fixes: 70351029b55677eb ("perf thread: Add support for reading the e_machine type for a thread") Reviewed-by: Howard Chu Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250403054213.7021-1-namhyung@kernel.org [ split from a larget patch ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6ac51925ea4249..f6b23319ea5f6c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2842,7 +2842,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, e_machine = thread__e_machine(thread, trace->host); sc = trace__syscall_info(trace, evsel, e_machine, id); if (sc == NULL) - return -1; + goto out_put; ttrace = thread__trace(thread, trace); /* * We need to get ttrace just to make sure it is there when syscall__scnprintf_args() From f3bdca84c7ade1b6a61ad06ddb682360dbdab20b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 2 Apr 2025 22:42:13 -0700 Subject: [PATCH 2501/2924] perf trace: Fix leaks of 'struct thread' in set_filter_loop_pids() [ Upstream commit 30d20fb1f84ad5c92706fe2c6cbb2d4cc293e671 ] I've found some leaks from 'perf trace -a'. It seems there are more leaks but this is what I can find for now. Fixes: 082ab9a18e532864 ("perf trace: Filter out 'sshd' in the tracer ancestry in syswide tracing") Reviewed-by: Howard Chu Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250403054213.7021-1-namhyung@kernel.org [ split from a larget patch ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f6b23319ea5f6c..f98b5efb1f96c9 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4128,10 +4128,13 @@ static int trace__set_filter_loop_pids(struct trace *trace) if (!strcmp(thread__comm_str(parent), "sshd") || strstarts(thread__comm_str(parent), "gnome-terminal")) { pids[nr++] = thread__tid(parent); + thread__put(parent); break; } + thread__put(thread); thread = parent; } + thread__put(thread); err = evlist__append_tp_filter_pids(trace->evlist, nr, pids); if (!err && trace->filter_pids.map) From 7ba4b429105bf28336d6ed1c2762a3837f7ca659 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Mon, 13 Jan 2025 19:26:00 +0100 Subject: [PATCH 2502/2924] perf tests: Fix 'perf report' tests installation [ Upstream commit 4bfe27140edf8dd1322326c79f5ae8d29ff7e43d ] There was a copy-paste mistake in the installation commands. Also, we need to install stderr-whitelist.txt file, which contains allowed messages that are printed on stderr and should not cause test fail. Fixes: 097fe67df1aa9cc7 ("perf testsuite: Install perf-report tests in the 'make install-tests -C tools/perf' target") Signed-off-by: Michael Petlan Cc: Ian Rogers Cc: Namhyung Kim Link: https://lore.kernel.org/r/20250113182605.130719-6-vmolnaro@redhat.com Signed-off-by: Veronika Molnarova Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Makefile.perf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 979d4691221a07..a7ae5637dadeeb 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1147,7 +1147,8 @@ install-tests: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \ $(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ - $(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ + $(INSTALL) tests/shell/base_report/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ + $(INSTALL) tests/shell/base_report/*.txt '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' ; \ $(INSTALL) tests/shell/coresight/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' $(Q)$(MAKE) -C tests/shell/coresight install-tests From 8b0c60e6e85e7d08bbb7660c2219368ba2fa8a42 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 12 May 2025 12:39:30 +0300 Subject: [PATCH 2503/2924] perf intel-pt: Fix PEBS-via-PT data_src [ Upstream commit e00eac6b5b6d956f38d8880c44bf7fd9954063c3 ] The Fixes commit did not add support for decoding PEBS-via-PT data_src. Fix by adding support. PEBS-via-PT is a feature of some E-core processors, starting with processors based on Tremont microarchitecture. Because the kernel only supports Intel PT features that are on all processors, there is no support for PEBS-via-PT on hybrids. Currently that leaves processors based on Tremont, Gracemont and Crestmont, however there are no events on Tremont that produce data_src information, and for Gracemont and Crestmont there are only: mem-loads event=0xd0,umask=0x5,ldlat=3 mem-stores event=0xd0,umask=0x6 Affected processors include Alder Lake N (Gracemont), Sierra Forest (Crestmont) and Grand Ridge (Crestmont). Example: # perf record -d -e intel_pt/branch=0/ -e mem-loads/aux-output/pp uname Before: # perf.before script --itrace=o -Fdata_src 0 |OP No|LVL N/A|SNP N/A|TLB N/A|LCK No|BLK N/A 0 |OP No|LVL N/A|SNP N/A|TLB N/A|LCK No|BLK N/A After: # perf script --itrace=o -Fdata_src 10268100142 |OP LOAD|LVL L1 hit|SNP None|TLB L1 or L2 hit|LCK No|BLK N/A 10450100442 |OP LOAD|LVL L2 hit|SNP None|TLB L2 miss|LCK No|BLK N/A Fixes: 975846eddf907297 ("perf intel-pt: Add memory information to synthesized PEBS sample") Reviewed-by: Kan Liang Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20250512093932.79854-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/intel-pt.c | 205 ++++++++++++++++++++++++++++++++++++- 1 file changed, 202 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 4e8a9b172fbcc7..9b1011fe482671 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -127,6 +127,7 @@ struct intel_pt { bool single_pebs; bool sample_pebs; + int pebs_data_src_fmt; struct evsel *pebs_evsel; u64 evt_sample_type; @@ -175,6 +176,7 @@ enum switch_state { struct intel_pt_pebs_event { struct evsel *evsel; u64 id; + int data_src_fmt; }; struct intel_pt_queue { @@ -2272,7 +2274,146 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) + +/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */ +static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */ +static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel pebs_set_tlb_lock() */ +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Based on kernel __grt_latency_data() */ +static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk, + const u64 *pebs_data_source) +{ + u64 val; + + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; + val = pebs_data_source[dse]; + + pebs_set_tlb_lock(&val, tlb, lock); + + if (blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +/* Default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) + +enum DATA_SRC_FORMAT { + DATA_SRC_FORMAT_ERR = -1, + DATA_SRC_FORMAT_NA = 0, + DATA_SRC_FORMAT_GRT = 1, + DATA_SRC_FORMAT_CMT = 2, +}; + +/* Based on kernel grt_latency_data() and cmt_latency_data */ +static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt) +{ + switch (data_src_fmt) { + case DATA_SRC_FORMAT_GRT: { + union { + u64 val; + struct { + unsigned int dse:4; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:25; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_grt); + } + case DATA_SRC_FORMAT_CMT: { + union { + u64 val; + struct { + unsigned int dse:5; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:24; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_cmt); + } + default: + return PERF_MEM_NA; + } +} + +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, + u64 id, int data_src_fmt) { const struct intel_pt_blk_items *items = &ptq->state->items; struct perf_sample sample; @@ -2393,6 +2534,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse } } + if (sample_type & PERF_SAMPLE_DATA_SRC) { + if (items->has_mem_aux_info && data_src_fmt) { + if (data_src_fmt < 0) { + pr_err("Intel PT missing data_src info\n"); + return -1; + } + sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt); + } else { + sample.data_src = PERF_MEM_NA; + } + } + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { u64 ax = items->has_rax ? items->rax : 0; /* Refer kernel's intel_hsw_transaction() */ @@ -2413,9 +2566,10 @@ static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; struct evsel *evsel = pt->pebs_evsel; + int data_src_fmt = pt->pebs_data_src_fmt; u64 id = evsel->core.id[0]; - return intel_pt_do_synth_pebs_sample(ptq, evsel, id); + return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt); } static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) @@ -2440,7 +2594,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) hw_id); return intel_pt_synth_single_pebs_sample(ptq); } - err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt); if (err) return err; } @@ -3407,6 +3561,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +/* + * Events with data_src are identified by L1_Hit_Indication + * refer https://github.com/intel/perfmon + */ +static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel) +{ + struct perf_env *env = pt->machine->env; + int fmt = DATA_SRC_FORMAT_NA; + + if (!env->cpuid) + return DATA_SRC_FORMAT_ERR; + + /* + * PEBS-via-PT is only supported on E-core non-hybrid. Of those only + * Gracemont and Crestmont have data_src. Check for: + * Alderlake N (Gracemont) + * Sierra Forest (Crestmont) + * Grand Ridge (Crestmont) + */ + + if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19)) + fmt = DATA_SRC_FORMAT_GRT; + + if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) || + !strncmp(env->cpuid, "GenuineIntel,6,182,", 19)) + fmt = DATA_SRC_FORMAT_CMT; + + if (fmt == DATA_SRC_FORMAT_NA) + return fmt; + + /* + * Only data_src events are: + * mem-loads event=0xd0,umask=0x5 + * mem-stores event=0xd0,umask=0x6 + */ + if (evsel->core.attr.type == PERF_TYPE_RAW && + ((evsel->core.attr.config & 0xffff) == 0x5d0 || + (evsel->core.attr.config & 0xffff) == 0x6d0)) + return fmt; + + return DATA_SRC_FORMAT_NA; +} + static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) @@ -3427,6 +3624,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, ptq->pebs[hw_id].evsel = evsel; ptq->pebs[hw_id].id = sample->id; + ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel); return 0; } @@ -3976,6 +4174,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) } pt->single_pebs = true; pt->sample_pebs = true; + pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel); pt->pebs_evsel = evsel; } } From b29c324b8a4f84338a07da6706d43e0a1b87dba7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 12 May 2025 12:39:32 +0300 Subject: [PATCH 2504/2924] perf scripts python: exported-sql-viewer.py: Fix pattern matching with Python 3 [ Upstream commit 17e548405a81665fd14cee960db7d093d1396400 ] The script allows the user to enter patterns to find symbols. The pattern matching characters are converted for use in SQL. For PostgreSQL the conversion involves using the Python maketrans() method which is slightly different in Python 3 compared with Python 2. Fix to work in Python 3. Fixes: beda0e725e5f06ac ("perf script python: Add Python3 support to exported-sql-viewer.py") Signed-off-by: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Tony Jones Link: https://lore.kernel.org/r/20250512093932.79854-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/scripts/python/exported-sql-viewer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 121cf61ba1b345..e0b2e7268ef68c 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -680,7 +680,10 @@ def FindSelect(self, value, pattern, query): s = value.replace("%", "\\%") s = s.replace("_", "\\_") # Translate * and ? into SQL LIKE pattern characters % and _ - trans = string.maketrans("*?", "%_") + if sys.version_info[0] == 3: + trans = str.maketrans("*?", "%_") + else: + trans = string.maketrans("*?", "%_") match = " LIKE '" + str(s).translate(trans) + "'" else: match = " GLOB '" + str(value) + "'" From 1c2a25b5cfb7b72d7d7cebba317dfb458f69ec7e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Apr 2025 13:59:51 +0300 Subject: [PATCH 2505/2924] remoteproc: qcom_wcnss_iris: Add missing put_device() on error in probe [ Upstream commit 0cb4b1b97041d8a1f773425208ded253c1cb5869 ] The device_del() call matches with the device_add() but we also need to call put_device() to trigger the qcom_iris_release(). Fixes: 1fcef985c8bd ("remoteproc: qcom: wcnss: Fix race with iris probe") Signed-off-by: Dan Carpenter Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/4604f7e0-3217-4095-b28a-3ff8b5afad3a@stanley.mountain Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/remoteproc/qcom_wcnss_iris.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/remoteproc/qcom_wcnss_iris.c b/drivers/remoteproc/qcom_wcnss_iris.c index b989718776bdb5..2b52b403eb3f76 100644 --- a/drivers/remoteproc/qcom_wcnss_iris.c +++ b/drivers/remoteproc/qcom_wcnss_iris.c @@ -196,6 +196,7 @@ struct qcom_iris *qcom_iris_probe(struct device *parent, bool *use_48mhz_xo) err_device_del: device_del(&iris->dev); + put_device(&iris->dev); return ERR_PTR(ret); } @@ -203,4 +204,5 @@ struct qcom_iris *qcom_iris_probe(struct device *parent, bool *use_48mhz_xo) void qcom_iris_remove(struct qcom_iris *iris) { device_del(&iris->dev); + put_device(&iris->dev); } From e54bd0f3238fc2c967defc6d80a64029599c9952 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 13 May 2025 10:57:30 +0100 Subject: [PATCH 2506/2924] perf tools: Fix arm64 source package build [ Upstream commit aea3496bbc7c20037ecc35411264de109d700fc7 ] Syscall tables are generated from rules in the kernel tree. Add the related files to the MANIFEST to fix the Perf source package build. Reported-by: Arnaldo Carvalho de Melo Fixes: bfb713ea53c746b0 ("perf tools: Fix arm64 build by generating unistd_64.h") Signed-off-by: James Clark Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250513-james-perf-src-pkg-fix-v1-1-bcfd0486dbd6@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/MANIFEST | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 364b55b00b4841..34af57b8ec2a9c 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,8 +1,10 @@ COPYING LICENSES/preferred/GPL-2.0 arch/arm64/tools/gen-sysreg.awk +arch/arm64/tools/syscall_64.tbl arch/arm64/tools/sysreg arch/*/include/uapi/asm/bpf_perf_event.h +include/uapi/asm-generic/Kbuild tools/perf tools/arch tools/scripts @@ -25,6 +27,10 @@ tools/lib/str_error_r.c tools/lib/vsprintf.c tools/lib/zalloc.c scripts/bpf_doc.py +scripts/Kbuild.include +scripts/Makefile.asm-headers +scripts/syscall.tbl +scripts/syscallhdr.sh tools/bpf/bpftool kernel/bpf/disasm.c kernel/bpf/disasm.h From aa33fd51cb50f886f41b924e1c0e568f365b683d Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Tue, 13 May 2025 11:14:35 +0530 Subject: [PATCH 2507/2924] remoteproc: k3-r5: Drop check performed in k3_r5_rproc_{mbox_callback/kick} [ Upstream commit 9995dbfc2235efabdb3759606d522e1a7ec3bdcb ] Commit f3f11cfe8907 ("remoteproc: k3-r5: Acquire mailbox handle during probe routine") introduced a check in the "k3_r5_rproc_mbox_callback()" and "k3_r5_rproc_kick()" callbacks, causing them to exit if the remote core's state is "RPROC_DETACHED". However, the "__rproc_attach()" function that is responsible for attaching to a remote core, updates the state of the remote core to "RPROC_ATTACHED" only after invoking "rproc_start_subdevices()". The "rproc_start_subdevices()" function triggers the probe of the Virtio RPMsg devices associated with the remote core, which require that the "k3_r5_rproc_kick()" and "k3_r5_rproc_mbox_callback()" callbacks are functional. Hence, drop the check in the callbacks. Fixes: f3f11cfe8907 ("remoteproc: k3-r5: Acquire mailbox handle during probe routine") Signed-off-by: Siddharth Vadapalli Signed-off-by: Beleswar Padhi Tested-by: Judith Mendez Reviewed-by: Andrew Davis Link: https://lore.kernel.org/r/20250513054510.3439842-2-b-padhi@ti.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/ti_k3_r5_remoteproc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c index dbc513c5569cbf..3fc0b97dec6000 100644 --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c @@ -194,10 +194,6 @@ static void k3_r5_rproc_mbox_callback(struct mbox_client *client, void *data) const char *name = kproc->rproc->name; u32 msg = omap_mbox_message(data); - /* Do not forward message from a detached core */ - if (kproc->rproc->state == RPROC_DETACHED) - return; - dev_dbg(dev, "mbox msg: 0x%x\n", msg); switch (msg) { @@ -233,10 +229,6 @@ static void k3_r5_rproc_kick(struct rproc *rproc, int vqid) mbox_msg_t msg = (mbox_msg_t)vqid; int ret; - /* Do not forward message to a detached core */ - if (kproc->rproc->state == RPROC_DETACHED) - return; - /* send the index of the triggered virtqueue in the mailbox payload */ ret = mbox_send_message(kproc->mbox, (void *)msg); if (ret < 0) From c3267ee7ac02eba431efe07ac680c36b744c3489 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Tue, 13 May 2025 11:14:36 +0530 Subject: [PATCH 2508/2924] remoteproc: k3-dsp: Drop check performed in k3_dsp_rproc_{mbox_callback/kick} [ Upstream commit 349d62ab207f55f039c3ddb40b36e95c2f0b3ed0 ] Commit ea1d6fb5b571 ("remoteproc: k3-dsp: Acquire mailbox handle during probe routine") introduced a check in the "k3_dsp_rproc_mbox_callback()" and "k3_dsp_rproc_kick()" callbacks, causing them to exit if the remote core's state is "RPROC_DETACHED". However, the "__rproc_attach()" function that is responsible for attaching to a remote core, updates the state of the remote core to "RPROC_ATTACHED" only after invoking "rproc_start_subdevices()". The "rproc_start_subdevices()" function triggers the probe of the Virtio RPMsg devices associated with the remote core, which require that the "k3_dsp_rproc_kick()" and "k3_dsp_rproc_mbox_callback()" callbacks are functional. Hence, drop the check in the callbacks. Fixes: ea1d6fb5b571 ("remoteproc: k3-dsp: Acquire mailbox handle during probe routine") Signed-off-by: Siddharth Vadapalli Signed-off-by: Beleswar Padhi Tested-by: Judith Mendez Reviewed-by: Andrew Davis Link: https://lore.kernel.org/r/20250513054510.3439842-3-b-padhi@ti.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/ti_k3_dsp_remoteproc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index a695890254ff76..35e8c3cc313c36 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -115,10 +115,6 @@ static void k3_dsp_rproc_mbox_callback(struct mbox_client *client, void *data) const char *name = kproc->rproc->name; u32 msg = omap_mbox_message(data); - /* Do not forward messages from a detached core */ - if (kproc->rproc->state == RPROC_DETACHED) - return; - dev_dbg(dev, "mbox msg: 0x%x\n", msg); switch (msg) { @@ -159,10 +155,6 @@ static void k3_dsp_rproc_kick(struct rproc *rproc, int vqid) mbox_msg_t msg = (mbox_msg_t)vqid; int ret; - /* Do not forward messages to a detached core */ - if (kproc->rproc->state == RPROC_DETACHED) - return; - /* send the index of the triggered virtqueue in the mailbox payload */ ret = mbox_send_message(kproc->mbox, (void *)msg); if (ret < 0) From 4af0ad194c6f3a955706ea124ca595fdab2eec0c Mon Sep 17 00:00:00 2001 From: Beleswar Padhi Date: Tue, 13 May 2025 11:14:37 +0530 Subject: [PATCH 2509/2924] remoteproc: k3-r5: Refactor sequential core power up/down operations [ Upstream commit 701177511abd295e0fc2499796e466d8ff12165c ] The existing implementation of the waiting mechanism in "k3_r5_cluster_rproc_init()" waits for the "released_from_reset" flag to be set as part of the firmware boot process in "k3_r5_rproc_start()". The "k3_r5_cluster_rproc_init()" function is invoked in the probe routine which causes unexpected failures in cases where the firmware is unavailable at boot time, resulting in probe failure and removal of the remoteproc handles in the sysfs paths. To address this, the waiting mechanism is refactored out of the probe routine into the appropriate "k3_r5_rproc_{prepare/unprepare}()" functions. This allows the probe routine to complete without depending on firmware booting, while still maintaining the required power-synchronization between cores. Further, this wait mechanism is dropped from "k3_r5_rproc_{start/stop}()" functions as they deal with Core Run/Halt operations, and as such, there is no constraint in Running or Halting the cores of a cluster in order. Fixes: 61f6f68447ab ("remoteproc: k3-r5: Wait for core0 power-up before powering up core1") Signed-off-by: Beleswar Padhi Tested-by: Judith Mendez Reviewed-by: Andrew Davis Link: https://lore.kernel.org/r/20250513054510.3439842-4-b-padhi@ti.com Signed-off-by: Mathieu Poirier Signed-off-by: Sasha Levin --- drivers/remoteproc/ti_k3_r5_remoteproc.c | 110 +++++++++++++---------- 1 file changed, 63 insertions(+), 47 deletions(-) diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c index 3fc0b97dec6000..ba082ca13e7508 100644 --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c @@ -440,13 +440,36 @@ static int k3_r5_rproc_prepare(struct rproc *rproc) { struct k3_r5_rproc *kproc = rproc->priv; struct k3_r5_cluster *cluster = kproc->cluster; - struct k3_r5_core *core = kproc->core; + struct k3_r5_core *core = kproc->core, *core0, *core1; struct device *dev = kproc->dev; u32 ctrl = 0, cfg = 0, stat = 0; u64 boot_vec = 0; bool mem_init_dis; int ret; + /* + * R5 cores require to be powered on sequentially, core0 should be in + * higher power state than core1 in a cluster. So, wait for core0 to + * power up before proceeding to core1 and put timeout of 2sec. This + * waiting mechanism is necessary because rproc_auto_boot_callback() for + * core1 can be called before core0 due to thread execution order. + * + * By placing the wait mechanism here in .prepare() ops, this condition + * is enforced for rproc boot requests from sysfs as well. + */ + core0 = list_first_entry(&cluster->cores, struct k3_r5_core, elem); + core1 = list_last_entry(&cluster->cores, struct k3_r5_core, elem); + if (cluster->mode == CLUSTER_MODE_SPLIT && core == core1 && + !core0->released_from_reset) { + ret = wait_event_interruptible_timeout(cluster->core_transition, + core0->released_from_reset, + msecs_to_jiffies(2000)); + if (ret <= 0) { + dev_err(dev, "can not power up core1 before core0"); + return -EPERM; + } + } + ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl, &stat); if (ret < 0) return ret; @@ -462,6 +485,14 @@ static int k3_r5_rproc_prepare(struct rproc *rproc) return ret; } + /* + * Notify all threads in the wait queue when core0 state has changed so + * that threads waiting for this condition can be executed. + */ + core->released_from_reset = true; + if (core == core0) + wake_up_interruptible(&cluster->core_transition); + /* * Newer IP revisions like on J7200 SoCs support h/w auto-initialization * of TCMs, so there is no need to perform the s/w memzero. This bit is @@ -507,10 +538,30 @@ static int k3_r5_rproc_unprepare(struct rproc *rproc) { struct k3_r5_rproc *kproc = rproc->priv; struct k3_r5_cluster *cluster = kproc->cluster; - struct k3_r5_core *core = kproc->core; + struct k3_r5_core *core = kproc->core, *core0, *core1; struct device *dev = kproc->dev; int ret; + /* + * Ensure power-down of cores is sequential in split mode. Core1 must + * power down before Core0 to maintain the expected state. By placing + * the wait mechanism here in .unprepare() ops, this condition is + * enforced for rproc stop or shutdown requests from sysfs and device + * removal as well. + */ + core0 = list_first_entry(&cluster->cores, struct k3_r5_core, elem); + core1 = list_last_entry(&cluster->cores, struct k3_r5_core, elem); + if (cluster->mode == CLUSTER_MODE_SPLIT && core == core0 && + core1->released_from_reset) { + ret = wait_event_interruptible_timeout(cluster->core_transition, + !core1->released_from_reset, + msecs_to_jiffies(2000)); + if (ret <= 0) { + dev_err(dev, "can not power down core0 before core1"); + return -EPERM; + } + } + /* Re-use LockStep-mode reset logic for Single-CPU mode */ ret = (cluster->mode == CLUSTER_MODE_LOCKSTEP || cluster->mode == CLUSTER_MODE_SINGLECPU) ? @@ -518,6 +569,14 @@ static int k3_r5_rproc_unprepare(struct rproc *rproc) if (ret) dev_err(dev, "unable to disable cores, ret = %d\n", ret); + /* + * Notify all threads in the wait queue when core1 state has changed so + * that threads waiting for this condition can be executed. + */ + core->released_from_reset = false; + if (core == core1) + wake_up_interruptible(&cluster->core_transition); + return ret; } @@ -543,7 +602,7 @@ static int k3_r5_rproc_start(struct rproc *rproc) struct k3_r5_rproc *kproc = rproc->priv; struct k3_r5_cluster *cluster = kproc->cluster; struct device *dev = kproc->dev; - struct k3_r5_core *core0, *core; + struct k3_r5_core *core; u32 boot_addr; int ret; @@ -565,21 +624,9 @@ static int k3_r5_rproc_start(struct rproc *rproc) goto unroll_core_run; } } else { - /* do not allow core 1 to start before core 0 */ - core0 = list_first_entry(&cluster->cores, struct k3_r5_core, - elem); - if (core != core0 && core0->rproc->state == RPROC_OFFLINE) { - dev_err(dev, "%s: can not start core 1 before core 0\n", - __func__); - return -EPERM; - } - ret = k3_r5_core_run(core); if (ret) return ret; - - core->released_from_reset = true; - wake_up_interruptible(&cluster->core_transition); } return 0; @@ -620,8 +667,7 @@ static int k3_r5_rproc_stop(struct rproc *rproc) { struct k3_r5_rproc *kproc = rproc->priv; struct k3_r5_cluster *cluster = kproc->cluster; - struct device *dev = kproc->dev; - struct k3_r5_core *core1, *core = kproc->core; + struct k3_r5_core *core = kproc->core; int ret; /* halt all applicable cores */ @@ -634,16 +680,6 @@ static int k3_r5_rproc_stop(struct rproc *rproc) } } } else { - /* do not allow core 0 to stop before core 1 */ - core1 = list_last_entry(&cluster->cores, struct k3_r5_core, - elem); - if (core != core1 && core1->rproc->state != RPROC_OFFLINE) { - dev_err(dev, "%s: can not stop core 0 before core 1\n", - __func__); - ret = -EPERM; - goto out; - } - ret = k3_r5_core_halt(core); if (ret) goto out; @@ -1271,26 +1307,6 @@ static int k3_r5_cluster_rproc_init(struct platform_device *pdev) cluster->mode == CLUSTER_MODE_SINGLECPU || cluster->mode == CLUSTER_MODE_SINGLECORE) break; - - /* - * R5 cores require to be powered on sequentially, core0 - * should be in higher power state than core1 in a cluster - * So, wait for current core to power up before proceeding - * to next core and put timeout of 2sec for each core. - * - * This waiting mechanism is necessary because - * rproc_auto_boot_callback() for core1 can be called before - * core0 due to thread execution order. - */ - ret = wait_event_interruptible_timeout(cluster->core_transition, - core->released_from_reset, - msecs_to_jiffies(2000)); - if (ret <= 0) { - dev_err(dev, - "Timed out waiting for %s core to power up!\n", - rproc->name); - goto out; - } } return 0; From 031fa5c9198929fd3a40bb7fb655f31ff71de86a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 23 Apr 2025 20:22:05 +0300 Subject: [PATCH 2510/2924] rpmsg: qcom_smd: Fix uninitialized return variable in __qcom_smd_send() [ Upstream commit 5de775df3362090a6e90046d1f2d83fe62489aa0 ] The "ret" variable isn't initialized if we don't enter the loop. For example, if "channel->state" is not SMD_CHANNEL_OPENED. Fixes: 33e3820dda88 ("rpmsg: smd: Use spinlock in tx path") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/aAkhvV0nSbrsef1P@stanley.mountain Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/rpmsg/qcom_smd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index 40d386809d6b78..bb161def317533 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -746,7 +746,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data, __le32 hdr[5] = { cpu_to_le32(len), }; int tlen = sizeof(hdr) + len; unsigned long flags; - int ret; + int ret = 0; /* Word aligned channels only accept word size aligned data */ if (channel->info_word && len % 4) From bd0edaf99a920b1a9decd773179caacacb61d0fd Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 May 2025 10:07:01 +0100 Subject: [PATCH 2511/2924] netfs: Fix oops in write-retry from mis-resetting the subreq iterator [ Upstream commit 4481f7f2b3df123ec77e828c849138f75cff2bf2 ] Fix the resetting of the subrequest iterator in netfs_retry_write_stream() to use the iterator-reset function as the iterator may have been shortened by a previous retry. In such a case, the amount of data to be written by the subrequest is not "subreq->len" but "subreq->len - subreq->transferred". Without this, KASAN may see an error in iov_iter_revert(): BUG: KASAN: slab-out-of-bounds in iov_iter_revert lib/iov_iter.c:633 [inline] BUG: KASAN: slab-out-of-bounds in iov_iter_revert+0x443/0x5a0 lib/iov_iter.c:611 Read of size 4 at addr ffff88802912a0b8 by task kworker/u32:7/1147 CPU: 1 UID: 0 PID: 1147 Comm: kworker/u32:7 Not tainted 6.15.0-rc6-syzkaller-00052-g9f35e33144ae #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Workqueue: events_unbound netfs_write_collection_worker Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xc3/0x670 mm/kasan/report.c:521 kasan_report+0xe0/0x110 mm/kasan/report.c:634 iov_iter_revert lib/iov_iter.c:633 [inline] iov_iter_revert+0x443/0x5a0 lib/iov_iter.c:611 netfs_retry_write_stream fs/netfs/write_retry.c:44 [inline] netfs_retry_writes+0x166d/0x1a50 fs/netfs/write_retry.c:231 netfs_collect_write_results fs/netfs/write_collect.c:352 [inline] netfs_write_collection_worker+0x23fd/0x3830 fs/netfs/write_collect.c:374 process_one_work+0x9cf/0x1b70 kernel/workqueue.c:3238 process_scheduled_works kernel/workqueue.c:3319 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3400 kthread+0x3c2/0x780 kernel/kthread.c:464 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:153 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Fixes: cd0277ed0c18 ("netfs: Use new folio_queue data type and iterator instead of xarray iter") Reported-by: syzbot+25b83a6f2c702075fcbc@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=25b83a6f2c702075fcbc Signed-off-by: David Howells Link: https://lore.kernel.org/20250519090707.2848510-2-dhowells@redhat.com Tested-by: syzbot+25b83a6f2c702075fcbc@syzkaller.appspotmail.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/netfs/write_retry.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 545d33079a77d0..9b1ca8b0f4dd6b 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -39,9 +39,10 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) break; if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { - struct iov_iter source = subreq->io_iter; + struct iov_iter source; - iov_iter_revert(&source, subreq->len - source.count); + netfs_reset_iter(subreq); + source = subreq->io_iter; netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); netfs_reissue_write(stream, subreq, &source); } From 3f844fa87d47c17deecb0f07b625c6fdd96fd009 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 19 May 2025 10:07:02 +0100 Subject: [PATCH 2512/2924] netfs: Fix setting of transferred bytes with short DIO reads [ Upstream commit 34eb98c6598c4057640ca56dd1fad6555187473a ] A netfslib request comprises an ordered stream of subrequests that, when doing an unbuffered/DIO read, are contiguous. The subrequests may be performed in parallel, but may not be fully completed. For instance, if we try and make a 256KiB DIO read from a 3-byte file with a 64KiB rsize and 256KiB bsize, netfslib will attempt to make a read of 256KiB, broken up into four 64KiB subreads, with the expectation that the first will be short and the subsequent three be completely devoid - but we do all four on the basis that the file may have been changed by a third party. The read-collection code, however, walks through all the subreqs and advances the notion of how much data has been read in the stream to the start of each subreq plus its amount transferred (which are 3, 0, 0, 0 for the example above) - which gives an amount apparently read of 3*64KiB - which is incorrect. Fix the collection code to cut short the calculation of the transferred amount with the first short subrequest in an unbuffered read; everything beyond that must be ignored as there's a hole that cannot be filled. This applies both to shortness due to hitting the EOF and shortness due to an error. This is achieved by setting a flag on the request when we collect the first short subrequest (collection is done in ascending order). This can be tested by mounting a cifs volume with rsize=65536,bsize=262144 and doing a 256k DIO read of a very small file (e.g. 3 bytes). read() should return 3, not >3. This problem came in when netfs_read_collection() set rreq->transferred to stream->transferred, even for DIO. Prior to that, netfs_rreq_assess_dio() just went over the list and added up the subreqs till it met a short one - but now the subreqs are discarded earlier. Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Reported-by: Nicolas Baranger Closes: https://lore.kernel.org/all/10bec2430ed4df68bde10ed95295d093@3xo.fr/ Signed-off-by: "Paulo Alcantara (Red Hat)" Signed-off-by: David Howells Link: https://lore.kernel.org/20250519090707.2848510-3-dhowells@redhat.com cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/netfs/read_collect.c | 21 +++++---------------- include/linux/netfs.h | 1 + 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 23c75755ad4ed9..d3cf27b2697c33 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -280,9 +280,13 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) stream->need_retry = true; notes |= NEED_RETRY | MADE_PROGRESS; break; + } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) { + notes |= MADE_PROGRESS; } else { if (!stream->failed) - stream->transferred = stream->collected_to - rreq->start; + stream->transferred += transferred; + if (front->transferred < front->len) + set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags); notes |= MADE_PROGRESS; } @@ -342,23 +346,8 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) */ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) { - struct netfs_io_subrequest *subreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; unsigned int i; - /* Collect unbuffered reads and direct reads, adding up the transfer - * sizes until we find the first short or failed subrequest. - */ - list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - rreq->transferred += subreq->transferred; - - if (subreq->transferred < subreq->len || - test_bit(NETFS_SREQ_FAILED, &subreq->flags)) { - rreq->error = subreq->error; - break; - } - } - if (rreq->origin == NETFS_DIO_READ) { for (i = 0; i < rreq->direct_bv_count; i++) { flush_dcache_page(rreq->direct_bv[i].bv_page); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c86a11cfc4a36a..497c4f4698f6ec 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -279,6 +279,7 @@ struct netfs_io_request { #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ #define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; From 1a8360c2eed3b292ed654c2ac61b09de4a80e298 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 May 2025 10:07:03 +0100 Subject: [PATCH 2513/2924] netfs: Fix the request's work item to not require a ref [ Upstream commit 20d72b00ca814d748f5663484e5c53bb2bf37a3a ] When the netfs_io_request struct's work item is queued, it must be supplied with a ref to the work item struct to prevent it being deallocated whilst on the queue or whilst it is being processed. This is tricky to manage as we have to get a ref before we try and queue it and then we may find it's already queued and is thus already holding a ref - in which case we have to try and get rid of the ref again. The problem comes if we're in BH or IRQ context and need to drop the ref: if netfs_put_request() reduces the count to 0, we have to do the cleanup - but the cleanup may need to wait. Fix this by adding a new work item to the request, ->cleanup_work, and dispatching that when the refcount hits zero. That can then synchronously cancel any outstanding work on the main work item before doing the cleanup. Adding a new work item also deals with another problem upstream where it's sometimes changing the work func in the put function and requeuing it - which has occasionally in the past caused the cleanup to happen incorrectly. As a bonus, this allows us to get rid of the 'was_async' parameter from a bunch of functions. This indicated whether the put function might not be permitted to sleep. Fixes: 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers") Signed-off-by: David Howells Link: https://lore.kernel.org/20250519090707.2848510-4-dhowells@redhat.com cc: Paulo Alcantara cc: Marc Dionne cc: Steve French cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/9p/vfs_addr.c | 2 +- fs/afs/write.c | 8 ++--- fs/cachefiles/io.c | 16 +++++----- fs/ceph/addr.c | 2 +- fs/erofs/fscache.c | 6 ++-- fs/netfs/buffered_read.c | 30 +++++++++--------- fs/netfs/direct_read.c | 6 ++-- fs/netfs/direct_write.c | 2 +- fs/netfs/fscache_io.c | 10 +++--- fs/netfs/internal.h | 11 +++---- fs/netfs/objects.c | 47 +++++++++++++-------------- fs/netfs/read_collect.c | 44 ++++++++++++++++---------- fs/netfs/read_pgpriv2.c | 4 +-- fs/netfs/read_retry.c | 2 +- fs/netfs/read_single.c | 6 ++-- fs/netfs/write_collect.c | 61 +++++++++++++++++------------------- fs/netfs/write_issue.c | 16 +++++----- fs/netfs/write_retry.c | 2 +- fs/smb/client/cifsproto.h | 3 +- fs/smb/client/cifssmb.c | 4 +-- fs/smb/client/file.c | 7 ++--- fs/smb/client/smb2pdu.c | 4 +-- include/linux/fscache.h | 2 +- include/linux/netfs.h | 13 ++++---- include/trace/events/netfs.h | 7 ++--- net/9p/client.c | 6 ++-- 26 files changed, 159 insertions(+), 162 deletions(-) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 32619d146cbc19..b5a4a28e0fe79b 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -59,7 +59,7 @@ static void v9fs_issue_write(struct netfs_io_subrequest *subreq) len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err); if (len > 0) __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); - netfs_write_subrequest_terminated(subreq, len ?: err, false); + netfs_write_subrequest_terminated(subreq, len ?: err); } /** diff --git a/fs/afs/write.c b/fs/afs/write.c index 18b0a9f1615e44..7df7b2f5e7b298 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -120,17 +120,17 @@ static void afs_issue_write_worker(struct work_struct *work) #if 0 // Error injection if (subreq->debug_index == 3) - return netfs_write_subrequest_terminated(subreq, -ENOANO, false); + return netfs_write_subrequest_terminated(subreq, -ENOANO); if (!subreq->retry_count) { set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); - return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); + return netfs_write_subrequest_terminated(subreq, -EAGAIN); } #endif op = afs_alloc_operation(wreq->netfs_priv, vnode->volume); if (IS_ERR(op)) - return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); + return netfs_write_subrequest_terminated(subreq, -EAGAIN); afs_op_set_vnode(op, 0, vnode); op->file[0].dv_delta = 1; @@ -166,7 +166,7 @@ static void afs_issue_write_worker(struct work_struct *work) break; } - netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false); + netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len); } void afs_issue_write(struct netfs_io_subrequest *subreq) diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 92058ae4348826..c08e4a66ac07a7 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -63,7 +63,7 @@ static void cachefiles_read_complete(struct kiocb *iocb, long ret) ret = -ESTALE; } - ki->term_func(ki->term_func_priv, ret, ki->was_async); + ki->term_func(ki->term_func_priv, ret); } cachefiles_put_kiocb(ki); @@ -188,7 +188,7 @@ static int cachefiles_read(struct netfs_cache_resources *cres, presubmission_error: if (term_func) - term_func(term_func_priv, ret < 0 ? ret : skipped, false); + term_func(term_func_priv, ret < 0 ? ret : skipped); return ret; } @@ -271,7 +271,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret) atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing); set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags); if (ki->term_func) - ki->term_func(ki->term_func_priv, ret, ki->was_async); + ki->term_func(ki->term_func_priv, ret); cachefiles_put_kiocb(ki); } @@ -301,7 +301,7 @@ int __cachefiles_write(struct cachefiles_object *object, ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); if (!ki) { if (term_func) - term_func(term_func_priv, -ENOMEM, false); + term_func(term_func_priv, -ENOMEM); return -ENOMEM; } @@ -366,7 +366,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres, { if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) { if (term_func) - term_func(term_func_priv, -ENOBUFS, false); + term_func(term_func_priv, -ENOBUFS); trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite); return -ENOBUFS; } @@ -665,7 +665,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) pre = CACHEFILES_DIO_BLOCK_SIZE - off; if (pre >= len) { fscache_count_dio_misfit(); - netfs_write_subrequest_terminated(subreq, len, false); + netfs_write_subrequest_terminated(subreq, len); return; } subreq->transferred += pre; @@ -691,7 +691,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) len -= post; if (len == 0) { fscache_count_dio_misfit(); - netfs_write_subrequest_terminated(subreq, post, false); + netfs_write_subrequest_terminated(subreq, post); return; } iov_iter_truncate(&subreq->io_iter, len); @@ -703,7 +703,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) &start, &len, len, true); cachefiles_end_secure(cache, saved_cred); if (ret < 0) { - netfs_write_subrequest_terminated(subreq, ret, false); + netfs_write_subrequest_terminated(subreq, ret); return; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 29be367905a16f..557c326561fdc0 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -539,7 +539,7 @@ static void ceph_set_page_fscache(struct page *page) folio_start_private_2(page_folio(page)); /* [DEPRECATED] */ } -static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) +static void ceph_fscache_write_terminated(void *priv, ssize_t error) { struct inode *inode = priv; diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 9c9129bca3460b..34517ca9df9157 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -102,8 +102,7 @@ static void erofs_fscache_req_io_put(struct erofs_fscache_io *io) erofs_fscache_req_put(req); } -static void erofs_fscache_req_end_io(void *priv, - ssize_t transferred_or_error, bool was_async) +static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error) { struct erofs_fscache_io *io = priv; struct erofs_fscache_rq *req = io->private; @@ -180,8 +179,7 @@ struct erofs_fscache_bio { struct bio_vec bvecs[BIO_MAX_VECS]; }; -static void erofs_fscache_bio_endio(void *priv, - ssize_t transferred_or_error, bool was_async) +static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error) { struct erofs_fscache_bio *io = priv; diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 0d1b6d35ff3b80..cb6202efc4668f 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -262,9 +262,9 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) if (ret < 0) { subreq->error = ret; /* Not queued - release both refs. */ - netfs_put_subrequest(subreq, false, + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); - netfs_put_subrequest(subreq, false, + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); break; } @@ -297,8 +297,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) subreq->error = ret; trace_netfs_sreq(subreq, netfs_sreq_trace_cancel); /* Not queued - release both refs. */ - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); break; } size -= slice; @@ -365,12 +365,10 @@ void netfs_readahead(struct readahead_control *ractl) goto cleanup_free; netfs_read_to_pagecache(rreq); - netfs_put_request(rreq, true, netfs_rreq_trace_put_return); - return; + return netfs_put_request(rreq, netfs_rreq_trace_put_return); cleanup_free: - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); - return; + return netfs_put_request(rreq, netfs_rreq_trace_put_failed); } EXPORT_SYMBOL(netfs_readahead); @@ -470,11 +468,11 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) folio_mark_uptodate(folio); } folio_unlock(folio); - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? ret : 0; discard: - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); + netfs_put_request(rreq, netfs_rreq_trace_put_discard); alloc_error: folio_unlock(folio); return ret; @@ -530,11 +528,11 @@ int netfs_read_folio(struct file *file, struct folio *folio) netfs_read_to_pagecache(rreq); ret = netfs_wait_for_read(rreq); - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? ret : 0; discard: - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); + netfs_put_request(rreq, netfs_rreq_trace_put_discard); alloc_error: folio_unlock(folio); return ret; @@ -689,7 +687,7 @@ int netfs_write_begin(struct netfs_inode *ctx, ret = netfs_wait_for_read(rreq); if (ret < 0) goto error; - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); have_folio: ret = folio_wait_private_2_killable(folio); @@ -701,7 +699,7 @@ int netfs_write_begin(struct netfs_inode *ctx, return 0; error_put: - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); + netfs_put_request(rreq, netfs_rreq_trace_put_failed); error: if (folio) { folio_unlock(folio); @@ -752,11 +750,11 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, netfs_read_to_pagecache(rreq); ret = netfs_wait_for_read(rreq); - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? ret : 0; error_put: - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); + netfs_put_request(rreq, netfs_rreq_trace_put_discard); error: _leave(" = %d", ret); return ret; diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index 5e3f0aeb51f31f..cb3c6dc0b1654c 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -85,7 +85,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) if (rreq->netfs_ops->prepare_read) { ret = rreq->netfs_ops->prepare_read(subreq); if (ret < 0) { - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); break; } } @@ -144,7 +144,7 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync) ret = netfs_dispatch_unbuffered_reads(rreq); if (!rreq->submitted) { - netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit); + netfs_put_request(rreq, netfs_rreq_trace_put_no_submit); inode_dio_end(rreq->inode); ret = 0; goto out; @@ -236,7 +236,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i } out: - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); if (ret > 0) orig_count -= ret; return ret; diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index 42ce53cc216e9d..c98f1676f86dfc 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -117,7 +117,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * } out: - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret; } EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked); diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c index b1722a82c03d3d..e4308457633ca3 100644 --- a/fs/netfs/fscache_io.c +++ b/fs/netfs/fscache_io.c @@ -192,8 +192,7 @@ EXPORT_SYMBOL(__fscache_clear_page_bits); /* * Deal with the completion of writing the data to the cache. */ -static void fscache_wreq_done(void *priv, ssize_t transferred_or_error, - bool was_async) +static void fscache_wreq_done(void *priv, ssize_t transferred_or_error) { struct fscache_write_request *wreq = priv; @@ -202,8 +201,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error, wreq->set_bits); if (wreq->term_func) - wreq->term_func(wreq->term_func_priv, transferred_or_error, - was_async); + wreq->term_func(wreq->term_func_priv, transferred_or_error); fscache_end_operation(&wreq->cache_resources); kfree(wreq); } @@ -255,14 +253,14 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, return; abandon_end: - return fscache_wreq_done(wreq, ret, false); + return fscache_wreq_done(wreq, ret); abandon_free: kfree(wreq); abandon: if (using_pgpriv2) fscache_clear_page_bits(mapping, start, len, cond); if (term_func) - term_func(term_func_priv, ret, false); + term_func(term_func_priv, ret); } EXPORT_SYMBOL(__fscache_write_to_cache); diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 1c4f953c3d683b..b6500a7cda81df 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -23,7 +23,7 @@ /* * buffered_read.c */ -void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async); +void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len); @@ -71,9 +71,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, loff_t start, size_t len, enum netfs_io_origin origin); void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what); -void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async); -void netfs_put_request(struct netfs_io_request *rreq, bool was_async, - enum netfs_rreq_ref_trace what); +void netfs_clear_subrequests(struct netfs_io_request *rreq); +void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what); struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq); static inline void netfs_see_request(struct netfs_io_request *rreq, @@ -94,7 +93,7 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq, */ void netfs_read_collection_worker(struct work_struct *work); void netfs_wake_read_collector(struct netfs_io_request *rreq); -void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async); +void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); ssize_t netfs_wait_for_read(struct netfs_io_request *rreq); void netfs_wait_for_pause(struct netfs_io_request *rreq); @@ -177,7 +176,7 @@ static inline void netfs_stat_d(atomic_t *stat) */ int netfs_folio_written_back(struct folio *folio); void netfs_write_collection_worker(struct work_struct *work); -void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async); +void netfs_wake_write_collector(struct netfs_io_request *wreq); /* * write_issue.c diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index dc6b41ef18b097..d3eb9ba3013a7d 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -10,6 +10,8 @@ #include #include "internal.h" +static void netfs_free_request(struct work_struct *work); + /* * Allocate an I/O request and initialise it. */ @@ -34,6 +36,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, } memset(rreq, 0, kmem_cache_size(cache)); + INIT_WORK(&rreq->cleanup_work, netfs_free_request); rreq->start = start; rreq->len = len; rreq->origin = origin; @@ -49,7 +52,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, INIT_LIST_HEAD(&rreq->io_streams[0].subrequests); INIT_LIST_HEAD(&rreq->io_streams[1].subrequests); init_waitqueue_head(&rreq->waitq); - refcount_set(&rreq->ref, 1); + refcount_set(&rreq->ref, 2); if (origin == NETFS_READAHEAD || origin == NETFS_READPAGE || @@ -63,7 +66,9 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, INIT_WORK(&rreq->work, netfs_write_collection_worker); } + /* The IN_PROGRESS flag comes with a ref. */ __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); + if (file && file->f_flags & O_NONBLOCK) __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); if (rreq->netfs_ops->init_request) { @@ -75,7 +80,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, } atomic_inc(&ctx->io_count); - trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new); + trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), netfs_rreq_trace_new); netfs_proc_add_rreq(rreq); netfs_stat(&netfs_n_rh_rreq); return rreq; @@ -89,7 +94,7 @@ void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace trace_netfs_rreq_ref(rreq->debug_id, r + 1, what); } -void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async) +void netfs_clear_subrequests(struct netfs_io_request *rreq) { struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream; @@ -101,8 +106,7 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async) subreq = list_first_entry(&stream->subrequests, struct netfs_io_subrequest, rreq_link); list_del(&subreq->rreq_link); - netfs_put_subrequest(subreq, was_async, - netfs_sreq_trace_put_clear); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_clear); } } } @@ -118,13 +122,19 @@ static void netfs_free_request_rcu(struct rcu_head *rcu) static void netfs_free_request(struct work_struct *work) { struct netfs_io_request *rreq = - container_of(work, struct netfs_io_request, work); + container_of(work, struct netfs_io_request, cleanup_work); struct netfs_inode *ictx = netfs_inode(rreq->inode); unsigned int i; trace_netfs_rreq(rreq, netfs_rreq_trace_free); + + /* Cancel/flush the result collection worker. That does not carry a + * ref of its own, so we must wait for it somewhere. + */ + cancel_work_sync(&rreq->work); + netfs_proc_del_rreq(rreq); - netfs_clear_subrequests(rreq, false); + netfs_clear_subrequests(rreq); if (rreq->netfs_ops->free_request) rreq->netfs_ops->free_request(rreq); if (rreq->cache_resources.ops) @@ -145,8 +155,7 @@ static void netfs_free_request(struct work_struct *work) call_rcu(&rreq->rcu, netfs_free_request_rcu); } -void netfs_put_request(struct netfs_io_request *rreq, bool was_async, - enum netfs_rreq_ref_trace what) +void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what) { unsigned int debug_id; bool dead; @@ -156,15 +165,8 @@ void netfs_put_request(struct netfs_io_request *rreq, bool was_async, debug_id = rreq->debug_id; dead = __refcount_dec_and_test(&rreq->ref, &r); trace_netfs_rreq_ref(debug_id, r - 1, what); - if (dead) { - if (was_async) { - rreq->work.func = netfs_free_request; - if (!queue_work(system_unbound_wq, &rreq->work)) - WARN_ON(1); - } else { - netfs_free_request(&rreq->work); - } - } + if (dead) + WARN_ON(!queue_work(system_unbound_wq, &rreq->cleanup_work)); } } @@ -206,8 +208,7 @@ void netfs_get_subrequest(struct netfs_io_subrequest *subreq, what); } -static void netfs_free_subrequest(struct netfs_io_subrequest *subreq, - bool was_async) +static void netfs_free_subrequest(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; @@ -216,10 +217,10 @@ static void netfs_free_subrequest(struct netfs_io_subrequest *subreq, rreq->netfs_ops->free_subrequest(subreq); mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool); netfs_stat_d(&netfs_n_rh_sreq); - netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq); + netfs_put_request(rreq, netfs_rreq_trace_put_subreq); } -void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, +void netfs_put_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what) { unsigned int debug_index = subreq->debug_index; @@ -230,5 +231,5 @@ void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, dead = __refcount_dec_and_test(&subreq->ref, &r); trace_netfs_sreq_ref(debug_id, debug_index, r - 1, what); if (dead) - netfs_free_subrequest(subreq, was_async); + netfs_free_subrequest(subreq); } diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index d3cf27b2697c33..1197ebce56757d 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -301,7 +301,7 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) struct netfs_io_subrequest, rreq_link); stream->front = front; spin_unlock(&rreq->lock); - netfs_put_subrequest(remove, false, + netfs_put_subrequest(remove, notes & ABANDON_SREQ ? netfs_sreq_trace_put_abandon : netfs_sreq_trace_put_done); @@ -399,7 +399,7 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) * Note that we're in normal kernel thread context at this point, possibly * running on a workqueue. */ -static void netfs_read_collection(struct netfs_io_request *rreq) +static bool netfs_read_collection(struct netfs_io_request *rreq) { struct netfs_io_stream *stream = &rreq->io_streams[0]; @@ -409,11 +409,11 @@ static void netfs_read_collection(struct netfs_io_request *rreq) * queue is empty. */ if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) - return; + return false; smp_rmb(); /* Read ALL_QUEUED before subreq lists. */ if (!list_empty(&stream->subrequests)) - return; + return false; /* Okay, declare that all I/O is complete. */ rreq->transferred = stream->transferred; @@ -436,12 +436,14 @@ static void netfs_read_collection(struct netfs_io_request *rreq) trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); + /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ trace_netfs_rreq(rreq, netfs_rreq_trace_done); - netfs_clear_subrequests(rreq, false); + netfs_clear_subrequests(rreq); netfs_unlock_abandoned_read_pages(rreq); if (unlikely(rreq->copy_to_cache)) netfs_pgpriv2_end_copy_to_cache(rreq); + return true; } void netfs_read_collection_worker(struct work_struct *work) @@ -449,9 +451,13 @@ void netfs_read_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) - netfs_read_collection(rreq); - netfs_put_request(rreq, false, netfs_rreq_trace_put_work); + if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_read_collection(rreq)) + /* Drop the ref from the IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + else + netfs_see_request(rreq, netfs_rreq_trace_see_work_complete); + } } /* @@ -461,11 +467,7 @@ void netfs_wake_read_collector(struct netfs_io_request *rreq) { if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { - if (!work_pending(&rreq->work)) { - netfs_get_request(rreq, netfs_rreq_trace_get_work); - if (!queue_work(system_unbound_wq, &rreq->work)) - netfs_put_request(rreq, true, netfs_rreq_trace_put_work_nq); - } + queue_work(system_unbound_wq, &rreq->work); } else { trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue); wake_up(&rreq->waitq); @@ -580,14 +582,14 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) netfs_wake_read_collector(rreq); - netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_read_subreq_terminated); /* * Handle termination of a read from the cache. */ -void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async) +void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error) { struct netfs_io_subrequest *subreq = priv; @@ -623,7 +625,11 @@ ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { __set_current_state(TASK_RUNNING); - netfs_read_collection(rreq); + if (netfs_read_collection(rreq)) { + /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + break; + } continue; } @@ -678,7 +684,11 @@ void netfs_wait_for_pause(struct netfs_io_request *rreq) (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { __set_current_state(TASK_RUNNING); - netfs_read_collection(rreq); + if (netfs_read_collection(rreq)) { + /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + break; + } continue; } } diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c index cf7727060215ad..5bbe906a551d57 100644 --- a/fs/netfs/read_pgpriv2.c +++ b/fs/netfs/read_pgpriv2.c @@ -116,7 +116,7 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache( return creq; cancel_put: - netfs_put_request(creq, false, netfs_rreq_trace_put_return); + netfs_put_request(creq, netfs_rreq_trace_put_return); cancel: rreq->copy_to_cache = ERR_PTR(-ENOBUFS); clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); @@ -155,7 +155,7 @@ void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq) smp_wmb(); /* Write lists before ALL_QUEUED. */ set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags); - netfs_put_request(creq, false, netfs_rreq_trace_put_return); + netfs_put_request(creq, netfs_rreq_trace_put_return); creq->copy_to_cache = NULL; } diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 0f294b26e08c96..1378dc7fa2ccdb 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -173,7 +173,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) &stream->subrequests, rreq_link) { trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous); list_del(&subreq->rreq_link); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_done); if (subreq == to) break; } diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c index fea0ecdecc5397..fa622a6cd56da3 100644 --- a/fs/netfs/read_single.c +++ b/fs/netfs/read_single.c @@ -142,7 +142,7 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); return ret; cancel: - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); return ret; } @@ -185,11 +185,11 @@ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_ite netfs_single_dispatch_read(rreq); ret = netfs_wait_for_read(rreq); - netfs_put_request(rreq, true, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret; cleanup_free: - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); + netfs_put_request(rreq, netfs_rreq_trace_put_failed); return ret; } EXPORT_SYMBOL(netfs_read_single); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 3fca59e6475d1c..7241d1fd2c14af 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -280,7 +280,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) struct netfs_io_subrequest, rreq_link); stream->front = front; spin_unlock(&wreq->lock); - netfs_put_subrequest(remove, false, + netfs_put_subrequest(remove, notes & SAW_FAILURE ? netfs_sreq_trace_put_cancel : netfs_sreq_trace_put_done); @@ -356,30 +356,21 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) /* * Perform the collection of subrequests, folios and encryption buffers. */ -void netfs_write_collection_worker(struct work_struct *work) +static bool netfs_write_collection(struct netfs_io_request *wreq) { - struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work); struct netfs_inode *ictx = netfs_inode(wreq->inode); size_t transferred; int s; _enter("R=%x", wreq->debug_id); - netfs_see_request(wreq, netfs_rreq_trace_see_work); - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { - netfs_put_request(wreq, false, netfs_rreq_trace_put_work); - return; - } - netfs_collect_write_results(wreq); /* We're done when the app thread has finished posting subreqs and all * the queues in all the streams are empty. */ - if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) { - netfs_put_request(wreq, false, netfs_rreq_trace_put_work); - return; - } + if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) + return false; smp_rmb(); /* Read ALL_QUEUED before lists. */ transferred = LONG_MAX; @@ -387,10 +378,8 @@ void netfs_write_collection_worker(struct work_struct *work) struct netfs_io_stream *stream = &wreq->io_streams[s]; if (!stream->active) continue; - if (!list_empty(&stream->subrequests)) { - netfs_put_request(wreq, false, netfs_rreq_trace_put_work); - return; - } + if (!list_empty(&stream->subrequests)) + return false; if (stream->transferred < transferred) transferred = stream->transferred; } @@ -430,6 +419,7 @@ void netfs_write_collection_worker(struct work_struct *work) _debug("finished"); trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags); + /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ if (wreq->iocb) { size_t written = min(wreq->transferred, wreq->len); @@ -440,27 +430,36 @@ void netfs_write_collection_worker(struct work_struct *work) wreq->iocb = VFS_PTR_POISON; } - netfs_clear_subrequests(wreq, false); - netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete); + netfs_clear_subrequests(wreq); + return true; +} + +void netfs_write_collection_worker(struct work_struct *work) +{ + struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); + + netfs_see_request(rreq, netfs_rreq_trace_see_work); + if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_write_collection(rreq)) + /* Drop the ref from the IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + else + netfs_see_request(rreq, netfs_rreq_trace_see_work_complete); + } } /* * Wake the collection work item. */ -void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async) +void netfs_wake_write_collector(struct netfs_io_request *wreq) { - if (!work_pending(&wreq->work)) { - netfs_get_request(wreq, netfs_rreq_trace_get_work); - if (!queue_work(system_unbound_wq, &wreq->work)) - netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq); - } + queue_work(system_unbound_wq, &wreq->work); } /** * netfs_write_subrequest_terminated - Note the termination of a write operation. * @_op: The I/O request that has terminated. * @transferred_or_error: The amount of data transferred or an error code. - * @was_async: The termination was asynchronous * * This tells the library that a contributory write I/O operation has * terminated, one way or another, and that it should collect the results. @@ -470,17 +469,13 @@ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async) * negative error code. The library will look after reissuing I/O operations * as appropriate and writing downloaded data to the cache. * - * If @was_async is true, the caller might be running in softirq or interrupt - * context and we can't sleep. - * * When this is called, ownership of the subrequest is transferred back to the * library, along with a ref. * * Note that %_op is a void* so that the function can be passed to * kiocb::term_func without the need for a casting wrapper. */ -void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, - bool was_async) +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error) { struct netfs_io_subrequest *subreq = _op; struct netfs_io_request *wreq = subreq->rreq; @@ -543,8 +538,8 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, * transferring a ref to it if we were the ones to do so. */ if (list_is_first(&subreq->rreq_link, &stream->subrequests)) - netfs_wake_write_collector(wreq, was_async); + netfs_wake_write_collector(wreq); - netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_write_subrequest_terminated); diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 77279fc5b5a7cb..8744ed3faf29b6 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -134,7 +134,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, return wreq; nomem: wreq->error = -ENOMEM; - netfs_put_request(wreq, false, netfs_rreq_trace_put_failed); + netfs_put_request(wreq, netfs_rreq_trace_put_failed); return ERR_PTR(-ENOMEM); } @@ -233,7 +233,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream, _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) - return netfs_write_subrequest_terminated(subreq, subreq->error, false); + return netfs_write_subrequest_terminated(subreq, subreq->error); trace_netfs_sreq(subreq, netfs_sreq_trace_submit); stream->issue_write(subreq); @@ -542,7 +542,7 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq) } if (needs_poke) - netfs_wake_write_collector(wreq, false); + netfs_wake_write_collector(wreq); } /* @@ -599,8 +599,9 @@ int netfs_writepages(struct address_space *mapping, netfs_end_issue_write(wreq); mutex_unlock(&ictx->wb_lock); + netfs_wake_write_collector(wreq); - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); _leave(" = %d", error); return error; @@ -694,7 +695,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE); ret = wreq->error; } - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret; } @@ -885,7 +886,7 @@ int netfs_writeback_single(struct address_space *mapping, goto couldnt_start; } - trace_netfs_write(wreq, netfs_write_trace_writeback); + trace_netfs_write(wreq, netfs_write_trace_writeback_single); netfs_stat(&netfs_n_wh_writepages); if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags)) @@ -914,8 +915,9 @@ int netfs_writeback_single(struct address_space *mapping, set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); mutex_unlock(&ictx->wb_lock); + netfs_wake_write_collector(wreq); - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); _leave(" = %d", ret); return ret; diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 9b1ca8b0f4dd6b..7408f6bb8e42e5 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -132,7 +132,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, &stream->subrequests, rreq_link) { trace_netfs_sreq(subreq, netfs_sreq_trace_discard); list_del(&subreq->rreq_link); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_done); if (subreq == to) break; } diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index ecf774a8f1ca01..66093fa78aed7d 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -151,8 +151,7 @@ extern bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 eof, bool from_readdir); extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, unsigned int bytes_written); -void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result, - bool was_async); +void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int); extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index f55457b4b82e36..477792c07d458f 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1725,7 +1725,7 @@ cifs_writev_callback(struct mid_q_entry *mid) server->credits, server->in_flight, 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; - cifs_write_subrequest_terminated(wdata, result, true); + cifs_write_subrequest_terminated(wdata, result); release_mid(mid); trace_smb3_rw_credits(credits.rreq_debug_id, credits.rreq_debug_index, 0, server->credits, server->in_flight, @@ -1813,7 +1813,7 @@ cifs_async_writev(struct cifs_io_subrequest *wdata) out: if (rc) { add_credits_and_wake_if(wdata->server, &wdata->credits, 0); - cifs_write_subrequest_terminated(wdata, rc, false); + cifs_write_subrequest_terminated(wdata, rc); } } diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 950aa4f912f5cd..3000c8a9d3ea54 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -130,7 +130,7 @@ static void cifs_issue_write(struct netfs_io_subrequest *subreq) else trace_netfs_sreq(subreq, netfs_sreq_trace_fail); add_credits_and_wake_if(wdata->server, &wdata->credits, 0); - cifs_write_subrequest_terminated(wdata, rc, false); + cifs_write_subrequest_terminated(wdata, rc); goto out; } @@ -2423,8 +2423,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) return rc; } -void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result, - bool was_async) +void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result) { struct netfs_io_request *wreq = wdata->rreq; struct netfs_inode *ictx = netfs_inode(wreq->inode); @@ -2441,7 +2440,7 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t netfs_resize_file(ictx, wrend, true); } - netfs_write_subrequest_terminated(&wdata->subreq, result, was_async); + netfs_write_subrequest_terminated(&wdata->subreq, result); } struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 4e28632b5fd661..399185ca7cacb0 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4888,7 +4888,7 @@ smb2_writev_callback(struct mid_q_entry *mid) 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); - cifs_write_subrequest_terminated(wdata, result ?: written, true); + cifs_write_subrequest_terminated(wdata, result ?: written); release_mid(mid); trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, server->credits, server->in_flight, @@ -5061,7 +5061,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) -(int)wdata->credits.value, cifs_trace_rw_credits_write_response_clear); add_credits_and_wake_if(wdata->server, &wdata->credits, 0); - cifs_write_subrequest_terminated(wdata, rc, true); + cifs_write_subrequest_terminated(wdata, rc); } } diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9de27643607fb1..266e6c9e6f83ad 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -628,7 +628,7 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, term_func, term_func_priv, using_pgpriv2, caching); else if (term_func) - term_func(term_func_priv, -ENOBUFS, false); + term_func(term_func_priv, -ENOBUFS); } diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 497c4f4698f6ec..c3f230732f51df 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -51,8 +51,7 @@ enum netfs_io_source { NETFS_INVALID_WRITE, } __mode(byte); -typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, - bool was_async); +typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error); /* * Per-inode context. This wraps the VFS inode. @@ -223,9 +222,10 @@ enum netfs_io_origin { */ struct netfs_io_request { union { - struct work_struct work; + struct work_struct cleanup_work; /* Deferred cleanup work */ struct rcu_head rcu; }; + struct work_struct work; /* Result collector work */ struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ struct kiocb *iocb; /* AIO completion vector */ @@ -270,7 +270,7 @@ struct netfs_io_request { #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ +#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ #define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ @@ -440,15 +440,14 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, - bool was_async, enum netfs_sreq_ref_trace what); + enum netfs_sreq_ref_trace what); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs); void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); -void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, - bool was_async); +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error); void netfs_queue_write_request(struct netfs_io_subrequest *subreq); int netfs_start_io_read(struct inode *inode); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index f880835f7695ed..402c5e82e7b8d9 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -30,6 +30,7 @@ EM(netfs_write_trace_dio_write, "DIO-WRITE") \ EM(netfs_write_trace_unbuffered_write, "UNB-WRITE") \ EM(netfs_write_trace_writeback, "WRITEBACK") \ + EM(netfs_write_trace_writeback_single, "WB-SINGLE") \ E_(netfs_write_trace_writethrough, "WRITETHRU") #define netfs_rreq_origins \ @@ -128,17 +129,15 @@ #define netfs_rreq_ref_traces \ EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND") \ EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \ - EM(netfs_rreq_trace_get_work, "GET WORK ") \ EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \ EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \ EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \ EM(netfs_rreq_trace_put_no_submit, "PUT NO-SUBM") \ EM(netfs_rreq_trace_put_return, "PUT RETURN ") \ EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \ - EM(netfs_rreq_trace_put_work, "PUT WORK ") \ - EM(netfs_rreq_trace_put_work_complete, "PUT WORK CP") \ - EM(netfs_rreq_trace_put_work_nq, "PUT WORK NQ") \ + EM(netfs_rreq_trace_put_work_ip, "PUT WORK IP ") \ EM(netfs_rreq_trace_see_work, "SEE WORK ") \ + EM(netfs_rreq_trace_see_work_complete, "SEE WORK CP") \ E_(netfs_rreq_trace_new, "NEW ") #define netfs_sreq_ref_traces \ diff --git a/net/9p/client.c b/net/9p/client.c index 61461b9fa13431..5c1ca57ccd2853 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1704,7 +1704,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) start, len, &subreq->io_iter); } if (IS_ERR(req)) { - netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false); + netfs_write_subrequest_terminated(subreq, PTR_ERR(req)); return; } @@ -1712,7 +1712,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) if (err) { trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); - netfs_write_subrequest_terminated(subreq, err, false); + netfs_write_subrequest_terminated(subreq, err); return; } @@ -1724,7 +1724,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len); p9_req_put(clnt, req); - netfs_write_subrequest_terminated(subreq, written, false); + netfs_write_subrequest_terminated(subreq, written); } EXPORT_SYMBOL(p9_client_write_subreq); From 329ba1cb402ac328224965b8fc7a554a5150908e Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 May 2025 10:07:04 +0100 Subject: [PATCH 2514/2924] netfs: Fix wait/wake to be consistent about the waitqueue used [ Upstream commit 2b1424cd131cfaba4cf7040473133d26cddac088 ] Fix further inconsistencies in the use of waitqueues (clear_and_wake_up_bit() vs private waitqueue). Move some of this stuff from the read and write sides into common code so that it can be done in fewer places. To make this work, async I/O needs to set NETFS_RREQ_OFFLOAD_COLLECTION to indicate that a workqueue will do the collecting and places that call the wait function need to deal with it returning the amount transferred. Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Signed-off-by: David Howells Link: https://lore.kernel.org/20250519090707.2848510-5-dhowells@redhat.com cc: Marc Dionne cc: Steve French cc: Ihor Solodrai cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Dominique Martinet cc: Christian Schoenebeck cc: Paulo Alcantara cc: Jeff Layton cc: v9fs@lists.linux.dev cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/netfs/buffered_read.c | 2 +- fs/netfs/buffered_write.c | 2 +- fs/netfs/direct_read.c | 4 +- fs/netfs/direct_write.c | 10 +- fs/netfs/internal.h | 33 ++++-- fs/netfs/misc.c | 218 ++++++++++++++++++++++++++++++++++++++ fs/netfs/read_collect.c | 139 +----------------------- fs/netfs/read_retry.c | 24 +---- fs/netfs/write_collect.c | 36 ++----- fs/netfs/write_issue.c | 28 +++-- fs/netfs/write_retry.c | 12 +-- 11 files changed, 284 insertions(+), 224 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index cb6202efc4668f..fd4619275801be 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -312,7 +312,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) if (unlikely(size > 0)) { smp_wmb(); /* Write lists before ALL_QUEUED. */ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); - netfs_wake_read_collector(rreq); + netfs_wake_collector(rreq); } /* Defer error return as we may need to wait for outstanding I/O. */ diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index b4826360a41112..dbb544e183d13d 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -386,7 +386,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, wbc_detach_inode(&wbc); if (ret2 == -EIOCBQUEUED) return ret2; - if (ret == 0) + if (ret == 0 && ret2 < 0) ret = ret2; } diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index cb3c6dc0b1654c..a24e63d2c8186a 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -103,7 +103,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) rreq->netfs_ops->issue_read(subreq); if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) - netfs_wait_for_pause(rreq); + netfs_wait_for_paused_read(rreq); if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) break; if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && @@ -115,7 +115,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) if (unlikely(size > 0)) { smp_wmb(); /* Write lists before ALL_QUEUED. */ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); - netfs_wake_read_collector(rreq); + netfs_wake_collector(rreq); } return ret; diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index c98f1676f86dfc..fa9a5bf3c6d512 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -87,6 +87,8 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * } __set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags); + if (async) + __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); /* Copy the data into the bounce buffer and encrypt it. */ // TODO @@ -105,13 +107,9 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * if (!async) { trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); - wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, - TASK_UNINTERRUPTIBLE); - ret = wreq->error; - if (ret == 0) { - ret = wreq->transferred; + ret = netfs_wait_for_write(wreq); + if (ret > 0) iocb->ki_pos += ret; - } } else { ret = -EIOCBQUEUED; } diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index b6500a7cda81df..e2ee9183392b93 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -62,6 +62,14 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {} struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq, enum netfs_folioq_trace trace); void netfs_reset_iter(struct netfs_io_subrequest *subreq); +void netfs_wake_collector(struct netfs_io_request *rreq); +void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq); +void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, + struct netfs_io_stream *stream); +ssize_t netfs_wait_for_read(struct netfs_io_request *rreq); +ssize_t netfs_wait_for_write(struct netfs_io_request *rreq); +void netfs_wait_for_paused_read(struct netfs_io_request *rreq); +void netfs_wait_for_paused_write(struct netfs_io_request *rreq); /* * objects.c @@ -91,11 +99,9 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq, /* * read_collect.c */ +bool netfs_read_collection(struct netfs_io_request *rreq); void netfs_read_collection_worker(struct work_struct *work); -void netfs_wake_read_collector(struct netfs_io_request *rreq); void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); -ssize_t netfs_wait_for_read(struct netfs_io_request *rreq); -void netfs_wait_for_pause(struct netfs_io_request *rreq); /* * read_pgpriv2.c @@ -175,8 +181,8 @@ static inline void netfs_stat_d(atomic_t *stat) * write_collect.c */ int netfs_folio_written_back(struct folio *folio); +bool netfs_write_collection(struct netfs_io_request *wreq); void netfs_write_collection_worker(struct work_struct *work); -void netfs_wake_write_collector(struct netfs_io_request *wreq); /* * write_issue.c @@ -197,8 +203,8 @@ struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, struct folio *folio, size_t copied, bool to_page_end, struct folio **writethrough_cache); -int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, - struct folio *writethrough_cache); +ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, + struct folio *writethrough_cache); int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len); /* @@ -253,6 +259,21 @@ static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr) netfs_group->free(netfs_group); } +/* + * Clear and wake up a NETFS_RREQ_* flag bit on a request. + */ +static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq, + unsigned int rreq_flag, + enum netfs_rreq_trace trace) +{ + if (test_bit(rreq_flag, &rreq->flags)) { + trace_netfs_rreq(rreq, trace); + clear_bit_unlock(rreq_flag, &rreq->flags); + smp_mb__after_atomic(); /* Set flag before task state */ + wake_up(&rreq->waitq); + } +} + /* * fscache-cache.c */ diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 7099aa07737ac0..77e7f7c79d27c2 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -313,3 +313,221 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp) return true; } EXPORT_SYMBOL(netfs_release_folio); + +/* + * Wake the collection work item. + */ +void netfs_wake_collector(struct netfs_io_request *rreq) +{ + if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && + !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { + queue_work(system_unbound_wq, &rreq->work); + } else { + trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue); + wake_up(&rreq->waitq); + } +} + +/* + * Mark a subrequest as no longer being in progress and, if need be, wake the + * collector. + */ +void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq) +{ + struct netfs_io_request *rreq = subreq->rreq; + struct netfs_io_stream *stream = &rreq->io_streams[subreq->stream_nr]; + + clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); + smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */ + + /* If we are at the head of the queue, wake up the collector. */ + if (list_is_first(&subreq->rreq_link, &stream->subrequests) || + test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) + netfs_wake_collector(rreq); +} + +/* + * Wait for all outstanding I/O in a stream to quiesce. + */ +void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, + struct netfs_io_stream *stream) +{ + struct netfs_io_subrequest *subreq; + DEFINE_WAIT(myself); + + list_for_each_entry(subreq, &stream->subrequests, rreq_link) { + if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + continue; + + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + for (;;) { + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + break; + + trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + } + + finish_wait(&rreq->waitq, &myself); +} + +/* + * Perform collection in app thread if not offloaded to workqueue. + */ +static int netfs_collect_in_app(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) +{ + bool need_collect = false, inactive = true; + + for (int i = 0; i < NR_IO_STREAMS; i++) { + struct netfs_io_subrequest *subreq; + struct netfs_io_stream *stream = &rreq->io_streams[i]; + + if (!stream->active) + continue; + inactive = false; + trace_netfs_collect_stream(rreq, stream); + subreq = list_first_entry_or_null(&stream->subrequests, + struct netfs_io_subrequest, + rreq_link); + if (subreq && + (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || + test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { + need_collect = true; + break; + } + } + + if (!need_collect && !inactive) + return 0; /* Sleep */ + + __set_current_state(TASK_RUNNING); + if (collector(rreq)) { + /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + return 1; /* Done */ + } + + if (inactive) { + WARN(true, "Failed to collect inactive req R=%08x\n", + rreq->debug_id); + cond_resched(); + } + return 2; /* Again */ +} + +/* + * Wait for a request to complete, successfully or otherwise. + */ +static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) +{ + DEFINE_WAIT(myself); + ssize_t ret; + + for (;;) { + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { + switch (netfs_collect_in_app(rreq, collector)) { + case 0: + break; + case 1: + goto all_collected; + case 2: + continue; + } + } + + if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) + break; + + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + +all_collected: + finish_wait(&rreq->waitq, &myself); + + ret = rreq->error; + if (ret == 0) { + ret = rreq->transferred; + switch (rreq->origin) { + case NETFS_DIO_READ: + case NETFS_DIO_WRITE: + case NETFS_READ_SINGLE: + case NETFS_UNBUFFERED_WRITE: + break; + default: + if (rreq->submitted < rreq->len) { + trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); + ret = -EIO; + } + break; + } + } + + return ret; +} + +ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) +{ + return netfs_wait_for_request(rreq, netfs_read_collection); +} + +ssize_t netfs_wait_for_write(struct netfs_io_request *rreq) +{ + return netfs_wait_for_request(rreq, netfs_write_collection); +} + +/* + * Wait for a paused operation to unpause or complete in some manner. + */ +static void netfs_wait_for_pause(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) +{ + DEFINE_WAIT(myself); + + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); + + for (;;) { + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { + switch (netfs_collect_in_app(rreq, collector)) { + case 0: + break; + case 1: + goto all_collected; + case 2: + continue; + } + } + + if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || + !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) + break; + + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + +all_collected: + finish_wait(&rreq->waitq, &myself); +} + +void netfs_wait_for_paused_read(struct netfs_io_request *rreq) +{ + return netfs_wait_for_pause(rreq, netfs_read_collection); +} + +void netfs_wait_for_paused_write(struct netfs_io_request *rreq) +{ + return netfs_wait_for_pause(rreq, netfs_write_collection); +} diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 1197ebce56757d..900dd51c3b941c 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -315,14 +315,8 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) if (notes & NEED_RETRY) goto need_retry; - if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) { - trace_netfs_rreq(rreq, netfs_rreq_trace_unpause); - clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags); - smp_mb__after_atomic(); /* Set PAUSE before task state */ - wake_up(&rreq->waitq); - } - if (notes & MADE_PROGRESS) { + netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause); //cond_resched(); goto reassess; } @@ -399,7 +393,7 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) * Note that we're in normal kernel thread context at this point, possibly * running on a workqueue. */ -static bool netfs_read_collection(struct netfs_io_request *rreq) +bool netfs_read_collection(struct netfs_io_request *rreq) { struct netfs_io_stream *stream = &rreq->io_streams[0]; @@ -434,8 +428,7 @@ static bool netfs_read_collection(struct netfs_io_request *rreq) } task_io_account_read(rreq->transferred); - trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); - clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); + netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ trace_netfs_rreq(rreq, netfs_rreq_trace_done); @@ -460,20 +453,6 @@ void netfs_read_collection_worker(struct work_struct *work) } } -/* - * Wake the collection work item. - */ -void netfs_wake_read_collector(struct netfs_io_request *rreq) -{ - if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && - !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { - queue_work(system_unbound_wq, &rreq->work); - } else { - trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue); - wake_up(&rreq->waitq); - } -} - /** * netfs_read_subreq_progress - Note progress of a read operation. * @subreq: The read request that has terminated. @@ -502,7 +481,7 @@ void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq) list_is_first(&subreq->rreq_link, &stream->subrequests) ) { __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); - netfs_wake_read_collector(rreq); + netfs_wake_collector(rreq); } } EXPORT_SYMBOL(netfs_read_subreq_progress); @@ -526,7 +505,6 @@ EXPORT_SYMBOL(netfs_read_subreq_progress); void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; switch (subreq->source) { case NETFS_READ_FROM_CACHE: @@ -573,15 +551,7 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) } trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); - - clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */ - - /* If we are at the head of the queue, wake up the collector. */ - if (list_is_first(&subreq->rreq_link, &stream->subrequests) || - test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) - netfs_wake_read_collector(rreq); - + netfs_subreq_clear_in_progress(subreq); netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_read_subreq_terminated); @@ -604,102 +574,3 @@ void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error) } netfs_read_subreq_terminated(subreq); } - -/* - * Wait for the read operation to complete, successfully or otherwise. - */ -ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) -{ - struct netfs_io_subrequest *subreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; - DEFINE_WAIT(myself); - ssize_t ret; - - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); - prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - - subreq = list_first_entry_or_null(&stream->subrequests, - struct netfs_io_subrequest, rreq_link); - if (subreq && - (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || - test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { - __set_current_state(TASK_RUNNING); - if (netfs_read_collection(rreq)) { - /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ - netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); - break; - } - continue; - } - - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) - break; - - schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); - } - - finish_wait(&rreq->waitq, &myself); - - ret = rreq->error; - if (ret == 0) { - ret = rreq->transferred; - switch (rreq->origin) { - case NETFS_DIO_READ: - case NETFS_READ_SINGLE: - ret = rreq->transferred; - break; - default: - if (rreq->submitted < rreq->len) { - trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); - ret = -EIO; - } - break; - } - } - - return ret; -} - -/* - * Wait for a paused read operation to unpause or complete in some manner. - */ -void netfs_wait_for_pause(struct netfs_io_request *rreq) -{ - struct netfs_io_subrequest *subreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; - DEFINE_WAIT(myself); - - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); - - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); - prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - - if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { - subreq = list_first_entry_or_null(&stream->subrequests, - struct netfs_io_subrequest, rreq_link); - if (subreq && - (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || - test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { - __set_current_state(TASK_RUNNING); - if (netfs_read_collection(rreq)) { - /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ - netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); - break; - } - continue; - } - } - - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || - !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) - break; - - schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); - } - - finish_wait(&rreq->waitq, &myself); -} diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 1378dc7fa2ccdb..b99e84a8170af2 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -257,35 +257,15 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) */ void netfs_retry_reads(struct netfs_io_request *rreq) { - struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream = &rreq->io_streams[0]; - DEFINE_WAIT(myself); netfs_stat(&netfs_n_rh_retry_read_req); - set_bit(NETFS_RREQ_RETRYING, &rreq->flags); - /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. */ - list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) - continue; - - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); - for (;;) { - prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) - break; - - trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); - schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); - } - - finish_wait(&rreq->waitq, &myself); - } + set_bit(NETFS_RREQ_RETRYING, &rreq->flags); + netfs_wait_for_in_progress_stream(rreq, stream); clear_bit(NETFS_RREQ_RETRYING, &rreq->flags); trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 7241d1fd2c14af..0ce7b53e7fe83f 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -321,18 +321,14 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) if (notes & NEED_RETRY) goto need_retry; - if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { - trace_netfs_rreq(wreq, netfs_rreq_trace_unpause); - clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags); - smp_mb__after_atomic(); /* Set PAUSE before task state */ - wake_up(&wreq->waitq); - } - if (notes & NEED_REASSESS) { + if (notes & MADE_PROGRESS) { + netfs_wake_rreq_flag(wreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause); //cond_resched(); goto reassess_streams; } - if (notes & MADE_PROGRESS) { + + if (notes & NEED_REASSESS) { //cond_resched(); goto reassess_streams; } @@ -356,7 +352,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) /* * Perform the collection of subrequests, folios and encryption buffers. */ -static bool netfs_write_collection(struct netfs_io_request *wreq) +bool netfs_write_collection(struct netfs_io_request *wreq) { struct netfs_inode *ictx = netfs_inode(wreq->inode); size_t transferred; @@ -417,8 +413,7 @@ static bool netfs_write_collection(struct netfs_io_request *wreq) inode_dio_end(wreq->inode); _debug("finished"); - trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); - clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags); + netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ if (wreq->iocb) { @@ -448,14 +443,6 @@ void netfs_write_collection_worker(struct work_struct *work) } } -/* - * Wake the collection work item. - */ -void netfs_wake_write_collector(struct netfs_io_request *wreq) -{ - queue_work(system_unbound_wq, &wreq->work); -} - /** * netfs_write_subrequest_terminated - Note the termination of a write operation. * @_op: The I/O request that has terminated. @@ -479,7 +466,6 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error) { struct netfs_io_subrequest *subreq = _op; struct netfs_io_request *wreq = subreq->rreq; - struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); @@ -531,15 +517,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error) } trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); - - clear_and_wake_up_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - - /* If we are at the head of the queue, wake up the collector, - * transferring a ref to it if we were the ones to do so. - */ - if (list_is_first(&subreq->rreq_link, &stream->subrequests)) - netfs_wake_write_collector(wreq); - + netfs_subreq_clear_in_progress(subreq); netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_write_subrequest_terminated); diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 8744ed3faf29b6..50bee2c4130d1e 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -542,7 +542,7 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq) } if (needs_poke) - netfs_wake_write_collector(wreq); + netfs_wake_collector(wreq); } /* @@ -576,6 +576,7 @@ int netfs_writepages(struct address_space *mapping, goto couldnt_start; } + __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); trace_netfs_write(wreq, netfs_write_trace_writeback); netfs_stat(&netfs_n_wh_writepages); @@ -599,7 +600,7 @@ int netfs_writepages(struct address_space *mapping, netfs_end_issue_write(wreq); mutex_unlock(&ictx->wb_lock); - netfs_wake_write_collector(wreq); + netfs_wake_collector(wreq); netfs_put_request(wreq, netfs_rreq_trace_put_return); _leave(" = %d", error); @@ -674,11 +675,11 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c /* * End a write operation used when writing through the pagecache. */ -int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, - struct folio *writethrough_cache) +ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, + struct folio *writethrough_cache) { struct netfs_inode *ictx = netfs_inode(wreq->inode); - int ret; + ssize_t ret; _enter("R=%x", wreq->debug_id); @@ -689,12 +690,10 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr mutex_unlock(&ictx->wb_lock); - if (wreq->iocb) { + if (wreq->iocb) ret = -EIOCBQUEUED; - } else { - wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE); - ret = wreq->error; - } + else + ret = netfs_wait_for_write(wreq); netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret; } @@ -723,10 +722,8 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t start += part; len -= part; rolling_buffer_advance(&wreq->buffer, part); - if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { - trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause); - wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags)); - } + if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) + netfs_wait_for_paused_write(wreq); if (test_bit(NETFS_RREQ_FAILED, &wreq->flags)) break; } @@ -886,6 +883,7 @@ int netfs_writeback_single(struct address_space *mapping, goto couldnt_start; } + __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); trace_netfs_write(wreq, netfs_write_trace_writeback_single); netfs_stat(&netfs_n_wh_writepages); @@ -915,7 +913,7 @@ int netfs_writeback_single(struct address_space *mapping, set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); mutex_unlock(&ictx->wb_lock); - netfs_wake_write_collector(wreq); + netfs_wake_collector(wreq); netfs_put_request(wreq, netfs_rreq_trace_put_return); _leave(" = %d", ret); diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 7408f6bb8e42e5..9d1d8a8bab7261 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -200,7 +200,6 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, */ void netfs_retry_writes(struct netfs_io_request *wreq) { - struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream; int s; @@ -209,16 +208,13 @@ void netfs_retry_writes(struct netfs_io_request *wreq) /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. */ + set_bit(NETFS_RREQ_RETRYING, &wreq->flags); for (s = 0; s < NR_IO_STREAMS; s++) { stream = &wreq->io_streams[s]; - if (!stream->active) - continue; - - list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS, - TASK_UNINTERRUPTIBLE); - } + if (stream->active) + netfs_wait_for_in_progress_stream(wreq, stream); } + clear_bit(NETFS_RREQ_RETRYING, &wreq->flags); // TODO: Enc: Fetch changed partial pages // TODO: Enc: Reencrypt content if needed. From df7747431661c96c9c785d0fb1a9a26c3142af38 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 21 Apr 2025 17:00:33 +0200 Subject: [PATCH 2515/2924] mfd: exynos-lpass: Fix an error handling path in exynos_lpass_probe() [ Upstream commit 484f0f59f09edd1f6fa63703c12eb30d72a519ac ] If an error occurs after a successful regmap_init_mmio(), regmap_exit() should be called as already done in the .remove() function. Switch to devm_regmap_init_mmio() to avoid the leak and simplify the .remove() function. Fixes: c414df12bdf7 ("mfd: exynos-lpass: Add missing remove() function") Signed-off-by: Christophe JAILLET Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/38414eecb1096840946756ae86887aea2a489c1b.1745247209.git.christophe.jaillet@wanadoo.fr Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/exynos-lpass.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/mfd/exynos-lpass.c b/drivers/mfd/exynos-lpass.c index 6a585173230b13..6b95927e99be71 100644 --- a/drivers/mfd/exynos-lpass.c +++ b/drivers/mfd/exynos-lpass.c @@ -122,8 +122,8 @@ static int exynos_lpass_probe(struct platform_device *pdev) if (IS_ERR(lpass->sfr0_clk)) return PTR_ERR(lpass->sfr0_clk); - lpass->top = regmap_init_mmio(dev, base_top, - &exynos_lpass_reg_conf); + lpass->top = devm_regmap_init_mmio(dev, base_top, + &exynos_lpass_reg_conf); if (IS_ERR(lpass->top)) { dev_err(dev, "LPASS top regmap initialization failed\n"); return PTR_ERR(lpass->top); @@ -145,7 +145,6 @@ static void exynos_lpass_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) exynos_lpass_disable(lpass); - regmap_exit(lpass->top); } static int __maybe_unused exynos_lpass_suspend(struct device *dev) From 3f33b744f471f37e8f73508c519eb3917a60a8f2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 21 Apr 2025 17:00:34 +0200 Subject: [PATCH 2516/2924] mfd: exynos-lpass: Avoid calling exynos_lpass_disable() twice in exynos_lpass_remove() [ Upstream commit b70b84556eeca5262d290e8619fe0af5b7664a52 ] exynos_lpass_disable() is called twice in the remove function. Remove one of these calls. Fixes: 90f447170c6f ("mfd: exynos-lpass: Add runtime PM support") Signed-off-by: Christophe JAILLET Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/74d69e8de10308c9855db6d54155a3de4b11abfd.1745247209.git.christophe.jaillet@wanadoo.fr Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/exynos-lpass.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mfd/exynos-lpass.c b/drivers/mfd/exynos-lpass.c index 6b95927e99be71..a2785ceea8bfcd 100644 --- a/drivers/mfd/exynos-lpass.c +++ b/drivers/mfd/exynos-lpass.c @@ -141,7 +141,6 @@ static void exynos_lpass_remove(struct platform_device *pdev) { struct exynos_lpass *lpass = platform_get_drvdata(pdev); - exynos_lpass_disable(lpass); pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) exynos_lpass_disable(lpass); From a430cc3d229b60bb66892708fcb5bca4b6e4399a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 21 Apr 2025 17:00:35 +0200 Subject: [PATCH 2517/2924] mfd: exynos-lpass: Fix another error handling path in exynos_lpass_probe() [ Upstream commit f41cc37f4bc0e8cd424697bf6e26586cadcf4b9b ] If devm_of_platform_populate() fails, some clean-up needs to be done, as already done in the remove function. Add a new devm_add_action_or_reset() to fix the leak in the probe and remove the need of a remove function. Fixes: c695abab2429 ("mfd: Add Samsung Exynos Low Power Audio Subsystem driver") Signed-off-by: Christophe JAILLET Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/69471e839efc0249a504492a8de3497fcdb6a009.1745247209.git.christophe.jaillet@wanadoo.fr Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/exynos-lpass.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/mfd/exynos-lpass.c b/drivers/mfd/exynos-lpass.c index a2785ceea8bfcd..44797001a4322b 100644 --- a/drivers/mfd/exynos-lpass.c +++ b/drivers/mfd/exynos-lpass.c @@ -104,11 +104,22 @@ static const struct regmap_config exynos_lpass_reg_conf = { .fast_io = true, }; +static void exynos_lpass_disable_lpass(void *data) +{ + struct platform_device *pdev = data; + struct exynos_lpass *lpass = platform_get_drvdata(pdev); + + pm_runtime_disable(&pdev->dev); + if (!pm_runtime_status_suspended(&pdev->dev)) + exynos_lpass_disable(lpass); +} + static int exynos_lpass_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct exynos_lpass *lpass; void __iomem *base_top; + int ret; lpass = devm_kzalloc(dev, sizeof(*lpass), GFP_KERNEL); if (!lpass) @@ -134,16 +145,11 @@ static int exynos_lpass_probe(struct platform_device *pdev) pm_runtime_enable(dev); exynos_lpass_enable(lpass); - return devm_of_platform_populate(dev); -} - -static void exynos_lpass_remove(struct platform_device *pdev) -{ - struct exynos_lpass *lpass = platform_get_drvdata(pdev); + ret = devm_add_action_or_reset(dev, exynos_lpass_disable_lpass, pdev); + if (ret) + return ret; - pm_runtime_disable(&pdev->dev); - if (!pm_runtime_status_suspended(&pdev->dev)) - exynos_lpass_disable(lpass); + return devm_of_platform_populate(dev); } static int __maybe_unused exynos_lpass_suspend(struct device *dev) @@ -183,7 +189,6 @@ static struct platform_driver exynos_lpass_driver = { .of_match_table = exynos_lpass_of_match, }, .probe = exynos_lpass_probe, - .remove = exynos_lpass_remove, }; module_platform_driver(exynos_lpass_driver); From 7a506ec1caa6a992b84f5bebedf32ecbd739989a Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Sat, 26 Apr 2025 18:16:32 +0200 Subject: [PATCH 2518/2924] mfd: stmpe-spi: Correct the name used in MODULE_DEVICE_TABLE [ Upstream commit 59d60c16ed41475f3b5f7b605e75fbf8e3628720 ] The name used in the macro does not exist. drivers/mfd/stmpe-spi.c:132:26: error: use of undeclared identifier 'stmpe_id' 132 | MODULE_DEVICE_TABLE(spi, stmpe_id); Fixes: e789995d5c61 ("mfd: Add support for STMPE SPI interface") Signed-off-by: Alexey Gladkov Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/79d5a847303e45a46098f2d827d3d8a249a32be3.1745591072.git.legion@kernel.org Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/stmpe-spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c index 792236f56399af..b9cc85ea2c4019 100644 --- a/drivers/mfd/stmpe-spi.c +++ b/drivers/mfd/stmpe-spi.c @@ -129,7 +129,7 @@ static const struct spi_device_id stmpe_spi_id[] = { { "stmpe2403", STMPE2403 }, { } }; -MODULE_DEVICE_TABLE(spi, stmpe_id); +MODULE_DEVICE_TABLE(spi, stmpe_spi_id); static struct spi_driver stmpe_spi_driver = { .driver = { From b5dbb53c888e5d5099c1a7038aaa2e6c765a9ac6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 May 2025 08:57:52 +0100 Subject: [PATCH 2519/2924] netfs: Fix undifferentiation of DIO reads from unbuffered reads [ Upstream commit db26d62d79e4068934ad0dccdb92715df36352b9 ] On cifs, "DIO reads" (specified by O_DIRECT) need to be differentiated from "unbuffered reads" (specified by cache=none in the mount parameters). The difference is flagged in the protocol and the server may behave differently: Windows Server will, for example, mandate that DIO reads are block aligned. Fix this by adding a NETFS_UNBUFFERED_READ to differentiate this from NETFS_DIO_READ, parallelling the write differentiation that already exists. cifs will then do the right thing. Fixes: 016dc8516aec ("netfs: Implement unbuffered/DIO read support") Signed-off-by: David Howells Link: https://lore.kernel.org/3444961.1747987072@warthog.procyon.org.uk Reviewed-by: "Paulo Alcantara (Red Hat)" Reviewed-by: Viacheslav Dubeyko cc: Steve French cc: netfs@lists.linux.dev cc: v9fs@lists.linux.dev cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-nfs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/9p/vfs_addr.c | 3 ++- fs/afs/write.c | 1 + fs/ceph/addr.c | 4 +++- fs/netfs/direct_read.c | 3 ++- fs/netfs/main.c | 1 + fs/netfs/misc.c | 1 + fs/netfs/objects.c | 1 + fs/netfs/read_collect.c | 7 +++++-- fs/nfs/fscache.c | 1 + fs/smb/client/file.c | 3 ++- include/linux/netfs.h | 1 + include/trace/events/netfs.h | 1 + 12 files changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index b5a4a28e0fe79b..e4420591cf3543 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -77,7 +77,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) /* if we just extended the file size, any portion not in * cache won't be on server and is zeroes */ - if (subreq->rreq->origin != NETFS_DIO_READ) + if (subreq->rreq->origin != NETFS_UNBUFFERED_READ && + subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (pos + total >= i_size_read(rreq->inode)) __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); diff --git a/fs/afs/write.c b/fs/afs/write.c index 7df7b2f5e7b298..2e7526ea883ae2 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -202,6 +202,7 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st case NETFS_READ_GAPS: case NETFS_READ_SINGLE: case NETFS_READ_FOR_WRITE: + case NETFS_UNBUFFERED_READ: case NETFS_DIO_READ: return; default: diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 557c326561fdc0..b95c4cb21c13f0 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -238,6 +238,7 @@ static void finish_netfs_read(struct ceph_osd_request *req) if (sparse && err > 0) err = ceph_sparse_ext_map_end(op); if (err < subreq->len && + subreq->rreq->origin != NETFS_UNBUFFERED_READ && subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (IS_ENCRYPTED(inode) && err > 0) { @@ -281,7 +282,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) size_t len; int mode; - if (rreq->origin != NETFS_DIO_READ) + if (rreq->origin != NETFS_UNBUFFERED_READ && + rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index a24e63d2c8186a..9902766195d7b2 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -188,7 +188,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp, iocb->ki_pos, orig_count, - NETFS_DIO_READ); + iocb->ki_flags & IOCB_DIRECT ? + NETFS_DIO_READ : NETFS_UNBUFFERED_READ); if (IS_ERR(rreq)) return PTR_ERR(rreq); diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 70ecc8f5f21034..3db401d269e7b3 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = { [NETFS_READ_GAPS] = "RG", [NETFS_READ_SINGLE] = "R1", [NETFS_READ_FOR_WRITE] = "RW", + [NETFS_UNBUFFERED_READ] = "UR", [NETFS_DIO_READ] = "DR", [NETFS_WRITEBACK] = "WB", [NETFS_WRITEBACK_SINGLE] = "W1", diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 77e7f7c79d27c2..43b67a28a8fa07 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -461,6 +461,7 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, case NETFS_DIO_READ: case NETFS_DIO_WRITE: case NETFS_READ_SINGLE: + case NETFS_UNBUFFERED_READ: case NETFS_UNBUFFERED_WRITE: break; default: diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index d3eb9ba3013a7d..31fa0c81e2a43e 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -59,6 +59,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, origin == NETFS_READ_GAPS || origin == NETFS_READ_SINGLE || origin == NETFS_READ_FOR_WRITE || + origin == NETFS_UNBUFFERED_READ || origin == NETFS_DIO_READ) { INIT_WORK(&rreq->work, netfs_read_collection_worker); rreq->io_streams[0].avail = true; diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 900dd51c3b941c..bad677e58a4237 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -342,7 +342,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) { unsigned int i; - if (rreq->origin == NETFS_DIO_READ) { + if (rreq->origin == NETFS_UNBUFFERED_READ || + rreq->origin == NETFS_DIO_READ) { for (i = 0; i < rreq->direct_bv_count; i++) { flush_dcache_page(rreq->direct_bv[i].bv_page); // TODO: cifs marks pages in the destination buffer @@ -360,7 +361,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); - if (rreq->origin == NETFS_DIO_READ) + if (rreq->origin == NETFS_UNBUFFERED_READ || + rreq->origin == NETFS_DIO_READ) inode_dio_end(rreq->inode); } @@ -416,6 +418,7 @@ bool netfs_read_collection(struct netfs_io_request *rreq) //netfs_rreq_is_still_valid(rreq); switch (rreq->origin) { + case NETFS_UNBUFFERED_READ: case NETFS_DIO_READ: case NETFS_READ_GAPS: netfs_rreq_assess_dio(rreq); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index e278a1ad1ca3e8..8b07851787312b 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -367,6 +367,7 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) sreq = netfs->sreq; if (test_bit(NFS_IOHDR_EOF, &hdr->flags) && + sreq->rreq->origin != NETFS_UNBUFFERED_READ && sreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 3000c8a9d3ea54..d2df10b8e6fd88 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -219,7 +219,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq) goto failed; } - if (subreq->rreq->origin != NETFS_DIO_READ) + if (subreq->rreq->origin != NETFS_UNBUFFERED_READ && + subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); trace_netfs_sreq(subreq, netfs_sreq_trace_submit); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c3f230732f51df..1464b3a104989d 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -206,6 +206,7 @@ enum netfs_io_origin { NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ NETFS_READ_SINGLE, /* This read should be treated as a single object */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ + NETFS_UNBUFFERED_READ, /* This is an unbuffered read */ NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 402c5e82e7b8d9..4175eec40048ad 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -39,6 +39,7 @@ EM(NETFS_READ_GAPS, "RG") \ EM(NETFS_READ_SINGLE, "R1") \ EM(NETFS_READ_FOR_WRITE, "RW") \ + EM(NETFS_UNBUFFERED_READ, "UR") \ EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITEBACK_SINGLE, "W1") \ From 7dd8665953a40ad502c17db912b4f0f81cdfbe56 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 31 Mar 2025 18:27:59 +0100 Subject: [PATCH 2520/2924] perf tests switch-tracking: Fix timestamp comparison [ Upstream commit 628e124404b3db5e10e17228e680a2999018ab33 ] The test might fail on the Arm64 platform with the error: # perf test -vvv "Track with sched_switch" Missing sched_switch events # The issue is caused by incorrect handling of timestamp comparisons. The comparison result, a signed 64-bit value, was being directly cast to an int, leading to incorrect sorting for sched events. The case does not fail everytime, usually I can trigger the failure after run 20 ~ 30 times: # while true; do perf test "Track with sched_switch"; done 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : FAILED! 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok 106: Track with sched_switch : FAILED! 106: Track with sched_switch : Ok 106: Track with sched_switch : Ok I used cross compiler to build Perf tool on my host machine and tested on Debian / Juno board. Generally, I think this issue is not very specific to GCC versions. As both internal CI and my local env can reproduce the issue. My Host Build compiler: # aarch64-linux-gnu-gcc --version aarch64-linux-gnu-gcc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 Juno Board: # lsb_release -a No LSB modules are available. Distributor ID: Debian Description: Debian GNU/Linux 12 (bookworm) Release: 12 Codename: bookworm Fix this by explicitly returning 0, 1, or -1 based on whether the result is zero, positive, or negative. Fixes: d44bc558297222d9 ("perf tests: Add a test for tracking with sched_switch") Reviewed-by: Ian Rogers Signed-off-by: Leo Yan Cc: Adrian Hunter Cc: James Clark Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/r/20250331172759.115604-1-leo.yan@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/switch-tracking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 8df3f9d9ffd2b2..6b3aac283c371c 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -264,7 +264,7 @@ static int compar(const void *a, const void *b) const struct event_node *nodeb = b; s64 cmp = nodea->event_time - nodeb->event_time; - return cmp; + return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0); } static int process_events(struct evlist *evlist, From fda73c907cd5a26846ebc3e4201ba376b632fcc4 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Fri, 11 Apr 2025 15:57:47 +0800 Subject: [PATCH 2521/2924] mailbox: mchp-ipc-sbi: Fix COMPILE_TEST build error [ Upstream commit d635ba4207c31940398c41caa0cedd80f3b9c9c7 ] If COMPILE_TEST is y but RISCV_SBI is n, build fails: drivers/mailbox/mailbox-mchp-ipc-sbi.c: In function 'mchp_ipc_sbi_chan_send': drivers/mailbox/mailbox-mchp-ipc-sbi.c:119:23: error: storage size of 'ret' isn't known struct sbiret ret; ^~~ CC drivers/nvmem/lpc18xx_otp.o drivers/mailbox/mailbox-mchp-ipc-sbi.c:121:15: error: implicit declaration of function 'sbi_ecall' [-Werror=implicit-function-declaration] ret = sbi_ecall(SBI_EXT_MICROCHIP_TECHNOLOGY, command, channel, ^~~~~~~~~ move COMPILE_TEST to ARCH_MICROCHIP dependency as other drivers. Fixes: e4b1d67e7141 ("mailbox: add Microchip IPC support") Signed-off-by: Yue Haibing Signed-off-by: Jassi Brar Signed-off-by: Sasha Levin --- drivers/mailbox/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index ed52db272f4d05..e8445cda7c6182 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -191,8 +191,8 @@ config POLARFIRE_SOC_MAILBOX config MCHP_SBI_IPC_MBOX tristate "Microchip Inter-processor Communication (IPC) SBI driver" - depends on RISCV_SBI || COMPILE_TEST - depends on ARCH_MICROCHIP + depends on RISCV_SBI + depends on ARCH_MICROCHIP || COMPILE_TEST help Mailbox implementation for Microchip devices with an Inter-process communication (IPC) controller. From 1dcaf07ba68a80b12d9b2ae464c3e65112d54fef Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 25 May 2025 16:47:24 +0800 Subject: [PATCH 2522/2924] mailbox: imx: Fix TXDB_V2 sending [ Upstream commit f5cb07ec6aabd1bb56adbdeb5f0d70cb524db2cd ] i.MX95 features several processing domains, Cortex-M7, Cortex-A55 secure, Cortex-A55 non-secure. Each domain could communicate with SCMI firmware with a dedicated MU. But the current NXP SCMI firmware is not a RTOS, all processing logic codes are in interrupt context. So if high priority Cortex-M7 is communicating with SCMI firmware and requires a bit more time to handle the SCMI call, Linux MU TXDB_V2 will be timeout with high possiblity in 1000us(the current value in imx-mailbox.c). Per NXP SCMI firmware design, if timeout, there is no recover logic, so SCMI agents should never timeout and always wait until the check condition met. Based on the upper reason, enlarge the timeout value to 10ms which is less chance to timeout, and retry if timeout really happends. Fixes: 5bfe4067d350 ("mailbox: imx: support channel type tx doorbell v2") Signed-off-by: Peng Fan Signed-off-by: Jassi Brar Signed-off-by: Sasha Levin --- drivers/mailbox/imx-mailbox.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c index 6ef8338add0d61..6778afc64a048c 100644 --- a/drivers/mailbox/imx-mailbox.c +++ b/drivers/mailbox/imx-mailbox.c @@ -226,7 +226,7 @@ static int imx_mu_generic_tx(struct imx_mu_priv *priv, { u32 *arg = data; u32 val; - int ret; + int ret, count; switch (cp->type) { case IMX_MU_TYPE_TX: @@ -240,11 +240,20 @@ static int imx_mu_generic_tx(struct imx_mu_priv *priv, case IMX_MU_TYPE_TXDB_V2: imx_mu_write(priv, IMX_MU_xCR_GIRn(priv->dcfg->type, cp->idx), priv->dcfg->xCR[IMX_MU_GCR]); - ret = readl_poll_timeout(priv->base + priv->dcfg->xCR[IMX_MU_GCR], val, - !(val & IMX_MU_xCR_GIRn(priv->dcfg->type, cp->idx)), - 0, 1000); - if (ret) - dev_warn_ratelimited(priv->dev, "channel type: %d failure\n", cp->type); + ret = -ETIMEDOUT; + count = 0; + while (ret && (count < 10)) { + ret = + readl_poll_timeout(priv->base + priv->dcfg->xCR[IMX_MU_GCR], val, + !(val & IMX_MU_xCR_GIRn(priv->dcfg->type, cp->idx)), + 0, 10000); + + if (ret) { + dev_warn_ratelimited(priv->dev, + "channel type: %d timeout, %d times, retry\n", + cp->type, ++count); + } + } break; default: dev_warn_ratelimited(priv->dev, "Send data on wrong channel type: %d\n", cp->type); From 5169ac06ba136603a603ac0d850a3fa2e44f784d Mon Sep 17 00:00:00 2001 From: Jason-JH Lin Date: Mon, 21 Apr 2025 11:55:47 +0800 Subject: [PATCH 2523/2924] mailbox: mtk-cmdq: Refine GCE_GCTL_VALUE setting [ Upstream commit 9fcebcb37c3e0a4b6eb40768cc5a5faebf166fbe ] Add cmdq_gctl_value_toggle() to configure GCE_CTRL_BY_SW and GCE_DDR_EN together in the same GCE_GCTL_VALUE register. For the SoCs whose GCE is located in MMINFRA and uses MMINFRA_AO power, this allows it to be written without enabling the clocks. Otherwise, all GCE registers should be written after the GCE clocks are enabled. Move this function into cmdq_runtime_resume() and cmdq_runtime_suspend() to ensure it is called when the GCE clock is enabled. Fixes: 7abd037aa581 ("mailbox: mtk-cmdq: add gce ddr enable support flow") Signed-off-by: Jason-JH Lin Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Jassi Brar Signed-off-by: Sasha Levin --- drivers/mailbox/mtk-cmdq-mailbox.c | 51 +++++++++++++----------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index d186865b8dce64..ab4e8d1954a16e 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -92,18 +92,6 @@ struct gce_plat { u32 gce_num; }; -static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool enable) -{ - WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); - - if (enable) - writel(GCE_DDR_EN | GCE_CTRL_BY_SW, cmdq->base + GCE_GCTL_VALUE); - else - writel(GCE_CTRL_BY_SW, cmdq->base + GCE_GCTL_VALUE); - - clk_bulk_disable(cmdq->pdata->gce_num, cmdq->clocks); -} - u8 cmdq_get_shift_pa(struct mbox_chan *chan) { struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox); @@ -112,6 +100,19 @@ u8 cmdq_get_shift_pa(struct mbox_chan *chan) } EXPORT_SYMBOL(cmdq_get_shift_pa); +static void cmdq_gctl_value_toggle(struct cmdq *cmdq, bool ddr_enable) +{ + u32 val = cmdq->pdata->control_by_sw ? GCE_CTRL_BY_SW : 0; + + if (!cmdq->pdata->control_by_sw && !cmdq->pdata->sw_ddr_en) + return; + + if (cmdq->pdata->sw_ddr_en && ddr_enable) + val |= GCE_DDR_EN; + + writel(val, cmdq->base + GCE_GCTL_VALUE); +} + static int cmdq_thread_suspend(struct cmdq *cmdq, struct cmdq_thread *thread) { u32 status; @@ -140,16 +141,10 @@ static void cmdq_thread_resume(struct cmdq_thread *thread) static void cmdq_init(struct cmdq *cmdq) { int i; - u32 gctl_regval = 0; WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); - if (cmdq->pdata->control_by_sw) - gctl_regval = GCE_CTRL_BY_SW; - if (cmdq->pdata->sw_ddr_en) - gctl_regval |= GCE_DDR_EN; - if (gctl_regval) - writel(gctl_regval, cmdq->base + GCE_GCTL_VALUE); + cmdq_gctl_value_toggle(cmdq, true); writel(CMDQ_THR_ACTIVE_SLOT_CYCLES, cmdq->base + CMDQ_THR_SLOT_CYCLES); for (i = 0; i <= CMDQ_MAX_EVENT; i++) @@ -315,14 +310,21 @@ static irqreturn_t cmdq_irq_handler(int irq, void *dev) static int cmdq_runtime_resume(struct device *dev) { struct cmdq *cmdq = dev_get_drvdata(dev); + int ret; - return clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks); + ret = clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks); + if (ret) + return ret; + + cmdq_gctl_value_toggle(cmdq, true); + return 0; } static int cmdq_runtime_suspend(struct device *dev) { struct cmdq *cmdq = dev_get_drvdata(dev); + cmdq_gctl_value_toggle(cmdq, false); clk_bulk_disable(cmdq->pdata->gce_num, cmdq->clocks); return 0; } @@ -347,9 +349,6 @@ static int cmdq_suspend(struct device *dev) if (task_running) dev_warn(dev, "exist running task(s) in suspend\n"); - if (cmdq->pdata->sw_ddr_en) - cmdq_sw_ddr_enable(cmdq, false); - return pm_runtime_force_suspend(dev); } @@ -360,9 +359,6 @@ static int cmdq_resume(struct device *dev) WARN_ON(pm_runtime_force_resume(dev)); cmdq->suspended = false; - if (cmdq->pdata->sw_ddr_en) - cmdq_sw_ddr_enable(cmdq, true); - return 0; } @@ -370,9 +366,6 @@ static void cmdq_remove(struct platform_device *pdev) { struct cmdq *cmdq = platform_get_drvdata(pdev); - if (cmdq->pdata->sw_ddr_en) - cmdq_sw_ddr_enable(cmdq, false); - if (!IS_ENABLED(CONFIG_PM)) cmdq_runtime_suspend(&pdev->dev); From 68a233b4fcd8a0fce4bb0c13670b5f2cc42a6529 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 17:01:31 -0600 Subject: [PATCH 2524/2924] iomap: don't lose folio dropbehind state for overwrites [ Upstream commit 34ecde3c56066ba79e5ec3d93c5b14ea83e3603e ] DONTCACHE I/O must have the completion punted to a workqueue, just like what is done for unwritten extents, as the completion needs task context to perform the invalidation of the folio(s). However, if writeback is started off filemap_fdatawrite_range() off generic_sync() and it's an overwrite, then the DONTCACHE marking gets lost as iomap_add_to_ioend() don't look at the folio being added and no further state is passed down to help it know that this is a dropbehind/DONTCACHE write. Check if the folio being added is marked as dropbehind, and set IOMAP_IOEND_DONTCACHE if that is the case. Then XFS can factor this into the decision making of completion context in xfs_submit_ioend(). Additionally include this ioend flag in the NOMERGE flags, to avoid mixing it with unrelated IO. Since this is the 3rd flag that will cause XFS to punt the completion to a workqueue, add a helper so that each one of them can get appropriately commented. This fixes extra page cache being instantiated when the write performed is an overwrite, rather than newly instantiated blocks. Fixes: b2cd5ae693a3 ("iomap: make buffered writes work with RWF_DONTCACHE") Signed-off-by: Jens Axboe Link: https://lore.kernel.org/5153f6e8-274d-4546-bf55-30a5018e0d03@kernel.dk Reviewed-by: Dave Chinner Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/iomap/buffered-io.c | 2 ++ fs/xfs/xfs_aops.c | 22 ++++++++++++++++++++-- include/linux/iomap.h | 5 ++++- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 5b08bd417b2872..0ac474888a02e9 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1675,6 +1675,8 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, ioend_flags |= IOMAP_IOEND_UNWRITTEN; if (wpc->iomap.flags & IOMAP_F_SHARED) ioend_flags |= IOMAP_IOEND_SHARED; + if (folio_test_dropbehind(folio)) + ioend_flags |= IOMAP_IOEND_DONTCACHE; if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) ioend_flags |= IOMAP_IOEND_BOUNDARY; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 26a04a78348967..63151feb9c3fd5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -436,6 +436,25 @@ xfs_map_blocks( return 0; } +static bool +xfs_ioend_needs_wq_completion( + struct iomap_ioend *ioend) +{ + /* Changing inode size requires a transaction. */ + if (xfs_ioend_is_append(ioend)) + return true; + + /* Extent manipulation requires a transaction. */ + if (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)) + return true; + + /* Page cache invalidation cannot be done in irq context. */ + if (ioend->io_flags & IOMAP_IOEND_DONTCACHE) + return true; + + return false; +} + static int xfs_submit_ioend( struct iomap_writepage_ctx *wpc, @@ -460,8 +479,7 @@ xfs_submit_ioend( memalloc_nofs_restore(nofs_flag); /* send ioends that might require a transaction to the completion wq */ - if (xfs_ioend_is_append(ioend) || - (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))) + if (xfs_ioend_needs_wq_completion(ioend)) ioend->io_bio.bi_end_io = xfs_end_bio; if (status) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 68416b135151d7..522644d62f30f0 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -377,13 +377,16 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, #define IOMAP_IOEND_BOUNDARY (1U << 2) /* is direct I/O */ #define IOMAP_IOEND_DIRECT (1U << 3) +/* is DONTCACHE I/O */ +#define IOMAP_IOEND_DONTCACHE (1U << 4) /* * Flags that if set on either ioend prevent the merge of two ioends. * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way) */ #define IOMAP_IOEND_NOMERGE_FLAGS \ - (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT) + (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT | \ + IOMAP_IOEND_DONTCACHE) /* * Structure for writeback I/O completions. From d07d27e47786a8a63e13a469072811b7a7da3b18 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 May 2025 14:50:35 -0700 Subject: [PATCH 2525/2924] perf pmu: Avoid segv for missing name/alias_name in wildcarding [ Upstream commit 2a2a7f5e7deffa363b438308812989ded126a48a ] The pmu name or alias_name fields may be NULL and should be skipped if so. This is done in all loops of perf_pmu___name_match except the final wildcard loop which was an oversight. Fixes: 63e287131cf0c59b ("perf pmu: Rename name matching for no suffix or wildcard variants") Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250527215035.187992-1-irogers@google.com [ Fixup the Fixes: tag to the right commit ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/pmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b7ebac5ab1d112..e2e3969e12d360 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -2052,6 +2052,9 @@ static bool perf_pmu___name_match(const struct perf_pmu *pmu, const char *to_mat for (size_t i = 0; i < ARRAY_SIZE(names); i++) { const char *name = names[i]; + if (!name) + continue; + if (wildcard && perf_pmu__match_wildcard_uncore(name, to_match)) return true; if (!wildcard && perf_pmu__match_ignoring_suffix_uncore(name, to_match)) From 7ab6a125ae2da012d199c92820850a6922fe6be1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 May 2025 20:26:31 -0700 Subject: [PATCH 2526/2924] perf symbol: Fix use-after-free in filename__read_build_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fef8f648bb47726d96a5701fe31ed606268da73d ] The same buf is used for the program headers and reading notes. As the notes memory may be reallocated then this corrupts the memory pointed to by the phdr. Using the same buffer is in any case a logic error. Rather than deal with the duplicated code, introduce an elf32 boolean and a union for either the elf32 or elf64 headers that are in use. Let the program headers have their own memory and grow the buffer for notes as necessary. Before `perf list -j` compiled with asan would crash with: ``` ==4176189==ERROR: AddressSanitizer: heap-use-after-free on address 0x5160000070b8 at pc 0x555d3b15075b bp 0x7ffebb5a8090 sp 0x7ffebb5a8088 READ of size 8 at 0x5160000070b8 thread T0 #0 0x555d3b15075a in filename__read_build_id tools/perf/util/symbol-minimal.c:212:25 #1 0x555d3ae43aff in filename__sprintf_build_id tools/perf/util/build-id.c:110:8 ... 0x5160000070b8 is located 312 bytes inside of 560-byte region [0x516000006f80,0x5160000071b0) freed by thread T0 here: #0 0x555d3ab21840 in realloc (perf+0x264840) (BuildId: 12dff2f6629f738e5012abdf0e90055518e70b5e) #1 0x555d3b1506e7 in filename__read_build_id tools/perf/util/symbol-minimal.c:206:11 ... previously allocated by thread T0 here: #0 0x555d3ab21423 in malloc (perf+0x264423) (BuildId: 12dff2f6629f738e5012abdf0e90055518e70b5e) #1 0x555d3b1503a2 in filename__read_build_id tools/perf/util/symbol-minimal.c:182:9 ... ``` Note: this bug is long standing and not introduced by the other asan fix in commit fa9c4977fbfb ("perf symbol-minimal: Fix double free in filename__read_build_id"). Fixes: b691f64360ecec49 ("perf symbols: Implement poor man's ELF parser") Signed-off-by: Ian Rogers Link: https://lore.kernel.org/r/20250528032637.198960-2-irogers@google.com Cc: Mark Rutland Cc: Gary Guo Cc: Alex Gaynor Cc: Boqun Feng Cc: Howard Chu Cc: Alice Ryhl Cc: Dmitry Vyukov Cc: Peter Zijlstra Cc: Adrian Hunter Cc: Weilin Wang Cc: Andreas Hindborg Cc: Arnaldo Carvalho de Melo Cc: Danilo Krummrich Cc: Jiri Olsa Cc: Namhyung Kim Cc: Miguel Ojeda Cc: James Clark Cc: Jiapeng Chong Cc: Andi Kleen Cc: Alexander Shishkin Cc: Kan Liang Cc: Stephen Brennan Cc: Benno Lossin Cc: Björn Roy Baron Cc: Ingo Molnar Cc: Trevor Gross Cc: linux-kernel@vger.kernel.org Cc: linux-perf-users@vger.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/symbol-minimal.c | 168 +++++++++++++------------------ 1 file changed, 70 insertions(+), 98 deletions(-) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index d8da3da01fe6b5..36c1d3090689fc 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -90,11 +90,23 @@ int filename__read_build_id(const char *filename, struct build_id *bid) { FILE *fp; int ret = -1; - bool need_swap = false; + bool need_swap = false, elf32; u8 e_ident[EI_NIDENT]; - size_t buf_size; - void *buf; int i; + union { + struct { + Elf32_Ehdr ehdr32; + Elf32_Phdr *phdr32; + }; + struct { + Elf64_Ehdr ehdr64; + Elf64_Phdr *phdr64; + }; + } hdrs; + void *phdr; + size_t phdr_size; + void *buf = NULL; + size_t buf_size = 0; fp = fopen(filename, "r"); if (fp == NULL) @@ -108,119 +120,79 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out; need_swap = check_need_swap(e_ident[EI_DATA]); + elf32 = e_ident[EI_CLASS] == ELFCLASS32; - /* for simplicity */ - fseek(fp, 0, SEEK_SET); - - if (e_ident[EI_CLASS] == ELFCLASS32) { - Elf32_Ehdr ehdr; - Elf32_Phdr *phdr; - - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64, + elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64), + 1, fp) != 1) + goto out; - if (need_swap) { - ehdr.e_phoff = bswap_32(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (need_swap) { + if (elf32) { + hdrs.ehdr32.e_phoff = bswap_32(hdrs.ehdr32.e_phoff); + hdrs.ehdr32.e_phentsize = bswap_16(hdrs.ehdr32.e_phentsize); + hdrs.ehdr32.e_phnum = bswap_16(hdrs.ehdr32.e_phnum); + } else { + hdrs.ehdr64.e_phoff = bswap_64(hdrs.ehdr64.e_phoff); + hdrs.ehdr64.e_phentsize = bswap_16(hdrs.ehdr64.e_phentsize); + hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum); } + } + phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum + : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum; + phdr = malloc(phdr_size); + if (phdr == NULL) + goto out; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { - void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_32(phdr->p_offset); - phdr->p_filesz = bswap_32(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - - offset = phdr->p_offset; - if (phdr->p_filesz > buf_size) { - buf_size = phdr->p_filesz; - tmp = realloc(buf, buf_size); - if (tmp == NULL) - goto out_free; - buf = tmp; - } - fseek(fp, offset, SEEK_SET); - if (fread(buf, phdr->p_filesz, 1, fp) != 1) - goto out_free; + fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); + if (fread(phdr, phdr_size, 1, fp) != 1) + goto out_free; - ret = read_build_id(buf, phdr->p_filesz, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } - } - } else { - Elf64_Ehdr ehdr; - Elf64_Phdr *phdr; + if (elf32) + hdrs.phdr32 = phdr; + else + hdrs.phdr64 = phdr; - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + for (i = 0; i < elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) { + size_t p_filesz; if (need_swap) { - ehdr.e_phoff = bswap_64(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (elf32) { + hdrs.phdr32[i].p_type = bswap_32(hdrs.phdr32[i].p_type); + hdrs.phdr32[i].p_offset = bswap_32(hdrs.phdr32[i].p_offset); + hdrs.phdr32[i].p_filesz = bswap_32(hdrs.phdr32[i].p_offset); + } else { + hdrs.phdr64[i].p_type = bswap_32(hdrs.phdr64[i].p_type); + hdrs.phdr64[i].p_offset = bswap_64(hdrs.phdr64[i].p_offset); + hdrs.phdr64[i].p_filesz = bswap_64(hdrs.phdr64[i].p_filesz); + } } + if ((elf32 ? hdrs.phdr32[i].p_type : hdrs.phdr64[i].p_type) != PT_NOTE) + continue; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + p_filesz = elf32 ? hdrs.phdr32[i].p_filesz : hdrs.phdr64[i].p_filesz; + if (p_filesz > buf_size) { void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_64(phdr->p_offset); - phdr->p_filesz = bswap_64(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - offset = phdr->p_offset; - if (phdr->p_filesz > buf_size) { - buf_size = phdr->p_filesz; - tmp = realloc(buf, buf_size); - if (tmp == NULL) - goto out_free; - buf = tmp; - } - fseek(fp, offset, SEEK_SET); - if (fread(buf, phdr->p_filesz, 1, fp) != 1) + buf_size = p_filesz; + tmp = realloc(buf, buf_size); + if (tmp == NULL) goto out_free; + buf = tmp; + } + fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); + if (fread(buf, p_filesz, 1, fp) != 1) + goto out_free; - ret = read_build_id(buf, phdr->p_filesz, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } + ret = read_build_id(buf, p_filesz, bid, need_swap); + if (ret == 0) { + ret = bid->size; + break; } } out_free: free(buf); + free(phdr); out: fclose(fp); return ret; From 65ef2ca5185cb61b6399cd8d1010ef32ac2d244d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 16 May 2025 14:39:44 +0200 Subject: [PATCH 2527/2924] s390/uv: Don't return 0 from make_hva_secure() if the operation was not successful [ Upstream commit 3ec8a8330a1aa846dffbf1d64479213366c55b54 ] If s390_wiggle_split_folio() returns 0 because splitting a large folio succeeded, we will return 0 from make_hva_secure() even though a retry is required. Return -EAGAIN in that case. Otherwise, we'll return 0 from gmap_make_secure(), and consequently from unpack_one(). In kvm_s390_pv_unpack(), we assume that unpacking succeeded and skip unpacking this page. Later on, we run into issues and fail booting the VM. So far, this issue was only observed with follow-up patches where we split large pagecache XFS folios. Maybe it can also be triggered with shmem? We'll cleanup s390_wiggle_split_folio() a bit next, to also return 0 if no split was required. Fixes: d8dfda5af0be ("KVM: s390: pv: fix race when making a page secure") Cc: stable@vger.kernel.org Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20250516123946.1648026-2-david@redhat.com Message-ID: <20250516123946.1648026-2-david@redhat.com> Reviewed-by: Claudio Imbrenda Signed-off-by: Claudio Imbrenda Stable-dep-of: ab73b29efd36 ("s390/uv: Improve splitting of large folios that cannot be split while dirty") Signed-off-by: Sasha Levin --- arch/s390/kernel/uv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 9a5d5be8acf41e..2cc3b599c7fe33 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -393,8 +393,11 @@ int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header folio_walk_end(&fw, vma); mmap_read_unlock(mm); - if (rc == -E2BIG || rc == -EBUSY) + if (rc == -E2BIG || rc == -EBUSY) { rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG); + if (!rc) + rc = -EAGAIN; + } folio_put(folio); return rc; From dce163aa5aacb8face7a2187f45eb611ece829d3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 16 May 2025 14:39:45 +0200 Subject: [PATCH 2528/2924] s390/uv: Always return 0 from s390_wiggle_split_folio() if successful [ Upstream commit bd428b8c79ed8e8658570e70c62c0092500e2eac ] Let's consistently return 0 if the operation was successful, and just detect ourselves whether splitting is required -- folio_test_large() is a cheap operation. Update the documentation. Should we simply always return -EAGAIN instead of 0, so we don't have to handle it in the caller? Not sure, staring at the documentation, this way looks a bit cleaner. Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20250516123946.1648026-3-david@redhat.com Message-ID: <20250516123946.1648026-3-david@redhat.com> Reviewed-by: Claudio Imbrenda Signed-off-by: Claudio Imbrenda Stable-dep-of: ab73b29efd36 ("s390/uv: Improve splitting of large folios that cannot be split while dirty") Signed-off-by: Sasha Levin --- arch/s390/kernel/uv.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 2cc3b599c7fe33..f6ddb2b54032e6 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -324,34 +324,36 @@ static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct u } /** - * s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split. + * s390_wiggle_split_folio() - try to drain extra references to a folio and + * split the folio if it is large. * @mm: the mm containing the folio to work on * @folio: the folio - * @split: whether to split a large folio * * Context: Must be called while holding an extra reference to the folio; * the mm lock should not be held. - * Return: 0 if the folio was split successfully; - * -EAGAIN if the folio was not split successfully but another attempt - * can be made, or if @split was set to false; - * -EINVAL in case of other errors. See split_folio(). + * Return: 0 if the operation was successful; + * -EAGAIN if splitting the large folio was not successful, + * but another attempt can be made; + * -EINVAL in case of other folio splitting errors. See split_folio(). */ -static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) { int rc; lockdep_assert_not_held(&mm->mmap_lock); folio_wait_writeback(folio); lru_add_drain_all(); - if (split) { + + if (folio_test_large(folio)) { folio_lock(folio); rc = split_folio(folio); folio_unlock(folio); if (rc != -EBUSY) return rc; + return -EAGAIN; } - return -EAGAIN; + return 0; } int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) @@ -394,7 +396,7 @@ int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header mmap_read_unlock(mm); if (rc == -E2BIG || rc == -EBUSY) { - rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG); + rc = s390_wiggle_split_folio(mm, folio); if (!rc) rc = -EAGAIN; } From e9b2bd232e6466466f263207db0be0e81313f580 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 16 May 2025 14:39:46 +0200 Subject: [PATCH 2529/2924] s390/uv: Improve splitting of large folios that cannot be split while dirty [ Upstream commit ab73b29efd36f8916c6cc9954e912c4723c9a1b0 ] Currently, starting a PV VM on an iomap-based filesystem with large folio support, such as XFS, will not work. We'll be stuck in unpack_one()->gmap_make_secure(), because we can't seem to make progress splitting the large folio. The problem is that we require a writable PTE but a writable PTE under such filesystems will imply a dirty folio. So whenever we have a writable PTE, we'll have a dirty folio, and dirty iomap folios cannot currently get split, because split_folio()->split_huge_page_to_list_to_order()->filemap_release_folio() will fail in iomap_release_folio(). So we will not make any progress splitting such large folios. Until dirty folios can be split more reliably, let's manually trigger writeback of the problematic folio using filemap_write_and_wait_range(), and retry the split immediately afterwards exactly once, before looking up the folio again. Should this logic be part of split_folio()? Likely not; most split users don't have to split so eagerly to make any progress. For now, this seems to affect xfs, zonefs and erofs, and this patch makes it work again (tested on xfs only). While this could be considered a fix for commit 6795801366da ("xfs: Support large folios"), commit df2f9708ff1f ("zonefs: enable support for large folios") and commit ce529cc25b18 ("erofs: enable large folios for iomap mode"), before commit eef88fe45ac9 ("s390/uv: Split large folios in gmap_make_secure()"), we did not try splitting large folios at all. So it's all rather part of making SE compatible with file systems that support large folios. But to have some "Fixes:" tag, let's just use eef88fe45ac9. Not CCing stable, because there are a lot of dependencies, and it simply not working is not critical in stable kernels. Reported-by: Sebastian Mitterle Closes: https://issues.redhat.com/browse/RHEL-58218 Fixes: eef88fe45ac9 ("s390/uv: Split large folios in gmap_make_secure()") Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20250516123946.1648026-4-david@redhat.com Message-ID: <20250516123946.1648026-4-david@redhat.com> Reviewed-by: Claudio Imbrenda Signed-off-by: Claudio Imbrenda Signed-off-by: Sasha Levin --- arch/s390/kernel/uv.c | 66 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index f6ddb2b54032e6..d278bf0c09d1b3 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -338,22 +339,75 @@ static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct u */ static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) { - int rc; + int rc, tried_splits; lockdep_assert_not_held(&mm->mmap_lock); folio_wait_writeback(folio); lru_add_drain_all(); - if (folio_test_large(folio)) { + if (!folio_test_large(folio)) + return 0; + + for (tried_splits = 0; tried_splits < 2; tried_splits++) { + struct address_space *mapping; + loff_t lstart, lend; + struct inode *inode; + folio_lock(folio); rc = split_folio(folio); + if (rc != -EBUSY) { + folio_unlock(folio); + return rc; + } + + /* + * Splitting with -EBUSY can fail for various reasons, but we + * have to handle one case explicitly for now: some mappings + * don't allow for splitting dirty folios; writeback will + * mark them clean again, including marking all page table + * entries mapping the folio read-only, to catch future write + * attempts. + * + * While the system should be writing back dirty folios in the + * background, we obtained this folio by looking up a writable + * page table entry. On these problematic mappings, writable + * page table entries imply dirty folios, preventing the + * split in the first place. + * + * To prevent a livelock when trigger writeback manually and + * letting the caller look up the folio again in the page + * table (turning it dirty), immediately try to split again. + * + * This is only a problem for some mappings (e.g., XFS); + * mappings that do not support writeback (e.g., shmem) do not + * apply. + */ + if (!folio_test_dirty(folio) || folio_test_anon(folio) || + !folio->mapping || !mapping_can_writeback(folio->mapping)) { + folio_unlock(folio); + break; + } + + /* + * Ideally, we'd only trigger writeback on this exact folio. But + * there is no easy way to do that, so we'll stabilize the + * mapping while we still hold the folio lock, so we can drop + * the folio lock to trigger writeback on the range currently + * covered by the folio instead. + */ + mapping = folio->mapping; + lstart = folio_pos(folio); + lend = lstart + folio_size(folio) - 1; + inode = igrab(mapping->host); folio_unlock(folio); - if (rc != -EBUSY) - return rc; - return -EAGAIN; + if (unlikely(!inode)) + break; + + filemap_write_and_wait_range(mapping, lstart, lend); + iput(mapping->host); } - return 0; + return -EAGAIN; } int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) From 284325d47b2f314652226685d374a86510b60d0f Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Thu, 3 Apr 2025 06:08:10 +0000 Subject: [PATCH 2530/2924] perf record: Fix incorrect --user-regs comments [ Upstream commit a4a859eb6704a8aa46aa1cec5396c8d41383a26b ] The comment of "--user-regs" option is not correct, fix it. "on interrupt," -> "in user space," Fixes: 84c417422798c897 ("perf record: Support direct --user-regs arguments") Reviewed-by: Ian Rogers Signed-off-by: Dapeng Mi Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250403060810.196028-1-dapeng1.mi@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ba20bf7c011d77..d56273a0e241c7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3480,7 +3480,7 @@ static struct option __record_options[] = { "sample selected machine registers on interrupt," " use '-I?' to list register names", parse_intr_regs), OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", - "sample selected machine registers on interrupt," + "sample selected machine registers in user space," " use '--user-regs=?' to list register names", parse_user_regs), OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"), From 987bfcd718fdad5af916a86abbc38abb588d3ffa Mon Sep 17 00:00:00 2001 From: Anubhav Shelat Date: Thu, 3 Apr 2025 12:04:12 -0400 Subject: [PATCH 2531/2924] perf trace: Always print return value for syscalls returning a pid [ Upstream commit c7a48ea9b919e2fa0e4a1d9938fdb03e9afe276c ] The syscalls that were consistently observed were set_robust_list and rseq. This is because perf cannot find their child process. This change ensures that the return value is always printed. Before: 0.256 ( 0.001 ms): set_robust_list(head: 0x7f09c77dba20, len: 24) = 0.259 ( 0.001 ms): rseq(rseq: 0x7f09c77dc0e0, rseq_len: 32, sig: 1392848979) = After: 0.270 ( 0.002 ms): set_robust_list(head: 0x7f0bb14a6a20, len: 24) = 0 0.273 ( 0.002 ms): rseq(rseq: 0x7f0bb14a70e0, rseq_len: 32, sig: 1392848979) = 0 Committer notes: As discussed in the thread in the Link: tag below, these two don't return a pid, but for syscalls returning one, we need to print the result and if we manage to find the children in 'perf trace' data structures, then print its name as well. Fixes: 11c8e39f5133aed9 ("perf trace: Infrastructure to show COMM strings for syscalls returning PIDs") Reviewed-by: Howard Chu Signed-off-by: Anubhav Shelat Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Dapeng Mi Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Michael Petlan Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250403160411.159238-2-ashelat@redhat.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f98b5efb1f96c9..7304e62b69fee1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3005,8 +3005,8 @@ errno_print: { else if (sc->fmt->errpid) { struct thread *child = machine__find_thread(trace->host, ret, ret); + fprintf(trace->output, "%ld", ret); if (child != NULL) { - fprintf(trace->output, "%ld", ret); if (thread__comm_set(child)) fprintf(trace->output, " (%s)", thread__comm_str(child)); thread__put(child); From 93cf676a1750ebee4474eaa8ac0496dc0d4ffb11 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Tue, 4 Mar 2025 21:05:32 +0800 Subject: [PATCH 2532/2924] nfs: clear SB_RDONLY before getting superblock [ Upstream commit 8cd9b785943c57a136536250da80ba1eb6f8eb18 ] As described in the link, commit 52cb7f8f1778 ("nfs: ignore SB_RDONLY when mounting nfs") removed the check for the ro flag when determining whether to share the superblock, which caused issues when mounting different subdirectories under the same export directory via NFSv3. However, this change did not affect NFSv4. For NFSv3: 1) A single superblock is created for the initial mount. 2) When mounted read-only, this superblock carries the SB_RDONLY flag. 3) Before commit 52cb7f8f1778 ("nfs: ignore SB_RDONLY when mounting nfs"): Subsequent rw mounts would not share the existing ro superblock due to flag mismatch, creating a new superblock without SB_RDONLY. After the commit: The SB_RDONLY flag is ignored during superblock comparison, and this leads to sharing the existing superblock even for rw mounts. Ultimately results in write operations being rejected at the VFS layer. For NFSv4: 1) Multiple superblocks are created and the last one will be kept. 2) The actually used superblock for ro mounts doesn't carry SB_RDONLY flag. Therefore, commit 52cb7f8f1778 doesn't affect NFSv4 mounts. Clear SB_RDONLY before getting superblock when NFS_MOUNT_UNSHARED is not set to fix it. Fixes: 52cb7f8f1778 ("nfs: ignore SB_RDONLY when mounting nfs") Closes: https://lore.kernel.org/all/12d7ea53-1202-4e21-a7ef-431c94758ce5@app.fastmail.com/T/ Signed-off-by: Li Lingfeng Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/super.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9eea9e62afc9c3..1a609471e85ef8 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1308,8 +1308,17 @@ int nfs_get_tree_common(struct fs_context *fc) if (IS_ERR(server)) return PTR_ERR(server); + /* + * When NFS_MOUNT_UNSHARED is not set, NFS forces the sharing of a + * superblock among each filesystem that mounts sub-directories + * belonging to a single exported root path. + * To prevent interference between different filesystems, the + * SB_RDONLY flag should be removed from the superblock. + */ if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; + else + fc->sb_flags &= ~SB_RDONLY; /* -o noac implies -o sync */ if (server->flags & NFS_MOUNT_NOAC) From b5da66f6154a0c61e69c8b059eb40bc56b3321ab Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Tue, 4 Mar 2025 21:05:33 +0800 Subject: [PATCH 2533/2924] nfs: ignore SB_RDONLY when remounting nfs [ Upstream commit 80c4de6ab44c14e910117a02f2f8241ffc6ec54a ] In some scenarios, when mounting NFS, more than one superblock may be created. The final superblock used is the last one created, but only the first superblock carries the ro flag passed from user space. If a ro flag is added to the superblock via remount, it will trigger the issue described in Link[1]. Link[2] attempted to address this by marking the superblock as ro during the initial mount. However, this introduced a new problem in scenarios where multiple mount points share the same superblock: [root@a ~]# mount /dev/sdb /mnt/sdb [root@a ~]# echo "/mnt/sdb *(rw,no_root_squash)" > /etc/exports [root@a ~]# echo "/mnt/sdb/test_dir2 *(ro,no_root_squash)" >> /etc/exports [root@a ~]# systemctl restart nfs-server [root@a ~]# mount -t nfs -o rw 127.0.0.1:/mnt/sdb/test_dir1 /mnt/test_mp1 [root@a ~]# mount | grep nfs4 127.0.0.1:/mnt/sdb/test_dir1 on /mnt/test_mp1 type nfs4 (rw,relatime,... [root@a ~]# mount -t nfs -o ro 127.0.0.1:/mnt/sdb/test_dir2 /mnt/test_mp2 [root@a ~]# mount | grep nfs4 127.0.0.1:/mnt/sdb/test_dir1 on /mnt/test_mp1 type nfs4 (ro,relatime,... 127.0.0.1:/mnt/sdb/test_dir2 on /mnt/test_mp2 type nfs4 (ro,relatime,... [root@a ~]# When mounting the second NFS, the shared superblock is marked as ro, causing the previous NFS mount to become read-only. To resolve both issues, the ro flag is no longer applied to the superblock during remount. Instead, the ro flag on the mount is used to control whether the mount point is read-only. Fixes: 281cad46b34d ("NFS: Create a submount rpc_op") Link[1]: https://lore.kernel.org/all/20240604112636.236517-3-lilingfeng@huaweicloud.com/ Link[2]: https://lore.kernel.org/all/20241130035818.1459775-1-lilingfeng3@huawei.com/ Signed-off-by: Li Lingfeng Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/super.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1a609471e85ef8..91b5503b6f74d7 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1051,6 +1051,16 @@ int nfs_reconfigure(struct fs_context *fc) sync_filesystem(sb); + /* + * The SB_RDONLY flag has been removed from the superblock during + * mounts to prevent interference between different filesystems. + * Similarly, it is also necessary to ignore the SB_RDONLY flag + * during reconfiguration; otherwise, it may also result in the + * creation of redundant superblocks when mounting a directory with + * different rw and ro flags multiple times. + */ + fc->sb_flags_mask &= ~SB_RDONLY; + /* * Userspace mount programs that send binary options generally send * them populated with default values. We have no way to know which From 062feb506caf231b7e5164dcc28709465081256a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 26 May 2025 14:08:25 +1000 Subject: [PATCH 2534/2924] nfs: fix incorrect handling of large-number NFS errors in nfs4_do_mkdir() [ Upstream commit dd862da61e91123ca745e06c03ba39ce71a929d9 ] A recent commit introduced nfs4_do_mkdir() which reports an error from nfs4_call_sync() by returning it with ERR_PTR(). This is a problem as nfs4_call_sync() can return negative NFS-specific errors with values larger than MAX_ERRNO (4095). One example is NFS4ERR_DELAY which has value 10008. This "pointer" gets to PTR_ERR_OR_ZERO() in nfs4_proc_mkdir() which chooses ZERO because it isn't in the range of value errors. Ultimately the pointer is dereferenced. This patch changes nfs4_do_mkdir() to report the dentry pointer and status separately - pointer as a return value, status in an "int *" parameter. The same separation is used for _nfs4_proc_mkdir() and the two are combined only in nfs4_proc_mkdir() after the status has passed through nfs4_handle_exception(), which ensures the error code does not exceed MAX_ERRNO. It also fixes a problem in the even when nfs4_handle_exception() updated the error value, the original 'alias' was still returned. Reported-by: Anna Schumaker Fixes: 8376583b84a1 ("nfs: change mkdir inode_operation to return alternate dentry if needed.") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b1d2122bd5a749..4b123bca65e12d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5164,13 +5164,15 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ } static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry, - struct nfs4_createdata *data) + struct nfs4_createdata *data, int *statusp) { - int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, + struct dentry *ret; + + *statusp = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, &data->arg.seq_args, &data->res.seq_res, 1); - if (status) - return ERR_PTR(status); + if (*statusp) + return NULL; spin_lock(&dir->i_lock); /* Creating a directory bumps nlink in the parent */ @@ -5179,7 +5181,11 @@ static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry, data->res.fattr->time_start, NFS_INO_INVALID_DATA); spin_unlock(&dir->i_lock); - return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); + ret = nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); + if (!IS_ERR(ret)) + return ret; + *statusp = PTR_ERR(ret); + return NULL; } static void nfs4_free_createdata(struct nfs4_createdata *data) @@ -5240,17 +5246,18 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, static struct dentry *_nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - struct nfs4_label *label) + struct nfs4_label *label, int *statusp) { struct nfs4_createdata *data; - struct dentry *ret = ERR_PTR(-ENOMEM); + struct dentry *ret = NULL; + *statusp = -ENOMEM; data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR); if (data == NULL) goto out; data->arg.label = label; - ret = nfs4_do_mkdir(dir, dentry, data); + ret = nfs4_do_mkdir(dir, dentry, data, statusp); nfs4_free_createdata(data); out: @@ -5273,11 +5280,12 @@ static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) sattr->ia_mode &= ~current_umask(); do { - alias = _nfs4_proc_mkdir(dir, dentry, sattr, label); - err = PTR_ERR_OR_ZERO(alias); + alias = _nfs4_proc_mkdir(dir, dentry, sattr, label, &err); trace_nfs4_mkdir(dir, &dentry->d_name, err); - err = nfs4_handle_exception(NFS_SERVER(dir), err, - &exception); + if (err) + alias = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + err, + &exception)); } while (exception.retry); nfs4_label_release_security(label); From 2b12ad4306d4bbbc69a9ff511c1fc93f5c49f2ee Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 May 2025 10:46:38 +1000 Subject: [PATCH 2535/2924] nfs_localio: use cmpxchg() to install new nfs_file_localio [ Upstream commit ed9be317330c7390df7db9e1d046698c02001bd2 ] Rather than using nfs_uuid.lock to protect installing a new ro_file or rw_file, change to use cmpxchg(). Removing the file already uses xchg() so this improves symmetry and also makes the code a little simpler. Also remove the optimisation of not taking the lock, and not removing the nfs_file_localio from the linked list, when both ->ro_file and ->rw_file are already NULL. Given that ->nfs_uuid was not NULL, it is extremely unlikely that neither ->ro_file or ->rw_file is NULL so this optimisation can be of little value and it complicates understanding of the code - why can the list_del_init() be skipped? Finally, move the assignment of NULL to ->nfs_uuid until after the last action on the nfs_file_localio (the list_del_init). As soon as this is NULL a racing nfs_close_local_fh() can bypass all the locking and go on to free the nfs_file_localio, so we must be certain to be finished with it first. Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/localio.c | 11 +++-------- fs/nfs_common/nfslocalio.c | 39 +++++++++++++++++--------------------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 4ec952f9f47dde..595903c2152352 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -280,14 +280,9 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, return NULL; rcu_read_lock(); /* try to swap in the pointer */ - spin_lock(&clp->cl_uuid.lock); - nf = rcu_dereference_protected(*pnf, 1); - if (!nf) { - nf = new; - new = NULL; - rcu_assign_pointer(*pnf, nf); - } - spin_unlock(&clp->cl_uuid.lock); + nf = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(new))); + if (!nf) + swap(nf, new); } nf = nfs_local_file_get(nf); rcu_read_unlock(); diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 6a0bdea6d6449f..bdf251332b6b86 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -273,8 +273,8 @@ EXPORT_SYMBOL_GPL(nfs_open_local_fh); void nfs_close_local_fh(struct nfs_file_localio *nfl) { - struct nfsd_file *ro_nf = NULL; - struct nfsd_file *rw_nf = NULL; + struct nfsd_file *ro_nf; + struct nfsd_file *rw_nf; nfs_uuid_t *nfs_uuid; rcu_read_lock(); @@ -285,28 +285,23 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl) return; } - ro_nf = rcu_access_pointer(nfl->ro_file); - rw_nf = rcu_access_pointer(nfl->rw_file); - if (ro_nf || rw_nf) { - spin_lock(&nfs_uuid->lock); - if (ro_nf) - ro_nf = rcu_dereference_protected(xchg(&nfl->ro_file, NULL), 1); - if (rw_nf) - rw_nf = rcu_dereference_protected(xchg(&nfl->rw_file, NULL), 1); - - /* Remove nfl from nfs_uuid->files list */ - RCU_INIT_POINTER(nfl->nfs_uuid, NULL); - list_del_init(&nfl->list); - spin_unlock(&nfs_uuid->lock); - rcu_read_unlock(); + ro_nf = unrcu_pointer(xchg(&nfl->ro_file, NULL)); + rw_nf = unrcu_pointer(xchg(&nfl->rw_file, NULL)); - if (ro_nf) - nfs_to_nfsd_file_put_local(ro_nf); - if (rw_nf) - nfs_to_nfsd_file_put_local(rw_nf); - return; - } + spin_lock(&nfs_uuid->lock); + /* Remove nfl from nfs_uuid->files list */ + list_del_init(&nfl->list); + spin_unlock(&nfs_uuid->lock); rcu_read_unlock(); + /* Now we can allow racing nfs_close_local_fh() to + * skip the locking. + */ + RCU_INIT_POINTER(nfl->nfs_uuid, NULL); + + if (ro_nf) + nfs_to_nfsd_file_put_local(ro_nf); + if (rw_nf) + nfs_to_nfsd_file_put_local(rw_nf); } EXPORT_SYMBOL_GPL(nfs_close_local_fh); From 9afc61e93d94fc805476b8a1c885c291d97341b3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 May 2025 10:46:39 +1000 Subject: [PATCH 2536/2924] nfs_localio: always hold nfsd net ref with nfsd_file ref [ Upstream commit 77e82fb2c6c27c122e785f543ae0062f7783c886 ] Having separate nfsd_file_put and nfsd_file_put_local in struct nfsd_localio_operations doesn't make much sense. The difference is that nfsd_file_put doesn't drop a reference to the nfs_net which is what keeps nfsd from shutting down. Currently, if nfsd tries to shutdown it will invalidate the files stored in the list from the nfs_uuid and this will drop all references to the nfsd net that the client holds. But the client could still hold some references to nfsd_files for active IO. So nfsd might think is has completely shut down local IO, but hasn't and has no way to wait for those active IO requests to complete. So this patch changes nfsd_file_get to nfsd_file_get_local and has it increase the ref count on the nfsd net and it replaces all calls to ->nfsd_put_file to ->nfsd_put_file_local. It also changes ->nfsd_open_local_fh to return with the refcount on the net elevated precisely when a valid nfsd_file is returned. This means that whenever the client holds a valid nfsd_file, there will be an associated count on the nfsd net, and so the count can only reach zero when all nfsd_files have been returned. nfs_local_file_put() is changed to call nfs_to_nfsd_file_put_local() instead of replacing calls to one with calls to the other because this will help a later patch which changes nfs_to_nfsd_file_put_local() to take an __rcu pointer while nfs_local_file_put() doesn't. Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/localio.c | 4 ++-- fs/nfs_common/nfslocalio.c | 5 ++--- fs/nfsd/filecache.c | 21 +++++++++++++++++++++ fs/nfsd/filecache.h | 1 + fs/nfsd/localio.c | 9 +++++++-- include/linux/nfslocalio.h | 3 +-- 6 files changed, 34 insertions(+), 9 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 595903c2152352..8fb08c3ad563b3 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -209,12 +209,12 @@ EXPORT_SYMBOL_GPL(nfs_local_probe_async); static inline struct nfsd_file *nfs_local_file_get(struct nfsd_file *nf) { - return nfs_to->nfsd_file_get(nf); + return nfs_to->nfsd_file_get_local(nf); } static inline void nfs_local_file_put(struct nfsd_file *nf) { - nfs_to->nfsd_file_put(nf); + nfs_to_nfsd_file_put_local(nf); } /* diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index bdf251332b6b86..f6821b2c87a2f1 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -262,9 +262,8 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, /* We have an implied reference to net thanks to nfsd_net_try_get */ localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, cred, nfs_fh, fmode); - if (IS_ERR(localio)) - nfs_to_nfsd_net_put(net); - else + nfs_to_nfsd_net_put(net); + if (!IS_ERR(localio)) nfs_uuid_add_file(uuid, nfl); return localio; diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ab85e6a2454f4c..eedf2af8ee6ec1 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -386,6 +386,27 @@ nfsd_file_put_local(struct nfsd_file *nf) return net; } +/** + * nfsd_file_get_local - get nfsd_file reference and reference to net + * @nf: nfsd_file of which to put the reference + * + * Get reference to both the nfsd_file and nf->nf_net. + */ +struct nfsd_file * +nfsd_file_get_local(struct nfsd_file *nf) +{ + struct net *net = nf->nf_net; + + if (nfsd_net_try_get(net)) { + nf = nfsd_file_get(nf); + if (!nf) + nfsd_net_put(net); + } else { + nf = NULL; + } + return nf; +} + /** * nfsd_file_file - get the backing file of an nfsd_file * @nf: nfsd_file of which to access the backing file. diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 5865f9c7271214..cd02f91aaef13d 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -63,6 +63,7 @@ int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); struct net *nfsd_file_put_local(struct nfsd_file *nf); +struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); struct file *nfsd_file_file(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index 238647fa379e32..2c0afd1067ea61 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -29,8 +29,7 @@ static const struct nfsd_localio_operations nfsd_localio_ops = { .nfsd_net_put = nfsd_net_put, .nfsd_open_local_fh = nfsd_open_local_fh, .nfsd_file_put_local = nfsd_file_put_local, - .nfsd_file_get = nfsd_file_get, - .nfsd_file_put = nfsd_file_put, + .nfsd_file_get_local = nfsd_file_get_local, .nfsd_file_file = nfsd_file_file, }; @@ -71,6 +70,9 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (nfs_fh->size > NFS4_FHSIZE) return ERR_PTR(-EINVAL); + if (!nfsd_net_try_get(net)) + return ERR_PTR(-ENXIO); + /* nfs_fh -> svc_fh */ fh_init(&fh, NFS4_FHSIZE); fh.fh_handle.fh_size = nfs_fh->size; @@ -92,6 +94,9 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (rq_cred.cr_group_info) put_group_info(rq_cred.cr_group_info); + if (IS_ERR(localio)) + nfsd_net_put(net); + return localio; } EXPORT_SYMBOL_GPL(nfsd_open_local_fh); diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 9aa8a43843d717..c3f34bae60e13b 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -66,8 +66,7 @@ struct nfsd_localio_operations { const struct nfs_fh *, const fmode_t); struct net *(*nfsd_file_put_local)(struct nfsd_file *); - struct nfsd_file *(*nfsd_file_get)(struct nfsd_file *); - void (*nfsd_file_put)(struct nfsd_file *); + struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; From 986a21ace186433e0397a59491646edad8c8d636 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 May 2025 10:46:40 +1000 Subject: [PATCH 2537/2924] nfs_localio: simplify interface to nfsd for getting nfsd_file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e6f7e1487ab528a6c653bd0d42812ff2942846cd ] The nfsd_localio_operations structure contains nfsd_file_get() to get a reference to an nfsd_file. This is only used in one place, where nfsd_open_local_fh() is also used. This patch combines the two, calling nfsd_open_local_fh() passing a pointer to where the nfsd_file pointer might be stored. If there is a pointer there an nfsd_file_get() can get a reference, that reference is returned. If not a new nfsd_file is acquired, stored at the pointer, and returned. When we store a reference we also increase the refcount on the net, as that refcount is decrements when we clear the stored pointer. We now get an extra reference *before* storing the new nfsd_file at the given location. This avoids possible races with the nfsd_file being freed before the final reference can be taken. This patch moves the rcu_dereference() needed after fetching from ro_file or rw_file into the nfsd code where the 'struct nfs_file' is fully defined. This avoids an error reported by older versions of gcc such as gcc-8 which complain about rcu_dereference() use in contexts where the structure (which will supposedly be accessed) is not fully defined. Reported-by: Pali Rohár Reported-by: Vincent Mailhol Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/localio.c | 31 ++++-------------- fs/nfs_common/nfslocalio.c | 3 +- fs/nfsd/localio.c | 65 +++++++++++++++++++++++++++----------- 3 files changed, 55 insertions(+), 44 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 8fb08c3ad563b3..030a54c8c9d8b1 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -207,11 +207,6 @@ void nfs_local_probe_async(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs_local_probe_async); -static inline struct nfsd_file *nfs_local_file_get(struct nfsd_file *nf) -{ - return nfs_to->nfsd_file_get_local(nf); -} - static inline void nfs_local_file_put(struct nfsd_file *nf) { nfs_to_nfsd_file_put_local(nf); @@ -226,12 +221,13 @@ static inline void nfs_local_file_put(struct nfsd_file *nf) static struct nfsd_file * __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, struct nfs_fh *fh, struct nfs_file_localio *nfl, + struct nfsd_file __rcu **pnf, const fmode_t mode) { struct nfsd_file *localio; localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient, - cred, fh, nfl, mode); + cred, fh, nfl, pnf, mode); if (IS_ERR(localio)) { int status = PTR_ERR(localio); trace_nfs_local_open_fh(fh, mode, status); @@ -258,7 +254,7 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, struct nfs_fh *fh, struct nfs_file_localio *nfl, const fmode_t mode) { - struct nfsd_file *nf, *new, __rcu **pnf; + struct nfsd_file *nf, __rcu **pnf; if (!nfs_server_is_local(clp)) return NULL; @@ -270,24 +266,9 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, else pnf = &nfl->ro_file; - new = NULL; - rcu_read_lock(); - nf = rcu_dereference(*pnf); - if (!nf) { - rcu_read_unlock(); - new = __nfs_local_open_fh(clp, cred, fh, nfl, mode); - if (IS_ERR(new)) - return NULL; - rcu_read_lock(); - /* try to swap in the pointer */ - nf = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(new))); - if (!nf) - swap(nf, new); - } - nf = nfs_local_file_get(nf); - rcu_read_unlock(); - if (new) - nfs_to_nfsd_file_put_local(new); + nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode); + if (IS_ERR(nf)) + return NULL; return nf; } EXPORT_SYMBOL_GPL(nfs_local_open_fh); diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index f6821b2c87a2f1..503f85f64b7609 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -237,6 +237,7 @@ static void nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, struct rpc_clnt *rpc_clnt, const struct cred *cred, const struct nfs_fh *nfs_fh, struct nfs_file_localio *nfl, + struct nfsd_file __rcu **pnf, const fmode_t fmode) { struct net *net; @@ -261,7 +262,7 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, rcu_read_unlock(); /* We have an implied reference to net thanks to nfsd_net_try_get */ localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, - cred, nfs_fh, fmode); + cred, nfs_fh, pnf, fmode); nfs_to_nfsd_net_put(net); if (!IS_ERR(localio)) nfs_uuid_add_file(uuid, nfl); diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index 2c0afd1067ea61..80d9ff6608a7b9 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -24,20 +24,6 @@ #include "filecache.h" #include "cache.h" -static const struct nfsd_localio_operations nfsd_localio_ops = { - .nfsd_net_try_get = nfsd_net_try_get, - .nfsd_net_put = nfsd_net_put, - .nfsd_open_local_fh = nfsd_open_local_fh, - .nfsd_file_put_local = nfsd_file_put_local, - .nfsd_file_get_local = nfsd_file_get_local, - .nfsd_file_file = nfsd_file_file, -}; - -void nfsd_localio_ops_init(void) -{ - nfs_to = &nfsd_localio_ops; -} - /** * nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file * @@ -46,6 +32,7 @@ void nfsd_localio_ops_init(void) * @rpc_clnt: rpc_clnt that the client established * @cred: cred that the client established * @nfs_fh: filehandle to lookup + * @nfp: place to find the nfsd_file, or store it if it was non-NULL * @fmode: fmode_t to use for open * * This function maps a local fh to a path on a local filesystem. @@ -56,10 +43,11 @@ void nfsd_localio_ops_init(void) * set. Caller (NFS client) is responsible for calling nfsd_net_put and * nfsd_file_put (via nfs_to_nfsd_file_put_local). */ -struct nfsd_file * +static struct nfsd_file * nfsd_open_local_fh(struct net *net, struct auth_domain *dom, struct rpc_clnt *rpc_clnt, const struct cred *cred, - const struct nfs_fh *nfs_fh, const fmode_t fmode) + const struct nfs_fh *nfs_fh, struct nfsd_file __rcu **pnf, + const fmode_t fmode) { int mayflags = NFSD_MAY_LOCALIO; struct svc_cred rq_cred; @@ -73,6 +61,12 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (!nfsd_net_try_get(net)) return ERR_PTR(-ENXIO); + rcu_read_lock(); + localio = nfsd_file_get(rcu_dereference(*pnf)); + rcu_read_unlock(); + if (localio) + return localio; + /* nfs_fh -> svc_fh */ fh_init(&fh, NFS4_FHSIZE); fh.fh_handle.fh_size = nfs_fh->size; @@ -94,12 +88,47 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (rq_cred.cr_group_info) put_group_info(rq_cred.cr_group_info); - if (IS_ERR(localio)) + if (!IS_ERR(localio)) { + struct nfsd_file *new; + if (!nfsd_net_try_get(net)) { + nfsd_file_put(localio); + nfsd_net_put(net); + return ERR_PTR(-ENXIO); + } + nfsd_file_get(localio); + again: + new = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(localio))); + if (new) { + /* Some other thread installed an nfsd_file */ + if (nfsd_file_get(new) == NULL) + goto again; + /* + * Drop the ref we were going to install and the + * one we were going to return. + */ + nfsd_file_put(localio); + nfsd_file_put(localio); + localio = new; + } + } else nfsd_net_put(net); return localio; } -EXPORT_SYMBOL_GPL(nfsd_open_local_fh); + +static const struct nfsd_localio_operations nfsd_localio_ops = { + .nfsd_net_try_get = nfsd_net_try_get, + .nfsd_net_put = nfsd_net_put, + .nfsd_open_local_fh = nfsd_open_local_fh, + .nfsd_file_put_local = nfsd_file_put_local, + .nfsd_file_get_local = nfsd_file_get_local, + .nfsd_file_file = nfsd_file_file, +}; + +void nfsd_localio_ops_init(void) +{ + nfs_to = &nfsd_localio_ops; +} /* * UUID_IS_LOCAL XDR functions From 04ecb3e05ddd4efc9610081fe54a73ee7df2ae0f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 May 2025 10:46:41 +1000 Subject: [PATCH 2538/2924] nfs_localio: duplicate nfs_close_local_fh() [ Upstream commit 74fc55ab2a6a0c71628fcf3b9783aae7119b5199 ] nfs_close_local_fh() is called from two different places for quite different use case. It is called from nfs_uuid_put() when the nfs_uuid is being detached - possibly because the nfs server is not longer serving that filesystem. In this case there will always be an nfs_uuid and so rcu_read_lock() is not needed. It is also called when the nfs_file_localio is no longer needed. In this case there may not be an active nfs_uuid. These two can race, and handling the race properly while avoiding excessive locking will require different handling on each side. This patch prepares the way by opencoding nfs_close_local_fh() into nfs_uuid_put(), then simplifying the code there as befits the context. Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs_common/nfslocalio.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 503f85f64b7609..49c59f0c78c629 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -171,7 +171,26 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) /* Walk list of files and ensure their last references dropped */ list_for_each_entry_safe(nfl, tmp, &local_files, list) { - nfs_close_local_fh(nfl); + struct nfsd_file *ro_nf; + struct nfsd_file *rw_nf; + + ro_nf = unrcu_pointer(xchg(&nfl->ro_file, NULL)); + rw_nf = unrcu_pointer(xchg(&nfl->rw_file, NULL)); + + spin_lock(&nfs_uuid->lock); + /* Remove nfl from nfs_uuid->files list */ + list_del_init(&nfl->list); + spin_unlock(&nfs_uuid->lock); + /* Now we can allow racing nfs_close_local_fh() to + * skip the locking. + */ + RCU_INIT_POINTER(nfl->nfs_uuid, NULL); + + if (ro_nf) + nfs_to_nfsd_file_put_local(ro_nf); + if (rw_nf) + nfs_to_nfsd_file_put_local(rw_nf); + cond_resched(); } From 0965a7f65808c2b940e53d5559e14476464554e3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 May 2025 10:46:42 +1000 Subject: [PATCH 2539/2924] nfs_localio: protect race between nfs_uuid_put() and nfs_close_local_fh() [ Upstream commit 21fb44034695185f1dcbcb37354c842cabfe83c1 ] nfs_uuid_put() and nfs_close_local_fh() can race if a "struct nfs_file_localio" is released at the same time that nfsd calls nfs_localio_invalidate_clients(). It is important that neither of these functions completes after the other has started looking at a given nfs_file_localio and before it finishes. If nfs_uuid_put() exits while nfs_close_local_fh() is closing ro_file and rw_file it could return to __nfd_file_cache_purge() while some files are still referenced so the purge may not succeed. If nfs_close_local_fh() exits while nfsd_uuid_put() is still closing the files then the "struct nfs_file_localio" could be freed while nfsd_uuid_put() is still looking at it. This side is currently handled by copying the pointers out of ro_file and rw_file before deleting from the list in nfsd_uuid. We need to preserve this while ensuring that nfsd_uuid_put() does wait for nfs_close_local_fh(). This patch use nfl->uuid and nfl->list to provide the required interlock. nfs_uuid_put() removes the nfs_file_localio from the list, then drops locks and puts the two files, then reclaims the spinlock and sets ->nfs_uuid to NULL. nfs_close_local_fh() operates in the reverse order, setting ->nfs_uuid to NULL, then closing the files, then unlinking from the list. If nfs_uuid_put() finds that ->nfs_uuid is already NULL, it waits for the nfs_file_localio to be removed from the list. If nfs_close_local_fh() find that it has already been unlinked it waits for ->nfs_uuid to become NULL. This ensure that one of the two tries to close the files, but they each waits for the other. As nfs_uuid_put() is making the list empty, change from a list_for_each_safe loop to a while that always takes the first entry. This makes the intent more clear. Also don't move the list to a temporary local list as this would defeat the guarantees required for the interlock. Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs_common/nfslocalio.c | 81 ++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 49c59f0c78c629..1dd5a8cca064d8 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -151,8 +151,7 @@ EXPORT_SYMBOL_GPL(nfs_localio_enable_client); */ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) { - LIST_HEAD(local_files); - struct nfs_file_localio *nfl, *tmp; + struct nfs_file_localio *nfl; spin_lock(&nfs_uuid->lock); if (unlikely(!rcu_access_pointer(nfs_uuid->net))) { @@ -166,37 +165,49 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) nfs_uuid->dom = NULL; } - list_splice_init(&nfs_uuid->files, &local_files); - spin_unlock(&nfs_uuid->lock); - /* Walk list of files and ensure their last references dropped */ - list_for_each_entry_safe(nfl, tmp, &local_files, list) { + + while ((nfl = list_first_entry_or_null(&nfs_uuid->files, + struct nfs_file_localio, + list)) != NULL) { struct nfsd_file *ro_nf; struct nfsd_file *rw_nf; + /* If nfs_uuid is already NULL, nfs_close_local_fh is + * closing and we must wait, else we unlink and close. + */ + if (rcu_access_pointer(nfl->nfs_uuid) == NULL) { + /* nfs_close_local_fh() is doing the + * close and we must wait. until it unlinks + */ + wait_var_event_spinlock(nfl, + list_first_entry_or_null( + &nfs_uuid->files, + struct nfs_file_localio, + list) != nfl, + &nfs_uuid->lock); + continue; + } + ro_nf = unrcu_pointer(xchg(&nfl->ro_file, NULL)); rw_nf = unrcu_pointer(xchg(&nfl->rw_file, NULL)); - spin_lock(&nfs_uuid->lock); /* Remove nfl from nfs_uuid->files list */ list_del_init(&nfl->list); spin_unlock(&nfs_uuid->lock); - /* Now we can allow racing nfs_close_local_fh() to - * skip the locking. - */ - RCU_INIT_POINTER(nfl->nfs_uuid, NULL); - if (ro_nf) nfs_to_nfsd_file_put_local(ro_nf); if (rw_nf) nfs_to_nfsd_file_put_local(rw_nf); - cond_resched(); + spin_lock(&nfs_uuid->lock); + /* Now we can allow racing nfs_close_local_fh() to + * skip the locking. + */ + RCU_INIT_POINTER(nfl->nfs_uuid, NULL); + wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock); } - spin_lock(&nfs_uuid->lock); - BUG_ON(!list_empty(&nfs_uuid->files)); - /* Remove client from nn->local_clients */ if (nfs_uuid->list_lock) { spin_lock(nfs_uuid->list_lock); @@ -304,23 +315,43 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl) return; } - ro_nf = unrcu_pointer(xchg(&nfl->ro_file, NULL)); - rw_nf = unrcu_pointer(xchg(&nfl->rw_file, NULL)); - spin_lock(&nfs_uuid->lock); - /* Remove nfl from nfs_uuid->files list */ - list_del_init(&nfl->list); + if (!rcu_access_pointer(nfl->nfs_uuid)) { + /* nfs_uuid_put has finished here */ + spin_unlock(&nfs_uuid->lock); + rcu_read_unlock(); + return; + } + if (list_empty(&nfs_uuid->files)) { + /* nfs_uuid_put() has started closing files, wait for it + * to finished + */ + spin_unlock(&nfs_uuid->lock); + rcu_read_unlock(); + wait_var_event(&nfl->nfs_uuid, + rcu_access_pointer(nfl->nfs_uuid) == NULL); + return; + } + /* tell nfs_uuid_put() to wait for us */ + RCU_INIT_POINTER(nfl->nfs_uuid, NULL); spin_unlock(&nfs_uuid->lock); rcu_read_unlock(); - /* Now we can allow racing nfs_close_local_fh() to - * skip the locking. - */ - RCU_INIT_POINTER(nfl->nfs_uuid, NULL); + ro_nf = unrcu_pointer(xchg(&nfl->ro_file, NULL)); + rw_nf = unrcu_pointer(xchg(&nfl->rw_file, NULL)); if (ro_nf) nfs_to_nfsd_file_put_local(ro_nf); if (rw_nf) nfs_to_nfsd_file_put_local(rw_nf); + + /* Remove nfl from nfs_uuid->files list and signal nfs_uuid_put() + * that we are done. The moment we drop the spinlock the + * nfs_uuid could be freed. + */ + spin_lock(&nfs_uuid->lock); + list_del_init(&nfl->list); + wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock); + spin_unlock(&nfs_uuid->lock); } EXPORT_SYMBOL_GPL(nfs_close_local_fh); From 3f28e08941e340ecdea818323777a909c40f824c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 May 2025 10:46:43 +1000 Subject: [PATCH 2540/2924] nfs_localio: change nfsd_file_put_local() to take a pointer to __rcu pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c25a89770d1f216dcedfc2d25d56b604f62ce0bd ] Instead of calling xchg() and unrcu_pointer() before nfsd_file_put_local(), we now pass pointer to the __rcu pointer and call xchg() and unrcu_pointer() inside that function. Where unrcu_pointer() is currently called the internals of "struct nfsd_file" are not known and that causes older compilers such as gcc-8 to complain. In some cases we have a __kernel (aka normal) pointer not an __rcu pointer so we need to cast it to __rcu first. This is strictly a weakening so no information is lost. Somewhat surprisingly, this cast is accepted by gcc-8. This has the pleasing result that the cmpxchg() which sets ro_file and rw_file, and also the xchg() which clears them, are both now in the nfsd code. Reported-by: Pali Rohár Reported-by: Vincent Mailhol Fixes: 86e00412254a ("nfs: cache all open LOCALIO nfsd_file(s) in client") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/localio.c | 11 +++++++++-- fs/nfs_common/nfslocalio.c | 24 ++++++------------------ fs/nfsd/filecache.c | 11 ++++++++--- fs/nfsd/filecache.h | 2 +- include/linux/nfslocalio.h | 23 ++++++++++++----------- 5 files changed, 36 insertions(+), 35 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 030a54c8c9d8b1..e6d36b3d3fc059 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -207,9 +207,16 @@ void nfs_local_probe_async(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs_local_probe_async); -static inline void nfs_local_file_put(struct nfsd_file *nf) +static inline void nfs_local_file_put(struct nfsd_file *localio) { - nfs_to_nfsd_file_put_local(nf); + /* nfs_to_nfsd_file_put_local() expects an __rcu pointer + * but we have a __kernel pointer. It is always safe + * to cast a __kernel pointer to an __rcu pointer + * because the cast only weakens what is known about the pointer. + */ + struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio; + + nfs_to_nfsd_file_put_local(&nf); } /* diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 1dd5a8cca064d8..05c7c16e37ab4c 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -170,9 +170,6 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) while ((nfl = list_first_entry_or_null(&nfs_uuid->files, struct nfs_file_localio, list)) != NULL) { - struct nfsd_file *ro_nf; - struct nfsd_file *rw_nf; - /* If nfs_uuid is already NULL, nfs_close_local_fh is * closing and we must wait, else we unlink and close. */ @@ -189,17 +186,14 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) continue; } - ro_nf = unrcu_pointer(xchg(&nfl->ro_file, NULL)); - rw_nf = unrcu_pointer(xchg(&nfl->rw_file, NULL)); - /* Remove nfl from nfs_uuid->files list */ list_del_init(&nfl->list); spin_unlock(&nfs_uuid->lock); - if (ro_nf) - nfs_to_nfsd_file_put_local(ro_nf); - if (rw_nf) - nfs_to_nfsd_file_put_local(rw_nf); + + nfs_to_nfsd_file_put_local(&nfl->ro_file); + nfs_to_nfsd_file_put_local(&nfl->rw_file); cond_resched(); + spin_lock(&nfs_uuid->lock); /* Now we can allow racing nfs_close_local_fh() to * skip the locking. @@ -303,8 +297,6 @@ EXPORT_SYMBOL_GPL(nfs_open_local_fh); void nfs_close_local_fh(struct nfs_file_localio *nfl) { - struct nfsd_file *ro_nf; - struct nfsd_file *rw_nf; nfs_uuid_t *nfs_uuid; rcu_read_lock(); @@ -337,12 +329,8 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl) spin_unlock(&nfs_uuid->lock); rcu_read_unlock(); - ro_nf = unrcu_pointer(xchg(&nfl->ro_file, NULL)); - rw_nf = unrcu_pointer(xchg(&nfl->rw_file, NULL)); - if (ro_nf) - nfs_to_nfsd_file_put_local(ro_nf); - if (rw_nf) - nfs_to_nfsd_file_put_local(rw_nf); + nfs_to_nfsd_file_put_local(&nfl->ro_file); + nfs_to_nfsd_file_put_local(&nfl->rw_file); /* Remove nfl from nfs_uuid->files list and signal nfs_uuid_put() * that we are done. The moment we drop the spinlock the diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index eedf2af8ee6ec1..e108b6c705b459 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -378,11 +378,16 @@ nfsd_file_put(struct nfsd_file *nf) * the reference of the nfsd_file. */ struct net * -nfsd_file_put_local(struct nfsd_file *nf) +nfsd_file_put_local(struct nfsd_file __rcu **pnf) { - struct net *net = nf->nf_net; + struct nfsd_file *nf; + struct net *net = NULL; - nfsd_file_put(nf); + nf = unrcu_pointer(xchg(pnf, NULL)); + if (nf) { + net = nf->nf_net; + nfsd_file_put(nf); + } return net; } diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index cd02f91aaef13d..722b26c71e454a 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -62,7 +62,7 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); -struct net *nfsd_file_put_local(struct nfsd_file *nf); +struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf); struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); struct file *nfsd_file_file(struct nfsd_file *nf); diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index c3f34bae60e13b..5c7c92659e736f 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -50,10 +50,6 @@ void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, spinlock_t *nn_local_clients_lock); /* localio needs to map filehandle -> struct nfsd_file */ -extern struct nfsd_file * -nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, - const struct cred *, const struct nfs_fh *, - const fmode_t) __must_hold(rcu); void nfs_close_local_fh(struct nfs_file_localio *); struct nfsd_localio_operations { @@ -64,8 +60,9 @@ struct nfsd_localio_operations { struct rpc_clnt *, const struct cred *, const struct nfs_fh *, + struct nfsd_file __rcu **pnf, const fmode_t); - struct net *(*nfsd_file_put_local)(struct nfsd_file *); + struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; @@ -76,6 +73,7 @@ extern const struct nfsd_localio_operations *nfs_to; struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, struct rpc_clnt *, const struct cred *, const struct nfs_fh *, struct nfs_file_localio *, + struct nfsd_file __rcu **pnf, const fmode_t); static inline void nfs_to_nfsd_net_put(struct net *net) @@ -90,16 +88,19 @@ static inline void nfs_to_nfsd_net_put(struct net *net) rcu_read_unlock(); } -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file __rcu **localio) { /* - * Must not hold RCU otherwise nfsd_file_put() can easily trigger: - * "Voluntary context switch within RCU read-side critical section!" - * by scheduling deep in underlying filesystem (e.g. XFS). + * Either *localio must be guaranteed to be non-NULL, or caller + * must prevent nfsd shutdown from completing as nfs_close_local_fh() + * does by blocking the nfs_uuid from being finally put. */ - struct net *net = nfs_to->nfsd_file_put_local(localio); + struct net *net; - nfs_to_nfsd_net_put(net); + net = nfs_to->nfsd_file_put_local(localio); + + if (net) + nfs_to_nfsd_net_put(net); } #else /* CONFIG_NFS_LOCALIO */ From ed0bdac5eb1bd21500eb0fc9368e15c5890a7b7a Mon Sep 17 00:00:00 2001 From: Anubhav Shelat Date: Thu, 29 May 2025 10:33:35 -0400 Subject: [PATCH 2541/2924] perf trace: Set errpid to false for rseq and set_robust_list [ Upstream commit 8c56bfe53bd881c7b598c54a3a06216743c57bbc ] The 'rseq' and 'set_robust_list' syscalls don't return a pid, so set errpid for both to false. Fixes: 0c1019e3463b263a ("perf trace: Mark the 'rseq' arg in the rseq syscall as coming from user space") Fixes: 1de5b5dcb8353f36 ("perf trace: Mark the 'head' arg in the set_robust_list syscall as coming from user space") Signed-off-by: Anubhav Shelat Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Dapeng Mi Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250529143334.1469669-2-ashelat@redhat.com [ Remove explicit .errpid = false, omitting its initialization zeroes it, as noted by Namhyung ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7304e62b69fee1..33cce59bdfbdb5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1352,7 +1352,7 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ }, [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, }, - { .name = "rseq", .errpid = true, + { .name = "rseq", .arg = { [0] = { .from_user = true /* rseq */, }, }, }, { .name = "rt_sigaction", .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, @@ -1376,7 +1376,7 @@ static const struct syscall_fmt syscall_fmts[] = { { .name = "sendto", .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, [4] = SCA_SOCKADDR_FROM_USER(addr), }, }, - { .name = "set_robust_list", .errpid = true, + { .name = "set_robust_list", .arg = { [0] = { .from_user = true /* head */, }, }, }, { .name = "set_tid_address", .errpid = true, }, { .name = "setitimer", From 61009dd2252ab4391d44a240e891f1e04c00b9ca Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 23 May 2025 14:37:49 +1000 Subject: [PATCH 2542/2924] fs/dax: Fix "don't skip locked entries when scanning entries" [ Upstream commit dd59137bfe70cf3646021b4721e430213b9c71bd ] Commit 6be3e21d25ca ("fs/dax: don't skip locked entries when scanning entries") introduced a new function, wait_entry_unlocked_exclusive(), which waits for the current entry to become unlocked without advancing the XArray iterator state. Waiting for the entry to become unlocked requires dropping the XArray lock. This requires calling xas_pause() prior to dropping the lock which leaves the xas in a suitable state for the next iteration. However this has the side-effect of advancing the xas state to the next index. Normally this isn't an issue because xas_for_each() contains code to detect this state and thus avoid advancing the index a second time on the next loop iteration. However both callers of and wait_entry_unlocked_exclusive() itself subsequently use the xas state to reload the entry. As xas_pause() updated the state to the next index this will cause the current entry which is being waited on to be skipped. This caused the following warning to fire intermittently when running xftest generic/068 on an XFS filesystem with FS DAX enabled: [ 35.067397] ------------[ cut here ]------------ [ 35.068229] WARNING: CPU: 21 PID: 1640 at mm/truncate.c:89 truncate_folio_batch_exceptionals+0xd8/0x1e0 [ 35.069717] Modules linked in: nd_pmem dax_pmem nd_btt nd_e820 libnvdimm [ 35.071006] CPU: 21 UID: 0 PID: 1640 Comm: fstest Not tainted 6.15.0-rc7+ #77 PREEMPT(voluntary) [ 35.072613] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/204 [ 35.074845] RIP: 0010:truncate_folio_batch_exceptionals+0xd8/0x1e0 [ 35.075962] Code: a1 00 00 00 f6 47 0d 20 0f 84 97 00 00 00 4c 63 e8 41 39 c4 7f 0b eb 61 49 83 c5 01 45 39 ec 7e 58 42 f68 [ 35.079522] RSP: 0018:ffffb04e426c7850 EFLAGS: 00010202 [ 35.080359] RAX: 0000000000000000 RBX: ffff9d21e3481908 RCX: ffffb04e426c77f4 [ 35.081477] RDX: ffffb04e426c79e8 RSI: ffffb04e426c79e0 RDI: ffff9d21e34816e8 [ 35.082590] RBP: ffffb04e426c79e0 R08: 0000000000000001 R09: 0000000000000003 [ 35.083733] R10: 0000000000000000 R11: 822b53c0f7a49868 R12: 000000000000001f [ 35.084850] R13: 0000000000000000 R14: ffffb04e426c78e8 R15: fffffffffffffffe [ 35.085953] FS: 00007f9134c87740(0000) GS:ffff9d22abba0000(0000) knlGS:0000000000000000 [ 35.087346] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 35.088244] CR2: 00007f9134c86000 CR3: 000000040afff000 CR4: 00000000000006f0 [ 35.089354] Call Trace: [ 35.089749] [ 35.090168] truncate_inode_pages_range+0xfc/0x4d0 [ 35.091078] truncate_pagecache+0x47/0x60 [ 35.091735] xfs_setattr_size+0xc7/0x3e0 [ 35.092648] xfs_vn_setattr+0x1ea/0x270 [ 35.093437] notify_change+0x1f4/0x510 [ 35.094219] ? do_truncate+0x97/0xe0 [ 35.094879] do_truncate+0x97/0xe0 [ 35.095640] path_openat+0xabd/0xca0 [ 35.096278] do_filp_open+0xd7/0x190 [ 35.096860] do_sys_openat2+0x8a/0xe0 [ 35.097459] __x64_sys_openat+0x6d/0xa0 [ 35.098076] do_syscall_64+0xbb/0x1d0 [ 35.098647] entry_SYSCALL_64_after_hwframe+0x77/0x7f [ 35.099444] RIP: 0033:0x7f9134d81fc1 [ 35.100033] Code: 75 57 89 f0 25 00 00 41 00 3d 00 00 41 00 74 49 80 3d 2a 26 0e 00 00 74 6d 89 da 48 89 ee bf 9c ff ff ff5 [ 35.102993] RSP: 002b:00007ffcd41e0d10 EFLAGS: 00000202 ORIG_RAX: 0000000000000101 [ 35.104263] RAX: ffffffffffffffda RBX: 0000000000000242 RCX: 00007f9134d81fc1 [ 35.105452] RDX: 0000000000000242 RSI: 00007ffcd41e1200 RDI: 00000000ffffff9c [ 35.106663] RBP: 00007ffcd41e1200 R08: 0000000000000000 R09: 0000000000000064 [ 35.107923] R10: 00000000000001a4 R11: 0000000000000202 R12: 0000000000000066 [ 35.109112] R13: 0000000000100000 R14: 0000000000100000 R15: 0000000000000400 [ 35.110357] [ 35.110769] irq event stamp: 8415587 [ 35.111486] hardirqs last enabled at (8415599): [] __up_console_sem+0x52/0x60 [ 35.113067] hardirqs last disabled at (8415610): [] __up_console_sem+0x37/0x60 [ 35.114575] softirqs last enabled at (8415300): [] handle_softirqs+0x315/0x3f0 [ 35.115933] softirqs last disabled at (8415291): [] __irq_exit_rcu+0xa1/0xc0 [ 35.117316] ---[ end trace 0000000000000000 ]--- Fix this by using xas_reset() instead, which is equivalent in implementation to xas_pause() but does not advance the XArray state. Fixes: 6be3e21d25ca ("fs/dax: don't skip locked entries when scanning entries") Signed-off-by: Alistair Popple Link: https://lore.kernel.org/20250523043749.1460780-1-apopple@nvidia.com Reviewed-by: Dan Williams Reviewed-by: Jan Kara Cc: Dan Williams Cc: Alison Schofield Cc: "Matthew Wilcow (Oracle)" Cc: Balbir Singh Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: David Hildenbrand Cc: Jan Kara Cc: John Hubbard Cc: Ted Ts'o Cc: Alexander Viro Cc: Christian Brauner Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 676303419e9e8a..f8d8b1afd23244 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -257,7 +257,7 @@ static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry) wq = dax_entry_waitqueue(xas, entry, &ewait.key); prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE); - xas_pause(xas); + xas_reset(xas); xas_unlock_irq(xas); schedule(); finish_wait(wq, &ewait.wait); From aefd954dd83e0f635266e82d2d0f8e257600f230 Mon Sep 17 00:00:00 2001 From: Pekka Ristola Date: Tue, 27 May 2025 20:48:55 +0000 Subject: [PATCH 2543/2924] rust: file: mark `LocalFile` as `repr(transparent)` [ Upstream commit 15ecd83dc06277385ad71dc7ea26911d9a79acaf ] Unsafe code in `LocalFile`'s methods assumes that the type has the same layout as the inner `bindings::file`. This is not guaranteed by the default struct representation in Rust, but requires specifying the `transparent` representation. The `File` struct (which also wraps `bindings::file`) is already marked as `repr(transparent)`, so this change makes their layouts equivalent. Fixes: 851849824bb5 ("rust: file: add Rust abstraction for `struct file`") Closes: https://github.com/Rust-for-Linux/linux/issues/1165 Signed-off-by: Pekka Ristola Link: https://lore.kernel.org/20250527204636.12573-1-pekkarr@protonmail.com Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- rust/kernel/fs/file.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index 13a0e44cd1aa81..138693bdeb3fdf 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -219,6 +219,7 @@ unsafe impl AlwaysRefCounted for File { /// must be on the same thread as this file. /// /// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos +#[repr(transparent)] pub struct LocalFile { inner: Opaque, } From b05dc9f757ecfb8790409d23e449e70232d34a85 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 25 May 2025 12:47:31 +0200 Subject: [PATCH 2544/2924] exportfs: require ->fh_to_parent() to encode connectable file handles [ Upstream commit 5402c4d4d2000a9baa30c1157c97152ec6383733 ] When user requests a connectable file handle explicitly with the AT_HANDLE_CONNECTABLE flag, fail the request if filesystem (e.g. nfs) does not know how to decode a connected non-dir dentry. Fixes: c374196b2b9f ("fs: name_to_handle_at() support for "explicit connectable" file handles") Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/20250525104731.1461704-1-amir73il@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- include/linux/exportfs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fc93f0abf513cd..25c4a5afbd4432 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -314,6 +314,9 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { + if (!nop) + return false; + /* * If a non-decodeable file handle was requested, we only need to make * sure that filesystem did not opt-out of encoding fid. @@ -321,6 +324,13 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop, if (fh_flags & EXPORT_FH_FID) return exportfs_can_encode_fid(nop); + /* + * If a connectable file handle was requested, we need to make sure that + * filesystem can also decode connected file handles. + */ + if ((fh_flags & EXPORT_FH_CONNECTABLE) && !nop->fh_to_parent) + return false; + /* * If a decodeable file handle was requested, we need to make sure that * filesystem can also decode file handles. From 821a88c86640e5b493f4da148eacbc1858c08a9d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 28 May 2025 21:39:37 -0700 Subject: [PATCH 2545/2924] perf callchain: Always populate the addr_location map when adding IP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a913ef6fd883c05bd6538ed21ee1e773f0d750b7 ] Dropping symbols also meant the callchain maps wasn't populated, but the callchain map is needed to find the DSO. Plumb the symbols option better, falling back to thread__find_map() rather than thread__find_symbol() when symbols are disabled. Fixes: 02b2705017d2e5ad ("perf callchain: Allow symbols to be optional when resolving a callchain") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Ben Gainey Cc: Chaitanya S Prakash Cc: Charlie Jenkins Cc: Chun-Tse Shao Cc: Colin Ian King Cc: Dmitriy Vyukov Cc: Dr. David Alan Gilbert Cc: Graham Woodward Cc: Howard Chu Cc: Ilkka Koskinen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Krzysztof Łopatowski Cc: Leo Yan Cc: Levi Yun Cc: Li Huafei Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Martin Liška Cc: Masami Hiramatsu Cc: Matt Fleming Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Song Liu Cc: Steinar H. Gunderson Cc: Stephen Brennan Cc: Steve Clevenger Cc: Thomas Falcon Cc: Veronika Molnarova Cc: Weilin Wang Cc: Will Deacon Cc: Yicong Yang Cc: Yujie Liu Cc: Zhongqiu Han Cc: Zixian Cai Link: https://lore.kernel.org/r/20250529044000.759937-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/machine.c | 6 ++++-- tools/perf/util/thread.c | 8 ++++++-- tools/perf/util/thread.h | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2531b373f2cf7c..b048165b10c141 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1976,7 +1976,7 @@ static void ip__resolve_ams(struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, /*symbols=*/true, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -2078,7 +2078,7 @@ static int add_callchain_ip(struct thread *thread, al.sym = NULL; al.srcline = NULL; if (!cpumode) { - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, symbols, &al); } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { @@ -2106,6 +2106,8 @@ static int add_callchain_ip(struct thread *thread, } if (symbols) thread__find_symbol(thread, *cpumode, ip, &al); + else + thread__find_map(thread, *cpumode, ip, &al); } if (al.sym != NULL) { diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 89585f53c1d5cc..10a01f8fbd4000 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -410,7 +410,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al) + bool symbols, struct addr_location *al) { size_t i; const u8 cpumodes[] = { @@ -421,7 +421,11 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_symbol(thread, cpumodes[i], addr, al); + if (symbols) + thread__find_symbol(thread, cpumodes[i], addr, al); + else + thread__find_map(thread, cpumodes[i], addr, al); + if (al->map) break; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index cd574a896418ac..2b90bbed7a6121 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -126,7 +126,7 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al); void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al); + bool symbols, struct addr_location *al); int thread__memcpy(struct thread *thread, struct machine *machine, void *buf, u64 ip, int len, bool *is64bit); From c13b779d26b3702fba8f7d5fe757aba5bda85fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 26 Dec 2024 19:55:13 +0100 Subject: [PATCH 2546/2924] cifs: Fix validation of SMB1 query reparse point response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 56e84c64fc257a95728ee73165456b025c48d408 ] Validate the SMB1 query reparse point response per [MS-CIFS] section 2.2.7.2 NT_TRANSACT_IOCTL. NT_TRANSACT_IOCTL response contains one word long setup data after which is ByteCount member. So check that SetupCount is 1 before trying to read and use ByteCount member. Output setup data contains ReturnedDataLen member which is the output length of executed IOCTL command by remote system. So check that output was not truncated before transferring over network. Change MaxSetupCount of NT_TRANSACT_IOCTL request from 4 to 1 as io_rsp structure already expects one word long output setup data. This should prevent server sending incompatible structure (in case it would be extended in future, which is unlikely). Change MaxParameterCount of NT_TRANSACT_IOCTL request from 2 to 0 as NT IOCTL does not have any documented output parameters and this function does not parse any output parameters at all. Fixes: ed3e0a149b58 ("smb: client: implement ->query_reparse_point() for SMB1") Signed-off-by: Pali Rohár Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/cifssmb.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 477792c07d458f..a3ba3346ed313f 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -2753,10 +2753,10 @@ int cifs_query_reparse_point(const unsigned int xid, io_req->TotalParameterCount = 0; io_req->TotalDataCount = 0; - io_req->MaxParameterCount = cpu_to_le32(2); + io_req->MaxParameterCount = cpu_to_le32(0); /* BB find exact data count max from sess structure BB */ io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); - io_req->MaxSetupCount = 4; + io_req->MaxSetupCount = 1; io_req->Reserved = 0; io_req->ParameterOffset = 0; io_req->DataCount = 0; @@ -2783,6 +2783,22 @@ int cifs_query_reparse_point(const unsigned int xid, goto error; } + /* SetupCount must be 1, otherwise offset to ByteCount is incorrect. */ + if (io_rsp->SetupCount != 1) { + rc = -EIO; + goto error; + } + + /* + * ReturnedDataLen is output length of executed IOCTL. + * DataCount is output length transferred over network. + * Check that we have full FSCTL_GET_REPARSE_POINT buffer. + */ + if (data_count != le16_to_cpu(io_rsp->ReturnedDataLen)) { + rc = -EIO; + goto error; + } + end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount; start = (__u8 *)&io_rsp->hdr.Protocol + data_offset; if (start >= end) { From 9eb3b74f49fa1ce2ce00cd6c4c0fdc2ad81b4c16 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sat, 15 Mar 2025 16:43:02 +0100 Subject: [PATCH 2547/2924] rust: alloc: add missing invariant in Vec::set_len() [ Upstream commit fb1bf1067de979c89ae33589e0466d6ce0dde204 ] When setting a new length, we have to justify that the set length represents the exact number of elements stored in the vector. Reviewed-by: Benno Lossin Reported-by: Alice Ryhl Closes: https://lore.kernel.org/rust-for-linux/20250311-iov-iter-v1-4-f6c9134ea824@google.com Fixes: 2aac4cd7dae3 ("rust: alloc: implement kernel `Vec` type") Link: https://lore.kernel.org/r/20250315154436.65065-2-dakr@kernel.org Signed-off-by: Danilo Krummrich Signed-off-by: Sasha Levin --- rust/kernel/alloc/kvec.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index 87a71fd40c3cad..f62204fe563f58 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -196,6 +196,9 @@ where #[inline] pub unsafe fn set_len(&mut self, new_len: usize) { debug_assert!(new_len <= self.capacity()); + + // INVARIANT: By the safety requirements of this method `new_len` represents the exact + // number of elements stored within `self`. self.len = new_len; } From 5de0311844c06801ae5fce13d2c2a57132ce576b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 27 Feb 2025 14:42:56 +0100 Subject: [PATCH 2548/2924] rtc: sh: assign correct interrupts with DT [ Upstream commit 8f2efdbc303fe7baa83843d3290dd6ea5ba3276c ] The DT bindings for this driver define the interrupts in the order as they are numbered in the interrupt controller. The old platform_data, however, listed them in a different order. So, for DT based platforms, they are mixed up. Assign them specifically for DT, so we can keep the bindings stable. After the fix, 'rtctest' passes again on the Renesas Genmai board (RZ-A1 / R7S72100). Fixes: dab5aec64bf5 ("rtc: sh: add support for rza series") Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20250227134256.9167-11-wsa+renesas@sang-engineering.com Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-sh.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index 9ea40f40188f3e..3409f576422485 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -485,9 +485,15 @@ static int __init sh_rtc_probe(struct platform_device *pdev) return -ENOENT; } - rtc->periodic_irq = ret; - rtc->carry_irq = platform_get_irq(pdev, 1); - rtc->alarm_irq = platform_get_irq(pdev, 2); + if (!pdev->dev.of_node) { + rtc->periodic_irq = ret; + rtc->carry_irq = platform_get_irq(pdev, 1); + rtc->alarm_irq = platform_get_irq(pdev, 2); + } else { + rtc->alarm_irq = ret; + rtc->periodic_irq = platform_get_irq(pdev, 1); + rtc->carry_irq = platform_get_irq(pdev, 2); + } res = platform_get_resource(pdev, IORESOURCE_IO, 0); if (!res) From 70fdc1af006b406e49ac4536b9dc73d61d2a3ee7 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 18 Mar 2025 14:35:37 +0200 Subject: [PATCH 2549/2924] phy: rockchip: samsung-hdptx: Fix clock ratio setup [ Upstream commit 0422253ac1919fea8292381c85f11a9decff1bb1 ] The switch from 1/10 to 1/40 clock ratio must happen when exceeding the 340 MHz rate limit of HDMI 1.4, i.e. when entering the HDMI 2.0 domain, and not before. Therefore, use the correct comparison operator '>' instead of '>=' when checking the max rate. While at it, introduce a define for this rate limit constant. Fixes: 553be2830c5f ("phy: rockchip: Add Samsung HDMI/eDP Combo PHY driver") Reviewed-by: Dmitry Baryshkov Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20250318-phy-sam-hdptx-bpc-v6-3-8cb1678e7663@collabora.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index 77236f012a1f75..6cfed3fcd647b2 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -320,6 +320,7 @@ #define LN3_TX_SER_RATE_SEL_HBR2_MASK BIT(3) #define LN3_TX_SER_RATE_SEL_HBR3_MASK BIT(2) +#define HDMI14_MAX_RATE 340000000 #define HDMI20_MAX_RATE 600000000 enum dp_link_rate { @@ -1074,7 +1075,7 @@ static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx, regmap_write(hdptx->regmap, LNTOP_REG(0200), 0x06); - if (rate >= 3400000) { + if (rate > HDMI14_MAX_RATE / 100) { /* For 1/40 bitrate clk */ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_highbr_seq); } else { From 0f743e05157670994774ba906369bb818e334f1a Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 18 Mar 2025 14:35:38 +0200 Subject: [PATCH 2550/2924] phy: rockchip: samsung-hdptx: Do no set rk_hdptx_phy->rate in case of errors [ Upstream commit 1f4d382769e3b38dfc498c806811dae856e40f31 ] Ensure rk_hdptx_ropll_tmds_cmn_config() updates hdptx->rate only after all the other operations have been successful. Fixes: c4b09c562086 ("phy: phy-rockchip-samsung-hdptx: Add clock provider support") Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20250318-phy-sam-hdptx-bpc-v6-4-8cb1678e7663@collabora.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index 6cfed3fcd647b2..61db514ce5cfb5 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -1008,9 +1008,7 @@ static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx, { const struct ropll_config *cfg = NULL; struct ropll_config rc = {0}; - int i; - - hdptx->rate = rate * 100; + int ret, i; for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++) if (rate == ropll_tmds_cfg[i].bit_rate) { @@ -1065,7 +1063,11 @@ static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx, regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_EN_MASK, FIELD_PREP(PLL_PCG_CLK_EN_MASK, 0x1)); - return rk_hdptx_post_enable_pll(hdptx); + ret = rk_hdptx_post_enable_pll(hdptx); + if (!ret) + hdptx->rate = rate * 100; + + return ret; } static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx, From 33513c7b99d9488bf1161ceae0768a2aab225fa2 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 10 Apr 2025 17:27:11 +0200 Subject: [PATCH 2551/2924] PCI: pciehp: Ignore Presence Detect Changed caused by DPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c3be50f7547ccb533284b22f74baf37d3379843e ] Commit a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC") amended PCIe hotplug to not bring down the slot upon Data Link Layer State Changed events caused by Downstream Port Containment. However Keith reports off-list that if the slot uses in-band presence detect (i.e. Presence Detect State is derived from Data Link Layer Link Active), DPC also causes a spurious Presence Detect Changed event. This needs to be ignored as well. Unfortunately there's no register indicating that in-band presence detect is used. PCIe r5.0 sec 7.5.3.10 introduced the In-Band PD Disable bit in the Slot Control Register. The PCIe hotplug driver sets this bit on ports supporting it. But older ports may still use in-band presence detect. If in-band presence detect can be disabled, Presence Detect Changed events occurring during DPC must not be ignored because they signal device replacement. On all other ports, device replacement cannot be detected reliably because the Presence Detect Changed event could be a side effect of DPC. On those (older) ports, perform a best-effort device replacement check by comparing the Vendor ID, Device ID and other data in Config Space with the values cached in struct pci_dev. Use the existing helper pciehp_device_replaced() to accomplish this. It is currently #ifdef'ed to CONFIG_PM_SLEEP in pciehp_core.c, so move it to pciehp_hpc.c where most other functions accessing config space reside. Reported-by: Keith Busch Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/fa264ff71952915c4e35a53c89eb0cde8455a5c5.1744298239.git.lukas@wunner.de Stable-dep-of: 2af781a9edc4 ("PCI: pciehp: Ignore Link Down/Up caused by Secondary Bus Reset") Signed-off-by: Sasha Levin --- drivers/pci/hotplug/pciehp.h | 1 + drivers/pci/hotplug/pciehp_core.c | 29 ------------------ drivers/pci/hotplug/pciehp_hpc.c | 49 ++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 36 deletions(-) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 273dd8c66f4eff..debc79b0adfb2c 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -187,6 +187,7 @@ int pciehp_card_present(struct controller *ctrl); int pciehp_card_present_or_link_active(struct controller *ctrl); int pciehp_check_link_status(struct controller *ctrl); int pciehp_check_link_active(struct controller *ctrl); +bool pciehp_device_replaced(struct controller *ctrl); void pciehp_release_ctrl(struct controller *ctrl); int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 997841c6989359..f59baa91297099 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -284,35 +284,6 @@ static int pciehp_suspend(struct pcie_device *dev) return 0; } -static bool pciehp_device_replaced(struct controller *ctrl) -{ - struct pci_dev *pdev __free(pci_dev_put) = NULL; - u32 reg; - - if (pci_dev_is_disconnected(ctrl->pcie->port)) - return false; - - pdev = pci_get_slot(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); - if (!pdev) - return true; - - if (pci_read_config_dword(pdev, PCI_VENDOR_ID, ®) || - reg != (pdev->vendor | (pdev->device << 16)) || - pci_read_config_dword(pdev, PCI_CLASS_REVISION, ®) || - reg != (pdev->revision | (pdev->class << 8))) - return true; - - if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL && - (pci_read_config_dword(pdev, PCI_SUBSYSTEM_VENDOR_ID, ®) || - reg != (pdev->subsystem_vendor | (pdev->subsystem_device << 16)))) - return true; - - if (pci_get_dsn(pdev) != ctrl->dsn) - return true; - - return false; -} - static int pciehp_resume_noirq(struct pcie_device *dev) { struct controller *ctrl = get_service_data(dev); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 8a09fb6083e276..388fbed25402b5 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -563,18 +563,48 @@ void pciehp_power_off_slot(struct controller *ctrl) PCI_EXP_SLTCTL_PWR_OFF); } +bool pciehp_device_replaced(struct controller *ctrl) +{ + struct pci_dev *pdev __free(pci_dev_put) = NULL; + u32 reg; + + if (pci_dev_is_disconnected(ctrl->pcie->port)) + return false; + + pdev = pci_get_slot(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); + if (!pdev) + return true; + + if (pci_read_config_dword(pdev, PCI_VENDOR_ID, ®) || + reg != (pdev->vendor | (pdev->device << 16)) || + pci_read_config_dword(pdev, PCI_CLASS_REVISION, ®) || + reg != (pdev->revision | (pdev->class << 8))) + return true; + + if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (pci_read_config_dword(pdev, PCI_SUBSYSTEM_VENDOR_ID, ®) || + reg != (pdev->subsystem_vendor | (pdev->subsystem_device << 16)))) + return true; + + if (pci_get_dsn(pdev) != ctrl->dsn) + return true; + + return false; +} + static void pciehp_ignore_dpc_link_change(struct controller *ctrl, - struct pci_dev *pdev, int irq) + struct pci_dev *pdev, int irq, + u16 ignored_events) { /* * Ignore link changes which occurred while waiting for DPC recovery. * Could be several if DPC triggered multiple times consecutively. */ synchronize_hardirq(irq); - atomic_and(~PCI_EXP_SLTSTA_DLLSC, &ctrl->pending_events); + atomic_and(~ignored_events, &ctrl->pending_events); if (pciehp_poll_mode) pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_DLLSC); + ignored_events); ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored (recovered by DPC)\n", slot_name(ctrl)); @@ -584,8 +614,8 @@ static void pciehp_ignore_dpc_link_change(struct controller *ctrl, * Synthesize it to ensure that it is acted on. */ down_read_nested(&ctrl->reset_lock, ctrl->depth); - if (!pciehp_check_link_active(ctrl)) - pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); + if (!pciehp_check_link_active(ctrl) || pciehp_device_replaced(ctrl)) + pciehp_request(ctrl, ignored_events); up_read(&ctrl->reset_lock); } @@ -736,8 +766,13 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) */ if ((events & PCI_EXP_SLTSTA_DLLSC) && pci_dpc_recovered(pdev) && ctrl->state == ON_STATE) { - events &= ~PCI_EXP_SLTSTA_DLLSC; - pciehp_ignore_dpc_link_change(ctrl, pdev, irq); + u16 ignored_events = PCI_EXP_SLTSTA_DLLSC; + + if (!ctrl->inband_presence_disabled) + ignored_events |= events & PCI_EXP_SLTSTA_PDC; + + events &= ~ignored_events; + pciehp_ignore_dpc_link_change(ctrl, pdev, irq, ignored_events); } /* From 9d684cbe9cb6addf2fe29695b1541c3e5493b8ab Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 10 Apr 2025 17:27:12 +0200 Subject: [PATCH 2552/2924] PCI: pciehp: Ignore Link Down/Up caused by Secondary Bus Reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2af781a9edc4ef5f6684c0710cc3542d9be48b31 ] When a Secondary Bus Reset is issued at a hotplug port, it causes a Data Link Layer State Changed event as a side effect. On hotplug ports using in-band presence detect, it additionally causes a Presence Detect Changed event. These spurious events should not result in teardown and re-enumeration of the device in the slot. Hence commit 2e35afaefe64 ("PCI: pciehp: Add reset_slot() method") masked the Presence Detect Changed Enable bit in the Slot Control register during a Secondary Bus Reset. Commit 06a8d89af551 ("PCI: pciehp: Disable link notification across slot reset") additionally masked the Data Link Layer State Changed Enable bit. However masking those bits only disables interrupt generation (PCIe r6.2 sec 6.7.3.1). The events are still visible in the Slot Status register and picked up by the IRQ handler if it runs during a Secondary Bus Reset. This can happen if the interrupt is shared or if an unmasked hotplug event occurs, e.g. Attention Button Pressed or Power Fault Detected. The likelihood of this happening used to be small, so it wasn't much of a problem in practice. That has changed with the recent introduction of bandwidth control in v6.13-rc1 with commit 665745f27487 ("PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller"): Bandwidth control shares the interrupt with PCIe hotplug. A Secondary Bus Reset causes a Link Bandwidth Notification, so the hotplug IRQ handler runs, picks up the masked events and tears down the device in the slot. As a result, Joel reports VFIO passthrough failure of a GPU, which Ilpo root-caused to the incorrect handling of masked hotplug events. Clearly, a more reliable way is needed to ignore spurious hotplug events. For Downstream Port Containment, a new ignore mechanism was introduced by commit a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC"). It has been working reliably for the past four years. Adapt it for Secondary Bus Resets. Introduce two helpers to annotate code sections which cause spurious link changes: pci_hp_ignore_link_change() and pci_hp_unignore_link_change() Use those helpers in lieu of masking interrupts in the Slot Control register. Introduce a helper to check whether such a code section is executing concurrently and if so, await it: pci_hp_spurious_link_change() Invoke the helper in the hotplug IRQ thread pciehp_ist(). Re-use the IRQ thread's existing code which ignores DPC-induced link changes unless the link is unexpectedly down after reset recovery or the device was replaced during the bus reset. That code block in pciehp_ist() was previously only executed if a Data Link Layer State Changed event has occurred. Additionally execute it for Presence Detect Changed events. That's necessary for compatibility with PCIe r1.0 hotplug ports because Data Link Layer State Changed didn't exist before PCIe r1.1. DPC was added with PCIe r3.1 and thus DPC-capable hotplug ports always support Data Link Layer State Changed events. But the same cannot be assumed for Secondary Bus Reset, which already existed in PCIe r1.0. Secondary Bus Reset is only one of many causes of spurious link changes. Others include runtime suspend to D3cold, firmware updates or FPGA reconfiguration. The new pci_hp_{,un}ignore_link_change() helpers may be used by all kinds of drivers to annotate such code sections, hence their declarations are publicly visible in . A case in point is the Mellanox Ethernet driver which disables a firmware reset feature if the Ethernet card is attached to a hotplug port, see commit 3d7a3f2612d7 ("net/mlx5: Nack sync reset request when HotPlug is enabled"). Going forward, PCIe hotplug will be able to cope gracefully with all such use cases once the code sections are properly annotated. The new helpers internally use two bits in struct pci_dev's priv_flags as well as a wait_queue. This mirrors what was done for DPC by commit a97396c6eb13 ("PCI: pciehp: Ignore Link Down/Up caused by DPC"). That may be insufficient if spurious link changes are caused by multiple sources simultaneously. An example might be a Secondary Bus Reset issued by AER during FPGA reconfiguration. If this turns out to happen in real life, support for it can easily be added by replacing the PCI_LINK_CHANGING flag with an atomic_t counter incremented by pci_hp_ignore_link_change() and decremented by pci_hp_unignore_link_change(). Instead of awaiting a zero PCI_LINK_CHANGING flag, the pci_hp_spurious_link_change() helper would then simply await a zero counter. Fixes: 665745f27487 ("PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller") Reported-by: Joel Mathew Thomas Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219765 Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Tested-by: Joel Mathew Thomas Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/d04deaf49d634a2edf42bf3c06ed81b4ca54d17b.1744298239.git.lukas@wunner.de Signed-off-by: Sasha Levin --- drivers/pci/hotplug/pci_hotplug_core.c | 69 ++++++++++++++++++++++++++ drivers/pci/hotplug/pciehp_hpc.c | 35 +++++-------- drivers/pci/pci.h | 3 ++ include/linux/pci.h | 8 +++ 4 files changed, 92 insertions(+), 23 deletions(-) diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index d30f1316c98e2c..d7fc3bc039643c 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -492,6 +492,75 @@ void pci_hp_destroy(struct hotplug_slot *slot) } EXPORT_SYMBOL_GPL(pci_hp_destroy); +static DECLARE_WAIT_QUEUE_HEAD(pci_hp_link_change_wq); + +/** + * pci_hp_ignore_link_change - begin code section causing spurious link changes + * @pdev: PCI hotplug bridge + * + * Mark the beginning of a code section causing spurious link changes on the + * Secondary Bus of @pdev, e.g. as a side effect of a Secondary Bus Reset, + * D3cold transition, firmware update or FPGA reconfiguration. + * + * Hotplug drivers can thus check whether such a code section is executing + * concurrently, await it with pci_hp_spurious_link_change() and ignore the + * resulting link change events. + * + * Must be paired with pci_hp_unignore_link_change(). May be called both + * from the PCI core and from Endpoint drivers. May be called for bridges + * which are not hotplug-capable, in which case it has no effect because + * no hotplug driver is bound to the bridge. + */ +void pci_hp_ignore_link_change(struct pci_dev *pdev) +{ + set_bit(PCI_LINK_CHANGING, &pdev->priv_flags); + smp_mb__after_atomic(); /* pairs with implied barrier of wait_event() */ +} + +/** + * pci_hp_unignore_link_change - end code section causing spurious link changes + * @pdev: PCI hotplug bridge + * + * Mark the end of a code section causing spurious link changes on the + * Secondary Bus of @pdev. Must be paired with pci_hp_ignore_link_change(). + */ +void pci_hp_unignore_link_change(struct pci_dev *pdev) +{ + set_bit(PCI_LINK_CHANGED, &pdev->priv_flags); + mb(); /* ensure pci_hp_spurious_link_change() sees either bit set */ + clear_bit(PCI_LINK_CHANGING, &pdev->priv_flags); + wake_up_all(&pci_hp_link_change_wq); +} + +/** + * pci_hp_spurious_link_change - check for spurious link changes + * @pdev: PCI hotplug bridge + * + * Check whether a code section is executing concurrently which is causing + * spurious link changes on the Secondary Bus of @pdev. Await the end of the + * code section if so. + * + * May be called by hotplug drivers to check whether a link change is spurious + * and can be ignored. + * + * Because a genuine link change may have occurred in-between a spurious link + * change and the invocation of this function, hotplug drivers should perform + * sanity checks such as retrieving the current link state and bringing down + * the slot if the link is down. + * + * Return: %true if such a code section has been executing concurrently, + * otherwise %false. Also return %true if such a code section has not been + * executing concurrently, but at least once since the last invocation of this + * function. + */ +bool pci_hp_spurious_link_change(struct pci_dev *pdev) +{ + wait_event(pci_hp_link_change_wq, + !test_bit(PCI_LINK_CHANGING, &pdev->priv_flags)); + + return test_and_clear_bit(PCI_LINK_CHANGED, &pdev->priv_flags); +} + static int __init pci_hotplug_init(void) { int result; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 388fbed25402b5..ebd342bda235d4 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -592,21 +592,21 @@ bool pciehp_device_replaced(struct controller *ctrl) return false; } -static void pciehp_ignore_dpc_link_change(struct controller *ctrl, - struct pci_dev *pdev, int irq, - u16 ignored_events) +static void pciehp_ignore_link_change(struct controller *ctrl, + struct pci_dev *pdev, int irq, + u16 ignored_events) { /* * Ignore link changes which occurred while waiting for DPC recovery. * Could be several if DPC triggered multiple times consecutively. + * Also ignore link changes caused by Secondary Bus Reset, etc. */ synchronize_hardirq(irq); atomic_and(~ignored_events, &ctrl->pending_events); if (pciehp_poll_mode) pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, ignored_events); - ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored (recovered by DPC)\n", - slot_name(ctrl)); + ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored\n", slot_name(ctrl)); /* * If the link is unexpectedly down after successful recovery, @@ -762,9 +762,11 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) /* * Ignore Link Down/Up events caused by Downstream Port Containment - * if recovery from the error succeeded. + * if recovery succeeded, or caused by Secondary Bus Reset, + * suspend to D3cold, firmware update, FPGA reconfiguration, etc. */ - if ((events & PCI_EXP_SLTSTA_DLLSC) && pci_dpc_recovered(pdev) && + if ((events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) && + (pci_dpc_recovered(pdev) || pci_hp_spurious_link_change(pdev)) && ctrl->state == ON_STATE) { u16 ignored_events = PCI_EXP_SLTSTA_DLLSC; @@ -772,7 +774,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) ignored_events |= events & PCI_EXP_SLTSTA_PDC; events &= ~ignored_events; - pciehp_ignore_dpc_link_change(ctrl, pdev, irq, ignored_events); + pciehp_ignore_link_change(ctrl, pdev, irq, ignored_events); } /* @@ -937,7 +939,6 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) { struct controller *ctrl = to_ctrl(hotplug_slot); struct pci_dev *pdev = ctrl_dev(ctrl); - u16 stat_mask = 0, ctrl_mask = 0; int rc; if (probe) @@ -945,23 +946,11 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) down_write_nested(&ctrl->reset_lock, ctrl->depth); - if (!ATTN_BUTTN(ctrl)) { - ctrl_mask |= PCI_EXP_SLTCTL_PDCE; - stat_mask |= PCI_EXP_SLTSTA_PDC; - } - ctrl_mask |= PCI_EXP_SLTCTL_DLLSCE; - stat_mask |= PCI_EXP_SLTSTA_DLLSC; - - pcie_write_cmd(ctrl, 0, ctrl_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, - pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0); + pci_hp_ignore_link_change(pdev); rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port); - pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask); - pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, - pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask); + pci_hp_unignore_link_change(pdev); up_write(&ctrl->reset_lock); return rc; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index b81e99cd4b62a3..7db798bdcaaae6 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -227,6 +227,7 @@ static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; } /* Functions for PCI Hotplug drivers to use */ int pci_hp_add_bridge(struct pci_dev *dev); +bool pci_hp_spurious_link_change(struct pci_dev *pdev); #if defined(CONFIG_SYSFS) && defined(HAVE_PCI_LEGACY) void pci_create_legacy_files(struct pci_bus *bus); @@ -557,6 +558,8 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 #define PCI_DEV_REMOVED 3 +#define PCI_LINK_CHANGED 4 +#define PCI_LINK_CHANGING 5 static inline void pci_dev_assign_added(struct pci_dev *dev) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 51e2bd6405cda5..081e5c0a3ddf4e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1850,6 +1850,14 @@ static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } #endif +#ifdef CONFIG_HOTPLUG_PCI +void pci_hp_ignore_link_change(struct pci_dev *pdev); +void pci_hp_unignore_link_change(struct pci_dev *pdev); +#else +static inline void pci_hp_ignore_link_change(struct pci_dev *pdev) { } +static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { } +#endif + #ifdef CONFIG_PCIEAER bool pci_aer_available(void); #else From 33d2596e6e46d96406c80c0931f3cfae23e5d6f9 Mon Sep 17 00:00:00 2001 From: Wilfred Mallawa Date: Mon, 14 Apr 2025 10:15:06 +1000 Subject: [PATCH 2553/2924] PCI: Print the actual delay time in pci_bridge_wait_for_secondary_bus() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d24eba726aadf8778f2907dd42281c6380b0ccaa ] Print the delay amount that pcie_wait_for_link_delay() is invoked with instead of the hardcoded 1000ms value in the debug info print. Fixes: 7b3ba09febf4 ("PCI/PM: Shorten pci_bridge_wait_for_secondary_bus() wait time for slow links") Signed-off-by: Wilfred Mallawa Signed-off-by: Bjorn Helgaas Reviewed-by: Damien Le Moal Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Mika Westerberg Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20250414001505.21243-2-wilfred.opensource@gmail.com Signed-off-by: Sasha Levin --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e77d5b53c0cec9..4d84ed41248442 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4954,7 +4954,7 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type) delay); if (!pcie_wait_for_link_delay(dev, true, delay)) { /* Did not train, no need to wait any further */ - pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n"); + pci_info(dev, "Data Link Layer Link Active not set in %d msec\n", delay); return -ENOTTY; } From 17844456ebee8aff13ab14c1f9e1ff56573e4fb7 Mon Sep 17 00:00:00 2001 From: Jensen Huang Date: Fri, 28 Mar 2025 18:58:22 +0800 Subject: [PATCH 2554/2924] PCI: rockchip: Fix order of rockchip_pci_core_rsts [ Upstream commit c7540e5423d7f588c7210a9941ceb6a836963ccc ] The order of rockchip_pci_core_rsts introduced in the offending commit followed the previous comment that warned not to reorder them. But the commit failed to take into account that reset_control_bulk_deassert() deasserts the resets in reverse order. So this leads to the link getting downgraded to 2.5 GT/s. Hence, restore the deassert order and also add back the comments for rockchip_pci_core_rsts. Tested on NanoPC-T4 with Samsung 970 Pro. Fixes: 18715931a5c0 ("PCI: rockchip: Simplify reset control handling by using reset_control_bulk*() function") Signed-off-by: Jensen Huang [mani: reworded the commit message and the comment above rockchip_pci_core_rsts] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Manivannan Sadhasivam Acked-by: Shawn Lin Link: https://patch.msgid.link/20250328105822.3946767-1-jensenhuang@friendlyarm.com Signed-off-by: Sasha Levin --- drivers/pci/controller/pcie-rockchip.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 14954f43e5e9af..5864a20323f21a 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -319,11 +319,12 @@ static const char * const rockchip_pci_pm_rsts[] = { "aclk", }; +/* NOTE: Do not reorder the deassert sequence of the following reset pins */ static const char * const rockchip_pci_core_rsts[] = { - "mgmt-sticky", - "core", - "mgmt", "pipe", + "mgmt", + "core", + "mgmt-sticky", }; struct rockchip_pcie { From 5448c2f6b1cad0f38c90ffd5494b8aa4a570edc7 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 28 Mar 2025 15:30:44 +0100 Subject: [PATCH 2555/2924] PCI: rcar-gen4: set ep BAR4 fixed size [ Upstream commit b584ab12d59f646b9254b2b16ff197d612fd4935 ] On rcar-gen4, the ep BAR4 has a fixed size of 256B. Document this constraint in the epc features of the platform. Fixes: e311b3834dfa ("PCI: rcar-gen4: Add endpoint mode support") Signed-off-by: Jerome Brunet Signed-off-by: Manivannan Sadhasivam Reviewed-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250328-rcar-gen4-bar4-v1-1-10bb6ce9ee7f@baylibre.com Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pcie-rcar-gen4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c index fc872dd35029c0..02638ec442e701 100644 --- a/drivers/pci/controller/dwc/pcie-rcar-gen4.c +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c @@ -403,6 +403,7 @@ static const struct pci_epc_features rcar_gen4_pcie_epc_features = { .msix_capable = false, .bar[BAR_1] = { .type = BAR_RESERVED, }, .bar[BAR_3] = { .type = BAR_RESERVED, }, + .bar[BAR_4] = { .type = BAR_FIXED, .fixed_size = 256 }, .bar[BAR_5] = { .type = BAR_RESERVED, }, .align = SZ_1M, }; From f8fc38df71cf43208a5c0d9d50244506cdd59be3 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Sat, 19 Apr 2025 21:30:58 +0800 Subject: [PATCH 2556/2924] PCI: cadence: Fix runtime atomic count underflow [ Upstream commit 8805f32a96d3b97cef07999fa6f52112678f7e65 ] If the call to pci_host_probe() in cdns_pcie_host_setup() fails, PM runtime count is decremented in the error path using pm_runtime_put_sync(). But the runtime count is not incremented by this driver, but only by the callers (cdns_plat_pcie_probe/j721e_pcie_probe). And the callers also decrement the runtime PM count in their error path. So this leads to the below warning from the PM core: "runtime PM usage count underflow!" So fix it by getting rid of pm_runtime_put_sync() in the error path and directly return the errno. Fixes: 49e427e6bdd1 ("Merge branch 'pci/host-probe-refactor'") Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250419133058.162048-1-18255117159@163.com Signed-off-by: Sasha Levin --- drivers/pci/controller/cadence/pcie-cadence-host.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/pci/controller/cadence/pcie-cadence-host.c b/drivers/pci/controller/cadence/pcie-cadence-host.c index 8af95e9da7cec6..741e10a575ec75 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-host.c +++ b/drivers/pci/controller/cadence/pcie-cadence-host.c @@ -570,14 +570,5 @@ int cdns_pcie_host_setup(struct cdns_pcie_rc *rc) if (!bridge->ops) bridge->ops = &cdns_pcie_host_ops; - ret = pci_host_probe(bridge); - if (ret < 0) - goto err_init; - - return 0; - - err_init: - pm_runtime_put_sync(dev); - - return ret; + return pci_host_probe(bridge); } From 16c529aac3270f60597f01153f3779e0c7b7fab0 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Tue, 1 Apr 2025 10:17:11 +0100 Subject: [PATCH 2557/2924] PCI: apple: Use gpiod_set_value_cansleep in probe flow [ Upstream commit 7334364f9de79a9a236dd0243ba574b8d2876e89 ] We're allowed to sleep here, so tell the GPIO core by using gpiod_set_value_cansleep instead of gpiod_set_value. Fixes: 1e33888fbe44 ("PCI: apple: Add initial hardware bring-up") Signed-off-by: Hector Martin Signed-off-by: Alyssa Rosenzweig Signed-off-by: Marc Zyngier Signed-off-by: Manivannan Sadhasivam Tested-by: Janne Grunau Reviewed-by: Rob Herring (Arm) Reviewed-by: Manivannan Sadhasivam Acked-by: Alyssa Rosenzweig Link: https://patch.msgid.link/20250401091713.2765724-12-maz@kernel.org Signed-off-by: Sasha Levin --- drivers/pci/controller/pcie-apple.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index 18e11b9a7f4647..3d778d8b018756 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -540,7 +540,7 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, rmw_set(PORT_APPCLK_EN, port->base + PORT_APPCLK); /* Assert PERST# before setting up the clock */ - gpiod_set_value(reset, 1); + gpiod_set_value_cansleep(reset, 1); ret = apple_pcie_setup_refclk(pcie, port); if (ret < 0) @@ -551,7 +551,7 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, /* Deassert PERST# */ rmw_set(PORT_PERST_OFF, port->base + PORT_PERST); - gpiod_set_value(reset, 0); + gpiod_set_value_cansleep(reset, 0); /* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */ msleep(100); From c7686afd37c93c7d26221ad70e5e43b3fd01505a Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Wed, 16 Apr 2025 16:13:14 +0800 Subject: [PATCH 2558/2924] PCI: imx6: Save and restore the LUT setting during suspend/resume for i.MX95 SoC [ Upstream commit e4d66131caaf18d7c3c69914513f4be0519ddaaf ] The look up table (LUT) setting would be lost during the PCIe suspend on i.MX95 SoC. So to ensure proper functionality after resume, save it during suspend and restore it while resuming. Fixes: 9d6b1bd6b3c8 ("PCI: imx6: Add i.MX8MQ, i.MX8Q and i.MX95 PM support") Signed-off-by: Richard Zhu [mani: subject and description rewording] Signed-off-by: Manivannan Sadhasivam Reviewed-by: Frank Li Reviewed-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20250416081314.3929794-8-hongxing.zhu@nxp.com Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pci-imx6.c | 47 +++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 5f267dd261b51e..ea5c06371171ff 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -129,6 +129,11 @@ struct imx_pcie_drvdata { const struct dw_pcie_host_ops *ops; }; +struct imx_lut_data { + u32 data1; + u32 data2; +}; + struct imx_pcie { struct dw_pcie *pci; struct gpio_desc *reset_gpiod; @@ -148,6 +153,8 @@ struct imx_pcie { struct regulator *vph; void __iomem *phy_base; + /* LUT data for pcie */ + struct imx_lut_data luts[IMX95_MAX_LUT]; /* power domain for pcie */ struct device *pd_pcie; /* power domain for pcie phy */ @@ -1386,6 +1393,42 @@ static void imx_pcie_msi_save_restore(struct imx_pcie *imx_pcie, bool save) } } +static void imx_pcie_lut_save(struct imx_pcie *imx_pcie) +{ + u32 data1, data2; + int i; + + for (i = 0; i < IMX95_MAX_LUT; i++) { + regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, + IMX95_PEO_LUT_RWA | i); + regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, &data1); + regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, &data2); + if (data1 & IMX95_PE0_LUT_VLD) { + imx_pcie->luts[i].data1 = data1; + imx_pcie->luts[i].data2 = data2; + } else { + imx_pcie->luts[i].data1 = 0; + imx_pcie->luts[i].data2 = 0; + } + } +} + +static void imx_pcie_lut_restore(struct imx_pcie *imx_pcie) +{ + int i; + + for (i = 0; i < IMX95_MAX_LUT; i++) { + if ((imx_pcie->luts[i].data1 & IMX95_PE0_LUT_VLD) == 0) + continue; + + regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, + imx_pcie->luts[i].data1); + regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, + imx_pcie->luts[i].data2); + regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i); + } +} + static int imx_pcie_suspend_noirq(struct device *dev) { struct imx_pcie *imx_pcie = dev_get_drvdata(dev); @@ -1394,6 +1437,8 @@ static int imx_pcie_suspend_noirq(struct device *dev) return 0; imx_pcie_msi_save_restore(imx_pcie, true); + if (imx_check_flag(imx_pcie, IMX_PCIE_FLAG_HAS_LUT)) + imx_pcie_lut_save(imx_pcie); if (imx_check_flag(imx_pcie, IMX_PCIE_FLAG_BROKEN_SUSPEND)) { /* * The minimum for a workaround would be to set PERST# and to @@ -1438,6 +1483,8 @@ static int imx_pcie_resume_noirq(struct device *dev) if (ret) return ret; } + if (imx_check_flag(imx_pcie, IMX_PCIE_FLAG_HAS_LUT)) + imx_pcie_lut_restore(imx_pcie); imx_pcie_msi_save_restore(imx_pcie, false); return 0; From 0c33117f00c8c5363c22676931b22ae5041f7603 Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Mon, 14 Apr 2025 07:50:50 -0500 Subject: [PATCH 2559/2924] phy: qcom-qmp-usb: Fix an NULL vs IS_ERR() bug [ Upstream commit d14402a38c2d868cacb1facaf9be908ca6558e59 ] The qmp_usb_iomap() helper function currently returns the raw result of devm_ioremap() for non-exclusive mappings. Since devm_ioremap() may return a NULL pointer and the caller only checks error pointers with IS_ERR(), NULL could bypass the check and lead to an invalid dereference. Fix the issue by checking if devm_ioremap() returns NULL. When it does, qmp_usb_iomap() now returns an error pointer via IOMEM_ERR_PTR(-ENOMEM), ensuring safe and consistent error handling. Signed-off-by: Chenyuan Yang Fixes: a5d6b1ac56cb ("phy: qcom-qmp-usb: fix memleak on probe deferral") CC: Johan Hovold CC: Krzysztof Kozlowski Reviewed-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250414125050.2118619-1-chenyuan0y@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 78772157045752..ed646a7e705ba3 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2106,12 +2106,16 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, int index, bool exclusive) { struct resource res; + void __iomem *mem; if (!exclusive) { if (of_address_to_resource(np, index, &res)) return IOMEM_ERR_PTR(-EINVAL); - return devm_ioremap(dev, res.start, resource_size(&res)); + mem = devm_ioremap(dev, res.start, resource_size(&res)); + if (!mem) + return IOMEM_ERR_PTR(-ENOMEM); + return mem; } return devm_of_iomap(dev, np, index, NULL); From 1ec47c5f3cb2d475828754f7b751c40e465b8d69 Mon Sep 17 00:00:00 2001 From: Kathiravan Thirumoorthy Date: Tue, 15 Apr 2025 09:52:50 +0530 Subject: [PATCH 2560/2924] Revert "phy: qcom-qusb2: add QUSB2 support for IPQ5424" [ Upstream commit 8a040e13afd94a1f91acaf8e0505769d4f7f5af4 ] With the current settings, compliance tests especially eye diagram (Host High-speed Signal Quality) tests are failing. Reuse the IPQ6018 settings to overcome this issue, as mentioned in the Hardware Design Document. So revert the change which introduced the new settings and reuse the IPQ6018 settings in the subsequent patch. Fixes: 9c56a1de296e ("phy: qcom-qusb2: add QUSB2 support for IPQ5424") Signed-off-by: Kathiravan Thirumoorthy Link: https://lore.kernel.org/r/20250415-revert_hs_phy_settings-v3-1-3a8f86211b59@oss.qualcomm.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/qualcomm/phy-qcom-qusb2.c | 28 --------------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c index 1f5f7df14d5a2f..81b9e9349c3ebb 100644 --- a/drivers/phy/qualcomm/phy-qcom-qusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c @@ -151,21 +151,6 @@ static const struct qusb2_phy_init_tbl ipq6018_init_tbl[] = { QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9F), }; -static const struct qusb2_phy_init_tbl ipq5424_init_tbl[] = { - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL, 0x14), - QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE1, 0x00), - QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE2, 0x53), - QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE4, 0xc3), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TUNE, 0x30), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL1, 0x79), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL2, 0x21), - QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE5, 0x00), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_PWR_CTRL, 0x00), - QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TEST2, 0x14), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TEST, 0x80), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9f), -}; - static const struct qusb2_phy_init_tbl qcs615_init_tbl[] = { QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE1, 0xc8), QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE2, 0xb3), @@ -359,16 +344,6 @@ static const struct qusb2_phy_cfg ipq6018_phy_cfg = { .autoresume_en = BIT(0), }; -static const struct qusb2_phy_cfg ipq5424_phy_cfg = { - .tbl = ipq5424_init_tbl, - .tbl_num = ARRAY_SIZE(ipq5424_init_tbl), - .regs = ipq6018_regs_layout, - - .disable_ctrl = POWER_DOWN, - .mask_core_ready = PLL_LOCKED, - .autoresume_en = BIT(0), -}; - static const struct qusb2_phy_cfg qcs615_phy_cfg = { .tbl = qcs615_init_tbl, .tbl_num = ARRAY_SIZE(qcs615_init_tbl), @@ -954,9 +929,6 @@ static const struct phy_ops qusb2_phy_gen_ops = { static const struct of_device_id qusb2_phy_of_match_table[] = { { - .compatible = "qcom,ipq5424-qusb2-phy", - .data = &ipq5424_phy_cfg, - }, { .compatible = "qcom,ipq6018-qusb2-phy", .data = &ipq6018_phy_cfg, }, { From f006ad318fa7c52c3daa978a04a8782f3dc6f2da Mon Sep 17 00:00:00 2001 From: Kathiravan Thirumoorthy Date: Tue, 15 Apr 2025 09:52:51 +0530 Subject: [PATCH 2561/2924] phy: qcom-qusb2: reuse the IPQ6018 settings for IPQ5424 [ Upstream commit 25c36b54eafc98b3ef004e2037cea1328d9b8bc5 ] With the settings used in the commit 9c56a1de296e ("phy: qcom-qusb2: add QUSB2 support for IPQ5424"), compliance test cases especially eye-diagram (Host High-speed Signal Quality) tests are failing. Reuse the IPQ6018 settings for IPQ5424 as mentioned in the Hardware Design Document which helps to meet all the complaince requirement test cases. Fixes: 9c56a1de296e ("phy: qcom-qusb2: add QUSB2 support for IPQ5424") Signed-off-by: Kathiravan Thirumoorthy Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250415-revert_hs_phy_settings-v3-2-3a8f86211b59@oss.qualcomm.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/qualcomm/phy-qcom-qusb2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c index 81b9e9349c3ebb..49c37c53b38e70 100644 --- a/drivers/phy/qualcomm/phy-qcom-qusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c @@ -929,6 +929,9 @@ static const struct phy_ops qusb2_phy_gen_ops = { static const struct of_device_id qusb2_phy_of_match_table[] = { { + .compatible = "qcom,ipq5424-qusb2-phy", + .data = &ipq6018_phy_cfg, + }, { .compatible = "qcom,ipq6018-qusb2-phy", .data = &ipq6018_phy_cfg, }, { From ce841128837fff4da55c63447bbdee196f38aab0 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 8 May 2025 14:20:29 +0800 Subject: [PATCH 2562/2924] soundwire: only compute port params in specific stream states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 62ada17a6217a50fbd1b23f10899890f56effc97 ] Currently, sdw_compute_master_ports() is blindly called for every single Manager runtime. However, we should not take into account the stream's bandwidth if the stream is just allocated or already deprepared. Fixes: 25befdf32aa4 ("soundwire: generic_bandwidth_allocation: count the bandwidth of active streams only") Link: https://github.com/thesofproject/linux/issues/5398 Signed-off-by: Bard Liao Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20250508062029.6596-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/soundwire/generic_bandwidth_allocation.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/soundwire/generic_bandwidth_allocation.c b/drivers/soundwire/generic_bandwidth_allocation.c index 1cfaccf43eac50..c18f0c16f92973 100644 --- a/drivers/soundwire/generic_bandwidth_allocation.c +++ b/drivers/soundwire/generic_bandwidth_allocation.c @@ -204,6 +204,13 @@ static void _sdw_compute_port_params(struct sdw_bus *bus, port_bo = 1; list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) { + /* + * Only runtimes with CONFIGURED, PREPARED, ENABLED, and DISABLED + * states should be included in the bandwidth calculation. + */ + if (m_rt->stream->state > SDW_STREAM_DISABLED || + m_rt->stream->state < SDW_STREAM_CONFIGURED) + continue; sdw_compute_master_ports(m_rt, ¶ms[i], &port_bo, hstop); } From 643db430f4cbd91dd2b63c49d62d0abb6debc13b Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Wed, 2 Apr 2025 10:39:00 +0800 Subject: [PATCH 2563/2924] dmaengine: ti: Add NULL check in udma_probe() [ Upstream commit fd447415e74bccd7362f760d4ea727f8e1ebfe91 ] devm_kasprintf() returns NULL when memory allocation fails. Currently, udma_probe() does not check for this case, which results in a NULL pointer dereference. Add NULL check after devm_kasprintf() to prevent this issue. Fixes: 25dcb5dd7b7c ("dmaengine: ti: New driver for K3 UDMA") Signed-off-by: Henry Martin Reviewed-by: Nathan Lynch Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20250402023900.43440-1-bsdhenrymartin@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/ti/k3-udma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index b6255c0601bb29..aa2dc762140f6e 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -5624,7 +5624,8 @@ static int udma_probe(struct platform_device *pdev) uc->config.dir = DMA_MEM_TO_MEM; uc->name = devm_kasprintf(dev, GFP_KERNEL, "%s chan%d", dev_name(dev), i); - + if (!uc->name) + return -ENOMEM; vchan_init(&uc->vc, &ud->ddev); /* Use custom vchan completion handling */ tasklet_setup(&uc->vc.task, udma_vchan_complete); From d9470e94f1511b0b592d6d669089c871072d0df7 Mon Sep 17 00:00:00 2001 From: Zhe Qiao Date: Wed, 30 Apr 2025 14:06:03 +0800 Subject: [PATCH 2564/2924] PCI/ACPI: Fix allocated memory release on error in pci_acpi_scan_root() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 631b2af2f35737750af284be22e63da56bf20139 ] In the pci_acpi_scan_root() function, when creating a PCI bus fails, we need to free up the previously allocated memory, which can avoid invalid memory usage and save resources. Fixes: 789befdfa389 ("arm64: PCI: Migrate ACPI related functions to pci-acpi.c") Signed-off-by: Zhe Qiao [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20250430060603.381504-1-qiaozhe@iscas.ac.cn Signed-off-by: Sasha Levin --- drivers/pci/pci-acpi.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index af370628e58393..b78e0e41732445 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1676,24 +1676,19 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); - if (!root_ops) { - kfree(ri); - return NULL; - } + if (!root_ops) + goto free_ri; ri->cfg = pci_acpi_setup_ecam_mapping(root); - if (!ri->cfg) { - kfree(ri); - kfree(root_ops); - return NULL; - } + if (!ri->cfg) + goto free_root_ops; root_ops->release_info = pci_acpi_generic_release_info; root_ops->prepare_resources = pci_acpi_root_prepare_resources; root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops; bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg); if (!bus) - return NULL; + goto free_cfg; /* If we must preserve the resource configuration, claim now */ host = pci_find_host_bridge(bus); @@ -1710,6 +1705,14 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) pcie_bus_configure_settings(child); return bus; + +free_cfg: + pci_ecam_free(ri->cfg); +free_root_ops: + kfree(root_ops); +free_ri: + kfree(ri); + return NULL; } void pcibios_add_bus(struct pci_bus *bus) From 2d50d734cad39c1fcf115520f72285aee63a3228 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 22 May 2025 18:21:07 -0500 Subject: [PATCH 2565/2924] PCI/DPC: Initialize aer_err_info before using it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a424b598e6a6c1e69a2bb801d6fd16e805ab2c38 ] Previously the struct aer_err_info "info" was allocated on the stack without being initialized, so it contained junk except for the fields we explicitly set later. Initialize "info" at declaration so it starts as all zeros. Fixes: 8aefa9b0d910 ("PCI/DPC: Print AER status in DPC event handling") Signed-off-by: Bjorn Helgaas Tested-by: Krzysztof Wilczyński Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ilpo Järvinen Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250522232339.1525671-2-helgaas@kernel.org Signed-off-by: Sasha Levin --- drivers/pci/pcie/dpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index df42f15c98295f..3daaf61c79c9fb 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -258,7 +258,7 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, void dpc_process_error(struct pci_dev *pdev) { u16 cap = pdev->dpc_cap, status, source, reason, ext_reason; - struct aer_err_info info; + struct aer_err_info info = {}; pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source); From 4e0abf270c62cb5c6e2bf6ea4c599b56b457354f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 22 May 2025 18:21:08 -0500 Subject: [PATCH 2566/2924] PCI/DPC: Log Error Source ID only when valid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a0b62cc310239c7f1323fb20bd3789f21bdd8615 ] DPC Error Source ID is only valid when the DPC Trigger Reason indicates that DPC was triggered due to reception of an ERR_NONFATAL or ERR_FATAL Message (PCIe r6.0, sec 7.9.14.5). When DPC was triggered by ERR_NONFATAL (PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE) or ERR_FATAL (PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) from a downstream device, log the Error Source ID (decoded into domain/bus/device/function). Don't print the source otherwise, since it's not valid. For DPC trigger due to reception of ERR_NONFATAL or ERR_FATAL, the dmesg logging changes: - pci 0000:00:01.0: DPC: containment event, status:0x000d source:0x0200 - pci 0000:00:01.0: DPC: ERR_FATAL detected + pci 0000:00:01.0: DPC: containment event, status:0x000d, ERR_FATAL received from 0000:02:00.0 and when DPC triggered for other reasons, where DPC Error Source ID is undefined, e.g., unmasked uncorrectable error: - pci 0000:00:01.0: DPC: containment event, status:0x0009 source:0x0200 - pci 0000:00:01.0: DPC: unmasked uncorrectable error detected + pci 0000:00:01.0: DPC: containment event, status:0x0009: unmasked uncorrectable error detected Previously the "containment event" message was at KERN_INFO and the "%s detected" message was at KERN_WARNING. Now the single message is at KERN_WARNING. Fixes: 26e515713342 ("PCI: Add Downstream Port Containment driver") Signed-off-by: Bjorn Helgaas Tested-by: Krzysztof Wilczyński Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Jonathan Cameron Reviewed-by: Ilpo Järvinen Link: https://patch.msgid.link/20250522232339.1525671-3-helgaas@kernel.org Signed-off-by: Sasha Levin --- drivers/pci/pcie/dpc.c | 66 +++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 3daaf61c79c9fb..9d85f1b3b76112 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -261,37 +261,45 @@ void dpc_process_error(struct pci_dev *pdev) struct aer_err_info info = {}; pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); - pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source); - - pci_info(pdev, "containment event, status:%#06x source:%#06x\n", - status, source); reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN; - ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT; - pci_warn(pdev, "%s detected\n", - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR) ? - "unmasked uncorrectable error" : - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE) ? - "ERR_NONFATAL" : - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ? - "ERR_FATAL" : - (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ? - "RP PIO error" : - (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ? - "software trigger" : - "reserved error"); - - /* show RP PIO error detail information */ - if (pdev->dpc_rp_extensions && - reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT && - ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) - dpc_process_rp_pio_error(pdev); - else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR && - dpc_get_aer_uncorrect_severity(pdev, &info) && - aer_get_device_error_info(pdev, &info)) { - aer_print_error(pdev, &info); - pci_aer_clear_nonfatal_status(pdev); - pci_aer_clear_fatal_status(pdev); + + switch (reason) { + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR: + pci_warn(pdev, "containment event, status:%#06x: unmasked uncorrectable error detected\n", + status); + if (dpc_get_aer_uncorrect_severity(pdev, &info) && + aer_get_device_error_info(pdev, &info)) { + aer_print_error(pdev, &info); + pci_aer_clear_nonfatal_status(pdev); + pci_aer_clear_fatal_status(pdev); + } + break; + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE: + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE: + pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, + &source); + pci_warn(pdev, "containment event, status:%#06x, %s received from %04x:%02x:%02x.%d\n", + status, + (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ? + "ERR_FATAL" : "ERR_NONFATAL", + pci_domain_nr(pdev->bus), PCI_BUS_NUM(source), + PCI_SLOT(source), PCI_FUNC(source)); + break; + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT: + ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT; + pci_warn(pdev, "containment event, status:%#06x: %s detected\n", + status, + (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ? + "RP PIO error" : + (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ? + "software trigger" : + "reserved error"); + /* show RP PIO error detail information */ + if (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO && + pdev->dpc_rp_extensions) + dpc_process_rp_pio_error(pdev); + break; } } From b3ad6d23fec23fbef382ce9ea640c37446593cf5 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 9 Apr 2025 11:53:13 -0700 Subject: [PATCH 2567/2924] PCI/pwrctrl: Cancel outstanding rescan work when unregistering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8b926f237743f020518162c62b93cb7107a2b5eb ] It's possible to trigger use-after-free here by: (a) forcing rescan_work_func() to take a long time and (b) utilizing a pwrctrl driver that may be unloaded for some reason Cancel outstanding work to ensure it is finished before we allow our data structures to be cleaned up. [bhelgaas: tidy commit log] Fixes: 8f62819aaace ("PCI/pwrctl: Rescan bus on a separate thread") Signed-off-by: Brian Norris Signed-off-by: Brian Norris Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Acked-by: Bartosz Golaszewski Cc: Konrad Dybcio Link: https://patch.msgid.link/20250409115313.1.Ia319526ed4ef06bec3180378c9a008340cec9658@changeid Signed-off-by: Sasha Levin --- drivers/pci/pwrctrl/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c index 9cc7e2b7f2b560..6bdbfed584d6d7 100644 --- a/drivers/pci/pwrctrl/core.c +++ b/drivers/pci/pwrctrl/core.c @@ -101,6 +101,8 @@ EXPORT_SYMBOL_GPL(pci_pwrctrl_device_set_ready); */ void pci_pwrctrl_device_unset_ready(struct pci_pwrctrl *pwrctrl) { + cancel_work_sync(&pwrctrl->work); + /* * We don't have to delete the link here. Typically, this function * is only called when the power control device is being detached. If From f8ca072c3e2d9808d1a7c3317b4777a13f001bc2 Mon Sep 17 00:00:00 2001 From: Liu Dalin Date: Fri, 9 May 2025 16:44:16 +0800 Subject: [PATCH 2568/2924] rtc: loongson: Add missing alarm notifications for ACPI RTC events [ Upstream commit 5af9f1fa576874b24627d4c05e3a84672204c200 ] When an application sets and enables an alarm on Loongson RTC devices, the alarm notification fails to propagate to userspace because the ACPI event handler omits calling rtc_update_irq(). As a result, processes waiting via select() or poll() on RTC device files fail to receive alarm notifications. The ACPI interrupt is also triggered multiple times. In loongson_rtc_handler, we need to clear TOY_MATCH0_REG to resolve this issue. Fixes: 09471d8f5b39 ("rtc: loongson: clear TOY_MATCH0_REG in loongson_rtc_isr()") Fixes: 1b733a9ebc3d ("rtc: Add rtc driver for the Loongson family chips") Signed-off-by: Liu Dalin Reviewed-by: Binbin Zhou Link: https://lore.kernel.org/r/20250509084416.7979-1-liudalin@kylinsec.com.cn Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-loongson.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/rtc/rtc-loongson.c b/drivers/rtc/rtc-loongson.c index 97e5625c064ceb..2ca7ffd5d7a92a 100644 --- a/drivers/rtc/rtc-loongson.c +++ b/drivers/rtc/rtc-loongson.c @@ -129,6 +129,14 @@ static u32 loongson_rtc_handler(void *id) { struct loongson_rtc_priv *priv = (struct loongson_rtc_priv *)id; + rtc_update_irq(priv->rtcdev, 1, RTC_AF | RTC_IRQF); + + /* + * The TOY_MATCH0_REG should be cleared 0 here, + * otherwise the interrupt cannot be cleared. + */ + regmap_write(priv->regmap, TOY_MATCH0_REG, 0); + spin_lock(&priv->lock); /* Disable RTC alarm wakeup and interrupt */ writel(readl(priv->pm_base + PM1_EN_REG) & ~RTC_EN, From 99155d36a3ddf46cb5b48b974d95d5b49daef224 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Mar 2025 22:03:54 +0100 Subject: [PATCH 2569/2924] rust: pci: fix docs related to missing Markdown code spans [ Upstream commit 1dbaf8b1bafb8557904eb54b98bb323a3061dd2c ] In particular: - Add missing Markdown code spans. - Improve title for `DeviceId`, adding a link to the struct in the C side, rather than referring to `bindings::`. - Convert `TODO` from documentation to a normal comment, and put code in block. This was found using the Clippy `doc_markdown` lint, which we may want to enable. Fixes: 1bd8b6b2c5d3 ("rust: pci: add basic PCI device / driver abstractions") Reviewed-by: Benno Lossin Acked-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250324210359.1199574-8-ojeda@kernel.org [ Prefixed link text with `struct`. - Miguel ] Signed-off-by: Miguel Ojeda Signed-off-by: Sasha Levin --- rust/kernel/pci.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index c97d6d470b2822..bbc453c6d9ea88 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -118,7 +118,9 @@ macro_rules! module_pci_driver { }; } -/// Abstraction for bindings::pci_device_id. +/// Abstraction for the PCI device ID structure ([`struct pci_device_id`]). +/// +/// [`struct pci_device_id`]: https://docs.kernel.org/PCI/pci.html#c.pci_device_id #[repr(transparent)] #[derive(Clone, Copy)] pub struct DeviceId(bindings::pci_device_id); @@ -173,7 +175,7 @@ unsafe impl RawDeviceId for DeviceId { } } -/// IdTable type for PCI +/// `IdTable` type for PCI. pub type IdTable = &'static dyn kernel::device_id::IdTable; /// Create a PCI `IdTable` with its alias for modpost. @@ -224,10 +226,11 @@ macro_rules! pci_device_table { /// `Adapter` documentation for an example. pub trait Driver: Send { /// The type holding information about each device id supported by the driver. - /// - /// TODO: Use associated_type_defaults once stabilized: - /// - /// type IdInfo: 'static = (); + // TODO: Use `associated_type_defaults` once stabilized: + // + // ``` + // type IdInfo: 'static = (); + // ``` type IdInfo: 'static; /// The table of device ids supported by the driver. From 55ebe901711323eebde8ccf983b4330bf2b7517b Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 24 Apr 2025 10:34:04 +0200 Subject: [PATCH 2570/2924] PCI: endpoint: Retain fixed-size BAR size as well as aligned size [ Upstream commit 793908d60b8745c386b9f4e29eb702f74ceb0886 ] When allocating space for an endpoint function on a BAR with a fixed size, the size saved in 'struct pci_epf_bar.size' should be the fixed size as expected by pci_epc_set_bar(). However, if pci_epf_alloc_space() increased the allocation size to accommodate iATU alignment requirements, it previously saved the larger aligned size in .size, which broke pci_epc_set_bar(). To solve this, keep the fixed BAR size in .size and save the aligned size in a new .aligned_size for use when deallocating it. Fixes: 2a9a801620ef ("PCI: endpoint: Add support to specify alignment for buffers allocated to BARs") Signed-off-by: Jerome Brunet [mani: commit message fixup] Signed-off-by: Manivannan Sadhasivam [bhelgaas: more specific subject, commit log, wrap comment to match file] Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel Link: https://patch.msgid.link/20250424-pci-ep-size-alignment-v5-1-2d4ec2af23f5@baylibre.com Signed-off-by: Sasha Levin --- drivers/pci/endpoint/pci-epf-core.c | 22 +++++++++++++++------- include/linux/pci-epf.h | 3 +++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c index 394395c7f8decf..577a9e490115c9 100644 --- a/drivers/pci/endpoint/pci-epf-core.c +++ b/drivers/pci/endpoint/pci-epf-core.c @@ -236,12 +236,13 @@ void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar, } dev = epc->dev.parent; - dma_free_coherent(dev, epf_bar[bar].size, addr, + dma_free_coherent(dev, epf_bar[bar].aligned_size, addr, epf_bar[bar].phys_addr); epf_bar[bar].phys_addr = 0; epf_bar[bar].addr = NULL; epf_bar[bar].size = 0; + epf_bar[bar].aligned_size = 0; epf_bar[bar].barno = 0; epf_bar[bar].flags = 0; } @@ -264,7 +265,7 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, enum pci_epc_interface_type type) { u64 bar_fixed_size = epc_features->bar[bar].fixed_size; - size_t align = epc_features->align; + size_t aligned_size, align = epc_features->align; struct pci_epf_bar *epf_bar; dma_addr_t phys_addr; struct pci_epc *epc; @@ -285,12 +286,18 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, return NULL; } size = bar_fixed_size; + } else { + /* BAR size must be power of two */ + size = roundup_pow_of_two(size); } - if (align) - size = ALIGN(size, align); - else - size = roundup_pow_of_two(size); + /* + * Allocate enough memory to accommodate the iATU alignment + * requirement. In most cases, this will be the same as .size but + * it might be different if, for example, the fixed size of a BAR + * is smaller than align. + */ + aligned_size = align ? ALIGN(size, align) : size; if (type == PRIMARY_INTERFACE) { epc = epf->epc; @@ -301,7 +308,7 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, } dev = epc->dev.parent; - space = dma_alloc_coherent(dev, size, &phys_addr, GFP_KERNEL); + space = dma_alloc_coherent(dev, aligned_size, &phys_addr, GFP_KERNEL); if (!space) { dev_err(dev, "failed to allocate mem space\n"); return NULL; @@ -310,6 +317,7 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, epf_bar[bar].phys_addr = phys_addr; epf_bar[bar].addr = space; epf_bar[bar].size = size; + epf_bar[bar].aligned_size = aligned_size; epf_bar[bar].barno = bar; if (upper_32_bits(size) || epc_features->bar[bar].only_64bit) epf_bar[bar].flags |= PCI_BASE_ADDRESS_MEM_TYPE_64; diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 879d19cebd4fc6..749cee0bcf2cc0 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -114,6 +114,8 @@ struct pci_epf_driver { * @phys_addr: physical address that should be mapped to the BAR * @addr: virtual address corresponding to the @phys_addr * @size: the size of the address space present in BAR + * @aligned_size: the size actually allocated to accommodate the iATU alignment + * requirement * @barno: BAR number * @flags: flags that are set for the BAR */ @@ -121,6 +123,7 @@ struct pci_epf_bar { dma_addr_t phys_addr; void *addr; size_t size; + size_t aligned_size; enum pci_barno barno; int flags; }; From 9219079c52821246b2395bd30a4a2ffb9681e9e6 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 3 Jun 2025 10:22:01 +0200 Subject: [PATCH 2571/2924] sched_ext: idle: Skip cross-node search with !CONFIG_NUMA [ Upstream commit 9960be72a54cf0e4d47abdd4cacd1278835a3bb4 ] In the idle CPU selection logic, attempting cross-node searches adds unnecessary complexity when CONFIG_NUMA is disabled. Since there's no meaningful concept of nodes in this case, simplify the logic by restricting the idle CPU search to the current node only. Fixes: 48849271e6611 ("sched_ext: idle: Per-node idle cpumasks") Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/sched/ext_idle.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index e67a19a071c114..50b9f3af810d93 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -131,6 +131,7 @@ static s32 pick_idle_cpu_in_node(const struct cpumask *cpus_allowed, int node, u goto retry; } +#ifdef CONFIG_NUMA /* * Tracks nodes that have not yet been visited when searching for an idle * CPU across all available nodes. @@ -179,6 +180,13 @@ static s32 pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, i return cpu; } +#else +static inline s32 +pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, int node, u64 flags) +{ + return -EBUSY; +} +#endif /* * Find an idle CPU in the system, starting from @node. From 6bab152e817fd41b9e178fa6b275354795c9703d Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 7 Apr 2025 11:50:02 +0100 Subject: [PATCH 2572/2924] usb: renesas_usbhs: Reorder clock handling and power management in probe [ Upstream commit ffb34a60ce86656ba12d46e91f1ccc71dd221251 ] Reorder the initialization sequence in `usbhs_probe()` to enable runtime PM before accessing registers, preventing potential crashes due to uninitialized clocks. Currently, in the probe path, registers are accessed before enabling the clocks, leading to a synchronous external abort on the RZ/V2H SoC. The problematic call flow is as follows: usbhs_probe() usbhs_sys_clock_ctrl() usbhs_bset() usbhs_write() iowrite16() <-- Register access before enabling clocks Since `iowrite16()` is performed without ensuring the required clocks are enabled, this can lead to access errors. To fix this, enable PM runtime early in the probe function and ensure clocks are acquired before register access, preventing crashes like the following on RZ/V2H: [13.272640] Internal error: synchronous external abort: 0000000096000010 [#1] PREEMPT SMP [13.280814] Modules linked in: cec renesas_usbhs(+) drm_kms_helper fuse drm backlight ipv6 [13.289088] CPU: 1 UID: 0 PID: 195 Comm: (udev-worker) Not tainted 6.14.0-rc7+ #98 [13.296640] Hardware name: Renesas RZ/V2H EVK Board based on r9a09g057h44 (DT) [13.303834] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [13.310770] pc : usbhs_bset+0x14/0x4c [renesas_usbhs] [13.315831] lr : usbhs_probe+0x2e4/0x5ac [renesas_usbhs] [13.321138] sp : ffff8000827e3850 [13.324438] x29: ffff8000827e3860 x28: 0000000000000000 x27: ffff8000827e3ca0 [13.331554] x26: ffff8000827e3ba0 x25: ffff800081729668 x24: 0000000000000025 [13.338670] x23: ffff0000c0f08000 x22: 0000000000000000 x21: ffff0000c0f08010 [13.345783] x20: 0000000000000000 x19: ffff0000c3b52080 x18: 00000000ffffffff [13.352895] x17: 0000000000000000 x16: 0000000000000000 x15: ffff8000827e36ce [13.360009] x14: 00000000000003d7 x13: 00000000000003d7 x12: 0000000000000000 [13.367122] x11: 0000000000000000 x10: 0000000000000aa0 x9 : ffff8000827e3750 [13.374235] x8 : ffff0000c1850b00 x7 : 0000000003826060 x6 : 000000000000001c [13.381347] x5 : 000000030d5fcc00 x4 : ffff8000825c0000 x3 : 0000000000000000 [13.388459] x2 : 0000000000000400 x1 : 0000000000000000 x0 : ffff0000c3b52080 [13.395574] Call trace: [13.398013] usbhs_bset+0x14/0x4c [renesas_usbhs] (P) [13.403076] platform_probe+0x68/0xdc [13.406738] really_probe+0xbc/0x2c0 [13.410306] __driver_probe_device+0x78/0x120 [13.414653] driver_probe_device+0x3c/0x154 [13.418825] __driver_attach+0x90/0x1a0 [13.422647] bus_for_each_dev+0x7c/0xe0 [13.426470] driver_attach+0x24/0x30 [13.430032] bus_add_driver+0xe4/0x208 [13.433766] driver_register+0x68/0x130 [13.437587] __platform_driver_register+0x24/0x30 [13.442273] renesas_usbhs_driver_init+0x20/0x1000 [renesas_usbhs] [13.448450] do_one_initcall+0x60/0x1d4 [13.452276] do_init_module+0x54/0x1f8 [13.456014] load_module+0x1754/0x1c98 [13.459750] init_module_from_file+0x88/0xcc [13.464004] __arm64_sys_finit_module+0x1c4/0x328 [13.468689] invoke_syscall+0x48/0x104 [13.472426] el0_svc_common.constprop.0+0xc0/0xe0 [13.477113] do_el0_svc+0x1c/0x28 [13.480415] el0_svc+0x30/0xcc [13.483460] el0t_64_sync_handler+0x10c/0x138 [13.487800] el0t_64_sync+0x198/0x19c [13.491453] Code: 2a0103e1 12003c42 12003c63 8b010084 (79400084) [13.497522] ---[ end trace 0000000000000000 ]--- Fixes: f1407d5c66240 ("usb: renesas_usbhs: Add Renesas USBHS common code") Signed-off-by: Lad Prabhakar Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20250407105002.107181-4-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/renesas_usbhs/common.c | 50 +++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 4b35ef216125c7..16692e72b73650 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -685,10 +685,29 @@ static int usbhs_probe(struct platform_device *pdev) INIT_DELAYED_WORK(&priv->notify_hotplug_work, usbhsc_notify_hotplug); spin_lock_init(usbhs_priv_to_lock(priv)); + /* + * Acquire clocks and enable power management (PM) early in the + * probe process, as the driver accesses registers during + * initialization. Ensure the device is active before proceeding. + */ + pm_runtime_enable(dev); + + ret = usbhsc_clk_get(dev, priv); + if (ret) + goto probe_pm_disable; + + ret = pm_runtime_resume_and_get(dev); + if (ret) + goto probe_clk_put; + + ret = usbhsc_clk_prepare_enable(priv); + if (ret) + goto probe_pm_put; + /* call pipe and module init */ ret = usbhs_pipe_probe(priv); if (ret < 0) - return ret; + goto probe_clk_dis_unprepare; ret = usbhs_fifo_probe(priv); if (ret < 0) @@ -705,10 +724,6 @@ static int usbhs_probe(struct platform_device *pdev) if (ret) goto probe_fail_rst; - ret = usbhsc_clk_get(dev, priv); - if (ret) - goto probe_fail_clks; - /* * deviece reset here because * USB device might be used in boot loader. @@ -721,7 +736,7 @@ static int usbhs_probe(struct platform_device *pdev) if (ret) { dev_warn(dev, "USB function not selected (GPIO)\n"); ret = -ENOTSUPP; - goto probe_end_mod_exit; + goto probe_assert_rest; } } @@ -735,14 +750,19 @@ static int usbhs_probe(struct platform_device *pdev) ret = usbhs_platform_call(priv, hardware_init, pdev); if (ret < 0) { dev_err(dev, "platform init failed.\n"); - goto probe_end_mod_exit; + goto probe_assert_rest; } /* reset phy for connection */ usbhs_platform_call(priv, phy_reset, pdev); - /* power control */ - pm_runtime_enable(dev); + /* + * Disable the clocks that were enabled earlier in the probe path, + * and let the driver handle the clocks beyond this point. + */ + usbhsc_clk_disable_unprepare(priv); + pm_runtime_put(dev); + if (!usbhs_get_dparam(priv, runtime_pwctrl)) { usbhsc_power_ctrl(priv, 1); usbhs_mod_autonomy_mode(priv); @@ -759,9 +779,7 @@ static int usbhs_probe(struct platform_device *pdev) return ret; -probe_end_mod_exit: - usbhsc_clk_put(priv); -probe_fail_clks: +probe_assert_rest: reset_control_assert(priv->rsts); probe_fail_rst: usbhs_mod_remove(priv); @@ -769,6 +787,14 @@ static int usbhs_probe(struct platform_device *pdev) usbhs_fifo_remove(priv); probe_end_pipe_exit: usbhs_pipe_remove(priv); +probe_clk_dis_unprepare: + usbhsc_clk_disable_unprepare(priv); +probe_pm_put: + pm_runtime_put(dev); +probe_clk_put: + usbhsc_clk_put(priv); +probe_pm_disable: + pm_runtime_disable(dev); dev_info(dev, "probe failed (%d)\n", ret); From e1b144aebe6fb898d96ced8c990d7aa38fda4a7a Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Thu, 3 Apr 2025 15:03:39 +0800 Subject: [PATCH 2573/2924] serial: Fix potential null-ptr-deref in mlb_usio_probe() [ Upstream commit 86bcae88c9209e334b2f8c252f4cc66beb261886 ] devm_ioremap() can return NULL on error. Currently, mlb_usio_probe() does not check for this case, which could result in a NULL pointer dereference. Add NULL check after devm_ioremap() to prevent this issue. Fixes: ba44dc043004 ("serial: Add Milbeaut serial control") Signed-off-by: Henry Martin Link: https://lore.kernel.org/r/20250403070339.64990-1-bsdhenrymartin@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/milbeaut_usio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/milbeaut_usio.c b/drivers/tty/serial/milbeaut_usio.c index 059bea18dbab56..4e47dca2c4ed9c 100644 --- a/drivers/tty/serial/milbeaut_usio.c +++ b/drivers/tty/serial/milbeaut_usio.c @@ -523,7 +523,10 @@ static int mlb_usio_probe(struct platform_device *pdev) } port->membase = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - + if (!port->membase) { + ret = -ENOMEM; + goto failed; + } ret = platform_get_irq_byname(pdev, "rx"); mlb_usio_irq[index][RX] = ret; From d2ef2fac35b6140fcdff0d26b6194780240538e2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 11 Apr 2025 10:14:44 -0500 Subject: [PATCH 2574/2924] thunderbolt: Fix a logic error in wake on connect [ Upstream commit 1a760d10ded372d113a0410c42be246315bbc2ff ] commit a5cfc9d65879c ("thunderbolt: Add wake on connect/disconnect on USB4 ports") introduced a sysfs file to control wake up policy for a given USB4 port that defaulted to disabled. However when testing commit 4bfeea6ec1c02 ("thunderbolt: Use wake on connect and disconnect over suspend") I found that it was working even without making changes to the power/wakeup file (which defaults to disabled). This is because of a logic error doing a bitwise or of the wake-on-connect flag with device_may_wakeup() which should have been a logical AND. Adjust the logic so that policy is only applied when wakeup is actually enabled. Fixes: a5cfc9d65879c ("thunderbolt: Add wake on connect/disconnect on USB4 ports") Signed-off-by: Mario Limonciello Signed-off-by: Mika Westerberg Signed-off-by: Sasha Levin --- drivers/thunderbolt/usb4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index e51d01671d8e7c..3e96f1afd4268e 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -440,10 +440,10 @@ int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags) bool configured = val & PORT_CS_19_PC; usb4 = port->usb4; - if (((flags & TB_WAKE_ON_CONNECT) | + if (((flags & TB_WAKE_ON_CONNECT) && device_may_wakeup(&usb4->dev)) && !configured) val |= PORT_CS_19_WOC; - if (((flags & TB_WAKE_ON_DISCONNECT) | + if (((flags & TB_WAKE_ON_DISCONNECT) && device_may_wakeup(&usb4->dev)) && configured) val |= PORT_CS_19_WOD; if ((flags & TB_WAKE_ON_USB4) && configured) From 900bf5135aa8e0f9b4385fe80c363902b2907777 Mon Sep 17 00:00:00 2001 From: Sam Winchenbach Date: Fri, 28 Mar 2025 13:48:27 -0400 Subject: [PATCH 2575/2924] iio: filter: admv8818: fix band 4, state 15 [ Upstream commit ef0ce24f590ac075d5eda11f2d6434b303333ed6 ] Corrects the upper range of LPF Band 4 from 18.5 GHz to 18.85 GHz per the ADMV8818 datasheet Fixes: f34fe888ad05 ("iio:filter:admv8818: add support for ADMV8818") Signed-off-by: Sam Winchenbach Link: https://patch.msgid.link/20250328174831.227202-3-sam.winchenbach@framepointer.org Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/filter/admv8818.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c index d85b7d3de86604..3d8740caa14559 100644 --- a/drivers/iio/filter/admv8818.c +++ b/drivers/iio/filter/admv8818.c @@ -103,7 +103,7 @@ static const unsigned long long freq_range_lpf[4][2] = { {2050000000ULL, 3850000000ULL}, {3350000000ULL, 7250000000ULL}, {7000000000, 13000000000}, - {12550000000, 18500000000} + {12550000000, 18850000000} }; static const struct regmap_config admv8818_regmap_config = { From 35e5c0cc4dfdcca31727ea264754115a1c6060fd Mon Sep 17 00:00:00 2001 From: Sam Winchenbach Date: Fri, 28 Mar 2025 13:48:28 -0400 Subject: [PATCH 2576/2924] iio: filter: admv8818: fix integer overflow [ Upstream commit fb6009a28d77edec4eb548b5875dae8c79b88467 ] HZ_PER_MHZ is only unsigned long. This math overflows, leading to incorrect results. Fixes: f34fe888ad05 ("iio:filter:admv8818: add support for ADMV8818") Signed-off-by: Sam Winchenbach Link: https://patch.msgid.link/20250328174831.227202-4-sam.winchenbach@framepointer.org Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/filter/admv8818.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c index 3d8740caa14559..cd3aff9a2f7bf5 100644 --- a/drivers/iio/filter/admv8818.c +++ b/drivers/iio/filter/admv8818.c @@ -154,7 +154,7 @@ static int __admv8818_hpf_select(struct admv8818_state *st, u64 freq) } /* Close HPF frequency gap between 12 and 12.5 GHz */ - if (freq >= 12000 * HZ_PER_MHZ && freq <= 12500 * HZ_PER_MHZ) { + if (freq >= 12000ULL * HZ_PER_MHZ && freq < 12500ULL * HZ_PER_MHZ) { hpf_band = 3; hpf_step = 15; } From 1e1cfb92e546ec8adb2c196876c8d8d2621b5db0 Mon Sep 17 00:00:00 2001 From: Sam Winchenbach Date: Fri, 28 Mar 2025 13:48:29 -0400 Subject: [PATCH 2577/2924] iio: filter: admv8818: fix range calculation [ Upstream commit d542db7095d322bfcdc8e306db6f8c48358c9619 ] Search for the minimum error while ensuring that the LPF corner frequency is greater than the target, and the HPF corner frequency is lower than the target This fixes issues where the range calculations were suboptimal. Add two new DTS properties to set the margin between the input frequency and the calculated corner frequency Below is a generated table of the differences between the old algorithm and the new. This is a sweep from 0 to 20 GHz in 10 MHz steps. === HPF === freq = 1750 MHz, 3db: bypass => 1750 MHz freq = 3400 MHz, 3db: 3310 => 3400 MHz freq = 3410 MHz, 3db: 3310 => 3400 MHz freq = 3420 MHz, 3db: 3310 => 3400 MHz freq = 3660 MHz, 3db: 3550 => 3656 MHz freq = 6600 MHz, 3db: 6479 => 6600 MHz freq = 6610 MHz, 3db: 6479 => 6600 MHz freq = 6620 MHz, 3db: 6479 => 6600 MHz freq = 6630 MHz, 3db: 6479 => 6600 MHz freq = 6640 MHz, 3db: 6479 => 6600 MHz freq = 6650 MHz, 3db: 6479 => 6600 MHz freq = 6660 MHz, 3db: 6479 => 6600 MHz freq = 6670 MHz, 3db: 6479 => 6600 MHz freq = 6680 MHz, 3db: 6479 => 6600 MHz freq = 6690 MHz, 3db: 6479 => 6600 MHz freq = 6700 MHz, 3db: 6479 => 6600 MHz freq = 6710 MHz, 3db: 6479 => 6600 MHz freq = 6720 MHz, 3db: 6479 => 6600 MHz freq = 6730 MHz, 3db: 6479 => 6600 MHz freq = 6960 MHz, 3db: 6736 => 6960 MHz freq = 6970 MHz, 3db: 6736 => 6960 MHz freq = 6980 MHz, 3db: 6736 => 6960 MHz freq = 6990 MHz, 3db: 6736 => 6960 MHz freq = 7320 MHz, 3db: 7249 => 7320 MHz freq = 7330 MHz, 3db: 7249 => 7320 MHz freq = 7340 MHz, 3db: 7249 => 7320 MHz freq = 7350 MHz, 3db: 7249 => 7320 MHz freq = 7360 MHz, 3db: 7249 => 7320 MHz freq = 7370 MHz, 3db: 7249 => 7320 MHz freq = 7380 MHz, 3db: 7249 => 7320 MHz freq = 7390 MHz, 3db: 7249 => 7320 MHz freq = 7400 MHz, 3db: 7249 => 7320 MHz freq = 7410 MHz, 3db: 7249 => 7320 MHz freq = 7420 MHz, 3db: 7249 => 7320 MHz freq = 7430 MHz, 3db: 7249 => 7320 MHz freq = 7440 MHz, 3db: 7249 => 7320 MHz freq = 7450 MHz, 3db: 7249 => 7320 MHz freq = 7460 MHz, 3db: 7249 => 7320 MHz freq = 7470 MHz, 3db: 7249 => 7320 MHz freq = 7480 MHz, 3db: 7249 => 7320 MHz freq = 7490 MHz, 3db: 7249 => 7320 MHz freq = 7500 MHz, 3db: 7249 => 7320 MHz freq = 12500 MHz, 3db: 12000 => 12500 MHz === LPF === freq = 2050 MHz, 3db: bypass => 2050 MHz freq = 2170 MHz, 3db: 2290 => 2170 MHz freq = 2290 MHz, 3db: 2410 => 2290 MHz freq = 2410 MHz, 3db: 2530 => 2410 MHz freq = 2530 MHz, 3db: 2650 => 2530 MHz freq = 2650 MHz, 3db: 2770 => 2650 MHz freq = 2770 MHz, 3db: 2890 => 2770 MHz freq = 2890 MHz, 3db: 3010 => 2890 MHz freq = 3010 MHz, 3db: 3130 => 3010 MHz freq = 3130 MHz, 3db: 3250 => 3130 MHz freq = 3250 MHz, 3db: 3370 => 3250 MHz freq = 3260 MHz, 3db: 3370 => 3350 MHz freq = 3270 MHz, 3db: 3370 => 3350 MHz freq = 3280 MHz, 3db: 3370 => 3350 MHz freq = 3290 MHz, 3db: 3370 => 3350 MHz freq = 3300 MHz, 3db: 3370 => 3350 MHz freq = 3310 MHz, 3db: 3370 => 3350 MHz freq = 3320 MHz, 3db: 3370 => 3350 MHz freq = 3330 MHz, 3db: 3370 => 3350 MHz freq = 3340 MHz, 3db: 3370 => 3350 MHz freq = 3350 MHz, 3db: 3370 => 3350 MHz freq = 3370 MHz, 3db: 3490 => 3370 MHz freq = 3490 MHz, 3db: 3610 => 3490 MHz freq = 3610 MHz, 3db: 3730 => 3610 MHz freq = 3730 MHz, 3db: 3850 => 3730 MHz freq = 3850 MHz, 3db: 3870 => 3850 MHz freq = 3870 MHz, 3db: 4130 => 3870 MHz freq = 4130 MHz, 3db: 4390 => 4130 MHz freq = 4390 MHz, 3db: 4650 => 4390 MHz freq = 4650 MHz, 3db: 4910 => 4650 MHz freq = 4910 MHz, 3db: 5170 => 4910 MHz freq = 5170 MHz, 3db: 5430 => 5170 MHz freq = 5430 MHz, 3db: 5690 => 5430 MHz freq = 5690 MHz, 3db: 5950 => 5690 MHz freq = 5950 MHz, 3db: 6210 => 5950 MHz freq = 6210 MHz, 3db: 6470 => 6210 MHz freq = 6470 MHz, 3db: 6730 => 6470 MHz freq = 6730 MHz, 3db: 6990 => 6730 MHz freq = 6990 MHz, 3db: 7250 => 6990 MHz freq = 7000 MHz, 3db: 7250 => 7000 MHz freq = 7250 MHz, 3db: 7400 => 7250 MHz freq = 7400 MHz, 3db: 7800 => 7400 MHz freq = 7800 MHz, 3db: 8200 => 7800 MHz freq = 8200 MHz, 3db: 8600 => 8200 MHz freq = 8600 MHz, 3db: 9000 => 8600 MHz freq = 9000 MHz, 3db: 9400 => 9000 MHz freq = 9400 MHz, 3db: 9800 => 9400 MHz freq = 9800 MHz, 3db: 10200 => 9800 MHz freq = 10200 MHz, 3db: 10600 => 10200 MHz freq = 10600 MHz, 3db: 11000 => 10600 MHz freq = 11000 MHz, 3db: 11400 => 11000 MHz freq = 11400 MHz, 3db: 11800 => 11400 MHz freq = 11800 MHz, 3db: 12200 => 11800 MHz freq = 12200 MHz, 3db: 12600 => 12200 MHz freq = 12210 MHz, 3db: 12600 => 12550 MHz freq = 12220 MHz, 3db: 12600 => 12550 MHz freq = 12230 MHz, 3db: 12600 => 12550 MHz freq = 12240 MHz, 3db: 12600 => 12550 MHz freq = 12250 MHz, 3db: 12600 => 12550 MHz freq = 12260 MHz, 3db: 12600 => 12550 MHz freq = 12270 MHz, 3db: 12600 => 12550 MHz freq = 12280 MHz, 3db: 12600 => 12550 MHz freq = 12290 MHz, 3db: 12600 => 12550 MHz freq = 12300 MHz, 3db: 12600 => 12550 MHz freq = 12310 MHz, 3db: 12600 => 12550 MHz freq = 12320 MHz, 3db: 12600 => 12550 MHz freq = 12330 MHz, 3db: 12600 => 12550 MHz freq = 12340 MHz, 3db: 12600 => 12550 MHz freq = 12350 MHz, 3db: 12600 => 12550 MHz freq = 12360 MHz, 3db: 12600 => 12550 MHz freq = 12370 MHz, 3db: 12600 => 12550 MHz freq = 12380 MHz, 3db: 12600 => 12550 MHz freq = 12390 MHz, 3db: 12600 => 12550 MHz freq = 12400 MHz, 3db: 12600 => 12550 MHz freq = 12410 MHz, 3db: 12600 => 12550 MHz freq = 12420 MHz, 3db: 12600 => 12550 MHz freq = 12430 MHz, 3db: 12600 => 12550 MHz freq = 12440 MHz, 3db: 12600 => 12550 MHz freq = 12450 MHz, 3db: 12600 => 12550 MHz freq = 12460 MHz, 3db: 12600 => 12550 MHz freq = 12470 MHz, 3db: 12600 => 12550 MHz freq = 12480 MHz, 3db: 12600 => 12550 MHz freq = 12490 MHz, 3db: 12600 => 12550 MHz freq = 12500 MHz, 3db: 12600 => 12550 MHz freq = 12510 MHz, 3db: 12600 => 12550 MHz freq = 12520 MHz, 3db: 12600 => 12550 MHz freq = 12530 MHz, 3db: 12600 => 12550 MHz freq = 12540 MHz, 3db: 12600 => 12550 MHz freq = 12550 MHz, 3db: 12600 => 12550 MHz freq = 12600 MHz, 3db: 13000 => 12600 MHz freq = 12610 MHz, 3db: 13000 => 12970 MHz freq = 12620 MHz, 3db: 13000 => 12970 MHz freq = 12630 MHz, 3db: 13000 => 12970 MHz freq = 12640 MHz, 3db: 13000 => 12970 MHz freq = 12650 MHz, 3db: 13000 => 12970 MHz freq = 12660 MHz, 3db: 13000 => 12970 MHz freq = 12670 MHz, 3db: 13000 => 12970 MHz freq = 12680 MHz, 3db: 13000 => 12970 MHz freq = 12690 MHz, 3db: 13000 => 12970 MHz freq = 12700 MHz, 3db: 13000 => 12970 MHz freq = 12710 MHz, 3db: 13000 => 12970 MHz freq = 12720 MHz, 3db: 13000 => 12970 MHz freq = 12730 MHz, 3db: 13000 => 12970 MHz freq = 12740 MHz, 3db: 13000 => 12970 MHz freq = 12750 MHz, 3db: 13000 => 12970 MHz freq = 12760 MHz, 3db: 13000 => 12970 MHz freq = 12770 MHz, 3db: 13000 => 12970 MHz freq = 12780 MHz, 3db: 13000 => 12970 MHz freq = 12790 MHz, 3db: 13000 => 12970 MHz freq = 12800 MHz, 3db: 13000 => 12970 MHz freq = 12810 MHz, 3db: 13000 => 12970 MHz freq = 12820 MHz, 3db: 13000 => 12970 MHz freq = 12830 MHz, 3db: 13000 => 12970 MHz freq = 12840 MHz, 3db: 13000 => 12970 MHz freq = 12850 MHz, 3db: 13000 => 12970 MHz freq = 12860 MHz, 3db: 13000 => 12970 MHz freq = 12870 MHz, 3db: 13000 => 12970 MHz freq = 12880 MHz, 3db: 13000 => 12970 MHz freq = 12890 MHz, 3db: 13000 => 12970 MHz freq = 12900 MHz, 3db: 13000 => 12970 MHz freq = 12910 MHz, 3db: 13000 => 12970 MHz freq = 12920 MHz, 3db: 13000 => 12970 MHz freq = 12930 MHz, 3db: 13000 => 12970 MHz freq = 12940 MHz, 3db: 13000 => 12970 MHz freq = 12950 MHz, 3db: 13000 => 12970 MHz freq = 12960 MHz, 3db: 13000 => 12970 MHz freq = 12970 MHz, 3db: 13000 => 12970 MHz freq = 13000 MHz, 3db: 13390 => 13000 MHz freq = 13390 MHz, 3db: 13810 => 13390 MHz freq = 13810 MHz, 3db: 14230 => 13810 MHz freq = 14230 MHz, 3db: 14650 => 14230 MHz freq = 14650 MHz, 3db: 15070 => 14650 MHz freq = 15070 MHz, 3db: 15490 => 15070 MHz freq = 15490 MHz, 3db: 15910 => 15490 MHz freq = 15910 MHz, 3db: 16330 => 15910 MHz freq = 16330 MHz, 3db: 16750 => 16330 MHz freq = 16750 MHz, 3db: 17170 => 16750 MHz freq = 17170 MHz, 3db: 17590 => 17170 MHz freq = 17590 MHz, 3db: 18010 => 17590 MHz freq = 18010 MHz, 3db: 18430 => 18010 MHz freq = 18430 MHz, 3db: 18850 => 18430 MHz freq = 18850 MHz, 3db: bypass => 18850 MHz Fixes: f34fe888ad05 ("iio:filter:admv8818: add support for ADMV8818") Signed-off-by: Sam Winchenbach Link: https://patch.msgid.link/20250328174831.227202-5-sam.winchenbach@framepointer.org Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/filter/admv8818.c | 205 +++++++++++++++++++++++++--------- 1 file changed, 152 insertions(+), 53 deletions(-) diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c index cd3aff9a2f7bf5..380e119b3cf549 100644 --- a/drivers/iio/filter/admv8818.c +++ b/drivers/iio/filter/admv8818.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +71,16 @@ #define ADMV8818_HPF_WR0_MSK GENMASK(7, 4) #define ADMV8818_LPF_WR0_MSK GENMASK(3, 0) +#define ADMV8818_BAND_BYPASS 0 +#define ADMV8818_BAND_MIN 1 +#define ADMV8818_BAND_MAX 4 +#define ADMV8818_BAND_CORNER_LOW 0 +#define ADMV8818_BAND_CORNER_HIGH 1 + +#define ADMV8818_STATE_MIN 0 +#define ADMV8818_STATE_MAX 15 +#define ADMV8818_NUM_STATES 16 + enum { ADMV8818_BW_FREQ, ADMV8818_CENTER_FREQ @@ -90,16 +101,20 @@ struct admv8818_state { struct mutex lock; unsigned int filter_mode; u64 cf_hz; + u64 lpf_margin_hz; + u64 hpf_margin_hz; }; -static const unsigned long long freq_range_hpf[4][2] = { +static const unsigned long long freq_range_hpf[5][2] = { + {0ULL, 0ULL}, /* bypass */ {1750000000ULL, 3550000000ULL}, {3400000000ULL, 7250000000ULL}, {6600000000, 12000000000}, {12500000000, 19900000000} }; -static const unsigned long long freq_range_lpf[4][2] = { +static const unsigned long long freq_range_lpf[5][2] = { + {U64_MAX, U64_MAX}, /* bypass */ {2050000000ULL, 3850000000ULL}, {3350000000ULL, 7250000000ULL}, {7000000000, 13000000000}, @@ -121,44 +136,59 @@ static const char * const admv8818_modes[] = { static int __admv8818_hpf_select(struct admv8818_state *st, u64 freq) { - unsigned int hpf_step = 0, hpf_band = 0, i, j; - u64 freq_step; - int ret; + int band, state, ret; + unsigned int hpf_state = ADMV8818_STATE_MIN, hpf_band = ADMV8818_BAND_BYPASS; + u64 freq_error, min_freq_error, freq_corner, freq_step; - if (freq < freq_range_hpf[0][0]) + if (freq < freq_range_hpf[ADMV8818_BAND_MIN][ADMV8818_BAND_CORNER_LOW]) goto hpf_write; - if (freq > freq_range_hpf[3][1]) { - hpf_step = 15; - hpf_band = 4; - + if (freq >= freq_range_hpf[ADMV8818_BAND_MAX][ADMV8818_BAND_CORNER_HIGH]) { + hpf_state = ADMV8818_STATE_MAX; + hpf_band = ADMV8818_BAND_MAX; goto hpf_write; } - for (i = 0; i < 4; i++) { - freq_step = div_u64((freq_range_hpf[i][1] - - freq_range_hpf[i][0]), 15); + /* Close HPF frequency gap between 12 and 12.5 GHz */ + if (freq >= 12000ULL * HZ_PER_MHZ && freq < 12500ULL * HZ_PER_MHZ) { + hpf_state = ADMV8818_STATE_MAX; + hpf_band = 3; + goto hpf_write; + } - if (freq > freq_range_hpf[i][0] && - (freq < freq_range_hpf[i][1] + freq_step)) { - hpf_band = i + 1; + min_freq_error = U64_MAX; + for (band = ADMV8818_BAND_MIN; band <= ADMV8818_BAND_MAX; band++) { + /* + * This (and therefore all other ranges) have a corner + * frequency higher than the target frequency. + */ + if (freq_range_hpf[band][ADMV8818_BAND_CORNER_LOW] > freq) + break; - for (j = 1; j <= 16; j++) { - if (freq < (freq_range_hpf[i][0] + (freq_step * j))) { - hpf_step = j - 1; - break; - } + freq_step = freq_range_hpf[band][ADMV8818_BAND_CORNER_HIGH] - + freq_range_hpf[band][ADMV8818_BAND_CORNER_LOW]; + freq_step = div_u64(freq_step, ADMV8818_NUM_STATES - 1); + + for (state = ADMV8818_STATE_MIN; state <= ADMV8818_STATE_MAX; state++) { + freq_corner = freq_range_hpf[band][ADMV8818_BAND_CORNER_LOW] + + freq_step * state; + + /* + * This (and therefore all other states) have a corner + * frequency higher than the target frequency. + */ + if (freq_corner > freq) + break; + + freq_error = freq - freq_corner; + if (freq_error < min_freq_error) { + min_freq_error = freq_error; + hpf_state = state; + hpf_band = band; } - break; } } - /* Close HPF frequency gap between 12 and 12.5 GHz */ - if (freq >= 12000ULL * HZ_PER_MHZ && freq < 12500ULL * HZ_PER_MHZ) { - hpf_band = 3; - hpf_step = 15; - } - hpf_write: ret = regmap_update_bits(st->regmap, ADMV8818_REG_WR0_SW, ADMV8818_SW_IN_SET_WR0_MSK | @@ -170,7 +200,7 @@ static int __admv8818_hpf_select(struct admv8818_state *st, u64 freq) return regmap_update_bits(st->regmap, ADMV8818_REG_WR0_FILTER, ADMV8818_HPF_WR0_MSK, - FIELD_PREP(ADMV8818_HPF_WR0_MSK, hpf_step)); + FIELD_PREP(ADMV8818_HPF_WR0_MSK, hpf_state)); } static int admv8818_hpf_select(struct admv8818_state *st, u64 freq) @@ -186,31 +216,52 @@ static int admv8818_hpf_select(struct admv8818_state *st, u64 freq) static int __admv8818_lpf_select(struct admv8818_state *st, u64 freq) { - unsigned int lpf_step = 0, lpf_band = 0, i, j; - u64 freq_step; - int ret; + int band, state, ret; + unsigned int lpf_state = ADMV8818_STATE_MIN, lpf_band = ADMV8818_BAND_BYPASS; + u64 freq_error, min_freq_error, freq_corner, freq_step; - if (freq > freq_range_lpf[3][1]) + if (freq > freq_range_lpf[ADMV8818_BAND_MAX][ADMV8818_BAND_CORNER_HIGH]) goto lpf_write; - if (freq < freq_range_lpf[0][0]) { - lpf_band = 1; - + if (freq < freq_range_lpf[ADMV8818_BAND_MIN][ADMV8818_BAND_CORNER_LOW]) { + lpf_state = ADMV8818_STATE_MIN; + lpf_band = ADMV8818_BAND_MIN; goto lpf_write; } - for (i = 0; i < 4; i++) { - if (freq > freq_range_lpf[i][0] && freq < freq_range_lpf[i][1]) { - lpf_band = i + 1; - freq_step = div_u64((freq_range_lpf[i][1] - freq_range_lpf[i][0]), 15); + min_freq_error = U64_MAX; + for (band = ADMV8818_BAND_MAX; band >= ADMV8818_BAND_MIN; --band) { + /* + * At this point the highest corner frequency of + * all remaining ranges is below the target. + * LPF corner should be >= the target. + */ + if (freq > freq_range_lpf[band][ADMV8818_BAND_CORNER_HIGH]) + break; + + freq_step = freq_range_lpf[band][ADMV8818_BAND_CORNER_HIGH] - + freq_range_lpf[band][ADMV8818_BAND_CORNER_LOW]; + freq_step = div_u64(freq_step, ADMV8818_NUM_STATES - 1); + + for (state = ADMV8818_STATE_MAX; state >= ADMV8818_STATE_MIN; --state) { - for (j = 0; j <= 15; j++) { - if (freq < (freq_range_lpf[i][0] + (freq_step * j))) { - lpf_step = j; - break; - } + freq_corner = freq_range_lpf[band][ADMV8818_BAND_CORNER_LOW] + + state * freq_step; + + /* + * At this point all other states in range will + * place the corner frequency below the target + * LPF corner should >= the target. + */ + if (freq > freq_corner) + break; + + freq_error = freq_corner - freq; + if (freq_error < min_freq_error) { + min_freq_error = freq_error; + lpf_state = state; + lpf_band = band; } - break; } } @@ -225,7 +276,7 @@ static int __admv8818_lpf_select(struct admv8818_state *st, u64 freq) return regmap_update_bits(st->regmap, ADMV8818_REG_WR0_FILTER, ADMV8818_LPF_WR0_MSK, - FIELD_PREP(ADMV8818_LPF_WR0_MSK, lpf_step)); + FIELD_PREP(ADMV8818_LPF_WR0_MSK, lpf_state)); } static int admv8818_lpf_select(struct admv8818_state *st, u64 freq) @@ -242,16 +293,28 @@ static int admv8818_lpf_select(struct admv8818_state *st, u64 freq) static int admv8818_rfin_band_select(struct admv8818_state *st) { int ret; + u64 hpf_corner_target, lpf_corner_target; st->cf_hz = clk_get_rate(st->clkin); + /* Check for underflow */ + if (st->cf_hz > st->hpf_margin_hz) + hpf_corner_target = st->cf_hz - st->hpf_margin_hz; + else + hpf_corner_target = 0; + + /* Check for overflow */ + lpf_corner_target = st->cf_hz + st->lpf_margin_hz; + if (lpf_corner_target < st->cf_hz) + lpf_corner_target = U64_MAX; + mutex_lock(&st->lock); - ret = __admv8818_hpf_select(st, st->cf_hz); + ret = __admv8818_hpf_select(st, hpf_corner_target); if (ret) goto exit; - ret = __admv8818_lpf_select(st, st->cf_hz); + ret = __admv8818_lpf_select(st, lpf_corner_target); exit: mutex_unlock(&st->lock); return ret; @@ -278,8 +341,11 @@ static int __admv8818_read_hpf_freq(struct admv8818_state *st, u64 *hpf_freq) hpf_state = FIELD_GET(ADMV8818_HPF_WR0_MSK, data); - *hpf_freq = div_u64(freq_range_hpf[hpf_band - 1][1] - freq_range_hpf[hpf_band - 1][0], 15); - *hpf_freq = freq_range_hpf[hpf_band - 1][0] + (*hpf_freq * hpf_state); + *hpf_freq = freq_range_hpf[hpf_band][ADMV8818_BAND_CORNER_HIGH] - + freq_range_hpf[hpf_band][ADMV8818_BAND_CORNER_LOW]; + *hpf_freq = div_u64(*hpf_freq, ADMV8818_NUM_STATES - 1); + *hpf_freq = freq_range_hpf[hpf_band][ADMV8818_BAND_CORNER_LOW] + + (*hpf_freq * hpf_state); return ret; } @@ -316,8 +382,11 @@ static int __admv8818_read_lpf_freq(struct admv8818_state *st, u64 *lpf_freq) lpf_state = FIELD_GET(ADMV8818_LPF_WR0_MSK, data); - *lpf_freq = div_u64(freq_range_lpf[lpf_band - 1][1] - freq_range_lpf[lpf_band - 1][0], 15); - *lpf_freq = freq_range_lpf[lpf_band - 1][0] + (*lpf_freq * lpf_state); + *lpf_freq = freq_range_lpf[lpf_band][ADMV8818_BAND_CORNER_HIGH] - + freq_range_lpf[lpf_band][ADMV8818_BAND_CORNER_LOW]; + *lpf_freq = div_u64(*lpf_freq, ADMV8818_NUM_STATES - 1); + *lpf_freq = freq_range_lpf[lpf_band][ADMV8818_BAND_CORNER_LOW] + + (*lpf_freq * lpf_state); return ret; } @@ -641,6 +710,32 @@ static int admv8818_clk_setup(struct admv8818_state *st) return devm_add_action_or_reset(&spi->dev, admv8818_clk_notifier_unreg, st); } +static int admv8818_read_properties(struct admv8818_state *st) +{ + struct spi_device *spi = st->spi; + u32 mhz; + int ret; + + ret = device_property_read_u32(&spi->dev, "adi,lpf-margin-mhz", &mhz); + if (ret == 0) + st->lpf_margin_hz = (u64)mhz * HZ_PER_MHZ; + else if (ret == -EINVAL) + st->lpf_margin_hz = 0; + else + return ret; + + + ret = device_property_read_u32(&spi->dev, "adi,hpf-margin-mhz", &mhz); + if (ret == 0) + st->hpf_margin_hz = (u64)mhz * HZ_PER_MHZ; + else if (ret == -EINVAL) + st->hpf_margin_hz = 0; + else if (ret < 0) + return ret; + + return 0; +} + static int admv8818_probe(struct spi_device *spi) { struct iio_dev *indio_dev; @@ -672,6 +767,10 @@ static int admv8818_probe(struct spi_device *spi) mutex_init(&st->lock); + ret = admv8818_read_properties(st); + if (ret) + return ret; + ret = admv8818_init(st); if (ret) return ret; From e13e663f884ada40128671f7e61dae2eb75fdb55 Mon Sep 17 00:00:00 2001 From: Brian Pellegrino Date: Fri, 28 Mar 2025 13:48:31 -0400 Subject: [PATCH 2578/2924] iio: filter: admv8818: Support frequencies >= 2^32 [ Upstream commit 9016776f1301627de78a633bda7c898425a56572 ] This patch allows writing u64 values to the ADMV8818's high and low-pass filter frequencies. It includes the following changes: - Rejects negative frequencies in admv8818_write_raw. - Adds a write_raw_get_fmt function to admv8818's iio_info, returning IIO_VAL_INT_64 for the high and low-pass filter 3dB frequency channels. Fixes: f34fe888ad05 ("iio:filter:admv8818: add support for ADMV8818") Signed-off-by: Brian Pellegrino Signed-off-by: Sam Winchenbach Link: https://patch.msgid.link/20250328174831.227202-7-sam.winchenbach@framepointer.org Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/filter/admv8818.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c index 380e119b3cf549..cc8ce0fe74e7c6 100644 --- a/drivers/iio/filter/admv8818.c +++ b/drivers/iio/filter/admv8818.c @@ -402,6 +402,19 @@ static int admv8818_read_lpf_freq(struct admv8818_state *st, u64 *lpf_freq) return ret; } +static int admv8818_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: + case IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY: + return IIO_VAL_INT_64; + default: + return -EINVAL; + } +} + static int admv8818_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long info) @@ -410,6 +423,9 @@ static int admv8818_write_raw(struct iio_dev *indio_dev, u64 freq = ((u64)val2 << 32 | (u32)val); + if ((s64)freq < 0) + return -EINVAL; + switch (info) { case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: return admv8818_lpf_select(st, freq); @@ -571,6 +587,7 @@ static int admv8818_set_mode(struct iio_dev *indio_dev, static const struct iio_info admv8818_info = { .write_raw = admv8818_write_raw, + .write_raw_get_fmt = admv8818_write_raw_get_fmt, .read_raw = admv8818_read_raw, .debugfs_reg_access = &admv8818_reg_access, }; From 7fc479fb5ad219306dad9190131188fb2dcdea00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 17 Mar 2025 12:52:47 +0100 Subject: [PATCH 2579/2924] iio: adc: ad7124: Fix 3dB filter frequency reading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8712e4986e7ce42a14c762c4c350f290989986a5 ] The sinc4 filter has a factor 0.23 between Output Data Rate and f_{3dB} and for sinc3 the factor is 0.272 according to the data sheets for ad7124-4 (Rev. E.) and ad7124-8 (Rev. F). Fixes: cef2760954cf ("iio: adc: ad7124: add 3db filter") Signed-off-by: Uwe Kleine-König Reviewed-by: Marcelo Schmitt Link: https://patch.msgid.link/20250317115247.3735016-6-u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/ad7124.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 3ea81a98e45534..7d5d84a07cae1d 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -301,9 +301,9 @@ static int ad7124_get_3db_filter_freq(struct ad7124_state *st, switch (st->channels[channel].cfg.filter_type) { case AD7124_SINC3_FILTER: - return DIV_ROUND_CLOSEST(fadc * 230, 1000); + return DIV_ROUND_CLOSEST(fadc * 272, 1000); case AD7124_SINC4_FILTER: - return DIV_ROUND_CLOSEST(fadc * 262, 1000); + return DIV_ROUND_CLOSEST(fadc * 230, 1000); default: return -EINVAL; } From e3d530173b70514d4390a94f9f979acad689b70a Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Thu, 17 Apr 2025 14:50:32 -0500 Subject: [PATCH 2580/2924] usb: acpi: Prevent null pointer dereference in usb_acpi_add_usb4_devlink() [ Upstream commit 73fb0ec9436ae87bcae067ce35d6cdd72bade86c ] As demonstrated by the fix for update_port_device_state, commit 12783c0b9e2c ("usb: core: Prevent null pointer dereference in update_port_device_state"), usb_hub_to_struct_hub() can return NULL in certain scenarios, such as during hub driver unbind or teardown race conditions, even if the underlying usb_device structure exists. Plus, all other places that call usb_hub_to_struct_hub() in the same file do check for NULL return values. If usb_hub_to_struct_hub() returns NULL, the subsequent access to hub->ports[udev->portnum - 1] will cause a null pointer dereference. Signed-off-by: Chenyuan Yang Fixes: f1bfb4a6fed6 ("usb: acpi: add device link between tunneled USB3 device and USB4 Host Interface") Acked-by: Alan Stern Link: https://lore.kernel.org/r/20250417195032.1811338-1-chenyuan0y@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/core/usb-acpi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index 935c0efea0b640..ea1ce8beb0cbb2 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -165,6 +165,8 @@ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) return 0; hub = usb_hub_to_struct_hub(udev->parent); + if (!hub) + return 0; port_dev = hub->ports[udev->portnum - 1]; struct fwnode_handle *nhi_fwnode __free(fwnode_handle) = From 30aea6b925ec8995094c8dd7e1e3d8d0234f3cfe Mon Sep 17 00:00:00 2001 From: WangYuli Date: Wed, 16 Apr 2025 11:45:48 +0800 Subject: [PATCH 2581/2924] MIPS: Loongson64: Add missing '#interrupt-cells' for loongson64c_ls7a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6d223b8ffcd1593d032b71875def2daa71c53111 ] Similar to commit 98a9e2ac3755 ("MIPS: Loongson64: DTS: Fix msi node for ls7a"). Fix follow warnings: arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts:28.31-36.4: Warning (interrupt_provider): /bus@10000000/msi-controller@2ff00000: Missing '#interrupt-cells' in interrupt provider arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dtb: Warning (interrupt_map): Failed prerequisite 'interrupt_provider' Fixes: 24af105962c8 ("MIPS: Loongson64: DeviceTree for LS7A PCH") Tested-by: WangYuli Signed-off-by: WangYuli Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts b/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts index c7ea4f1c0bb21f..6c277ab83d4b94 100644 --- a/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts +++ b/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts @@ -29,6 +29,7 @@ compatible = "loongson,pch-msi-1.0"; reg = <0 0x2ff00000 0 0x8>; interrupt-controller; + #interrupt-cells = <1>; msi-controller; loongson,msi-base-vec = <64>; loongson,msi-num-vecs = <64>; From bece2360488b24effcf1e33a09d95607c99d7a4f Mon Sep 17 00:00:00 2001 From: Junhao He Date: Wed, 18 Sep 2024 11:53:27 +0800 Subject: [PATCH 2582/2924] coresight: Fixes device's owner field for registered using coresight_init_driver() [ Upstream commit 9f52aecc952ddf307571517d5c91136c8c4e87c9 ] The coresight_init_driver() of the coresight-core module is called from the sub coresgiht device (such as tmc/stm/funnle/...) module. It calls amba_driver_register() and Platform_driver_register(), which are macro functions that use the coresight-core's module to initialize the caller's owner field. Therefore, when the sub coresight device calls coresight_init_driver(), an incorrect THIS_MODULE value is captured. The sub coesgiht modules can be removed while their callbacks are running, resulting in a general protection failure. Add module parameter to coresight_init_driver() so can be called with the module of the callback. Fixes: 075b7cd7ad7d ("coresight: Add helpers registering/removing both AMBA and platform drivers") Signed-off-by: Junhao He Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20240918035327.9710-1-hejunhao3@huawei.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-catu.c | 2 +- drivers/hwtracing/coresight/coresight-core.c | 6 +++--- drivers/hwtracing/coresight/coresight-cpu-debug.c | 3 ++- drivers/hwtracing/coresight/coresight-funnel.c | 3 ++- drivers/hwtracing/coresight/coresight-replicator.c | 3 ++- drivers/hwtracing/coresight/coresight-stm.c | 2 +- drivers/hwtracing/coresight/coresight-tmc-core.c | 2 +- drivers/hwtracing/coresight/coresight-tpiu.c | 2 +- include/linux/coresight.h | 2 +- 9 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index fa170c966bc3be..96cb48b140afa8 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -702,7 +702,7 @@ static int __init catu_init(void) { int ret; - ret = coresight_init_driver("catu", &catu_driver, &catu_platform_driver); + ret = coresight_init_driver("catu", &catu_driver, &catu_platform_driver, THIS_MODULE); tmc_etr_set_catu_ops(&etr_catu_buf_ops); return ret; } diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index fb43ef6a3b1f0d..dabec7073aedaa 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -1585,17 +1585,17 @@ module_init(coresight_init); module_exit(coresight_exit); int coresight_init_driver(const char *drv, struct amba_driver *amba_drv, - struct platform_driver *pdev_drv) + struct platform_driver *pdev_drv, struct module *owner) { int ret; - ret = amba_driver_register(amba_drv); + ret = __amba_driver_register(amba_drv, owner); if (ret) { pr_err("%s: error registering AMBA driver\n", drv); return ret; } - ret = platform_driver_register(pdev_drv); + ret = __platform_driver_register(pdev_drv, owner); if (!ret) return 0; diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index 342c3aaf414dd8..a871d997330b09 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -774,7 +774,8 @@ static struct platform_driver debug_platform_driver = { static int __init debug_init(void) { - return coresight_init_driver("debug", &debug_driver, &debug_platform_driver); + return coresight_init_driver("debug", &debug_driver, &debug_platform_driver, + THIS_MODULE); } static void __exit debug_exit(void) diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 0541712b2bcb69..124fc2e26cfb1a 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -433,7 +433,8 @@ static struct amba_driver dynamic_funnel_driver = { static int __init funnel_init(void) { - return coresight_init_driver("funnel", &dynamic_funnel_driver, &funnel_driver); + return coresight_init_driver("funnel", &dynamic_funnel_driver, &funnel_driver, + THIS_MODULE); } static void __exit funnel_exit(void) diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index ee7ee79f6cf775..572dcd2bac16d9 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -438,7 +438,8 @@ static struct amba_driver dynamic_replicator_driver = { static int __init replicator_init(void) { - return coresight_init_driver("replicator", &dynamic_replicator_driver, &replicator_driver); + return coresight_init_driver("replicator", &dynamic_replicator_driver, &replicator_driver, + THIS_MODULE); } static void __exit replicator_exit(void) diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 26f9339f38b938..527347e4d16c5d 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -1058,7 +1058,7 @@ static struct platform_driver stm_platform_driver = { static int __init stm_init(void) { - return coresight_init_driver("stm", &stm_driver, &stm_platform_driver); + return coresight_init_driver("stm", &stm_driver, &stm_platform_driver, THIS_MODULE); } static void __exit stm_exit(void) diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index a7814e8e657b21..455b1c9b15682c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -1060,7 +1060,7 @@ static struct platform_driver tmc_platform_driver = { static int __init tmc_init(void) { - return coresight_init_driver("tmc", &tmc_driver, &tmc_platform_driver); + return coresight_init_driver("tmc", &tmc_driver, &tmc_platform_driver, THIS_MODULE); } static void __exit tmc_exit(void) diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 97ef36f03ec207..3e015928842808 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -318,7 +318,7 @@ static struct platform_driver tpiu_platform_driver = { static int __init tpiu_init(void) { - return coresight_init_driver("tpiu", &tpiu_driver, &tpiu_platform_driver); + return coresight_init_driver("tpiu", &tpiu_driver, &tpiu_platform_driver, THIS_MODULE); } static void __exit tpiu_exit(void) diff --git a/include/linux/coresight.h b/include/linux/coresight.h index d79a242b271d6e..cfcf6e4707ed94 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -723,7 +723,7 @@ coresight_find_output_type(struct coresight_platform_data *pdata, union coresight_dev_subtype subtype); int coresight_init_driver(const char *drv, struct amba_driver *amba_drv, - struct platform_driver *pdev_drv); + struct platform_driver *pdev_drv, struct module *owner); void coresight_remove_driver(struct amba_driver *amba_drv, struct platform_driver *pdev_drv); From d5f1bd703e485ed60389e296b4e7fd40d7bdfcc9 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Tue, 29 Apr 2025 16:12:59 -0700 Subject: [PATCH 2583/2924] coresight: catu: Introduce refcount and spinlock for enabling/disabling [ Upstream commit a03a0a08c6fe5e50c1b12ea41b9e228e7f649c22 ] When tracing ETM data on multiple CPUs concurrently via the perf interface, the CATU device is shared across different CPU paths. This can lead to race conditions when multiple CPUs attempt to enable or disable the CATU device simultaneously. To address these race conditions, this patch introduces the following changes: 1. The enable and disable operations for the CATU device are not reentrant. Therefore, a spinlock is added to ensure that only one CPU can enable or disable a given CATU device at any point in time. 2. A reference counter is used to manage the enable/disable state of the CATU device. The device is enabled when the first CPU requires it and is only disabled when the last CPU finishes using it. This ensures the device remains active as long as at least one CPU needs it. Fixes: fcacb5c154ba ("coresight: Introduce support for Coresight Address Translation Unit") Signed-off-by: Yabin Cui Reviewed-by: James Clark Reviewed-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250429231301.1952246-2-yabinc@google.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-catu.c | 25 +++++++++++++------- drivers/hwtracing/coresight/coresight-catu.h | 1 + 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index 96cb48b140afa8..d4e2e175e07700 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -458,12 +458,17 @@ static int catu_enable_hw(struct catu_drvdata *drvdata, enum cs_mode cs_mode, static int catu_enable(struct coresight_device *csdev, enum cs_mode mode, void *data) { - int rc; + int rc = 0; struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev); - CS_UNLOCK(catu_drvdata->base); - rc = catu_enable_hw(catu_drvdata, mode, data); - CS_LOCK(catu_drvdata->base); + guard(raw_spinlock_irqsave)(&catu_drvdata->spinlock); + if (csdev->refcnt == 0) { + CS_UNLOCK(catu_drvdata->base); + rc = catu_enable_hw(catu_drvdata, mode, data); + CS_LOCK(catu_drvdata->base); + } + if (!rc) + csdev->refcnt++; return rc; } @@ -486,12 +491,15 @@ static int catu_disable_hw(struct catu_drvdata *drvdata) static int catu_disable(struct coresight_device *csdev, void *__unused) { - int rc; + int rc = 0; struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev); - CS_UNLOCK(catu_drvdata->base); - rc = catu_disable_hw(catu_drvdata); - CS_LOCK(catu_drvdata->base); + guard(raw_spinlock_irqsave)(&catu_drvdata->spinlock); + if (--csdev->refcnt == 0) { + CS_UNLOCK(catu_drvdata->base); + rc = catu_disable_hw(catu_drvdata); + CS_LOCK(catu_drvdata->base); + } return rc; } @@ -550,6 +558,7 @@ static int __catu_probe(struct device *dev, struct resource *res) dev->platform_data = pdata; drvdata->base = base; + raw_spin_lock_init(&drvdata->spinlock); catu_desc.access = CSDEV_ACCESS_IOMEM(base); catu_desc.pdata = pdata; catu_desc.dev = dev; diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h index 141feac1c14b08..755776cd19c5bb 100644 --- a/drivers/hwtracing/coresight/coresight-catu.h +++ b/drivers/hwtracing/coresight/coresight-catu.h @@ -65,6 +65,7 @@ struct catu_drvdata { void __iomem *base; struct coresight_device *csdev; int irq; + raw_spinlock_t spinlock; }; #define CATU_REG32(name, offset) \ From f4d006a65aa5a8c9952bc0e2a579981712947445 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Tue, 29 Apr 2025 16:13:00 -0700 Subject: [PATCH 2584/2924] coresight: core: Disable helpers for devices that fail to enable [ Upstream commit f6028eeeb5e4cf86f93f805098c84974a79bba8a ] When enabling a SINK or LINK type coresight device fails, the associated helpers should be disabled. Fixes: 6148652807ba ("coresight: Enable and disable helper devices adjacent to the path") Signed-off-by: Yabin Cui Suggested-by: Suzuki K Poulose Reviewed-by: James Clark Reviewed-by: Mike Leach Reviewed-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250429231301.1952246-3-yabinc@google.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index dabec7073aedaa..d3523f0262af82 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -465,7 +465,7 @@ int coresight_enable_path(struct coresight_path *path, enum cs_mode mode, /* Enable all helpers adjacent to the path first */ ret = coresight_enable_helpers(csdev, mode, path); if (ret) - goto err; + goto err_disable_path; /* * ETF devices are tricky... They can be a link or a sink, * depending on how they are configured. If an ETF has been @@ -486,8 +486,10 @@ int coresight_enable_path(struct coresight_path *path, enum cs_mode mode, * that need disabling. Disabling the path here * would mean we could disrupt an existing session. */ - if (ret) + if (ret) { + coresight_disable_helpers(csdev, path); goto out; + } break; case CORESIGHT_DEV_TYPE_SOURCE: /* sources are enabled from either sysFS or Perf */ @@ -497,16 +499,19 @@ int coresight_enable_path(struct coresight_path *path, enum cs_mode mode, child = list_next_entry(nd, link)->csdev; ret = coresight_enable_link(csdev, parent, child, source); if (ret) - goto err; + goto err_disable_helpers; break; default: - goto err; + ret = -EINVAL; + goto err_disable_helpers; } } out: return ret; -err: +err_disable_helpers: + coresight_disable_helpers(csdev, path); +err_disable_path: coresight_disable_path_from(path, nd); goto out; } From 798d78a2cd73b52850e77ab668a5d3dc64ba040b Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 31 Mar 2025 18:36:40 +0200 Subject: [PATCH 2585/2924] counter: interrupt-cnt: Protect enable/disable OPs with mutex [ Upstream commit 7351312632e831e51383f48957d47712fae791ef ] Enable/disable seems to be racy on SMP, consider the following scenario: CPU0 CPU1 interrupt_cnt_enable_write(true) { if (priv->enabled == enable) return 0; if (enable) { priv->enabled = true; interrupt_cnt_enable_write(false) { if (priv->enabled == enable) return 0; if (enable) { priv->enabled = true; enable_irq(priv->irq); } else { disable_irq(priv->irq) priv->enabled = false; } enable_irq(priv->irq); } else { disable_irq(priv->irq); priv->enabled = false; } The above would result in priv->enabled == false, but IRQ left enabled. Protect both write (above race) and read (to propagate the value on SMP) callbacks with a mutex. Signed-off-by: Alexander Sverdlin Fixes: a55ebd47f21f ("counter: add IRQ or GPIO based counter") Acked-by: Oleksij Rempel Link: https://lore.kernel.org/r/20250331163642.2382651-1-alexander.sverdlin@siemens.com Signed-off-by: William Breathitt Gray Signed-off-by: Sasha Levin --- drivers/counter/interrupt-cnt.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/counter/interrupt-cnt.c b/drivers/counter/interrupt-cnt.c index 949598d51575a1..d83848d0fe2af5 100644 --- a/drivers/counter/interrupt-cnt.c +++ b/drivers/counter/interrupt-cnt.c @@ -3,12 +3,14 @@ * Copyright (c) 2021 Pengutronix, Oleksij Rempel */ +#include #include #include #include #include #include #include +#include #include #include @@ -19,6 +21,7 @@ struct interrupt_cnt_priv { struct gpio_desc *gpio; int irq; bool enabled; + struct mutex lock; struct counter_signal signals; struct counter_synapse synapses; struct counter_count cnts; @@ -41,6 +44,8 @@ static int interrupt_cnt_enable_read(struct counter_device *counter, { struct interrupt_cnt_priv *priv = counter_priv(counter); + guard(mutex)(&priv->lock); + *enable = priv->enabled; return 0; @@ -51,6 +56,8 @@ static int interrupt_cnt_enable_write(struct counter_device *counter, { struct interrupt_cnt_priv *priv = counter_priv(counter); + guard(mutex)(&priv->lock); + if (priv->enabled == enable) return 0; @@ -227,6 +234,8 @@ static int interrupt_cnt_probe(struct platform_device *pdev) if (ret) return ret; + mutex_init(&priv->lock); + ret = devm_counter_add(dev, counter); if (ret < 0) return dev_err_probe(dev, ret, "Failed to add counter\n"); From eb4c74eaa6e2d15f3bbd32941c9d2a25b29a718d Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Tue, 22 Apr 2025 16:37:37 +0100 Subject: [PATCH 2586/2924] fpga: fix potential null pointer deref in fpga_mgr_test_img_load_sgt() [ Upstream commit 6ebf1982038af12f3588417e4fd0417d2551da28 ] fpga_mgr_test_img_load_sgt() allocates memory for sgt using kunit_kzalloc() however it does not check if the allocation failed. It then passes sgt to sg_alloc_table(), which passes it to __sg_alloc_table(). This function calls memset() on sgt in an attempt to zero it out. If the allocation fails then sgt will be NULL and the memset will trigger a NULL pointer dereference. Fix this by checking the allocation with KUNIT_ASSERT_NOT_ERR_OR_NULL(). Reviewed-by: Marco Pagani Fixes: ccbc1c302115 ("fpga: add an initial KUnit suite for the FPGA Manager") Signed-off-by: Qasim Ijaz Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20250422153737.5264-1-qasdev00@gmail.com Signed-off-by: Xu Yilun Signed-off-by: Sasha Levin --- drivers/fpga/tests/fpga-mgr-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/fpga/tests/fpga-mgr-test.c b/drivers/fpga/tests/fpga-mgr-test.c index 8748babb050458..62975a39ee14e4 100644 --- a/drivers/fpga/tests/fpga-mgr-test.c +++ b/drivers/fpga/tests/fpga-mgr-test.c @@ -263,6 +263,7 @@ static void fpga_mgr_test_img_load_sgt(struct kunit *test) img_buf = init_test_buffer(test, IMAGE_SIZE); sgt = kunit_kzalloc(test, sizeof(*sgt), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); ret = sg_alloc_table(sgt, 1, GFP_KERNEL); KUNIT_ASSERT_EQ(test, ret, 0); sg_init_one(sgt->sgl, img_buf, IMAGE_SIZE); From 967a479803b4165d998b0e3195158615b991ca3e Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Wed, 9 Apr 2025 11:16:54 +0200 Subject: [PATCH 2587/2924] iio: dac: adi-axi-dac: fix bus read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 921fece3268c3bf2e8c20dd17ff9e5454fa16fda ] Fix bus read function. Testing the driver, on a random basis, wrong reads was detected, mainly by a wrong DAC chip ID read at first boot. Before reading the expected value from the AXI regmap, need always to wait for busy flag to be cleared. Fixes: e61d7178429a ("iio: dac: adi-axi-dac: extend features") Signed-off-by: Angelo Dureghello Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250409-ad3552r-fix-bus-read-v2-1-34d3b21e8ca0@baylibre.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/dac/adi-axi-dac.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iio/dac/adi-axi-dac.c b/drivers/iio/dac/adi-axi-dac.c index 892d770aec69c4..05b374e137d35d 100644 --- a/drivers/iio/dac/adi-axi-dac.c +++ b/drivers/iio/dac/adi-axi-dac.c @@ -707,6 +707,7 @@ static int axi_dac_bus_reg_read(struct iio_backend *back, u32 reg, u32 *val, { struct axi_dac_state *st = iio_backend_get_priv(back); int ret; + u32 ival; guard(mutex)(&st->lock); @@ -719,6 +720,13 @@ static int axi_dac_bus_reg_read(struct iio_backend *back, u32 reg, u32 *val, if (ret) return ret; + ret = regmap_read_poll_timeout(st->regmap, + AXI_DAC_UI_STATUS_REG, ival, + FIELD_GET(AXI_DAC_UI_STATUS_IF_BUSY, ival) == 0, + 10, 100 * KILO); + if (ret) + return ret; + return regmap_read(st->regmap, AXI_DAC_CUSTOM_RD_REG, val); } From 6c3b9e1167d072ce2d01cafec7866647cf8d3616 Mon Sep 17 00:00:00 2001 From: Antoniu Miclaus Date: Fri, 9 May 2025 13:16:57 +0300 Subject: [PATCH 2588/2924] iio: adc: ad4851: fix ad4858 chan pointer handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 499a8cee812588905cc940837e69918c1649a19e ] The pointer returned from ad4851_parse_channels_common() is incremented internally as each channel is populated. In ad4858_parse_channels(), the same pointer was further incremented while setting ext_scan_type fields for each channel. This resulted in indio_dev->channels being set to a pointer past the end of the allocated array, potentially causing memory corruption or undefined behavior. Fix this by iterating over the channels using an explicit index instead of incrementing the pointer. This preserves the original base pointer and ensures all channel metadata is set correctly. Fixes: 6250803fe2ec ("iio: adc: ad4851: add ad485x driver") Signed-off-by: Antoniu Miclaus Reviewed-by: Nuno Sá Link: https://patch.msgid.link/20250509101657.6742-1-antoniu.miclaus@analog.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/ad4851.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/iio/adc/ad4851.c b/drivers/iio/adc/ad4851.c index 98ebc853db7962..f1d2e2896f2a2d 100644 --- a/drivers/iio/adc/ad4851.c +++ b/drivers/iio/adc/ad4851.c @@ -1034,7 +1034,7 @@ static int ad4858_parse_channels(struct iio_dev *indio_dev) struct device *dev = &st->spi->dev; struct iio_chan_spec *ad4851_channels; const struct iio_chan_spec ad4851_chan = AD4858_IIO_CHANNEL; - int ret; + int ret, i = 0; ret = ad4851_parse_channels_common(indio_dev, &ad4851_channels, ad4851_chan); @@ -1042,15 +1042,15 @@ static int ad4858_parse_channels(struct iio_dev *indio_dev) return ret; device_for_each_child_node_scoped(dev, child) { - ad4851_channels->has_ext_scan_type = 1; + ad4851_channels[i].has_ext_scan_type = 1; if (fwnode_property_read_bool(child, "bipolar")) { - ad4851_channels->ext_scan_type = ad4851_scan_type_20_b; - ad4851_channels->num_ext_scan_type = ARRAY_SIZE(ad4851_scan_type_20_b); + ad4851_channels[i].ext_scan_type = ad4851_scan_type_20_b; + ad4851_channels[i].num_ext_scan_type = ARRAY_SIZE(ad4851_scan_type_20_b); } else { - ad4851_channels->ext_scan_type = ad4851_scan_type_20_u; - ad4851_channels->num_ext_scan_type = ARRAY_SIZE(ad4851_scan_type_20_u); + ad4851_channels[i].ext_scan_type = ad4851_scan_type_20_u; + ad4851_channels[i].num_ext_scan_type = ARRAY_SIZE(ad4851_scan_type_20_u); } - ad4851_channels++; + i++; } indio_dev->channels = ad4851_channels; From f9809df09f74de1abe4c538f29e38a4f6a5735a3 Mon Sep 17 00:00:00 2001 From: Mao Jinlong Date: Tue, 6 May 2025 23:37:16 -0700 Subject: [PATCH 2589/2924] coresight: tmc: fix failure to disable/enable ETF after reading [ Upstream commit d23bc38e8aa4efbd617bf660bb1a25fee9f6c177 ] ETF may fail to re-enable after reading, and driver->reading will not be set to false, this will cause failure to enable/disable to ETF. This change set driver->reading to false even if re-enabling fail. Fixes: 669c4614236a ("coresight: tmc: Don't enable TMC when it's not ready.") Co-developed-by: Yuanfang Zhang Signed-off-by: Yuanfang Zhang Signed-off-by: Mao Jinlong [ Added a comment to explain why we ignore the error ] Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250507063716.1945213-1-quic_jinlmao@quicinc.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-tmc-etf.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index d858740001c27d..a922e3b709638d 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -747,7 +747,6 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata) char *buf = NULL; enum tmc_mode mode; unsigned long flags; - int rc = 0; /* config types are set a boot time and never change */ if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETB && @@ -773,11 +772,11 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata) * can't be NULL. */ memset(drvdata->buf, 0, drvdata->size); - rc = __tmc_etb_enable_hw(drvdata); - if (rc) { - raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); - return rc; - } + /* + * Ignore failures to enable the TMC to make sure, we don't + * leave the TMC in a "reading" state. + */ + __tmc_etb_enable_hw(drvdata); } else { /* * The ETB/ETF is not tracing and the buffer was just read. From b22f48a3a7fd98248b6cb8a9c4a29cf6e0520261 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 19 May 2025 18:49:44 +0100 Subject: [PATCH 2590/2924] coresight: etm4x: Fix timestamp bit field handling [ Upstream commit ee811bc733be5c57a2bfecdf2f6f5d4db466200a ] Timestamps in the trace data appear as all zeros on recent kernels, although the feature works correctly on old kernels (e.g., v6.12). Since commit c382ee674c8b ("arm64/sysreg/tools: Move TRFCR definitions to sysreg"), the TRFCR_ELx_TS_{VIRTUAL|GUEST_PHYSICAL|PHYSICAL} macros were updated to remove the bit shift. As a result, the driver no longer shifts bits when operates the timestamp field. Fix this by using the FIELD_PREP() and FIELD_GET() helpers. Reported-by: Tamas Zsoldos Fixes: c382ee674c8b ("arm64/sysreg/tools: Move TRFCR definitions to sysreg") Signed-off-by: Leo Yan Reviewed-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250519174945.2245271-2-leo.yan@arm.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 2 +- drivers/hwtracing/coresight/coresight-etm4x-sysfs.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 2b8f1046384020..b42b03dba516d1 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1176,7 +1176,7 @@ static void cpu_detect_trace_filtering(struct etmv4_drvdata *drvdata) * tracing at the kernel EL and EL0, forcing to use the * virtual time as the timestamp. */ - trfcr = (TRFCR_EL1_TS_VIRTUAL | + trfcr = (FIELD_PREP(TRFCR_EL1_TS_MASK, TRFCR_EL1_TS_VIRTUAL) | TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE); diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index fdd0956fecb36d..c3ca904de584d7 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -2320,11 +2320,11 @@ static ssize_t ts_source_show(struct device *dev, goto out; } - switch (drvdata->trfcr & TRFCR_EL1_TS_MASK) { + val = FIELD_GET(TRFCR_EL1_TS_MASK, drvdata->trfcr); + switch (val) { case TRFCR_EL1_TS_VIRTUAL: case TRFCR_EL1_TS_GUEST_PHYSICAL: case TRFCR_EL1_TS_PHYSICAL: - val = FIELD_GET(TRFCR_EL1_TS_MASK, drvdata->trfcr); break; default: val = -1; From c7f2b00d5e17e3702c59cbe2070543aa1f8eb96e Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 14 May 2025 17:19:49 +0100 Subject: [PATCH 2591/2924] coresight/etm4: fix missing disable active config [ Upstream commit 895b12b7d7b8c651f73f57a1ea040d35aa7048cb ] When etm4 device is disabled via sysfs, it should disable its active count. Fixes: 7ebd0ec6cf94 ("coresight: configfs: Allow configfs to activate configuration") Signed-off-by: Yeoreum Yun Reviewed-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250514161951.3427590-2-yeoreum.yun@arm.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index b42b03dba516d1..88ef381ee6dd9b 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1020,6 +1020,9 @@ static void etm4_disable_sysfs(struct coresight_device *csdev) smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1); raw_spin_unlock(&drvdata->spinlock); + + cscfg_csdev_disable_active_config(csdev); + cpus_read_unlock(); /* From 42f8afb0b161631fd1d814d017f75f955475ad41 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 14 May 2025 17:19:50 +0100 Subject: [PATCH 2592/2924] coresight: holding cscfg_csdev_lock while removing cscfg from csdev [ Upstream commit 53b9e2659719b04f5ba7593f2af0f2335f75e94a ] There'll be possible race scenario for coresight config: CPU0 CPU1 (perf enable) load module cscfg_load_config_sets() activate config. // sysfs (sys_active_cnt == 1) ... cscfg_csdev_enable_active_config() lock(csdev->cscfg_csdev_lock) deactivate config // sysfs (sys_activec_cnt == 0) cscfg_unload_config_sets() cscfg_remove_owned_csdev_configs() // here load config activate by CPU1 unlock(csdev->cscfg_csdev_lock) iterating config_csdev_list could be raced with config_csdev_list's entry delete. To resolve this race , hold csdev->cscfg_csdev_lock() while cscfg_remove_owned_csdev_configs() Fixes: 02bd588e12df ("coresight: configuration: Update API to permit dynamic load/unload") Signed-off-by: Yeoreum Yun Reviewed-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250514161951.3427590-3-yeoreum.yun@arm.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-syscfg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c index a70c1454b4106c..23017612f2eae0 100644 --- a/drivers/hwtracing/coresight/coresight-syscfg.c +++ b/drivers/hwtracing/coresight/coresight-syscfg.c @@ -395,6 +395,8 @@ static void cscfg_remove_owned_csdev_configs(struct coresight_device *csdev, voi if (list_empty(&csdev->config_csdev_list)) return; + guard(raw_spinlock_irqsave)(&csdev->cscfg_csdev_lock); + list_for_each_entry_safe(config_csdev, tmp, &csdev->config_csdev_list, node) { if (config_csdev->config_desc->load_owner == load_owner) list_del(&config_csdev->node); From ed42ee1ed05ff2f4c36938379057413a40c56680 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Wed, 14 May 2025 17:19:51 +0100 Subject: [PATCH 2593/2924] coresight: prevent deactivate active config while enabling the config [ Upstream commit 408c97c4a5e0b634dcd15bf8b8808b382e888164 ] While enable active config via cscfg_csdev_enable_active_config(), active config could be deactivated via configfs' sysfs interface. This could make UAF issue in below scenario: CPU0 CPU1 (sysfs enable) load module cscfg_load_config_sets() activate config. // sysfs (sys_active_cnt == 1) ... cscfg_csdev_enable_active_config() lock(csdev->cscfg_csdev_lock) // here load config activate by CPU1 unlock(csdev->cscfg_csdev_lock) deactivate config // sysfs (sys_activec_cnt == 0) cscfg_unload_config_sets() unload module // access to config_desc which freed // while unloading module. cscfg_csdev_enable_config To address this, use cscfg_config_desc's active_cnt as a reference count which will be holded when - activate the config. - enable the activated config. and put the module reference when config_active_cnt == 0. Fixes: f8cce2ff3c04 ("coresight: syscfg: Add API to activate and enable configurations") Suggested-by: Suzuki K Poulose Signed-off-by: Yeoreum Yun Reviewed-by: Leo Yan Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20250514161951.3427590-4-yeoreum.yun@arm.com Signed-off-by: Sasha Levin --- .../hwtracing/coresight/coresight-config.h | 2 +- .../hwtracing/coresight/coresight-syscfg.c | 49 +++++++++++++------ 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-config.h b/drivers/hwtracing/coresight/coresight-config.h index b9ebc9fcfb7f20..90fd937d3bd837 100644 --- a/drivers/hwtracing/coresight/coresight-config.h +++ b/drivers/hwtracing/coresight/coresight-config.h @@ -228,7 +228,7 @@ struct cscfg_feature_csdev { * @feats_csdev:references to the device features to enable. */ struct cscfg_config_csdev { - const struct cscfg_config_desc *config_desc; + struct cscfg_config_desc *config_desc; struct coresight_device *csdev; bool enabled; struct list_head node; diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c index 23017612f2eae0..83dad24e0116d4 100644 --- a/drivers/hwtracing/coresight/coresight-syscfg.c +++ b/drivers/hwtracing/coresight/coresight-syscfg.c @@ -869,6 +869,25 @@ void cscfg_csdev_reset_feats(struct coresight_device *csdev) } EXPORT_SYMBOL_GPL(cscfg_csdev_reset_feats); +static bool cscfg_config_desc_get(struct cscfg_config_desc *config_desc) +{ + if (!atomic_fetch_inc(&config_desc->active_cnt)) { + /* must ensure that config cannot be unloaded in use */ + if (unlikely(cscfg_owner_get(config_desc->load_owner))) { + atomic_dec(&config_desc->active_cnt); + return false; + } + } + + return true; +} + +static void cscfg_config_desc_put(struct cscfg_config_desc *config_desc) +{ + if (!atomic_dec_return(&config_desc->active_cnt)) + cscfg_owner_put(config_desc->load_owner); +} + /* * This activate configuration for either perf or sysfs. Perf can have multiple * active configs, selected per event, sysfs is limited to one. @@ -892,22 +911,17 @@ static int _cscfg_activate_config(unsigned long cfg_hash) if (config_desc->available == false) return -EBUSY; - /* must ensure that config cannot be unloaded in use */ - err = cscfg_owner_get(config_desc->load_owner); - if (err) + if (!cscfg_config_desc_get(config_desc)) { + err = -EINVAL; break; + } + /* * increment the global active count - control changes to * active configurations */ atomic_inc(&cscfg_mgr->sys_active_cnt); - /* - * mark the descriptor as active so enable config on a - * device instance will use it - */ - atomic_inc(&config_desc->active_cnt); - err = 0; dev_dbg(cscfg_device(), "Activate config %s.\n", config_desc->name); break; @@ -922,9 +936,8 @@ static void _cscfg_deactivate_config(unsigned long cfg_hash) list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) { if ((unsigned long)config_desc->event_ea->var == cfg_hash) { - atomic_dec(&config_desc->active_cnt); atomic_dec(&cscfg_mgr->sys_active_cnt); - cscfg_owner_put(config_desc->load_owner); + cscfg_config_desc_put(config_desc); dev_dbg(cscfg_device(), "Deactivate config %s.\n", config_desc->name); break; } @@ -1049,7 +1062,7 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev, unsigned long cfg_hash, int preset) { struct cscfg_config_csdev *config_csdev_active = NULL, *config_csdev_item; - const struct cscfg_config_desc *config_desc; + struct cscfg_config_desc *config_desc; unsigned long flags; int err = 0; @@ -1064,8 +1077,8 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev, raw_spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); list_for_each_entry(config_csdev_item, &csdev->config_csdev_list, node) { config_desc = config_csdev_item->config_desc; - if ((atomic_read(&config_desc->active_cnt)) && - ((unsigned long)config_desc->event_ea->var == cfg_hash)) { + if (((unsigned long)config_desc->event_ea->var == cfg_hash) && + cscfg_config_desc_get(config_desc)) { config_csdev_active = config_csdev_item; csdev->active_cscfg_ctxt = (void *)config_csdev_active; break; @@ -1099,7 +1112,11 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev, err = -EBUSY; raw_spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); } + + if (err) + cscfg_config_desc_put(config_desc); } + return err; } EXPORT_SYMBOL_GPL(cscfg_csdev_enable_active_config); @@ -1138,8 +1155,10 @@ void cscfg_csdev_disable_active_config(struct coresight_device *csdev) raw_spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); /* true if there was an enabled active config */ - if (config_csdev) + if (config_csdev) { cscfg_csdev_disable_config(config_csdev); + cscfg_config_desc_put(config_csdev->config_desc); + } } EXPORT_SYMBOL_GPL(cscfg_csdev_disable_active_config); From d7a3c36a955d06c31bc1359397be5a1c1a1e80e7 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 15 May 2025 11:30:52 -0400 Subject: [PATCH 2594/2924] vt: remove VT_RESIZE and VT_RESIZEX from vt_compat_ioctl() [ Upstream commit c4c7ead7b86c1e7f11c64915b7e5bb6d2e242691 ] They are listed amon those cmd values that "treat 'arg' as an integer" which is wrong. They should instead fall into the default case. Probably nobody ever relied on that code since 2009 but still. Fixes: e92166517e3c ("tty: handle VT specific compat ioctls in vt driver") Signed-off-by: Nicolas Pitre Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/pr214s15-36r8-6732-2pop-159nq85o48r7@syhkavp.arg Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/vt/vt_ioctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 4b91072f3a4e91..1f2bdd2e1cc593 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -1103,8 +1103,6 @@ long vt_compat_ioctl(struct tty_struct *tty, case VT_WAITACTIVE: case VT_RELDISP: case VT_DISALLOCATE: - case VT_RESIZE: - case VT_RESIZEX: return vt_ioctl(tty, cmd, arg); /* From cb4b635efb78c5cdfab13b6037144aab901078d9 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Fri, 2 May 2025 09:21:48 +0200 Subject: [PATCH 2595/2924] staging: gpib: Fix PCMCIA config identifier [ Upstream commit 034a456869a071c635a9997e0bf3947a6cb20b25 ] The PCMCIA config identifier in the ines_exit_module function was never changed because it was misspelled in the original commit. Update the config parameter to use the correct identifier from gpib/Kconfig Fixes: bb1bd92fa0f2 ("staging: gpib: Add ines GPIB driver") Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250502072150.32714-2-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/gpib/ines/ines_gpib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/gpib/ines/ines_gpib.c b/drivers/staging/gpib/ines/ines_gpib.c index d93eb05dab9038..8e2375d8ddac24 100644 --- a/drivers/staging/gpib/ines/ines_gpib.c +++ b/drivers/staging/gpib/ines/ines_gpib.c @@ -1484,7 +1484,7 @@ static void __exit ines_exit_module(void) gpib_unregister_driver(&ines_pci_unaccel_interface); gpib_unregister_driver(&ines_pci_accel_interface); gpib_unregister_driver(&ines_isa_interface); -#ifdef GPIB__PCMCIA +#ifdef CONFIG_GPIB_PCMCIA gpib_unregister_driver(&ines_pcmcia_interface); gpib_unregister_driver(&ines_pcmcia_unaccel_interface); gpib_unregister_driver(&ines_pcmcia_accel_interface); From 8c797936a763adaabdb199bbbec63017d238e2cf Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Tue, 20 May 2025 17:51:00 +0200 Subject: [PATCH 2596/2924] staging: gpib: Fix secondary address restriction [ Upstream commit 5aac95320d0f17f1098960e903ce5e087f42bc70 ] GPIB secondary addresses have valid values between 0 and 31 inclusive. The Make Secondary Address function MSA, used to form the protocol byte, was using the gpib_address_restrict function erroneously restricting the address range to 0 through 30. Remove the call to gpib_address_restrict and simply trim the address to 5 bits. Fixes: 2da03e7e31aa ("staging: gpib: Add user api include files") Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250520155100.5808-1-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/staging/gpib/uapi/gpib_user.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/gpib/uapi/gpib_user.h b/drivers/staging/gpib/uapi/gpib_user.h index 5ff4588686fde3..0fd32fb9e7a64d 100644 --- a/drivers/staging/gpib/uapi/gpib_user.h +++ b/drivers/staging/gpib/uapi/gpib_user.h @@ -178,7 +178,7 @@ static inline uint8_t MTA(unsigned int addr) static inline uint8_t MSA(unsigned int addr) { - return gpib_address_restrict(addr) | SAD; + return (addr & 0x1f) | SAD; } static inline uint8_t PPE_byte(unsigned int dio_line, int sense) From a687bcb724082af94fdae5c3387da1801ab3a4f5 Mon Sep 17 00:00:00 2001 From: Christian Schrefl Date: Sat, 17 May 2025 13:06:15 +0200 Subject: [PATCH 2597/2924] rust: miscdevice: fix typo in MiscDevice::ioctl documentation [ Upstream commit 81e9edc1a8d657291409d70d93361d8277d226d8 ] Fixes one small typo (`utilties` to `utilities`) in the documentation of `MiscDevice::ioctl`. Fixes: f893691e7426 ("rust: miscdevice: add base miscdevice abstraction") Signed-off-by: Christian Schrefl Reviewed-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://lore.kernel.org/r/20250517-rust_miscdevice_fix_typo-v1-1-8c30a6237ba9@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- rust/kernel/miscdevice.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index fa9ecc42602a47..15d10e5c1db7da 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -121,7 +121,7 @@ pub trait MiscDevice: Sized { /// Handler for ioctls. /// - /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`]. + /// The `cmd` argument is usually manipulated using the utilities in [`kernel::ioctl`]. /// /// [`kernel::ioctl`]: mod@crate::ioctl fn ioctl( From 35187d96bc03992121f94707749160090f9e6942 Mon Sep 17 00:00:00 2001 From: Roxana Nicolescu Date: Tue, 6 May 2025 11:00:07 +0000 Subject: [PATCH 2598/2924] misc: lis3lv02d: Fix correct sysfs directory path for lis3lv02d [ Upstream commit 7b386d7454b610534026b279aa150e5a9e584082 ] The lis3lv02d driver does not create a platform device anymore. It was recently changed to use a faux device instead. Therefore the sysfs path has changed from /sys/devices/platform/lis3lv02d to /sys/devices/faux/lis3lv02d. Fixes: 3b18ccb5472b ("misc: lis3lv02d: convert to use faux_device") Signed-off-by: Roxana Nicolescu Link: https://lore.kernel.org/r/20250506110002.36477-1-nicolescu.roxana@protonmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- Documentation/misc-devices/lis3lv02d.rst | 6 +++--- drivers/misc/lis3lv02d/Kconfig | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/misc-devices/lis3lv02d.rst b/Documentation/misc-devices/lis3lv02d.rst index 959bd2b822cfa9..6b3b7405ebdf6e 100644 --- a/Documentation/misc-devices/lis3lv02d.rst +++ b/Documentation/misc-devices/lis3lv02d.rst @@ -22,10 +22,10 @@ sporting the feature officially called "HP Mobile Data Protection System 3D" or models (full list can be found in drivers/platform/x86/hp_accel.c) will have their axis automatically oriented on standard way (eg: you can directly play neverball). The accelerometer data is readable via -/sys/devices/platform/lis3lv02d. Reported values are scaled +/sys/devices/faux/lis3lv02d. Reported values are scaled to mg values (1/1000th of earth gravity). -Sysfs attributes under /sys/devices/platform/lis3lv02d/: +Sysfs attributes under /sys/devices/faux/lis3lv02d/: position - 3D position that the accelerometer reports. Format: "(x,y,z)" @@ -85,7 +85,7 @@ the accelerometer are converted into a "standard" organisation of the axes If your laptop model is not recognized (cf "dmesg"), you can send an email to the maintainer to add it to the database. When reporting a new laptop, please include the output of "dmidecode" plus the value of -/sys/devices/platform/lis3lv02d/position in these four cases. +/sys/devices/faux/lis3lv02d/position in these four cases. Q&A --- diff --git a/drivers/misc/lis3lv02d/Kconfig b/drivers/misc/lis3lv02d/Kconfig index bb2fec4b5880bf..56005243a230d5 100644 --- a/drivers/misc/lis3lv02d/Kconfig +++ b/drivers/misc/lis3lv02d/Kconfig @@ -10,7 +10,7 @@ config SENSORS_LIS3_SPI help This driver provides support for the LIS3LV02Dx accelerometer connected via SPI. The accelerometer data is readable via - /sys/devices/platform/lis3lv02d. + /sys/devices/faux/lis3lv02d. This driver also provides an absolute input class device, allowing the laptop to act as a pinball machine-esque joystick. @@ -26,7 +26,7 @@ config SENSORS_LIS3_I2C help This driver provides support for the LIS3LV02Dx accelerometer connected via I2C. The accelerometer data is readable via - /sys/devices/platform/lis3lv02d. + /sys/devices/faux/lis3lv02d. This driver also provides an absolute input class device, allowing the device to act as a pinball machine-esque joystick. From d7e380af93ba9188f5bc118a17416d8c76c546c5 Mon Sep 17 00:00:00 2001 From: Roxana Nicolescu Date: Thu, 1 May 2025 20:05:00 +0000 Subject: [PATCH 2599/2924] char: tlclk: Fix correct sysfs directory path for tlclk [ Upstream commit 46a4d12a005c58317e89b5644774c683365dc2ca ] The tlckl driver does not create a platform device anymore. It was recently changed to use a faux device instead. Therefore the sysfs path has changed from /sys/devices/platform/telco_clock to /sys/devices/faux/telco_clock. Fixes: 72239a78f9f5 ("tlclk: convert to use faux_device") Signed-off-by: Roxana Nicolescu Link: https://lore.kernel.org/r/20250501200457.18506-1-nicolescu.roxana@protonmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/char/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 8fb33c90482f79..ae61967605563c 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -404,7 +404,7 @@ config TELCLOCK configuration of the telecom clock configuration settings. This device is used for hardware synchronization across the ATCA backplane fabric. Upon loading, the driver exports a sysfs directory, - /sys/devices/platform/telco_clock, with a number of files for + /sys/devices/faux/telco_clock, with a number of files for controlling the behavior of this hardware. source "drivers/s390/char/Kconfig" From 34627b25251cf6c081494de8148179bf223094c4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 7 May 2025 11:07:28 +0200 Subject: [PATCH 2600/2924] mei: vsc: Cast tx_buf to (__be32 *) when passed to cpu_to_be32_array() [ Upstream commit 97ce0fe2b7240d47d9124daa92217e478c21a3ba ] Commit f88c0c72ffb0 ("mei: vsc: Use struct vsc_tp_packet as vsc-tp tx_buf and rx_buf type") changed the type of tx_buf from "void *" to "struct vsc_tp_packet *" and added a cast to (u32 *) when passing it to cpu_to_be32_array() and the same change was made for rx_buf. This triggers the type-check warning in sparse: vsc-tp.c:327:28: sparse: expected restricted __be32 [usertype] *dst vsc-tp.c:327:28: sparse: got unsigned int [usertype] * vsc-tp.c:343:42: sparse: expected restricted __be32 const [usertype] *src vsc-tp.c:343:42: sparse: got unsigned int [usertype] * Fix this by casting to (__be32 *) instead. Note actually changing the type of the buffers to "be32 *" is not an option this buffer does actually contain a "struct vsc_tp_packet" and is used as such most of the time. vsc_tp_rom_xfer() re-uses the buffers as just dumb arrays of 32 bit words to talk to the device before the firmware has booted, to avoid needing to allocate a separate buffer. Fixes: f88c0c72ffb0 ("mei: vsc: Use struct vsc_tp_packet as vsc-tp tx_buf and rx_buf type") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505071634.kZ0I7Va6-lkp@intel.com/ Signed-off-by: Hans de Goede Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20250507090728.115910-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/misc/mei/vsc-tp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index da26a080916c54..267d0de5fade83 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -324,7 +324,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) guard(mutex)(&tp->mutex); /* rom xfer is big endian */ - cpu_to_be32_array((u32 *)tp->tx_buf, obuf, words); + cpu_to_be32_array((__be32 *)tp->tx_buf, obuf, words); ret = read_poll_timeout(gpiod_get_value_cansleep, ret, !ret, VSC_TP_ROM_XFER_POLL_DELAY_US, @@ -340,7 +340,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) return ret; if (ibuf) - be32_to_cpu_array(ibuf, (u32 *)tp->rx_buf, words); + be32_to_cpu_array(ibuf, (__be32 *)tp->rx_buf, words); return ret; } From aae3f30240c03593532e382e2ad02a3568a01fb3 Mon Sep 17 00:00:00 2001 From: Marius Cristea Date: Thu, 24 Apr 2025 11:06:33 +0300 Subject: [PATCH 2601/2924] iio: adc: PAC1934: fix typo in documentation link [ Upstream commit 52c43d80fa8370eb877fc63b1fc1eec67e1b1410 ] Fix a typo,(PAC1934 -> PAC193X), into the link from an application note related to the ACPI device definition. Fixes: 0fb528c8255b ("iio: adc: adding support for PAC193x") Reported-by: Matteo Martelli Closes: https://patch.msgid.link/172794015844.2520.11909797050797595912@njaxe.localdomain Signed-off-by: Marius Cristea Reviewed-by: David Lechner Link: https://patch.msgid.link/20250424-pac1934-doc_link-v1-1-9832445cb270@microchip.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/pac1934.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/pac1934.c b/drivers/iio/adc/pac1934.c index 20802b7f49ea84..09fe88eb3fb045 100644 --- a/drivers/iio/adc/pac1934.c +++ b/drivers/iio/adc/pac1934.c @@ -1081,7 +1081,7 @@ static int pac1934_chip_identify(struct pac1934_chip_info *info) /* * documentation related to the ACPI device definition - * https://ww1.microchip.com/downloads/aemDocuments/documents/OTH/ApplicationNotes/ApplicationNotes/PAC1934-Integration-Notes-for-Microsoft-Windows-10-and-Windows-11-Driver-Support-DS00002534.pdf + * https://ww1.microchip.com/downloads/aemDocuments/documents/OTH/ApplicationNotes/ApplicationNotes/PAC193X-Integration-Notes-for-Microsoft-Windows-10-and-Windows-11-Driver-Support-DS00002534.pdf */ static int pac1934_acpi_parse_channel_config(struct i2c_client *client, struct pac1934_chip_info *info) From 2ff84314be02bd8e00244cb40125625e3d1dc1af Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Mon, 28 Apr 2025 08:54:11 +0200 Subject: [PATCH 2602/2924] iio: adc: mcp3911: fix device dependent mappings for conversion result registers [ Upstream commit f62c49d8f32d6ce8871b01795498352775aa61db ] The conversion result registers differs between devices. Make sure the mapping is correct by using a device dependent .get_raw() callback function. Fixes: 732ad34260d3 ("iio: adc: mcp3911: add support for the whole MCP39xx family") Co-developed-by: Lukas Rauber Signed-off-by: Lukas Rauber Signed-off-by: Marcus Folkesson Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250428-mcp3911-fixes-v2-1-406e39330c3d@gmail.com Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin --- drivers/iio/adc/mcp3911.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c index 6748b44d568db6..60a19c35807ab7 100644 --- a/drivers/iio/adc/mcp3911.c +++ b/drivers/iio/adc/mcp3911.c @@ -6,7 +6,7 @@ * Copyright (C) 2018 Kent Gustavsson */ #include -#include +#include #include #include #include @@ -79,6 +79,8 @@ #define MCP3910_CONFIG1_CLKEXT BIT(6) #define MCP3910_CONFIG1_VREFEXT BIT(7) +#define MCP3910_CHANNEL(ch) (MCP3911_REG_CHANNEL0 + (ch)) + #define MCP3910_REG_OFFCAL_CH0 0x0f #define MCP3910_OFFCAL(ch) (MCP3910_REG_OFFCAL_CH0 + (ch) * 6) @@ -110,6 +112,7 @@ struct mcp3911_chip_info { int (*get_offset)(struct mcp3911 *adc, int channel, int *val); int (*set_offset)(struct mcp3911 *adc, int channel, int val); int (*set_scale)(struct mcp3911 *adc, int channel, u32 val); + int (*get_raw)(struct mcp3911 *adc, int channel, int *val); }; struct mcp3911 { @@ -170,6 +173,18 @@ static int mcp3911_update(struct mcp3911 *adc, u8 reg, u32 mask, u32 val, u8 len return mcp3911_write(adc, reg, val, len); } +static int mcp3911_read_s24(struct mcp3911 *const adc, u8 const reg, s32 *const val) +{ + u32 uval; + int const ret = mcp3911_read(adc, reg, &uval, 3); + + if (ret) + return ret; + + *val = sign_extend32(uval, 23); + return ret; +} + static int mcp3910_enable_offset(struct mcp3911 *adc, bool enable) { unsigned int mask = MCP3910_CONFIG0_EN_OFFCAL; @@ -194,6 +209,11 @@ static int mcp3910_set_offset(struct mcp3911 *adc, int channel, int val) return adc->chip->enable_offset(adc, 1); } +static int mcp3910_get_raw(struct mcp3911 *adc, int channel, s32 *val) +{ + return mcp3911_read_s24(adc, MCP3910_CHANNEL(channel), val); +} + static int mcp3911_enable_offset(struct mcp3911 *adc, bool enable) { unsigned int mask = MCP3911_STATUSCOM_EN_OFFCAL; @@ -218,6 +238,11 @@ static int mcp3911_set_offset(struct mcp3911 *adc, int channel, int val) return adc->chip->enable_offset(adc, 1); } +static int mcp3911_get_raw(struct mcp3911 *adc, int channel, s32 *val) +{ + return mcp3911_read_s24(adc, MCP3911_CHANNEL(channel), val); +} + static int mcp3910_get_osr(struct mcp3911 *adc, u32 *val) { int ret; @@ -321,12 +346,9 @@ static int mcp3911_read_raw(struct iio_dev *indio_dev, guard(mutex)(&adc->lock); switch (mask) { case IIO_CHAN_INFO_RAW: - ret = mcp3911_read(adc, - MCP3911_CHANNEL(channel->channel), val, 3); + ret = adc->chip->get_raw(adc, channel->channel, val); if (ret) return ret; - - *val = sign_extend32(*val, 23); return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: ret = adc->chip->get_offset(adc, channel->channel, val); @@ -799,6 +821,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, [MCP3911] = { .channels = mcp3911_channels, @@ -810,6 +833,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3911_get_offset, .set_offset = mcp3911_set_offset, .set_scale = mcp3911_set_scale, + .get_raw = mcp3911_get_raw, }, [MCP3912] = { .channels = mcp3912_channels, @@ -821,6 +845,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, [MCP3913] = { .channels = mcp3913_channels, @@ -832,6 +857,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, [MCP3914] = { .channels = mcp3914_channels, @@ -843,6 +869,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, [MCP3918] = { .channels = mcp3918_channels, @@ -854,6 +881,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, [MCP3919] = { .channels = mcp3919_channels, @@ -865,6 +893,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, }; static const struct of_device_id mcp3911_dt_ids[] = { From 84dfe77602e81392ea07303673ea30fa7ac2fefd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 May 2025 15:41:40 +0200 Subject: [PATCH 2603/2924] USB: gadget: udc: fix const issue in gadget_match_driver() [ Upstream commit 5f5cc794fac605afd3bef8065e33096aeacf6257 ] gadget_match_driver() takes a const pointer, and then decides to cast it away into a non-const one, which is not a good thing to do overall. Fix this up by properly setting the pointers to be const to preserve that attribute. Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Link: https://lore.kernel.org/r/2025052139-rash-unsaddle-7c5e@gregkh Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 4b3d5075621aa0..d709e24c1fd422 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1570,7 +1570,7 @@ static int gadget_match_driver(struct device *dev, const struct device_driver *d { struct usb_gadget *gadget = dev_to_usb_gadget(dev); struct usb_udc *udc = gadget->udc; - struct usb_gadget_driver *driver = container_of(drv, + const struct usb_gadget_driver *driver = container_of(drv, struct usb_gadget_driver, driver); /* If the driver specifies a udc_name, it must match the UDC's name */ From d32e4b0546b2b0570cdd67ec95750697cbb86e6e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 May 2025 15:35:24 +0200 Subject: [PATCH 2604/2924] USB: typec: fix const issue in typec_match() [ Upstream commit ae4432e01dd967a64f6670a152d91d5328032726 ] typec_match() takes a const pointer, and then decides to cast it away into a non-const one, which is not a good thing to do overall. Fix this up by properly setting the pointers to be const to preserve that attribute. Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/2025052126-scholar-stainless-ad55@gregkh Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c index ae90688d23e400..a884cec9ab7e88 100644 --- a/drivers/usb/typec/bus.c +++ b/drivers/usb/typec/bus.c @@ -449,7 +449,7 @@ ATTRIBUTE_GROUPS(typec); static int typec_match(struct device *dev, const struct device_driver *driver) { - struct typec_altmode_driver *drv = to_altmode_driver(driver); + const struct typec_altmode_driver *drv = to_altmode_driver(driver); struct typec_altmode *altmode = to_typec_altmode(dev); const struct typec_device_id *id; From 966229c6bac9b4a35116fc03dfd235d260073a24 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 27 May 2025 23:34:05 +0800 Subject: [PATCH 2605/2924] loop: add file_start_write() and file_end_write() [ Upstream commit 39d86db34e41b96bd86f1955cd0ce6cd9c5fca4c ] file_start_write() and file_end_write() should be added around ->write_iter(). Recently we switch to ->write_iter() from vfs_iter_write(), and the implied file_start_write() and file_end_write() are lost. Also we never add them for dio code path, so add them back for covering both. Cc: Jeff Moyer Fixes: f2fed441c69b ("loop: stop using vfs_iter_{read,write} for buffered I/O") Fixes: bc07c10a3603 ("block: loop: support DIO & AIO") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250527153405.837216-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/loop.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e2b1f377f58563..f8d136684109aa 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -308,11 +308,14 @@ static void lo_complete_rq(struct request *rq) static void lo_rw_aio_do_completion(struct loop_cmd *cmd) { struct request *rq = blk_mq_rq_from_pdu(cmd); + struct loop_device *lo = rq->q->queuedata; if (!atomic_dec_and_test(&cmd->ref)) return; kfree(cmd->bvec); cmd->bvec = NULL; + if (req_op(rq) == REQ_OP_WRITE) + file_end_write(lo->lo_backing_file); if (likely(!blk_should_fake_timeout(rq->q))) blk_mq_complete_request(rq); } @@ -387,9 +390,10 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_flags = 0; } - if (rw == ITER_SOURCE) + if (rw == ITER_SOURCE) { + file_start_write(lo->lo_backing_file); ret = file->f_op->write_iter(&cmd->iocb, &iter); - else + } else ret = file->f_op->read_iter(&cmd->iocb, &iter); lo_rw_aio_do_completion(cmd); From 933f3eab1d489af8d734bff855b10d29dd5968a4 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Tue, 27 May 2025 19:57:08 +0200 Subject: [PATCH 2606/2924] drm/connector: only call HDMI audio helper plugged cb if non-null [ Upstream commit be9b3f9a54101c19226c25ba7163d291183777a0 ] On driver remove, sound/soc/codecs/hdmi-codec.c calls the plugged_cb with NULL as the callback function and codec_dev, as seen in its hdmi_remove function. The HDMI audio helper then happily tries calling said null function pointer, and produces an Oops as a result. Fix this by only executing the callback if fn is non-null. This means the .plugged_cb and .plugged_cb_dev members still get appropriately cleared. Fixes: baf616647fe6 ("drm/connector: implement generic HDMI audio helpers") Signed-off-by: Nicolas Frattaroli Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250527-hdmi-audio-helper-remove-fix-v1-1-6cf77de364d8@collabora.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/display/drm_hdmi_audio_helper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/display/drm_hdmi_audio_helper.c b/drivers/gpu/drm/display/drm_hdmi_audio_helper.c index 05afc9f0bdd6b6..ae8a0cf595fc6f 100644 --- a/drivers/gpu/drm/display/drm_hdmi_audio_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_audio_helper.c @@ -103,7 +103,8 @@ static int drm_connector_hdmi_audio_hook_plugged_cb(struct device *dev, connector->hdmi_audio.plugged_cb = fn; connector->hdmi_audio.plugged_cb_dev = codec_dev; - fn(codec_dev, connector->hdmi_audio.last_state); + if (fn) + fn(codec_dev, connector->hdmi_audio.last_state); mutex_unlock(&connector->hdmi_audio.lock); From e0930ed863233439f8276c81a5cbb463f06e6f25 Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Sun, 2 Mar 2025 16:30:43 +0800 Subject: [PATCH 2607/2924] drm/bridge: analogix_dp: Remove the unnecessary calls to clk_disable_unprepare() during probing [ Upstream commit 6579a03e68ffa5feb2d2823dea16ca7466f6de16 ] With the commit f37952339cc2 ("drm/bridge: analogix_dp: handle clock via runtime PM"), the PM operations can help enable/disable the clock. The err_disable_clk label and clk_disable_unprepare() operations are no longer necessary because the analogix_dp_resume() will not be called during probing. Fixes: f37952339cc2 ("drm/bridge: analogix_dp: handle clock via runtime PM") Suggested-by: Douglas Anderson Reviewed-by: Douglas Anderson Signed-off-by: Damon Ding Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250302083043.3197235-1-damon.ding@rock-chips.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- .../gpu/drm/bridge/analogix/analogix_dp_core.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 071168aa0c3bda..533a6e17cf6747 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1597,10 +1597,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) } dp->reg_base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(dp->reg_base)) { - ret = PTR_ERR(dp->reg_base); - goto err_disable_clk; - } + if (IS_ERR(dp->reg_base)) + return ERR_CAST(dp->reg_base); dp->force_hpd = of_property_read_bool(dev->of_node, "force-hpd"); @@ -1612,8 +1610,7 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) if (IS_ERR(dp->hpd_gpiod)) { dev_err(dev, "error getting HDP GPIO: %ld\n", PTR_ERR(dp->hpd_gpiod)); - ret = PTR_ERR(dp->hpd_gpiod); - goto err_disable_clk; + return ERR_CAST(dp->hpd_gpiod); } if (dp->hpd_gpiod) { @@ -1633,8 +1630,7 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) if (dp->irq == -ENXIO) { dev_err(&pdev->dev, "failed to get irq\n"); - ret = -ENODEV; - goto err_disable_clk; + return ERR_PTR(-ENODEV); } ret = devm_request_threaded_irq(&pdev->dev, dp->irq, @@ -1643,15 +1639,11 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) irq_flags, "analogix-dp", dp); if (ret) { dev_err(&pdev->dev, "failed to request irq\n"); - goto err_disable_clk; + return ERR_PTR(ret); } disable_irq(dp->irq); return dp; - -err_disable_clk: - clk_disable_unprepare(dp->clock); - return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(analogix_dp_probe); From 6bb773d15837692f6c1624c9a513601b38764a48 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Thu, 15 May 2025 11:41:24 +0200 Subject: [PATCH 2608/2924] accel/ivpu: Reorder Doorbell Unregister and Command Queue Destruction [ Upstream commit 4557cc834712eca4eae7adbd9f0a06bdd8f79c99 ] Refactor ivpu_cmdq_unregister() to ensure the doorbell is unregistered before destroying the command queue. The NPU firmware requires doorbells to be unregistered prior to command queue destruction. If doorbell remains registered when command queue destroy command is sent firmware will automatically unregister the doorbell, making subsequent unregister attempts no-operations (NOPs). Ensure compliance with firmware expectations by moving the doorbell unregister call ahead of the command queue destruction logic, thus preventing unnecessary NOP operation. Fixes: 465a3914b254 ("accel/ivpu: Add API for command queue create/destroy/submit") Signed-off-by: Karol Wachowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250515094124.255141-1-jacek.lawrynowicz@linux.intel.com Signed-off-by: Sasha Levin --- drivers/accel/ivpu/ivpu_job.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index b28da35c30b675..1c8e283ad98542 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -247,6 +247,10 @@ static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cm if (!cmdq->db_id) return 0; + ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); + if (!ret) + ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id); + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id); if (!ret) @@ -254,10 +258,6 @@ static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cm cmdq->id, file_priv->ctx.id); } - ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); - if (!ret) - ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id); - xa_erase(&file_priv->vdev->db_xa, cmdq->db_id); cmdq->db_id = 0; From 7b0a3da39e7d95e406417857109e54c2ac93351d Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Mon, 10 Mar 2025 18:41:03 +0800 Subject: [PATCH 2609/2924] drm/bridge: analogix_dp: Remove CONFIG_PM related check in analogix_dp_bind()/analogix_dp_unbind() [ Upstream commit c71db051142a74b255cb61b84d8fedae3b70952f ] Remove the check related to CONFIG_PM in order to make the code more concise, as the CONFIG_PM should be a required option for many drivers. In addition, it is preferable to use devm_pm_runtime_enable() instead of manually invoking pm_runtime_enable() followed by pm_runtime_disable(). Suggested-by: Douglas Anderson Reviewed-by: Douglas Anderson Signed-off-by: Damon Ding Link: https://lore.kernel.org/r/20250310104114.2608063-3-damon.ding@rock-chips.com Signed-off-by: Dmitry Baryshkov Stable-dep-of: fd03f82a026c ("drm/bridge: analogix_dp: Fix clk-disable removal") Signed-off-by: Sasha Levin --- .../drm/bridge/analogix/analogix_dp_core.c | 30 ++++--------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 533a6e17cf6747..b7a96f5bc00745 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1688,15 +1688,11 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) dp->drm_dev = drm_dev; dp->encoder = dp->plat_data->encoder; - if (IS_ENABLED(CONFIG_PM)) { - pm_runtime_use_autosuspend(dp->dev); - pm_runtime_set_autosuspend_delay(dp->dev, 100); - pm_runtime_enable(dp->dev); - } else { - ret = analogix_dp_resume(dp); - if (ret) - return ret; - } + pm_runtime_use_autosuspend(dp->dev); + pm_runtime_set_autosuspend_delay(dp->dev, 100); + ret = devm_pm_runtime_enable(dp->dev); + if (ret) + return ret; dp->aux.name = "DP-AUX"; dp->aux.transfer = analogix_dpaux_transfer; @@ -1706,7 +1702,7 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) ret = drm_dp_aux_register(&dp->aux); if (ret) { DRM_ERROR("failed to register AUX (%d)\n", ret); - goto err_disable_pm_runtime; + return ret; } ret = analogix_dp_create_bridge(drm_dev, dp); @@ -1719,13 +1715,6 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) err_unregister_aux: drm_dp_aux_unregister(&dp->aux); -err_disable_pm_runtime: - if (IS_ENABLED(CONFIG_PM)) { - pm_runtime_dont_use_autosuspend(dp->dev); - pm_runtime_disable(dp->dev); - } else { - analogix_dp_suspend(dp); - } return ret; } @@ -1742,13 +1731,6 @@ void analogix_dp_unbind(struct analogix_dp_device *dp) } drm_dp_aux_unregister(&dp->aux); - - if (IS_ENABLED(CONFIG_PM)) { - pm_runtime_dont_use_autosuspend(dp->dev); - pm_runtime_disable(dp->dev); - } else { - analogix_dp_suspend(dp); - } } EXPORT_SYMBOL_GPL(analogix_dp_unbind); From b841e7ddceb8205684d8067375fb3100d64b9ef4 Mon Sep 17 00:00:00 2001 From: Damon Ding Date: Mon, 10 Mar 2025 18:41:07 +0800 Subject: [PATCH 2610/2924] drm/bridge: analogix_dp: Add support to get panel from the DP AUX bus [ Upstream commit e5e9fa9f7aad4ad7eedb6359baea9193531bf4ac ] The main modification is moving the DP AUX initialization from function analogix_dp_bind() to analogix_dp_probe(). In order to get the EDID of eDP panel during probing, it is also needed to advance PM operations to ensure that eDP controller and phy are prepared for AUX transmission. Signed-off-by: Damon Ding Reviewed-by: Dmitry Baryshkov Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20250310104114.2608063-7-damon.ding@rock-chips.com Signed-off-by: Dmitry Baryshkov Stable-dep-of: fd03f82a026c ("drm/bridge: analogix_dp: Fix clk-disable removal") Signed-off-by: Sasha Levin --- .../drm/bridge/analogix/analogix_dp_core.c | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index b7a96f5bc00745..28a5326e11b354 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1643,6 +1643,17 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) } disable_irq(dp->irq); + dp->aux.name = "DP-AUX"; + dp->aux.transfer = analogix_dpaux_transfer; + dp->aux.dev = dp->dev; + drm_dp_aux_init(&dp->aux); + + pm_runtime_use_autosuspend(dp->dev); + pm_runtime_set_autosuspend_delay(dp->dev, 100); + ret = devm_pm_runtime_enable(dp->dev); + if (ret) + goto err_disable_clk; + return dp; } EXPORT_SYMBOL_GPL(analogix_dp_probe); @@ -1688,15 +1699,6 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) dp->drm_dev = drm_dev; dp->encoder = dp->plat_data->encoder; - pm_runtime_use_autosuspend(dp->dev); - pm_runtime_set_autosuspend_delay(dp->dev, 100); - ret = devm_pm_runtime_enable(dp->dev); - if (ret) - return ret; - - dp->aux.name = "DP-AUX"; - dp->aux.transfer = analogix_dpaux_transfer; - dp->aux.dev = dp->dev; dp->aux.drm_dev = drm_dev; ret = drm_dp_aux_register(&dp->aux); From 2e826a8588e267e4468700ddd499ea9442e41f3c Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 28 May 2025 00:51:19 +0200 Subject: [PATCH 2611/2924] drm/bridge: analogix_dp: Fix clk-disable removal [ Upstream commit fd03f82a026cc03cb8051a8c6487c99f96c9029f ] Commit 6579a03e68ff ("drm/bridge: analogix_dp: Remove the unnecessary calls to clk_disable_unprepare() during probing") removed the mismatched clock_disable calls from analogix_dp_probe. But that patch was created and sent before commit e5e9fa9f7aad ("drm/bridge: analogix_dp: Add support to get panel from the DP AUX bus") was merged, so couldn't know about this change. So in the original patch the last change is if (ret) { dev_err(&pdev->dev, "failed to request irq\n"); - goto err_disable_clk; + return ERR_PTR(ret); } disable_irq(dp->irq); return dp; - -err_disable_clk: - clk_disable_unprepare(dp->clock); - return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(analogix_dp_probe); the analogix_dp_core.c actually now has the runtime-pm handling between disable_irq() and return do introducing another goto err_clk_disable there. So remove that one too and return an error pointer, to not create build breakage. Fixes: 6579a03e68ff ("drm/bridge: analogix_dp: Remove the unnecessary calls to clk_disable_unprepare() during probing") Signed-off-by: Heiko Stuebner Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250527225120.3361663-1-heiko@sntech.de Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/analogix/analogix_dp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 28a5326e11b354..5222b1e9f533d0 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1652,7 +1652,7 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) pm_runtime_set_autosuspend_delay(dp->dev, 100); ret = devm_pm_runtime_enable(dp->dev); if (ret) - goto err_disable_clk; + return ERR_PTR(ret); return dp; } From 2350c3f81b79f0800b4adf7380161a1ca9f59f2e Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 May 2025 12:51:47 -0400 Subject: [PATCH 2612/2924] drm/xe: Make xe_gt_freq part of the Documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 55f8aa083604ce098c9d6a0911c6bcde15d03a80 ] The documentation was created with the creation of the component, however it has never been actually shown in the actual Documentation. While doing this, fixes the identation style, to avoid new warnings while building htmldocs. Fixes: bef52b5c7a19 ("drm/xe: Create a xe_gt_freq component for raw management and sysfs") Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250521165146.39616-3-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit af53f0fd99c3bbb3afd29f1612c9e88c5a92cc01) Signed-off-by: Thomas Hellström Signed-off-by: Sasha Levin --- Documentation/gpu/xe/index.rst | 1 + Documentation/gpu/xe/xe_gt_freq.rst | 14 ++++++++++++++ drivers/gpu/drm/xe/xe_gt_freq.c | 2 ++ 3 files changed, 17 insertions(+) create mode 100644 Documentation/gpu/xe/xe_gt_freq.rst diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst index 92cfb25e64d327..b53a0cc7f66a36 100644 --- a/Documentation/gpu/xe/index.rst +++ b/Documentation/gpu/xe/index.rst @@ -16,6 +16,7 @@ DG2, etc is provided to prototype the driver. xe_migrate xe_cs xe_pm + xe_gt_freq xe_pcode xe_gt_mcr xe_wa diff --git a/Documentation/gpu/xe/xe_gt_freq.rst b/Documentation/gpu/xe/xe_gt_freq.rst new file mode 100644 index 00000000000000..c0811200e32755 --- /dev/null +++ b/Documentation/gpu/xe/xe_gt_freq.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========================== +Xe GT Frequency Management +========================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_freq.c + :doc: Xe GT Frequency Management + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_freq.c + :internal: diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 604bdc7c817360..985efbc6852867 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -32,6 +32,7 @@ * Xe's Freq provides a sysfs API for frequency management: * * device/tile#/gt#/freq0/_freq *read-only* files: + * * - act_freq: The actual resolved frequency decided by PCODE. * - cur_freq: The current one requested by GuC PC to the PCODE. * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. @@ -39,6 +40,7 @@ * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. * * device/tile#/gt#/freq0/_freq *read-write* files: + * * - min_freq: Min frequency request. * - max_freq: Max frequency request. * If max <= min, then freq_min becomes a fixed frequency request. From c57aa95bba918c7cd399f9830e1fedcadfb46b26 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 May 2025 12:51:48 -0400 Subject: [PATCH 2613/2924] drm/xe: Add missing documentation of rpa_freq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 40493d97b329f8185c0f04dc0ef2b9ffc58e7f3b ] While at it, already adjust the rpe_freq frequency, to highlight that both are calculated by PCODE at runtime. Fixes: c6aac2fa77a3 ("drm/xe: Introduce the RPa information") Cc: Vinay Belgaumkar Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Reviewed-by: Vinay Belgaumkar Link: https://lore.kernel.org/r/20250521165146.39616-4-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 39578fa40420fb11dbe4f42225a347e945d8fd0e) Signed-off-by: Thomas Hellström Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/xe_gt_freq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 985efbc6852867..552ac92496a408 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -36,7 +36,10 @@ * - act_freq: The actual resolved frequency decided by PCODE. * - cur_freq: The current one requested by GuC PC to the PCODE. * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. + * - rpa_freq: The Render Performance (RP) A level, which is the achiveable one. + * Calculated by PCODE at runtime based on multiple running conditions * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. + * Calculated by PCODE at runtime based on multiple running conditions * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. * * device/tile#/gt#/freq0/_freq *read-write* files: From 798acdc241a270a3d959fc7ab2b377226d02a83f Mon Sep 17 00:00:00 2001 From: Tengteng Yang Date: Tue, 27 May 2025 11:04:19 +0800 Subject: [PATCH 2614/2924] Fix sock_exceed_buf_limit not being triggered in __sk_mem_raise_allocated [ Upstream commit 8542d6fac25c03b4bf36b2d762cfe60fda8491bb ] When a process under memory pressure is not part of any cgroup and the charged flag is false, trace_sock_exceed_buf_limit was not called as expected. This regression was introduced by commit 2def8ff3fdb6 ("sock: Code cleanup on __sk_mem_raise_allocated()"). The fix changes the default value of charged to true while preserving existing logic. Fixes: 2def8ff3fdb6 ("sock: Code cleanup on __sk_mem_raise_allocated()") Signed-off-by: Abel Wu Signed-off-by: Tengteng Yang Link: https://patch.msgid.link/20250527030419.67693-1-yangtengteng@bytedance.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/sock.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index e54449c9ab0bad..5034d0fbd4a427 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3234,16 +3234,16 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL; struct proto *prot = sk->sk_prot; - bool charged = false; + bool charged = true; long allocated; sk_memory_allocated_add(sk, amt); allocated = sk_memory_allocated(sk); if (memcg) { - if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge())) + charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()); + if (!charged) goto suppress_allocation; - charged = true; } /* Under limit. */ @@ -3328,7 +3328,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) sk_memory_allocated_sub(sk, amt); - if (charged) + if (memcg && charged) mem_cgroup_uncharge_skmem(memcg, amt); return 0; From 4ab8c0f8905c9c4d05e7f437e65a9a365573ff02 Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Tue, 27 May 2025 19:41:52 +0800 Subject: [PATCH 2615/2924] page_pool: Fix use-after-free in page_pool_recycle_in_ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 271683bb2cf32e5126c592b5d5e6a756fa374fd9 ] syzbot reported a uaf in page_pool_recycle_in_ring: BUG: KASAN: slab-use-after-free in lock_release+0x151/0xa30 kernel/locking/lockdep.c:5862 Read of size 8 at addr ffff8880286045a0 by task syz.0.284/6943 CPU: 0 UID: 0 PID: 6943 Comm: syz.0.284 Not tainted 6.13.0-rc3-syzkaller-gdfa94ce54f41 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0x169/0x550 mm/kasan/report.c:489 kasan_report+0x143/0x180 mm/kasan/report.c:602 lock_release+0x151/0xa30 kernel/locking/lockdep.c:5862 __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:165 [inline] _raw_spin_unlock_bh+0x1b/0x40 kernel/locking/spinlock.c:210 spin_unlock_bh include/linux/spinlock.h:396 [inline] ptr_ring_produce_bh include/linux/ptr_ring.h:164 [inline] page_pool_recycle_in_ring net/core/page_pool.c:707 [inline] page_pool_put_unrefed_netmem+0x748/0xb00 net/core/page_pool.c:826 page_pool_put_netmem include/net/page_pool/helpers.h:323 [inline] page_pool_put_full_netmem include/net/page_pool/helpers.h:353 [inline] napi_pp_put_page+0x149/0x2b0 net/core/skbuff.c:1036 skb_pp_recycle net/core/skbuff.c:1047 [inline] skb_free_head net/core/skbuff.c:1094 [inline] skb_release_data+0x6c4/0x8a0 net/core/skbuff.c:1125 skb_release_all net/core/skbuff.c:1190 [inline] __kfree_skb net/core/skbuff.c:1204 [inline] sk_skb_reason_drop+0x1c9/0x380 net/core/skbuff.c:1242 kfree_skb_reason include/linux/skbuff.h:1263 [inline] __skb_queue_purge_reason include/linux/skbuff.h:3343 [inline] root cause is: page_pool_recycle_in_ring ptr_ring_produce spin_lock(&r->producer_lock); WRITE_ONCE(r->queue[r->producer++], ptr) //recycle last page to pool page_pool_release page_pool_scrub page_pool_empty_ring ptr_ring_consume page_pool_return_page //release all page __page_pool_destroy free_percpu(pool->recycle_stats); free(pool) //free spin_unlock(&r->producer_lock); //pool->ring uaf read recycle_stat_inc(pool, ring); page_pool can be free while page pool recycle the last page in ring. Add producer-lock barrier to page_pool_release to prevent the page pool from being free before all pages have been recycled. recycle_stat_inc() is empty when CONFIG_PAGE_POOL_STATS is not enabled, which will trigger Wempty-body build warning. Add definition for pool stat macro to fix warning. Suggested-by: Jakub Kicinski Link: https://lore.kernel.org/netdev/20250513083123.3514193-1-dongchenchen2@huawei.com Fixes: ff7d6b27f894 ("page_pool: refurbish version of page_pool code") Reported-by: syzbot+204a4382fcb3311f3858@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=204a4382fcb3311f3858 Signed-off-by: Dong Chenchen Reviewed-by: Toke Høiland-Jørgensen Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250527114152.3119109-1-dongchenchen2@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/page_pool.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 2b768486594185..2d9c51f480fb5f 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -153,9 +153,9 @@ u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats) EXPORT_SYMBOL(page_pool_ethtool_stats_get); #else -#define alloc_stat_inc(pool, __stat) -#define recycle_stat_inc(pool, __stat) -#define recycle_stat_add(pool, __stat, val) +#define alloc_stat_inc(...) do { } while (0) +#define recycle_stat_inc(...) do { } while (0) +#define recycle_stat_add(...) do { } while (0) #endif static bool page_pool_producer_lock(struct page_pool *pool) @@ -741,19 +741,16 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem) { - int ret; - /* BH protection not needed if current is softirq */ - if (in_softirq()) - ret = ptr_ring_produce(&pool->ring, (__force void *)netmem); - else - ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem); + bool in_softirq, ret; - if (!ret) { + /* BH protection not needed if current is softirq */ + in_softirq = page_pool_producer_lock(pool); + ret = !__ptr_ring_produce(&pool->ring, (__force void *)netmem); + if (ret) recycle_stat_inc(pool, ring); - return true; - } + page_pool_producer_unlock(pool, in_softirq); - return false; + return ret; } /* Only allow direct recycling in special circumstances, into the @@ -1146,10 +1143,14 @@ static void page_pool_scrub(struct page_pool *pool) static int page_pool_release(struct page_pool *pool) { + bool in_softirq; int inflight; page_pool_scrub(pool); inflight = page_pool_inflight(pool, true); + /* Acquire producer lock to make sure producers have exited. */ + in_softirq = page_pool_producer_lock(pool); + page_pool_producer_unlock(pool, in_softirq); if (!inflight) __page_pool_destroy(pool); From 8501d814a674c68bfa49ea15be5a4fbd37acb498 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Tue, 27 May 2025 13:56:23 +0200 Subject: [PATCH 2616/2924] net: stmmac: platform: guarantee uniqueness of bus_id [ Upstream commit eb7fd7aa35bfcc1e1fda4ecc42ccfcb526cdc780 ] bus_id is currently derived from the ethernetX alias. If one is missing for the device, 0 is used. If ethernet0 points to another stmmac device or if there are 2+ stmmac devices without an ethernet alias, then bus_id will be 0 for all of those. This is an issue because the bus_id is used to generate the mdio bus id (new_bus->id in drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c stmmac_mdio_register) and this needs to be unique. This allows to avoid needing to define ethernet aliases for devices with multiple stmmac controllers (such as the Rockchip RK3588) for multiple stmmac devices to probe properly. Obviously, the bus_id isn't guaranteed to be stable across reboots if no alias is set for the device but that is easily fixed by simply adding an alias if this is desired. Fixes: 25c83b5c2e82 ("dt:net:stmmac: Add support to dwmac version 3.610 and 3.710") Signed-off-by: Quentin Schulz Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20250527-stmmac-mdio-bus_id-v2-1-a5ca78454e3c@cherry.de Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index c73eff6a56b87a..15205a47cafc27 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -430,6 +430,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) struct device_node *np = pdev->dev.of_node; struct plat_stmmacenet_data *plat; struct stmmac_dma_cfg *dma_cfg; + static int bus_id = -ENODEV; int phy_mode; void *ret; int rc; @@ -465,8 +466,14 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) of_property_read_u32(np, "max-speed", &plat->max_speed); plat->bus_id = of_alias_get_id(np, "ethernet"); - if (plat->bus_id < 0) - plat->bus_id = 0; + if (plat->bus_id < 0) { + if (bus_id < 0) + bus_id = of_alias_get_highest_id("ethernet"); + /* No ethernet alias found, init at -1 so first bus_id is 0 */ + if (bus_id < 0) + bus_id = -1; + plat->bus_id = ++bus_id; + } /* Default to phy auto-detection */ plat->phy_addr = -1; From 1850cf73f2717f059b38bef8bc3fe775d61ccf50 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Tue, 27 May 2025 06:08:16 -0700 Subject: [PATCH 2617/2924] gve: Fix RX_BUFFERS_POSTED stat to report per-queue fill_cnt [ Upstream commit f41a94aade120dc60322865f363cee7865f2df01 ] Previously, the RX_BUFFERS_POSTED stat incorrectly reported the fill_cnt from RX queue 0 for all queues, resulting in inaccurate per-queue statistics. Fix this by correctly indexing priv->rx[idx].fill_cnt for each RX queue. Fixes: 24aeb56f2d38 ("gve: Add Gvnic stats AQ command and ethtool show/set-priv-flags.") Signed-off-by: Alok Tiwari Link: https://patch.msgid.link/20250527130830.1812903-1-alok.a.tiwari@oracle.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/google/gve/gve_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index c3791cf23c876c..d561d45021a581 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -2153,7 +2153,7 @@ void gve_handle_report_stats(struct gve_priv *priv) }; stats[stats_idx++] = (struct stats) { .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), - .value = cpu_to_be64(priv->rx[0].fill_cnt), + .value = cpu_to_be64(priv->rx[idx].fill_cnt), .queue_id = cpu_to_be32(idx), }; } From 9ff60e0d9974dccf24e89bcd3ee7933e538d929f Mon Sep 17 00:00:00 2001 From: Charalampos Mitrodimas Date: Tue, 27 May 2025 16:35:44 +0000 Subject: [PATCH 2618/2924] net: tipc: fix refcount warning in tipc_aead_encrypt [ Upstream commit f29ccaa07cf3d35990f4d25028cc55470d29372b ] syzbot reported a refcount warning [1] caused by calling get_net() on a network namespace that is being destroyed (refcount=0). This happens when a TIPC discovery timer fires during network namespace cleanup. The recently added get_net() call in commit e279024617134 ("net/tipc: fix slab-use-after-free Read in tipc_aead_encrypt_done") attempts to hold a reference to the network namespace. However, if the namespace is already being destroyed, its refcount might be zero, leading to the use-after-free warning. Replace get_net() with maybe_get_net(), which safely checks if the refcount is non-zero before incrementing it. If the namespace is being destroyed, return -ENODEV early, after releasing the bearer reference. [1]: https://lore.kernel.org/all/68342b55.a70a0220.253bc2.0091.GAE@google.com/T/#m12019cf9ae77e1954f666914640efa36d52704a2 Reported-by: syzbot+f0c4a4aba757549ae26c@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68342b55.a70a0220.253bc2.0091.GAE@google.com/T/#m12019cf9ae77e1954f666914640efa36d52704a2 Fixes: e27902461713 ("net/tipc: fix slab-use-after-free Read in tipc_aead_encrypt_done") Signed-off-by: Charalampos Mitrodimas Reviewed-by: Tung Nguyen Link: https://patch.msgid.link/20250527-net-tipc-warning-v2-1-df3dc398a047@posteo.net Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/tipc/crypto.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 8584893b478510..79f91b6ca8c847 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -818,7 +818,11 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, } /* Get net to avoid freed tipc_crypto when delete namespace */ - get_net(aead->crypto->net); + if (!maybe_get_net(aead->crypto->net)) { + tipc_bearer_put(b); + rc = -ENODEV; + goto exit; + } /* Now, do encrypt */ rc = crypto_aead_encrypt(req); From 0918e3b62f8675a24ef0b9c250b600302252ba58 Mon Sep 17 00:00:00 2001 From: Yanqing Wang Date: Wed, 28 May 2025 15:53:51 +0800 Subject: [PATCH 2619/2924] driver: net: ethernet: mtk_star_emac: fix suspend/resume issue [ Upstream commit ba99c627aac85bc746fb4a6e2d79edb3ad100326 ] Identify the cause of the suspend/resume hang: netif_carrier_off() is called during link state changes and becomes stuck while executing linkwatch_work(). To resolve this issue, call netif_device_detach() during the Ethernet suspend process to temporarily detach the network device from the kernel and prevent the suspend/resume hang. Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver") Signed-off-by: Yanqing Wang Signed-off-by: Macpaul Lin Signed-off-by: Biao Huang Link: https://patch.msgid.link/20250528075351.593068-1-macpaul.lin@mediatek.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index b175119a6a7da5..b83886a4112105 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1463,6 +1463,8 @@ static __maybe_unused int mtk_star_suspend(struct device *dev) if (netif_running(ndev)) mtk_star_disable(ndev); + netif_device_detach(ndev); + clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks); return 0; @@ -1487,6 +1489,8 @@ static __maybe_unused int mtk_star_resume(struct device *dev) clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks); } + netif_device_attach(ndev); + return ret; } From b7516919625b473e977e4d4c0715620316912c57 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 28 May 2025 11:11:09 +0300 Subject: [PATCH 2620/2924] net/mlx4_en: Prevent potential integer overflow calculating Hz [ Upstream commit 54d34165b4f786d7fea8412a18fb4a54c1eab623 ] The "freq" variable is in terms of MHz and "max_val_cycles" is in terms of Hz. The fact that "max_val_cycles" is a u64 suggests that support for high frequency is intended but the "freq_khz * 1000" would overflow the u32 type if we went above 4GHz. Use unsigned long long type for the mutliplication to prevent that. Fixes: 31c128b66e5b ("net/mlx4_en: Choose time-stamping shift value according to HW frequency") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Link: https://patch.msgid.link/aDbFHe19juIJKjsb@stanley.mountain Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index cd754cd76bde1b..d73a2044dc2662 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -249,7 +249,7 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = { static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; - u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 max_val_cycles = freq_khz * 1000ULL * MLX4_EN_WRAP_AROUND_SEC; u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); From 250154b79714595ecc1104ba357a11e51dea5b63 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 28 May 2025 11:36:19 +0200 Subject: [PATCH 2621/2924] net: lan966x: Make sure to insert the vlan tags also in host mode [ Upstream commit 27eab4c644236a9324084a70fe79e511cbd07393 ] When running these commands on DUT (and similar at the other end) ip link set dev eth0 up ip link add link eth0 name eth0.10 type vlan id 10 ip addr add 10.0.0.1/24 dev eth0.10 ip link set dev eth0.10 up ping 10.0.0.2 The ping will fail. The reason why is failing is because, the network interfaces for lan966x have a flag saying that the HW can insert the vlan tags into the frames(NETIF_F_HW_VLAN_CTAG_TX). Meaning that the frames that are transmitted don't have the vlan tag inside the skb data, but they have it inside the skb. We already get that vlan tag and put it in the IFH but the problem is that we don't configure the HW to rewrite the frame when the interface is in host mode. The fix consists in actually configuring the HW to insert the vlan tag if it is different than 0. Reviewed-by: Maxime Chevallier Fixes: 6d2c186afa5d ("net: lan966x: Add vlan support.") Signed-off-by: Horatiu Vultur Link: https://patch.msgid.link/20250528093619.3738998-1-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/microchip/lan966x/lan966x_main.c | 1 + .../ethernet/microchip/lan966x/lan966x_main.h | 1 + .../microchip/lan966x/lan966x_switchdev.c | 1 + .../ethernet/microchip/lan966x/lan966x_vlan.c | 21 +++++++++++++++++++ 4 files changed, 24 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 427bdc0e4908c6..7001584f1b7a62 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -879,6 +879,7 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, lan966x_vlan_port_set_vlan_aware(port, 0); lan966x_vlan_port_set_vid(port, HOST_PVID, false, false); lan966x_vlan_port_apply(port); + lan966x_vlan_port_rew_host(port); return 0; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 1f9df67f05044b..4f75f068836933 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -497,6 +497,7 @@ void lan966x_vlan_port_apply(struct lan966x_port *port); bool lan966x_vlan_cpu_member_cpu_vlan_mask(struct lan966x *lan966x, u16 vid); void lan966x_vlan_port_set_vlan_aware(struct lan966x_port *port, bool vlan_aware); +void lan966x_vlan_port_rew_host(struct lan966x_port *port); int lan966x_vlan_port_set_vid(struct lan966x_port *port, u16 vid, bool pvid, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c index 1c88120eb291a2..bcb4db76b75cd5 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -297,6 +297,7 @@ static void lan966x_port_bridge_leave(struct lan966x_port *port, lan966x_vlan_port_set_vlan_aware(port, false); lan966x_vlan_port_set_vid(port, HOST_PVID, false, false); lan966x_vlan_port_apply(port); + lan966x_vlan_port_rew_host(port); } int lan966x_port_changeupper(struct net_device *dev, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c index fa34a739c748e1..7da22520724ce2 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c @@ -149,6 +149,27 @@ void lan966x_vlan_port_set_vlan_aware(struct lan966x_port *port, port->vlan_aware = vlan_aware; } +/* When the interface is in host mode, the interface should not be vlan aware + * but it should insert all the tags that it gets from the network stack. + * The tags are not in the data of the frame but actually in the skb and the ifh + * is configured already to get this tag. So what we need to do is to update the + * rewriter to insert the vlan tag for all frames which have a vlan tag + * different than 0. + */ +void lan966x_vlan_port_rew_host(struct lan966x_port *port) +{ + struct lan966x *lan966x = port->lan966x; + u32 val; + + /* Tag all frames except when VID=0*/ + val = REW_TAG_CFG_TAG_CFG_SET(2); + + /* Update only some bits in the register */ + lan_rmw(val, + REW_TAG_CFG_TAG_CFG, + lan966x, REW_TAG_CFG(port->chip_port)); +} + void lan966x_vlan_port_apply(struct lan966x_port *port) { struct lan966x *lan966x = port->lan966x; From 4e20c86cc7b436da97ddafc4de910d8f1191c68c Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 27 May 2025 16:14:07 +0800 Subject: [PATCH 2622/2924] md/raid1,raid10: don't handle IO error for REQ_RAHEAD and REQ_NOWAIT [ Upstream commit 9f346f7d4ea73692b82f5102ca8698e4040469ea ] IO with REQ_RAHEAD or REQ_NOWAIT can fail early, even if the storage medium is fine, hence record badblocks or remove the disk from array does not make sense. This problem if found by lvm2 test lvcreate-large-raid, where dm-zero will fail read ahead IO directly. Fixes: e879a0d9cb08 ("md/raid1,raid10: don't ignore IO flags") Reported-and-tested-by: Mikulas Patocka Closes: https://lore.kernel.org/all/34fa755d-62c8-4588-8ee1-33cb1249bdf2@redhat.com/ Link: https://lore.kernel.org/linux-raid/20250527081407.3004055-1-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai Signed-off-by: Sasha Levin --- drivers/md/raid1-10.c | 10 ++++++++++ drivers/md/raid1.c | 19 ++++++++++--------- drivers/md/raid10.c | 11 ++++++----- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index c7efd8aab675cc..b8b3a90697012c 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -293,3 +293,13 @@ static inline bool raid1_should_read_first(struct mddev *mddev, return false; } + +/* + * bio with REQ_RAHEAD or REQ_NOWAIT can fail at anytime, before such IO is + * submitted to the underlying disks, hence don't record badblocks or retry + * in this case. + */ +static inline bool raid1_should_handle_error(struct bio *bio) +{ + return !(bio->bi_opf & (REQ_RAHEAD | REQ_NOWAIT)); +} diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index de9bccbe7337b5..1fe645e6300121 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -373,14 +373,16 @@ static void raid1_end_read_request(struct bio *bio) */ update_head_pos(r1_bio->read_disk, r1_bio); - if (uptodate) + if (uptodate) { set_bit(R1BIO_Uptodate, &r1_bio->state); - else if (test_bit(FailFast, &rdev->flags) && - test_bit(R1BIO_FailFast, &r1_bio->state)) + } else if (test_bit(FailFast, &rdev->flags) && + test_bit(R1BIO_FailFast, &r1_bio->state)) { /* This was a fail-fast read so we definitely * want to retry */ ; - else { + } else if (!raid1_should_handle_error(bio)) { + uptodate = 1; + } else { /* If all other devices have failed, we want to return * the error upwards rather than fail the last device. * Here we redefine "uptodate" to mean "Don't want to retry" @@ -451,16 +453,15 @@ static void raid1_end_write_request(struct bio *bio) struct bio *to_put = NULL; int mirror = find_bio_disk(r1_bio, bio); struct md_rdev *rdev = conf->mirrors[mirror].rdev; - bool discard_error; sector_t lo = r1_bio->sector; sector_t hi = r1_bio->sector + r1_bio->sectors; - - discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD; + bool ignore_error = !raid1_should_handle_error(bio) || + (bio->bi_status && bio_op(bio) == REQ_OP_DISCARD); /* * 'one mirror IO has finished' event handler: */ - if (bio->bi_status && !discard_error) { + if (bio->bi_status && !ignore_error) { set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, &rdev->flags)) set_bit(MD_RECOVERY_NEEDED, & @@ -511,7 +512,7 @@ static void raid1_end_write_request(struct bio *bio) /* Maybe we can clear some bad blocks. */ if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) && - !discard_error) { + !ignore_error) { r1_bio->bios[mirror] = IO_MADE_GOOD; set_bit(R1BIO_MadeGood, &r1_bio->state); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ba32bac975b8d6..54320a887ecc50 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -399,6 +399,8 @@ static void raid10_end_read_request(struct bio *bio) * wait for the 'master' bio. */ set_bit(R10BIO_Uptodate, &r10_bio->state); + } else if (!raid1_should_handle_error(bio)) { + uptodate = 1; } else { /* If all other devices that store this block have * failed, we want to return the error upwards rather @@ -456,9 +458,8 @@ static void raid10_end_write_request(struct bio *bio) int slot, repl; struct md_rdev *rdev = NULL; struct bio *to_put = NULL; - bool discard_error; - - discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD; + bool ignore_error = !raid1_should_handle_error(bio) || + (bio->bi_status && bio_op(bio) == REQ_OP_DISCARD); dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); @@ -472,7 +473,7 @@ static void raid10_end_write_request(struct bio *bio) /* * this branch is our 'one mirror IO has finished' event handler: */ - if (bio->bi_status && !discard_error) { + if (bio->bi_status && !ignore_error) { if (repl) /* Never record new bad blocks to replacement, * just fail it. @@ -527,7 +528,7 @@ static void raid10_end_write_request(struct bio *bio) /* Maybe we can clear some bad blocks. */ if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr, r10_bio->sectors) && - !discard_error) { + !ignore_error) { bio_put(bio); if (repl) r10_bio->devs[slot].repl_bio = IO_MADE_GOOD; From 1b9e202b5102fe65caabc90f3a585e4058cef90a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Thu, 29 May 2025 15:09:14 +0200 Subject: [PATCH 2623/2924] spi: bcm63xx-spi: fix shared reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5ad20e3d8cfe3b2e42bbddc7e0ebaa74479bb589 ] Some bmips SoCs (bcm6362, bcm63268) share the same SPI reset for both SPI and HSSPI controllers, so reset shouldn't be exclusive. Fixes: 38807adeaf1e ("spi: bcm63xx-spi: add reset support") Reported-by: Jonas Gorski Signed-off-by: Álvaro Fernández Rojas Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250529130915.2519590-2-noltari@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcm63xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index c8f64ec69344af..b56210734caafc 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -523,7 +523,7 @@ static int bcm63xx_spi_probe(struct platform_device *pdev) return PTR_ERR(clk); } - reset = devm_reset_control_get_optional_exclusive(dev, NULL); + reset = devm_reset_control_get_optional_shared(dev, NULL); if (IS_ERR(reset)) return PTR_ERR(reset); From 08f450057328e3c5f86daf574c1b70999b3774e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Thu, 29 May 2025 15:09:15 +0200 Subject: [PATCH 2624/2924] spi: bcm63xx-hsspi: fix shared reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3d6d84c8f2f66d3fd6a43a1e2ce8e6b54c573960 ] Some bmips SoCs (bcm6362, bcm63268) share the same SPI reset for both SPI and HSSPI controllers, so reset shouldn't be exclusive. Fixes: 0eeadddbf09a ("spi: bcm63xx-hsspi: add reset support") Reported-by: Jonas Gorski Signed-off-by: Álvaro Fernández Rojas Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250529130915.2519590-3-noltari@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcm63xx-hsspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c index 644b44d2aef24e..18261cbd413b49 100644 --- a/drivers/spi/spi-bcm63xx-hsspi.c +++ b/drivers/spi/spi-bcm63xx-hsspi.c @@ -745,7 +745,7 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev) if (IS_ERR(clk)) return PTR_ERR(clk); - reset = devm_reset_control_get_optional_exclusive(dev, NULL); + reset = devm_reset_control_get_optional_shared(dev, NULL); if (IS_ERR(reset)) return PTR_ERR(reset); From 9eeafd16d76a7642d12b3442a26c15cd345e12f7 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 22 May 2025 21:16:02 +0300 Subject: [PATCH 2625/2924] Bluetooth: MGMT: reject malformed HCI_CMD_SYNC commands [ Upstream commit 03f1700b9b4d4f2fed3165370f3c23db76553178 ] In 'mgmt_hci_cmd_sync()', check whether the size of parameters passed in 'struct mgmt_cp_hci_cmd_sync' matches the total size of the data (i.e. 'sizeof(struct mgmt_cp_hci_cmd_sync)' plus trailing bytes). Otherwise, large invalid 'params_len' will cause 'hci_cmd_sync_alloc()' to do 'skb_put_data()' from an area beyond the one actually passed to 'mgmt_hci_cmd_sync()'. Reported-by: syzbot+5fe2d5bfbfbec0b675a0@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5fe2d5bfbfbec0b675a0 Fixes: 827af4787e74 ("Bluetooth: MGMT: Add initial implementation of MGMT_OP_HCI_CMD_SYNC") Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 261926dccc7e8e..14a9462fced5e9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2566,7 +2566,8 @@ static int mgmt_hci_cmd_sync(struct sock *sk, struct hci_dev *hdev, struct mgmt_pending_cmd *cmd; int err; - if (len < sizeof(*cp)) + if (len != (offsetof(struct mgmt_cp_hci_cmd_sync, params) + + le16_to_cpu(cp->params_len))) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_HCI_CMD_SYNC, MGMT_STATUS_INVALID_PARAMS); From b21d6998a618f53e576dd2eb404021495703ad45 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 28 May 2025 14:53:11 -0400 Subject: [PATCH 2626/2924] Bluetooth: L2CAP: Fix not responding with L2CAP_CR_LE_ENCRYPTION [ Upstream commit 03dba9cea72f977e873e4e60e220fa596959dd8f ] Depending on the security set the response to L2CAP_LE_CONN_REQ shall be just L2CAP_CR_LE_ENCRYPTION if only encryption when BT_SECURITY_MEDIUM is selected since that means security mode 2 which doesn't require authentication which is something that is covered in the qualification test L2CAP/LE/CFC/BV-25-C. Link: https://github.com/bluez/bluez/issues/1270 Fixes: 27e2d4c8d28b ("Bluetooth: Add basic LE L2CAP connect request receiving support") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 042d3ac3b4a38e..a5bde5db58efcb 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4870,7 +4870,8 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, if (!smp_sufficient_security(conn->hcon, pchan->sec_level, SMP_ALLOW_STK)) { - result = L2CAP_CR_LE_AUTHENTICATION; + result = pchan->sec_level == BT_SECURITY_MEDIUM ? + L2CAP_CR_LE_ENCRYPTION : L2CAP_CR_LE_AUTHENTICATION; chan = NULL; goto response_unlock; } From 0e061abaad1498c5b76c10c594d4359ceb6b9145 Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Tue, 13 May 2025 12:55:27 +0200 Subject: [PATCH 2627/2924] ice: fix Tx scheduler error handling in XDP callback [ Upstream commit 0153f36041b8e52019ebfa8629c13bf8f9b0a951 ] When the XDP program is loaded, the XDP callback adds new Tx queues. This means that the callback must update the Tx scheduler with the new queue number. In the event of a Tx scheduler failure, the XDP callback should also fail and roll back any changes previously made for XDP preparation. The previous implementation had a bug that not all changes made by the XDP callback were rolled back. This caused the crash with the following call trace: [ +9.549584] ice 0000:ca:00.0: Failed VSI LAN queue config for XDP, error: -5 [ +0.382335] Oops: general protection fault, probably for non-canonical address 0x50a2250a90495525: 0000 [#1] SMP NOPTI [ +0.010710] CPU: 103 UID: 0 PID: 0 Comm: swapper/103 Not tainted 6.14.0-net-next-mar-31+ #14 PREEMPT(voluntary) [ +0.010175] Hardware name: Intel Corporation M50CYP2SBSTD/M50CYP2SBSTD, BIOS SE5C620.86B.01.01.0005.2202160810 02/16/2022 [ +0.010946] RIP: 0010:__ice_update_sample+0x39/0xe0 [ice] [...] [ +0.002715] Call Trace: [ +0.002452] [ +0.002021] ? __die_body.cold+0x19/0x29 [ +0.003922] ? die_addr+0x3c/0x60 [ +0.003319] ? exc_general_protection+0x17c/0x400 [ +0.004707] ? asm_exc_general_protection+0x26/0x30 [ +0.004879] ? __ice_update_sample+0x39/0xe0 [ice] [ +0.004835] ice_napi_poll+0x665/0x680 [ice] [ +0.004320] __napi_poll+0x28/0x190 [ +0.003500] net_rx_action+0x198/0x360 [ +0.003752] ? update_rq_clock+0x39/0x220 [ +0.004013] handle_softirqs+0xf1/0x340 [ +0.003840] ? sched_clock_cpu+0xf/0x1f0 [ +0.003925] __irq_exit_rcu+0xc2/0xe0 [ +0.003665] common_interrupt+0x85/0xa0 [ +0.003839] [ +0.002098] [ +0.002106] asm_common_interrupt+0x26/0x40 [ +0.004184] RIP: 0010:cpuidle_enter_state+0xd3/0x690 Fix this by performing the missing unmapping of XDP queues from q_vectors and setting the XDP rings pointer back to NULL after all those queues are released. Also, add an immediate exit from the XDP callback in case of ring preparation failure. Fixes: efc2214b6047 ("ice: Add support for XDP") Reviewed-by: Dawid Osuchowski Reviewed-by: Przemek Kitszel Reviewed-by: Jacob Keller Signed-off-by: Michal Kubiak Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Jesse Brandeburg Tested-by: Saritha Sanigani (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 47 ++++++++++++++++------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d390157b59fe18..82d472f1d781a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2740,6 +2740,27 @@ void ice_map_xdp_rings(struct ice_vsi *vsi) } } +/** + * ice_unmap_xdp_rings - Unmap XDP rings from interrupt vectors + * @vsi: the VSI with XDP rings being unmapped + */ +static void ice_unmap_xdp_rings(struct ice_vsi *vsi) +{ + int v_idx; + + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; + struct ice_tx_ring *ring; + + ice_for_each_tx_ring(ring, q_vector->tx) + if (!ring->tx_buf || !ice_ring_is_xdp(ring)) + break; + + /* restore the value of last node prior to XDP setup */ + q_vector->tx.tx_ring = ring; + } +} + /** * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP * @vsi: VSI to bring up Tx rings used by XDP @@ -2803,7 +2824,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, if (status) { dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n", status); - goto clear_xdp_rings; + goto unmap_xdp_rings; } /* assign the prog only when it's not already present on VSI; @@ -2819,6 +2840,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, ice_vsi_assign_bpf_prog(vsi, prog); return 0; +unmap_xdp_rings: + ice_unmap_xdp_rings(vsi); clear_xdp_rings: ice_for_each_xdp_txq(vsi, i) if (vsi->xdp_rings[i]) { @@ -2835,6 +2858,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, mutex_unlock(&pf->avail_q_mutex); devm_kfree(dev, vsi->xdp_rings); + vsi->xdp_rings = NULL; + return -ENOMEM; } @@ -2850,7 +2875,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_pf *pf = vsi->back; - int i, v_idx; + int i; /* q_vectors are freed in reset path so there's no point in detaching * rings @@ -2858,17 +2883,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type) if (cfg_type == ICE_XDP_CFG_PART) goto free_qmap; - ice_for_each_q_vector(vsi, v_idx) { - struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; - struct ice_tx_ring *ring; - - ice_for_each_tx_ring(ring, q_vector->tx) - if (!ring->tx_buf || !ice_ring_is_xdp(ring)) - break; - - /* restore the value of last node prior to XDP setup */ - q_vector->tx.tx_ring = ring; - } + ice_unmap_xdp_rings(vsi); free_qmap: mutex_lock(&pf->avail_q_mutex); @@ -3013,11 +3028,14 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, xdp_ring_err = ice_vsi_determine_xdp_res(vsi); if (xdp_ring_err) { NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP"); + goto resume_if; } else { xdp_ring_err = ice_prepare_xdp_rings(vsi, prog, ICE_XDP_CFG_FULL); - if (xdp_ring_err) + if (xdp_ring_err) { NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); + goto resume_if; + } } xdp_features_set_redirect_target(vsi->netdev, true); /* reallocate Rx queues that are used for zero-copy */ @@ -3035,6 +3053,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); } +resume_if: if (if_running) ret = ice_up(vsi); From 4e68afb05566a94980ba3fa5f48ace89e1a8b99d Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Tue, 13 May 2025 12:55:28 +0200 Subject: [PATCH 2628/2924] ice: create new Tx scheduler nodes for new queues only [ Upstream commit 6fa2942578472c9cab13a8fc1dae0d830193e0a1 ] The current implementation of the Tx scheduler tree attempts to create nodes for all Tx queues, ignoring the fact that some queues may already exist in the tree. For example, if the VSI already has 128 Tx queues and the user requests for 16 new queues, the Tx scheduler will compute the tree for 272 queues (128 existing queues + 144 new queues), instead of 144 queues (128 existing queues and 16 new queues). Fix that by modifying the node count calculation algorithm to skip the queues that already exist in the tree. Fixes: 5513b920a4f7 ("ice: Update Tx scheduler tree for VSI multi-Tx queue support") Reviewed-by: Dawid Osuchowski Reviewed-by: Przemek Kitszel Reviewed-by: Jacob Keller Signed-off-by: Michal Kubiak Reviewed-by: Simon Horman Tested-by: Jesse Brandeburg Tested-by: Saritha Sanigani (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_sched.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 6ca13c5dcb14e7..6524875b34d393 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1604,16 +1604,16 @@ ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node, /** * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes * @hw: pointer to the HW struct - * @num_qs: number of queues + * @num_new_qs: number of new queues that will be added to the tree * @num_nodes: num nodes array * * This function calculates the number of VSI child nodes based on the * number of queues. */ static void -ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes) +ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_new_qs, u16 *num_nodes) { - u16 num = num_qs; + u16 num = num_new_qs; u8 i, qgl, vsil; qgl = ice_sched_get_qgrp_layer(hw); @@ -1863,8 +1863,9 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, return status; } - if (new_numqs) - ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes); + ice_sched_calc_vsi_child_nodes(hw, new_numqs - prev_numqs, + new_num_nodes); + /* Keep the max number of queue configuration all the time. Update the * tree only if number of queues > previous number of queues. This may * leave some extra nodes in the tree if number of queues < previous From 4d65cfa6d0129534ce4b64365c77d01aef59146e Mon Sep 17 00:00:00 2001 From: Michal Kubiak Date: Tue, 13 May 2025 12:55:29 +0200 Subject: [PATCH 2629/2924] ice: fix rebuilding the Tx scheduler tree for large queue counts [ Upstream commit 73145e6d81070d34a21431c9e0d7aaf2f29ca048 ] The current implementation of the Tx scheduler allows the tree to be rebuilt as the user adds more Tx queues to the VSI. In such a case, additional child nodes are added to the tree to support the new number of queues. Unfortunately, this algorithm does not take into account that the limit of the VSI support node may be exceeded, so an additional node in the VSI layer may be required to handle all the requested queues. Such a scenario occurs when adding XDP Tx queues on machines with many CPUs. Although the driver still respects the queue limit returned by the FW, the Tx scheduler was unable to add those queues to its tree and returned one of the errors below. Such a scenario occurs when adding XDP Tx queues on machines with many CPUs (e.g. at least 321 CPUs, if there is already 128 Tx/Rx queue pairs). Although the driver still respects the queue limit returned by the FW, the Tx scheduler was unable to add those queues to its tree and returned the following errors: Failed VSI LAN queue config for XDP, error: -5 or: Failed to set LAN Tx queue context, error: -22 Fix this problem by extending the tree rebuild algorithm to check if the current VSI node can support the requested number of queues. If it cannot, create as many additional VSI support nodes as necessary to handle all the required Tx queues. Symmetrically, adjust the VSI node removal algorithm to remove all nodes associated with the given VSI. Also, make the search for the next free VSI node more restrictive. That is, add queue group nodes only to the VSI support nodes that have a matching VSI handle. Finally, fix the comment describing the tree update algorithm to better reflect the current scenario. Fixes: b0153fdd7e8a ("ice: update VSI config dynamically") Reviewed-by: Dawid Osuchowski Reviewed-by: Przemek Kitszel Signed-off-by: Michal Kubiak Reviewed-by: Simon Horman Tested-by: Jesse Brandeburg Tested-by: Saritha Sanigani (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_sched.c | 170 +++++++++++++++++---- 1 file changed, 142 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 6524875b34d393..d9d09296d1d481 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -84,6 +84,27 @@ ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid) return NULL; } +/** + * ice_sched_find_next_vsi_node - find the next node for a given VSI + * @vsi_node: VSI support node to start search with + * + * Return: Next VSI support node, or NULL. + * + * The function returns a pointer to the next node from the VSI layer + * assigned to the given VSI, or NULL if there is no such a node. + */ +static struct ice_sched_node * +ice_sched_find_next_vsi_node(struct ice_sched_node *vsi_node) +{ + unsigned int vsi_handle = vsi_node->vsi_handle; + + while ((vsi_node = vsi_node->sibling) != NULL) + if (vsi_node->vsi_handle == vsi_handle) + break; + + return vsi_node; +} + /** * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd * @hw: pointer to the HW struct @@ -1084,8 +1105,10 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi, if (parent->num_children < max_child_nodes) { new_num_nodes = max_child_nodes - parent->num_children; } else { - /* This parent is full, try the next sibling */ - parent = parent->sibling; + /* This parent is full, + * try the next available sibling. + */ + parent = ice_sched_find_next_vsi_node(parent); /* Don't modify the first node TEID memory if the * first node was added already in the above call. * Instead send some temp memory for all other @@ -1528,12 +1551,23 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc, /* get the first queue group node from VSI sub-tree */ qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer); while (qgrp_node) { + struct ice_sched_node *next_vsi_node; + /* make sure the qgroup node is part of the VSI subtree */ if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node)) if (qgrp_node->num_children < max_children && qgrp_node->owner == owner) break; qgrp_node = qgrp_node->sibling; + if (qgrp_node) + continue; + + next_vsi_node = ice_sched_find_next_vsi_node(vsi_node); + if (!next_vsi_node) + break; + + vsi_node = next_vsi_node; + qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer); } /* Select the best queue group */ @@ -1779,7 +1813,11 @@ ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle, if (!parent) return -EIO; - if (i == vsil) + /* Do not modify the VSI handle for already existing VSI nodes, + * (if no new VSI node was added to the tree). + * Assign the VSI handle only to newly added VSI nodes. + */ + if (i == vsil && num_added) parent->vsi_handle = vsi_handle; } @@ -1812,6 +1850,41 @@ ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc) num_nodes); } +/** + * ice_sched_recalc_vsi_support_nodes - recalculate VSI support nodes count + * @hw: pointer to the HW struct + * @vsi_node: pointer to the leftmost VSI node that needs to be extended + * @new_numqs: new number of queues that has to be handled by the VSI + * @new_num_nodes: pointer to nodes count table to modify the VSI layer entry + * + * This function recalculates the number of supported nodes that need to + * be added after adding more Tx queues for a given VSI. + * The number of new VSI support nodes that shall be added will be saved + * to the @new_num_nodes table for the VSI layer. + */ +static void +ice_sched_recalc_vsi_support_nodes(struct ice_hw *hw, + struct ice_sched_node *vsi_node, + unsigned int new_numqs, u16 *new_num_nodes) +{ + u32 vsi_nodes_cnt = 1; + u32 max_queue_cnt = 1; + u32 qgl, vsil; + + qgl = ice_sched_get_qgrp_layer(hw); + vsil = ice_sched_get_vsi_layer(hw); + + for (u32 i = vsil; i <= qgl; i++) + max_queue_cnt *= hw->max_children[i]; + + while ((vsi_node = ice_sched_find_next_vsi_node(vsi_node)) != NULL) + vsi_nodes_cnt++; + + if (new_numqs > (max_queue_cnt * vsi_nodes_cnt)) + new_num_nodes[vsil] = DIV_ROUND_UP(new_numqs, max_queue_cnt) - + vsi_nodes_cnt; +} + /** * ice_sched_update_vsi_child_nodes - update VSI child nodes * @pi: port information structure @@ -1863,16 +1936,25 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, return status; } + ice_sched_recalc_vsi_support_nodes(hw, vsi_node, + new_numqs, new_num_nodes); ice_sched_calc_vsi_child_nodes(hw, new_numqs - prev_numqs, new_num_nodes); - /* Keep the max number of queue configuration all the time. Update the - * tree only if number of queues > previous number of queues. This may + /* Never decrease the number of queues in the tree. Update the tree + * only if number of queues > previous number of queues. This may * leave some extra nodes in the tree if number of queues < previous * number but that wouldn't harm anything. Removing those extra nodes * may complicate the code if those nodes are part of SRL or * individually rate limited. + * Also, add the required VSI support nodes if the existing ones cannot + * handle the requested new number of queues. */ + status = ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node, + new_num_nodes); + if (status) + return status; + status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node, new_num_nodes, owner); if (status) @@ -2013,6 +2095,58 @@ static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node) return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF); } +/** + * ice_sched_rm_vsi_subtree - remove all nodes assigned to a given VSI + * @pi: port information structure + * @vsi_node: pointer to the leftmost node of the VSI to be removed + * @owner: LAN or RDMA + * @tc: TC number + * + * Return: Zero in case of success, or -EBUSY if the VSI has leaf nodes in TC. + * + * This function removes all the VSI support nodes associated with a given VSI + * and its LAN or RDMA children nodes from the scheduler tree. + */ +static int +ice_sched_rm_vsi_subtree(struct ice_port_info *pi, + struct ice_sched_node *vsi_node, u8 owner, u8 tc) +{ + u16 vsi_handle = vsi_node->vsi_handle; + bool all_vsi_nodes_removed = true; + int j = 0; + + while (vsi_node) { + struct ice_sched_node *next_vsi_node; + + if (ice_sched_is_leaf_node_present(vsi_node)) { + ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", tc); + return -EBUSY; + } + while (j < vsi_node->num_children) { + if (vsi_node->children[j]->owner == owner) + ice_free_sched_node(pi, vsi_node->children[j]); + else + j++; + } + + next_vsi_node = ice_sched_find_next_vsi_node(vsi_node); + + /* remove the VSI if it has no children */ + if (!vsi_node->num_children) + ice_free_sched_node(pi, vsi_node); + else + all_vsi_nodes_removed = false; + + vsi_node = next_vsi_node; + } + + /* clean up aggregator related VSI info if any */ + if (all_vsi_nodes_removed) + ice_sched_rm_agg_vsi_info(pi, vsi_handle); + + return 0; +} + /** * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes * @pi: port information structure @@ -2039,7 +2173,6 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner) ice_for_each_traffic_class(i) { struct ice_sched_node *vsi_node, *tc_node; - u8 j = 0; tc_node = ice_sched_get_tc_node(pi, i); if (!tc_node) @@ -2049,31 +2182,12 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner) if (!vsi_node) continue; - if (ice_sched_is_leaf_node_present(vsi_node)) { - ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", i); - status = -EBUSY; + status = ice_sched_rm_vsi_subtree(pi, vsi_node, owner, i); + if (status) goto exit_sched_rm_vsi_cfg; - } - while (j < vsi_node->num_children) { - if (vsi_node->children[j]->owner == owner) { - ice_free_sched_node(pi, vsi_node->children[j]); - /* reset the counter again since the num - * children will be updated after node removal - */ - j = 0; - } else { - j++; - } - } - /* remove the VSI if it has no children */ - if (!vsi_node->num_children) { - ice_free_sched_node(pi, vsi_node); - vsi_ctx->sched.vsi_node[i] = NULL; + vsi_ctx->sched.vsi_node[i] = NULL; - /* clean up aggregator related VSI info if any */ - ice_sched_rm_agg_vsi_info(pi, vsi_handle); - } if (owner == ICE_SCHED_NODE_OWNER_LAN) vsi_ctx->sched.max_lanq[i] = 0; else From e47d842adcbfc16dc0fe8f0d8c27a63484363099 Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Thu, 1 May 2025 17:06:17 +0000 Subject: [PATCH 2630/2924] idpf: fix a race in txq wakeup [ Upstream commit 7292af042bcf22e2c18b96ed250f78498a5b28ab ] Add a helper function to correctly handle the lockless synchronization when the sender needs to block. The paradigm is if (no_resources()) { stop_queue(); barrier(); if (!no_resources()) restart_queue(); } netif_subqueue_maybe_stop already handles the paradigm correctly, but the code split the check for resources in three parts, the first one (descriptors) followed the protocol, but the other two (completions and tx_buf) were only doing the first part and so race prone. Luckily netif_subqueue_maybe_stop macro already allows you to use a function to evaluate the start/stop conditions so the fix only requires the right helper function to evaluate all the conditions at once. The patch removes idpf_tx_maybe_stop_common since it's no longer needed and instead adjusts separately the conditions for singleq and splitq. Note that idpf_tx_buf_hw_update doesn't need to check for resources since that will be covered in idpf_tx_splitq_frame. To reproduce: Reduce the threshold for pending completions to increase the chances of hitting this pause by changing your kernel: drivers/net/ethernet/intel/idpf/idpf_txrx.h -#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1) +#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 4) Use pktgen to force the host to push small pkts very aggressively: ./pktgen_sample02_multiqueue.sh -i eth1 -s 100 -6 -d $IP -m $MAC \ -p 10000-10000 -t 16 -n 0 -v -x -c 64 Fixes: 6818c4d5b3c2 ("idpf: add splitq start_xmit") Reviewed-by: Jacob Keller Reviewed-by: Madhu Chittim Signed-off-by: Josh Hay Signed-off-by: Brian Vazquez Signed-off-by: Luigi Rizzo Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- .../ethernet/intel/idpf/idpf_singleq_txrx.c | 9 ++-- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 45 +++++++------------ drivers/net/ethernet/intel/idpf/idpf_txrx.h | 8 ---- 3 files changed, 22 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index eae1b6f474e624..6ade54e213259c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -362,17 +362,18 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, { struct idpf_tx_offload_params offload = { }; struct idpf_tx_buf *first; + int csum, tso, needed; unsigned int count; __be16 protocol; - int csum, tso; count = idpf_tx_desc_count_required(tx_q, skb); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); - if (idpf_tx_maybe_stop_common(tx_q, - count + IDPF_TX_DESCS_PER_CACHE_LINE + - IDPF_TX_DESCS_FOR_CTX)) { + needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX; + if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, + IDPF_DESC_UNUSED(tx_q), + needed, needed)) { idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); u64_stats_update_begin(&tx_q->stats_sync); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 2d5f5c9f91ce1e..aa16e4c1edbb8b 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2132,6 +2132,19 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); } +/* Global conditions to tell whether the txq (and related resources) + * has room to allow the use of "size" descriptors. + */ +static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) +{ + if (IDPF_DESC_UNUSED(tx_q) < size || + IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > + IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || + IDPF_TX_BUF_RSV_LOW(tx_q)) + return 0; + return 1; +} + /** * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions * @tx_q: the queue to be checked @@ -2142,29 +2155,11 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, unsigned int descs_needed) { - if (idpf_tx_maybe_stop_common(tx_q, descs_needed)) - goto out; - - /* If there are too many outstanding completions expected on the - * completion queue, stop the TX queue to give the device some time to - * catch up - */ - if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > - IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq))) - goto splitq_stop; - - /* Also check for available book keeping buffers; if we are low, stop - * the queue to wait for more completions - */ - if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q))) - goto splitq_stop; - - return 0; - -splitq_stop: - netif_stop_subqueue(tx_q->netdev, tx_q->idx); + if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, + idpf_txq_has_room(tx_q, descs_needed), + 1, 1)) + return 0; -out: u64_stats_update_begin(&tx_q->stats_sync); u64_stats_inc(&tx_q->q_stats.q_busy); u64_stats_update_end(&tx_q->stats_sync); @@ -2190,12 +2185,6 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); tx_q->next_to_use = val; - if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) { - u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_inc(&tx_q->q_stats.q_busy); - u64_stats_update_end(&tx_q->stats_sync); - } - /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index b029f566e57cd6..c192a6c547dd32 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -1037,12 +1037,4 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, u16 cleaned_count); int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); -static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, - u32 needed) -{ - return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, - IDPF_DESC_UNUSED(tx_q), - needed, needed); -} - #endif /* !_IDPF_TXRX_H_ */ From 73be96915373d54b8b9441666056bc21036fda00 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 8 May 2025 11:47:15 -0700 Subject: [PATCH 2631/2924] idpf: avoid mailbox timeout delays during reset [ Upstream commit 9dc63d8ff182150d7d7b318ab9389702a2c0a292 ] Mailbox operations are not possible while the driver is in reset. Operations that require MBX exchange with the control plane will result in long delays if executed while a reset is in progress: ethtool -L combined 8& echo 1 > /sys/class/net//device/reset idpf 0000:83:00.0: HW reset detected idpf 0000:83:00.0: Device HW Reset initiated idpf 0000:83:00.0: Transaction timed-out (op:504 cookie:be00 vc_op:504 salt:be timeout:2000ms) idpf 0000:83:00.0: Transaction timed-out (op:508 cookie:bf00 vc_op:508 salt:bf timeout:2000ms) idpf 0000:83:00.0: Transaction timed-out (op:512 cookie:c000 vc_op:512 salt:c0 timeout:2000ms) idpf 0000:83:00.0: Transaction timed-out (op:510 cookie:c100 vc_op:510 salt:c1 timeout:2000ms) idpf 0000:83:00.0: Transaction timed-out (op:509 cookie:c200 vc_op:509 salt:c2 timeout:60000ms) idpf 0000:83:00.0: Transaction timed-out (op:509 cookie:c300 vc_op:509 salt:c3 timeout:60000ms) idpf 0000:83:00.0: Transaction timed-out (op:505 cookie:c400 vc_op:505 salt:c4 timeout:60000ms) idpf 0000:83:00.0: Failed to configure queues for vport 0, -62 Disable mailbox communication in case of a reset, unless it's done during a driver load, where the virtchnl operations are needed to configure the device. Fixes: 8077c727561aa ("idpf: add controlq init and reset checks") Co-developed-by: Joshua Hay Signed-off-by: Joshua Hay Signed-off-by: Emil Tantilov Reviewed-by: Ahmed Zaki Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Samuel Salin Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/idpf/idpf_lib.c | 18 +++++++++++++----- .../net/ethernet/intel/idpf/idpf_virtchnl.c | 2 +- .../net/ethernet/intel/idpf/idpf_virtchnl.h | 1 + 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 3a033ce19cda23..2ed801398971cc 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1816,11 +1816,19 @@ void idpf_vc_event_task(struct work_struct *work) if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) return; - if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags) || - test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { - set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); - idpf_init_hard_reset(adapter); - } + if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags)) + goto func_reset; + + if (test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) + goto drv_load; + + return; + +func_reset: + idpf_vc_xn_shutdown(adapter->vcxn_mngr); +drv_load: + set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); + idpf_init_hard_reset(adapter); } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 3d2413b8684fca..5d2ca007f6828e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -376,7 +376,7 @@ static void idpf_vc_xn_init(struct idpf_vc_xn_manager *vcxn_mngr) * All waiting threads will be woken-up and their transaction aborted. Further * operations on that object will fail. */ -static void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr) +void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr) { int i; diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h index 83da5d8da56bf2..23271cf0a21605 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h @@ -66,5 +66,6 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport); int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs); int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get); int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get); +void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr); #endif /* _IDPF_VIRTCHNL_H_ */ From 1e72069fb33d4032c329d4cfee901f20b8faa75f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Thu, 29 May 2025 14:44:06 +0200 Subject: [PATCH 2632/2924] net: dsa: tag_brcm: legacy: fix pskb_may_pull length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit efdddc4484859082da6c7877ed144c8121c8ea55 ] BRCM_LEG_PORT_ID was incorrectly used for pskb_may_pull length. The correct check is BRCM_LEG_TAG_LEN + VLAN_HLEN, or 10 bytes. Fixes: 964dbf186eaa ("net: dsa: tag_brcm: add support for legacy tags") Signed-off-by: Álvaro Fernández Rojas Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250529124406.2513779-1-noltari@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/dsa/tag_brcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 8c3c068728e51c..fe75821623a4fc 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -257,7 +257,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, int source_port; u8 *brcm_tag; - if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID))) + if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN))) return NULL; brcm_tag = dsa_etype_header_pos_rx(skb); From 379cd990dfe752b38fcf46034698a9a150626c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Thu, 29 May 2025 11:07:23 +0200 Subject: [PATCH 2633/2924] net: stmmac: make sure that ptp_rate is not 0 before configuring timestamping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 030ce919e114a111e83b7976ecb3597cefd33f26 ] The stmmac platform drivers that do not open-code the clk_ptp_rate value after having retrieved the default one from the device-tree can end up with 0 in clk_ptp_rate (as clk_get_rate can return 0). It will eventually propagate up to PTP initialization when bringing up the interface, leading to a divide by 0: Division by zero in kernel. CPU: 1 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.30-00001-g48313bd5768a #22 Hardware name: STM32 (Device Tree Support) Call trace: unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x6c/0x8c dump_stack_lvl from Ldiv0_64+0x8/0x18 Ldiv0_64 from stmmac_init_tstamp_counter+0x190/0x1a4 stmmac_init_tstamp_counter from stmmac_hw_setup+0xc1c/0x111c stmmac_hw_setup from __stmmac_open+0x18c/0x434 __stmmac_open from stmmac_open+0x3c/0xbc stmmac_open from __dev_open+0xf4/0x1ac __dev_open from __dev_change_flags+0x1cc/0x224 __dev_change_flags from dev_change_flags+0x24/0x60 dev_change_flags from ip_auto_config+0x2e8/0x11a0 ip_auto_config from do_one_initcall+0x84/0x33c do_one_initcall from kernel_init_freeable+0x1b8/0x214 kernel_init_freeable from kernel_init+0x24/0x140 kernel_init from ret_from_fork+0x14/0x28 Exception stack(0xe0815fb0 to 0xe0815ff8) Prevent this division by 0 by adding an explicit check and error log about the actual issue. While at it, remove the same check from stmmac_ptp_register, which then becomes duplicate Fixes: 19d857c9038e ("stmmac: Fix calculations for ptp counters when clock input = 50Mhz.") Signed-off-by: Alexis Lothoré Reviewed-by: Yanteng Si Reviewed-by: Maxime Chevallier Link: https://patch.msgid.link/20250529-stmmac_tstamp_div-v4-1-d73340a794d5@bootlin.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++++ drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 59d07d0d3369db..3a049a158ea111 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -803,6 +803,11 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) return -EOPNOTSUPP; + if (!priv->plat->clk_ptp_rate) { + netdev_err(priv->dev, "Invalid PTP clock rate"); + return -EINVAL; + } + stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags); priv->systime_flags = systime_flags; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 429b2d357813c8..3767ba495e78d2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -317,7 +317,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv) /* Calculate the clock domain crossing (CDC) error if necessary */ priv->plat->cdc_error_adj = 0; - if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) + if (priv->plat->has_gmac4) priv->plat->cdc_error_adj = (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate; /* Update the ptp clock parameters based on feature discovery, when From d5e3bfdba0dc419499b801937128957f77503761 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20Lothor=C3=A9?= Date: Thu, 29 May 2025 11:07:24 +0200 Subject: [PATCH 2634/2924] net: stmmac: make sure that ptp_rate is not 0 before configuring EST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cbefe2ffa7784525ec5d008ba87c7add19ec631a ] If the ptp_rate recorded earlier in the driver happens to be 0, this bogus value will propagate up to EST configuration, where it will trigger a division by 0. Prevent this division by 0 by adding the corresponding check and error code. Suggested-by: Maxime Chevallier Signed-off-by: Alexis Lothoré Fixes: 8572aec3d0dc ("net: stmmac: Add basic EST support for XGMAC") Link: https://patch.msgid.link/20250529-stmmac_tstamp_div-v4-2-d73340a794d5@bootlin.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_est.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c index c9693f77e1f61f..ac6f2e3a3fcd2f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c @@ -32,6 +32,11 @@ static int est_configure(struct stmmac_priv *priv, struct stmmac_est *cfg, int i, ret = 0; u32 ctrl; + if (!ptp_rate) { + netdev_warn(priv->dev, "Invalid PTP rate"); + return -EINVAL; + } + ret |= est_write(est_addr, EST_BTR_LOW, cfg->btr[0], false); ret |= est_write(est_addr, EST_BTR_HIGH, cfg->btr[1], false); ret |= est_write(est_addr, EST_TER, cfg->ter, false); From 4649767d62afed95981328c2fea2198b1146e784 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Thu, 29 May 2025 12:28:05 +0200 Subject: [PATCH 2635/2924] net: Fix checksum update for ILA adj-transport [ Upstream commit 6043b794c7668c19dabc4a93c75b924a19474d59 ] During ILA address translations, the L4 checksums can be handled in different ways. One of them, adj-transport, consist in parsing the transport layer and updating any found checksum. This logic relies on inet_proto_csum_replace_by_diff and produces an incorrect skb->csum when in state CHECKSUM_COMPLETE. This bug can be reproduced with a simple ILA to SIR mapping, assuming packets are received with CHECKSUM_COMPLETE: $ ip a show dev eth0 14: eth0@if15: mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 62:ae:35:9e:0f:8d brd ff:ff:ff:ff:ff:ff link-netnsid 0 inet6 3333:0:0:1::c078/64 scope global valid_lft forever preferred_lft forever inet6 fd00:10:244:1::c078/128 scope global nodad valid_lft forever preferred_lft forever inet6 fe80::60ae:35ff:fe9e:f8d/64 scope link proto kernel_ll valid_lft forever preferred_lft forever $ ip ila add loc_match fd00:10:244:1 loc 3333:0:0:1 \ csum-mode adj-transport ident-type luid dev eth0 Then I hit [fd00:10:244:1::c078]:8000 with a server listening only on [3333:0:0:1::c078]:8000. With the bug, the SYN packet is dropped with SKB_DROP_REASON_TCP_CSUM after inet_proto_csum_replace_by_diff changed skb->csum. The translation and drop are visible on pwru [1] traces: IFACE TUPLE FUNC eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ipv6_rcv eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ip6_rcv_core eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) nf_hook_slow eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) inet_proto_csum_replace_by_diff eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_early_demux eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_route_input eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input_finish eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_protocol_deliver_rcu eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) raw6_local_deliver eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ipv6_raw_deliver eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_rcv eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) __skb_checksum_complete eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skb_reason(SKB_DROP_REASON_TCP_CSUM) eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_head_state eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_data eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_free_head eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skbmem This is happening because inet_proto_csum_replace_by_diff is updating skb->csum when it shouldn't. The L4 checksum is updated such that it "cancels" the IPv6 address change in terms of checksum computation, so the impact on skb->csum is null. Note this would be different for an IPv4 packet since three fields would be updated: the IPv4 address, the IP checksum, and the L4 checksum. Two would cancel each other and skb->csum would still need to be updated to take the L4 checksum change into account. This patch fixes it by passing an ipv6 flag to inet_proto_csum_replace_by_diff, to skip the skb->csum update if we're in the IPv6 case. Note the behavior of the only other user of inet_proto_csum_replace_by_diff, the BPF subsystem, is left as is in this patch and fixed in the subsequent patch. With the fix, using the reproduction from above, I can confirm skb->csum is not touched by inet_proto_csum_replace_by_diff and the TCP SYN proceeds to the application after the ILA translation. Link: https://github.com/cilium/pwru [1] Fixes: 65d7ab8de582 ("net: Identifier Locator Addressing module") Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Link: https://patch.msgid.link/b5539869e3550d46068504feb02d37653d939c0b.1748509484.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/checksum.h | 2 +- net/core/filter.c | 2 +- net/core/utils.c | 4 ++-- net/ipv6/ila/ila_common.c | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/net/checksum.h b/include/net/checksum.h index 243f972267b8d1..be9356d4b67a1e 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -164,7 +164,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, - __wsum diff, bool pseudohdr); + __wsum diff, bool pseudohdr, bool ipv6); static __always_inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, diff --git a/net/core/filter.c b/net/core/filter.c index 577a4504e26fa0..3c93765742c9c8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1987,7 +1987,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, if (unlikely(from != 0)) return -EINVAL; - inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); + inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false); break; case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); diff --git a/net/core/utils.c b/net/core/utils.c index 27f4cffaae05d9..b8c21a859e27b1 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, EXPORT_SYMBOL(inet_proto_csum_replace16); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, - __wsum diff, bool pseudohdr) + __wsum diff, bool pseudohdr, bool ipv6) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace_by_diff(sum, diff); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6) skb->csum = ~csum_sub(diff, skb->csum); } else if (pseudohdr) { *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 95e9146918cc6f..b8d43ed4689db9 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&th->check, skb, - diff, true); + diff, true, true); } break; case NEXTHDR_UDP: @@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&uh->check, skb, - diff, true); + diff, true, true); if (!uh->check) uh->check = CSUM_MANGLED_0; } @@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, - diff, true); + diff, true, true); } break; } From 3904dfba2913835cf4ac1ba6b5dd44ce89171aa7 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 8 Apr 2025 11:00:51 +0200 Subject: [PATCH 2636/2924] bpf: Clarify the meaning of BPF_F_PSEUDO_HDR [ Upstream commit 5a15a050df714959f0d5a57ac3201bd1c6594984 ] In the bpf_l4_csum_replace helper, the BPF_F_PSEUDO_HDR flag should only be set if the modified header field is part of the pseudo-header. If you modify for example the UDP ports and pass BPF_F_PSEUDO_HDR, inet_proto_csum_replace4 will update skb->csum even though it shouldn't (the port and the UDP checksum updates null each other). Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/5126ef84ba75425b689482cbc98bffe75e5d8ab0.1744102490.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov Stable-dep-of: ead7f9b8de65 ("bpf: Fix L4 csum update on IPv6 in CHECKSUM_COMPLETE") Signed-off-by: Sasha Levin --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fd404729b1154b..bceb1950ec204e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2051,7 +2051,7 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index fd404729b1154b..bceb1950ec204e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2051,7 +2051,7 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more From a8452696877a3958fe1e43b99889f9d3c9f56b56 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Thu, 29 May 2025 12:28:35 +0200 Subject: [PATCH 2637/2924] bpf: Fix L4 csum update on IPv6 in CHECKSUM_COMPLETE [ Upstream commit ead7f9b8de65632ef8060b84b0c55049a33cfea1 ] In Cilium, we use bpf_csum_diff + bpf_l4_csum_replace to, among other things, update the L4 checksum after reverse SNATing IPv6 packets. That use case is however not currently supported and leads to invalid skb->csum values in some cases. This patch adds support for IPv6 address changes in bpf_l4_csum_update via a new flag. When calling bpf_l4_csum_replace in Cilium, it ends up calling inet_proto_csum_replace_by_diff: 1: void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, 2: __wsum diff, bool pseudohdr) 3: { 4: if (skb->ip_summed != CHECKSUM_PARTIAL) { 5: csum_replace_by_diff(sum, diff); 6: if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) 7: skb->csum = ~csum_sub(diff, skb->csum); 8: } else if (pseudohdr) { 9: *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); 10: } 11: } The bug happens when we're in the CHECKSUM_COMPLETE state. We've just updated one of the IPv6 addresses. The helper now updates the L4 header checksum on line 5. Next, it updates skb->csum on line 7. It shouldn't. For an IPv6 packet, the updates of the IPv6 address and of the L4 checksum will cancel each other. The checksums are set such that computing a checksum over the packet including its checksum will result in a sum of 0. So the same is true here when we update the L4 checksum on line 5. We'll update it as to cancel the previous IPv6 address update. Hence skb->csum should remain untouched in this case. The same bug doesn't affect IPv4 packets because, in that case, three fields are updated: the IPv4 address, the IP checksum, and the L4 checksum. The change to the IPv4 address and one of the checksums still cancel each other in skb->csum, but we're left with one checksum update and should therefore update skb->csum accordingly. That's exactly what inet_proto_csum_replace_by_diff does. This special case for IPv6 L4 checksums is also described atop inet_proto_csum_replace16, the function we should be using in this case. This patch introduces a new bpf_l4_csum_replace flag, BPF_F_IPV6, to indicate that we're updating the L4 checksum of an IPv6 packet. When the flag is set, inet_proto_csum_replace_by_diff will skip the skb->csum update. Fixes: 7d672345ed295 ("bpf: add generic bpf_csum_diff helper") Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Link: https://patch.msgid.link/96a6bc3a443e6f0b21ff7b7834000e17fb549e05.1748509484.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/uapi/linux/bpf.h | 2 ++ net/core/filter.c | 5 +++-- tools/include/uapi/linux/bpf.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bceb1950ec204e..fe5df2a9fe8ee6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2052,6 +2052,7 @@ union bpf_attr { * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates * that the modified header field is part of the pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -6068,6 +6069,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ diff --git a/net/core/filter.c b/net/core/filter.c index 3c93765742c9c8..357d26b76c22d9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1968,10 +1968,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; bool do_mforce = flags & BPF_F_MARK_ENFORCE; + bool is_ipv6 = flags & BPF_F_IPV6; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | - BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) + BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK | BPF_F_IPV6))) return -EINVAL; if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; @@ -1987,7 +1988,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, if (unlikely(from != 0)) return -EINVAL; - inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false); + inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, is_ipv6); break; case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bceb1950ec204e..fe5df2a9fe8ee6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2052,6 +2052,7 @@ union bpf_attr { * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates * that the modified header field is part of the pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -6068,6 +6069,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ From 17b7dbbd0afb25ed082e90d2b09fdf3ca5cd5571 Mon Sep 17 00:00:00 2001 From: Jesus Narvaez Date: Wed, 14 May 2025 15:52:24 -0700 Subject: [PATCH 2638/2924] drm/i915/guc: Check if expecting reply before decrementing outstanding_submission_g2h [ Upstream commit c557fd1050f6691dde36818dfc1a4c415c42901b ] When sending a H2G message where a reply is expected in guc_submission_send_busy_loop(), outstanding_submission_g2h is incremented before the send. However, if there is an error sending the message, outstanding_submission_g2h is decremented without checking if a reply is expected. Therefore, check if reply is expected when there is a failure before decrementing outstanding_submission_g2h. Fixes: 2f2cc53b5fe7 ("drm/i915/guc: Close deregister-context race against CT-loss") Signed-off-by: Jesus Narvaez Cc: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Anshuman Gupta Cc: Mousumi Jana Cc: Rodrigo Vivi Cc: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://lore.kernel.org/r/20250514225224.4142684-1-jesus.narvaez@intel.com (cherry picked from commit a6a26786f22a4ab0227bcf610510c4c9c2df0808) Signed-off-by: Joonas Lahtinen Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f8cb7c630d5b83..108331a699958b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -633,7 +633,7 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, atomic_inc(&guc->outstanding_submission_g2h); ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); - if (ret) + if (ret && g2h_len_dw) atomic_dec(&guc->outstanding_submission_g2h); return ret; From 91adc20742dd2a744f57d24c0b84f7abe4bf7b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Mon, 26 May 2025 15:05:11 +0300 Subject: [PATCH 2639/2924] drm/i915/psr: Fix using wrong mask in REG_FIELD_PREP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 57d63c6cd0851d3af612a556ec61b0f2a9bd522f ] Wrong mask is used in PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION and PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION. Fixes: 295099580f04 ("drm/i915/psr: Add missing ALPM AUX-Less register definitions") Signed-off-by: Jouni Högander Reviewed-by: Ankit Nautiyal Link: https://lore.kernel.org/r/20250526120512.1702815-12-jouni.hogander@intel.com (cherry picked from commit 8097128a40ff378761034ec72cdbf6f46e466dc0) Signed-off-by: Joonas Lahtinen Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/intel_psr_regs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr_regs.h b/drivers/gpu/drm/i915/display/intel_psr_regs.h index 795e6b9cc575c8..248136456048e3 100644 --- a/drivers/gpu/drm/i915/display/intel_psr_regs.h +++ b/drivers/gpu/drm/i915/display/intel_psr_regs.h @@ -325,8 +325,8 @@ #define PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK REG_GENMASK(20, 16) #define PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK, val) #define PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION_MASK REG_GENMASK(12, 8) -#define PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK, val) +#define PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION_MASK, val) #define PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION_MASK REG_GENMASK(4, 0) -#define PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK, val) +#define PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION_MASK, val) #endif /* __INTEL_PSR_REGS_H__ */ From 5233b0657fa60e2622eb966d37275e4945c3696b Mon Sep 17 00:00:00 2001 From: Jesus Narvaez Date: Wed, 28 May 2025 16:05:51 -0700 Subject: [PATCH 2640/2924] drm/i915/guc: Handle race condition where wakeref count drops below 0 [ Upstream commit 0323a5127e7c534cfc88efe0f850a0cb777e938b ] There is a rare race condition when preparing for a reset where guc_lrc_desc_unpin() could be in the process of deregistering a context while a different thread is scrubbing outstanding contexts and it alters the context state and does a wakeref put. Then, if there is a failure with deregister_context(), a second wakeref put could occur. As a result the wakeref count could drop below 0 and fail an INTEL_WAKEREF_BUG_ON() check. Therefore if there is a failure with deregister_context(), undo the context state changes and do a wakeref put only if the context was set to be destroyed earlier. v2: Expand comment to better explain change. (Daniele) v3: Removed addition to the original comment. (Daniele) Fixes: 2f2cc53b5fe7 ("drm/i915/guc: Close deregister-context race against CT-loss") Signed-off-by: Jesus Narvaez Cc: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Anshuman Gupta Cc: Mousumi Jana Cc: Rodrigo Vivi Cc: Matt Roper Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://lore.kernel.org/r/20250528230551.1855177-1-jesus.narvaez@intel.com (cherry picked from commit f36a75aba1c3176d177964bca76f86a075d2943a) Signed-off-by: Joonas Lahtinen Signed-off-by: Sasha Levin --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 108331a699958b..127316d2c8aa99 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -3443,18 +3443,29 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce) * GuC is active, lets destroy this context, but at this point we can still be racing * with suspend, so we undo everything if the H2G fails in deregister_context so * that GuC reset will find this context during clean up. + * + * There is a race condition where the reset code could have altered + * this context's state and done a wakeref put before we try to + * deregister it here. So check if the context is still set to be + * destroyed before undoing earlier changes, to avoid two wakeref puts + * on the same context. */ ret = deregister_context(ce, ce->guc_id.id); if (ret) { + bool pending_destroyed; spin_lock_irqsave(&ce->guc_state.lock, flags); - set_context_registered(ce); - clr_context_destroyed(ce); + pending_destroyed = context_destroyed(ce); + if (pending_destroyed) { + set_context_registered(ce); + clr_context_destroyed(ce); + } spin_unlock_irqrestore(&ce->guc_state.lock, flags); /* * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements * the wakeref immediately but per function spec usage call this after unlock. */ - intel_wakeref_put_async(>->wakeref); + if (pending_destroyed) + intel_wakeref_put_async(>->wakeref); } return ret; From e24c5d08a202b96f46f1baf9b5985b11d1000336 Mon Sep 17 00:00:00 2001 From: Yongting Lin Date: Tue, 27 May 2025 23:12:22 +0800 Subject: [PATCH 2641/2924] um: Fix tgkill compile error on old host OSes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fd054188999ff19746cc09f4e0f196a113964db9 ] tgkill is a quite old syscall since kernel 2.5.75, but unfortunately glibc doesn't support it before 2.30. Thus some systems fail to compile the latest UserMode Linux. Here is the compile error I encountered when I tried to compile UML in my system shipped with glibc-2.28. CALL scripts/checksyscalls.sh CC arch/um/os-Linux/sigio.o In file included from arch/um/os-Linux/sigio.c:17: arch/um/os-Linux/sigio.c: In function ‘write_sigio_thread’: arch/um/os-Linux/sigio.c:49:19: error: implicit declaration of function ‘tgkill’; did you mean ‘kill’? [-Werror=implicit-function-declaration] CATCH_EINTR(r = tgkill(pid, pid, SIGIO)); ^~~~~~ ./arch/um/include/shared/os.h:21:48: note: in definition of macro ‘CATCH_EINTR’ #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR)) ^~~~ cc1: some warnings being treated as errors Fix it by Replacing glibc call with raw syscall. Fixes: 33c9da5dfb18 ("um: Rewrite the sigio workaround based on epoll and tgkill") Signed-off-by: Yongting Lin Link: https://patch.msgid.link/20250527151222.40371-1-linyongting@gmail.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- arch/um/os-Linux/sigio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index a05a6ecee75615..6de145f8fe3d93 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,7 @@ static void *write_sigio_thread(void *unused) __func__, errno); } - CATCH_EINTR(r = tgkill(pid, pid, SIGIO)); + CATCH_EINTR(r = syscall(__NR_tgkill, pid, pid, SIGIO)); if (r < 0) printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n", __func__, errno); From a04302867094bdc6efac1b598370fc47cf3f2388 Mon Sep 17 00:00:00 2001 From: Shiming Cheng Date: Fri, 30 May 2025 09:26:08 +0800 Subject: [PATCH 2642/2924] net: fix udp gso skb_segment after pull from frag_list [ Upstream commit 3382a1ed7f778db841063f5d7e317ac55f9e7f72 ] Commit a1e40ac5b5e9 ("net: gso: fix udp gso fraglist segmentation after pull from frag_list") detected invalid geometry in frag_list skbs and redirects them from skb_segment_list to more robust skb_segment. But some packets with modified geometry can also hit bugs in that code. We don't know how many such cases exist. Addressing each one by one also requires touching the complex skb_segment code, which risks introducing bugs for other types of skbs. Instead, linearize all these packets that fail the basic invariants on gso fraglist skbs. That is more robust. If only part of the fraglist payload is pulled into head_skb, it will always cause exception when splitting skbs by skb_segment. For detailed call stack information, see below. Valid SKB_GSO_FRAGLIST skbs - consist of two or more segments - the head_skb holds the protocol headers plus first gso_size - one or more frag_list skbs hold exactly one segment - all but the last must be gso_size Optional datapath hooks such as NAT and BPF (bpf_skb_pull_data) can modify fraglist skbs, breaking these invariants. In extreme cases they pull one part of data into skb linear. For UDP, this causes three payloads with lengths of (11,11,10) bytes were pulled tail to become (12,10,10) bytes. The skbs no longer meets the above SKB_GSO_FRAGLIST conditions because payload was pulled into head_skb, it needs to be linearized before pass to regular skb_segment. skb_segment+0xcd0/0xd14 __udp_gso_segment+0x334/0x5f4 udp4_ufo_fragment+0x118/0x15c inet_gso_segment+0x164/0x338 skb_mac_gso_segment+0xc4/0x13c __skb_gso_segment+0xc4/0x124 validate_xmit_skb+0x9c/0x2c0 validate_xmit_skb_list+0x4c/0x80 sch_direct_xmit+0x70/0x404 __dev_queue_xmit+0x64c/0xe5c neigh_resolve_output+0x178/0x1c4 ip_finish_output2+0x37c/0x47c __ip_finish_output+0x194/0x240 ip_finish_output+0x20/0xf4 ip_output+0x100/0x1a0 NF_HOOK+0xc4/0x16c ip_forward+0x314/0x32c ip_rcv+0x90/0x118 __netif_receive_skb+0x74/0x124 process_backlog+0xe8/0x1a4 __napi_poll+0x5c/0x1f8 net_rx_action+0x154/0x314 handle_softirqs+0x154/0x4b8 [118.376811] [C201134] rxq0_pus: [name:bug&]kernel BUG at net/core/skbuff.c:4278! [118.376829] [C201134] rxq0_pus: [name:traps&]Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP [118.470774] [C201134] rxq0_pus: [name:mrdump&]Kernel Offset: 0x178cc00000 from 0xffffffc008000000 [118.470810] [C201134] rxq0_pus: [name:mrdump&]PHYS_OFFSET: 0x40000000 [118.470827] [C201134] rxq0_pus: [name:mrdump&]pstate: 60400005 (nZCv daif +PAN -UAO) [118.470848] [C201134] rxq0_pus: [name:mrdump&]pc : [0xffffffd79598aefc] skb_segment+0xcd0/0xd14 [118.470900] [C201134] rxq0_pus: [name:mrdump&]lr : [0xffffffd79598a5e8] skb_segment+0x3bc/0xd14 [118.470928] [C201134] rxq0_pus: [name:mrdump&]sp : ffffffc008013770 Fixes: a1e40ac5b5e9 ("gso: fix udp gso fraglist segmentation after pull from frag_list") Signed-off-by: Shiming Cheng Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/udp_offload.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 9a8142ccbabe44..9b295b2878befa 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -332,6 +332,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, bool copy_dtor; __sum16 check; __be16 newlen; + int ret = 0; mss = skb_shinfo(gso_skb)->gso_size; if (gso_skb->len <= sizeof(*uh) + mss) @@ -360,6 +361,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size) return __udp_gso_segment_list(gso_skb, features, is_ipv6); + ret = __skb_linearize(gso_skb); + if (ret) + return ERR_PTR(ret); + /* Setup csum, as fraglist skips this in udp4_gro_receive. */ gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head; gso_skb->csum_offset = offsetof(struct udphdr, check); From 66542e9430c625f878a5b5dc0fe41e3458d614bf Mon Sep 17 00:00:00 2001 From: Jinjian Song Date: Fri, 30 May 2025 11:16:48 +0800 Subject: [PATCH 2643/2924] net: wwan: t7xx: Fix napi rx poll issue [ Upstream commit 905fe0845bb27e4eed2ca27ea06e6c4847f1b2b1 ] When driver handles the napi rx polling requests, the netdev might have been released by the dellink logic triggered by the disconnect operation on user plane. However, in the logic of processing skb in polling, an invalid netdev is still being used, which causes a panic. BUG: kernel NULL pointer dereference, address: 00000000000000f1 Oops: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:dev_gro_receive+0x3a/0x620 [...] Call Trace: ? __die_body+0x68/0xb0 ? page_fault_oops+0x379/0x3e0 ? exc_page_fault+0x4f/0xa0 ? asm_exc_page_fault+0x22/0x30 ? __pfx_t7xx_ccmni_recv_skb+0x10/0x10 [mtk_t7xx (HASH:1400 7)] ? dev_gro_receive+0x3a/0x620 napi_gro_receive+0xad/0x170 t7xx_ccmni_recv_skb+0x48/0x70 [mtk_t7xx (HASH:1400 7)] t7xx_dpmaif_napi_rx_poll+0x590/0x800 [mtk_t7xx (HASH:1400 7)] net_rx_action+0x103/0x470 irq_exit_rcu+0x13a/0x310 sysvec_apic_timer_interrupt+0x56/0x90 Fixes: 5545b7b9f294 ("net: wwan: t7xx: Add NAPI support") Signed-off-by: Jinjian Song Link: https://patch.msgid.link/20250530031648.5592-1-jinjian.song@fibocom.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/wwan/t7xx/t7xx_netdev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c index 91fa082e9cab80..fc0a7cb181df2c 100644 --- a/drivers/net/wwan/t7xx/t7xx_netdev.c +++ b/drivers/net/wwan/t7xx/t7xx_netdev.c @@ -302,7 +302,7 @@ static int t7xx_ccmni_wwan_newlink(void *ctxt, struct net_device *dev, u32 if_id ccmni->ctlb = ctlb; ccmni->dev = dev; atomic_set(&ccmni->usage, 0); - ctlb->ccmni_inst[if_id] = ccmni; + WRITE_ONCE(ctlb->ccmni_inst[if_id], ccmni); ret = register_netdevice(dev); if (ret) @@ -324,6 +324,7 @@ static void t7xx_ccmni_wwan_dellink(void *ctxt, struct net_device *dev, struct l if (WARN_ON(ctlb->ccmni_inst[if_id] != ccmni)) return; + WRITE_ONCE(ctlb->ccmni_inst[if_id], NULL); unregister_netdevice(dev); } @@ -419,7 +420,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu skb_cb = T7XX_SKB_CB(skb); netif_id = skb_cb->netif_idx; - ccmni = ccmni_ctlb->ccmni_inst[netif_id]; + ccmni = READ_ONCE(ccmni_ctlb->ccmni_inst[netif_id]); if (!ccmni) { dev_kfree_skb(skb); return; @@ -441,7 +442,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno) { - struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0]; + struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]); struct netdev_queue *net_queue; if (netif_running(ccmni->dev) && atomic_read(&ccmni->usage) > 0) { @@ -453,7 +454,7 @@ static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno static void t7xx_ccmni_queue_tx_full_notify(struct t7xx_ccmni_ctrl *ctlb, int qno) { - struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0]; + struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]); struct netdev_queue *net_queue; if (atomic_read(&ccmni->usage) > 0) { @@ -471,7 +472,7 @@ static void t7xx_ccmni_queue_state_notify(struct t7xx_pci_dev *t7xx_dev, if (ctlb->md_sta != MD_STATE_READY) return; - if (!ctlb->ccmni_inst[0]) { + if (!READ_ONCE(ctlb->ccmni_inst[0])) { dev_warn(&t7xx_dev->pdev->dev, "No netdev registered yet\n"); return; } From e7c0c7353b477eca8582b9fa2bdd2507834be1fe Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Fri, 30 May 2025 15:27:00 +0000 Subject: [PATCH 2644/2924] vmxnet3: correctly report gso type for UDP tunnels [ Upstream commit 982d30c30eaa2ec723df42e3bf526c014c1dbb88 ] Commit 3d010c8031e3 ("udp: do not accept non-tunnel GSO skbs landing in a tunnel") added checks in linux stack to not accept non-tunnel GRO packets landing in a tunnel. This exposed an issue in vmxnet3 which was not correctly reporting GRO packets for tunnel packets. This patch fixes this issue by setting correct GSO type for the tunnel packets. Currently, vmxnet3 does not support reporting inner fields for LRO tunnel packets. The issue is not seen for egress drivers that do not use skb inner fields. The workaround is to enable tnl-segmentation offload on the egress interfaces if the driver supports it. This problem pre-exists this patch fix and can be addressed as a separate future patch. Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Link: https://patch.msgid.link/20250530152701.70354-1-ronak.doshi@broadcom.com [pabeni@redhat.com: dropped the changelog] Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/vmxnet3/vmxnet3_drv.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index c676979c7ab940..287b7c20c0d6c6 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1568,6 +1568,30 @@ vmxnet3_get_hdr_len(struct vmxnet3_adapter *adapter, struct sk_buff *skb, return (hlen + (hdr.tcp->doff << 2)); } +static void +vmxnet3_lro_tunnel(struct sk_buff *skb, __be16 ip_proto) +{ + struct udphdr *uh = NULL; + + if (ip_proto == htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + if (iph->protocol == IPPROTO_UDP) + uh = (struct udphdr *)(iph + 1); + } else { + struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; + + if (iph->nexthdr == IPPROTO_UDP) + uh = (struct udphdr *)(iph + 1); + } + if (uh) { + if (uh->check) + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + else + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } +} + static int vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter, int quota) @@ -1881,6 +1905,8 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, if (segCnt != 0 && mss != 0) { skb_shinfo(skb)->gso_type = rcd->v4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6; + if (encap_lro) + vmxnet3_lro_tunnel(skb, skb->protocol); skb_shinfo(skb)->gso_size = mss; skb_shinfo(skb)->gso_segs = segCnt; } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) { From 8fd973681f033564173468733856f898235e928a Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Sun, 1 Jun 2025 21:29:13 +0700 Subject: [PATCH 2645/2924] selftests: net: build net/lib dependency in all target [ Upstream commit d3f2a9587ebe68f5067f9ff624f9a83dfb911f60 ] We have the logic to include net/lib automatically for net related selftests. However, currently, this logic is only in install target which means only `make install` will have net/lib included. This commit adds the logic to all target so that all `make`, `make run_tests` and `make install` will have net/lib included in net related selftests. Signed-off-by: Bui Quang Minh Link: https://patch.msgid.link/20250601142914.13379-1-minhquangbui99@gmail.com Fixes: b86761ff6374 ("selftests: net: add scaffolding for Netlink tests in Python") Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- tools/testing/selftests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 80fb84fa3cfcbd..9c477321a5b474 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -202,7 +202,7 @@ export KHDR_INCLUDES all: @ret=1; \ - for TARGET in $(TARGETS); do \ + for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ mkdir $$BUILD_TARGET -p; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \ From 6c8f62776b5a002daca7ba12dd9ce149351faea9 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 21 May 2025 09:16:39 +0200 Subject: [PATCH 2646/2924] net: airoha: Add the capability to allocate hfwd descriptors in SRAM [ Upstream commit c683e378c0907e66cee939145edf936c254ff1e3 ] In order to improve packet processing and packet forwarding performances, EN7581 SoC supports consuming SRAM instead of DRAM for hw forwarding descriptors queue. For downlink hw accelerated traffic request to consume SRAM memory for hw forwarding descriptors queue. Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250521-airopha-desc-sram-v3-4-a6e9b085b4f0@kernel.org Signed-off-by: Paolo Abeni Stable-dep-of: a869d3a5eb01 ("net: airoha: Initialize PPE UPDMEM source-mac table") Signed-off-by: Sasha Levin --- drivers/net/ethernet/airoha/airoha_eth.c | 11 +---------- drivers/net/ethernet/airoha/airoha_eth.h | 9 +++++++++ drivers/net/ethernet/airoha/airoha_ppe.c | 6 ++++++ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index c169b8b411b4f5..c8664840f3fc2a 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -67,15 +67,6 @@ static void airoha_qdma_irq_disable(struct airoha_qdma *qdma, int index, airoha_qdma_set_irqmask(qdma, index, mask, 0); } -static bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) -{ - /* GDM1 port on EN7581 SoC is connected to the lan dsa switch. - * GDM{2,3,4} can be used as wan port connected to an external - * phy module. - */ - return port->id == 1; -} - static void airoha_set_macaddr(struct airoha_gdm_port *port, const u8 *addr) { struct airoha_eth *eth = port->qdma->eth; @@ -1068,7 +1059,7 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) LMGR_INIT_START | LMGR_SRAM_MODE_MASK | HW_FWD_DESC_NUM_MASK, FIELD_PREP(HW_FWD_DESC_NUM_MASK, HW_DSCP_NUM) | - LMGR_INIT_START); + LMGR_INIT_START | LMGR_SRAM_MODE_MASK); return read_poll_timeout(airoha_qdma_rr, status, !(status & LMGR_INIT_START), USEC_PER_MSEC, diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index ec8908f904c619..0204ea030c31a2 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -532,6 +532,15 @@ u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val); #define airoha_qdma_clear(qdma, offset, val) \ airoha_rmw((qdma)->regs, (offset), (val), 0) +static inline bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) +{ + /* GDM1 port on EN7581 SoC is connected to the lan dsa switch. + * GDM{2,3,4} can be used as wan port connected to an external + * phy module. + */ + return port->id == 1; +} + bool airoha_is_valid_gdm_port(struct airoha_eth *eth, struct airoha_gdm_port *port); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index f10dab935cab6f..843fe7bd1446eb 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -234,6 +234,12 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, else pse_port = 2; /* uplink relies on GDM2 loopback */ val |= FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT, pse_port); + + /* For downlink traffic consume SRAM memory for hw forwarding + * descriptors queue. + */ + if (airhoa_is_lan_gdm_port(port)) + val |= AIROHA_FOE_IB2_FAST_PATH; } if (is_multicast_ether_addr(data->eth.h_dest)) From f304434e5f7a90efd1130081a77c0b1ff7769db5 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 2 Jun 2025 12:55:37 +0200 Subject: [PATCH 2647/2924] net: airoha: Initialize PPE UPDMEM source-mac table [ Upstream commit a869d3a5eb011a9cf9bd864f31f5cf27362de8c7 ] UPDMEM source-mac table is a key-value map used to store devices mac addresses according to the port identifier. UPDMEM source mac table is used during IPv6 traffic hw acceleration since PPE entries, for space constraints, do not contain the full source mac address but just the identifier in the UPDMEM source-mac table. Configure UPDMEM source-mac table with device mac addresses and set the source-mac ID field for PPE IPv6 entries in order to select the proper device mac address as source mac for L3 IPv6 hw accelerated traffic. Fixes: 00a7678310fe ("net: airoha: Introduce flowtable offload support") Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250602-airoha-flowtable-ipv6-fix-v2-1-3287f8b55214@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/airoha/airoha_eth.c | 2 ++ drivers/net/ethernet/airoha/airoha_eth.h | 1 + drivers/net/ethernet/airoha/airoha_ppe.c | 26 ++++++++++++++++++++++- drivers/net/ethernet/airoha/airoha_regs.h | 10 +++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index c8664840f3fc2a..af28a9300a15c7 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -80,6 +80,8 @@ static void airoha_set_macaddr(struct airoha_gdm_port *port, const u8 *addr) val = (addr[3] << 16) | (addr[4] << 8) | addr[5]; airoha_fe_wr(eth, REG_FE_MAC_LMIN(reg), val); airoha_fe_wr(eth, REG_FE_MAC_LMAX(reg), val); + + airoha_ppe_init_upd_mem(port); } static void airoha_set_gdm_port_fwd_cfg(struct airoha_eth *eth, u32 addr, diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index 0204ea030c31a2..2bf6b1a2dd9b03 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -549,6 +549,7 @@ int airoha_ppe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv); int airoha_ppe_init(struct airoha_eth *eth); void airoha_ppe_deinit(struct airoha_eth *eth); +void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port); struct airoha_foe_entry *airoha_ppe_foe_get_entry(struct airoha_ppe *ppe, u32 hash); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 843fe7bd1446eb..1b8f21f808890e 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -206,6 +206,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, int dsa_port = airoha_get_dsa_port(&dev); struct airoha_foe_mac_info_common *l2; u32 qdata, ports_pad, val; + u8 smac_id = 0xf; memset(hwe, 0, sizeof(*hwe)); @@ -240,6 +241,8 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, */ if (airhoa_is_lan_gdm_port(port)) val |= AIROHA_FOE_IB2_FAST_PATH; + + smac_id = port->id; } if (is_multicast_ether_addr(data->eth.h_dest)) @@ -280,7 +283,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, hwe->ipv4.l2.src_mac_lo = get_unaligned_be16(data->eth.h_source + 4); } else { - l2->src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID, 0xf); + l2->src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID, smac_id); } if (data->vlan.num) { @@ -868,6 +871,27 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, u16 hash) airoha_ppe_foe_insert_entry(ppe, hash); } +void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port) +{ + struct airoha_eth *eth = port->qdma->eth; + struct net_device *dev = port->dev; + const u8 *addr = dev->dev_addr; + u32 val; + + val = (addr[2] << 24) | (addr[3] << 16) | (addr[4] << 8) | addr[5]; + airoha_fe_wr(eth, REG_UPDMEM_DATA(0), val); + airoha_fe_wr(eth, REG_UPDMEM_CTRL(0), + FIELD_PREP(PPE_UPDMEM_ADDR_MASK, port->id) | + PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK); + + val = (addr[0] << 8) | addr[1]; + airoha_fe_wr(eth, REG_UPDMEM_DATA(0), val); + airoha_fe_wr(eth, REG_UPDMEM_CTRL(0), + FIELD_PREP(PPE_UPDMEM_ADDR_MASK, port->id) | + FIELD_PREP(PPE_UPDMEM_OFFSET_MASK, 1) | + PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK); +} + int airoha_ppe_init(struct airoha_eth *eth) { struct airoha_ppe *ppe; diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 8146cde4e8ba37..57bff8d2de276b 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -312,6 +312,16 @@ #define REG_PPE_RAM_BASE(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x320) #define REG_PPE_RAM_ENTRY(_m, _n) (REG_PPE_RAM_BASE(_m) + ((_n) << 2)) +#define REG_UPDMEM_CTRL(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x370) +#define PPE_UPDMEM_ACK_MASK BIT(31) +#define PPE_UPDMEM_ADDR_MASK GENMASK(11, 8) +#define PPE_UPDMEM_OFFSET_MASK GENMASK(7, 4) +#define PPE_UPDMEM_SEL_MASK GENMASK(3, 2) +#define PPE_UPDMEM_WR_MASK BIT(1) +#define PPE_UPDMEM_REQ_MASK BIT(0) + +#define REG_UPDMEM_DATA(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x374) + #define REG_FE_GDM_TX_OK_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x280) #define REG_FE_GDM_TX_OK_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x284) #define REG_FE_GDM_TX_ETH_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x288) From 89e7dd465fe61cda21f0f5cc0b98d5f1566fe33c Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 4 Apr 2025 12:23:16 +0200 Subject: [PATCH 2648/2924] iavf: iavf_suspend(): take RTNL before netdev_lock() [ Upstream commit dba35a4bb4a3da5696f2a179b7d695dc3ea25fb8 ] Fix an obvious violation of lock ordering. Jakub's [1] added netdev_lock() call that is wrong ordered wrt RTNL, but the Fixes tag points to crit_lock being wrongly placed (by lockdep standards). Actual reason we got it wrong is dated back to critical section managed by pure flag checks, which is with us since the very beginning. [1] afc664987ab3 ("eth: iavf: extend the netdev_lock usage") Fixes: 5ac49f3c2702 ("iavf: use mutexes for locking of critical sections") Reviewed-by: Jacob Keller Signed-off-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 6d7ba4d67a1933..a77c726435281d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -5596,22 +5596,27 @@ static int iavf_suspend(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct iavf_adapter *adapter = netdev_priv(netdev); + bool running; netif_device_detach(netdev); + running = netif_running(netdev); + if (running) + rtnl_lock(); + netdev_lock(netdev); mutex_lock(&adapter->crit_lock); - if (netif_running(netdev)) { - rtnl_lock(); + if (running) iavf_down(adapter); - rtnl_unlock(); - } + iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); + if (running) + rtnl_unlock(); return 0; } From dae1e290290b01b1476b2f5ba17759a88e2929e5 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 4 Apr 2025 12:23:17 +0200 Subject: [PATCH 2649/2924] iavf: centralize watchdog requeueing itself [ Upstream commit 099418da91b7d3d46ddcccbb03075cc4f1ba2d44 ] Centralize the unlock(critlock); unlock(netdev); queue_delayed_work(watchog_task); pattern to one place. Reviewed-by: Jacob Keller Signed-off-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Stable-dep-of: 120f28a6f314 ("iavf: get rid of the crit lock") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 103 ++++++++------------ 1 file changed, 41 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index a77c726435281d..2c6e033c73419b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2911,6 +2911,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) iavf_change_state(adapter, __IAVF_INIT_FAILED); } +static const int IAVF_NO_RESCHED = -1; + /** * iavf_watchdog_task - Periodic call-back task * @work: pointer to work_struct @@ -2922,6 +2924,7 @@ static void iavf_watchdog_task(struct work_struct *work) watchdog_task.work); struct net_device *netdev = adapter->netdev; struct iavf_hw *hw = &adapter->hw; + int msec_delay; u32 reg_val; netdev_lock(netdev); @@ -2940,39 +2943,24 @@ static void iavf_watchdog_task(struct work_struct *work) switch (adapter->state) { case __IAVF_STARTUP: iavf_startup(adapter); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(30)); - return; + msec_delay = 30; + goto watchdog_done; case __IAVF_INIT_VERSION_CHECK: iavf_init_version_check(adapter); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(30)); - return; + msec_delay = 30; + goto watchdog_done; case __IAVF_INIT_GET_RESOURCES: iavf_init_get_resources(adapter); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(1)); - return; + msec_delay = 1; + goto watchdog_done; case __IAVF_INIT_EXTENDED_CAPS: iavf_init_process_extended_caps(adapter); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(1)); - return; + msec_delay = 1; + goto watchdog_done; case __IAVF_INIT_CONFIG_ADAPTER: iavf_init_config_adapter(adapter); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(1)); - return; + msec_delay = 1; + goto watchdog_done; case __IAVF_INIT_FAILED: if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { @@ -2980,27 +2968,21 @@ static void iavf_watchdog_task(struct work_struct *work) * watchdog task, iavf_remove should handle this state * as it can loop forever */ - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - return; + msec_delay = IAVF_NO_RESCHED; + goto watchdog_done; } if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { dev_err(&adapter->pdev->dev, "Failed to communicate with PF; waiting before retry\n"); adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; iavf_shutdown_adminq(hw); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, - &adapter->watchdog_task, (5 * HZ)); - return; + msec_delay = 5000; + goto watchdog_done; } /* Try again from failed step*/ iavf_change_state(adapter, adapter->last_state); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ); - return; + msec_delay = 1000; + goto watchdog_done; case __IAVF_COMM_FAILED: if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { @@ -3010,9 +2992,8 @@ static void iavf_watchdog_task(struct work_struct *work) */ iavf_change_state(adapter, __IAVF_INIT_FAILED); adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - return; + msec_delay = IAVF_NO_RESCHED; + goto watchdog_done; } reg_val = rd32(hw, IAVF_VFGEN_RSTAT) & IAVF_VFGEN_RSTAT_VFR_STATE_MASK; @@ -3030,18 +3011,11 @@ static void iavf_watchdog_task(struct work_struct *work) } adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, - &adapter->watchdog_task, - msecs_to_jiffies(10)); - return; + msec_delay = 10; + goto watchdog_done; case __IAVF_RESETTING: - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - HZ * 2); - return; + msec_delay = 2000; + goto watchdog_done; case __IAVF_DOWN: case __IAVF_DOWN_PENDING: case __IAVF_TESTING: @@ -3068,9 +3042,8 @@ static void iavf_watchdog_task(struct work_struct *work) break; case __IAVF_REMOVE: default: - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - return; + msec_delay = IAVF_NO_RESCHED; + goto watchdog_done; } /* check for hw reset */ @@ -3080,24 +3053,30 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->current_op = VIRTCHNL_OP_UNKNOWN; dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, - &adapter->watchdog_task, HZ * 2); - return; + msec_delay = 2000; + goto watchdog_done; } mutex_unlock(&adapter->crit_lock); restart_watchdog: netdev_unlock(netdev); + + /* note that we schedule a different task */ if (adapter->state >= __IAVF_DOWN) queue_work(adapter->wq, &adapter->adminq_task); if (adapter->aq_required) - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(20)); + msec_delay = 20; else + msec_delay = 2000; + goto skip_unlock; +watchdog_done: + mutex_unlock(&adapter->crit_lock); + netdev_unlock(netdev); +skip_unlock: + + if (msec_delay != IAVF_NO_RESCHED) queue_delayed_work(adapter->wq, &adapter->watchdog_task, - HZ * 2); + msecs_to_jiffies(msec_delay)); } /** From 352a46081e4147222aa472f75c2f7c04406a816e Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 4 Apr 2025 12:23:18 +0200 Subject: [PATCH 2650/2924] iavf: simplify watchdog_task in terms of adminq task scheduling [ Upstream commit ecb4cd0461accc446d20a7a167f39ed2fd5e9b0e ] Simplify the decision whether to schedule adminq task. The condition is the same, but it is executed in more scenarios. Note that movement of watchdog_done label makes this commit a bit surprising. (Hence not squashing it to anything bigger). Reviewed-by: Jacob Keller Signed-off-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Stable-dep-of: 120f28a6f314 ("iavf: get rid of the crit lock") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2c6e033c73419b..5efe44724d1123 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2934,6 +2934,7 @@ static void iavf_watchdog_task(struct work_struct *work) return; } + msec_delay = 20; goto restart_watchdog; } @@ -3053,10 +3054,13 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->current_op = VIRTCHNL_OP_UNKNOWN; dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING); - msec_delay = 2000; - goto watchdog_done; } + if (adapter->aq_required) + msec_delay = 20; + else + msec_delay = 2000; +watchdog_done: mutex_unlock(&adapter->crit_lock); restart_watchdog: netdev_unlock(netdev); @@ -3064,15 +3068,6 @@ static void iavf_watchdog_task(struct work_struct *work) /* note that we schedule a different task */ if (adapter->state >= __IAVF_DOWN) queue_work(adapter->wq, &adapter->adminq_task); - if (adapter->aq_required) - msec_delay = 20; - else - msec_delay = 2000; - goto skip_unlock; -watchdog_done: - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); -skip_unlock: if (msec_delay != IAVF_NO_RESCHED) queue_delayed_work(adapter->wq, &adapter->watchdog_task, From 47709f87e054fee797ff8c718938fc41b31f0c5c Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 4 Apr 2025 12:23:19 +0200 Subject: [PATCH 2651/2924] iavf: extract iavf_watchdog_step() out of iavf_watchdog_task() [ Upstream commit 257a8241ad7f4dc312494f69e3bc79a5598b4514 ] Finish up easy refactor of watchdog_task, total for this + prev two commits is: 1 file changed, 47 insertions(+), 82 deletions(-) Signed-off-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Stable-dep-of: 120f28a6f314 ("iavf: get rid of the crit lock") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 87 +++++++++------------ 1 file changed, 39 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 5efe44724d1123..4b6963ffaba5f9 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2913,30 +2913,14 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) static const int IAVF_NO_RESCHED = -1; -/** - * iavf_watchdog_task - Periodic call-back task - * @work: pointer to work_struct - **/ -static void iavf_watchdog_task(struct work_struct *work) +/* return: msec delay for requeueing itself */ +static int iavf_watchdog_step(struct iavf_adapter *adapter) { - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - watchdog_task.work); - struct net_device *netdev = adapter->netdev; struct iavf_hw *hw = &adapter->hw; - int msec_delay; u32 reg_val; - netdev_lock(netdev); - if (!mutex_trylock(&adapter->crit_lock)) { - if (adapter->state == __IAVF_REMOVE) { - netdev_unlock(netdev); - return; - } - - msec_delay = 20; - goto restart_watchdog; - } + netdev_assert_locked(adapter->netdev); + lockdep_assert_held(&adapter->crit_lock); if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) iavf_change_state(adapter, __IAVF_COMM_FAILED); @@ -2944,24 +2928,19 @@ static void iavf_watchdog_task(struct work_struct *work) switch (adapter->state) { case __IAVF_STARTUP: iavf_startup(adapter); - msec_delay = 30; - goto watchdog_done; + return 30; case __IAVF_INIT_VERSION_CHECK: iavf_init_version_check(adapter); - msec_delay = 30; - goto watchdog_done; + return 30; case __IAVF_INIT_GET_RESOURCES: iavf_init_get_resources(adapter); - msec_delay = 1; - goto watchdog_done; + return 1; case __IAVF_INIT_EXTENDED_CAPS: iavf_init_process_extended_caps(adapter); - msec_delay = 1; - goto watchdog_done; + return 1; case __IAVF_INIT_CONFIG_ADAPTER: iavf_init_config_adapter(adapter); - msec_delay = 1; - goto watchdog_done; + return 1; case __IAVF_INIT_FAILED: if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { @@ -2969,21 +2948,18 @@ static void iavf_watchdog_task(struct work_struct *work) * watchdog task, iavf_remove should handle this state * as it can loop forever */ - msec_delay = IAVF_NO_RESCHED; - goto watchdog_done; + return IAVF_NO_RESCHED; } if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { dev_err(&adapter->pdev->dev, "Failed to communicate with PF; waiting before retry\n"); adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; iavf_shutdown_adminq(hw); - msec_delay = 5000; - goto watchdog_done; + return 5000; } /* Try again from failed step*/ iavf_change_state(adapter, adapter->last_state); - msec_delay = 1000; - goto watchdog_done; + return 1000; case __IAVF_COMM_FAILED: if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { @@ -2993,8 +2969,7 @@ static void iavf_watchdog_task(struct work_struct *work) */ iavf_change_state(adapter, __IAVF_INIT_FAILED); adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - msec_delay = IAVF_NO_RESCHED; - goto watchdog_done; + return IAVF_NO_RESCHED; } reg_val = rd32(hw, IAVF_VFGEN_RSTAT) & IAVF_VFGEN_RSTAT_VFR_STATE_MASK; @@ -3012,11 +2987,9 @@ static void iavf_watchdog_task(struct work_struct *work) } adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; - msec_delay = 10; - goto watchdog_done; + return 10; case __IAVF_RESETTING: - msec_delay = 2000; - goto watchdog_done; + return 2000; case __IAVF_DOWN: case __IAVF_DOWN_PENDING: case __IAVF_TESTING: @@ -3043,8 +3016,7 @@ static void iavf_watchdog_task(struct work_struct *work) break; case __IAVF_REMOVE: default: - msec_delay = IAVF_NO_RESCHED; - goto watchdog_done; + return IAVF_NO_RESCHED; } /* check for hw reset */ @@ -3055,12 +3027,31 @@ static void iavf_watchdog_task(struct work_struct *work) dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING); } - if (adapter->aq_required) + + return adapter->aq_required ? 20 : 2000; +} + +static void iavf_watchdog_task(struct work_struct *work) +{ + struct iavf_adapter *adapter = container_of(work, + struct iavf_adapter, + watchdog_task.work); + struct net_device *netdev = adapter->netdev; + int msec_delay; + + netdev_lock(netdev); + if (!mutex_trylock(&adapter->crit_lock)) { + if (adapter->state == __IAVF_REMOVE) { + netdev_unlock(netdev); + return; + } + msec_delay = 20; - else - msec_delay = 2000; + goto restart_watchdog; + } + + msec_delay = iavf_watchdog_step(adapter); -watchdog_done: mutex_unlock(&adapter->crit_lock); restart_watchdog: netdev_unlock(netdev); From 5c9f23bac8b9c057df51659ede6a23f049b385da Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 4 Apr 2025 12:23:20 +0200 Subject: [PATCH 2652/2924] iavf: sprinkle netdev_assert_locked() annotations [ Upstream commit 05702b5c949bd46243181833d4726f4c5e95f5e3 ] Lockdep annotations help in general, but here it is extra good, as next commit will remove crit lock. Reviewed-by: Jacob Keller Signed-off-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Stable-dep-of: 120f28a6f314 ("iavf: get rid of the crit lock") Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 6 ++++++ drivers/net/ethernet/intel/iavf/iavf_main.c | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 288bb5b2e72ef7..03d86fe80ad910 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -4,6 +4,8 @@ #include #include +#include + /* ethtool support for iavf */ #include "iavf.h" @@ -1259,6 +1261,8 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx int count = 50; int err; + netdev_assert_locked(adapter->netdev); + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; @@ -1440,6 +1444,8 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, u64 hash_flds; u32 hdrs; + netdev_assert_locked(adapter->netdev); + if (!ADV_RSS_SUPPORT(adapter)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 4b6963ffaba5f9..bf8c7baf2ab8ae 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1292,6 +1292,8 @@ static void iavf_configure(struct iavf_adapter *adapter) **/ static void iavf_up_complete(struct iavf_adapter *adapter) { + netdev_assert_locked(adapter->netdev); + iavf_change_state(adapter, __IAVF_RUNNING); clear_bit(__IAVF_VSI_DOWN, adapter->vsi.state); @@ -1417,6 +1419,8 @@ void iavf_down(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; + netdev_assert_locked(netdev); + if (adapter->state <= __IAVF_DOWN_PENDING) return; @@ -3078,6 +3082,8 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) struct iavf_vlan_filter *fv, *fvtmp; struct iavf_cloud_filter *cf, *cftmp; + netdev_assert_locked(adapter->netdev); + adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; /* We don't use netif_running() because it may be true prior to @@ -5194,6 +5200,8 @@ iavf_shaper_set(struct net_shaper_binding *binding, struct iavf_ring *tx_ring; int ret = 0; + netdev_assert_locked(adapter->netdev); + mutex_lock(&adapter->crit_lock); if (handle->id >= adapter->num_active_queues) goto unlock; @@ -5222,6 +5230,8 @@ static int iavf_shaper_del(struct net_shaper_binding *binding, struct iavf_adapter *adapter = netdev_priv(binding->netdev); struct iavf_ring *tx_ring; + netdev_assert_locked(adapter->netdev); + mutex_lock(&adapter->crit_lock); if (handle->id >= adapter->num_active_queues) goto unlock; From 620ab4d6215de0b25227f9fff1a8c7fb66837cb8 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 4 Apr 2025 12:23:21 +0200 Subject: [PATCH 2653/2924] iavf: get rid of the crit lock [ Upstream commit 120f28a6f314fef7f282c99f196923fe44081cad ] Get rid of the crit lock. That frees us from the error prone logic of try_locks. Thanks to netdev_lock() by Jakub it is now easy, and in most cases we were protected by it already - replace crit lock by netdev lock when it was not the case. Lockdep reports that we should cancel the work under crit_lock [splat1], and that was the scheme we have mostly followed since [1] by Slawomir. But when that is done we still got into deadlocks [splat2]. So instead we should look at the bigger problem, namely "weird locking/scheduling" of the iavf. The first step to fix that is to remove the crit lock. I will followup with a -next series that simplifies scheduling/tasks. Cancel the work without netdev lock (weird unlock+lock scheme), to fix the [splat2] (which would be totally ugly if we would kept the crit lock). Extend protected part of iavf_watchdog_task() to include scheduling more work. Note that the removed comment in iavf_reset_task() was misplaced, it belonged to inside of the removed if condition, so it's gone now. [splat1] - w/o this patch - The deadlock during VF removal: WARNING: possible circular locking dependency detected sh/3825 is trying to acquire lock: ((work_completion)(&(&adapter->watchdog_task)->work)){+.+.}-{0:0}, at: start_flush_work+0x1a1/0x470 but task is already holding lock: (&adapter->crit_lock){+.+.}-{4:4}, at: iavf_remove+0xd1/0x690 [iavf] which lock already depends on the new lock. [splat2] - when cancelling work under crit lock, w/o this series, see [2] for the band aid attempt WARNING: possible circular locking dependency detected sh/3550 is trying to acquire lock: ((wq_completion)iavf){+.+.}-{0:0}, at: touch_wq_lockdep_map+0x26/0x90 but task is already holding lock: (&dev->lock){+.+.}-{4:4}, at: iavf_remove+0xa6/0x6e0 [iavf] which lock already depends on the new lock. [1] fc2e6b3b132a ("iavf: Rework mutexes for better synchronisation") [2] https://github.com/pkitszel/linux/commit/52dddbfc2bb60294083f5711a158a Fixes: d1639a17319b ("iavf: fix a deadlock caused by rtnl and driver's lock circular dependencies") Signed-off-by: Przemek Kitszel Reviewed-by: Aleksandr Loktionov Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf.h | 1 - .../net/ethernet/intel/iavf/iavf_ethtool.c | 23 +-- drivers/net/ethernet/intel/iavf/iavf_main.c | 165 ++++-------------- 3 files changed, 38 insertions(+), 151 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 9de3e0ba37316c..f7a98ff43a57fb 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -268,7 +268,6 @@ struct iavf_adapter { struct list_head vlan_filter_list; int num_vlan_filters; struct list_head mac_filter_list; - struct mutex crit_lock; /* Lock to protect accesses to MAC and VLAN lists */ spinlock_t mac_vlan_list_lock; char misc_vector_name[IFNAMSIZ + 9]; diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 03d86fe80ad910..2b2b315205b5e0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -1258,7 +1258,6 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx { struct ethtool_rx_flow_spec *fsp = &cmd->fs; struct iavf_fdir_fltr *fltr; - int count = 50; int err; netdev_assert_locked(adapter->netdev); @@ -1281,14 +1280,6 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx if (!fltr) return -ENOMEM; - while (!mutex_trylock(&adapter->crit_lock)) { - if (--count == 0) { - kfree(fltr); - return -EINVAL; - } - udelay(1); - } - err = iavf_add_fdir_fltr_info(adapter, fsp, fltr); if (!err) err = iavf_fdir_add_fltr(adapter, fltr); @@ -1296,7 +1287,6 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx if (err) kfree(fltr); - mutex_unlock(&adapter->crit_lock); return err; } @@ -1439,9 +1429,9 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, { struct iavf_adv_rss *rss_old, *rss_new; bool rss_new_add = false; - int count = 50, err = 0; bool symm = false; u64 hash_flds; + int err = 0; u32 hdrs; netdev_assert_locked(adapter->netdev); @@ -1469,15 +1459,6 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, return -EINVAL; } - while (!mutex_trylock(&adapter->crit_lock)) { - if (--count == 0) { - kfree(rss_new); - return -EINVAL; - } - - udelay(1); - } - spin_lock_bh(&adapter->adv_rss_lock); rss_old = iavf_find_adv_rss_cfg_by_hdrs(adapter, hdrs); if (rss_old) { @@ -1506,8 +1487,6 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, if (!err) iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_ADV_RSS_CFG); - mutex_unlock(&adapter->crit_lock); - if (!rss_new_add) kfree(rss_new); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index bf8c7baf2ab8ae..2c0bb41809a414 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1287,9 +1287,7 @@ static void iavf_configure(struct iavf_adapter *adapter) /** * iavf_up_complete - Finish the last steps of bringing up a connection * @adapter: board private structure - * - * Expects to be called while holding crit_lock. - **/ + */ static void iavf_up_complete(struct iavf_adapter *adapter) { netdev_assert_locked(adapter->netdev); @@ -1412,9 +1410,7 @@ static void iavf_clear_adv_rss_conf(struct iavf_adapter *adapter) /** * iavf_down - Shutdown the connection processing * @adapter: board private structure - * - * Expects to be called while holding crit_lock. - **/ + */ void iavf_down(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -2029,22 +2025,21 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool runni * iavf_finish_config - do all netdev work that needs RTNL * @work: our work_struct * - * Do work that needs both RTNL and crit_lock. - **/ + * Do work that needs RTNL. + */ static void iavf_finish_config(struct work_struct *work) { struct iavf_adapter *adapter; - bool locks_released = false; + bool netdev_released = false; int pairs, err; adapter = container_of(work, struct iavf_adapter, finish_config); /* Always take RTNL first to prevent circular lock dependency; - * The dev->lock is needed to update the queue number + * the dev->lock (== netdev lock) is needed to update the queue number. */ rtnl_lock(); netdev_lock(adapter->netdev); - mutex_lock(&adapter->crit_lock); if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && adapter->netdev->reg_state == NETREG_REGISTERED && @@ -2063,22 +2058,21 @@ static void iavf_finish_config(struct work_struct *work) netif_set_real_num_tx_queues(adapter->netdev, pairs); if (adapter->netdev->reg_state != NETREG_REGISTERED) { - mutex_unlock(&adapter->crit_lock); netdev_unlock(adapter->netdev); - locks_released = true; + netdev_released = true; err = register_netdevice(adapter->netdev); if (err) { dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n", err); /* go back and try again.*/ - mutex_lock(&adapter->crit_lock); + netdev_lock(adapter->netdev); iavf_free_rss(adapter); iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER); - mutex_unlock(&adapter->crit_lock); + netdev_unlock(adapter->netdev); goto out; } } @@ -2094,10 +2088,8 @@ static void iavf_finish_config(struct work_struct *work) } out: - if (!locks_released) { - mutex_unlock(&adapter->crit_lock); + if (!netdev_released) netdev_unlock(adapter->netdev); - } rtnl_unlock(); } @@ -2924,7 +2916,6 @@ static int iavf_watchdog_step(struct iavf_adapter *adapter) u32 reg_val; netdev_assert_locked(adapter->netdev); - lockdep_assert_held(&adapter->crit_lock); if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) iavf_change_state(adapter, __IAVF_COMM_FAILED); @@ -3044,22 +3035,7 @@ static void iavf_watchdog_task(struct work_struct *work) int msec_delay; netdev_lock(netdev); - if (!mutex_trylock(&adapter->crit_lock)) { - if (adapter->state == __IAVF_REMOVE) { - netdev_unlock(netdev); - return; - } - - msec_delay = 20; - goto restart_watchdog; - } - msec_delay = iavf_watchdog_step(adapter); - - mutex_unlock(&adapter->crit_lock); -restart_watchdog: - netdev_unlock(netdev); - /* note that we schedule a different task */ if (adapter->state >= __IAVF_DOWN) queue_work(adapter->wq, &adapter->adminq_task); @@ -3067,6 +3043,7 @@ static void iavf_watchdog_task(struct work_struct *work) if (msec_delay != IAVF_NO_RESCHED) queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(msec_delay)); + netdev_unlock(netdev); } /** @@ -3074,8 +3051,7 @@ static void iavf_watchdog_task(struct work_struct *work) * @adapter: board private structure * * Set communication failed flag and free all resources. - * NOTE: This function is expected to be called with crit_lock being held. - **/ + */ static void iavf_disable_vf(struct iavf_adapter *adapter) { struct iavf_mac_filter *f, *ftmp; @@ -3183,17 +3159,7 @@ static void iavf_reset_task(struct work_struct *work) int i = 0, err; bool running; - /* When device is being removed it doesn't make sense to run the reset - * task, just return in such a case. - */ netdev_lock(netdev); - if (!mutex_trylock(&adapter->crit_lock)) { - if (adapter->state != __IAVF_REMOVE) - queue_work(adapter->wq, &adapter->reset_task); - - netdev_unlock(netdev); - return; - } iavf_misc_irq_disable(adapter); if (adapter->flags & IAVF_FLAG_RESET_NEEDED) { @@ -3238,7 +3204,6 @@ static void iavf_reset_task(struct work_struct *work) dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", reg_val); iavf_disable_vf(adapter); - mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -3382,7 +3347,6 @@ static void iavf_reset_task(struct work_struct *work) adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; wake_up(&adapter->reset_waitqueue); - mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); return; @@ -3393,7 +3357,6 @@ static void iavf_reset_task(struct work_struct *work) } iavf_disable_vf(adapter); - mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); } @@ -3406,6 +3369,7 @@ static void iavf_adminq_task(struct work_struct *work) { struct iavf_adapter *adapter = container_of(work, struct iavf_adapter, adminq_task); + struct net_device *netdev = adapter->netdev; struct iavf_hw *hw = &adapter->hw; struct iavf_arq_event_info event; enum virtchnl_ops v_op; @@ -3413,13 +3377,7 @@ static void iavf_adminq_task(struct work_struct *work) u32 val, oldval; u16 pending; - if (!mutex_trylock(&adapter->crit_lock)) { - if (adapter->state == __IAVF_REMOVE) - return; - - queue_work(adapter->wq, &adapter->adminq_task); - goto out; - } + netdev_lock(netdev); if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) goto unlock; @@ -3486,8 +3444,7 @@ static void iavf_adminq_task(struct work_struct *work) freedom: kfree(event.msg_buf); unlock: - mutex_unlock(&adapter->crit_lock); -out: + netdev_unlock(netdev); /* re-enable Admin queue interrupt cause */ iavf_misc_irq_enable(adapter); } @@ -4180,8 +4137,8 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, struct flow_cls_offload *cls_flower) { int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); - struct iavf_cloud_filter *filter = NULL; - int err = -EINVAL, count = 50; + struct iavf_cloud_filter *filter; + int err; if (tc < 0) { dev_err(&adapter->pdev->dev, "Invalid traffic class\n"); @@ -4191,17 +4148,10 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, filter = kzalloc(sizeof(*filter), GFP_KERNEL); if (!filter) return -ENOMEM; - - while (!mutex_trylock(&adapter->crit_lock)) { - if (--count == 0) { - kfree(filter); - return err; - } - udelay(1); - } - filter->cookie = cls_flower->cookie; + netdev_lock(adapter->netdev); + /* bail out here if filter already exists */ spin_lock_bh(&adapter->cloud_filter_list_lock); if (iavf_find_cf(adapter, &cls_flower->cookie)) { @@ -4235,7 +4185,7 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, if (err) kfree(filter); - mutex_unlock(&adapter->crit_lock); + netdev_unlock(adapter->netdev); return err; } @@ -4539,28 +4489,13 @@ static int iavf_open(struct net_device *netdev) return -EIO; } - while (!mutex_trylock(&adapter->crit_lock)) { - /* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock - * is already taken and iavf_open is called from an upper - * device's notifier reacting on NETDEV_REGISTER event. - * We have to leave here to avoid dead lock. - */ - if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) - return -EBUSY; - - usleep_range(500, 1000); - } - - if (adapter->state != __IAVF_DOWN) { - err = -EBUSY; - goto err_unlock; - } + if (adapter->state != __IAVF_DOWN) + return -EBUSY; if (adapter->state == __IAVF_RUNNING && !test_bit(__IAVF_VSI_DOWN, adapter->vsi.state)) { dev_dbg(&adapter->pdev->dev, "VF is already open.\n"); - err = 0; - goto err_unlock; + return 0; } /* allocate transmit descriptors */ @@ -4579,9 +4514,7 @@ static int iavf_open(struct net_device *netdev) goto err_req_irq; spin_lock_bh(&adapter->mac_vlan_list_lock); - iavf_add_filter(adapter, adapter->hw.mac.addr); - spin_unlock_bh(&adapter->mac_vlan_list_lock); /* Restore filters that were removed with IFF_DOWN */ @@ -4594,8 +4527,6 @@ static int iavf_open(struct net_device *netdev) iavf_irq_enable(adapter, true); - mutex_unlock(&adapter->crit_lock); - return 0; err_req_irq: @@ -4605,8 +4536,6 @@ static int iavf_open(struct net_device *netdev) iavf_free_all_rx_resources(adapter); err_setup_tx: iavf_free_all_tx_resources(adapter); -err_unlock: - mutex_unlock(&adapter->crit_lock); return err; } @@ -4630,12 +4559,8 @@ static int iavf_close(struct net_device *netdev) netdev_assert_locked(netdev); - mutex_lock(&adapter->crit_lock); - - if (adapter->state <= __IAVF_DOWN_PENDING) { - mutex_unlock(&adapter->crit_lock); + if (adapter->state <= __IAVF_DOWN_PENDING) return 0; - } set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); /* We cannot send IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS before @@ -4666,7 +4591,6 @@ static int iavf_close(struct net_device *netdev) iavf_change_state(adapter, __IAVF_DOWN_PENDING); iavf_free_traffic_irqs(adapter); - mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); /* We explicitly don't free resources here because the hardware is @@ -4685,11 +4609,10 @@ static int iavf_close(struct net_device *netdev) msecs_to_jiffies(500)); if (!status) netdev_warn(netdev, "Device resources not yet released\n"); - netdev_lock(netdev); - mutex_lock(&adapter->crit_lock); + adapter->aq_required |= aq_to_restore; - mutex_unlock(&adapter->crit_lock); + return 0; } @@ -5198,17 +5121,16 @@ iavf_shaper_set(struct net_shaper_binding *binding, struct iavf_adapter *adapter = netdev_priv(binding->netdev); const struct net_shaper_handle *handle = &shaper->handle; struct iavf_ring *tx_ring; - int ret = 0; + int ret; netdev_assert_locked(adapter->netdev); - mutex_lock(&adapter->crit_lock); if (handle->id >= adapter->num_active_queues) - goto unlock; + return 0; ret = iavf_verify_shaper(binding, shaper, extack); if (ret) - goto unlock; + return ret; tx_ring = &adapter->tx_rings[handle->id]; @@ -5218,9 +5140,7 @@ iavf_shaper_set(struct net_shaper_binding *binding, adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW; -unlock: - mutex_unlock(&adapter->crit_lock); - return ret; + return 0; } static int iavf_shaper_del(struct net_shaper_binding *binding, @@ -5232,9 +5152,8 @@ static int iavf_shaper_del(struct net_shaper_binding *binding, netdev_assert_locked(adapter->netdev); - mutex_lock(&adapter->crit_lock); if (handle->id >= adapter->num_active_queues) - goto unlock; + return 0; tx_ring = &adapter->tx_rings[handle->id]; tx_ring->q_shaper.bw_min = 0; @@ -5243,8 +5162,6 @@ static int iavf_shaper_del(struct net_shaper_binding *binding, adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW; -unlock: - mutex_unlock(&adapter->crit_lock); return 0; } @@ -5505,10 +5422,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_alloc_qos_cap; } - /* set up the locks for the AQ, do this only once in probe - * and destroy them only once in remove - */ - mutex_init(&adapter->crit_lock); mutex_init(&hw->aq.asq_mutex); mutex_init(&hw->aq.arq_mutex); @@ -5578,9 +5491,7 @@ static int iavf_suspend(struct device *dev_d) running = netif_running(netdev); if (running) rtnl_lock(); - netdev_lock(netdev); - mutex_lock(&adapter->crit_lock); if (running) iavf_down(adapter); @@ -5588,7 +5499,6 @@ static int iavf_suspend(struct device *dev_d) iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); - mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); if (running) rtnl_unlock(); @@ -5668,20 +5578,20 @@ static void iavf_remove(struct pci_dev *pdev) * There are flows where register/unregister netdev may race. */ while (1) { - mutex_lock(&adapter->crit_lock); + netdev_lock(netdev); if (adapter->state == __IAVF_RUNNING || adapter->state == __IAVF_DOWN || adapter->state == __IAVF_INIT_FAILED) { - mutex_unlock(&adapter->crit_lock); + netdev_unlock(netdev); break; } /* Simply return if we already went through iavf_shutdown */ if (adapter->state == __IAVF_REMOVE) { - mutex_unlock(&adapter->crit_lock); + netdev_unlock(netdev); return; } - mutex_unlock(&adapter->crit_lock); + netdev_unlock(netdev); usleep_range(500, 1000); } cancel_delayed_work_sync(&adapter->watchdog_task); @@ -5691,7 +5601,6 @@ static void iavf_remove(struct pci_dev *pdev) unregister_netdev(netdev); netdev_lock(netdev); - mutex_lock(&adapter->crit_lock); dev_info(&adapter->pdev->dev, "Removing device\n"); iavf_change_state(adapter, __IAVF_REMOVE); @@ -5707,9 +5616,11 @@ static void iavf_remove(struct pci_dev *pdev) iavf_misc_irq_disable(adapter); /* Shut down all the garbage mashers on the detention level */ + netdev_unlock(netdev); cancel_work_sync(&adapter->reset_task); cancel_delayed_work_sync(&adapter->watchdog_task); cancel_work_sync(&adapter->adminq_task); + netdev_lock(netdev); adapter->aq_required = 0; adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; @@ -5727,8 +5638,6 @@ static void iavf_remove(struct pci_dev *pdev) /* destroy the locks only once, here */ mutex_destroy(&hw->aq.arq_mutex); mutex_destroy(&hw->aq.asq_mutex); - mutex_unlock(&adapter->crit_lock); - mutex_destroy(&adapter->crit_lock); netdev_unlock(netdev); iounmap(hw->hw_addr); From c29a9dc44cbd53156c85dce671b1ac8dabf9eb1b Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Tue, 6 May 2025 16:45:33 -0400 Subject: [PATCH 2654/2924] drm/amdgpu/gfx10: Refine Cleaner Shader for GFX10.1.10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d26625d034fb8d596f0488472969493fa02d03f3 ] This patch updates the cleaner shader, which is responsible for initializing GPU resources such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Changes include adjustments to register clearing and shader configuration. - Updated GPU resource initialization addresses in the cleaner shader from `be803080` to `be803000`. - Simplified the logic in the SGPR clearing section, ensuring all SGPRs are set to zero. Fixes: 25961bad9212 ("drm/amdgpu/gfx10: Add cleaner shader for GFX10.1.10") Cc: Christian König Cc: Alex Deucher Signed-off-by: Manu Rastogi Signed-off-by: Vitaly Prosyak Signed-off-by: Srinivasan Shanmugam Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h | 6 +++--- .../drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm | 13 ++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h index 5255378af53c0a..f67569ccf9f609 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h @@ -43,9 +43,9 @@ static const u32 gfx_10_1_10_cleaner_shader_hex[] = { 0xd70f6a01, 0x000202ff, 0x00000400, 0x80828102, 0xbf84fff7, 0xbefc03ff, - 0x00000068, 0xbe803080, - 0xbe813080, 0xbe823080, - 0xbe833080, 0x80fc847c, + 0x00000068, 0xbe803000, + 0xbe813000, 0xbe823000, + 0xbe833000, 0x80fc847c, 0xbf84fffa, 0xbeea0480, 0xbeec0480, 0xbeee0480, 0xbef00480, 0xbef20480, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm index 9ba3359253c95d..54f7ed9e2801c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm @@ -40,7 +40,6 @@ shader main type(CS) wave_size(32) // Note: original source code from SQ team - // // Create 32 waves in a threadgroup (CS waves) // Each allocates 64 VGPRs @@ -71,8 +70,8 @@ label_0005: s_sub_u32 s2, s2, 8 s_cbranch_scc0 label_0005 // - s_mov_b32 s2, 0x80000000 // Bit31 is first_wave - s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup // CLEAR LDS // @@ -99,10 +98,10 @@ label_001F: label_0023: s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) label_sgpr_loop: - s_movreld_b32 s0, 0 - s_movreld_b32 s1, 0 - s_movreld_b32 s2, 0 - s_movreld_b32 s3, 0 + s_movreld_b32 s0, s0 + s_movreld_b32 s1, s0 + s_movreld_b32 s2, s0 + s_movreld_b32 s3, s0 s_sub_u32 m0, m0, 4 s_cbranch_scc0 label_sgpr_loop From c7d216830d49952fc849bd5363ae1b029c3a1e0e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Jun 2025 18:19:27 +0200 Subject: [PATCH 2655/2924] PM: sleep: Fix power.is_suspended cleanup for direct-complete devices [ Upstream commit d46c4c839c20a599a0eb8d73708ce401f9c7d06d ] Commit 03f1444016b7 ("PM: sleep: Fix handling devices with direct_complete set on errors") caused power.is_suspended to be set for devices with power.direct_complete set, but it forgot to ensure the clearing of that flag for them in device_resume(), so power.is_suspended is still set for them during the next system suspend-resume cycle. If that cycle is aborted in dpm_suspend(), the subsequent invocation of dpm_resume() will trigger a device_resume() call for every device and because power.is_suspended is set for the devices in question, they will not be skipped by device_resume() as expected which causes scary error messages to be logged (as appropriate). To address this issue, move the clearing of power.is_suspended in device_resume() immediately after the power.is_suspended check so it will be always cleared for all devices processed by that function. Fixes: 03f1444016b7 ("PM: sleep: Fix handling devices with direct_complete set on errors") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4280 Reported-and-tested-by: Chris Bainbridge Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/4990586.GXAFRqVoOG@rjwysocki.net Signed-off-by: Sasha Levin --- drivers/base/power/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c8b0a9e29ed843..1926454c7a7e8c 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -941,6 +941,8 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) if (!dev->power.is_suspended) goto Complete; + dev->power.is_suspended = false; + if (dev->power.direct_complete) { /* * Allow new children to be added under the device after this @@ -1003,7 +1005,6 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) End: error = dpm_run_callback(callback, dev, state, info); - dev->power.is_suspended = false; device_unlock(dev); dpm_watchdog_clear(&wd); From 0400f95a5e1b99197bb3dcd18527184325751d0e Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 3 Jun 2025 12:47:51 -0600 Subject: [PATCH 2656/2924] block: flip iter directions in blk_rq_integrity_map_user() [ Upstream commit 43a67dd812c5d3de163c0b6971046b4a4b633d3f ] blk_rq_integrity_map_user() creates the ubuf iter with ITER_DEST for write-direction operations and ITER_SOURCE for read-direction ones. This is backwards; writes use the user buffer as a source for metadata and reads use it as a destination. Switch to the rq_data_dir() helper, which maps writes to ITER_SOURCE (WRITE) and reads to ITER_DEST(READ). Signed-off-by: Caleb Sander Mateos Fixes: fe8f4ca7107e ("block: modify bio_integrity_map_user to accept iov_iter as argument") Link: https://lore.kernel.org/r/20250603184752.1185676-1-csander@purestorage.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-integrity.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/block/blk-integrity.c b/block/blk-integrity.c index a1678f0a9f81f9..e4e2567061f9db 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -117,13 +117,8 @@ int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, { int ret; struct iov_iter iter; - unsigned int direction; - if (op_is_write(req_op(rq))) - direction = ITER_DEST; - else - direction = ITER_SOURCE; - iov_iter_ubuf(&iter, direction, ubuf, bytes); + iov_iter_ubuf(&iter, rq_data_dir(rq), ubuf, bytes); ret = bio_integrity_map_user(rq->bio, &iter); if (ret) return ret; From 75141d56b6268fd5c801c0035a1d4adfd24cfe69 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 20 May 2025 13:20:37 -0700 Subject: [PATCH 2657/2924] nvme: fix command limits status code [ Upstream commit 10f4a7cd724e34b7a6ff96e57ac49dc0cadececc ] The command specific status code, 0x183, was introduced in the NVMe 2.0 specification defined to "Command Size Limits Exceeded" and only ever applied to DSM and Copy commands. Fix the name and, remove the incorrect translation to error codes and special treatment in the target code for it. Fixes: 3b7c33b28a44d4 ("nvme.h: add Write Zeroes definitions") Cc: Chaitanya Kulkarni Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/constants.c | 2 +- drivers/nvme/host/core.c | 1 - drivers/nvme/host/pr.c | 2 -- drivers/nvme/target/core.c | 9 +-------- drivers/nvme/target/io-cmd-bdev.c | 9 +-------- include/linux/nvme.h | 2 +- 6 files changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 2b9e6cfaf2a80a..1a0058be582104 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -145,7 +145,7 @@ static const char * const nvme_statuses[] = { [NVME_SC_BAD_ATTRIBUTES] = "Conflicting Attributes", [NVME_SC_INVALID_PI] = "Invalid Protection Information", [NVME_SC_READ_ONLY] = "Attempted Write to Read Only Range", - [NVME_SC_ONCS_NOT_SUPPORTED] = "ONCS Not Supported", + [NVME_SC_CMD_SIZE_LIM_EXCEEDED ] = "Command Size Limits Exceeded", [NVME_SC_ZONE_BOUNDARY_ERROR] = "Zoned Boundary Error", [NVME_SC_ZONE_FULL] = "Zone Is Full", [NVME_SC_ZONE_READ_ONLY] = "Zone Is Read Only", diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6b04473c0ab73c..93a8119ad5ca66 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -286,7 +286,6 @@ static blk_status_t nvme_error_status(u16 status) case NVME_SC_NS_NOT_READY: return BLK_STS_TARGET; case NVME_SC_BAD_ATTRIBUTES: - case NVME_SC_ONCS_NOT_SUPPORTED: case NVME_SC_INVALID_OPCODE: case NVME_SC_INVALID_FIELD: case NVME_SC_INVALID_NS: diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index cf2d2c5039ddbf..ca6a74607b1397 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -82,8 +82,6 @@ static int nvme_status_to_pr_err(int status) return PR_STS_SUCCESS; case NVME_SC_RESERVATION_CONFLICT: return PR_STS_RESERVATION_CONFLICT; - case NVME_SC_ONCS_NOT_SUPPORTED: - return -EOPNOTSUPP; case NVME_SC_BAD_ATTRIBUTES: case NVME_SC_INVALID_OPCODE: case NVME_SC_INVALID_FIELD: diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 245475c43127fb..69b1ddff6731fc 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -62,14 +62,7 @@ inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno) return NVME_SC_LBA_RANGE | NVME_STATUS_DNR; case -EOPNOTSUPP: req->error_loc = offsetof(struct nvme_common_command, opcode); - switch (req->cmd->common.opcode) { - case nvme_cmd_dsm: - case nvme_cmd_write_zeroes: - return NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR; - default: - return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; - } - break; + return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; case -ENODATA: req->error_loc = offsetof(struct nvme_rw_command, nsid); return NVME_SC_ACCESS_DENIED; diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 83be0657e6df4e..1cfa13d029bfa2 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -145,15 +145,8 @@ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) req->error_loc = offsetof(struct nvme_rw_command, slba); break; case BLK_STS_NOTSUPP: + status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; req->error_loc = offsetof(struct nvme_common_command, opcode); - switch (req->cmd->common.opcode) { - case nvme_cmd_dsm: - case nvme_cmd_write_zeroes: - status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR; - break; - default: - status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; - } break; case BLK_STS_MEDIUM: status = NVME_SC_ACCESS_DENIED; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2479ed10f53e37..5d7afb6079f1d8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -2094,7 +2094,7 @@ enum { NVME_SC_BAD_ATTRIBUTES = 0x180, NVME_SC_INVALID_PI = 0x181, NVME_SC_READ_ONLY = 0x182, - NVME_SC_ONCS_NOT_SUPPORTED = 0x183, + NVME_SC_CMD_SIZE_LIM_EXCEEDED = 0x183, /* * I/O Command Set Specific - Fabrics commands: From c3dec8c01341e8013da2db66176cd72c9e90190e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 20 May 2025 16:14:49 +0100 Subject: [PATCH 2658/2924] nvme: fix implicit bool to flags conversion [ Upstream commit c4b680ac2863821e19d360fca62f78b68b1c8ece ] nvme_map_user_request() takes flags as the last argument, but nvme_uring_cmd_io() shoves a bool "vec" into it. It behaves as expected because bool is converted to 0/1 and NVME_IOCTL_VEC is defined as 1, but it's better to pass flags explicitly. Fixes: 7b7fdb8e2dbc1 ("nvme: replace the "bool vec" arguments with flags in the ioctl path") Signed-off-by: Pavel Begunkov Reviewed-by: Jens Axboe Reviewed-by: Keith Busch Reviewed-by: Anuj Gupta Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni Reviewed-by: Caleb Sander Mateos Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index ca86d3bf7ea49d..f29107d95ff26d 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -521,7 +521,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, if (d.data_len) { ret = nvme_map_user_request(req, d.addr, d.data_len, nvme_to_user_ptr(d.metadata), d.metadata_len, - map_iter, vec); + map_iter, vec ? NVME_IOCTL_VEC : 0); if (ret) goto out_free_req; } From c741a7ef68023ac800054e2131c3e22e647fd7e3 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Mon, 2 Jun 2025 03:34:29 -0700 Subject: [PATCH 2659/2924] gve: add missing NULL check for gve_alloc_pending_packet() in TX DQO [ Upstream commit 12c331b29c7397ac3b03584e12902990693bc248 ] gve_alloc_pending_packet() can return NULL, but gve_tx_add_skb_dqo() did not check for this case before dereferencing the returned pointer. Add a missing NULL check to prevent a potential NULL pointer dereference when allocation fails. This improves robustness in low-memory scenarios. Fixes: a57e5de476be ("gve: DQO: Add TX path") Signed-off-by: Alok Tiwari Reviewed-by: Mina Almasry Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 2eba868d80370a..f7da7de23d6726 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -763,6 +763,9 @@ static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx, s16 completion_tag; pkt = gve_alloc_pending_packet(tx); + if (!pkt) + return -ENOMEM; + pkt->skb = skb; completion_tag = pkt - tx->dqo.pending_packets; From 604981bbaba174d3922a2f80b31026fc15df7f51 Mon Sep 17 00:00:00 2001 From: Dibin Moolakadan Subrahmanian Date: Wed, 28 May 2025 12:15:56 +0530 Subject: [PATCH 2660/2924] drm/i915/display: Fix u32 overflow in SNPS PHY HDMI PLL setup [ Upstream commit 791d76005de0ab556b590473eb4cbfede727fce0 ] When configuring the HDMI PLL, calculations use DIV_ROUND_UP_ULL and DIV_ROUND_DOWN_ULL macros, which internally rely on do_div. However, do_div expects a 32-bit (u32) divisor, and at higher data rates, the divisor can exceed this limit. This leads to incorrect division results and ultimately misconfigured PLL values. This fix replaces do_div calls with div64_base64 calls where diviser can exceed u32 limit. Fixes: 5947642004bf ("drm/i915/display: Add support for SNPS PHY HDMI PLL algorithm for DG2") Cc: Ankit Nautiyal Cc: Suraj Kandpal Cc: Jani Nikula Signed-off-by: Dibin Moolakadan Subrahmanian Reviewed-by: Ankit Nautiyal Signed-off-by: Ankit Nautiyal Link: https://lore.kernel.org/r/20250528064557.4172149-1-dibin.moolakadan.subrahmanian@intel.com (cherry picked from commit ce924116e43ffbfa544d82976c4b9d11bcde9334) Signed-off-by: Joonas Lahtinen Signed-off-by: Sasha Levin --- .../gpu/drm/i915/display/intel_snps_hdmi_pll.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c index c6321dafef4f3c..74bb3bedf30f5d 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c +++ b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c @@ -41,12 +41,12 @@ static s64 interp(s64 x, s64 x1, s64 x2, s64 y1, s64 y2) { s64 dydx; - dydx = DIV_ROUND_UP_ULL((y2 - y1) * 100000, (x2 - x1)); + dydx = DIV64_U64_ROUND_UP((y2 - y1) * 100000, (x2 - x1)); - return (y1 + DIV_ROUND_UP_ULL(dydx * (x - x1), 100000)); + return (y1 + DIV64_U64_ROUND_UP(dydx * (x - x1), 100000)); } -static void get_ana_cp_int_prop(u32 vco_clk, +static void get_ana_cp_int_prop(u64 vco_clk, u32 refclk_postscalar, int mpll_ana_v2i, int c, int a, @@ -115,16 +115,16 @@ static void get_ana_cp_int_prop(u32 vco_clk, CURVE0_MULTIPLIER)); scaled_interpolated_sqrt = - int_sqrt(DIV_ROUND_UP_ULL(interpolated_product, vco_div_refclk_float) * + int_sqrt(DIV64_U64_ROUND_UP(interpolated_product, vco_div_refclk_float) * DIV_ROUND_DOWN_ULL(1000000000000ULL, 55)); /* Scale vco_div_refclk for ana_cp_int */ scaled_vco_div_refclk2 = DIV_ROUND_UP_ULL(vco_div_refclk_float, 1000000); - adjusted_vco_clk2 = 1460281 * DIV_ROUND_UP_ULL(scaled_interpolated_sqrt * + adjusted_vco_clk2 = 1460281 * DIV64_U64_ROUND_UP(scaled_interpolated_sqrt * scaled_vco_div_refclk2, curve_1_interpolated); - *ana_cp_prop = DIV_ROUND_UP_ULL(adjusted_vco_clk2, curve_2_scaled2); + *ana_cp_prop = DIV64_U64_ROUND_UP(adjusted_vco_clk2, curve_2_scaled2); *ana_cp_prop = max(1, min(*ana_cp_prop, 127)); } @@ -165,10 +165,10 @@ static void compute_hdmi_tmds_pll(u64 pixel_clock, u32 refclk, /* Select appropriate v2i point */ if (datarate <= INTEL_SNPS_PHY_HDMI_9999MHZ) { mpll_ana_v2i = 2; - tx_clk_div = ilog2(DIV_ROUND_DOWN_ULL(INTEL_SNPS_PHY_HDMI_9999MHZ, datarate)); + tx_clk_div = ilog2(div64_u64(INTEL_SNPS_PHY_HDMI_9999MHZ, datarate)); } else { mpll_ana_v2i = 3; - tx_clk_div = ilog2(DIV_ROUND_DOWN_ULL(INTEL_SNPS_PHY_HDMI_16GHZ, datarate)); + tx_clk_div = ilog2(div64_u64(INTEL_SNPS_PHY_HDMI_16GHZ, datarate)); } vco_clk = (datarate << tx_clk_div) >> 1; From a26ec8e16958b6dd37dac9daf5fb6978fe0cb0b8 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Wed, 4 Jun 2025 06:13:19 +0300 Subject: [PATCH 2661/2924] wifi: iwlwifi: mld: avoid panic on init failure [ Upstream commit 960c7e6d388034d219dafffa6da0a5c2ccd5ff30 ] In case of an error during init, in_hw_restart will be set, but it will never get cleared. Instead, we will retry to init again, and then we will act like we are in a restart when we are actually not. This causes (among others) to a NULL pointer dereference when canceling rx_omi::finished_work, that was not even initialized, because we thought that we are in hw_restart. Set in_hw_restart to true only if the fw is running, then we know that FW was loaded successfully and we are not going to the retry loop. Fixes: 7391b2a4f7db ("wifi: iwlwifi: rework firmware error handling") Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250604061200.e0040e0a4b09.Iae469a0abe6bfa3c26d8a88c066bad75c2e8f121@changeid Signed-off-by: Miri Korenblit Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mld/mld.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index 73d2166a4c2570..7a098942dc8021 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -638,7 +638,8 @@ iwl_mld_nic_error(struct iwl_op_mode *op_mode, * It might not actually be true that we'll restart, but the * setting doesn't matter if we're going to be unbound either. */ - if (type != IWL_ERR_TYPE_RESET_HS_TIMEOUT) + if (type != IWL_ERR_TYPE_RESET_HS_TIMEOUT && + mld->fw_status.running) mld->fw_status.in_hw_restart = true; } From ab1e799b1b42f530db6266d08a27cbc90eafc45e Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 20 May 2025 09:41:10 +0200 Subject: [PATCH 2662/2924] drm/panel-simple: fix the warnings for the Evervision VGG644804 [ Upstream commit 5dc1ea903588a73fb03b3a3e5a041a7c63a4bccd ] The panel lacked the connector type which causes a warning. Adding the connector type reveals wrong bus_flags and bits per pixel. Fix all of it. Fixes: 1319f2178bdf ("drm/panel-simple: add Evervision VGG644804 panel entry") Signed-off-by: Michael Walle Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250520074110.655114-1-mwalle@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 33a37539de574e..3aaac96c0bfbf5 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2199,13 +2199,14 @@ static const struct display_timing evervision_vgg644804_timing = { static const struct panel_desc evervision_vgg644804 = { .timings = &evervision_vgg644804_timing, .num_timings = 1, - .bpc = 8, + .bpc = 6, .size = { .width = 115, .height = 86, }, .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, - .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, }; static const struct display_timing evervision_vgg804821_timing = { From 8068e1e42b46518ce680dc6470bcd710efc3fa0a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 23 May 2025 14:20:44 +0200 Subject: [PATCH 2663/2924] netfilter: nf_set_pipapo_avx2: fix initial map fill [ Upstream commit ea77c397bff8b6d59f6d83dae1425b08f465e8b5 ] If the first field doesn't cover the entire start map, then we must zero out the remainder, else we leak those bits into the next match round map. The early fix was incomplete and did only fix up the generic C implementation. A followup patch adds a test case to nft_concat_range.sh. Fixes: 791a615b7ad2 ("netfilter: nf_set_pipapo: fix initial map fill") Signed-off-by: Florian Westphal Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo_avx2.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index c15db28c5ebc43..be7c16c79f711e 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1113,6 +1113,25 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features, return true; } +/** + * pipapo_resmap_init_avx2() - Initialise result map before first use + * @m: Matching data, including mapping table + * @res_map: Result map + * + * Like pipapo_resmap_init() but do not set start map bits covered by the first field. + */ +static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, unsigned long *res_map) +{ + const struct nft_pipapo_field *f = m->f; + int i; + + /* Starting map doesn't need to be set to all-ones for this implementation, + * but we do need to zero the remaining bits, if any. + */ + for (i = f->bsize; i < m->bsize_max; i++) + res_map[i] = 0ul; +} + /** * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation * @net: Network namespace @@ -1171,7 +1190,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, res = scratch->map + (map_index ? m->bsize_max : 0); fill = scratch->map + (map_index ? 0 : m->bsize_max); - /* Starting map doesn't need to be set for this implementation */ + pipapo_resmap_init_avx2(m, res); nft_pipapo_avx2_prepare(); From 33d06d3a741fed698742b9f333ea323e9a8a89d8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 30 May 2025 12:34:02 +0200 Subject: [PATCH 2664/2924] netfilter: nf_nat: also check reverse tuple to obtain clashing entry [ Upstream commit 50d9ce9679dd50df2dc51ada717fa875bc248fad ] The logic added in the blamed commit was supposed to only omit nat source port allocation if neither the existing nor the new entry are subject to NAT. However, its not enough to lookup the conntrack based on the proposed tuple, we must also check the reverse direction. Otherwise there are esoteric cases where the collision is in the reverse direction because that colliding connection has a port rewrite, but the new entry doesn't. In this case, we only check the new entry and then erronously conclude that no clash exists anymore. The existing (udp) tuple is: a:p -> b:P, with nat translation to s:P, i.e. pure daddr rewrite, reverse tuple in conntrack table is s:P -> a:p. When another UDP packet is sent directly to s, i.e. a:p->s:P, this is correctly detected as a colliding entry: tuple is taken by existing reply tuple in reverse direction. But the colliding conntrack is only searched for with unreversed direction, and we can't find such entry matching a:p->s:P. The incorrect conclusion is that the clashing entry has timed out and that no port address translation is required. Such conntrack will then be discarded at nf_confirm time because the proposed reverse direction clashes with an existing mapping in the conntrack table. Search for the reverse tuple too, this will then check the NAT bits of the colliding entry and triggers port reallocation. Followp patch extends nft_nat.sh selftest to cover this scenario. The IPS_SEQ_ADJUST change is also a bug fix: Instead of checking for SEQ_ADJ this tested for SEEN_REPLY and ASSURED by accident -- _BIT is only for use with the test_bit() API. This bug has little consequence in practice, because the sequence number adjustments are only useful for TCP which doesn't support clash resolution. The existing test case (conntrack_reverse_clash.sh) exercise a race condition path (parallel conntrack creation on different CPUs), so the colliding entries have neither SEEN_REPLY nor ASSURED set. Thanks to Yafang Shao and Shaun Brady for an initial investigation of this bug. Fixes: d8f84a9bc7c4 ("netfilter: nf_nat: don't try nat source port reallocation for reverse dir clash") Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1795 Reported-by: Yafang Shao Reported-by: Shaun Brady Signed-off-by: Florian Westphal Tested-by: Yafang Shao Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_nat_core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index aad84aabd7f1d1..f391cd267922b3 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -248,7 +248,7 @@ static noinline bool nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_ct) { - static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT; + static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST; const struct nf_conntrack_tuple_hash *thash; const struct nf_conntrack_zone *zone; struct nf_conn *ct; @@ -287,8 +287,14 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple, zone = nf_ct_zone(ignored_ct); thash = nf_conntrack_find_get(net, zone, tuple); - if (unlikely(!thash)) /* clashing entry went away */ - return false; + if (unlikely(!thash)) { + struct nf_conntrack_tuple reply; + + nf_ct_invert_tuple(&reply, tuple); + thash = nf_conntrack_find_get(net, zone, &reply); + if (!thash) /* clashing entry went away */ + return false; + } ct = nf_ct_tuplehash_to_ctrack(thash); From e75920b2374a58ca888750ce83b2c55b12538d27 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 3 Jun 2025 10:59:04 +0530 Subject: [PATCH 2665/2924] net: ti: icssg-prueth: Fix swapped TX stats for MII interfaces. [ Upstream commit 919d763d609428c2680ec8159257d9655f002f89 ] In MII mode, Tx lines are swapped for port0 and port1, which means Tx port0 receives data from PRU1 and the Tx port1 receives data from PRU0. This is an expected hardware behavior and reading the Tx stats needs to be handled accordingly in the driver. Update the driver to read Tx stats from the PRU1 for port0 and PRU0 for port1. Fixes: c1e10d5dc7a1 ("net: ti: icssg-prueth: Add ICSSG Stats") Signed-off-by: Meghana Malladi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250603052904.431203-1-m-malladi@ti.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/icssg/icssg_stats.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index 6f0edae38ea242..172ae38381b453 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -29,6 +29,14 @@ void emac_update_hardware_stats(struct prueth_emac *emac) spin_lock(&prueth->stats_lock); for (i = 0; i < ARRAY_SIZE(icssg_all_miig_stats); i++) { + /* In MII mode TX lines are swapped inside ICSSG, so read Tx stats + * from slice1 for port0 and slice0 for port1 to get accurate Tx + * stats for a given port + */ + if (emac->phy_if == PHY_INTERFACE_MODE_MII && + icssg_all_miig_stats[i].offset >= ICSSG_TX_PACKET_OFFSET && + icssg_all_miig_stats[i].offset <= ICSSG_TX_BYTE_OFFSET) + base = stats_base[slice ^ 1]; regmap_read(prueth->miig_rt, base + icssg_all_miig_stats[i].offset, &val); From 2dbccf1eb8c04b84ee3afdb1d6b787db02e7befc Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 2 Jun 2025 21:39:49 +0200 Subject: [PATCH 2666/2924] net: dsa: b53: do not enable EEE on bcm63xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1237c2d4a8db79dfd4369bff6930b0e385ed7d5c ] BCM63xx internal switches do not support EEE, but provide multiple RGMII ports where external PHYs may be connected. If one of these PHYs are EEE capable, we may try to enable EEE for the MACs, which then hangs the system on access of the (non-existent) EEE registers. Fix this by checking if the switch actually supports EEE before attempting to configure it. Fixes: 22256b0afb12 ("net: dsa: b53: Move EEE functions to b53") Reviewed-by: Florian Fainelli Tested-by: Álvaro Fernández Rojas Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250602193953.1010487-2-jonas.gorski@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 7216eb8f949367..a316f8c01d0a99 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2348,6 +2348,9 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy) { int ret; + if (!b53_support_eee(ds, port)) + return 0; + ret = phy_init_eee(phy, false); if (ret) return 0; @@ -2362,7 +2365,7 @@ bool b53_support_eee(struct dsa_switch *ds, int port) { struct b53_device *dev = ds->priv; - return !is5325(dev) && !is5365(dev); + return !is5325(dev) && !is5365(dev) && !is63xx(dev); } EXPORT_SYMBOL(b53_support_eee); From 2953f17ad12c4ae1f7cf50948c3e3aaf386276c3 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 2 Jun 2025 21:39:50 +0200 Subject: [PATCH 2667/2924] net: dsa: b53: do not enable RGMII delay on bcm63xx [ Upstream commit 4af523551d876ab8b8057d1e5303a860fd736fcb ] bcm63xx's RGMII ports are always in MAC mode, never in PHY mode, so we shouldn't enable any delays and let the PHY handle any delays as necessary. This fixes using RGMII ports with normal PHYs like BCM54612E, which will handle the delay in the PHY. Fixes: ce3bf94871f7 ("net: dsa: b53: add support for BCM63xx RGMIIs") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250602193953.1010487-3-jonas.gorski@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a316f8c01d0a99..ba70fbcc0f8bc6 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1325,24 +1325,7 @@ static void b53_adjust_63xx_rgmii(struct dsa_switch *ds, int port, off = B53_RGMII_CTRL_P(port); b53_read8(dev, B53_CTRL_PAGE, off, &rgmii_ctrl); - - switch (interface) { - case PHY_INTERFACE_MODE_RGMII_ID: - rgmii_ctrl |= (RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC); - break; - case PHY_INTERFACE_MODE_RGMII_RXID: - rgmii_ctrl &= ~(RGMII_CTRL_DLL_TXC); - rgmii_ctrl |= RGMII_CTRL_DLL_RXC; - break; - case PHY_INTERFACE_MODE_RGMII_TXID: - rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC); - rgmii_ctrl |= RGMII_CTRL_DLL_TXC; - break; - case PHY_INTERFACE_MODE_RGMII: - default: - rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC); - break; - } + rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC); if (port != dev->imp_port) { if (is63268(dev)) From bda4c94206cdd8cc327d499e857362bcbb86c0ce Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 10 May 2025 11:22:11 +0200 Subject: [PATCH 2668/2924] net: dsa: b53: implement setting ageing time [ Upstream commit e39d14a760c039af0653e3df967e7525413924a0 ] b53 supported switches support configuring ageing time between 1 and 1,048,575 seconds, so add an appropriate setter. This allows b53 to pass the FDB learning test for both vlan aware and vlan unaware bridges. Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250510092211.276541-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski Stable-dep-of: 75f4f7b2b130 ("net: dsa: b53: do not configure bcm63xx's IMP port interface") Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 28 ++++++++++++++++++++++++++++ drivers/net/dsa/b53/b53_priv.h | 1 + drivers/net/dsa/b53/b53_regs.h | 7 +++++++ drivers/net/dsa/bcm_sf2.c | 1 + 4 files changed, 37 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index ba70fbcc0f8bc6..c186ee3fb28df2 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -1202,6 +1203,10 @@ static int b53_setup(struct dsa_switch *ds) */ ds->untag_vlan_aware_bridge_pvid = true; + /* Ageing time is set in seconds */ + ds->ageing_time_min = 1 * 1000; + ds->ageing_time_max = AGE_TIME_MAX * 1000; + ret = b53_reset_switch(dev); if (ret) { dev_err(ds->dev, "failed to reset switch\n"); @@ -2392,6 +2397,28 @@ static int b53_get_max_mtu(struct dsa_switch *ds, int port) return B53_MAX_MTU; } +int b53_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) +{ + struct b53_device *dev = ds->priv; + u32 atc; + int reg; + + if (is63xx(dev)) + reg = B53_AGING_TIME_CONTROL_63XX; + else + reg = B53_AGING_TIME_CONTROL; + + atc = DIV_ROUND_CLOSEST(msecs, 1000); + + if (!is5325(dev) && !is5365(dev)) + atc |= AGE_CHANGE; + + b53_write32(dev, B53_MGMT_PAGE, reg, atc); + + return 0; +} +EXPORT_SYMBOL_GPL(b53_set_ageing_time); + static const struct phylink_mac_ops b53_phylink_mac_ops = { .mac_select_pcs = b53_phylink_mac_select_pcs, .mac_config = b53_phylink_mac_config, @@ -2415,6 +2442,7 @@ static const struct dsa_switch_ops b53_switch_ops = { .port_disable = b53_disable_port, .support_eee = b53_support_eee, .set_mac_eee = b53_set_mac_eee, + .set_ageing_time = b53_set_ageing_time, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_pre_bridge_flags = b53_br_flags_pre, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 2cf3e6a81e3785..a5ef7071ba07b1 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -343,6 +343,7 @@ void b53_get_strings(struct dsa_switch *ds, int port, u32 stringset, void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); int b53_get_sset_count(struct dsa_switch *ds, int port, int sset); void b53_get_ethtool_phy_stats(struct dsa_switch *ds, int port, uint64_t *data); +int b53_set_ageing_time(struct dsa_switch *ds, unsigned int msecs); int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, bool *tx_fwd_offload, struct netlink_ext_ack *extack); void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 5f7a0e5c5709d3..1fbc5a204bc721 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -220,6 +220,13 @@ #define BRCM_HDR_P5_EN BIT(1) /* Enable tagging on port 5 */ #define BRCM_HDR_P7_EN BIT(2) /* Enable tagging on port 7 */ +/* Aging Time control register (32 bit) */ +#define B53_AGING_TIME_CONTROL 0x06 +#define B53_AGING_TIME_CONTROL_63XX 0x08 +#define AGE_CHANGE BIT(20) +#define AGE_TIME_MASK 0x7ffff +#define AGE_TIME_MAX 1048575 + /* Mirror capture control register (16 bit) */ #define B53_MIR_CAP_CTL 0x10 #define CAP_PORT_MASK 0xf diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 454a8c7fd7eea5..960685596093b6 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1235,6 +1235,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .port_disable = bcm_sf2_port_disable, .support_eee = b53_support_eee, .set_mac_eee = b53_set_mac_eee, + .set_ageing_time = b53_set_ageing_time, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_pre_bridge_flags = b53_br_flags_pre, From 87ae93e421fd488bcc2b7dde395c1ddaaeb48985 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 2 Jun 2025 21:39:51 +0200 Subject: [PATCH 2669/2924] net: dsa: b53: do not configure bcm63xx's IMP port interface [ Upstream commit 75f4f7b2b13008803f84768ff90396f9d7553221 ] The IMP port is not a valid RGMII interface, but hard wired to internal, so we shouldn't touch the undefined register B53_RGMII_CTRL_IMP. While this does not seem to have any side effects, let's not touch it at all, so limit RGMII configuration on bcm63xx to the actual RGMII ports. Fixes: ce3bf94871f7 ("net: dsa: b53: add support for BCM63xx RGMIIs") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250602193953.1010487-4-jonas.gorski@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c186ee3fb28df2..3f4934f974c81b 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -1322,24 +1323,17 @@ static void b53_adjust_63xx_rgmii(struct dsa_switch *ds, int port, phy_interface_t interface) { struct b53_device *dev = ds->priv; - u8 rgmii_ctrl = 0, off; + u8 rgmii_ctrl = 0; - if (port == dev->imp_port) - off = B53_RGMII_CTRL_IMP; - else - off = B53_RGMII_CTRL_P(port); - - b53_read8(dev, B53_CTRL_PAGE, off, &rgmii_ctrl); + b53_read8(dev, B53_CTRL_PAGE, B53_RGMII_CTRL_P(port), &rgmii_ctrl); rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC); - if (port != dev->imp_port) { - if (is63268(dev)) - rgmii_ctrl |= RGMII_CTRL_MII_OVERRIDE; + if (is63268(dev)) + rgmii_ctrl |= RGMII_CTRL_MII_OVERRIDE; - rgmii_ctrl |= RGMII_CTRL_ENABLE_GMII; - } + rgmii_ctrl |= RGMII_CTRL_ENABLE_GMII; - b53_write8(dev, B53_CTRL_PAGE, off, rgmii_ctrl); + b53_write8(dev, B53_CTRL_PAGE, B53_RGMII_CTRL_P(port), rgmii_ctrl); dev_dbg(ds->dev, "Configured port %d for %s\n", port, phy_modes(interface)); @@ -1484,7 +1478,7 @@ static void b53_phylink_mac_config(struct phylink_config *config, struct b53_device *dev = ds->priv; int port = dp->index; - if (is63xx(dev) && port >= B53_63XX_RGMII0) + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) b53_adjust_63xx_rgmii(ds, port, interface); if (mode == MLO_AN_FIXED) { From d4280f0853d93e1dc095a16071d17e29e23805eb Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 2 Jun 2025 21:39:52 +0200 Subject: [PATCH 2670/2924] net: dsa: b53: allow RGMII for bcm63xx RGMII ports [ Upstream commit 5ea0d42c1980e6d10e5cb56a78021db5bfcebaaf ] Add RGMII to supported interfaces for BCM63xx RGMII ports so they can be actually used in RGMII mode. Without this, phylink will fail to configure them: [ 3.580000] b53-switch 10700000.switch GbE3 (uninitialized): validation of rgmii with support 0000000,00000000,00000000,000062ff and advertisement 0000000,00000000,00000000,000062ff failed: -EINVAL [ 3.600000] b53-switch 10700000.switch GbE3 (uninitialized): failed to connect to PHY: -EINVAL [ 3.610000] b53-switch 10700000.switch GbE3 (uninitialized): error -22 setting up PHY for tree 0, switch 0, port 4 Fixes: ce3bf94871f7 ("net: dsa: b53: add support for BCM63xx RGMIIs") Reviewed-by: Florian Fainelli Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250602193953.1010487-5-jonas.gorski@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 3f4934f974c81b..be4493b769f441 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1439,6 +1439,10 @@ static void b53_phylink_get_caps(struct dsa_switch *ds, int port, __set_bit(PHY_INTERFACE_MODE_MII, config->supported_interfaces); __set_bit(PHY_INTERFACE_MODE_REVMII, config->supported_interfaces); + /* BCM63xx RGMII ports support RGMII */ + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) + phy_interface_set_rgmii(config->supported_interfaces); + config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100; From 290a9bcfb434b108aad6786fe9105bbe2263c870 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 2 Jun 2025 21:39:53 +0200 Subject: [PATCH 2671/2924] net: dsa: b53: do not touch DLL_IQQD on bcm53115 [ Upstream commit bc1a65eb81a21e2aa3c3dca058ee8adf687b6ef5 ] According to OpenMDK, bit 2 of the RGMII register has a different meaning for BCM53115 [1]: "DLL_IQQD 1: In the IDDQ mode, power is down0: Normal function mode" Configuring RGMII delay works without setting this bit, so let's keep it at the default. For other chips, we always set it, so not clearing it is not an issue. One would assume BCM53118 works the same, but OpenMDK is not quite sure what this bit actually means [2]: "BYPASS_IMP_2NS_DEL #1: In the IDDQ mode, power is down#0: Normal function mode1: Bypass dll65_2ns_del IP0: Use dll65_2ns_del IP" So lets keep setting it for now. [1] https://github.com/Broadcom-Network-Switching-Software/OpenMDK/blob/master/cdk/PKG/chip/bcm53115/bcm53115_a0_defs.h#L19871 [2] https://github.com/Broadcom-Network-Switching-Software/OpenMDK/blob/master/cdk/PKG/chip/bcm53118/bcm53118_a0_defs.h#L14392 Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250602193953.1010487-6-jonas.gorski@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index be4493b769f441..862bdccb743976 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1354,8 +1354,7 @@ static void b53_adjust_531x5_rgmii(struct dsa_switch *ds, int port, * tx_clk aligned timing (restoring to reset defaults) */ b53_read8(dev, B53_CTRL_PAGE, off, &rgmii_ctrl); - rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC | - RGMII_CTRL_TIMING_SEL); + rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC); /* PHY_INTERFACE_MODE_RGMII_TXID means TX internal delay, make * sure that we enable the port TX clock internal delay to @@ -1375,7 +1374,10 @@ static void b53_adjust_531x5_rgmii(struct dsa_switch *ds, int port, rgmii_ctrl |= RGMII_CTRL_DLL_TXC; if (interface == PHY_INTERFACE_MODE_RGMII) rgmii_ctrl |= RGMII_CTRL_DLL_TXC | RGMII_CTRL_DLL_RXC; - rgmii_ctrl |= RGMII_CTRL_TIMING_SEL; + + if (dev->chip_id != BCM53115_DEVICE_ID) + rgmii_ctrl |= RGMII_CTRL_TIMING_SEL; + b53_write8(dev, B53_CTRL_PAGE, off, rgmii_ctrl); dev_info(ds->dev, "Configured port %d for %s\n", port, From 8e15454d58e7ff87da65f45c6f0a584e61eaff17 Mon Sep 17 00:00:00 2001 From: Lachlan Hodges Date: Tue, 3 Jun 2025 15:35:38 +1000 Subject: [PATCH 2672/2924] wifi: cfg80211/mac80211: correctly parse S1G beacon optional elements [ Upstream commit 1e1f706fc2ce90eaaf3480b3d5f27885960d751c ] S1G beacons are not traditional beacons but a type of extension frame. Extension frames contain the frame control and duration fields, followed by zero or more optional fields before the frame body. These optional fields are distinct from the variable length elements. The presence of optional fields is indicated in the frame control field. To correctly locate the elements offset, the frame control must be parsed to identify which optional fields are present. Currently, mac80211 parses S1G beacons based on fixed assumptions about the frame layout, without inspecting the frame control field. This can result in incorrect offsets to the "variable" portion of the frame. Properly parse S1G beacon frames by using the field lengths defined in IEEE 802.11-2024, section 9.3.4.3, ensuring that the elements offset is calculated accurately. Fixes: 9eaffe5078ca ("cfg80211: convert S1G beacon to scan results") Fixes: cd418ba63f0c ("mac80211: convert S1G beacon to scan results") Signed-off-by: Lachlan Hodges Link: https://patch.msgid.link/20250603053538.468562-1-lachlan.hodges@morsemicro.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- include/linux/ieee80211.h | 79 ++++++++++++++++++++++++++++++++++----- net/mac80211/mlme.c | 7 +--- net/mac80211/scan.c | 11 +++--- net/wireless/scan.c | 18 ++++----- 4 files changed, 83 insertions(+), 32 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 457b4fba88bd00..7edc3fb0641cba 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -111,6 +111,8 @@ /* bits unique to S1G beacon */ #define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 +#define IEEE80211_S1G_BCN_CSSID 0x200 +#define IEEE80211_S1G_BCN_ANO 0x400 /* see 802.11ah-2016 9.9 NDP CMAC frames */ #define IEEE80211_S1G_1MHZ_NDP_BITS 25 @@ -153,9 +155,6 @@ #define IEEE80211_ANO_NETTYPE_WILD 15 -/* bits unique to S1G beacon */ -#define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 - /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */ #define IEEE80211_CTL_EXT_POLL 0x2000 #define IEEE80211_CTL_EXT_SPR 0x3000 @@ -627,6 +626,42 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON); } +/** + * ieee80211_s1g_has_next_tbtt - check if IEEE80211_S1G_BCN_NEXT_TBTT + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * next TBTT field + */ +static inline bool ieee80211_s1g_has_next_tbtt(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); +} + +/** + * ieee80211_s1g_has_ano - check if IEEE80211_S1G_BCN_ANO + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * ANO field + */ +static inline bool ieee80211_s1g_has_ano(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_ANO)); +} + +/** + * ieee80211_s1g_has_cssid - check if IEEE80211_S1G_BCN_CSSID + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * compressed SSID field + */ +static inline bool ieee80211_s1g_has_cssid(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID)); +} + /** * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon * @fc: frame control bytes in little-endian byteorder @@ -1245,16 +1280,40 @@ struct ieee80211_ext { u8 change_seq; u8 variable[0]; } __packed s1g_beacon; - struct { - u8 sa[ETH_ALEN]; - __le32 timestamp; - u8 change_seq; - u8 next_tbtt[3]; - u8 variable[0]; - } __packed s1g_short_beacon; } u; } __packed __aligned(2); +/** + * ieee80211_s1g_optional_len - determine length of optional S1G beacon fields + * @fc: frame control bytes in little-endian byteorder + * Return: total length in bytes of the optional fixed-length fields + * + * S1G beacons may contain up to three optional fixed-length fields that + * precede the variable-length elements. Whether these fields are present + * is indicated by flags in the frame control field. + * + * From IEEE 802.11-2024 section 9.3.4.3: + * - Next TBTT field may be 0 or 3 bytes + * - Short SSID field may be 0 or 4 bytes + * - Access Network Options (ANO) field may be 0 or 1 byte + */ +static inline size_t +ieee80211_s1g_optional_len(__le16 fc) +{ + size_t len = 0; + + if (ieee80211_s1g_has_next_tbtt(fc)) + len += 3; + + if (ieee80211_s1g_has_cssid(fc)) + len += 4; + + if (ieee80211_s1g_has_ano(fc)) + len += 1; + + return len; +} + #define IEEE80211_TWT_CONTROL_NDP BIT(0) #define IEEE80211_TWT_CONTROL_RESP_MODE BIT(1) #define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST BIT(3) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 35eaf0812c5b24..53d5ffad87be87 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7220,11 +7220,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type); if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { struct ieee80211_ext *ext = (void *) mgmt; - - if (ieee80211_is_s1g_short_beacon(ext->frame_control)) - variable = ext->u.s1g_short_beacon.variable; - else - variable = ext->u.s1g_beacon.variable; + variable = ext->u.s1g_beacon.variable + + ieee80211_s1g_optional_len(ext->frame_control); } baselen = (u8 *) variable - (u8 *) mgmt; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index cb707907188585..5a56487dab69cb 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -260,6 +260,7 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ieee80211_bss *bss; struct ieee80211_channel *channel; + struct ieee80211_ext *ext; size_t min_hdr_len = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); @@ -269,12 +270,10 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) return; if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { - if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) - min_hdr_len = offsetof(struct ieee80211_ext, - u.s1g_short_beacon.variable); - else - min_hdr_len = offsetof(struct ieee80211_ext, - u.s1g_beacon); + ext = (struct ieee80211_ext *)mgmt; + min_hdr_len = + offsetof(struct ieee80211_ext, u.s1g_beacon.variable) + + ieee80211_s1g_optional_len(ext->frame_control); } if (skb->len < min_hdr_len) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index ddd3a97f6609d1..e8a4fe44ec2d80 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -3250,6 +3250,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, const u8 *ie; size_t ielen; u64 tsf; + size_t s1g_optional_len; if (WARN_ON(!mgmt)) return NULL; @@ -3264,12 +3265,11 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { ext = (void *) mgmt; - if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) - min_hdr_len = offsetof(struct ieee80211_ext, - u.s1g_short_beacon.variable); - else - min_hdr_len = offsetof(struct ieee80211_ext, - u.s1g_beacon.variable); + s1g_optional_len = + ieee80211_s1g_optional_len(ext->frame_control); + min_hdr_len = + offsetof(struct ieee80211_ext, u.s1g_beacon.variable) + + s1g_optional_len; } else { /* same for beacons */ min_hdr_len = offsetof(struct ieee80211_mgmt, @@ -3285,11 +3285,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, const struct ieee80211_s1g_bcn_compat_ie *compat; const struct element *elem; - if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) - ie = ext->u.s1g_short_beacon.variable; - else - ie = ext->u.s1g_beacon.variable; - + ie = ext->u.s1g_beacon.variable + s1g_optional_len; elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT, ie, ielen); if (!elem) return NULL; From bee6f128b741baa1b6d3bae07ec53e92fbf034bc Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Tue, 3 Jun 2025 11:12:04 +0200 Subject: [PATCH 2673/2924] net: wwan: mhi_wwan_mbim: use correct mux_id for multiplexing [ Upstream commit 501fe52aa908c96f2c9b8d54767938a1a5960354 ] Recent Qualcomm chipsets like SDX72/75 require MBIM sessionId mapping to muxId in the range (0x70-0x8F) for the PCIe tethered use. This has been partially addressed by the referenced commit, mapping the default data call to muxId = 112, but the multiplexed data calls scenario was not properly considered, mapping sessionId = 1 to muxId 1, while it should have been 113. Fix this by moving the session_id assignment logic to mhi_mbim_newlink, in order to map sessionId = n to muxId = n + WDS_BIND_MUX_DATA_PORT_MUX_ID. Fixes: 65bc58c3dcad ("net: wwan: mhi: make default data link id configurable") Signed-off-by: Daniele Palmas Reviewed-by: Loic Poulain Link: https://patch.msgid.link/20250603091204.2802840-1-dnlplm@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/wwan/mhi_wwan_mbim.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index 8755c5e6a65b30..c814fbd756a1e7 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -550,8 +550,8 @@ static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id, struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); struct mhi_mbim_context *mbim = ctxt; - link->session = if_id; link->mbim = mbim; + link->session = mhi_mbim_get_link_mux_id(link->mbim->mdev->mhi_cntrl) + if_id; link->ndev = ndev; u64_stats_init(&link->rx_syncp); u64_stats_init(&link->tx_syncp); @@ -607,7 +607,7 @@ static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id { struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; struct mhi_mbim_context *mbim; - int err, link_id; + int err; mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL); if (!mbim) @@ -628,11 +628,8 @@ static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id /* Number of transfer descriptors determines size of the queue */ mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); - /* Get the corresponding mux_id from mhi */ - link_id = mhi_mbim_get_link_mux_id(cntrl); - /* Register wwan link ops with MHI controller representing WWAN instance */ - return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, link_id); + return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0); } static void mhi_mbim_remove(struct mhi_device *mhi_dev) From 3c8745e7bd9da77749e37eea9577f51c017dd8ad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Jun 2025 06:53:56 -0700 Subject: [PATCH 2674/2924] netlink: specs: rt-link: add missing byte-order properties [ Upstream commit de92258e3b22bd4ce920715cc91f09d788e5badf ] A number of fields in the ip tunnels are lacking the big-endian designation. I suspect this is not intentional, as decoding the ports with the right endian seems objectively beneficial. Fixes: 6ffdbb93a59c ("netlink: specs: rt_link: decode ip6tnl, vti and vti6 link attrs") Fixes: 077b6022d24b ("doc/netlink/specs: Add sub-message type to rt_link family") Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250603135357.502626-2-kuba@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- Documentation/netlink/specs/rt_link.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 6b9d5ee87d93a8..ba767c42e87924 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -1778,15 +1778,19 @@ attribute-sets: - name: iflags type: u16 + byte-order: big-endian - name: oflags type: u16 + byte-order: big-endian - name: ikey type: u32 + byte-order: big-endian - name: okey type: u32 + byte-order: big-endian - name: local type: binary @@ -1810,6 +1814,7 @@ attribute-sets: - name: flowinfo type: u32 + byte-order: big-endian - name: flags type: u32 @@ -1822,9 +1827,11 @@ attribute-sets: - name: encap-sport type: u16 + byte-order: big-endian - name: encap-dport type: u16 + byte-order: big-endian - name: collect-metadata type: flag @@ -1856,9 +1863,11 @@ attribute-sets: - name: ikey type: u32 + byte-order: big-endian - name: okey type: u32 + byte-order: big-endian - name: local type: binary @@ -1908,6 +1917,7 @@ attribute-sets: - name: port type: u16 + byte-order: big-endian - name: collect-metadata type: flag @@ -1927,6 +1937,7 @@ attribute-sets: - name: label type: u32 + byte-order: big-endian - name: ttl-inherit type: u8 @@ -1967,9 +1978,11 @@ attribute-sets: - name: flowinfo type: u32 + byte-order: big-endian - name: flags type: u16 + byte-order: big-endian - name: proto type: u8 @@ -1999,9 +2012,11 @@ attribute-sets: - name: encap-sport type: u16 + byte-order: big-endian - name: encap-dport type: u16 + byte-order: big-endian - name: collect-metadata type: flag From ac08fcacd29bee33ed9ad1da15f5828bc3c7477c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Jun 2025 06:53:57 -0700 Subject: [PATCH 2675/2924] netlink: specs: rt-link: decode ip6gre [ Upstream commit 8af7a919c52f02514a145f995cbdf0deadb8075a ] Driver tests now require GRE tunnels, while we don't configure them with YNL, YNL will complain when it sees link types it doesn't recognize. Teach it decoding ip6gre tunnels. The attrs are largely the same as IPv4 GRE. Correct the type of encap-limit, but note that this attr is only used in ip6gre, so the mistake didn't matter until now. Fixes: 0d0f4174f6c8 ("selftests: drv-net: add a simple TSO test") Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250603135357.502626-3-kuba@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- Documentation/netlink/specs/rt_link.yaml | 53 +++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index ba767c42e87924..2ac0e9fda1582d 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -1810,7 +1810,7 @@ attribute-sets: type: u8 - name: encap-limit - type: u32 + type: u8 - name: flowinfo type: u32 @@ -1853,6 +1853,54 @@ attribute-sets: - name: erspan-hwid type: u16 + - + name: linkinfo-gre6-attrs + subset-of: linkinfo-gre-attrs + attributes: + - + name: link + - + name: iflags + - + name: oflags + - + name: ikey + - + name: okey + - + name: local + display-hint: ipv6 + - + name: remote + display-hint: ipv6 + - + name: ttl + - + name: encap-limit + - + name: flowinfo + - + name: flags + - + name: encap-type + - + name: encap-flags + - + name: encap-sport + - + name: encap-dport + - + name: collect-metadata + - + name: fwmark + - + name: erspan-index + - + name: erspan-ver + - + name: erspan-dir + - + name: erspan-hwid - name: linkinfo-vti-attrs name-prefix: ifla-vti- @@ -2314,6 +2362,9 @@ sub-messages: - value: gretap attribute-set: linkinfo-gre-attrs + - + value: ip6gre + attribute-set: linkinfo-gre6-attrs - value: geneve attribute-set: linkinfo-geneve-attrs From aba544ff9ec522e120b1b3d8b32efa4e6d16d4d7 Mon Sep 17 00:00:00 2001 From: Mirco Barone Date: Thu, 5 Jun 2025 14:06:16 +0200 Subject: [PATCH 2676/2924] wireguard: device: enable threaded NAPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit db9ae3b6b43c79b1ba87eea849fd65efa05b4b2e ] Enable threaded NAPI by default for WireGuard devices in response to low performance behavior that we observed when multiple tunnels (and thus multiple wg devices) are deployed on a single host. This affects any kind of multi-tunnel deployment, regardless of whether the tunnels share the same endpoints or not (i.e., a VPN concentrator type of gateway would also be affected). The problem is caused by the fact that, in case of a traffic surge that involves multiple tunnels at the same time, the polling of the NAPI instance of all these wg devices tends to converge onto the same core, causing underutilization of the CPU and bottlenecking performance. This happens because NAPI polling is hosted by default in softirq context, but the WireGuard driver only raises this softirq after the rx peer queue has been drained, which doesn't happen during high traffic. In this case, the softirq already active on a core is reused instead of raising a new one. As a result, once two or more tunnel softirqs have been scheduled on the same core, they remain pinned there until the surge ends. In our experiments, this almost always leads to all tunnel NAPIs being handled on a single core shortly after a surge begins, limiting scalability to less than 3× the performance of a single tunnel, despite plenty of unused CPU cores being available. The proposed mitigation is to enable threaded NAPI for all WireGuard devices. This moves the NAPI polling context to a dedicated per-device kernel thread, allowing the scheduler to balance the load across all available cores. On our 32-core gateways, enabling threaded NAPI yields a ~4× performance improvement with 16 tunnels, increasing throughput from ~13 Gbps to ~48 Gbps. Meanwhile, CPU usage on the receiver (which is the bottleneck) jumps from 20% to 100%. We have found no performance regressions in any scenario we tested. Single-tunnel throughput remains unchanged. More details are available in our Netdev paper. Link: https://netdevconf.info/0x18/docs/netdev-0x18-paper23-talk-paper.pdf Signed-off-by: Mirco Barone Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Link: https://patch.msgid.link/20250605120616.2808744-1-Jason@zx2c4.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wireguard/device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 3ffeeba5dccf40..4a529f1f9beab6 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -366,6 +366,7 @@ static int wg_newlink(struct net_device *dev, if (ret < 0) goto err_free_handshake_queue; + dev_set_threaded(dev, true); ret = register_netdevice(dev); if (ret < 0) goto err_uninit_ratelimiter; From 26284a9043904d75e7d64315715c6a37c868db75 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Jun 2025 18:20:31 -0700 Subject: [PATCH 2677/2924] selftests: drv-net: tso: fix the GRE device name [ Upstream commit c68804c934e3197e34560744854c57cf88dff8e7 ] The device type for IPv4 GRE is "gre" not "ipgre", unlike for IPv6 which uses "ip6gre". Not sure how I missed this when writing the test, perhaps because all HW I have access to is on an IPv6-only network. Fixes: 0d0f4174f6c8 ("selftests: drv-net: add a simple TSO test") Reviewed-by: Simon Horman Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250604012031.891242-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/drivers/net/hw/tso.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index e1ecb92f79d9b4..eec647e7ec19c9 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -216,7 +216,7 @@ def main() -> None: ("", "6", "tx-tcp6-segmentation", None), ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")), ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")), - ("gre", "4", "tx-gre-segmentation", ("ipgre", False, "")), + ("gre", "4", "tx-gre-segmentation", ("gre", False, "")), ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")), ) From b20f409d29dd4dca8d1fb1cddc4106ddbab334c5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Jun 2025 18:20:55 -0700 Subject: [PATCH 2678/2924] selftests: drv-net: tso: make bkg() wait for socat to quit [ Upstream commit e6854be4d80ea266a7be64a65a0322bcdfa72807 ] Commit 846742f7e32f ("selftests: drv-net: add a warning for bkg + shell + terminate") added a warning for bkg() used with terminate=True. The tso test was missed as we didn't have it running anywhere in NIPA. Add exit_wait=True, to avoid: # Warning: combining shell and terminate is risky! # SIGTERM may not reach the child on zsh/ksh! getting printed twice for every variant. Fixes: 0d0f4174f6c8 ("selftests: drv-net: add a simple TSO test") Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250604012055.891431-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/drivers/net/hw/tso.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index eec647e7ec19c9..3370827409aa02 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -39,7 +39,7 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): port = rand_port() listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" - with bkg(listen_cmd, host=cfg.remote) as nc: + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: wait_port_listen(port, host=cfg.remote) if ipver == "4": From 7acb2f063fc5ef0767b2594fd48d039e6792872b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Jun 2025 09:39:28 +0000 Subject: [PATCH 2679/2924] net: annotate data-races around cleanup_net_task [ Upstream commit 535caaca921c653b1f9838fbd5c4e9494cafc3d9 ] from_cleanup_net() reads cleanup_net_task locklessly. Add READ_ONCE()/WRITE_ONCE() annotations to avoid a potential KCSAN warning, even if the race is harmless. Fixes: 0734d7c3d93c ("net: expedite synchronize_net() for cleanup_net()") Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Link: https://patch.msgid.link/20250604093928.1323333-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/dev.c | 2 +- net/core/net_namespace.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 0d891634c69270..2b20aadaf9268d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10393,7 +10393,7 @@ static void dev_index_release(struct net *net, int ifindex) static bool from_cleanup_net(void) { #ifdef CONFIG_NET_NS - return current == cleanup_net_task; + return current == READ_ONCE(cleanup_net_task); #else return false; #endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b0dfdf791ece5a..599f6a89ae581e 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -600,7 +600,7 @@ static void cleanup_net(struct work_struct *work) LIST_HEAD(net_exit_list); LIST_HEAD(dev_kill_list); - cleanup_net_task = current; + WRITE_ONCE(cleanup_net_task, current); /* Atomically snapshot the list of namespaces to cleanup */ net_kill_list = llist_del_all(&cleanup_list); @@ -676,7 +676,7 @@ static void cleanup_net(struct work_struct *work) put_user_ns(net->user_ns); net_passive_dec(net); } - cleanup_net_task = NULL; + WRITE_ONCE(cleanup_net_task, NULL); } /** From 8a29ae8428a5c26f0b4471c2057d9e9415c53109 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Jun 2025 10:58:15 +0000 Subject: [PATCH 2680/2924] net: prevent a NULL deref in rtnl_create_link() [ Upstream commit feafc73f3e6ae73371777a037d41d2e31c929636 ] At the time rtnl_create_link() is running, dev->netdev_ops is NULL, we must not use netdev_lock_ops() or risk a NULL deref if CONFIG_NET_SHAPER is defined. Use netif_set_group() instead of dev_set_group(). RIP: 0010:netdev_need_ops_lock include/net/netdev_lock.h:33 [inline] RIP: 0010:netdev_lock_ops include/net/netdev_lock.h:41 [inline] RIP: 0010:dev_set_group+0xc0/0x230 net/core/dev_api.c:82 Call Trace: rtnl_create_link+0x748/0xd10 net/core/rtnetlink.c:3674 rtnl_newlink_create+0x25c/0xb00 net/core/rtnetlink.c:3813 __rtnl_newlink net/core/rtnetlink.c:3940 [inline] rtnl_newlink+0x16d6/0x1c70 net/core/rtnetlink.c:4055 rtnetlink_rcv_msg+0x7cf/0xb70 net/core/rtnetlink.c:6944 netlink_rcv_skb+0x208/0x470 net/netlink/af_netlink.c:2534 netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] netlink_unicast+0x75b/0x8d0 net/netlink/af_netlink.c:1339 netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:712 [inline] Reported-by: syzbot+9fc858ba0312b42b577e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6840265f.a00a0220.d4325.0009.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Fixes: 7e4d784f5810 ("net: hold netdev instance lock during rtnetlink operations") Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250604105815.1516973-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c5a7f41982a575..fc6815ad78266f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3681,7 +3681,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); if (tb[IFLA_GROUP]) - dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + netif_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); if (tb[IFLA_GSO_MAX_SIZE]) netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE])); if (tb[IFLA_GSO_MAX_SEGS]) From cd4cd09810211fa23609c5c1018352e9e1cd8e5a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 4 Jun 2025 14:32:52 +0300 Subject: [PATCH 2681/2924] seg6: Fix validation of nexthop addresses [ Upstream commit 7632fedb266d93ed0ed9f487133e6c6314a9b2d1 ] The kernel currently validates that the length of the provided nexthop address does not exceed the specified length. This can lead to the kernel reading uninitialized memory if user space provided a shorter length than the specified one. Fix by validating that the provided length exactly matches the specified one. Fixes: d1df6fd8a1d2 ("ipv6: sr: define core operations for seg6local lightweight tunnel") Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Link: https://patch.msgid.link/20250604113252.371528-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/seg6_local.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index ac1dbd492c22dc..a11a02b4ba95b6 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -1644,10 +1644,8 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, [SEG6_LOCAL_TABLE] = { .type = NLA_U32 }, [SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 }, - [SEG6_LOCAL_NH4] = { .type = NLA_BINARY, - .len = sizeof(struct in_addr) }, - [SEG6_LOCAL_NH6] = { .type = NLA_BINARY, - .len = sizeof(struct in6_addr) }, + [SEG6_LOCAL_NH4] = NLA_POLICY_EXACT_LEN(sizeof(struct in_addr)), + [SEG6_LOCAL_NH6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), [SEG6_LOCAL_IIF] = { .type = NLA_U32 }, [SEG6_LOCAL_OIF] = { .type = NLA_U32 }, [SEG6_LOCAL_BPF] = { .type = NLA_NESTED }, From f5e6a6a8aa46d44ec7a240766cf3b7dd077718b9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 14 May 2025 16:24:26 +0100 Subject: [PATCH 2682/2924] drm/xe/vm: move xe_svm_init() earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8cf8cde41ad01150afbd1327ad1942387787f7fd ] In xe_vm_close_and_put() we need to be able to call xe_svm_fini(), however during vm creation we can call this on the error path, before having actually initialised the svm state, leading to various splats followed by a fatal NPD. Fixes: 6fd979c2f331 ("drm/xe: Add SVM init / close / fini to faulting VMs") Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4967 Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250514152424.149591-4-matthew.auld@intel.com (cherry picked from commit 4f296d77cf49fcb5f90b4674123ad7f3a0676165) Signed-off-by: Thomas Hellström Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/xe_vm.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 367c84b90e9ef7..737172013a8f9e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1681,10 +1681,16 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_get_noresume(xe); + if (flags & XE_VM_FLAG_FAULT_MODE) { + err = xe_svm_init(vm); + if (err) + goto err_no_resv; + } + vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { err = -ENOMEM; - goto err_no_resv; + goto err_svm_fini; } drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, @@ -1757,12 +1763,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) } } - if (flags & XE_VM_FLAG_FAULT_MODE) { - err = xe_svm_init(vm); - if (err) - goto err_close; - } - if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); @@ -1776,6 +1776,11 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) xe_vm_close_and_put(vm); return ERR_PTR(err); +err_svm_fini: + if (flags & XE_VM_FLAG_FAULT_MODE) { + vm->size = 0; /* close the vm */ + xe_svm_fini(vm); + } err_no_resv: mutex_destroy(&vm->snap_mutex); for_each_tile(tile, xe, id) From e8951661ae9359c3b4a582cbe787f37506ed92cc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 29 May 2025 10:23:56 -0700 Subject: [PATCH 2683/2924] drm/xe/vsec: fix CONFIG_INTEL_VSEC dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2182f358fb138f81a586ffdddd510f2a4fc61702 ] The XE driver can be built with or without VSEC support, but fails to link as built-in if vsec is in a loadable module: x86_64-linux-ld: vmlinux.o: in function `xe_vsec_init': (.text+0x1e83e16): undefined reference to `intel_vsec_register' The normal fix for this is to add a 'depends on INTEL_VSEC || !INTEL_VSEC', forcing XE to be a loadable module as well, but that causes a circular dependency: symbol DRM_XE depends on INTEL_VSEC symbol INTEL_VSEC depends on X86_PLATFORM_DEVICES symbol X86_PLATFORM_DEVICES is selected by DRM_XE The problem here is selecting a symbol from another subsystem, so change that as well and rephrase the 'select' into the corresponding dependency. Since X86_PLATFORM_DEVICES is 'default y', there is no change to defconfig builds here. Fixes: 0c45e76fcc62 ("drm/xe/vsec: Support BMG devices") Signed-off-by: Arnd Bergmann Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250529172355.2395634-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit e4931f8be347ec5f19df4d6d33aea37145378c42) Signed-off-by: Thomas Hellström Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 5c2f459a2925a4..9a46aafcb33bae 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -2,6 +2,8 @@ config DRM_XE tristate "Intel Xe Graphics" depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) + depends on INTEL_VSEC || !INTEL_VSEC + depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs @@ -27,7 +29,6 @@ config DRM_XE select BACKLIGHT_CLASS_DEVICE if ACPI select INPUT if ACPI select ACPI_VIDEO if X86 && ACPI - select X86_PLATFORM_DEVICES if X86 && ACPI select ACPI_WMI if X86 && ACPI select SYNC_FILE select IOSF_MBI From 484231215fa88a10b45195069583e4b7b57fb3c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 28 May 2025 18:41:05 +0200 Subject: [PATCH 2684/2924] drm/xe: Rework eviction rejection of bound external bos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5cc3325584c425069c1c3355c775314d64bf8770 ] For preempt_fence mode VM's we're rejecting eviction of shared bos during VM_BIND. However, since we do this in the move() callback, we're getting an eviction failure warning from TTM. The TTM callback intended for these things is eviction_valuable(). However, the latter doesn't pass in the struct ttm_operation_ctx needed to determine whether the caller needs this. Instead, attach the needed information to the vm under the vm->resv, until we've been able to update TTM to provide the needed information. And add sufficient lockdep checks to prevent misuse and races. v2: - Fix a copy-paste error in xe_vm_clear_validating() v3: - Fix kerneldoc errors. Signed-off-by: Thomas Hellström Fixes: 0af944f0e308 ("drm/xe: Reject BO eviction if BO is bound to current VM") Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250528164105.234718-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 9d5558649f68e2e84a87a909631b30e15ca0f8ec) Signed-off-by: Thomas Hellström Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/xe_bo.c | 46 ++++++++++++--------- drivers/gpu/drm/xe/xe_vm.h | 69 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm_types.h | 8 ++++ 3 files changed, 105 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 64f9c936eea063..b98526d271f2fa 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -816,21 +816,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } - /* Reject BO eviction if BO is bound to current VM. */ - if (evict && ctx->resv) { - struct drm_gpuvm_bo *vm_bo; - - drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) { - struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); - - if (xe_vm_resv(vm) == ctx->resv && - xe_vm_in_preempt_fence_mode(vm)) { - ret = -EBUSY; - goto out; - } - } - } - /* * Failed multi-hop where the old_mem is still marked as * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. @@ -1023,6 +1008,25 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, return lret; } +static bool +xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) +{ + struct drm_gpuvm_bo *vm_bo; + + if (!ttm_bo_eviction_valuable(bo, place)) + return false; + + if (!xe_bo_is_xe_bo(bo)) + return true; + + drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) { + if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm))) + return false; + } + + return true; +} + /** * xe_bo_shrink() - Try to shrink an xe bo. * @ctx: The struct ttm_operation_ctx used for shrinking. @@ -1057,7 +1061,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, (flags.purge && !xe_tt->purgeable)) return -EBUSY; - if (!ttm_bo_eviction_valuable(bo, &place)) + if (!xe_bo_eviction_valuable(bo, &place)) return -EBUSY; if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo)) @@ -1418,7 +1422,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .io_mem_pfn = xe_ttm_io_mem_pfn, .access_memory = xe_ttm_access_memory, .release_notify = xe_ttm_bo_release_notify, - .eviction_valuable = ttm_bo_eviction_valuable, + .eviction_valuable = xe_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, .swap_notify = xe_ttm_bo_swap_notify, }; @@ -2260,6 +2264,8 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) .no_wait_gpu = false, .gfp_retry_mayfail = true, }; + struct pin_cookie cookie; + int ret; if (vm) { lockdep_assert_held(&vm->lock); @@ -2269,8 +2275,12 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } + cookie = xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); - return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); + ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); + xe_vm_clear_validating(vm, allow_res_evict, cookie); + + return ret; } bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 0ef811fc2bdeee..494af6bdc646b4 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -301,6 +301,75 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); +/** + * xe_vm_set_validating() - Register this task as currently making bos resident + * @allow_res_evict: Allow eviction of buffer objects bound to @vm when + * validating. + * @vm: Pointer to the vm or NULL. + * + * Register this task as currently making bos resident for the vm. Intended + * to avoid eviction by the same task of shared bos bound to the vm. + * Call with the vm's resv lock held. + * + * Return: A pin cookie that should be used for xe_vm_clear_validating(). + */ +static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, + bool allow_res_evict) +{ + struct pin_cookie cookie = {}; + + if (vm && !allow_res_evict) { + xe_vm_assert_held(vm); + cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base); + /* Pairs with READ_ONCE in xe_vm_is_validating() */ + WRITE_ONCE(vm->validating, current); + } + + return cookie; +} + +/** + * xe_vm_clear_validating() - Unregister this task as currently making bos resident + * @vm: Pointer to the vm or NULL + * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same + * value as for xe_vm_set_validation(). + * @cookie: Cookie obtained from xe_vm_set_validating(). + * + * Register this task as currently making bos resident for the vm. Intended + * to avoid eviction by the same task of shared bos bound to the vm. + * Call with the vm's resv lock held. + */ +static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict, + struct pin_cookie cookie) +{ + if (vm && !allow_res_evict) { + lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie); + /* Pairs with READ_ONCE in xe_vm_is_validating() */ + WRITE_ONCE(vm->validating, NULL); + } +} + +/** + * xe_vm_is_validating() - Whether bos bound to the vm are currently being made resident + * by the current task. + * @vm: Pointer to the vm. + * + * If this function returns %true, we should be in a vm resv locked region, since + * the current process is the same task that called xe_vm_set_validating(). + * The function asserts that that's indeed the case. + * + * Return: %true if the task is currently making bos resident, %false otherwise. + */ +static inline bool xe_vm_is_validating(struct xe_vm *vm) +{ + /* Pairs with WRITE_ONCE in xe_vm_is_validating() */ + if (READ_ONCE(vm->validating) == current) { + xe_vm_assert_held(vm); + return true; + } + return false; +} + #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); #else diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 84fa41b9fa20f3..0882674ce1cbab 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -310,6 +310,14 @@ struct xe_vm { * protected by the vm resv. */ u64 tlb_flush_seqno; + /** + * @validating: The task that is currently making bos resident for this vm. + * Protected by the VM's resv for writing. Opportunistic reading can be done + * using READ_ONCE. Note: This is a workaround for the + * TTM eviction_valuable() callback not being passed a struct + * ttm_operation_context(). Future work might want to address this. + */ + struct task_struct *validating; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; /** @xef: XE file handle for tracking this VM's drm client */ From 235e422d19022c8b8fb5ad7e3fbb7eb1657b1c54 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 22 May 2025 15:54:03 -0700 Subject: [PATCH 2685/2924] drm/xe/pxp: Use the correct define in the set_property_funcs array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 6bf4d5649230ca65725ec4793333fb5eba18d646 ] The define of the extension type was accidentally used instead of the one of the property itself. They're both zero, so no functional issue, but we should use the correct define for code correctness. Fixes: 41a97c4a1294 ("drm/xe/pxp/uapi: Add API to mark a BO as using PXP") Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Link: https://lore.kernel.org/r/20250522225401.3953243-6-daniele.ceraolospurio@intel.com (cherry picked from commit 1d891ee820fd0fbb4101eacb0d922b5050a24933) Signed-off-by: Thomas Hellström Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/xe_bo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index b98526d271f2fa..5922302c3e00cc 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2396,7 +2396,7 @@ typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe, u64 value); static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = { - [DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_set_pxp_type, + [DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type, }; static int gem_create_user_ext_set_property(struct xe_device *xe, From a2fd22f51aad28c8dd1aee0d9ad6871f85131b29 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 22 May 2025 15:54:04 -0700 Subject: [PATCH 2686/2924] drm/xe/pxp: Clarify PXP queue creation behavior if PXP is not ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 69a58ef4fa77759b0e0c2f79834fa51b00a50c0b ] The expected flow of operations when using PXP is to query the PXP status and wait for it to transition to "ready" before attempting to create an exec_queue. This flow is followed by the Mesa driver, but there is no guarantee that an incorrectly coded (or malicious) app will not attempt to create the queue first without querying the status. Therefore, we need to clarify what the expected behavior of the queue creation ioctl is in this scenario. Currently, the ioctl always fails with an -EBUSY code no matter the error, but for consistency it is better to distinguish between "failed to init" (-EIO) and "not ready" (-EBUSY), the same way the query ioctl does. Note that, while this is a change in the return code of an ioctl, the behavior of the ioctl in this particular corner case was not clearly spec'd, so no one should have been relying on it (and we know that Mesa, which is the only known userspace for this, didn't). v2: Minor rework of the doc (Rodrigo) Fixes: 72d479601d67 ("drm/xe/pxp/uapi: Add userspace and LRC support for PXP-using queues") Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: José Roberto de Souza Reviewed-by: José Roberto de Souza Reviewed-by: John Harrison Acked-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250522225401.3953243-7-daniele.ceraolospurio@intel.com (cherry picked from commit 21784ca96025b62d95b670b7639ad70ddafa69b8) Signed-off-by: Thomas Hellström Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/xe_pxp.c | 8 ++++++-- include/uapi/drm/xe_drm.h | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index 454ea7dc08ac83..b5bc15f436fa2d 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -541,10 +541,14 @@ int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) */ xe_pm_runtime_get(pxp->xe); - if (!pxp_prerequisites_done(pxp)) { - ret = -EBUSY; + /* get_readiness_status() returns 0 for in-progress and 1 for done */ + ret = xe_pxp_get_readiness_status(pxp); + if (ret <= 0) { + if (!ret) + ret = -EBUSY; goto out; } + ret = 0; wait_for_idle: /* diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 616916985e3f30..5e5442d03f8789 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1206,6 +1206,11 @@ struct drm_xe_vm_bind { * there is no need to explicitly set that. When a queue of type * %DRM_XE_PXP_TYPE_HWDRM is created, the PXP default HWDRM session * (%XE_PXP_HWDRM_DEFAULT_SESSION) will be started, if isn't already running. + * The user is expected to query the PXP status via the query ioctl (see + * %DRM_XE_DEVICE_QUERY_PXP_STATUS) and to wait for PXP to be ready before + * attempting to create a queue with this property. When a queue is created + * before PXP is ready, the ioctl will return -EBUSY if init is still in + * progress or -EIO if init failed. * Given that going into a power-saving state kills PXP HWDRM sessions, * runtime PM will be blocked while queues of this type are alive. * All PXP queues will be killed if a PXP invalidation event occurs. From 4b0b4df9e8a18d594f51a4ff87f57e77b850a594 Mon Sep 17 00:00:00 2001 From: Nylon Chen Date: Fri, 11 Apr 2025 15:38:50 +0800 Subject: [PATCH 2687/2924] riscv: misaligned: fix sleeping function called during misaligned access handling [ Upstream commit 61a74ad254628ccd9e88838c3c622885dfb6c588 ] Use copy_from_user_nofault() and copy_to_user_nofault() instead of copy_from/to_user functions in the misaligned access trap handlers. The following bug report was found when executing misaligned memory accesses: BUG: sleeping function called from invalid context at ./include/linux/uaccess.h:162 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 115, name: two preempt_count: 0, expected: 0 CPU: 0 UID: 0 PID: 115 Comm: two Not tainted 6.14.0-rc5 #24 Hardware name: riscv-virtio,qemu (DT) Call Trace: [] dump_backtrace+0x1c/0x24 [] show_stack+0x28/0x34 [] dump_stack_lvl+0x4a/0x68 [] dump_stack+0x14/0x1c [] __might_resched+0xfa/0x104 [] __might_sleep+0x3e/0x62 [] __might_fault+0x1c/0x24 [] _copy_from_user+0x28/0xaa [] handle_misaligned_store+0x204/0x254 [] do_trap_store_misaligned+0x24/0xee [] handle_exception+0x146/0x152 Fixes: b686ecdeacf6 ("riscv: misaligned: Restrict user access to kernel memory") Fixes: 441381506ba7 ("riscv: misaligned: remove CONFIG_RISCV_M_MODE specific code") Signed-off-by: Zong Li Signed-off-by: Nylon Chen Link: https://lore.kernel.org/r/20250411073850.3699180-3-nylon.chen@sifive.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/traps_misaligned.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 77c788660223b3..56f06a27d45fb1 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -455,7 +455,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) val.data_u64 = 0; if (user_mode(regs)) { - if (copy_from_user(&val, (u8 __user *)addr, len)) + if (copy_from_user_nofault(&val, (u8 __user *)addr, len)) return -1; } else { memcpy(&val, (u8 *)addr, len); @@ -556,7 +556,7 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs) return -EOPNOTSUPP; if (user_mode(regs)) { - if (copy_to_user((u8 __user *)addr, &val, len)) + if (copy_to_user_nofault((u8 __user *)addr, &val, len)) return -1; } else { memcpy((u8 *)addr, &val, len); From b5873bc7fce1117e1661146502f6d3671d5af8a8 Mon Sep 17 00:00:00 2001 From: Ziqi Chen Date: Thu, 22 May 2025 10:15:35 +0800 Subject: [PATCH 2688/2924] scsi: ufs: qcom: Check gear against max gear in vop freq_to_gear() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 663d0c19f3acc0d697f623d34b8eb3b438bf2bda ] The vop freq_to_gear() may return a gear greater than the negotiated max gear. Return the negotiated max gear if the mapped gear is greater. Fixes: c02fe9e222d1 ("scsi: ufs: qcom: Implement the freq_to_gear_speed() vop") Signed-off-by: Ziqi Chen Link: https://lore.kernel.org/r/20250522021537.999107-2-quic_ziqichen@quicinc.com Reported-by: Neil Armstrong Closes: https://lore.kernel.org/all/c7f2476a-943a-4d73-ad80-802c91e5f880@linaro.org/ Tested-by: Neil Armstrong Reviewed-by: Bean Huo Tested-by: Loïc Minier Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/host/ufs-qcom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index c0761ccc1381e3..28515f89ce7988 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1924,7 +1924,7 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) static u32 ufs_qcom_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq) { - u32 gear = 0; + u32 gear = UFS_HS_DONT_CHANGE; switch (freq) { case 403000000: @@ -1946,10 +1946,10 @@ static u32 ufs_qcom_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq) break; default: dev_err(hba->dev, "%s: Unsupported clock freq : %lu\n", __func__, freq); - break; + return UFS_HS_DONT_CHANGE; } - return gear; + return min_t(u32, gear, hba->max_pwr_info.info.gear_rx); } /* From 2245ade1fcf2e8c87e6a9245a5817a023f2dce89 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Thu, 22 May 2025 10:15:36 +0800 Subject: [PATCH 2689/2924] scsi: ufs: qcom: Map devfreq OPP freq to UniPro Core Clock freq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8c5bcb3daeef9bc54c9939c36dca9a626126eb59 ] On some platforms, the devfreq OPP freq may be different than the unipro core clock freq. Implement ufs_qcom_opp_freq_to_clk_freq() and use it to find the unipro core clk freq. Fixes: c02fe9e222d1 ("scsi: ufs: qcom: Implement the freq_to_gear_speed() vop") Signed-off-by: Can Guo Co-developed-by: Ziqi Chen Signed-off-by: Ziqi Chen Link: https://lore.kernel.org/r/20250522021537.999107-3-quic_ziqichen@quicinc.com Reported-by: Luca Weiss Closes: https://lore.kernel.org/linux-arm-msm/D9FZ9U3AEXW4.1I12FX3YQ3JPW@fairphone.com/ Tested-by: Luca Weiss Reviewed-by: Bean Huo Tested-by: Loïc Minier Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/host/ufs-qcom.c | 81 ++++++++++++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 10 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 28515f89ce7988..2b000c1708ce45 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -103,7 +103,9 @@ static const struct __ufs_qcom_bw_table { }; static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host); -static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, unsigned long freq); +static unsigned long ufs_qcom_opp_freq_to_clk_freq(struct ufs_hba *hba, + unsigned long freq, char *name); +static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up, unsigned long freq); static struct ufs_qcom_host *rcdev_to_ufs_host(struct reset_controller_dev *rcd) { @@ -602,7 +604,7 @@ static int ufs_qcom_link_startup_notify(struct ufs_hba *hba, return -EINVAL; } - err = ufs_qcom_set_core_clk_ctrl(hba, ULONG_MAX); + err = ufs_qcom_set_core_clk_ctrl(hba, true, ULONG_MAX); if (err) dev_err(hba->dev, "cfg core clk ctrl failed\n"); /* @@ -1360,29 +1362,46 @@ static int ufs_qcom_set_clk_40ns_cycles(struct ufs_hba *hba, return ufshcd_dme_set(hba, UIC_ARG_MIB(PA_VS_CORE_CLK_40NS_CYCLES), reg); } -static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, unsigned long freq) +static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up, unsigned long freq) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); struct list_head *head = &hba->clk_list_head; struct ufs_clk_info *clki; u32 cycles_in_1us = 0; u32 core_clk_ctrl_reg; + unsigned long clk_freq; int err; + if (hba->use_pm_opp && freq != ULONG_MAX) { + clk_freq = ufs_qcom_opp_freq_to_clk_freq(hba, freq, "core_clk_unipro"); + if (clk_freq) { + cycles_in_1us = ceil(clk_freq, HZ_PER_MHZ); + goto set_core_clk_ctrl; + } + } + list_for_each_entry(clki, head, list) { if (!IS_ERR_OR_NULL(clki->clk) && !strcmp(clki->name, "core_clk_unipro")) { - if (!clki->max_freq) + if (!clki->max_freq) { cycles_in_1us = 150; /* default for backwards compatibility */ - else if (freq == ULONG_MAX) + break; + } + + if (freq == ULONG_MAX) { cycles_in_1us = ceil(clki->max_freq, HZ_PER_MHZ); - else - cycles_in_1us = ceil(freq, HZ_PER_MHZ); + break; + } + if (is_scale_up) + cycles_in_1us = ceil(clki->max_freq, HZ_PER_MHZ); + else + cycles_in_1us = ceil(clk_get_rate(clki->clk), HZ_PER_MHZ); break; } } +set_core_clk_ctrl: err = ufshcd_dme_get(hba, UIC_ARG_MIB(DME_VS_CORE_CLK_CTRL), &core_clk_ctrl_reg); @@ -1425,7 +1444,7 @@ static int ufs_qcom_clk_scale_up_pre_change(struct ufs_hba *hba, unsigned long f return ret; } /* set unipro core clock attributes and clear clock divider */ - return ufs_qcom_set_core_clk_ctrl(hba, freq); + return ufs_qcom_set_core_clk_ctrl(hba, true, freq); } static int ufs_qcom_clk_scale_up_post_change(struct ufs_hba *hba) @@ -1457,7 +1476,7 @@ static int ufs_qcom_clk_scale_down_pre_change(struct ufs_hba *hba) static int ufs_qcom_clk_scale_down_post_change(struct ufs_hba *hba, unsigned long freq) { /* set unipro core clock attributes and clear clock divider */ - return ufs_qcom_set_core_clk_ctrl(hba, freq); + return ufs_qcom_set_core_clk_ctrl(hba, false, freq); } static int ufs_qcom_clk_scale_notify(struct ufs_hba *hba, bool scale_up, @@ -1922,11 +1941,53 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) return ret; } +static unsigned long ufs_qcom_opp_freq_to_clk_freq(struct ufs_hba *hba, + unsigned long freq, char *name) +{ + struct ufs_clk_info *clki; + struct dev_pm_opp *opp; + unsigned long clk_freq; + int idx = 0; + bool found = false; + + opp = dev_pm_opp_find_freq_exact_indexed(hba->dev, freq, 0, true); + if (IS_ERR(opp)) { + dev_err(hba->dev, "Failed to find OPP for exact frequency %lu\n", freq); + return 0; + } + + list_for_each_entry(clki, &hba->clk_list_head, list) { + if (!strcmp(clki->name, name)) { + found = true; + break; + } + + idx++; + } + + if (!found) { + dev_err(hba->dev, "Failed to find clock '%s' in clk list\n", name); + dev_pm_opp_put(opp); + return 0; + } + + clk_freq = dev_pm_opp_get_freq_indexed(opp, idx); + + dev_pm_opp_put(opp); + + return clk_freq; +} + static u32 ufs_qcom_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq) { u32 gear = UFS_HS_DONT_CHANGE; + unsigned long unipro_freq; + + if (!hba->use_pm_opp) + return gear; - switch (freq) { + unipro_freq = ufs_qcom_opp_freq_to_clk_freq(hba, freq, "core_clk_unipro"); + switch (unipro_freq) { case 403000000: gear = UFS_HS_G5; break; From 63665a9bf8349e0baa688f70cd04ac9469413080 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Mon, 26 May 2025 21:08:12 +0530 Subject: [PATCH 2690/2924] scsi: ufs: qcom: Prevent calling phy_exit() before phy_init() [ Upstream commit 7831003165d37ecb7b33843fcee05cada0359a82 ] Prevent calling phy_exit() before phy_init() to avoid abnormal power count and the following warning during boot up. [5.146763] phy phy-1d80000.phy.0: phy_power_on was called before phy_init Fixes: 7bac65687510 ("scsi: ufs: qcom: Power off the PHY if it was already powered on in ufs_qcom_power_up_sequence()") Signed-off-by: Nitin Rawat Link: https://lore.kernel.org/r/20250526153821.7918-2-quic_nitirawa@quicinc.com Reviewed-by: Konrad Dybcio Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/host/ufs-qcom.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 2b000c1708ce45..31649f908dd466 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -454,10 +454,9 @@ static int ufs_qcom_power_up_sequence(struct ufs_hba *hba) if (ret) return ret; - if (phy->power_count) { + if (phy->power_count) phy_power_off(phy); - phy_exit(phy); - } + /* phy initialization - calibrate the phy */ ret = phy_init(phy); From babfbf80a9a8209ad44273660440994a1477ed15 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 30 May 2025 16:10:17 +0200 Subject: [PATCH 2691/2924] ASoC: codecs: hda: Fix RPM usage count underflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ff0045de4ee0288dec683690f66f2f369b7d3466 ] RPM manipulation in hda_codec_probe_complete()'s error path is superfluous and leads to RPM usage count underflow if the build-controls operation fails. hda_codec_probe_complete() is called in: 1) hda_codec_probe() for all non-HDMI codecs 2) in card->late_probe() for HDMI codecs Error path for hda_codec_probe() takes care of bus' RPM already. For 2) if late_probe() fails, ASoC performs card cleanup what triggers hda_codec_remote() - same treatment is in 1). Fixes: b5df2a7dca1c ("ASoC: codecs: Add HD-Audio codec driver") Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250530141025.2942936-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/hda.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/hda.c b/sound/soc/codecs/hda.c index ddc00927313cfe..dc7794c9ac44ce 100644 --- a/sound/soc/codecs/hda.c +++ b/sound/soc/codecs/hda.c @@ -152,7 +152,7 @@ int hda_codec_probe_complete(struct hda_codec *codec) ret = snd_hda_codec_build_controls(codec); if (ret < 0) { dev_err(&hdev->dev, "unable to create controls %d\n", ret); - goto out; + return ret; } /* Bus suspended codecs as it does not manage their pm */ @@ -160,7 +160,7 @@ int hda_codec_probe_complete(struct hda_codec *codec) /* rpm was forbidden in snd_hda_codec_device_new() */ snd_hda_codec_set_power_save(codec, 2000); snd_hda_codec_register(codec); -out: + /* Complement pm_runtime_get_sync(bus) in probe */ pm_runtime_mark_last_busy(bus->dev); pm_runtime_put_autosuspend(bus->dev); From 5a38c07660b27304399f92984182f41de94d48c7 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 30 May 2025 16:10:18 +0200 Subject: [PATCH 2692/2924] ASoC: Intel: avs: Fix deadlock when the failing IPC is SET_D0IX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9ad1f3cd0d60444c69948854c7e50d2a61b63755 ] The procedure handling IPC timeouts and EXCEPTION_CAUGHT notification shall cancel any D0IX work before proceeding with DSP recovery. If SET_D0IX called from delayed_work is the failing IPC the procedure will deadlock. Conditionally skip cancelling the work to fix that. Fixes: 335c4cbd201d ("ASoC: Intel: avs: D0ix power state support") Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250530141025.2942936-3-cezary.rojewski@intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/avs/ipc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/avs/ipc.c b/sound/soc/intel/avs/ipc.c index 08ed9d96738a05..0314f9d4ea5f40 100644 --- a/sound/soc/intel/avs/ipc.c +++ b/sound/soc/intel/avs/ipc.c @@ -169,7 +169,9 @@ static void avs_dsp_exception_caught(struct avs_dev *adev, union avs_notify_msg dev_crit(adev->dev, "communication severed, rebooting dsp..\n"); - cancel_delayed_work_sync(&ipc->d0ix_work); + /* Avoid deadlock as the exception may be the response to SET_D0IX. */ + if (current_work() != &ipc->d0ix_work.work) + cancel_delayed_work_sync(&ipc->d0ix_work); ipc->in_d0ix = false; /* Re-enabled on recovery completion. */ pm_runtime_disable(adev->dev); From 2c9fc713d81d6880115ac23ee283e13afa467962 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 7 Apr 2025 13:23:43 +0200 Subject: [PATCH 2693/2924] ALSA: hda: Allow to fetch hlink by ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 318c9eef63dd30b59dc8d63c7205ae997aa1e524 ] Starting with LNL platform, Intel HDAudio Links carry IDs specifying non-HDAudio transfer type they help facilitate e.g.: 0xC0 for I2S as defined by AZX_REG_ML_LEPTR_ID_INTEL_SSP. The mechanism accounts for LEPTR register as it is Reserved if LCAP.ALT for given Link equals 0. Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Acked-by: Liam Girdwood Link: https://patch.msgid.link/20250407112352.3720779-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown Stable-dep-of: 347c8d6db7c9 ("ASoC: Intel: avs: Fix PPLCxFMT calculation") Signed-off-by: Sasha Levin --- include/sound/hdaudio_ext.h | 5 +++++ sound/hda/ext/hdac_ext_controller.c | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index 4c7a40e149a594..60ec12e3b72f8d 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -22,6 +22,7 @@ void snd_hdac_ext_bus_ppcap_enable(struct hdac_bus *chip, bool enable); void snd_hdac_ext_bus_ppcap_int_enable(struct hdac_bus *chip, bool enable); int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus); +struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_id(struct hdac_bus *bus, u32 id); struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_addr(struct hdac_bus *bus, int addr); struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_name(struct hdac_bus *bus, const char *codec_name); @@ -97,12 +98,16 @@ struct hdac_ext_link { void __iomem *ml_addr; /* link output stream reg pointer */ u32 lcaps; /* link capablities */ u16 lsdiid; /* link sdi identifier */ + u32 id; int ref_count; struct list_head list; }; +#define hdac_ext_link_alt(link) ((link)->lcaps & AZX_ML_HDA_LCAP_ALT) +#define hdac_ext_link_ofls(link) ((link)->lcaps & AZX_ML_HDA_LCAP_OFLS) + int snd_hdac_ext_bus_link_power_up(struct hdac_ext_link *hlink); int snd_hdac_ext_bus_link_power_down(struct hdac_ext_link *hlink); int snd_hdac_ext_bus_link_power_up_all(struct hdac_bus *bus); diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c index 6199bb60ccf00f..2ec1531d1c1b51 100644 --- a/sound/hda/ext/hdac_ext_controller.c +++ b/sound/hda/ext/hdac_ext_controller.c @@ -9,6 +9,7 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#include #include #include #include @@ -81,6 +82,7 @@ int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus) int idx; u32 link_count; struct hdac_ext_link *hlink; + u32 leptr; link_count = readl(bus->mlcap + AZX_REG_ML_MLCD) + 1; @@ -97,6 +99,11 @@ int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus) hlink->lcaps = readl(hlink->ml_addr + AZX_REG_ML_LCAP); hlink->lsdiid = readw(hlink->ml_addr + AZX_REG_ML_LSDIID); + if (hdac_ext_link_alt(hlink)) { + leptr = readl(hlink->ml_addr + AZX_REG_ML_LEPTR); + hlink->id = FIELD_GET(AZX_REG_ML_LEPTR_ID, leptr); + } + /* since link in On, update the ref */ hlink->ref_count = 1; @@ -125,6 +132,17 @@ void snd_hdac_ext_link_free_all(struct hdac_bus *bus) } EXPORT_SYMBOL_GPL(snd_hdac_ext_link_free_all); +struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_id(struct hdac_bus *bus, u32 id) +{ + struct hdac_ext_link *hlink; + + list_for_each_entry(hlink, &bus->hlink_list, list) + if (hdac_ext_link_alt(hlink) && hlink->id == id) + return hlink; + return NULL; +} +EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_get_hlink_by_id); + /** * snd_hdac_ext_bus_get_hlink_by_addr - get hlink at specified address * @bus: hlink's parent bus device From 50279a5447c1a667e6d7d480e45ca3070a55d860 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 7 Apr 2025 13:23:48 +0200 Subject: [PATCH 2694/2924] ASoC: Intel: avs: PCM operations for LNL-based platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 716643786f140f2d68f22424c75b9198ffe14290 ] Starting from LNL platform the so-called non-HDAudio transfer types, e.g.: I2S/DMIC, utilize HDAudio LINK DMA rather than GPDMA for the data streaming. In essence, all transfer types now utilize HDAudio Link. Most of the existing code can be reused with the major difference being HDAudio Link query method: - fetch the Link by codec.addr in standard HDAudio transfer case - fetch the Link by LEPTR.ID in non-HDAudio transfer case To make the unification happen, store pointer to the Link in dma_data and utilize it in the common code. And to avoid confusion in transfer-type naming between cAVS-ACE 1.x (SkyLake till MeteorLake) and ACE 2.0+ architecture (LunarLake onward), use: - 'hda' for typical HDAudio transfer case - 'nonhda' for non-HDAudio transfer case, cAVS-ACE 1.x - 'althda' for non-HDAudio transfer case, ACE 2.0+ Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Acked-by: Liam Girdwood Link: https://patch.msgid.link/20250407112352.3720779-7-cezary.rojewski@intel.com Signed-off-by: Mark Brown Stable-dep-of: 347c8d6db7c9 ("ASoC: Intel: avs: Fix PPLCxFMT calculation") Signed-off-by: Sasha Levin --- sound/soc/intel/avs/pcm.c | 116 ++++++++++++++++++++++++++++---------- 1 file changed, 85 insertions(+), 31 deletions(-) diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index d83ef504643bbb..6116465c8f9b12 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -36,6 +36,7 @@ struct avs_dma_data { struct snd_pcm_hw_constraint_list sample_bits_list; struct work_struct period_elapsed_work; + struct hdac_ext_link *link; struct snd_pcm_substream *substream; }; @@ -325,32 +326,75 @@ static const struct snd_soc_dai_ops avs_dai_nonhda_be_ops = { .trigger = avs_dai_nonhda_be_trigger, }; -static int avs_dai_hda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +static int __avs_dai_hda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai, + struct hdac_ext_link *link) { - struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct hdac_ext_stream *link_stream; struct avs_dma_data *data; - struct hda_codec *codec; int ret; ret = avs_dai_startup(substream, dai); if (ret) return ret; - codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev); - link_stream = snd_hdac_ext_stream_assign(&codec->bus->core, substream, + data = snd_soc_dai_get_dma_data(dai, substream); + link_stream = snd_hdac_ext_stream_assign(&data->adev->base.core, substream, HDAC_EXT_STREAM_TYPE_LINK); if (!link_stream) { avs_dai_shutdown(substream, dai); return -EBUSY; } - data = snd_soc_dai_get_dma_data(dai, substream); data->link_stream = link_stream; - substream->runtime->private_data = link_stream; + data->link = link; return 0; } +static int avs_dai_hda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); + struct hdac_ext_link *link; + struct avs_dma_data *data; + struct hda_codec *codec; + int ret; + + codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev); + + link = snd_hdac_ext_bus_get_hlink_by_addr(&codec->bus->core, codec->core.addr); + if (!link) + return -EINVAL; + + ret = __avs_dai_hda_be_startup(substream, dai, link); + if (!ret) { + data = snd_soc_dai_get_dma_data(dai, substream); + substream->runtime->private_data = data->link_stream; + } + + return ret; +} + +static int avs_dai_i2shda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct avs_dev *adev = to_avs_dev(dai->component->dev); + struct hdac_ext_link *link; + + link = snd_hdac_ext_bus_get_hlink_by_id(&adev->base.core, AZX_REG_ML_LEPTR_ID_INTEL_SSP); + if (!link) + return -EINVAL; + return __avs_dai_hda_be_startup(substream, dai, link); +} + +static int avs_dai_dmichda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct avs_dev *adev = to_avs_dev(dai->component->dev); + struct hdac_ext_link *link; + + link = snd_hdac_ext_bus_get_hlink_by_id(&adev->base.core, AZX_REG_ML_LEPTR_ID_INTEL_DMIC); + if (!link) + return -EINVAL; + return __avs_dai_hda_be_startup(substream, dai, link); +} + static void avs_dai_hda_be_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct avs_dma_data *data = snd_soc_dai_get_dma_data(dai, substream); @@ -360,6 +404,14 @@ static void avs_dai_hda_be_shutdown(struct snd_pcm_substream *substream, struct avs_dai_shutdown(substream, dai); } +static void avs_dai_althda_be_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct avs_dma_data *data = snd_soc_dai_get_dma_data(dai, substream); + + snd_hdac_ext_stream_release(data->link_stream, HDAC_EXT_STREAM_TYPE_LINK); + avs_dai_shutdown(substream, dai); +} + static int avs_dai_hda_be_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params, struct snd_soc_dai *dai) { @@ -375,13 +427,8 @@ static int avs_dai_hda_be_hw_params(struct snd_pcm_substream *substream, static int avs_dai_hda_be_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct avs_dma_data *data; - struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct hdac_ext_stream *link_stream; - struct hdac_ext_link *link; - struct hda_codec *codec; - - dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name); + struct avs_dma_data *data; data = snd_soc_dai_get_dma_data(dai, substream); if (!data->path) @@ -393,27 +440,19 @@ static int avs_dai_hda_be_hw_free(struct snd_pcm_substream *substream, struct sn data->path = NULL; /* clear link <-> stream mapping */ - codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev); - link = snd_hdac_ext_bus_get_hlink_by_addr(&codec->bus->core, codec->core.addr); - if (!link) - return -EINVAL; - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - snd_hdac_ext_bus_link_clear_stream_id(link, hdac_stream(link_stream)->stream_tag); + snd_hdac_ext_bus_link_clear_stream_id(data->link, + hdac_stream(link_stream)->stream_tag); return 0; } static int avs_dai_hda_be_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct snd_pcm_runtime *runtime = substream->runtime; const struct snd_soc_pcm_stream *stream_info; struct hdac_ext_stream *link_stream; - struct hdac_ext_link *link; struct avs_dma_data *data; - struct hda_codec *codec; - struct hdac_bus *bus; unsigned int format_val; unsigned int bits; int ret; @@ -424,23 +463,18 @@ static int avs_dai_hda_be_prepare(struct snd_pcm_substream *substream, struct sn if (link_stream->link_prepared) return 0; - codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev); - bus = &codec->bus->core; stream_info = snd_soc_dai_get_pcm_stream(dai, substream->stream); bits = snd_hdac_stream_format_bits(runtime->format, runtime->subformat, stream_info->sig_bits); format_val = snd_hdac_stream_format(runtime->channels, bits, runtime->rate); - snd_hdac_ext_stream_decouple(bus, link_stream, true); + snd_hdac_ext_stream_decouple(&data->adev->base.core, link_stream, true); snd_hdac_ext_stream_reset(link_stream); snd_hdac_ext_stream_setup(link_stream, format_val); - link = snd_hdac_ext_bus_get_hlink_by_addr(bus, codec->core.addr); - if (!link) - return -EINVAL; - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - snd_hdac_ext_bus_link_set_stream_id(link, hdac_stream(link_stream)->stream_tag); + snd_hdac_ext_bus_link_set_stream_id(data->link, + hdac_stream(link_stream)->stream_tag); ret = avs_dai_prepare(substream, dai); if (ret) @@ -515,6 +549,26 @@ static const struct snd_soc_dai_ops avs_dai_hda_be_ops = { .trigger = avs_dai_hda_be_trigger, }; +__maybe_unused +static const struct snd_soc_dai_ops avs_dai_i2shda_be_ops = { + .startup = avs_dai_i2shda_be_startup, + .shutdown = avs_dai_althda_be_shutdown, + .hw_params = avs_dai_hda_be_hw_params, + .hw_free = avs_dai_hda_be_hw_free, + .prepare = avs_dai_hda_be_prepare, + .trigger = avs_dai_hda_be_trigger, +}; + +__maybe_unused +static const struct snd_soc_dai_ops avs_dai_dmichda_be_ops = { + .startup = avs_dai_dmichda_be_startup, + .shutdown = avs_dai_althda_be_shutdown, + .hw_params = avs_dai_hda_be_hw_params, + .hw_free = avs_dai_hda_be_hw_free, + .prepare = avs_dai_hda_be_prepare, + .trigger = avs_dai_hda_be_trigger, +}; + static int hw_rule_param_size(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval *interval = hw_param_interval(params, rule->var); From 61dc63da2b1a899b5e3a6091b914ccad64708765 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 30 May 2025 16:10:19 +0200 Subject: [PATCH 2695/2924] ASoC: Intel: avs: Fix PPLCxFMT calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 347c8d6db7c9d65d93ef226849b273823f54eaea ] HDAudio transfer types utilize SDxFMT for front-end (HOST) and PPLCxFMT for back-end (LINK) side when setting up the stream. BE's substream->runtime duplicates FE runtime so switch to using BE's hw_params to address incorrect format values on the LINK side when FE and BE formats differ. The problem is introduced with commit d070002a20fc ("ASoC: Intel: avs: HDA PCM BE operations") but the code has been shuffled around since then so direct 'Fixes:' tag does not apply. Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250530141025.2942936-4-cezary.rojewski@intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/avs/pcm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index 6116465c8f9b12..fc51fa1fd40d2b 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -449,9 +449,10 @@ static int avs_dai_hda_be_hw_free(struct snd_pcm_substream *substream, struct sn static int avs_dai_hda_be_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_soc_pcm_runtime *be = snd_soc_substream_to_rtd(substream); const struct snd_soc_pcm_stream *stream_info; struct hdac_ext_stream *link_stream; + const struct snd_pcm_hw_params *p; struct avs_dma_data *data; unsigned int format_val; unsigned int bits; @@ -459,14 +460,15 @@ static int avs_dai_hda_be_prepare(struct snd_pcm_substream *substream, struct sn data = snd_soc_dai_get_dma_data(dai, substream); link_stream = data->link_stream; + p = &be->dpcm[substream->stream].hw_params; if (link_stream->link_prepared) return 0; stream_info = snd_soc_dai_get_pcm_stream(dai, substream->stream); - bits = snd_hdac_stream_format_bits(runtime->format, runtime->subformat, + bits = snd_hdac_stream_format_bits(params_format(p), params_subformat(p), stream_info->sig_bits); - format_val = snd_hdac_stream_format(runtime->channels, bits, runtime->rate); + format_val = snd_hdac_stream_format(params_channels(p), bits, params_rate(p)); snd_hdac_ext_stream_decouple(&data->adev->base.core, link_stream, true); snd_hdac_ext_stream_reset(link_stream); From ea218ae05e60616531fe652650b98dcd3c328279 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 30 May 2025 16:10:20 +0200 Subject: [PATCH 2696/2924] ASoC: Intel: avs: Fix possible null-ptr-deref when initing hw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2f78724d4f0c665c83e202e3989d5333a2cb1036 ] Search result of avs_dai_find_path_template() shall be verified before being used. As 'template' is already known when avs_hw_constraints_init() is fired, drop the search entirely. Fixes: f2f847461fb7 ("ASoC: Intel: avs: Constrain path based on BE capabilities") Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250530141025.2942936-5-cezary.rojewski@intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/avs/pcm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index fc51fa1fd40d2b..5a2330e4e4225d 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -82,10 +82,8 @@ void avs_period_elapsed(struct snd_pcm_substream *substream) static int hw_rule_param_size(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule); static int avs_hw_constraints_init(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hw_constraint_list *r, *c, *s; - struct avs_tplg_path_template *template; struct avs_dma_data *data; int ret; @@ -98,8 +96,7 @@ static int avs_hw_constraints_init(struct snd_pcm_substream *substream, struct s c = &(data->channels_list); s = &(data->sample_bits_list); - template = avs_dai_find_path_template(dai, !rtd->dai_link->no_pcm, substream->stream); - ret = avs_path_set_constraint(data->adev, template, r, c, s); + ret = avs_path_set_constraint(data->adev, data->template, r, c, s); if (ret <= 0) return ret; From 49dcdee529bae3d8c4fe3f061cbf912afed4124a Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 7 Apr 2025 13:23:44 +0200 Subject: [PATCH 2697/2924] ASoC: Intel: avs: Ignore Vendor-space manipulation for ACE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit acd2563f30886730757062b9b3efe8043daabbc3 ] A number of Vendor Specific registers utilized on cAVS architecture (SkyLake till RaptorLake) are not present on ACE hardware (MeteorLake onward). Similarly, certain recommended procedures do not apply. Adjust existing code to be ACE-friendly. Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Acked-by: Liam Girdwood Link: https://patch.msgid.link/20250407112352.3720779-3-cezary.rojewski@intel.com Signed-off-by: Mark Brown Stable-dep-of: 9e3285be55e6 ("ASoC: Intel: avs: Fix paths in MODULE_FIRMWARE hints") Signed-off-by: Sasha Levin --- sound/soc/intel/avs/avs.h | 1 + sound/soc/intel/avs/core.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/avs/avs.h b/sound/soc/intel/avs/avs.h index 585543f872fccc..91872d1df97a5c 100644 --- a/sound/soc/intel/avs/avs.h +++ b/sound/soc/intel/avs/avs.h @@ -72,6 +72,7 @@ extern const struct avs_dsp_ops avs_tgl_dsp_ops; #define AVS_PLATATTR_CLDMA BIT_ULL(0) #define AVS_PLATATTR_IMR BIT_ULL(1) +#define AVS_PLATATTR_ACE BIT_ULL(2) #define avs_platattr_test(adev, attr) \ ((adev)->spec->attributes & AVS_PLATATTR_##attr) diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c index 8fbf33e30dfc3e..72a14dca1a1edf 100644 --- a/sound/soc/intel/avs/core.c +++ b/sound/soc/intel/avs/core.c @@ -54,14 +54,17 @@ void avs_hda_power_gating_enable(struct avs_dev *adev, bool enable) { u32 value = enable ? 0 : pgctl_mask; - avs_hda_update_config_dword(&adev->base.core, AZX_PCIREG_PGCTL, pgctl_mask, value); + if (!avs_platattr_test(adev, ACE)) + avs_hda_update_config_dword(&adev->base.core, AZX_PCIREG_PGCTL, pgctl_mask, value); } static void avs_hdac_clock_gating_enable(struct hdac_bus *bus, bool enable) { + struct avs_dev *adev = hdac_to_avs(bus); u32 value = enable ? cgctl_mask : 0; - avs_hda_update_config_dword(bus, AZX_PCIREG_CGCTL, cgctl_mask, value); + if (!avs_platattr_test(adev, ACE)) + avs_hda_update_config_dword(bus, AZX_PCIREG_CGCTL, cgctl_mask, value); } void avs_hda_clock_gating_enable(struct avs_dev *adev, bool enable) @@ -71,6 +74,8 @@ void avs_hda_clock_gating_enable(struct avs_dev *adev, bool enable) void avs_hda_l1sen_enable(struct avs_dev *adev, bool enable) { + if (avs_platattr_test(adev, ACE)) + return; if (enable) { if (atomic_inc_and_test(&adev->l1sen_counter)) snd_hdac_chip_updatel(&adev->base.core, VS_EM2, AZX_VS_EM2_L1SEN, @@ -99,6 +104,7 @@ static int avs_hdac_bus_init_streams(struct hdac_bus *bus) static bool avs_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset) { + struct avs_dev *adev = hdac_to_avs(bus); struct hdac_ext_link *hlink; bool ret; @@ -114,7 +120,8 @@ static bool avs_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset) /* Set DUM bit to address incorrect position reporting for capture * streams. In order to do so, CTRL needs to be out of reset state */ - snd_hdac_chip_updatel(bus, VS_EM2, AZX_VS_EM2_DUM, AZX_VS_EM2_DUM); + if (!avs_platattr_test(adev, ACE)) + snd_hdac_chip_updatel(bus, VS_EM2, AZX_VS_EM2_DUM, AZX_VS_EM2_DUM); return ret; } From 85c3c6cf9a3f6944346e2640c4170e2981cd46b8 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 7 Apr 2025 13:23:45 +0200 Subject: [PATCH 2698/2924] ASoC: Intel: avs: Read HW capabilities when possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b9a3ec604993074eb6f5d08b14fb7913d1fae48b ] Starting with LunarLake (LNL) and onward, some hardware capabilities are visible to the sound driver directly. At the same time, these may no longer be visible to the AudioDSP firmware. Update resource allocation function to rely on the registers when possible. Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Acked-by: Liam Girdwood Link: https://patch.msgid.link/20250407112352.3720779-4-cezary.rojewski@intel.com Signed-off-by: Mark Brown Stable-dep-of: 9e3285be55e6 ("ASoC: Intel: avs: Fix paths in MODULE_FIRMWARE hints") Signed-off-by: Sasha Levin --- include/sound/hdaudio_ext.h | 1 + sound/hda/ext/hdac_ext_controller.c | 1 + sound/soc/intel/avs/avs.h | 1 + sound/soc/intel/avs/loader.c | 9 +++++++++ 4 files changed, 12 insertions(+) diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index 60ec12e3b72f8d..7de390022ac268 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -99,6 +99,7 @@ struct hdac_ext_link { u32 lcaps; /* link capablities */ u16 lsdiid; /* link sdi identifier */ u32 id; + u8 slcount; int ref_count; diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c index 2ec1531d1c1b51..c84754434d1627 100644 --- a/sound/hda/ext/hdac_ext_controller.c +++ b/sound/hda/ext/hdac_ext_controller.c @@ -98,6 +98,7 @@ int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus) (AZX_ML_INTERVAL * idx); hlink->lcaps = readl(hlink->ml_addr + AZX_REG_ML_LCAP); hlink->lsdiid = readw(hlink->ml_addr + AZX_REG_ML_LSDIID); + hlink->slcount = FIELD_GET(AZX_ML_HDA_LCAP_SLCOUNT, hlink->lcaps) + 1; if (hdac_ext_link_alt(hlink)) { leptr = readl(hlink->ml_addr + AZX_REG_ML_LEPTR); diff --git a/sound/soc/intel/avs/avs.h b/sound/soc/intel/avs/avs.h index 91872d1df97a5c..201897c5bdc041 100644 --- a/sound/soc/intel/avs/avs.h +++ b/sound/soc/intel/avs/avs.h @@ -73,6 +73,7 @@ extern const struct avs_dsp_ops avs_tgl_dsp_ops; #define AVS_PLATATTR_CLDMA BIT_ULL(0) #define AVS_PLATATTR_IMR BIT_ULL(1) #define AVS_PLATATTR_ACE BIT_ULL(2) +#define AVS_PLATATTR_ALTHDA BIT_ULL(3) #define avs_platattr_test(adev, attr) \ ((adev)->spec->attributes & AVS_PLATATTR_##attr) diff --git a/sound/soc/intel/avs/loader.c b/sound/soc/intel/avs/loader.c index 0b29941feb0ef0..ecf050c2c0c7f4 100644 --- a/sound/soc/intel/avs/loader.c +++ b/sound/soc/intel/avs/loader.c @@ -683,6 +683,7 @@ int avs_dsp_boot_firmware(struct avs_dev *adev, bool purge) static int avs_dsp_alloc_resources(struct avs_dev *adev) { + struct hdac_ext_link *link; int ret, i; ret = avs_ipc_get_hw_config(adev, &adev->hw_cfg); @@ -693,6 +694,14 @@ static int avs_dsp_alloc_resources(struct avs_dev *adev) if (ret) return AVS_IPC_RET(ret); + /* If hw allows, read capabilities directly from it. */ + if (avs_platattr_test(adev, ALTHDA)) { + link = snd_hdac_ext_bus_get_hlink_by_id(&adev->base.core, + AZX_REG_ML_LEPTR_ID_INTEL_SSP); + if (link) + adev->hw_cfg.i2s_caps.ctrl_count = link->slcount; + } + adev->core_refs = devm_kcalloc(adev->dev, adev->hw_cfg.dsp_cores, sizeof(*adev->core_refs), GFP_KERNEL); adev->lib_names = devm_kcalloc(adev->dev, adev->fw_cfg.max_libs_count, From 1b99818b025f41ed307b05c437da8effc9638dfb Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Mon, 7 Apr 2025 13:23:46 +0200 Subject: [PATCH 2699/2924] ASoC: Intel: avs: Relocate DSP status registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 75f3c607b1fa1f4d42cde8377cd2276ab01e287d ] The firmware status and error registers are not part of SRAM on ACE platforms. As these registers take part in IPC on ACE and cAVS platforms both, relocate the field denoting their offset to Host-IPC descriptor. In consequence, code remains cohesive with the ACE specs while still maintaining high readability for the cAVS platforms. Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Acked-by: Liam Girdwood Link: https://patch.msgid.link/20250407112352.3720779-5-cezary.rojewski@intel.com Signed-off-by: Mark Brown Stable-dep-of: 9e3285be55e6 ("ASoC: Intel: avs: Fix paths in MODULE_FIRMWARE hints") Signed-off-by: Sasha Levin --- sound/soc/intel/avs/avs.h | 2 +- sound/soc/intel/avs/core.c | 18 +++++++++++++++--- sound/soc/intel/avs/loader.c | 2 +- sound/soc/intel/avs/registers.h | 2 +- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/avs/avs.h b/sound/soc/intel/avs/avs.h index 201897c5bdc041..ec5502f9d5cb1d 100644 --- a/sound/soc/intel/avs/avs.h +++ b/sound/soc/intel/avs/avs.h @@ -81,7 +81,6 @@ extern const struct avs_dsp_ops avs_tgl_dsp_ops; struct avs_sram_spec { const u32 base_offset; const u32 window_size; - const u32 rom_status_offset; }; struct avs_hipc_spec { @@ -93,6 +92,7 @@ struct avs_hipc_spec { const u32 rsp_offset; const u32 rsp_busy_mask; const u32 ctl_offset; + const u32 sts_offset; }; /* Platform specific descriptor */ diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c index 72a14dca1a1edf..1495e163d47efe 100644 --- a/sound/soc/intel/avs/core.c +++ b/sound/soc/intel/avs/core.c @@ -755,13 +755,11 @@ static const struct dev_pm_ops avs_dev_pm = { static const struct avs_sram_spec skl_sram_spec = { .base_offset = SKL_ADSP_SRAM_BASE_OFFSET, .window_size = SKL_ADSP_SRAM_WINDOW_SIZE, - .rom_status_offset = SKL_ADSP_SRAM_BASE_OFFSET, }; static const struct avs_sram_spec apl_sram_spec = { .base_offset = APL_ADSP_SRAM_BASE_OFFSET, .window_size = APL_ADSP_SRAM_WINDOW_SIZE, - .rom_status_offset = APL_ADSP_SRAM_BASE_OFFSET, }; static const struct avs_hipc_spec skl_hipc_spec = { @@ -773,6 +771,19 @@ static const struct avs_hipc_spec skl_hipc_spec = { .rsp_offset = SKL_ADSP_REG_HIPCT, .rsp_busy_mask = SKL_ADSP_HIPCT_BUSY, .ctl_offset = SKL_ADSP_REG_HIPCCTL, + .sts_offset = SKL_ADSP_SRAM_BASE_OFFSET, +}; + +static const struct avs_hipc_spec apl_hipc_spec = { + .req_offset = SKL_ADSP_REG_HIPCI, + .req_ext_offset = SKL_ADSP_REG_HIPCIE, + .req_busy_mask = SKL_ADSP_HIPCI_BUSY, + .ack_offset = SKL_ADSP_REG_HIPCIE, + .ack_done_mask = SKL_ADSP_HIPCIE_DONE, + .rsp_offset = SKL_ADSP_REG_HIPCT, + .rsp_busy_mask = SKL_ADSP_HIPCT_BUSY, + .ctl_offset = SKL_ADSP_REG_HIPCCTL, + .sts_offset = APL_ADSP_SRAM_BASE_OFFSET, }; static const struct avs_hipc_spec cnl_hipc_spec = { @@ -784,6 +795,7 @@ static const struct avs_hipc_spec cnl_hipc_spec = { .rsp_offset = CNL_ADSP_REG_HIPCTDR, .rsp_busy_mask = CNL_ADSP_HIPCTDR_BUSY, .ctl_offset = CNL_ADSP_REG_HIPCCTL, + .sts_offset = APL_ADSP_SRAM_BASE_OFFSET, }; static const struct avs_spec skl_desc = { @@ -803,7 +815,7 @@ static const struct avs_spec apl_desc = { .core_init_mask = 3, .attributes = AVS_PLATATTR_IMR, .sram = &apl_sram_spec, - .hipc = &skl_hipc_spec, + .hipc = &apl_hipc_spec, }; static const struct avs_spec cnl_desc = { diff --git a/sound/soc/intel/avs/loader.c b/sound/soc/intel/avs/loader.c index ecf050c2c0c7f4..138e4e9de5e309 100644 --- a/sound/soc/intel/avs/loader.c +++ b/sound/soc/intel/avs/loader.c @@ -310,7 +310,7 @@ avs_hda_init_rom(struct avs_dev *adev, unsigned int dma_id, bool purge) } /* await ROM init */ - ret = snd_hdac_adsp_readl_poll(adev, spec->sram->rom_status_offset, reg, + ret = snd_hdac_adsp_readl_poll(adev, spec->hipc->sts_offset, reg, (reg & 0xF) == AVS_ROM_INIT_DONE || (reg & 0xF) == APL_ROM_FW_ENTERED, AVS_ROM_INIT_POLLING_US, APL_ROM_INIT_TIMEOUT_US); diff --git a/sound/soc/intel/avs/registers.h b/sound/soc/intel/avs/registers.h index 368ede05f2cdaa..4db0cdf68ffc7a 100644 --- a/sound/soc/intel/avs/registers.h +++ b/sound/soc/intel/avs/registers.h @@ -74,7 +74,7 @@ #define APL_ADSP_SRAM_WINDOW_SIZE 0x20000 /* Constants used when accessing SRAM, space shared with firmware */ -#define AVS_FW_REG_BASE(adev) ((adev)->spec->sram->base_offset) +#define AVS_FW_REG_BASE(adev) ((adev)->spec->hipc->sts_offset) #define AVS_FW_REG_STATUS(adev) (AVS_FW_REG_BASE(adev) + 0x0) #define AVS_FW_REG_ERROR(adev) (AVS_FW_REG_BASE(adev) + 0x4) From 6e7e072fdfc3a5e2db3c3f7837956ad9419a3c4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Fri, 30 May 2025 16:10:21 +0200 Subject: [PATCH 2700/2924] ASoC: Intel: avs: Fix paths in MODULE_FIRMWARE hints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9e3285be55e6c0829e451b4a341e3059da47ec9d ] The binaries for cAVS architecture are located in "intel/avs" subdirectory, not "intel". Fixes: 94aa347d34e0 ("ASoC: Intel: avs: Add MODULE_FIRMWARE to inform about FW") Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250530141025.2942936-6-cezary.rojewski@intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/avs/core.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c index 1495e163d47efe..cbbc656fcc3f86 100644 --- a/sound/soc/intel/avs/core.c +++ b/sound/soc/intel/avs/core.c @@ -921,13 +921,13 @@ MODULE_AUTHOR("Cezary Rojewski "); MODULE_AUTHOR("Amadeusz Slawinski "); MODULE_DESCRIPTION("Intel cAVS sound driver"); MODULE_LICENSE("GPL"); -MODULE_FIRMWARE("intel/skl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/apl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/cnl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/icl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/jsl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/lkf/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/tgl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/ehl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/adl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/adl_n/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/skl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/apl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/cnl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/icl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/jsl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/lkf/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/tgl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/ehl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/adl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/adl_n/dsp_basefw.bin"); From b234af87175a7d6663f03e638a66fe2e884b9323 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 30 May 2025 16:10:22 +0200 Subject: [PATCH 2701/2924] ASoC: Intel: avs: Verify kcalloc() status when setting constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5f342aeee2724d31046172eb5caab8e0e8afd57d ] All memory operations shall be checked. Fixes: f2f847461fb7 ("ASoC: Intel: avs: Constrain path based on BE capabilities") Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250530141025.2942936-7-cezary.rojewski@intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/avs/path.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/avs/path.c b/sound/soc/intel/avs/path.c index 8f1bf8d0af8f99..43b3d995391072 100644 --- a/sound/soc/intel/avs/path.c +++ b/sound/soc/intel/avs/path.c @@ -134,6 +134,8 @@ int avs_path_set_constraint(struct avs_dev *adev, struct avs_tplg_path_template rlist = kcalloc(i, sizeof(*rlist), GFP_KERNEL); clist = kcalloc(i, sizeof(*clist), GFP_KERNEL); slist = kcalloc(i, sizeof(*slist), GFP_KERNEL); + if (!rlist || !clist || !slist) + return -ENOMEM; i = 0; list_for_each_entry(path_template, &template->path_list, node) { From 2916794ffbce604cc2cda105f6b8a4a7c748dd7f Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 30 May 2025 16:10:23 +0200 Subject: [PATCH 2702/2924] ASoC: Intel: avs: Verify content returned by parse_int_array() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 93e246b6769bdacb09cfff4ea0f00fe5ab4f0d7a ] The first element of the returned array stores its length. If it is 0, any manipulation beyond the element at index 0 ends with null-ptr-deref. Fixes: 5a565ba23abe ("ASoC: Intel: avs: Probing and firmware tracing over debugfs") Reviewed-by: Amadeusz Sławiński Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20250530141025.2942936-8-cezary.rojewski@intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/avs/debugfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/avs/debugfs.c b/sound/soc/intel/avs/debugfs.c index 8c4edda97f757f..0e826ca20619ca 100644 --- a/sound/soc/intel/avs/debugfs.c +++ b/sound/soc/intel/avs/debugfs.c @@ -373,7 +373,10 @@ static ssize_t trace_control_write(struct file *file, const char __user *from, s return ret; num_elems = *array; - resource_mask = array[1]; + if (!num_elems) { + ret = -EINVAL; + goto free_array; + } /* * Disable if just resource mask is provided - no log priority flags. @@ -381,6 +384,7 @@ static ssize_t trace_control_write(struct file *file, const char __user *from, s * Enable input format: mask, prio1, .., prioN * Where 'N' equals number of bits set in the 'mask'. */ + resource_mask = array[1]; if (num_elems == 1) { ret = disable_logs(adev, resource_mask); } else { From 69c71b15802a291587c3fb512941c4b3f253f7cf Mon Sep 17 00:00:00 2001 From: Yuuki NAGAO Date: Sat, 31 May 2025 23:13:41 +0900 Subject: [PATCH 2703/2924] ASoC: ti: omap-hdmi: Re-add dai_link->platform to fix card init [ Upstream commit bae071aa7bcd034054cec91666c80f812adeccd9 ] The removed dai_link->platform component cause a fail which is exposed at runtime. (ex: when a sound tool is used) This patch re-adds the dai_link->platform component to have a full card registered. Before this patch: $ aplay -l **** List of PLAYBACK Hardware Devices **** card 1: HDMI [HDMI], device 0: HDMI snd-soc-dummy-dai-0 [] Subdevices: 1/1 Subdevice #0: subdevice #0 $ speaker-test -D plughw:1,0 -t sine speaker-test 1.2.8 Playback device is plughw:1,0 Stream parameters are 48000Hz, S16_LE, 1 channels Sine wave rate is 440.0000Hz Playback open error: -22,Invalid argument After this patch which restores the platform component: $ aplay -l **** List of PLAYBACK Hardware Devices **** card 0: HDMI [HDMI], device 0: HDMI snd-soc-dummy-dai-0 [HDMI snd-soc-dummy-dai-0] Subdevices: 0/1 Subdevice #0: subdevice #0 -> Resolve the playback error. Fixes: 3b0db249cf8f ("ASoC: ti: remove unnecessary dai_link->platform") Signed-off-by: Yuuki NAGAO Link: https://patch.msgid.link/20250531141341.81164-1-wf.yn386@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/ti/omap-hdmi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/ti/omap-hdmi.c b/sound/soc/ti/omap-hdmi.c index cf43ac19c4a6d0..55e7cb96858fca 100644 --- a/sound/soc/ti/omap-hdmi.c +++ b/sound/soc/ti/omap-hdmi.c @@ -361,17 +361,20 @@ static int omap_hdmi_audio_probe(struct platform_device *pdev) if (!card->dai_link) return -ENOMEM; - compnent = devm_kzalloc(dev, sizeof(*compnent), GFP_KERNEL); + compnent = devm_kzalloc(dev, 2 * sizeof(*compnent), GFP_KERNEL); if (!compnent) return -ENOMEM; - card->dai_link->cpus = compnent; + card->dai_link->cpus = &compnent[0]; card->dai_link->num_cpus = 1; card->dai_link->codecs = &snd_soc_dummy_dlc; card->dai_link->num_codecs = 1; + card->dai_link->platforms = &compnent[1]; + card->dai_link->num_platforms = 1; card->dai_link->name = card->name; card->dai_link->stream_name = card->name; card->dai_link->cpus->dai_name = dev_name(ad->dssdev); + card->dai_link->platforms->name = dev_name(ad->dssdev); card->num_links = 1; card->dev = dev; From 43934322cda9a5130d888513e4825735bc1f53ca Mon Sep 17 00:00:00 2001 From: Nitesh Shetty Date: Mon, 28 Apr 2025 15:28:48 +0530 Subject: [PATCH 2704/2924] iov_iter: use iov_offset for length calculation in iov_iter_aligned_bvec [ Upstream commit 334d7c4fb60cf21e0abac134d92fe49e9b04377e ] If iov_offset is non-zero, then we need to consider iov_offset in length calculation, otherwise we might pass smaller IOs such as 512 bytes, in below scenario [1]. This issue is reproducible using lib-uring test/fixed-seg.c application with fixed buffer on a 512 LBA formatted device. [1] At present we pass the alignment check, for 512 LBA formatted devices, len_mask = 511 when IO is smaller, i->count = 512 has an offset, i->io_offset = 3584 with bvec values, bvec->bv_offset = 256, bvec->bv_len = 3840. In short, the first 256 bytes are in the current page, next 256 bytes are in the another page. Ideally we expect to fail the IO. I can think of 2 userspace scenarios where we experience this. a: From userspace, we observe a different behaviour when device LBA size is 512 vs 4096 bytes. For 4096 LBA formatted device, I see the same liburing test [2] failing, whereas 512 the test passes without this. This is reproducible everytime. [2] https://github.com/axboe/liburing/ b: Although I was not able to reproduce the below condition, but I suspect below case should be possible from user space for devices with 512 LBA formatted device. Lets say from userspace while allocating a virtually single chunk of memory, if we get 2 physical chunk of memory, and IO happens to be at the boundary of first physical chunk with length crossing first chunk, then we allow IOs to proceed and hence we might map wrong physical address length and proceed with IO rather than failing. : --- a/test/fixed-seg.c : +++ b/test/fixed-seg.c : @@ -64,7 +64,7 @@ static int test(struct io_uring *ring, int fd, int : vec_off) : return T_EXIT_FAIL; : } : : - ret = read_it(ring, fd, 4096, vec_off); : + ret = read_it(ring, fd, 4096, 7*512 + 256); : if (ret) { : fprintf(stderr, "4096 0 failed\n"); : return T_EXIT_FAIL; Effectively this is a write crossing the page boundary. Link: https://lkml.kernel.org/r/20250428095849.11709-1-nj.shetty@samsung.com Fixes: 2263639f96f2 ("iov_iter: streamline iovec/bvec alignment iteration") Reviewed-by: Jens Axboe Reviewed-by: Anuj Gupta Signed-off-by: Nitesh Shetty Cc: Al Viro Cc: Christian Brauner Cc: Keith Busch Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- lib/iov_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index bc9391e55d57ea..9ce83ab71bacd8 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -820,7 +820,7 @@ static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, size_t size = i->count; do { - size_t len = bvec->bv_len; + size_t len = bvec->bv_len - skip; if (len > size) len = size; From 287c7d34eedd37af1272dfb3b6e8656f4f026424 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 1 Jun 2025 14:23:52 -0400 Subject: [PATCH 2705/2924] fs/fhandle.c: fix a race in call of has_locked_children() [ Upstream commit 1f282cdc1d219c4a557f7009e81bc792820d9d9a ] may_decode_fh() is calling has_locked_children() while holding no locks. That's an oopsable race... The rest of the callers are safe since they are holding namespace_sem and are guaranteed a positive refcount on the mount in question. Rename the current has_locked_children() to __has_locked_children(), make it static and switch the fs/namespace.c users to it. Make has_locked_children() a wrapper for __has_locked_children(), calling the latter under read_seqlock_excl(&mount_lock). Reviewed-by: Christian Brauner Reviewed-by: Jeff Layton Fixes: 620c266f3949 ("fhandle: relax open_by_handle_at() permission checks") Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/namespace.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1b466c54a357d1..216807f772cd28 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2424,7 +2424,7 @@ void drop_collected_mounts(struct vfsmount *mnt) namespace_unlock(); } -bool has_locked_children(struct mount *mnt, struct dentry *dentry) +static bool __has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; @@ -2438,6 +2438,16 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry) return false; } +bool has_locked_children(struct mount *mnt, struct dentry *dentry) +{ + bool res; + + read_seqlock_excl(&mount_lock); + res = __has_locked_children(mnt, dentry); + read_sequnlock_excl(&mount_lock); + return res; +} + /* * Check that there aren't references to earlier/same mount namespaces in the * specified subtree. Such references can act as pins for mount namespaces @@ -2498,7 +2508,7 @@ struct vfsmount *clone_private_mount(const struct path *path) return ERR_PTR(-EINVAL); } - if (has_locked_children(old_mnt, path->dentry)) + if (__has_locked_children(old_mnt, path->dentry)) return ERR_PTR(-EINVAL); new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); @@ -3035,7 +3045,7 @@ static struct mount *__do_loopback(struct path *old_path, int recurse) if (!may_copy_tree(old_path)) return mnt; - if (!recurse && has_locked_children(old, old_path->dentry)) + if (!recurse && __has_locked_children(old, old_path->dentry)) return mnt; if (recurse) @@ -3428,7 +3438,7 @@ static int do_set_group(struct path *from_path, struct path *to_path) goto out; /* From mount should not have locked children in place of To's root */ - if (has_locked_children(from, to->mnt.mnt_root)) + if (__has_locked_children(from, to->mnt.mnt_root)) goto out; /* Setting sharing groups is only allowed on private mounts */ From 65b107ec5b9daba5f60e1ba96172b5b58df60ad0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 1 Jun 2025 14:02:26 -0400 Subject: [PATCH 2706/2924] path_overmount(): avoid false negatives [ Upstream commit 5f31c549382bcddbbd754c72c5433b19420d485d ] Holding namespace_sem is enough to make sure that result remains valid. It is *not* enough to avoid false negatives from __lookup_mnt(). Mounts can be unhashed outside of namespace_sem (stuck children getting detached on final mntput() of lazy-umounted mount) and having an unrelated mount removed from the hash chain while we traverse it may end up with false negative from __lookup_mnt(). We need to sample and recheck the seqlock component of mount_lock... Bug predates the introduction of path_overmount() - it had come from the code in finish_automount() that got abstracted into that helper. Reviewed-by: Christian Brauner Fixes: 26df6034fdb2 ("fix automount/automount race properly") Fixes: 6ac392815628 ("fs: allow to mount beneath top mount") Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/namespace.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 216807f772cd28..cb5126b06dcb95 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3477,18 +3477,25 @@ static int do_set_group(struct path *from_path, struct path *to_path) * Check if path is overmounted, i.e., if there's a mount on top of * @path->mnt with @path->dentry as mountpoint. * - * Context: This function expects namespace_lock() to be held. + * Context: namespace_sem must be held at least shared. + * MUST NOT be called under lock_mount_hash() (there one should just + * call __lookup_mnt() and check if it returns NULL). * Return: If path is overmounted true is returned, false if not. */ static inline bool path_overmounted(const struct path *path) { + unsigned seq = read_seqbegin(&mount_lock); + bool no_child; + rcu_read_lock(); - if (unlikely(__lookup_mnt(path->mnt, path->dentry))) { - rcu_read_unlock(); - return true; - } + no_child = !__lookup_mnt(path->mnt, path->dentry); rcu_read_unlock(); - return false; + if (need_seqretry(&mount_lock, seq)) { + read_seqlock_excl(&mount_lock); + no_child = !__lookup_mnt(path->mnt, path->dentry); + read_sequnlock_excl(&mount_lock); + } + return unlikely(!no_child); } /** From 8fb8dd7505a9960e8c7122e3b19a4bb6aedcf1f1 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Wed, 7 May 2025 15:34:01 -0700 Subject: [PATCH 2707/2924] fs: convert mount flags to enum [ Upstream commit 101f2bbab541116ab861b9c3ac0ece07a7eaa756 ] In prior kernel versions (5.8-6.8), commit 9f6c61f96f2d9 ("proc/mounts: add cursor") introduced MNT_CURSOR, a flag used by readers from /proc/mounts to keep their place while reading the file. Later, commit 2eea9ce4310d8 ("mounts: keep list of mounts in an rbtree") removed this flag and its value has since been repurposed. For debuggers iterating over the list of mounts, cursors should be skipped as they are irrelevant. Detecting whether an element is a cursor can be difficult. Since the MNT_CURSOR flag is a preprocessor constant, it's not present in debuginfo, and since its value is repurposed, we cannot hard-code it. For this specific issue, cursors are possible to detect in other ways, but ideally, we would be able to read the mount flag definitions out of the debuginfo. For that reason, convert the mount flags to an enum. Link: https://github.com/osandov/drgn/pull/496 Signed-off-by: Stephen Brennan Link: https://lore.kernel.org/20250507223402.2795029-1-stephen.s.brennan@oracle.com Signed-off-by: Christian Brauner Stable-dep-of: bab77c0d191e ("finish_automount(): don't leak MNT_LOCKED from parent to child") Signed-off-by: Sasha Levin --- include/linux/mount.h | 87 ++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/include/linux/mount.h b/include/linux/mount.h index dcc17ce8a959e0..6904ad33ee7a32 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -22,48 +22,51 @@ struct fs_context; struct file; struct path; -#define MNT_NOSUID 0x01 -#define MNT_NODEV 0x02 -#define MNT_NOEXEC 0x04 -#define MNT_NOATIME 0x08 -#define MNT_NODIRATIME 0x10 -#define MNT_RELATIME 0x20 -#define MNT_READONLY 0x40 /* does the user want this to be r/o? */ -#define MNT_NOSYMFOLLOW 0x80 - -#define MNT_SHRINKABLE 0x100 -#define MNT_WRITE_HOLD 0x200 - -#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ -#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ -/* - * MNT_SHARED_MASK is the set of flags that should be cleared when a - * mount becomes shared. Currently, this is only the flag that says a - * mount cannot be bind mounted, since this is how we create a mount - * that shares events with another mount. If you add a new MNT_* - * flag, consider how it interacts with shared mounts. - */ -#define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ - | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ - | MNT_READONLY | MNT_NOSYMFOLLOW) -#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) - -#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) - -#define MNT_INTERNAL 0x4000 - -#define MNT_LOCK_ATIME 0x040000 -#define MNT_LOCK_NOEXEC 0x080000 -#define MNT_LOCK_NOSUID 0x100000 -#define MNT_LOCK_NODEV 0x200000 -#define MNT_LOCK_READONLY 0x400000 -#define MNT_LOCKED 0x800000 -#define MNT_DOOMED 0x1000000 -#define MNT_SYNC_UMOUNT 0x2000000 -#define MNT_MARKED 0x4000000 -#define MNT_UMOUNT 0x8000000 +enum mount_flags { + MNT_NOSUID = 0x01, + MNT_NODEV = 0x02, + MNT_NOEXEC = 0x04, + MNT_NOATIME = 0x08, + MNT_NODIRATIME = 0x10, + MNT_RELATIME = 0x20, + MNT_READONLY = 0x40, /* does the user want this to be r/o? */ + MNT_NOSYMFOLLOW = 0x80, + + MNT_SHRINKABLE = 0x100, + MNT_WRITE_HOLD = 0x200, + + MNT_SHARED = 0x1000, /* if the vfsmount is a shared mount */ + MNT_UNBINDABLE = 0x2000, /* if the vfsmount is a unbindable mount */ + + MNT_INTERNAL = 0x4000, + + MNT_LOCK_ATIME = 0x040000, + MNT_LOCK_NOEXEC = 0x080000, + MNT_LOCK_NOSUID = 0x100000, + MNT_LOCK_NODEV = 0x200000, + MNT_LOCK_READONLY = 0x400000, + MNT_LOCKED = 0x800000, + MNT_DOOMED = 0x1000000, + MNT_SYNC_UMOUNT = 0x2000000, + MNT_MARKED = 0x4000000, + MNT_UMOUNT = 0x8000000, + + /* + * MNT_SHARED_MASK is the set of flags that should be cleared when a + * mount becomes shared. Currently, this is only the flag that says a + * mount cannot be bind mounted, since this is how we create a mount + * that shares events with another mount. If you add a new MNT_* + * flag, consider how it interacts with shared mounts. + */ + MNT_SHARED_MASK = MNT_UNBINDABLE, + MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME + | MNT_READONLY | MNT_NOSYMFOLLOW, + MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, + + MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED, +}; struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ From 11b8e4e742dea9375152da4cab8fefcf230f1f48 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 May 2025 13:28:37 -0400 Subject: [PATCH 2708/2924] finish_automount(): don't leak MNT_LOCKED from parent to child [ Upstream commit bab77c0d191e241d2d59a845c7ed68bfa6e1b257 ] Intention for MNT_LOCKED had always been to protect the internal mountpoints within a subtree that got copied across the userns boundary, not the mountpoint that tree got attached to - after all, it _was_ exposed before the copying. For roots of secondary copies that is enforced in attach_recursive_mnt() - MNT_LOCKED is explicitly stripped for those. For the root of primary copy we are almost always guaranteed that MNT_LOCKED won't be there, so attach_recursive_mnt() doesn't bother. Unfortunately, one call chain got overlooked - triggering e.g. NFS referral will have the submount inherit the public flags from parent; that's fine for such things as read-only, nosuid, etc., but not for MNT_LOCKED. This is particularly pointless since the mount attached by finish_automount() is usually expirable, which makes any protection granted by MNT_LOCKED null and void; just wait for a while and that mount will go away on its own. Include MNT_LOCKED into the set of flags to be ignored by do_add_mount() - it really is an internal flag. Reviewed-by: Christian Brauner Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users") Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- include/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mount.h b/include/linux/mount.h index 6904ad33ee7a32..1a3136e53eaa07 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -65,7 +65,8 @@ enum mount_flags { MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED, + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | + MNT_LOCKED, }; struct vfsmount { From 327d444c951752fcba2e960eef4c0226ba4d0edc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 3 Jun 2025 17:57:27 -0400 Subject: [PATCH 2709/2924] fix propagation graph breakage by MOVE_MOUNT_SET_GROUP move_mount(2) [ Upstream commit d8cc0362f918d020ca1340d7694f07062dc30f36 ] 9ffb14ef61ba "move_mount: allow to add a mount into an existing group" breaks assertions on ->mnt_share/->mnt_slave. For once, the data structures in question are actually documented. Documentation/filesystem/sharedsubtree.rst: All vfsmounts in a peer group have the same ->mnt_master. If it is non-NULL, they form a contiguous (ordered) segment of slave list. do_set_group() puts a mount into the same place in propagation graph as the old one. As the result, if old mount gets events from somewhere and is not a pure event sink, new one needs to be placed next to the old one in the slave list the old one's on. If it is a pure event sink, we only need to make sure the new one doesn't end up in the middle of some peer group. "move_mount: allow to add a mount into an existing group" ends up putting the new one in the beginning of list; that's definitely not going to be in the middle of anything, so that's fine for case when old is not marked shared. In case when old one _is_ marked shared (i.e. is not a pure event sink), that breaks the assumptions of propagation graph iterators. Put the new mount next to the old one on the list - that does the right thing in "old is marked shared" case and is just as correct as the current behaviour if old is not marked shared (kudos to Pavel for pointing that out - my original suggested fix changed behaviour in the "nor marked" case, which complicated things for no good reason). Reviewed-by: Christian Brauner Fixes: 9ffb14ef61ba ("move_mount: allow to add a mount into an existing group") Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index cb5126b06dcb95..e2780f413a2e0a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3452,7 +3452,7 @@ static int do_set_group(struct path *from_path, struct path *to_path) if (IS_MNT_SLAVE(from)) { struct mount *m = from->mnt_master; - list_add(&to->mnt_slave, &m->mnt_slave_list); + list_add(&to->mnt_slave, &from->mnt_slave); to->mnt_master = m; } From 7e56b53b6f17a7bcaab9f5f38215411920e6880f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?KONDO=20KAZUMA=28=E8=BF=91=E8=97=A4=E3=80=80=E5=92=8C?= =?UTF-8?q?=E7=9C=9F=29?= Date: Thu, 15 May 2025 12:18:30 +0000 Subject: [PATCH 2710/2924] fs: allow clone_private_mount() for a path on real rootfs [ Upstream commit 4954346d80fb047cb78776d9f2ebd6a050f80c5f ] Mounting overlayfs with a directory on real rootfs (initramfs) as upperdir has failed with following message since commit db04662e2f4f ("fs: allow detached mounts in clone_private_mount()"). [ 4.080134] overlayfs: failed to clone upperpath Overlayfs mount uses clone_private_mount() to create internal mount for the underlying layers. The commit made clone_private_mount() reject real rootfs because it does not have a parent mount and is in the initial mount namespace, that is not an anonymous mount namespace. This issue can be fixed by modifying the permission check of clone_private_mount() following [1]. Reviewed-by: Christian Brauner Fixes: db04662e2f4f ("fs: allow detached mounts in clone_private_mount()") Link: https://lore.kernel.org/all/20250514190252.GQ2023217@ZenIV/ [1] Link: https://lore.kernel.org/all/20250506194849.GT2023217@ZenIV/ Suggested-by: Al Viro Signed-off-by: Kazuma Kondo Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/namespace.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index e2780f413a2e0a..07bc500a248ec3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2492,18 +2492,19 @@ struct vfsmount *clone_private_mount(const struct path *path) if (IS_MNT_UNBINDABLE(old_mnt)) return ERR_PTR(-EINVAL); - if (mnt_has_parent(old_mnt)) { - if (!check_mnt(old_mnt)) - return ERR_PTR(-EINVAL); - } else { - if (!is_mounted(&old_mnt->mnt)) - return ERR_PTR(-EINVAL); - - /* Make sure this isn't something purely kernel internal. */ - if (!is_anon_ns(old_mnt->mnt_ns)) + /* + * Make sure the source mount is acceptable. + * Anything mounted in our mount namespace is allowed. + * Otherwise, it must be the root of an anonymous mount + * namespace, and we need to make sure no namespace + * loops get created. + */ + if (!check_mnt(old_mnt)) { + if (!is_mounted(&old_mnt->mnt) || + !is_anon_ns(old_mnt->mnt_ns) || + mnt_has_parent(old_mnt)) return ERR_PTR(-EINVAL); - /* Make sure we don't create mount namespace loops. */ if (!check_for_nsfs_mounts(old_mnt)) return ERR_PTR(-EINVAL); } From 38628ae06e2a37770cd794802a3f1310cf9846e3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 1 Jun 2025 20:11:06 -0400 Subject: [PATCH 2711/2924] clone_private_mnt(): make sure that caller has CAP_SYS_ADMIN in the right userns [ Upstream commit c28f922c9dcee0e4876a2c095939d77fe7e15116 ] What we want is to verify there is that clone won't expose something hidden by a mount we wouldn't be able to undo. "Wouldn't be able to undo" may be a result of MNT_LOCKED on a child, but it may also come from lacking admin rights in the userns of the namespace mount belongs to. clone_private_mnt() checks the former, but not the latter. There's a number of rather confusing CAP_SYS_ADMIN checks in various userns during the mount, especially with the new mount API; they serve different purposes and in case of clone_private_mnt() they usually, but not always end up covering the missing check mentioned above. Reviewed-by: Christian Brauner Reported-by: "Orlando, Noah" Fixes: 427215d85e8d ("ovl: prevent private clone if bind mount is not allowed") Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/namespace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 07bc500a248ec3..163ffdc0422843 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2509,6 +2509,9 @@ struct vfsmount *clone_private_mount(const struct path *path) return ERR_PTR(-EINVAL); } + if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + if (__has_locked_children(old_mnt, path->dentry)) return ERR_PTR(-EINVAL); From 19554c79a2095ddde850906a067915c1ef3a4114 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 4 Jun 2025 12:27:08 -0400 Subject: [PATCH 2712/2924] do_change_type(): refuse to operate on unmounted/not ours mounts [ Upstream commit 12f147ddd6de7382dad54812e65f3f08d05809fc ] Ensure that propagation settings can only be changed for mounts located in the caller's mount namespace. This change aligns permission checking with the rest of mount(2). Reviewed-by: Christian Brauner Fixes: 07b20889e305 ("beginning of the shared-subtree proper") Reported-by: "Orlando, Noah" Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/namespace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 163ffdc0422843..2de4b7ad1dc5d6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2958,6 +2958,10 @@ static int do_change_type(struct path *path, int ms_flags) return -EINVAL; namespace_lock(); + if (!check_mnt(mnt)) { + err = -EINVAL; + goto out_unlock; + } if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) From c74a5f8297321b2d57ed9b7f04df11efc525fb5e Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 3 Jun 2025 15:02:09 +0200 Subject: [PATCH 2713/2924] genksyms: Fix enum consts from a reference affecting new values [ Upstream commit c50a04f8f45c7f13972f9097622d1d929033ea8c ] Enumeration constants read from a symbol reference file can incorrectly affect new enumeration constants parsed from an actual input file. Example: $ cat test.c enum { E_A, E_B, E_MAX }; struct bar { int mem[E_MAX]; }; int foo(struct bar *a) {} __GENKSYMS_EXPORT_SYMBOL(foo); $ cat test.c | ./scripts/genksyms/genksyms -T test.0.symtypes #SYMVER foo 0x070d854d $ cat test.0.symtypes E#E_MAX 2 s#bar struct bar { int mem [ E#E_MAX ] ; } foo int foo ( s#bar * ) $ cat test.c | ./scripts/genksyms/genksyms -T test.1.symtypes -r test.0.symtypes :4: warning: foo: modversion changed because of changes in enum constant E_MAX #SYMVER foo 0x9c9dfd81 $ cat test.1.symtypes E#E_MAX ( 2 ) + 3 s#bar struct bar { int mem [ E#E_MAX ] ; } foo int foo ( s#bar * ) The __add_symbol() function includes logic to handle the incrementation of enumeration values, but this code is also invoked when reading a reference file. As a result, the variables last_enum_expr and enum_counter might be incorrectly set after reading the reference file, which later affects parsing of the actual input. Fix the problem by splitting the logic for the incrementation of enumeration values into a separate function process_enum() and call it from __add_symbol() only when processing non-reference data. Fixes: e37ddb825003 ("genksyms: Track changes to enum constants") Signed-off-by: Petr Pavlu Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/genksyms/genksyms.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index 8b0d7ac73dbb09..83e48670c2fcfb 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -181,13 +181,9 @@ static int is_unknown_symbol(struct symbol *sym) strcmp(defn->string, "{") == 0); } -static struct symbol *__add_symbol(const char *name, enum symbol_type type, - struct string_list *defn, int is_extern, - int is_reference) +static struct string_list *process_enum(const char *name, enum symbol_type type, + struct string_list *defn) { - unsigned long h; - struct symbol *sym; - enum symbol_status status = STATUS_UNCHANGED; /* The parser adds symbols in the order their declaration completes, * so it is safe to store the value of the previous enum constant in * a static variable. @@ -216,7 +212,7 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type, defn = mk_node(buf); } } - } else if (type == SYM_ENUM) { + } else { free_list(last_enum_expr, NULL); last_enum_expr = NULL; enum_counter = 0; @@ -225,6 +221,23 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type, return NULL; } + return defn; +} + +static struct symbol *__add_symbol(const char *name, enum symbol_type type, + struct string_list *defn, int is_extern, + int is_reference) +{ + unsigned long h; + struct symbol *sym; + enum symbol_status status = STATUS_UNCHANGED; + + if ((type == SYM_ENUM_CONST || type == SYM_ENUM) && !is_reference) { + defn = process_enum(name, type, defn); + if (defn == NULL) + return NULL; + } + h = crc32(name); hash_for_each_possible(symbol_hashtable, sym, hnode, h) { if (map_to_ns(sym->type) != map_to_ns(type) || From 32e1708dab890f7ec29d7bccf30b0460bfb63f23 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 29 May 2025 17:18:25 +0530 Subject: [PATCH 2714/2924] tools/power turbostat: Fix AMD package-energy reporting [ Upstream commit adb49732c8c63665dd3476e8e6b7c67a0f851245 ] commit 05a2f07db888 ("tools/power turbostat: read RAPL counters via perf") that adds support to read RAPL counters via perf defines the notion of a RAPL domain_id which is set to physical_core_id on platforms which support per_core_rapl counters (Eg: AMD processors Family 17h onwards) and is set to the physical_package_id on all the other platforms. However, the physical_core_id is only unique within a package and on platforms with multiple packages more than one core can have the same physical_core_id and thus the same domain_id. (For eg, the first cores of each package have the physical_core_id = 0). This results in all these cores with the same physical_core_id using the same entry in the rapl_counter_info_perdomain[]. Since rapl_perf_init() skips the perf-initialization for cores whose domain_ids have already been visited, cores that have the same physical_core_id always read the perf file corresponding to the physical_core_id of the first package and thus the package-energy is incorrectly reported to be the same value for different packages. Note: This issue only arises when RAPL counters are read via perf and not when they are read via MSRs since in the latter case the MSRs are read separately on each core. Fix this issue by associating each CPU with rapl_core_id which is unique across all the packages in the system. Fixes: 05a2f07db888 ("tools/power turbostat: read RAPL counters via perf") Signed-off-by: Gautham R. Shenoy Signed-off-by: Len Brown Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.c | 41 +++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0170d3cc68194c..ab79854cb296e4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4766,6 +4766,38 @@ unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) return (value & value_mask) >> value_shift; } + +/* Rapl domain enumeration helpers */ +static inline int get_rapl_num_domains(void) +{ + int num_packages = topo.max_package_id + 1; + int num_cores_per_package; + int num_cores; + + if (!platform->has_per_core_rapl) + return num_packages; + + num_cores_per_package = topo.max_core_id + 1; + num_cores = num_cores_per_package * num_packages; + + return num_cores; +} + +static inline int get_rapl_domain_id(int cpu) +{ + int nr_cores_per_package = topo.max_core_id + 1; + int rapl_core_id; + + if (!platform->has_per_core_rapl) + return cpus[cpu].physical_package_id; + + /* Compute the system-wide unique core-id for @cpu */ + rapl_core_id = cpus[cpu].physical_core_id; + rapl_core_id += cpus[cpu].physical_package_id * nr_cores_per_package; + + return rapl_core_id; +} + /* * get_counters(...) * migrate to cpu @@ -4821,7 +4853,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) goto done; if (platform->has_per_core_rapl) { - status = get_rapl_counters(cpu, c->core_id, c, p); + status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); if (status != 0) return status; } @@ -4887,7 +4919,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) p->sys_lpi = cpuidle_cur_sys_lpi_us; if (!platform->has_per_core_rapl) { - status = get_rapl_counters(cpu, p->package_id, c, p); + status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); if (status != 0) return status; } @@ -7863,7 +7895,7 @@ void linux_perf_init(void) void rapl_perf_init(void) { - const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1; + const unsigned int num_domains = get_rapl_num_domains(); bool *domain_visited = calloc(num_domains, sizeof(bool)); rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); @@ -7904,8 +7936,7 @@ void rapl_perf_init(void) continue; /* Skip already seen and handled RAPL domains */ - next_domain = - platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; + next_domain = get_rapl_domain_id(cpu); assert(next_domain < num_domains); From 57e204cdeeb671f225d662fa259a79e6065ad5b1 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 2 Apr 2025 16:17:30 +0100 Subject: [PATCH 2715/2924] pinctrl: samsung: refactor drvdata suspend & resume callbacks [ Upstream commit 3ade961e97f3b05dcdd9a4fabfe179c9e75571e0 ] This enables the clk_enable() and clk_disable() logic to be removed from each callback, but otherwise should have no functional impact. It is a prepatory patch so that the callbacks can become SoC specific. Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250402-pinctrl-fltcon-suspend-v6-1-78ce0d4eb30c@linaro.org Signed-off-by: Krzysztof Kozlowski Stable-dep-of: bdbe0a0f7100 ("pinctrl: samsung: add gs101 specific eint suspend/resume callbacks") Signed-off-by: Sasha Levin --- drivers/pinctrl/samsung/pinctrl-exynos.c | 89 ++++++----------------- drivers/pinctrl/samsung/pinctrl-exynos.h | 4 +- drivers/pinctrl/samsung/pinctrl-samsung.c | 21 ++++-- drivers/pinctrl/samsung/pinctrl-samsung.h | 8 +- 4 files changed, 42 insertions(+), 80 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index 42093bae8bb793..ae82f42be83cf0 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -762,19 +762,11 @@ __init int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d) return 0; } -static void exynos_pinctrl_suspend_bank( - struct samsung_pinctrl_drv_data *drvdata, - struct samsung_pin_bank *bank) +static void exynos_pinctrl_suspend_bank(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; const void __iomem *regs = bank->eint_base; - if (clk_enable(bank->drvdata->pclk)) { - dev_err(bank->gpio_chip.parent, - "unable to enable clock for saving state\n"); - return; - } - save->eint_con = readl(regs + EXYNOS_GPIO_ECON_OFFSET + bank->eint_offset); save->eint_fltcon0 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET @@ -784,71 +776,46 @@ static void exynos_pinctrl_suspend_bank( save->eint_mask = readl(regs + bank->irq_chip->eint_mask + bank->eint_offset); - clk_disable(bank->drvdata->pclk); - pr_debug("%s: save con %#010x\n", bank->name, save->eint_con); pr_debug("%s: save fltcon0 %#010x\n", bank->name, save->eint_fltcon0); pr_debug("%s: save fltcon1 %#010x\n", bank->name, save->eint_fltcon1); pr_debug("%s: save mask %#010x\n", bank->name, save->eint_mask); } -static void exynosauto_pinctrl_suspend_bank(struct samsung_pinctrl_drv_data *drvdata, - struct samsung_pin_bank *bank) +static void exynosauto_pinctrl_suspend_bank(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; const void __iomem *regs = bank->eint_base; - if (clk_enable(bank->drvdata->pclk)) { - dev_err(bank->gpio_chip.parent, - "unable to enable clock for saving state\n"); - return; - } - save->eint_con = readl(regs + bank->pctl_offset + bank->eint_con_offset); save->eint_mask = readl(regs + bank->pctl_offset + bank->eint_mask_offset); - clk_disable(bank->drvdata->pclk); - pr_debug("%s: save con %#010x\n", bank->name, save->eint_con); pr_debug("%s: save mask %#010x\n", bank->name, save->eint_mask); } -void exynos_pinctrl_suspend(struct samsung_pinctrl_drv_data *drvdata) +void exynos_pinctrl_suspend(struct samsung_pin_bank *bank) { - struct samsung_pin_bank *bank = drvdata->pin_banks; struct exynos_irq_chip *irq_chip = NULL; - int i; - for (i = 0; i < drvdata->nr_banks; ++i, ++bank) { - if (bank->eint_type == EINT_TYPE_GPIO) { - if (bank->eint_con_offset) - exynosauto_pinctrl_suspend_bank(drvdata, bank); - else - exynos_pinctrl_suspend_bank(drvdata, bank); - } - else if (bank->eint_type == EINT_TYPE_WKUP) { - if (!irq_chip) { - irq_chip = bank->irq_chip; - irq_chip->set_eint_wakeup_mask(drvdata, - irq_chip); - } + if (bank->eint_type == EINT_TYPE_GPIO) { + if (bank->eint_con_offset) + exynosauto_pinctrl_suspend_bank(bank); + else + exynos_pinctrl_suspend_bank(bank); + } else if (bank->eint_type == EINT_TYPE_WKUP) { + if (!irq_chip) { + irq_chip = bank->irq_chip; + irq_chip->set_eint_wakeup_mask(bank->drvdata, irq_chip); } } } -static void exynos_pinctrl_resume_bank( - struct samsung_pinctrl_drv_data *drvdata, - struct samsung_pin_bank *bank) +static void exynos_pinctrl_resume_bank(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; void __iomem *regs = bank->eint_base; - if (clk_enable(bank->drvdata->pclk)) { - dev_err(bank->gpio_chip.parent, - "unable to enable clock for restoring state\n"); - return; - } - pr_debug("%s: con %#010x => %#010x\n", bank->name, readl(regs + EXYNOS_GPIO_ECON_OFFSET + bank->eint_offset), save->eint_con); @@ -870,22 +837,13 @@ static void exynos_pinctrl_resume_bank( + 2 * bank->eint_offset + 4); writel(save->eint_mask, regs + bank->irq_chip->eint_mask + bank->eint_offset); - - clk_disable(bank->drvdata->pclk); } -static void exynosauto_pinctrl_resume_bank(struct samsung_pinctrl_drv_data *drvdata, - struct samsung_pin_bank *bank) +static void exynosauto_pinctrl_resume_bank(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; void __iomem *regs = bank->eint_base; - if (clk_enable(bank->drvdata->pclk)) { - dev_err(bank->gpio_chip.parent, - "unable to enable clock for restoring state\n"); - return; - } - pr_debug("%s: con %#010x => %#010x\n", bank->name, readl(regs + bank->pctl_offset + bank->eint_con_offset), save->eint_con); pr_debug("%s: mask %#010x => %#010x\n", bank->name, @@ -894,21 +852,16 @@ static void exynosauto_pinctrl_resume_bank(struct samsung_pinctrl_drv_data *drvd writel(save->eint_con, regs + bank->pctl_offset + bank->eint_con_offset); writel(save->eint_mask, regs + bank->pctl_offset + bank->eint_mask_offset); - clk_disable(bank->drvdata->pclk); } -void exynos_pinctrl_resume(struct samsung_pinctrl_drv_data *drvdata) +void exynos_pinctrl_resume(struct samsung_pin_bank *bank) { - struct samsung_pin_bank *bank = drvdata->pin_banks; - int i; - - for (i = 0; i < drvdata->nr_banks; ++i, ++bank) - if (bank->eint_type == EINT_TYPE_GPIO) { - if (bank->eint_con_offset) - exynosauto_pinctrl_resume_bank(drvdata, bank); - else - exynos_pinctrl_resume_bank(drvdata, bank); - } + if (bank->eint_type == EINT_TYPE_GPIO) { + if (bank->eint_con_offset) + exynosauto_pinctrl_resume_bank(bank); + else + exynos_pinctrl_resume_bank(bank); + } } static void exynos_retention_enable(struct samsung_pinctrl_drv_data *drvdata) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.h b/drivers/pinctrl/samsung/pinctrl-exynos.h index b483270ddc53c0..341155c1abd153 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.h +++ b/drivers/pinctrl/samsung/pinctrl-exynos.h @@ -240,8 +240,8 @@ struct exynos_muxed_weint_data { int exynos_eint_gpio_init(struct samsung_pinctrl_drv_data *d); int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d); -void exynos_pinctrl_suspend(struct samsung_pinctrl_drv_data *drvdata); -void exynos_pinctrl_resume(struct samsung_pinctrl_drv_data *drvdata); +void exynos_pinctrl_suspend(struct samsung_pin_bank *bank); +void exynos_pinctrl_resume(struct samsung_pin_bank *bank); struct samsung_retention_ctrl * exynos_retention_init(struct samsung_pinctrl_drv_data *drvdata, const struct samsung_retention_data *data); diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index 2896eb2de2c098..ef557217e173af 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -1333,6 +1333,7 @@ static int samsung_pinctrl_probe(struct platform_device *pdev) static int __maybe_unused samsung_pinctrl_suspend(struct device *dev) { struct samsung_pinctrl_drv_data *drvdata = dev_get_drvdata(dev); + struct samsung_pin_bank *bank; int i; i = clk_enable(drvdata->pclk); @@ -1343,7 +1344,7 @@ static int __maybe_unused samsung_pinctrl_suspend(struct device *dev) } for (i = 0; i < drvdata->nr_banks; i++) { - struct samsung_pin_bank *bank = &drvdata->pin_banks[i]; + bank = &drvdata->pin_banks[i]; const void __iomem *reg = bank->pctl_base + bank->pctl_offset; const u8 *offs = bank->type->reg_offset; const u8 *widths = bank->type->fld_width; @@ -1371,10 +1372,14 @@ static int __maybe_unused samsung_pinctrl_suspend(struct device *dev) } } + for (i = 0; i < drvdata->nr_banks; i++) { + bank = &drvdata->pin_banks[i]; + if (drvdata->suspend) + drvdata->suspend(bank); + } + clk_disable(drvdata->pclk); - if (drvdata->suspend) - drvdata->suspend(drvdata); if (drvdata->retention_ctrl && drvdata->retention_ctrl->enable) drvdata->retention_ctrl->enable(drvdata); @@ -1392,6 +1397,7 @@ static int __maybe_unused samsung_pinctrl_suspend(struct device *dev) static int __maybe_unused samsung_pinctrl_resume(struct device *dev) { struct samsung_pinctrl_drv_data *drvdata = dev_get_drvdata(dev); + struct samsung_pin_bank *bank; int ret; int i; @@ -1406,11 +1412,14 @@ static int __maybe_unused samsung_pinctrl_resume(struct device *dev) return ret; } - if (drvdata->resume) - drvdata->resume(drvdata); + for (i = 0; i < drvdata->nr_banks; i++) { + bank = &drvdata->pin_banks[i]; + if (drvdata->resume) + drvdata->resume(bank); + } for (i = 0; i < drvdata->nr_banks; i++) { - struct samsung_pin_bank *bank = &drvdata->pin_banks[i]; + bank = &drvdata->pin_banks[i]; void __iomem *reg = bank->pctl_base + bank->pctl_offset; const u8 *offs = bank->type->reg_offset; const u8 *widths = bank->type->fld_width; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.h b/drivers/pinctrl/samsung/pinctrl-samsung.h index 3cf758df7d6912..fcc57c244d167d 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.h +++ b/drivers/pinctrl/samsung/pinctrl-samsung.h @@ -285,8 +285,8 @@ struct samsung_pin_ctrl { int (*eint_gpio_init)(struct samsung_pinctrl_drv_data *); int (*eint_wkup_init)(struct samsung_pinctrl_drv_data *); void (*pud_value_init)(struct samsung_pinctrl_drv_data *drvdata); - void (*suspend)(struct samsung_pinctrl_drv_data *); - void (*resume)(struct samsung_pinctrl_drv_data *); + void (*suspend)(struct samsung_pin_bank *bank); + void (*resume)(struct samsung_pin_bank *bank); }; /** @@ -335,8 +335,8 @@ struct samsung_pinctrl_drv_data { struct samsung_retention_ctrl *retention_ctrl; - void (*suspend)(struct samsung_pinctrl_drv_data *); - void (*resume)(struct samsung_pinctrl_drv_data *); + void (*suspend)(struct samsung_pin_bank *bank); + void (*resume)(struct samsung_pin_bank *bank); }; /** From e526f7753e549484ec5d652ad6e42529deb724f6 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 2 Apr 2025 16:17:31 +0100 Subject: [PATCH 2716/2924] pinctrl: samsung: add dedicated SoC eint suspend/resume callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 77ac6b742eba063a5b6600cda67834a7a212281a ] Refactor the existing platform specific suspend/resume callback so that each SoC variant has it's own callback containing the SoC specific logic. This allows exynosautov920 to have a dedicated function for using eint_con_offset and eint_mask_offset. Also it is easily extendable for gs101 which will need dedicated logic for handling the varying register offset of fltcon0 via eint_fltcon_offset. Reviewed-by: André Draszik Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250402-pinctrl-fltcon-suspend-v6-2-78ce0d4eb30c@linaro.org Signed-off-by: Krzysztof Kozlowski Stable-dep-of: bdbe0a0f7100 ("pinctrl: samsung: add gs101 specific eint suspend/resume callbacks") Signed-off-by: Sasha Levin --- .../pinctrl/samsung/pinctrl-exynos-arm64.c | 28 ++-- drivers/pinctrl/samsung/pinctrl-exynos.c | 152 ++++++++++-------- drivers/pinctrl/samsung/pinctrl-exynos.h | 2 + 3 files changed, 97 insertions(+), 85 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c index dd07720e32cc09..4b5d4e436a337f 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c @@ -1419,8 +1419,8 @@ static const struct samsung_pin_ctrl exynosautov920_pin_ctrl[] = { .pin_banks = exynosautov920_pin_banks0, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks0), .eint_wkup_init = exynos_eint_wkup_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, .retention_data = &exynosautov920_retention_data, }, { /* pin-controller instance 1 AUD data */ @@ -1431,43 +1431,43 @@ static const struct samsung_pin_ctrl exynosautov920_pin_ctrl[] = { .pin_banks = exynosautov920_pin_banks2, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks2), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, { /* pin-controller instance 3 HSI1 data */ .pin_banks = exynosautov920_pin_banks3, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks3), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, { /* pin-controller instance 4 HSI2 data */ .pin_banks = exynosautov920_pin_banks4, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks4), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, { /* pin-controller instance 5 HSI2UFS data */ .pin_banks = exynosautov920_pin_banks5, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks5), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, { /* pin-controller instance 6 PERIC0 data */ .pin_banks = exynosautov920_pin_banks6, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks6), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, { /* pin-controller instance 7 PERIC1 data */ .pin_banks = exynosautov920_pin_banks7, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks7), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, }; diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index ae82f42be83cf0..18c327f7e31335 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -762,105 +762,115 @@ __init int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d) return 0; } -static void exynos_pinctrl_suspend_bank(struct samsung_pin_bank *bank) +static void exynos_set_wakeup(struct samsung_pin_bank *bank) { - struct exynos_eint_gpio_save *save = bank->soc_priv; - const void __iomem *regs = bank->eint_base; + struct exynos_irq_chip *irq_chip; - save->eint_con = readl(regs + EXYNOS_GPIO_ECON_OFFSET - + bank->eint_offset); - save->eint_fltcon0 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset); - save->eint_fltcon1 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset + 4); - save->eint_mask = readl(regs + bank->irq_chip->eint_mask - + bank->eint_offset); - - pr_debug("%s: save con %#010x\n", bank->name, save->eint_con); - pr_debug("%s: save fltcon0 %#010x\n", bank->name, save->eint_fltcon0); - pr_debug("%s: save fltcon1 %#010x\n", bank->name, save->eint_fltcon1); - pr_debug("%s: save mask %#010x\n", bank->name, save->eint_mask); + if (bank->irq_chip) { + irq_chip = bank->irq_chip; + irq_chip->set_eint_wakeup_mask(bank->drvdata, irq_chip); + } } -static void exynosauto_pinctrl_suspend_bank(struct samsung_pin_bank *bank) +void exynos_pinctrl_suspend(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; const void __iomem *regs = bank->eint_base; - save->eint_con = readl(regs + bank->pctl_offset + bank->eint_con_offset); - save->eint_mask = readl(regs + bank->pctl_offset + bank->eint_mask_offset); - - pr_debug("%s: save con %#010x\n", bank->name, save->eint_con); - pr_debug("%s: save mask %#010x\n", bank->name, save->eint_mask); + if (bank->eint_type == EINT_TYPE_GPIO) { + save->eint_con = readl(regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset); + save->eint_fltcon0 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset); + save->eint_fltcon1 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset + 4); + save->eint_mask = readl(regs + bank->irq_chip->eint_mask + + bank->eint_offset); + + pr_debug("%s: save con %#010x\n", + bank->name, save->eint_con); + pr_debug("%s: save fltcon0 %#010x\n", + bank->name, save->eint_fltcon0); + pr_debug("%s: save fltcon1 %#010x\n", + bank->name, save->eint_fltcon1); + pr_debug("%s: save mask %#010x\n", + bank->name, save->eint_mask); + } else if (bank->eint_type == EINT_TYPE_WKUP) { + exynos_set_wakeup(bank); + } } -void exynos_pinctrl_suspend(struct samsung_pin_bank *bank) +void exynosautov920_pinctrl_suspend(struct samsung_pin_bank *bank) { - struct exynos_irq_chip *irq_chip = NULL; + struct exynos_eint_gpio_save *save = bank->soc_priv; + const void __iomem *regs = bank->eint_base; if (bank->eint_type == EINT_TYPE_GPIO) { - if (bank->eint_con_offset) - exynosauto_pinctrl_suspend_bank(bank); - else - exynos_pinctrl_suspend_bank(bank); + save->eint_con = readl(regs + bank->pctl_offset + + bank->eint_con_offset); + save->eint_mask = readl(regs + bank->pctl_offset + + bank->eint_mask_offset); + pr_debug("%s: save con %#010x\n", + bank->name, save->eint_con); + pr_debug("%s: save mask %#010x\n", + bank->name, save->eint_mask); } else if (bank->eint_type == EINT_TYPE_WKUP) { - if (!irq_chip) { - irq_chip = bank->irq_chip; - irq_chip->set_eint_wakeup_mask(bank->drvdata, irq_chip); - } + exynos_set_wakeup(bank); } } -static void exynos_pinctrl_resume_bank(struct samsung_pin_bank *bank) +void exynos_pinctrl_resume(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; void __iomem *regs = bank->eint_base; - pr_debug("%s: con %#010x => %#010x\n", bank->name, - readl(regs + EXYNOS_GPIO_ECON_OFFSET - + bank->eint_offset), save->eint_con); - pr_debug("%s: fltcon0 %#010x => %#010x\n", bank->name, - readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset), save->eint_fltcon0); - pr_debug("%s: fltcon1 %#010x => %#010x\n", bank->name, - readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset + 4), save->eint_fltcon1); - pr_debug("%s: mask %#010x => %#010x\n", bank->name, - readl(regs + bank->irq_chip->eint_mask - + bank->eint_offset), save->eint_mask); - - writel(save->eint_con, regs + EXYNOS_GPIO_ECON_OFFSET - + bank->eint_offset); - writel(save->eint_fltcon0, regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset); - writel(save->eint_fltcon1, regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset + 4); - writel(save->eint_mask, regs + bank->irq_chip->eint_mask - + bank->eint_offset); + if (bank->eint_type == EINT_TYPE_GPIO) { + pr_debug("%s: con %#010x => %#010x\n", bank->name, + readl(regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset), save->eint_con); + pr_debug("%s: fltcon0 %#010x => %#010x\n", bank->name, + readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset), save->eint_fltcon0); + pr_debug("%s: fltcon1 %#010x => %#010x\n", bank->name, + readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset + 4), + save->eint_fltcon1); + pr_debug("%s: mask %#010x => %#010x\n", bank->name, + readl(regs + bank->irq_chip->eint_mask + + bank->eint_offset), save->eint_mask); + + writel(save->eint_con, regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset); + writel(save->eint_fltcon0, regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset); + writel(save->eint_fltcon1, regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset + 4); + writel(save->eint_mask, regs + bank->irq_chip->eint_mask + + bank->eint_offset); + } } -static void exynosauto_pinctrl_resume_bank(struct samsung_pin_bank *bank) +void exynosautov920_pinctrl_resume(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; void __iomem *regs = bank->eint_base; - pr_debug("%s: con %#010x => %#010x\n", bank->name, - readl(regs + bank->pctl_offset + bank->eint_con_offset), save->eint_con); - pr_debug("%s: mask %#010x => %#010x\n", bank->name, - readl(regs + bank->pctl_offset + bank->eint_mask_offset), save->eint_mask); - - writel(save->eint_con, regs + bank->pctl_offset + bank->eint_con_offset); - writel(save->eint_mask, regs + bank->pctl_offset + bank->eint_mask_offset); - -} - -void exynos_pinctrl_resume(struct samsung_pin_bank *bank) -{ if (bank->eint_type == EINT_TYPE_GPIO) { - if (bank->eint_con_offset) - exynosauto_pinctrl_resume_bank(bank); - else - exynos_pinctrl_resume_bank(bank); + /* exynosautov920 has eint_con_offset for all but one bank */ + if (!bank->eint_con_offset) + exynos_pinctrl_resume(bank); + + pr_debug("%s: con %#010x => %#010x\n", bank->name, + readl(regs + bank->pctl_offset + bank->eint_con_offset), + save->eint_con); + pr_debug("%s: mask %#010x => %#010x\n", bank->name, + readl(regs + bank->pctl_offset + + bank->eint_mask_offset), save->eint_mask); + + writel(save->eint_con, + regs + bank->pctl_offset + bank->eint_con_offset); + writel(save->eint_mask, + regs + bank->pctl_offset + bank->eint_mask_offset); } } diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.h b/drivers/pinctrl/samsung/pinctrl-exynos.h index 341155c1abd153..3a771862b4b176 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.h +++ b/drivers/pinctrl/samsung/pinctrl-exynos.h @@ -240,6 +240,8 @@ struct exynos_muxed_weint_data { int exynos_eint_gpio_init(struct samsung_pinctrl_drv_data *d); int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d); +void exynosautov920_pinctrl_resume(struct samsung_pin_bank *bank); +void exynosautov920_pinctrl_suspend(struct samsung_pin_bank *bank); void exynos_pinctrl_suspend(struct samsung_pin_bank *bank); void exynos_pinctrl_resume(struct samsung_pin_bank *bank); struct samsung_retention_ctrl * From e7d9e8f1341e332c7e16f89f53b486595e27c6e8 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Wed, 2 Apr 2025 16:17:32 +0100 Subject: [PATCH 2717/2924] pinctrl: samsung: add gs101 specific eint suspend/resume callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bdbe0a0f71003b997d6a2dbe4bc7b5b0438207c7 ] gs101 differs to other SoCs in that fltcon1 register doesn't always exist. Additionally the offset of fltcon0 is not fixed and needs to use the newly added eint_fltcon_offset variable. Fixes: 4a8be01a1a7a ("pinctrl: samsung: Add gs101 SoC pinctrl configuration") Cc: stable@vger.kernel.org # depends on the previous three patches Reviewed-by: André Draszik Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20250402-pinctrl-fltcon-suspend-v6-3-78ce0d4eb30c@linaro.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- .../pinctrl/samsung/pinctrl-exynos-arm64.c | 24 +++---- drivers/pinctrl/samsung/pinctrl-exynos.c | 71 +++++++++++++++++++ drivers/pinctrl/samsung/pinctrl-exynos.h | 2 + 3 files changed, 85 insertions(+), 12 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c index 4b5d4e436a337f..9fd894729a7b87 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c @@ -1762,15 +1762,15 @@ static const struct samsung_pin_ctrl gs101_pin_ctrl[] __initconst = { .pin_banks = gs101_pin_alive, .nr_banks = ARRAY_SIZE(gs101_pin_alive), .eint_wkup_init = exynos_eint_wkup_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, { /* pin banks of gs101 pin-controller (FAR_ALIVE) */ .pin_banks = gs101_pin_far_alive, .nr_banks = ARRAY_SIZE(gs101_pin_far_alive), .eint_wkup_init = exynos_eint_wkup_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, { /* pin banks of gs101 pin-controller (GSACORE) */ .pin_banks = gs101_pin_gsacore, @@ -1784,29 +1784,29 @@ static const struct samsung_pin_ctrl gs101_pin_ctrl[] __initconst = { .pin_banks = gs101_pin_peric0, .nr_banks = ARRAY_SIZE(gs101_pin_peric0), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, { /* pin banks of gs101 pin-controller (PERIC1) */ .pin_banks = gs101_pin_peric1, .nr_banks = ARRAY_SIZE(gs101_pin_peric1), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, { /* pin banks of gs101 pin-controller (HSI1) */ .pin_banks = gs101_pin_hsi1, .nr_banks = ARRAY_SIZE(gs101_pin_hsi1), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, { /* pin banks of gs101 pin-controller (HSI2) */ .pin_banks = gs101_pin_hsi2, .nr_banks = ARRAY_SIZE(gs101_pin_hsi2), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, }; diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index 18c327f7e31335..0879684338c772 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -800,6 +800,41 @@ void exynos_pinctrl_suspend(struct samsung_pin_bank *bank) } } +void gs101_pinctrl_suspend(struct samsung_pin_bank *bank) +{ + struct exynos_eint_gpio_save *save = bank->soc_priv; + const void __iomem *regs = bank->eint_base; + + if (bank->eint_type == EINT_TYPE_GPIO) { + save->eint_con = readl(regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset); + + save->eint_fltcon0 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET + + bank->eint_fltcon_offset); + + /* fltcon1 register only exists for pins 4-7 */ + if (bank->nr_pins > 4) + save->eint_fltcon1 = readl(regs + + EXYNOS_GPIO_EFLTCON_OFFSET + + bank->eint_fltcon_offset + 4); + + save->eint_mask = readl(regs + bank->irq_chip->eint_mask + + bank->eint_offset); + + pr_debug("%s: save con %#010x\n", + bank->name, save->eint_con); + pr_debug("%s: save fltcon0 %#010x\n", + bank->name, save->eint_fltcon0); + if (bank->nr_pins > 4) + pr_debug("%s: save fltcon1 %#010x\n", + bank->name, save->eint_fltcon1); + pr_debug("%s: save mask %#010x\n", + bank->name, save->eint_mask); + } else if (bank->eint_type == EINT_TYPE_WKUP) { + exynos_set_wakeup(bank); + } +} + void exynosautov920_pinctrl_suspend(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; @@ -819,6 +854,42 @@ void exynosautov920_pinctrl_suspend(struct samsung_pin_bank *bank) } } +void gs101_pinctrl_resume(struct samsung_pin_bank *bank) +{ + struct exynos_eint_gpio_save *save = bank->soc_priv; + + void __iomem *regs = bank->eint_base; + void __iomem *eint_fltcfg0 = regs + EXYNOS_GPIO_EFLTCON_OFFSET + + bank->eint_fltcon_offset; + + if (bank->eint_type == EINT_TYPE_GPIO) { + pr_debug("%s: con %#010x => %#010x\n", bank->name, + readl(regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset), save->eint_con); + + pr_debug("%s: fltcon0 %#010x => %#010x\n", bank->name, + readl(eint_fltcfg0), save->eint_fltcon0); + + /* fltcon1 register only exists for pins 4-7 */ + if (bank->nr_pins > 4) + pr_debug("%s: fltcon1 %#010x => %#010x\n", bank->name, + readl(eint_fltcfg0 + 4), save->eint_fltcon1); + + pr_debug("%s: mask %#010x => %#010x\n", bank->name, + readl(regs + bank->irq_chip->eint_mask + + bank->eint_offset), save->eint_mask); + + writel(save->eint_con, regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset); + writel(save->eint_fltcon0, eint_fltcfg0); + + if (bank->nr_pins > 4) + writel(save->eint_fltcon1, eint_fltcfg0 + 4); + writel(save->eint_mask, regs + bank->irq_chip->eint_mask + + bank->eint_offset); + } +} + void exynos_pinctrl_resume(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.h b/drivers/pinctrl/samsung/pinctrl-exynos.h index 3a771862b4b176..2bee52b61b9317 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.h +++ b/drivers/pinctrl/samsung/pinctrl-exynos.h @@ -244,6 +244,8 @@ void exynosautov920_pinctrl_resume(struct samsung_pin_bank *bank); void exynosautov920_pinctrl_suspend(struct samsung_pin_bank *bank); void exynos_pinctrl_suspend(struct samsung_pin_bank *bank); void exynos_pinctrl_resume(struct samsung_pin_bank *bank); +void gs101_pinctrl_suspend(struct samsung_pin_bank *bank); +void gs101_pinctrl_resume(struct samsung_pin_bank *bank); struct samsung_retention_ctrl * exynos_retention_init(struct samsung_pinctrl_drv_data *drvdata, const struct samsung_retention_data *data); From bb37f795d01961286b8f768a6d7152f32b589067 Mon Sep 17 00:00:00 2001 From: Sanjeev Yadav Date: Fri, 23 May 2025 13:14:01 -0700 Subject: [PATCH 2718/2924] scsi: core: ufs: Fix a hang in the error handler [ Upstream commit 8a3514d348de87a9d5e2ac00fbac4faae0b97996 ] ufshcd_err_handling_prepare() calls ufshcd_rpm_get_sync(). The latter function can only succeed if UFSHCD_EH_IN_PROGRESS is not set because resuming involves submitting a SCSI command and ufshcd_queuecommand() returns SCSI_MLQUEUE_HOST_BUSY if UFSHCD_EH_IN_PROGRESS is set. Fix this hang by setting UFSHCD_EH_IN_PROGRESS after ufshcd_rpm_get_sync() has been called instead of before. Backtrace: __switch_to+0x174/0x338 __schedule+0x600/0x9e4 schedule+0x7c/0xe8 schedule_timeout+0xa4/0x1c8 io_schedule_timeout+0x48/0x70 wait_for_common_io+0xa8/0x160 //waiting on START_STOP wait_for_completion_io_timeout+0x10/0x20 blk_execute_rq+0xe4/0x1e4 scsi_execute_cmd+0x108/0x244 ufshcd_set_dev_pwr_mode+0xe8/0x250 __ufshcd_wl_resume+0x94/0x354 ufshcd_wl_runtime_resume+0x3c/0x174 scsi_runtime_resume+0x64/0xa4 rpm_resume+0x15c/0xa1c __pm_runtime_resume+0x4c/0x90 // Runtime resume ongoing ufshcd_err_handler+0x1a0/0xd08 process_one_work+0x174/0x808 worker_thread+0x15c/0x490 kthread+0xf4/0x1ec ret_from_fork+0x10/0x20 Signed-off-by: Sanjeev Yadav [ bvanassche: rewrote patch description ] Fixes: 62694735ca95 ("[SCSI] ufs: Add runtime PM support for UFS host controller driver") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250523201409.1676055-1-bvanassche@acm.org Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 7735421e399182..04f769d907a446 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6587,9 +6587,14 @@ static void ufshcd_err_handler(struct work_struct *work) up(&hba->host_sem); return; } - ufshcd_set_eh_in_progress(hba); spin_unlock_irqrestore(hba->host->host_lock, flags); + ufshcd_err_handling_prepare(hba); + + spin_lock_irqsave(hba->host->host_lock, flags); + ufshcd_set_eh_in_progress(hba); + spin_unlock_irqrestore(hba->host->host_lock, flags); + /* Complete requests that have door-bell cleared by h/w */ ufshcd_complete_requests(hba, false); spin_lock_irqsave(hba->host->host_lock, flags); From 8e5e05c973f765e6e46f831527154563ea0ffefb Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 31 May 2025 18:24:58 +0300 Subject: [PATCH 2719/2924] Bluetooth: hci_core: fix list_for_each_entry_rcu usage [ Upstream commit 308a3a8ce8ea41b26c46169f3263e50f5997c28e ] Releasing + re-acquiring RCU lock inside list_for_each_entry_rcu() loop body is not correct. Fix by taking the update-side hdev->lock instead. Fixes: c7eaf80bfb0c ("Bluetooth: Fix hci_link_tx_to RCU lock usage") Signed-off-by: Pauli Virtanen Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2668e0e563c43f..67c7e0656ff71e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3406,23 +3406,18 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) bt_dev_err(hdev, "link tx timeout"); - rcu_read_lock(); + hci_dev_lock(hdev); /* Kill stalled connections */ - list_for_each_entry_rcu(c, &h->list, list) { + list_for_each_entry(c, &h->list, list) { if (c->type == type && c->sent) { bt_dev_err(hdev, "killing stalled connection %pMR", &c->dst); - /* hci_disconnect might sleep, so, we have to release - * the RCU read lock before calling it. - */ - rcu_read_unlock(); hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); - rcu_read_lock(); } } - rcu_read_unlock(); + hci_dev_unlock(hdev); } static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, From 7aae7846496c56036e6c45191836893dd80f63de Mon Sep 17 00:00:00 2001 From: Kiran K Date: Tue, 3 Jun 2025 15:34:38 +0530 Subject: [PATCH 2720/2924] Bluetooth: btintel_pcie: Fix driver not posting maximum rx buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit daabd276985055250528da97e9ce6d277d7009c2 ] The driver was posting only 6 rx buffers, despite the maximum rx buffers being defined as 16. Having fewer RX buffers caused firmware exceptions in HID use cases when events arrived in bursts. Exception seen on android 6.12 kernel. E Bluetooth: hci0: Received hw exception interrupt E Bluetooth: hci0: Received gp1 mailbox interrupt D Bluetooth: hci0: 00000000: ff 3e 87 80 03 01 01 01 03 01 0c 0d 02 1c 10 0e D Bluetooth: hci0: 00000010: 01 00 05 14 66 b0 28 b0 c0 b0 28 b0 ac af 28 b0 D Bluetooth: hci0: 00000020: 14 f1 28 b0 00 00 00 00 fa 04 00 00 00 00 40 10 D Bluetooth: hci0: 00000030: 08 00 00 00 7a 7a 7a 7a 47 00 fb a0 10 00 00 00 D Bluetooth: hci0: 00000000: 10 01 0a E Bluetooth: hci0: ---- Dump of debug registers — E Bluetooth: hci0: boot stage: 0xe0fb0047 E Bluetooth: hci0: ipc status: 0x00000004 E Bluetooth: hci0: ipc control: 0x00000000 E Bluetooth: hci0: ipc sleep control: 0x00000000 E Bluetooth: hci0: mbox_1: 0x00badbad E Bluetooth: hci0: mbox_2: 0x0000101c E Bluetooth: hci0: mbox_3: 0x00000008 E Bluetooth: hci0: mbox_4: 0x7a7a7a7a Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btintel_pcie.c | 3 ++- drivers/bluetooth/btintel_pcie.h | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 0a759ea26fd38f..a1bfc76fa29b22 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -303,8 +303,9 @@ static int btintel_pcie_submit_rx(struct btintel_pcie_data *data) static int btintel_pcie_start_rx(struct btintel_pcie_data *data) { int i, ret; + struct rxq *rxq = &data->rxq; - for (i = 0; i < BTINTEL_PCIE_RX_MAX_QUEUE; i++) { + for (i = 0; i < rxq->count; i++) { ret = btintel_pcie_submit_rx(data); if (ret) return ret; diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 873178019cad09..3f59138dfcca41 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -158,9 +158,6 @@ enum { /* Doorbell vector for TFD */ #define BTINTEL_PCIE_TX_DB_VEC 0 -/* Number of pending RX requests for downlink */ -#define BTINTEL_PCIE_RX_MAX_QUEUE 6 - /* Doorbell vector for FRBD */ #define BTINTEL_PCIE_RX_DB_VEC 513 From bdc06cb53d47d74ee0853cc017b02e707aaa881e Mon Sep 17 00:00:00 2001 From: Chandrashekar Devegowda Date: Tue, 3 Jun 2025 15:34:39 +0530 Subject: [PATCH 2721/2924] Bluetooth: btintel_pcie: Increase the tx and rx descriptor count [ Upstream commit 2dd711102ce69ae41f65d09c012441227d4aa983 ] This change addresses latency issues observed in HID use cases where events arrive in bursts. By increasing the Rx descriptor count to 64, the firmware can handle bursty data more effectively, reducing latency and preventing buffer overflows. Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btintel_pcie.c | 24 ++++++++++++------------ drivers/bluetooth/btintel_pcie.h | 7 +++++-- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index a1bfc76fa29b22..a4883523ccc9ed 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1665,8 +1665,8 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) * + size of index * Number of queues(2) * type of index array(4) * + size of context information */ - total = (sizeof(struct tfd) + sizeof(struct urbd0) + sizeof(struct frbd) - + sizeof(struct urbd1)) * BTINTEL_DESCS_COUNT; + total = (sizeof(struct tfd) + sizeof(struct urbd0)) * BTINTEL_PCIE_TX_DESCS_COUNT; + total += (sizeof(struct frbd) + sizeof(struct urbd1)) * BTINTEL_PCIE_RX_DESCS_COUNT; /* Add the sum of size of index array and size of ci struct */ total += (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 4) + sizeof(struct ctx_info); @@ -1691,36 +1691,36 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) data->dma_v_addr = v_addr; /* Setup descriptor count */ - data->txq.count = BTINTEL_DESCS_COUNT; - data->rxq.count = BTINTEL_DESCS_COUNT; + data->txq.count = BTINTEL_PCIE_TX_DESCS_COUNT; + data->rxq.count = BTINTEL_PCIE_RX_DESCS_COUNT; /* Setup tfds */ data->txq.tfds_p_addr = p_addr; data->txq.tfds = v_addr; - p_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); + v_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); /* Setup urbd0 */ data->txq.urbd0s_p_addr = p_addr; data->txq.urbd0s = v_addr; - p_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); + v_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); /* Setup FRBD*/ data->rxq.frbds_p_addr = p_addr; data->rxq.frbds = v_addr; - p_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); + v_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); /* Setup urbd1 */ data->rxq.urbd1s_p_addr = p_addr; data->rxq.urbd1s = v_addr; - p_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); + v_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); /* Setup data buffers for txq */ err = btintel_pcie_setup_txq_bufs(data, &data->txq); diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 3f59138dfcca41..a94910ccd5d3c2 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -135,8 +135,11 @@ enum btintel_pcie_tlv_type { /* Default interrupt timeout in msec */ #define BTINTEL_DEFAULT_INTR_TIMEOUT_MS 3000 -/* The number of descriptors in TX/RX queues */ -#define BTINTEL_DESCS_COUNT 16 +/* The number of descriptors in TX queues */ +#define BTINTEL_PCIE_TX_DESCS_COUNT 32 + +/* The number of descriptors in RX queues */ +#define BTINTEL_PCIE_RX_DESCS_COUNT 64 /* Number of Queue for TX and RX * It indicates the index of the IA(Index Array) From 4b372e5414f49020aee85f973dae0068a9d9d9f6 Mon Sep 17 00:00:00 2001 From: Chandrashekar Devegowda Date: Tue, 3 Jun 2025 15:34:40 +0530 Subject: [PATCH 2722/2924] Bluetooth: btintel_pcie: Reduce driver buffer posting to prevent race condition [ Upstream commit bf2ffc4d14db29cab781549912d2dc69127f4d3e ] Modify the driver to post 3 fewer buffers than the maximum rx buffers (64) allowed for the firmware. This change mitigates a hardware issue causing a race condition in the firmware, improving stability and data handling. Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btintel_pcie.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index a4883523ccc9ed..385e29367dd1df 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -305,7 +305,11 @@ static int btintel_pcie_start_rx(struct btintel_pcie_data *data) int i, ret; struct rxq *rxq = &data->rxq; - for (i = 0; i < rxq->count; i++) { + /* Post (BTINTEL_PCIE_RX_DESCS_COUNT - 3) buffers to overcome the + * hardware issues leading to race condition at the firmware. + */ + + for (i = 0; i < rxq->count - 3; i++) { ret = btintel_pcie_submit_rx(data); if (ret) return ret; From 32aa2fbe319f33b0318ec6f4fceb63879771a286 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 3 Jun 2025 16:12:39 -0400 Subject: [PATCH 2723/2924] Bluetooth: MGMT: Fix UAF on mgmt_remove_adv_monitor_complete [ Upstream commit e6ed54e86aae9e4f7286ce8d5c73780f91b48d1c ] This reworks MGMT_OP_REMOVE_ADV_MONITOR to not use mgmt_pending_add to avoid crashes like bellow: ================================================================== BUG: KASAN: slab-use-after-free in mgmt_remove_adv_monitor_complete+0xe5/0x540 net/bluetooth/mgmt.c:5406 Read of size 8 at addr ffff88801c53f318 by task kworker/u5:5/5341 CPU: 0 UID: 0 PID: 5341 Comm: kworker/u5:5 Not tainted 6.15.0-syzkaller-10402-g4cb6c8af8591 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Workqueue: hci0 hci_cmd_sync_work Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xd2/0x2b0 mm/kasan/report.c:521 kasan_report+0x118/0x150 mm/kasan/report.c:634 mgmt_remove_adv_monitor_complete+0xe5/0x540 net/bluetooth/mgmt.c:5406 hci_cmd_sync_work+0x261/0x3a0 net/bluetooth/hci_sync.c:334 process_one_work kernel/workqueue.c:3238 [inline] process_scheduled_works+0xade/0x17b0 kernel/workqueue.c:3321 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3402 kthread+0x711/0x8a0 kernel/kthread.c:464 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Allocated by task 5987: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __kmalloc_cache_noprof+0x230/0x3d0 mm/slub.c:4358 kmalloc_noprof include/linux/slab.h:905 [inline] kzalloc_noprof include/linux/slab.h:1039 [inline] mgmt_pending_new+0x65/0x240 net/bluetooth/mgmt_util.c:252 mgmt_pending_add+0x34/0x120 net/bluetooth/mgmt_util.c:279 remove_adv_monitor+0x103/0x1b0 net/bluetooth/mgmt.c:5454 hci_mgmt_cmd+0x9c9/0xef0 net/bluetooth/hci_sock.c:1719 hci_sock_sendmsg+0x6ca/0xef0 net/bluetooth/hci_sock.c:1839 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:727 sock_write_iter+0x258/0x330 net/socket.c:1131 new_sync_write fs/read_write.c:593 [inline] vfs_write+0x548/0xa90 fs/read_write.c:686 ksys_write+0x145/0x250 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 5989: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x62/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2380 [inline] slab_free mm/slub.c:4642 [inline] kfree+0x18e/0x440 mm/slub.c:4841 mgmt_pending_foreach+0xc9/0x120 net/bluetooth/mgmt_util.c:242 mgmt_index_removed+0x10d/0x2f0 net/bluetooth/mgmt.c:9366 hci_sock_bind+0xbe9/0x1000 net/bluetooth/hci_sock.c:1314 __sys_bind_socket net/socket.c:1810 [inline] __sys_bind+0x2c3/0x3e0 net/socket.c:1841 __do_sys_bind net/socket.c:1846 [inline] __se_sys_bind net/socket.c:1844 [inline] __x64_sys_bind+0x7a/0x90 net/socket.c:1844 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 66bd095ab5d4 ("Bluetooth: advmon offload MSFT remove monitor") Closes: https://syzkaller.appspot.com/bug?extid=feb0dc579bbe30a13190 Reported-by: syzbot+feb0dc579bbe30a13190@syzkaller.appspotmail.com Tested-by: syzbot+feb0dc579bbe30a13190@syzkaller.appspotmail.com Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_core.c | 4 +--- net/bluetooth/mgmt.c | 37 ++++++++++---------------------- 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index fc66d9b61198e6..1743bde8528b24 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2397,7 +2397,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 67c7e0656ff71e..7317efb6306ec2 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1877,10 +1877,8 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) if (monitor->handle) idr_remove(&hdev->adv_monitors_idr, monitor->handle); - if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) { + if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) hdev->adv_monitors_cnt--; - mgmt_adv_monitor_removed(hdev, monitor->handle); - } kfree(monitor); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 14a9462fced5e9..feaeec2423ae85 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5108,24 +5108,14 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk); } -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle) +static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev, + u16 handle) { struct mgmt_ev_adv_monitor_removed ev; - struct mgmt_pending_cmd *cmd; - struct sock *sk_skip = NULL; - struct mgmt_cp_remove_adv_monitor *cp; - - cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev); - if (cmd) { - cp = cmd->param; - - if (cp->monitor_handle) - sk_skip = cmd->sk; - } ev.monitor_handle = cpu_to_le16(handle); - mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip); + mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk); } static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, @@ -5227,8 +5217,7 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, if (pending_find(MGMT_OP_SET_LE, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || - pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) || - pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) { + pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } @@ -5398,8 +5387,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_remove_adv_monitor *cp; - if (status == -ECANCELED || - cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) + if (status == -ECANCELED) return; hci_dev_lock(hdev); @@ -5408,12 +5396,14 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, rp.monitor_handle = cp->monitor_handle; - if (!status) + if (!status) { + mgmt_adv_monitor_removed(cmd->sk, hdev, cp->monitor_handle); hci_update_passive_scan(hdev); + } mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); hci_dev_unlock(hdev); bt_dev_dbg(hdev, "remove monitor %d complete, status %d", @@ -5423,10 +5413,6 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; - - if (cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) - return -ECANCELED; - struct mgmt_cp_remove_adv_monitor *cp = cmd->param; u16 handle = __le16_to_cpu(cp->monitor_handle); @@ -5445,14 +5431,13 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (pending_find(MGMT_OP_SET_LE, hdev) || - pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } - cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); + cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); if (!cmd) { status = MGMT_STATUS_NO_RESOURCES; goto unlock; @@ -5462,7 +5447,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, mgmt_remove_adv_monitor_complete); if (err) { - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); if (err == -ENOMEM) status = MGMT_STATUS_NO_RESOURCES; From d7882db79135c829a922daf3571f33ea1e056ae3 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 20 May 2025 15:42:21 -0400 Subject: [PATCH 2724/2924] Bluetooth: MGMT: Protect mgmt_pending list with its own lock [ Upstream commit 6fe26f694c824b8a4dbf50c635bee1302e3f099c ] This uses a mutex to protect from concurrent access of mgmt_pending list which can cause crashes like: ================================================================== BUG: KASAN: slab-use-after-free in hci_sock_get_channel+0x60/0x68 net/bluetooth/hci_sock.c:91 Read of size 2 at addr ffff0000c48885b2 by task syz.4.334/7318 CPU: 0 UID: 0 PID: 7318 Comm: syz.4.334 Not tainted 6.15.0-rc7-syzkaller-g187899f4124a #0 PREEMPT Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call trace: show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:466 (C) __dump_stack+0x30/0x40 lib/dump_stack.c:94 dump_stack_lvl+0xd8/0x12c lib/dump_stack.c:120 print_address_description+0xa8/0x254 mm/kasan/report.c:408 print_report+0x68/0x84 mm/kasan/report.c:521 kasan_report+0xb0/0x110 mm/kasan/report.c:634 __asan_report_load2_noabort+0x20/0x2c mm/kasan/report_generic.c:379 hci_sock_get_channel+0x60/0x68 net/bluetooth/hci_sock.c:91 mgmt_pending_find+0x7c/0x140 net/bluetooth/mgmt_util.c:223 pending_find net/bluetooth/mgmt.c:947 [inline] remove_adv_monitor+0x44/0x1a4 net/bluetooth/mgmt.c:5445 hci_mgmt_cmd+0x780/0xc00 net/bluetooth/hci_sock.c:1712 hci_sock_sendmsg+0x544/0xbb0 net/bluetooth/hci_sock.c:1832 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg net/socket.c:727 [inline] sock_write_iter+0x25c/0x378 net/socket.c:1131 new_sync_write fs/read_write.c:591 [inline] vfs_write+0x62c/0x97c fs/read_write.c:684 ksys_write+0x120/0x210 fs/read_write.c:736 __do_sys_write fs/read_write.c:747 [inline] __se_sys_write fs/read_write.c:744 [inline] __arm64_sys_write+0x7c/0x90 fs/read_write.c:744 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x58/0x17c arch/arm64/kernel/entry-common.c:767 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Allocated by task 7037: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_alloc_info+0x44/0x54 mm/kasan/generic.c:562 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x9c/0xb4 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4327 [inline] __kmalloc_noprof+0x2fc/0x4c8 mm/slub.c:4339 kmalloc_noprof include/linux/slab.h:909 [inline] sk_prot_alloc+0xc4/0x1f0 net/core/sock.c:2198 sk_alloc+0x44/0x3ac net/core/sock.c:2254 bt_sock_alloc+0x4c/0x300 net/bluetooth/af_bluetooth.c:148 hci_sock_create+0xa8/0x194 net/bluetooth/hci_sock.c:2202 bt_sock_create+0x14c/0x24c net/bluetooth/af_bluetooth.c:132 __sock_create+0x43c/0x91c net/socket.c:1541 sock_create net/socket.c:1599 [inline] __sys_socket_create net/socket.c:1636 [inline] __sys_socket+0xd4/0x1c0 net/socket.c:1683 __do_sys_socket net/socket.c:1697 [inline] __se_sys_socket net/socket.c:1695 [inline] __arm64_sys_socket+0x7c/0x94 net/socket.c:1695 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x58/0x17c arch/arm64/kernel/entry-common.c:767 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Freed by task 6607: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_free_info+0x58/0x70 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x68/0x88 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2380 [inline] slab_free mm/slub.c:4642 [inline] kfree+0x17c/0x474 mm/slub.c:4841 sk_prot_free net/core/sock.c:2237 [inline] __sk_destruct+0x4f4/0x760 net/core/sock.c:2332 sk_destruct net/core/sock.c:2360 [inline] __sk_free+0x320/0x430 net/core/sock.c:2371 sk_free+0x60/0xc8 net/core/sock.c:2382 sock_put include/net/sock.h:1944 [inline] mgmt_pending_free+0x88/0x118 net/bluetooth/mgmt_util.c:290 mgmt_pending_remove+0xec/0x104 net/bluetooth/mgmt_util.c:298 mgmt_set_powered_complete+0x418/0x5cc net/bluetooth/mgmt.c:1355 hci_cmd_sync_work+0x204/0x33c net/bluetooth/hci_sync.c:334 process_one_work+0x7e8/0x156c kernel/workqueue.c:3238 process_scheduled_works kernel/workqueue.c:3319 [inline] worker_thread+0x958/0xed8 kernel/workqueue.c:3400 kthread+0x5fc/0x75c kernel/kthread.c:464 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:847 Fixes: a380b6cff1a2 ("Bluetooth: Add generic mgmt helper API") Closes: https://syzkaller.appspot.com/bug?extid=0a7039d5d9986ff4ecec Closes: https://syzkaller.appspot.com/bug?extid=cc0cc52e7f43dc9e6df1 Reported-by: syzbot+0a7039d5d9986ff4ecec@syzkaller.appspotmail.com Tested-by: syzbot+0a7039d5d9986ff4ecec@syzkaller.appspotmail.com Tested-by: syzbot+cc0cc52e7f43dc9e6df1@syzkaller.appspotmail.com Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 1 + net/bluetooth/mgmt.c | 101 +++++++++++++++---------------- net/bluetooth/mgmt_util.c | 32 ++++++++-- net/bluetooth/mgmt_util.h | 4 +- 5 files changed, 80 insertions(+), 59 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1743bde8528b24..d15316bffd70bb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -545,6 +545,7 @@ struct hci_dev { struct hci_conn_hash conn_hash; struct list_head mesh_pending; + struct mutex mgmt_pending_lock; struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 7317efb6306ec2..af30a420bab75a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2485,6 +2485,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); + mutex_init(&hdev->mgmt_pending_lock); ida_init(&hdev->unset_handle_ida); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index feaeec2423ae85..de7adb9a47f972 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1447,22 +1447,17 @@ static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data) send_settings_rsp(cmd->sk, cmd->opcode, match->hdev); - list_del(&cmd->list); - if (match->sk == NULL) { match->sk = cmd->sk; sock_hold(match->sk); } - - mgmt_pending_free(cmd); } static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data) { u8 *status = data; - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); - mgmt_pending_remove(cmd); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, *status); } static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) @@ -1476,8 +1471,6 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) if (cmd->cmd_complete) { cmd->cmd_complete(cmd, match->mgmt_status); - mgmt_pending_remove(cmd); - return; } @@ -1486,13 +1479,13 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, cmd->param, cmd->param_len); } static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, cmd->param, sizeof(struct mgmt_addr_info)); } @@ -1532,7 +1525,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); goto done; } @@ -1707,7 +1700,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); goto done; } @@ -1943,8 +1936,8 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) new_settings(hdev, NULL); } - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp, - &mgmt_err); + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, + cmd_status_rsp, &mgmt_err); return; } @@ -1954,7 +1947,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); } - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, settings_rsp, &match); if (changed) new_settings(hdev, match.sk); @@ -2074,12 +2067,12 @@ static void set_le_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); if (status) { - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp, - &status); + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, cmd_status_rsp, + &status); return; } - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, settings_rsp, &match); new_settings(hdev, match.sk); @@ -2138,7 +2131,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) struct sock *sk = cmd->sk; if (status) { - mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, + mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, cmd_status_rsp, &status); return; } @@ -2638,7 +2631,7 @@ static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), hdev->dev_class, 3); mgmt_pending_free(cmd); @@ -3427,7 +3420,7 @@ static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status) bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, + err = mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_PAIR_DEVICE, status, &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ @@ -5186,7 +5179,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, hci_update_passive_scan(hdev); } - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_remove(cmd); @@ -5401,7 +5394,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, hci_update_passive_scan(hdev); } - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -5777,7 +5770,7 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err) cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev)) return; - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -5998,7 +5991,7 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -6223,7 +6216,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) u8 status = mgmt_status(err); if (status) { - mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, cmd_status_rsp, &status); return; } @@ -6233,7 +6226,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) else hci_dev_clear_flag(hdev, HCI_ADVERTISING); - mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp, + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, settings_rsp, &match); new_settings(hdev, match.sk); @@ -6577,7 +6570,7 @@ static void set_bredr_complete(struct hci_dev *hdev, void *data, int err) */ hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); } else { send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev); new_settings(hdev, cmd->sk); @@ -6714,7 +6707,7 @@ static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err) if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); goto done; } @@ -7161,7 +7154,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err) rp.max_tx_power = HCI_TX_POWER_INVALID; } - mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_GET_CONN_INFO, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -7321,7 +7314,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err) } complete: - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -8571,10 +8564,10 @@ static void add_advertising_complete(struct hci_dev *hdev, void *data, int err) rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); add_adv_complete(hdev, cmd->sk, cp->instance, err); @@ -8762,10 +8755,10 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data, hci_remove_adv_instance(hdev, cp->instance); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); } else { - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); } @@ -8912,10 +8905,10 @@ static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err) rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -9074,10 +9067,10 @@ static void remove_advertising_complete(struct hci_dev *hdev, void *data, rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -9349,7 +9342,7 @@ void mgmt_index_removed(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); + mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, @@ -9387,7 +9380,8 @@ void mgmt_power_on(struct hci_dev *hdev, int err) hci_update_passive_scan(hdev); } - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp, + &match); new_settings(hdev, match.sk); @@ -9402,7 +9396,8 @@ void __mgmt_power_off(struct hci_dev *hdev) struct cmd_lookup match = { NULL, hdev }; u8 zero_cod[] = { 0, 0, 0 }; - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp, + &match); /* If the power off is because of hdev unregistration let * use the appropriate INVALID_INDEX status. Otherwise use @@ -9416,7 +9411,7 @@ void __mgmt_power_off(struct hci_dev *hdev) else match.mgmt_status = MGMT_STATUS_NOT_POWERED; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); + mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, @@ -9657,7 +9652,6 @@ static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data) device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk); cmd->cmd_complete(cmd, 0); - mgmt_pending_remove(cmd); } bool mgmt_powering_down(struct hci_dev *hdev) @@ -9713,8 +9707,8 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_cp_disconnect *cp; struct mgmt_pending_cmd *cmd; - mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp, - hdev); + mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, true, + unpair_device_rsp, hdev); cmd = pending_find(MGMT_OP_DISCONNECT, hdev); if (!cmd) @@ -9907,7 +9901,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) if (status) { u8 mgmt_err = mgmt_status(status); - mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, + mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true, cmd_status_rsp, &mgmt_err); return; } @@ -9917,8 +9911,8 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) else changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY); - mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp, - &match); + mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true, + settings_rsp, &match); if (changed) new_settings(hdev, match.sk); @@ -9942,9 +9936,12 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, { struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; - mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); - mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); - mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); + mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, false, sk_lookup, + &match); + mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, false, sk_lookup, + &match); + mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, false, sk_lookup, + &match); if (!status) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 3713ff490c65d5..a88a07da394734 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -217,30 +217,47 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev) { - struct mgmt_pending_cmd *cmd; + struct mgmt_pending_cmd *cmd, *tmp; + + mutex_lock(&hdev->mgmt_pending_lock); - list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (hci_sock_get_channel(cmd->sk) != channel) continue; - if (cmd->opcode == opcode) + + if (cmd->opcode == opcode) { + mutex_unlock(&hdev->mgmt_pending_lock); return cmd; + } } + mutex_unlock(&hdev->mgmt_pending_lock); + return NULL; } -void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data) { struct mgmt_pending_cmd *cmd, *tmp; + mutex_lock(&hdev->mgmt_pending_lock); + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (opcode > 0 && cmd->opcode != opcode) continue; + if (remove) + list_del(&cmd->list); + cb(cmd, data); + + if (remove) + mgmt_pending_free(cmd); } + + mutex_unlock(&hdev->mgmt_pending_lock); } struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, @@ -254,7 +271,7 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, return NULL; cmd->opcode = opcode; - cmd->index = hdev->id; + cmd->hdev = hdev; cmd->param = kmemdup(data, len, GFP_KERNEL); if (!cmd->param) { @@ -280,7 +297,9 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, if (!cmd) return NULL; + mutex_lock(&hdev->mgmt_pending_lock); list_add_tail(&cmd->list, &hdev->mgmt_pending); + mutex_unlock(&hdev->mgmt_pending_lock); return cmd; } @@ -294,7 +313,10 @@ void mgmt_pending_free(struct mgmt_pending_cmd *cmd) void mgmt_pending_remove(struct mgmt_pending_cmd *cmd) { + mutex_lock(&cmd->hdev->mgmt_pending_lock); list_del(&cmd->list); + mutex_unlock(&cmd->hdev->mgmt_pending_lock); + mgmt_pending_free(cmd); } diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h index f2ba994ab1d847..024e51dd693756 100644 --- a/net/bluetooth/mgmt_util.h +++ b/net/bluetooth/mgmt_util.h @@ -33,7 +33,7 @@ struct mgmt_mesh_tx { struct mgmt_pending_cmd { struct list_head list; u16 opcode; - int index; + struct hci_dev *hdev; void *param; size_t param_len; struct sock *sk; @@ -54,7 +54,7 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev); -void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data); struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, From 1b6543cbf5e10a35ee3c17952ed1f066829964a6 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 2 Jun 2025 21:49:14 +0200 Subject: [PATCH 2725/2924] net: dsa: b53: fix untagged traffic sent via cpu tagged with VID 0 [ Upstream commit 692eb9f8a5b71d852e873375d20cf5da7a046ea6 ] When Linux sends out untagged traffic from a port, it will enter the CPU port without any VLAN tag, even if the port is a member of a vlan filtering bridge with a PVID egress untagged VLAN. This makes the CPU port's PVID take effect, and the PVID's VLAN table entry controls if the packet will be tagged on egress. Since commit 45e9d59d3950 ("net: dsa: b53: do not allow to configure VLAN 0") we remove bridged ports from VLAN 0 when joining or leaving a VLAN aware bridge. But we also clear the untagged bit, causing untagged traffic from the controller to become tagged with VID 0 (and priority 0). Fix this by not touching the untagged map of VLAN 0. Additionally, always keep the CPU port as a member, as the untag map is only effective as long as there is at least one member, and we would remove it when bridging all ports and leaving no standalone ports. Since Linux (and the switch) treats VLAN 0 tagged traffic like untagged, the actual impact of this is rather low, but this also prevented earlier detection of the issue. Fixes: 45e9d59d3950 ("net: dsa: b53: do not allow to configure VLAN 0") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250602194914.1011890-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/b53/b53_common.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 862bdccb743976..dc2f4adac9bc96 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2034,9 +2034,6 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, b53_get_vlan_entry(dev, pvid, vl); vl->members &= ~BIT(port); - if (vl->members == BIT(cpu_port)) - vl->members &= ~BIT(cpu_port); - vl->untag = vl->members; b53_set_vlan_entry(dev, pvid, vl); } @@ -2115,8 +2112,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) } b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); + vl->members |= BIT(port); b53_set_vlan_entry(dev, pvid, vl); } } From ef8fc007c28a30a4c0d90bf755e0f343d99bb392 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Wed, 21 May 2025 01:07:17 +0900 Subject: [PATCH 2726/2924] ptp: remove ptp->n_vclocks check logic in ptp_vclock_in_use() [ Upstream commit 87f7ce260a3c838b49e1dc1ceedf1006795157a2 ] There is no disagreement that we should check both ptp->is_virtual_clock and ptp->n_vclocks to check if the ptp virtual clock is in use. However, when we acquire ptp->n_vclocks_mux to read ptp->n_vclocks in ptp_vclock_in_use(), we observe a recursive lock in the call trace starting from n_vclocks_store(). ============================================ WARNING: possible recursive locking detected 6.15.0-rc6 #1 Not tainted -------------------------------------------- syz.0.1540/13807 is trying to acquire lock: ffff888035a24868 (&ptp->n_vclocks_mux){+.+.}-{4:4}, at: ptp_vclock_in_use drivers/ptp/ptp_private.h:103 [inline] ffff888035a24868 (&ptp->n_vclocks_mux){+.+.}-{4:4}, at: ptp_clock_unregister+0x21/0x250 drivers/ptp/ptp_clock.c:415 but task is already holding lock: ffff888030704868 (&ptp->n_vclocks_mux){+.+.}-{4:4}, at: n_vclocks_store+0xf1/0x6d0 drivers/ptp/ptp_sysfs.c:215 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&ptp->n_vclocks_mux); lock(&ptp->n_vclocks_mux); *** DEADLOCK *** .... ============================================ The best way to solve this is to remove the logic that checks ptp->n_vclocks in ptp_vclock_in_use(). The reason why this is appropriate is that any path that uses ptp->n_vclocks must unconditionally check if ptp->n_vclocks is greater than 0 before unregistering vclocks, and all functions are already written this way. And in the function that uses ptp->n_vclocks, we already get ptp->n_vclocks_mux before unregistering vclocks. Therefore, we need to remove the redundant check for ptp->n_vclocks in ptp_vclock_in_use() to prevent recursive locking. Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Signed-off-by: Jeongjun Park Acked-by: Richard Cochran Link: https://patch.msgid.link/20250520160717.7350-1-aha310510@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/ptp/ptp_private.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 18934e28469ee6..528d86a33f37de 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -98,17 +98,7 @@ static inline int queue_cnt(const struct timestamp_event_queue *q) /* Check if ptp virtual clock is in use */ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) { - bool in_use = false; - - if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) - return true; - - if (!ptp->is_virtual_clock && ptp->n_vclocks) - in_use = true; - - mutex_unlock(&ptp->n_vclocks_mux); - - return in_use; + return !ptp->is_virtual_clock; } /* Check if ptp clock shall be free running */ From d255701eb5fc852ed3b29fcd191d2a56e002005a Mon Sep 17 00:00:00 2001 From: Caleb Connolly Date: Tue, 18 Mar 2025 20:50:27 +0000 Subject: [PATCH 2727/2924] ath10k: snoc: fix unbalanced IRQ enable in crash recovery [ Upstream commit 1650d32b92b01db03a1a95d69ee74fcbc34d4b00 ] In ath10k_snoc_hif_stop() we skip disabling the IRQs in the crash recovery flow, but we still unconditionally call enable again in ath10k_snoc_hif_start(). We can't check the ATH10K_FLAG_CRASH_FLUSH bit since it is cleared before hif_start() is called, so instead check the ATH10K_SNOC_FLAG_RECOVERY flag and skip enabling the IRQs during crash recovery. This fixes unbalanced IRQ enable splats that happen after recovering from a crash. Fixes: 0e622f67e041 ("ath10k: add support for WCN3990 firmware crash recovery") Signed-off-by: Caleb Connolly Tested-by: Loic Poulain Link: https://patch.msgid.link/20250318205043.1043148-1-caleb.connolly@linaro.org Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/snoc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 866bad2db33487..65673b1aba55d2 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -937,7 +937,9 @@ static int ath10k_snoc_hif_start(struct ath10k *ar) dev_set_threaded(ar->napi_dev, true); ath10k_core_napi_enable(ar); - ath10k_snoc_irq_enable(ar); + /* IRQs are left enabled when we restart due to a firmware crash */ + if (!test_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags)) + ath10k_snoc_irq_enable(ar); ath10k_snoc_rx_post(ar); clear_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags); From 913c7c2448e5cc90cdac1262b5e23fe1107535fa Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:42 +0800 Subject: [PATCH 2728/2924] wifi: ath11k: avoid burning CPU in ath11k_debugfs_fw_stats_request() [ Upstream commit 9f6e82d11bb9692a90d20b10f87345598945c803 ] We get report [1] that CPU is running a hot loop in ath11k_debugfs_fw_stats_request(): 94.60% 0.00% i3status [kernel.kallsyms] [k] do_syscall_64 | --94.60%--do_syscall_64 | --94.55%--__sys_sendmsg ___sys_sendmsg ____sys_sendmsg netlink_sendmsg netlink_unicast genl_rcv netlink_rcv_skb genl_rcv_msg | --94.55%--genl_family_rcv_msg_dumpit __netlink_dump_start netlink_dump genl_dumpit nl80211_dump_station | --94.55%--ieee80211_dump_station sta_set_sinfo | --94.55%--ath11k_mac_op_sta_statistics ath11k_debugfs_get_fw_stats | --94.55%--ath11k_debugfs_fw_stats_request | |--41.73%--_raw_spin_lock_bh | |--22.74%--__local_bh_enable_ip | |--9.22%--_raw_spin_unlock_bh | --6.66%--srso_alias_safe_ret This is because, if for whatever reason ar->fw_stats_done is not set by ath11k_update_stats_event(), ath11k_debugfs_fw_stats_request() won't yield CPU before an up to 3s timeout. Change to completion mechanism to avoid CPU burning. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Reported-by: Yury Vostrikov Closes: https://lore.kernel.org/all/7324ac7a-8b7a-42a5-aa19-de52138ff638@app.fastmail.com/ # [1] Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-2-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/core.c | 1 + drivers/net/wireless/ath/ath11k/core.h | 2 +- drivers/net/wireless/ath/ath11k/debugfs.c | 38 +++++++++-------------- drivers/net/wireless/ath/ath11k/mac.c | 2 +- drivers/net/wireless/ath/ath11k/wmi.c | 2 +- 5 files changed, 18 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 8d08dd47bde9c7..6c346a8df2df19 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -951,6 +951,7 @@ void ath11k_fw_stats_init(struct ath11k *ar) INIT_LIST_HEAD(&ar->fw_stats.bcn); init_completion(&ar->fw_stats_complete); + init_completion(&ar->fw_stats_done); } void ath11k_fw_stats_free(struct ath11k_fw_stats *stats) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 1a3d0de4afde83..6abbb29034f07a 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -783,7 +783,7 @@ struct ath11k { u8 alpha2[REG_ALPHA2_LEN + 1]; struct ath11k_fw_stats fw_stats; struct completion fw_stats_complete; - bool fw_stats_done; + struct completion fw_stats_done; /* protected by conf_mutex */ bool ps_state_enable; diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index bf192529e3fe26..1d03e3aab011d5 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2020 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -96,7 +96,6 @@ void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) { spin_lock_bh(&ar->data_lock); - ar->fw_stats_done = false; ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); spin_unlock_bh(&ar->data_lock); @@ -114,7 +113,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* WMI_REQUEST_PDEV_STAT request has been already processed */ if (stats->stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); return; } @@ -138,7 +137,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * &ar->fw_stats.vdevs); if (is_end) { - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); num_vdev = 0; } return; @@ -158,7 +157,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * &ar->fw_stats.bcn); if (is_end) { - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); num_bcn = 0; } } @@ -168,21 +167,15 @@ static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, struct stats_request_params *req_param) { struct ath11k_base *ab = ar->ab; - unsigned long timeout, time_left; + unsigned long time_left; int ret; lockdep_assert_held(&ar->conf_mutex); - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - timeout = jiffies + secs_to_jiffies(3); - ath11k_debugfs_fw_stats_reset(ar); reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); @@ -193,21 +186,18 @@ static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, } time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - if (!time_left) return -ETIMEDOUT; - for (;;) { - if (time_after(jiffies, timeout)) - break; + /* FW stats can get split when exceeding the stats data buffer limit. + * In that case, since there is no end marking for the back-to-back + * received 'update stats' event, we keep a 3 seconds timeout in case, + * fw_stats_done is not marked yet + */ + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) + return -ETIMEDOUT; - spin_lock_bh(&ar->data_lock); - if (ar->fw_stats_done) { - spin_unlock_bh(&ar->data_lock); - break; - } - spin_unlock_bh(&ar->data_lock); - } return 0; } diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 97816916abac96..8a018c0190c8a7 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9384,11 +9384,11 @@ static int ath11k_fw_stats_request(struct ath11k *ar, lockdep_assert_held(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); - ar->fw_stats_done = false; ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); spin_unlock_bh(&ar->data_lock); reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); if (ret) { diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index d7f852bebf4aa2..27cb0bb06b93c8 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8189,7 +8189,7 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs); - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); goto complete; } From faea966f5a6801524fa341b33903c7014d48546e Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:43 +0800 Subject: [PATCH 2729/2924] wifi: ath11k: don't use static variables in ath11k_debugfs_fw_stats_process() [ Upstream commit 2bcf73b2612dda7432f2c2eaad6679bd291791f2 ] Currently ath11k_debugfs_fw_stats_process() is using static variables to count firmware stat events. Taking num_vdev as an example, if for whatever reason ( say ar->num_started_vdevs is 0 or firmware bug etc.) the following condition (++num_vdev) == total_vdevs_started is not met, is_end is not set thus num_vdev won't be cleared. Next time when firmware stats is requested again, even if everything is working fine, we will fail due to the condition above will never be satisfied. The same applies to num_bcn as well. Change to use non-static counters so that we have a chance to clear them each time firmware stats is requested. Currently only ath11k_fw_stats_request() and ath11k_debugfs_fw_stats_request() are requesting firmware stats, so clear counters there. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: da3a9d3c1576 ("ath11k: refactor debugfs code into debugfs.c") Signed-off-by: Baochen Qiang Acked-by: Kalle Valo Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-3-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/core.h | 2 ++ drivers/net/wireless/ath/ath11k/debugfs.c | 16 +++++++--------- drivers/net/wireless/ath/ath11k/mac.c | 2 ++ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 6abbb29034f07a..529aca4f40621e 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -599,6 +599,8 @@ struct ath11k_fw_stats { struct list_head pdevs; struct list_head vdevs; struct list_head bcn; + u32 num_vdev_recvd; + u32 num_bcn_recvd; }; struct ath11k_dbg_htt_stats { diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index 1d03e3aab011d5..27c93c0b4c2236 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -98,6 +98,8 @@ static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) spin_lock_bh(&ar->data_lock); ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; spin_unlock_bh(&ar->data_lock); } @@ -106,7 +108,6 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * struct ath11k_base *ab = ar->ab; struct ath11k_pdev *pdev; bool is_end; - static unsigned int num_vdev, num_bcn; size_t total_vdevs_started = 0; int i; @@ -131,15 +132,14 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * total_vdevs_started += ar->num_started_vdevs; } - is_end = ((++num_vdev) == total_vdevs_started); + is_end = ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started); list_splice_tail_init(&stats->vdevs, &ar->fw_stats.vdevs); - if (is_end) { + if (is_end) complete(&ar->fw_stats_done); - num_vdev = 0; - } + return; } @@ -151,15 +151,13 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* Mark end until we reached the count of all started VDEVs * within the PDEV */ - is_end = ((++num_bcn) == ar->num_started_vdevs); + is_end = ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs); list_splice_tail_init(&stats->bcn, &ar->fw_stats.bcn); - if (is_end) { + if (is_end) complete(&ar->fw_stats_done); - num_bcn = 0; - } } } diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 8a018c0190c8a7..d97273b205d9f8 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9385,6 +9385,8 @@ static int ath11k_fw_stats_request(struct ath11k *ar, spin_lock_bh(&ar->data_lock); ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; spin_unlock_bh(&ar->data_lock); reinit_completion(&ar->fw_stats_complete); From 75aa65cd9e2f49d8ca9f63863bc82c81335bafd5 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:44 +0800 Subject: [PATCH 2730/2924] wifi: ath11k: don't wait when there is no vdev started [ Upstream commit 3b6d00fa883075dcaf49221538230e038a9c0b43 ] For WMI_REQUEST_VDEV_STAT request, firmware might split response into multiple events dut to buffer limit, hence currently in ath11k_debugfs_fw_stats_process() we wait until all events received. In case there is no vdev started, this results in that below condition would never get satisfied ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started) finally the requestor would be blocked until wait time out. The same applies to WMI_REQUEST_BCN_STAT request as well due to: ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs) Change to check the number of started vdev first: if it is zero, finish wait directly; if not, follow the old way. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-4-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/debugfs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index 27c93c0b4c2236..ccf0e62c7d7aed 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -107,7 +107,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * { struct ath11k_base *ab = ar->ab; struct ath11k_pdev *pdev; - bool is_end; + bool is_end = true; size_t total_vdevs_started = 0; int i; @@ -132,7 +132,9 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * total_vdevs_started += ar->num_started_vdevs; } - is_end = ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started); + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); list_splice_tail_init(&stats->vdevs, &ar->fw_stats.vdevs); @@ -151,7 +153,9 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* Mark end until we reached the count of all started VDEVs * within the PDEV */ - is_end = ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs); + if (ar->num_started_vdevs) + is_end = ((++ar->fw_stats.num_bcn_recvd) == + ar->num_started_vdevs); list_splice_tail_init(&stats->bcn, &ar->fw_stats.bcn); From cfb834c6f3afc78e95e80d024785fe25ef12831b Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:45 +0800 Subject: [PATCH 2731/2924] wifi: ath11k: move some firmware stats related functions outside of debugfs [ Upstream commit 72610ed7d79da17ee09102534d6c696a4ea8a08e ] Commit b488c766442f ("ath11k: report rssi of each chain to mac80211 for QCA6390/WCN6855") and commit c3b39553fc77 ("ath11k: add signal report to mac80211 for QCA6390 and WCN6855") call debugfs functions in mac ops. Those functions are no-ops if CONFIG_ATH11K_DEBUGFS is not enabled, thus cause wrong status reported. Move them to mac.c. Besides, since WMI_REQUEST_RSSI_PER_CHAIN_STAT and WMI_REQUEST_VDEV_STAT stats could also be requested via mac ops, process them directly in ath11k_update_stats_event(). Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: b488c766442f ("ath11k: report rssi of each chain to mac80211 for QCA6390/WCN6855") Fixes: c3b39553fc77 ("ath11k: add signal report to mac80211 for QCA6390 and WCN6855") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-5-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/debugfs.c | 126 ++-------------------- drivers/net/wireless/ath/ath11k/debugfs.h | 10 +- drivers/net/wireless/ath/ath11k/mac.c | 88 ++++++++++++++- drivers/net/wireless/ath/ath11k/mac.h | 4 +- drivers/net/wireless/ath/ath11k/wmi.c | 45 +++++++- 5 files changed, 135 insertions(+), 138 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index ccf0e62c7d7aed..5d46f8e4c231fb 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -93,58 +93,14 @@ void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, spin_unlock_bh(&dbr_data->lock); } -static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) -{ - spin_lock_bh(&ar->data_lock); - ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); - ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); - ar->fw_stats.num_vdev_recvd = 0; - ar->fw_stats.num_bcn_recvd = 0; - spin_unlock_bh(&ar->data_lock); -} - void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats) { struct ath11k_base *ab = ar->ab; - struct ath11k_pdev *pdev; bool is_end = true; - size_t total_vdevs_started = 0; - int i; - - /* WMI_REQUEST_PDEV_STAT request has been already processed */ - - if (stats->stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { - complete(&ar->fw_stats_done); - return; - } - - if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { - if (list_empty(&stats->vdevs)) { - ath11k_warn(ab, "empty vdev stats"); - return; - } - /* FW sends all the active VDEV stats irrespective of PDEV, - * hence limit until the count of all VDEVs started - */ - for (i = 0; i < ab->num_radios; i++) { - pdev = rcu_dereference(ab->pdevs_active[i]); - if (pdev && pdev->ar) - total_vdevs_started += ar->num_started_vdevs; - } - - if (total_vdevs_started) - is_end = ((++ar->fw_stats.num_vdev_recvd) == - total_vdevs_started); - - list_splice_tail_init(&stats->vdevs, - &ar->fw_stats.vdevs); - - if (is_end) - complete(&ar->fw_stats_done); - - return; - } + /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_RSSI_PER_CHAIN_STAT and + * WMI_REQUEST_VDEV_STAT requests have been already processed. + */ if (stats->stats_id == WMI_REQUEST_BCN_STAT) { if (list_empty(&stats->bcn)) { ath11k_warn(ab, "empty bcn stats"); @@ -165,76 +121,6 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * } } -static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, - struct stats_request_params *req_param) -{ - struct ath11k_base *ab = ar->ab; - unsigned long time_left; - int ret; - - lockdep_assert_held(&ar->conf_mutex); - - ath11k_debugfs_fw_stats_reset(ar); - - reinit_completion(&ar->fw_stats_complete); - reinit_completion(&ar->fw_stats_done); - - ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); - - if (ret) { - ath11k_warn(ab, "could not request fw stats (%d)\n", - ret); - return ret; - } - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - if (!time_left) - return -ETIMEDOUT; - - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); - if (!time_left) - return -ETIMEDOUT; - - return 0; -} - -int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id) -{ - struct ath11k_base *ab = ar->ab; - struct stats_request_params req_param; - int ret; - - mutex_lock(&ar->conf_mutex); - - if (ar->state != ATH11K_STATE_ON) { - ret = -ENETDOWN; - goto err_unlock; - } - - req_param.pdev_id = pdev_id; - req_param.vdev_id = vdev_id; - req_param.stats_id = stats_id; - - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); - if (ret) - ath11k_warn(ab, "failed to request fw stats: %d\n", ret); - - ath11k_dbg(ab, ATH11K_DBG_WMI, - "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", - pdev_id, vdev_id, stats_id); - -err_unlock: - mutex_unlock(&ar->conf_mutex); - - return ret; -} - static int ath11k_open_pdev_stats(struct inode *inode, struct file *file) { struct ath11k *ar = inode->i_private; @@ -260,7 +146,7 @@ static int ath11k_open_pdev_stats(struct inode *inode, struct file *file) req_param.vdev_id = 0; req_param.stats_id = WMI_REQUEST_PDEV_STAT; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret); goto err_free; @@ -331,7 +217,7 @@ static int ath11k_open_vdev_stats(struct inode *inode, struct file *file) req_param.vdev_id = 0; req_param.stats_id = WMI_REQUEST_VDEV_STAT; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ar->ab, "failed to request fw vdev stats: %d\n", ret); goto err_free; @@ -407,7 +293,7 @@ static int ath11k_open_bcn_stats(struct inode *inode, struct file *file) continue; req_param.vdev_id = arvif->vdev_id; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ar->ab, "failed to request fw bcn stats: %d\n", ret); goto err_free; diff --git a/drivers/net/wireless/ath/ath11k/debugfs.h b/drivers/net/wireless/ath/ath11k/debugfs.h index a39e458637b013..ed7fec177588f6 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.h +++ b/drivers/net/wireless/ath/ath11k/debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ATH11K_DEBUGFS_H_ @@ -273,8 +273,6 @@ void ath11k_debugfs_unregister(struct ath11k *ar); void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats); void ath11k_debugfs_fw_stats_init(struct ath11k *ar); -int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id); static inline bool ath11k_debugfs_is_pktlog_lite_mode_enabled(struct ath11k *ar) { @@ -381,12 +379,6 @@ static inline int ath11k_debugfs_rx_filter(struct ath11k *ar) return 0; } -static inline int ath11k_debugfs_get_fw_stats(struct ath11k *ar, - u32 pdev_id, u32 vdev_id, u32 stats_id) -{ - return 0; -} - static inline void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, enum wmi_direct_buffer_module id, diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index d97273b205d9f8..4763b271309aa2 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -8991,6 +8991,86 @@ static void ath11k_mac_put_chain_rssi(struct station_info *sinfo, } } +static void ath11k_mac_fw_stats_reset(struct ath11k *ar) +{ + spin_lock_bh(&ar->data_lock); + ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); + ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; + spin_unlock_bh(&ar->data_lock); +} + +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param) +{ + struct ath11k_base *ab = ar->ab; + unsigned long time_left; + int ret; + + lockdep_assert_held(&ar->conf_mutex); + + ath11k_mac_fw_stats_reset(ar); + + reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); + + ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); + + if (ret) { + ath11k_warn(ab, "could not request fw stats (%d)\n", + ret); + return ret; + } + + time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); + if (!time_left) + return -ETIMEDOUT; + + /* FW stats can get split when exceeding the stats data buffer limit. + * In that case, since there is no end marking for the back-to-back + * received 'update stats' event, we keep a 3 seconds timeout in case, + * fw_stats_done is not marked yet + */ + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) + return -ETIMEDOUT; + + return 0; +} + +static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, + u32 vdev_id, u32 stats_id) +{ + struct ath11k_base *ab = ar->ab; + struct stats_request_params req_param; + int ret; + + mutex_lock(&ar->conf_mutex); + + if (ar->state != ATH11K_STATE_ON) { + ret = -ENETDOWN; + goto err_unlock; + } + + req_param.pdev_id = pdev_id; + req_param.vdev_id = vdev_id; + req_param.stats_id = stats_id; + + ret = ath11k_mac_fw_stats_request(ar, &req_param); + if (ret) + ath11k_warn(ab, "failed to request fw stats: %d\n", ret); + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", + pdev_id, vdev_id, stats_id); + +err_unlock: + mutex_unlock(&ar->conf_mutex); + + return ret; +} + static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -9028,8 +9108,8 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && - !ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_RSSI_PER_CHAIN_STAT)) { + !ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_RSSI_PER_CHAIN_STAT)) { ath11k_mac_put_chain_rssi(sinfo, arsta, "fw stats", true); } @@ -9037,8 +9117,8 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, if (!signal && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && - !(ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_VDEV_STAT))) + !(ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_VDEV_STAT))) signal = arsta->rssi_beacon; ath11k_dbg(ar->ab, ATH11K_DBG_MAC, diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h index f5800fbecff89e..5e61eea1bb0378 100644 --- a/drivers/net/wireless/ath/ath11k/mac.h +++ b/drivers/net/wireless/ath/ath11k/mac.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH11K_MAC_H @@ -179,4 +179,6 @@ int ath11k_mac_vif_set_keepalive(struct ath11k_vif *arvif, void ath11k_mac_fill_reg_tpc_info(struct ath11k *ar, struct ieee80211_vif *vif, struct ieee80211_chanctx_conf *ctx); +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param); #endif diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 27cb0bb06b93c8..98811726d33bf1 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8158,6 +8158,11 @@ static void ath11k_peer_assoc_conf_event(struct ath11k_base *ab, struct sk_buff static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *skb) { struct ath11k_fw_stats stats = {}; + size_t total_vdevs_started = 0; + struct ath11k_pdev *pdev; + bool is_end = true; + int i; + struct ath11k *ar; int ret; @@ -8184,7 +8189,8 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk spin_lock_bh(&ar->data_lock); - /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via + /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_VDEV_STAT and + * WMI_REQUEST_RSSI_PER_CHAIN_STAT can be requested via mac ops or via * debugfs fw stats. Therefore, processing it separately. */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { @@ -8193,9 +8199,40 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk goto complete; } - /* WMI_REQUEST_VDEV_STAT, WMI_REQUEST_BCN_STAT and WMI_REQUEST_RSSI_PER_CHAIN_STAT - * are currently requested only via debugfs fw stats. Hence, processing these - * in debugfs context + if (stats.stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { + complete(&ar->fw_stats_done); + goto complete; + } + + if (stats.stats_id == WMI_REQUEST_VDEV_STAT) { + if (list_empty(&stats.vdevs)) { + ath11k_warn(ab, "empty vdev stats"); + goto complete; + } + /* FW sends all the active VDEV stats irrespective of PDEV, + * hence limit until the count of all VDEVs started + */ + for (i = 0; i < ab->num_radios; i++) { + pdev = rcu_dereference(ab->pdevs_active[i]); + if (pdev && pdev->ar) + total_vdevs_started += ar->num_started_vdevs; + } + + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); + + list_splice_tail_init(&stats.vdevs, + &ar->fw_stats.vdevs); + + if (is_end) + complete(&ar->fw_stats_done); + + goto complete; + } + + /* WMI_REQUEST_BCN_STAT is currently requested only via debugfs fw stats. + * Hence, processing it in debugfs context */ ath11k_debugfs_fw_stats_process(ar, &stats); From 27471a512caf8948960f3d2ddefd3d774bb7c92d Mon Sep 17 00:00:00 2001 From: Rodrigo Gobbi Date: Thu, 22 May 2025 17:01:12 -0300 Subject: [PATCH 2732/2924] wifi: ath11k: validate ath11k_crypto_mode on top of ath11k_core_qmi_firmware_ready [ Upstream commit b0d226a60856a1b765bb9a3848c7b2322fd08c47 ] if ath11k_crypto_mode is invalid (not ATH11K_CRYPT_MODE_SW/ATH11K_CRYPT_MODE_HW), ath11k_core_qmi_firmware_ready() will not undo some actions that was previously started/configured. Do the validation as soon as possible in order to avoid undoing actions in that case and also to fix the following smatch warning: drivers/net/wireless/ath/ath11k/core.c:2166 ath11k_core_qmi_firmware_ready() warn: missing unwind goto? Signed-off-by: Rodrigo Gobbi Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202304151955.oqAetVFd-lkp@intel.com/ Fixes: aa2092a9bab3 ("ath11k: add raw mode and software crypto support") Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250522200519.16858-1-rodrigo.gobbi.7@gmail.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/core.c | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 6c346a8df2df19..22eb1b0377ffed 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -1947,6 +1947,20 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab) { int ret; + switch (ath11k_crypto_mode) { + case ATH11K_CRYPT_MODE_SW: + set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); + set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); + break; + case ATH11K_CRYPT_MODE_HW: + clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); + clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); + break; + default: + ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode); + return -EINVAL; + } + ret = ath11k_core_start_firmware(ab, ab->fw_mode); if (ret) { ath11k_err(ab, "failed to start firmware: %d\n", ret); @@ -1965,20 +1979,6 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab) goto err_firmware_stop; } - switch (ath11k_crypto_mode) { - case ATH11K_CRYPT_MODE_SW: - set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); - set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); - break; - case ATH11K_CRYPT_MODE_HW: - clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); - clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); - break; - default: - ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode); - return -EINVAL; - } - if (ath11k_frame_mode == ATH11K_HW_TXRX_RAW) set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); From fb17af997327277e9ed92435f4fe72693f401149 Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Fri, 21 Mar 2025 16:22:40 +0530 Subject: [PATCH 2733/2924] wifi: ath12k: refactor ath12k_hw_regs structure [ Upstream commit 5257324583e32fd5bd6bbb6c82b4f5880b842f99 ] IPQ5332 device have different register address values for the below registers: HAL_TCL1_RING_BASE_LSB HAL_TCL1_RING_BASE_MSB HAL_TCL2_RING_BASE_LSB HAL_SEQ_WCSS_UMAC_CE0_SRC_REG HAL_SEQ_WCSS_UMAC_CE0_DST_REG HAL_SEQ_WCSS_UMAC_CE1_SRC_REG HAL_SEQ_WCSS_UMAC_CE1_DST_REG Hence, refactor ath12k_hw_regs structure to accommodate these changes in IPQ5332. Tested-on: IPQ5332 hw1.0 AHB WLAN.WBE.1.3.1-00130-QCAHKSWPL_SILICONZ-1 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.1.1-00210-QCAHKSWPL_SILICONZ-1 Signed-off-by: P Praneesh Co-developed-by: Balamurugan S Signed-off-by: Balamurugan S Reviewed-by: Vasanthakumar Thiagarajan Signed-off-by: Raj Kumar Bhagat Link: https://patch.msgid.link/20250321-ath12k-ahb-v12-3-bb389ed76ae5@quicinc.com Signed-off-by: Jeff Johnson Stable-dep-of: 7588a893cde5 ("wifi: ath12k: fix GCC_GCC_PCIE_HOT_RST definition for WCN7850") Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/hal.c | 84 +++++++++++++-------------- drivers/net/wireless/ath/ath12k/hal.h | 63 +++++++++++--------- drivers/net/wireless/ath/ath12k/hw.c | 29 ++++++++- drivers/net/wireless/ath/ath12k/hw.h | 10 +++- 4 files changed, 114 insertions(+), 72 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/hal.c b/drivers/net/wireless/ath/ath12k/hal.c index 178c242a840e3e..d00869a33fea06 100644 --- a/drivers/net/wireless/ath/ath12k/hal.c +++ b/drivers/net/wireless/ath/ath12k/hal.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include #include "hal_tx.h" @@ -547,9 +547,9 @@ static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab) s->reg_start[1] = HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO_STATUS_HP; s = &hal->srng_config[HAL_TCL_DATA]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB(ab); s->reg_start[1] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_HP; - s->reg_size[0] = HAL_TCL2_RING_BASE_LSB - HAL_TCL1_RING_BASE_LSB; + s->reg_size[0] = HAL_TCL2_RING_BASE_LSB(ab) - HAL_TCL1_RING_BASE_LSB(ab); s->reg_size[1] = HAL_TCL2_RING_HP - HAL_TCL1_RING_HP; s = &hal->srng_config[HAL_TCL_CMD]; @@ -561,29 +561,29 @@ static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab) s->reg_start[1] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL_STATUS_RING_HP; s = &hal->srng_config[HAL_CE_SRC]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); s = &hal->srng_config[HAL_CE_DST]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_CE_DST_STATUS]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_STATUS_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_WBM_IDLE_LINK]; s->reg_start[0] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_IDLE_LINK_RING_BASE_LSB(ab); @@ -1353,9 +1353,9 @@ static int ath12k_hal_srng_create_config_wcn7850(struct ath12k_base *ab) s = &hal->srng_config[HAL_TCL_DATA]; s->max_rings = 5; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB(ab); s->reg_start[1] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_HP; - s->reg_size[0] = HAL_TCL2_RING_BASE_LSB - HAL_TCL1_RING_BASE_LSB; + s->reg_size[0] = HAL_TCL2_RING_BASE_LSB(ab) - HAL_TCL1_RING_BASE_LSB(ab); s->reg_size[1] = HAL_TCL2_RING_HP - HAL_TCL1_RING_HP; s = &hal->srng_config[HAL_TCL_CMD]; @@ -1368,31 +1368,31 @@ static int ath12k_hal_srng_create_config_wcn7850(struct ath12k_base *ab) s = &hal->srng_config[HAL_CE_SRC]; s->max_rings = 12; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); s = &hal->srng_config[HAL_CE_DST]; s->max_rings = 12; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_CE_DST_STATUS]; s->max_rings = 12; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_STATUS_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_WBM_IDLE_LINK]; s->reg_start[0] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_IDLE_LINK_RING_BASE_LSB(ab); @@ -1737,7 +1737,7 @@ static void ath12k_hal_srng_src_hw_init(struct ath12k_base *ab, HAL_TCL1_RING_BASE_MSB_RING_BASE_ADDR_MSB) | u32_encode_bits((srng->entry_size * srng->num_entries), HAL_TCL1_RING_BASE_MSB_RING_SIZE); - ath12k_hif_write32(ab, reg_base + HAL_TCL1_RING_BASE_MSB_OFFSET, val); + ath12k_hif_write32(ab, reg_base + HAL_TCL1_RING_BASE_MSB_OFFSET(ab), val); val = u32_encode_bits(srng->entry_size, HAL_REO1_RING_ID_ENTRY_SIZE); ath12k_hif_write32(ab, reg_base + HAL_TCL1_RING_ID_OFFSET(ab), val); diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h index 3156563c77e5b4..c8205672cd3dd5 100644 --- a/drivers/net/wireless/ath/ath12k/hal.h +++ b/drivers/net/wireless/ath/ath12k/hal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH12K_HAL_H @@ -44,10 +44,14 @@ struct ath12k_base; #define HAL_SEQ_WCSS_UMAC_OFFSET 0x00a00000 #define HAL_SEQ_WCSS_UMAC_REO_REG 0x00a38000 #define HAL_SEQ_WCSS_UMAC_TCL_REG 0x00a44000 -#define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG 0x01b80000 -#define HAL_SEQ_WCSS_UMAC_CE0_DST_REG 0x01b81000 -#define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG 0x01b82000 -#define HAL_SEQ_WCSS_UMAC_CE1_DST_REG 0x01b83000 +#define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce0_src_reg_base) +#define HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce0_dest_reg_base) +#define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce1_src_reg_base) +#define HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce1_dest_reg_base) #define HAL_SEQ_WCSS_UMAC_WBM_REG 0x00a34000 #define HAL_CE_WFSS_CE_REG_BASE 0x01b80000 @@ -57,8 +61,10 @@ struct ath12k_base; /* SW2TCL(x) R0 ring configuration address */ #define HAL_TCL1_RING_CMN_CTRL_REG 0x00000020 #define HAL_TCL1_RING_DSCP_TID_MAP 0x00000240 -#define HAL_TCL1_RING_BASE_LSB 0x00000900 -#define HAL_TCL1_RING_BASE_MSB 0x00000904 +#define HAL_TCL1_RING_BASE_LSB(ab) \ + ((ab)->hw_params->regs->hal_tcl1_ring_base_lsb) +#define HAL_TCL1_RING_BASE_MSB(ab) \ + ((ab)->hw_params->regs->hal_tcl1_ring_base_msb) #define HAL_TCL1_RING_ID(ab) ((ab)->hw_params->regs->hal_tcl1_ring_id) #define HAL_TCL1_RING_MISC(ab) \ ((ab)->hw_params->regs->hal_tcl1_ring_misc) @@ -76,30 +82,31 @@ struct ath12k_base; ((ab)->hw_params->regs->hal_tcl1_ring_msi1_base_msb) #define HAL_TCL1_RING_MSI1_DATA(ab) \ ((ab)->hw_params->regs->hal_tcl1_ring_msi1_data) -#define HAL_TCL2_RING_BASE_LSB 0x00000978 +#define HAL_TCL2_RING_BASE_LSB(ab) \ + ((ab)->hw_params->regs->hal_tcl2_ring_base_lsb) #define HAL_TCL_RING_BASE_LSB(ab) \ ((ab)->hw_params->regs->hal_tcl_ring_base_lsb) -#define HAL_TCL1_RING_MSI1_BASE_LSB_OFFSET(ab) \ - (HAL_TCL1_RING_MSI1_BASE_LSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_MSI1_BASE_MSB_OFFSET(ab) \ - (HAL_TCL1_RING_MSI1_BASE_MSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_MSI1_DATA_OFFSET(ab) \ - (HAL_TCL1_RING_MSI1_DATA(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_BASE_MSB_OFFSET \ - (HAL_TCL1_RING_BASE_MSB - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_ID_OFFSET(ab) \ - (HAL_TCL1_RING_ID(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_CONSR_INT_SETUP_IX0_OFFSET(ab) \ - (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX0(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_CONSR_INT_SETUP_IX1_OFFSET(ab) \ - (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX1(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_TP_ADDR_LSB_OFFSET(ab) \ - (HAL_TCL1_RING_TP_ADDR_LSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_TP_ADDR_MSB_OFFSET(ab) \ - (HAL_TCL1_RING_TP_ADDR_MSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_MISC_OFFSET(ab) \ - (HAL_TCL1_RING_MISC(ab) - HAL_TCL1_RING_BASE_LSB) +#define HAL_TCL1_RING_MSI1_BASE_LSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MSI1_BASE_LSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_MSI1_BASE_MSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MSI1_BASE_MSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_MSI1_DATA_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MSI1_DATA(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_BASE_MSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_BASE_MSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_ID_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_ID(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_CONSR_INT_SETUP_IX0_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX0(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_CONSR_INT_SETUP_IX1_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX1(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_TP_ADDR_LSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_TP_ADDR_LSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_TP_ADDR_MSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_TP_ADDR_MSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_MISC_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MISC(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) /* SW2TCL(x) R2 ring pointers (head/tail) address */ #define HAL_TCL1_RING_HP 0x00002000 diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index a106ebed7870de..4c9d6c42fbb92a 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -619,6 +619,9 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = { .hal_tcl1_ring_msi1_base_msb = 0x0000094c, .hal_tcl1_ring_msi1_data = 0x00000950, .hal_tcl_ring_base_lsb = 0x00000b58, + .hal_tcl1_ring_base_lsb = 0x00000900, + .hal_tcl1_ring_base_msb = 0x00000904, + .hal_tcl2_ring_base_lsb = 0x00000978, /* TCL STATUS ring address */ .hal_tcl_status_ring_base_lsb = 0x00000d38, @@ -681,6 +684,12 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = { /* REO status ring address */ .hal_reo_status_ring_base = 0x00000a84, + + /* CE base address */ + .hal_umac_ce0_src_reg_base = 0x01b80000, + .hal_umac_ce0_dest_reg_base = 0x01b81000, + .hal_umac_ce1_src_reg_base = 0x01b82000, + .hal_umac_ce1_dest_reg_base = 0x01b83000, }; static const struct ath12k_hw_regs qcn9274_v2_regs = { @@ -695,6 +704,9 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { .hal_tcl1_ring_msi1_base_msb = 0x0000094c, .hal_tcl1_ring_msi1_data = 0x00000950, .hal_tcl_ring_base_lsb = 0x00000b58, + .hal_tcl1_ring_base_lsb = 0x00000900, + .hal_tcl1_ring_base_msb = 0x00000904, + .hal_tcl2_ring_base_lsb = 0x00000978, /* TCL STATUS ring address */ .hal_tcl_status_ring_base_lsb = 0x00000d38, @@ -761,6 +773,12 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { /* REO status ring address */ .hal_reo_status_ring_base = 0x00000aa0, + + /* CE base address */ + .hal_umac_ce0_src_reg_base = 0x01b80000, + .hal_umac_ce0_dest_reg_base = 0x01b81000, + .hal_umac_ce1_src_reg_base = 0x01b82000, + .hal_umac_ce1_dest_reg_base = 0x01b83000, }; static const struct ath12k_hw_regs wcn7850_regs = { @@ -775,6 +793,9 @@ static const struct ath12k_hw_regs wcn7850_regs = { .hal_tcl1_ring_msi1_base_msb = 0x0000094c, .hal_tcl1_ring_msi1_data = 0x00000950, .hal_tcl_ring_base_lsb = 0x00000b58, + .hal_tcl1_ring_base_lsb = 0x00000900, + .hal_tcl1_ring_base_msb = 0x00000904, + .hal_tcl2_ring_base_lsb = 0x00000978, /* TCL STATUS ring address */ .hal_tcl_status_ring_base_lsb = 0x00000d38, @@ -837,6 +858,12 @@ static const struct ath12k_hw_regs wcn7850_regs = { /* REO status ring address */ .hal_reo_status_ring_base = 0x00000a84, + + /* CE base address */ + .hal_umac_ce0_src_reg_base = 0x01b80000, + .hal_umac_ce0_dest_reg_base = 0x01b81000, + .hal_umac_ce1_src_reg_base = 0x01b82000, + .hal_umac_ce1_dest_reg_base = 0x01b83000, }; static const struct ath12k_hw_hal_params ath12k_hw_hal_params_qcn9274 = { diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index 8d52182e28aef4..acb81b5798ac17 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH12K_HW_H @@ -293,6 +293,9 @@ struct ath12k_hw_regs { u32 hal_tcl1_ring_msi1_base_msb; u32 hal_tcl1_ring_msi1_data; u32 hal_tcl_ring_base_lsb; + u32 hal_tcl1_ring_base_lsb; + u32 hal_tcl1_ring_base_msb; + u32 hal_tcl2_ring_base_lsb; u32 hal_tcl_status_ring_base_lsb; @@ -316,6 +319,11 @@ struct ath12k_hw_regs { u32 pcie_qserdes_sysclk_en_sel; u32 pcie_pcs_osc_dtct_config_base; + u32 hal_umac_ce0_src_reg_base; + u32 hal_umac_ce0_dest_reg_base; + u32 hal_umac_ce1_src_reg_base; + u32 hal_umac_ce1_dest_reg_base; + u32 hal_ppe_rel_ring_base; u32 hal_reo2_ring_base; From d71ac5694b33c80f1de97d074f6fbdc6c01a9d61 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 23 May 2025 10:23:05 +0800 Subject: [PATCH 2734/2924] wifi: ath12k: fix GCC_GCC_PCIE_HOT_RST definition for WCN7850 [ Upstream commit 7588a893cde5385ad308400ff167d29a29913b3a ] GCC_GCC_PCIE_HOT_RST is wrongly defined for WCN7850, causing kernel crash on some specific platforms. Since this register is divergent for WCN7850 and QCN9274, move it to register table to allow different definitions. Then correct the register address for WCN7850 to fix this issue. Note IPQ5332 is not affected as it is not PCIe based device. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Reported-by: Parth Pancholi Closes: https://lore.kernel.org/all/86899b2235a59c9134603beebe08f2bb0b244ea0.camel@gmail.com Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Tested-by: Parth Pancholi Link: https://patch.msgid.link/20250523-ath12k-wrong-global-reset-addr-v1-1-3b06eb556196@quicinc.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/hw.c | 6 ++++++ drivers/net/wireless/ath/ath12k/hw.h | 2 ++ drivers/net/wireless/ath/ath12k/pci.c | 6 +++--- drivers/net/wireless/ath/ath12k/pci.h | 4 +++- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index 4c9d6c42fbb92a..1bfb11bae7add3 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -690,6 +690,8 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e38338, }; static const struct ath12k_hw_regs qcn9274_v2_regs = { @@ -779,6 +781,8 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e38338, }; static const struct ath12k_hw_regs wcn7850_regs = { @@ -864,6 +868,8 @@ static const struct ath12k_hw_regs wcn7850_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e40304, }; static const struct ath12k_hw_hal_params ath12k_hw_hal_params_qcn9274 = { diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index acb81b5798ac17..862b11325a9021 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -355,6 +355,8 @@ struct ath12k_hw_regs { u32 hal_reo_cmd_ring_base; u32 hal_reo_status_ring_base; + + u32 gcc_gcc_pcie_hot_rst; }; static inline const char *ath12k_bd_ie_type_str(enum ath12k_bd_ie_type type) diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 273f4bc260bfed..2e7d302ace679d 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -292,10 +292,10 @@ static void ath12k_pci_enable_ltssm(struct ath12k_base *ab) ath12k_dbg(ab, ATH12K_DBG_PCI, "pci ltssm 0x%x\n", val); - val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST); + val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab)); val |= GCC_GCC_PCIE_HOT_RST_VAL; - ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST, val); - val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST); + ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST(ab), val); + val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab)); ath12k_dbg(ab, ATH12K_DBG_PCI, "pci pcie_hot_rst 0x%x\n", val); diff --git a/drivers/net/wireless/ath/ath12k/pci.h b/drivers/net/wireless/ath/ath12k/pci.h index 31584a7ad80eb9..9321674eef8b8f 100644 --- a/drivers/net/wireless/ath/ath12k/pci.h +++ b/drivers/net/wireless/ath/ath12k/pci.h @@ -28,7 +28,9 @@ #define PCIE_PCIE_PARF_LTSSM 0x1e081b0 #define PARM_LTSSM_VALUE 0x111 -#define GCC_GCC_PCIE_HOT_RST 0x1e38338 +#define GCC_GCC_PCIE_HOT_RST(ab) \ + ((ab)->hw_params->regs->gcc_gcc_pcie_hot_rst) + #define GCC_GCC_PCIE_HOT_RST_VAL 0x10 #define PCIE_PCIE_INT_ALL_CLEAR 0x1e08228 From 65e1b3404c211dcfaea02698539cdcd26647130f Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Wed, 4 Jun 2025 13:52:50 +0800 Subject: [PATCH 2735/2924] wifi: ath12k: fix uaf in ath12k_core_init() [ Upstream commit f3fe49dbddd73f0155a8935af47cb63693069dbe ] When the execution of ath12k_core_hw_group_assign() or ath12k_core_hw_group_create() fails, the registered notifier chain is not unregistered properly. Its memory is freed after rmmod, which may trigger to a use-after-free (UAF) issue if there is a subsequent access to this notifier chain. Fixes the issue by calling ath12k_core_panic_notifier_unregister() in failure cases. Call trace: notifier_chain_register+0x4c/0x1f0 (P) atomic_notifier_chain_register+0x38/0x68 ath12k_core_init+0x50/0x4e8 [ath12k] ath12k_pci_probe+0x5f8/0xc28 [ath12k] pci_device_probe+0xbc/0x1a8 really_probe+0xc8/0x3a0 __driver_probe_device+0x84/0x1b0 driver_probe_device+0x44/0x130 __driver_attach+0xcc/0x208 bus_for_each_dev+0x84/0x100 driver_attach+0x2c/0x40 bus_add_driver+0x130/0x260 driver_register+0x70/0x138 __pci_register_driver+0x68/0x80 ath12k_pci_init+0x30/0x68 [ath12k] ath12k_init+0x28/0x78 [ath12k] Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: 6f245ea0ec6c ("wifi: ath12k: introduce device group abstraction") Signed-off-by: Miaoqing Pan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250604055250.1228501-1-miaoqing.pan@oss.qualcomm.com Signed-off-by: Jeff Johnson Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 770156347ffadc..261f52b327e89c 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1902,7 +1902,8 @@ int ath12k_core_init(struct ath12k_base *ab) if (!ag) { mutex_unlock(&ath12k_hw_group_mutex); ath12k_warn(ab, "unable to get hw group\n"); - return -ENODEV; + ret = -ENODEV; + goto err_unregister_notifier; } mutex_unlock(&ath12k_hw_group_mutex); @@ -1917,7 +1918,7 @@ int ath12k_core_init(struct ath12k_base *ab) if (ret) { mutex_unlock(&ag->mutex); ath12k_warn(ab, "unable to create hw group\n"); - goto err; + goto err_destroy_hw_group; } } @@ -1925,9 +1926,12 @@ int ath12k_core_init(struct ath12k_base *ab) return 0; -err: +err_destroy_hw_group: ath12k_core_hw_group_destroy(ab->ag); ath12k_core_hw_group_unassign(ab); +err_unregister_notifier: + ath12k_core_panic_notifier_unregister(ab); + return ret; } From 009753ee1509f7dd62bc06cf90fa36c7546df4cb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 27 May 2025 08:44:14 +0300 Subject: [PATCH 2736/2924] regulator: max20086: Fix refcount leak in max20086_parse_regulators_dt() [ Upstream commit 06118ae36855b7d3d22688298e74a766ccf0cb7a ] There is a missing call to of_node_put() if devm_kcalloc() fails. Fix this by changing the code to use cleanup.h magic to drop the refcount. Fixes: 6b0cd72757c6 ("regulator: max20086: fix invalid memory access") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aDVRLqgJWMxYU03G@stanley.mountain Reviewed-by: Laurent Pinchart Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/max20086-regulator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c index 198d45f8e88493..3d333b61fb18c8 100644 --- a/drivers/regulator/max20086-regulator.c +++ b/drivers/regulator/max20086-regulator.c @@ -5,6 +5,7 @@ // Copyright (C) 2022 Laurent Pinchart // Copyright (C) 2018 Avnet, Inc. +#include #include #include #include @@ -133,11 +134,11 @@ static int max20086_regulators_register(struct max20086 *chip) static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) { struct of_regulator_match *matches; - struct device_node *node; unsigned int i; int ret; - node = of_get_child_by_name(chip->dev->of_node, "regulators"); + struct device_node *node __free(device_node) = + of_get_child_by_name(chip->dev->of_node, "regulators"); if (!node) { dev_err(chip->dev, "regulators node not found\n"); return -ENODEV; @@ -153,7 +154,6 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) ret = of_regulator_match(chip->dev, node, matches, chip->info->num_outputs); - of_node_put(node); if (ret < 0) { dev_err(chip->dev, "Failed to match regulators\n"); return -EINVAL; From c5d01b57ea8e129f0b22198f8c0507ece0e0df2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Pi=C3=A9dallu?= Date: Fri, 6 Jun 2025 15:37:24 +0200 Subject: [PATCH 2737/2924] spi: omap2-mcspi: Disable multi mode when CS should be kept asserted after message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a5bf5272295d3f058adeee025d2a0b6625f8ba7b ] When the last transfer of a SPI message has the cs_change flag, the CS is kept asserted after the message. Multi-mode can't respect this as CS is deasserted by the hardware at the end of the message. Disable multi-mode when not applicable to the current message. Fixes: d153ff4056cb ("spi: omap2-mcspi: Add support for MULTI-mode") Signed-off-by: Félix Piédallu Link: https://patch.msgid.link/20250606-cs_change_fix-v1-1-27191a98a2e5@non.se.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-omap2-mcspi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 29c616e2c408cf..e834fb42fd4ba7 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -1287,9 +1287,15 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, mcspi->use_multi_mode = false; } - /* Check if transfer asks to change the CS status after the transfer */ - if (!tr->cs_change) - mcspi->use_multi_mode = false; + if (list_is_last(&tr->transfer_list, &msg->transfers)) { + /* Check if transfer asks to keep the CS status after the whole message */ + if (tr->cs_change) + mcspi->use_multi_mode = false; + } else { + /* Check if transfer asks to change the CS status after the transfer */ + if (!tr->cs_change) + mcspi->use_multi_mode = false; + } /* * If at least one message is not compatible, switch back to single mode From 87c4766dfed077e9816f7e0356a19f97e917ace9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Pi=C3=A9dallu?= Date: Fri, 6 Jun 2025 15:37:25 +0200 Subject: [PATCH 2738/2924] spi: omap2-mcspi: Disable multi-mode when the previous message kept CS asserted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 10c24e0d2f7cd2bc8a847cf750f01301ce67dbc8 ] When the last transfer of a SPI message has the cs_change flag, the CS is kept asserted after the message. The next message can't use multi-mode because the CS will be briefly deasserted before the first transfer. Remove the early exit of the list_for_each_entry because the last transfer actually needs to be always checked. Fixes: d153ff4056cb ("spi: omap2-mcspi: Add support for MULTI-mode") Signed-off-by: Félix Piédallu Link: https://patch.msgid.link/20250606-cs_change_fix-v1-2-27191a98a2e5@non.se.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-omap2-mcspi.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index e834fb42fd4ba7..70bb74b3bd9c32 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -134,6 +134,7 @@ struct omap2_mcspi { size_t max_xfer_len; u32 ref_clk_hz; bool use_multi_mode; + bool last_msg_kept_cs; }; struct omap2_mcspi_cs { @@ -1269,6 +1270,10 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, * multi-mode is applicable. */ mcspi->use_multi_mode = true; + + if (mcspi->last_msg_kept_cs) + mcspi->use_multi_mode = false; + list_for_each_entry(tr, &msg->transfers, transfer_list) { if (!tr->bits_per_word) bits_per_word = msg->spi->bits_per_word; @@ -1289,22 +1294,17 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, if (list_is_last(&tr->transfer_list, &msg->transfers)) { /* Check if transfer asks to keep the CS status after the whole message */ - if (tr->cs_change) + if (tr->cs_change) { mcspi->use_multi_mode = false; + mcspi->last_msg_kept_cs = true; + } else { + mcspi->last_msg_kept_cs = false; + } } else { /* Check if transfer asks to change the CS status after the transfer */ if (!tr->cs_change) mcspi->use_multi_mode = false; } - - /* - * If at least one message is not compatible, switch back to single mode - * - * The bits_per_word of certain transfer can be different, but it will have no - * impact on the signal itself. - */ - if (!mcspi->use_multi_mode) - break; } omap2_mcspi_set_mode(ctlr); From beabaff7e8fd547137b750cb7c3ba243b87874b3 Mon Sep 17 00:00:00 2001 From: Wojciech Slenska Date: Fri, 23 May 2025 12:14:37 +0200 Subject: [PATCH 2739/2924] pinctrl: qcom: pinctrl-qcm2290: Add missing pins [ Upstream commit 315345610faee8a0568b522dba9e35067d1732ab ] Added the missing pins to the qcm2290_pins table. Signed-off-by: Wojciech Slenska Fixes: 48e049ef1238 ("pinctrl: qcom: Add QCM2290 pinctrl driver") Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/20250523101437.59092-1-wojciech.slenska@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/qcom/pinctrl-qcm2290.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-qcm2290.c b/drivers/pinctrl/qcom/pinctrl-qcm2290.c index ba699eac9ee8b2..20e9bccda4cd6d 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcm2290.c +++ b/drivers/pinctrl/qcom/pinctrl-qcm2290.c @@ -165,6 +165,10 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = { PINCTRL_PIN(62, "GPIO_62"), PINCTRL_PIN(63, "GPIO_63"), PINCTRL_PIN(64, "GPIO_64"), + PINCTRL_PIN(65, "GPIO_65"), + PINCTRL_PIN(66, "GPIO_66"), + PINCTRL_PIN(67, "GPIO_67"), + PINCTRL_PIN(68, "GPIO_68"), PINCTRL_PIN(69, "GPIO_69"), PINCTRL_PIN(70, "GPIO_70"), PINCTRL_PIN(71, "GPIO_71"), @@ -179,12 +183,17 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = { PINCTRL_PIN(80, "GPIO_80"), PINCTRL_PIN(81, "GPIO_81"), PINCTRL_PIN(82, "GPIO_82"), + PINCTRL_PIN(83, "GPIO_83"), + PINCTRL_PIN(84, "GPIO_84"), + PINCTRL_PIN(85, "GPIO_85"), PINCTRL_PIN(86, "GPIO_86"), PINCTRL_PIN(87, "GPIO_87"), PINCTRL_PIN(88, "GPIO_88"), PINCTRL_PIN(89, "GPIO_89"), PINCTRL_PIN(90, "GPIO_90"), PINCTRL_PIN(91, "GPIO_91"), + PINCTRL_PIN(92, "GPIO_92"), + PINCTRL_PIN(93, "GPIO_93"), PINCTRL_PIN(94, "GPIO_94"), PINCTRL_PIN(95, "GPIO_95"), PINCTRL_PIN(96, "GPIO_96"), From dc4de0295975d5184545ac0e0061a67bbc57511d Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 4 Jun 2025 07:32:17 -0700 Subject: [PATCH 2740/2924] accel/amdxdna: Fix incorrect PSP firmware size [ Upstream commit 779a0c9e06a91d0007f2a4f4071e3b9a8102b15e ] The incorrect PSP firmware size is used for initializing. It may cause error for newer version firmware. Fixes: 8c9ff1b181ba ("accel/amdxdna: Add a new driver for AMD AI Engine") Acked-by: Alex Deucher Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20250604143217.1386272-1-lizhi.hou@amd.com Signed-off-by: Sasha Levin --- drivers/accel/amdxdna/aie2_psp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c index dc3a072ce3b6df..f28a060a88109b 100644 --- a/drivers/accel/amdxdna/aie2_psp.c +++ b/drivers/accel/amdxdna/aie2_psp.c @@ -126,8 +126,8 @@ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config * psp->ddev = ddev; memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs)); - psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN; - psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL); + psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN); + psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL); if (!psp->fw_buffer) { drm_err(ddev, "no memory for fw buffer"); return NULL; From 5ad8649e4cc6b108fbca12b4ef249fc827468ddc Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 30 May 2025 12:29:35 -0700 Subject: [PATCH 2741/2924] scsi: iscsi: Fix incorrect error path labels for flashnode operations [ Upstream commit 9b17621366d210ffee83262a8754086ebbde5e55 ] Correct the error handling goto labels used when host lookup fails in various flashnode-related event handlers: - iscsi_new_flashnode() - iscsi_del_flashnode() - iscsi_login_flashnode() - iscsi_logout_flashnode() - iscsi_logout_flashnode_sid() scsi_host_put() is not required when shost is NULL, so jumping to the correct label avoids unnecessary operations. These functions previously jumped to the wrong goto label (put_host), which did not match the intended cleanup logic. Use the correct exit labels (exit_new_fnode, exit_del_fnode, etc.) to ensure proper error handling. Also remove the unused put_host label under iscsi_new_flashnode() as it is no longer needed. No functional changes beyond accurate error path correction. Fixes: c6a4bb2ef596 ("[SCSI] scsi_transport_iscsi: Add flash node mgmt support") Signed-off-by: Alok Tiwari Link: https://lore.kernel.org/r/20250530193012.3312911-1-alok.a.tiwari@oracle.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_transport_iscsi.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0b8c91bf793fcb..c75a806496d674 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -3499,7 +3499,7 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.new_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_new_fnode; } index = transport->new_flashnode(shost, data, len); @@ -3509,7 +3509,6 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport, else err = -EIO; -put_host: scsi_host_put(shost); exit_new_fnode: @@ -3534,7 +3533,7 @@ static int iscsi_del_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.del_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_del_fnode; } idx = ev->u.del_flashnode.flashnode_idx; @@ -3576,7 +3575,7 @@ static int iscsi_login_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.login_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_login_fnode; } idx = ev->u.login_flashnode.flashnode_idx; @@ -3628,7 +3627,7 @@ static int iscsi_logout_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_logout_fnode; } idx = ev->u.logout_flashnode.flashnode_idx; @@ -3678,7 +3677,7 @@ static int iscsi_logout_flashnode_sid(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_logout_sid; } session = iscsi_session_lookup(ev->u.logout_flashnode_sid.sid); From 82448d4dcd8406dec688632a405fdcf7f170ec69 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Jun 2025 16:51:27 +0000 Subject: [PATCH 2742/2924] net_sched: sch_sfq: fix a potential crash on gso_skb handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 82ffbe7776d0ac084031f114167712269bf3d832 ] SFQ has an assumption of always being able to queue at least one packet. However, after the blamed commit, sch->q.len can be inflated by packets in sch->gso_skb, and an enqueue() on an empty SFQ qdisc can be followed by an immediate drop. Fix sfq_drop() to properly clear q->tail in this situation. Tested: ip netns add lb ip link add dev to-lb type veth peer name in-lb netns lb ethtool -K to-lb tso off # force qdisc to requeue gso_skb ip netns exec lb ethtool -K in-lb gro on # enable NAPI ip link set dev to-lb up ip -netns lb link set dev in-lb up ip addr add dev to-lb 192.168.20.1/24 ip -netns lb addr add dev in-lb 192.168.20.2/24 tc qdisc replace dev to-lb root sfq limit 100 ip netns exec lb netserver netperf -H 192.168.20.2 -l 100 & netperf -H 192.168.20.2 -l 100 & netperf -H 192.168.20.2 -l 100 & netperf -H 192.168.20.2 -l 100 & Fixes: a53851e2c321 ("net: sched: explicit locking in gso_cpu fallback") Reported-by: Marcus Wichelmann Closes: https://lore.kernel.org/netdev/9da42688-bfaa-4364-8797-e9271f3bdaef@hetzner-cloud.de/ Signed-off-by: Eric Dumazet Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250606165127.3629486-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_sfq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b912ad99aa15d9..77fa02f2bfcd56 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -310,7 +310,10 @@ static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ x = q->tail->next; slot = &q->slots[x]; - q->tail->next = slot->next; + if (slot->next == x) + q->tail = NULL; /* no more active slots */ + else + q->tail->next = slot->next; q->ht[slot->hash] = SFQ_EMPTY_SLOT; goto drop; } From bbd5a9ddb0f9750783a48a871c9e12c0b68c5f39 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Tue, 10 Jun 2025 07:42:26 +0530 Subject: [PATCH 2743/2924] powerpc/powernv/memtrace: Fix out of bounds issue in memtrace mmap [ Upstream commit cd097df4596f3a1e9d75eb8520162de1eb8485b2 ] memtrace mmap issue has an out of bounds issue. This patch fixes the by checking that the requested mapping region size should stay within the allocated region size. Reported-by: Jonathan Greental Fixes: 08a022ad3dfa ("powerpc/powernv/memtrace: Allow mmaping trace buffers") Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250610021227.361980-1-maddy@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/powernv/memtrace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 4ac9808e55a44d..2ea30b34335415 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) { struct memtrace_entry *ent = filp->private_data; + unsigned long ent_nrpages = ent->size >> PAGE_SHIFT; + unsigned long vma_nrpages = vma_pages(vma); - if (ent->size < vma->vm_end - vma->vm_start) + /* The requested page offset should be within object's page count */ + if (vma->vm_pgoff >= ent_nrpages) return -EINVAL; - if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) + /* The requested mapping range should remain within the bounds */ + if (vma_nrpages > ent_nrpages - vma->vm_pgoff) return -EINVAL; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); From dc23a7de3ba4087f6bf5f91014e0e99975d4b122 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Tue, 10 Jun 2025 07:42:27 +0530 Subject: [PATCH 2744/2924] powerpc/vas: Return -EINVAL if the offset is non-zero in mmap() [ Upstream commit 0d67f0dee6c9176bc09a5482dd7346e3a0f14d0b ] The user space calls mmap() to map VAS window paste address and the kernel returns the complete mapped page for each window. So return -EINVAL if non-zero is passed for offset parameter to mmap(). See Documentation/arch/powerpc/vas-api.rst for mmap() restrictions. Co-developed-by: Jonathan Greental Signed-off-by: Jonathan Greental Reported-by: Jonathan Greental Fixes: dda44eb29c23 ("powerpc/vas: Add VAS user space API") Signed-off-by: Haren Myneni Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250610021227.361980-2-maddy@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/book3s/vas-api.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 0b6365d85d1171..dc6f75d3ac6ef7 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EINVAL; } + /* + * Map complete page to the paste address. So the user + * space should pass 0ULL to the offset parameter. + */ + if (vma->vm_pgoff) { + pr_debug("Page offset unsupported to map paste address\n"); + return -EINVAL; + } + /* Ensure instance has an open send window */ if (!txwin) { pr_err("No send window open?\n"); From f8a4ee5a01eb12ef0c6f10bbf0829c0682e36039 Mon Sep 17 00:00:00 2001 From: Gabriel Dalimonte Date: Sun, 1 Jun 2025 12:45:36 -0400 Subject: [PATCH 2745/2924] drm/vc4: fix infinite EPROBE_DEFER loop [ Upstream commit c0317ad44f45b3c1f0ff46a4e28d14c7bccdedf4 ] `vc4_hdmi_audio_init` calls `devm_snd_dmaengine_pcm_register` which may return EPROBE_DEFER. Calling `drm_connector_hdmi_audio_init` adds a child device. The driver model docs[1] state that adding a child device prior to returning EPROBE_DEFER may result in an infinite loop. [1] https://www.kernel.org/doc/html/v6.14/driver-api/driver-model/driver.html Fixes: 9640f1437a88 ("drm/vc4: hdmi: switch to using generic HDMI Codec infrastructure") Signed-off-by: Gabriel Dalimonte Link: https://lore.kernel.org/r/20250601-vc4-audio-inf-probe-v2-1-9ad43c7b6147@gmail.com Signed-off-by: Maxime Ripard Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_hdmi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 37a7d45695f236..176aba27b03d36 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -559,12 +559,6 @@ static int vc4_hdmi_connector_init(struct drm_device *dev, if (ret) return ret; - ret = drm_connector_hdmi_audio_init(connector, dev->dev, - &vc4_hdmi_audio_funcs, - 8, false, -1); - if (ret) - return ret; - drm_connector_helper_add(connector, &vc4_hdmi_connector_helper_funcs); /* @@ -2274,6 +2268,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) return ret; } + ret = drm_connector_hdmi_audio_init(&vc4_hdmi->connector, dev, + &vc4_hdmi_audio_funcs, 8, false, + -1); + if (ret) + return ret; + dai_link->cpus = &vc4_hdmi->audio.cpu; dai_link->codecs = &vc4_hdmi->audio.codec; dai_link->platforms = &vc4_hdmi->audio.platform; From 6e60a77ca1c2584b6c06cb924ab741baf9d6cc47 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Fri, 6 Jun 2025 22:37:29 +0200 Subject: [PATCH 2746/2924] drm/meson: fix debug log statement when setting the HDMI clocks [ Upstream commit d17e61ab63fb7747b340d6a66bf1408cd5c6562b ] The "phy" and "vclk" frequency labels were swapped, making it more difficult to debug driver errors. Swap the label order to make them match with the actual frequencies printed to correct this. Fixes: e5fab2ec9ca4 ("drm/meson: vclk: add support for YUV420 setup") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250606203729.3311592-1-martin.blumenstingl@googlemail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_encoder_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index c08fa93e50a30e..2bccda1e52a17d 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -108,7 +108,7 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, venc_freq /= 2; dev_dbg(priv->dev, - "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", + "phy:%lluHz vclk=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", phy_freq, vclk_freq, venc_freq, hdmi_freq, priv->venc.hdmi_use_enci); From 490a7de4337dd063dad07b0cff28a5748a945bd2 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 7 Jun 2025 00:10:31 +0200 Subject: [PATCH 2747/2924] drm/meson: use vclk_freq instead of pixel_freq in debug print [ Upstream commit faf2f8382088e8c74bd6eeb236c8c9190e61615e ] meson_vclk_vic_supported_freq() has a debug print which includes the pixel freq. However, within the whole function the pixel freq is irrelevant, other than checking the end of the params array. Switch to printing the vclk_freq which is being compared / matched against the inputs to the function to avoid confusion when analyzing error reports from users. Fixes: e5fab2ec9ca4 ("drm/meson: vclk: add support for YUV420 setup") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250606221031.3419353-1-martin.blumenstingl@googlemail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_vclk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 3325580d885d0a..c4123bb958e4c7 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -790,9 +790,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n", - i, params[i].pixel_freq, - PIXEL_FREQ_1000_1001(params[i].pixel_freq)); + DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", + i, params[i].vclk_freq, + PIXEL_FREQ_1000_1001(params[i].vclk_freq)); DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", i, params[i].phy_freq, PHY_FREQ_1000_1001(params[i].phy_freq)); From 89292a66c5c7e5dc0ccd834e108593b6f8a8e5cf Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 9 Jun 2025 22:27:51 +0200 Subject: [PATCH 2748/2924] drm/meson: fix more rounding issues with 59.94Hz modes [ Upstream commit 0cee6c4d3518b2e757aedae78771f17149f57653 ] Commit 1017560164b6 ("drm/meson: use unsigned long long / Hz for frequency types") attempts to resolve video playback using 59.94Hz. using YUV420 by changing the clock calculation to use Hz instead of kHz (thus yielding more precision). The basic calculation itself is correct, however the comparisions in meson_vclk_vic_supported_freq() and meson_vclk_setup() don't work anymore for 59.94Hz modes (using the freq * 1000 / 1001 logic). For example, drm/edid specifies a 593407kHz clock for 3840x2160@59.94Hz. With the mentioend commit we convert this to Hz. Then meson_vclk tries to find a matchig "params" entry (as the clock setup code currently only supports specific frequencies) by taking the venc_freq from the params and calculating the "alt frequency" (used for the 59.94Hz modes) from it, which is: (594000000Hz * 1000) / 1001 = 593406593Hz Similar calculation is applied to the phy_freq (TMDS clock), which is 10 times the pixel clock. Implement a new meson_vclk_freqs_are_matching_param() function whose purpose is to compare if the requested and calculated frequencies. They may not match exactly (for the reasons mentioned above). Allow the clocks to deviate slightly to make the 59.94Hz modes again. Fixes: 1017560164b6 ("drm/meson: use unsigned long long / Hz for frequency types") Reported-by: Christian Hewitt Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250609202751.962208-1-martin.blumenstingl@googlemail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_vclk.c | 55 ++++++++++++++++++------------ 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index c4123bb958e4c7..dfe0c28a0f054c 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -110,10 +110,7 @@ #define HDMI_PLL_LOCK BIT(31) #define HDMI_PLL_LOCK_G12A (3 << 30) -#define PIXEL_FREQ_1000_1001(_freq) \ - DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) -#define PHY_FREQ_1000_1001(_freq) \ - (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10) +#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) /* VID PLL Dividers */ enum { @@ -772,6 +769,36 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, pll_freq); } +static bool meson_vclk_freqs_are_matching_param(unsigned int idx, + unsigned long long phy_freq, + unsigned long long vclk_freq) +{ + DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", + idx, params[idx].vclk_freq, + FREQ_1000_1001(params[idx].vclk_freq)); + DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", + idx, params[idx].phy_freq, + FREQ_1000_1001(params[idx].phy_freq)); + + /* Match strict frequency */ + if (phy_freq == params[idx].phy_freq && + vclk_freq == params[idx].vclk_freq) + return true; + + /* Match 1000/1001 variant: vclk deviation has to be less than 1kHz + * (drm EDID is defined in 1kHz steps, so everything smaller must be + * rounding error) and the PHY freq deviation has to be less than + * 10kHz (as the TMDS clock is 10 times the pixel clock, so anything + * smaller must be rounding error as well). + */ + if (abs(vclk_freq - FREQ_1000_1001(params[idx].vclk_freq)) < 1000 && + abs(phy_freq - FREQ_1000_1001(params[idx].phy_freq)) < 10000) + return true; + + /* no match */ + return false; +} + enum drm_mode_status meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned long long phy_freq, @@ -790,19 +817,7 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", - i, params[i].vclk_freq, - PIXEL_FREQ_1000_1001(params[i].vclk_freq)); - DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", - i, params[i].phy_freq, - PHY_FREQ_1000_1001(params[i].phy_freq)); - /* Match strict frequency */ - if (phy_freq == params[i].phy_freq && - vclk_freq == params[i].vclk_freq) - return MODE_OK; - /* Match 1000/1001 variant */ - if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) && - vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq)) + if (meson_vclk_freqs_are_matching_param(i, phy_freq, vclk_freq)) return MODE_OK; } @@ -1075,10 +1090,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, } for (freq = 0 ; params[freq].pixel_freq ; ++freq) { - if ((phy_freq == params[freq].phy_freq || - phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) && - (vclk_freq == params[freq].vclk_freq || - vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) { + if (meson_vclk_freqs_are_matching_param(freq, phy_freq, + vclk_freq)) { if (vclk_freq != params[freq].vclk_freq) vic_alternate_clock = true; else From cfe7aae528b4d101230784ce88969d3d28a6958a Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sat, 7 Jun 2025 21:52:03 +0800 Subject: [PATCH 2749/2924] pinctrl: sunxi: dt: Consider pin base when calculating bank number from pin [ Upstream commit 5558f27a58459a4038ebb23bcb5bd40c1e345c57 ] In prepare_function_table() when the pinctrl function table IRQ entries are generated, the pin bank is calculated from the absolute pin number; however the IRQ bank mux array is indexed from the first pin bank of the controller. For R_PIO controllers, this means the absolute pin bank is way off from the relative pin bank used for array indexing. Correct this by taking into account the pin base of the controller. Fixes: f5e2cd34b12f ("pinctrl: sunxi: allow reading mux values from DT") Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/20250607135203.2085226-1-wens@kernel.org Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c index 1833078f68776c..4e34b0cd3b73aa 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c @@ -143,7 +143,7 @@ static struct sunxi_desc_pin *init_pins_table(struct device *dev, */ static int prepare_function_table(struct device *dev, struct device_node *pnode, struct sunxi_desc_pin *pins, int npins, - const u8 *irq_bank_muxes) + unsigned pin_base, const u8 *irq_bank_muxes) { struct device_node *node; struct property *prop; @@ -166,7 +166,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode, */ for (i = 0; i < npins; i++) { struct sunxi_desc_pin *pin = &pins[i]; - int bank = pin->pin.number / PINS_PER_BANK; + int bank = (pin->pin.number - pin_base) / PINS_PER_BANK; if (irq_bank_muxes[bank]) { pin->variant++; @@ -211,7 +211,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode, last_bank = 0; for (i = 0; i < npins; i++) { struct sunxi_desc_pin *pin = &pins[i]; - int bank = pin->pin.number / PINS_PER_BANK; + int bank = (pin->pin.number - pin_base) / PINS_PER_BANK; int lastfunc = pin->variant + 1; int irq_mux = irq_bank_muxes[bank]; @@ -353,7 +353,7 @@ int sunxi_pinctrl_dt_table_init(struct platform_device *pdev, return PTR_ERR(pins); ret = prepare_function_table(&pdev->dev, pnode, pins, desc->npins, - irq_bank_muxes); + desc->pin_base, irq_bank_muxes); if (ret) return ret; From 5ae6ccc8a31d53215695d1da5a0f466e8342519e Mon Sep 17 00:00:00 2001 From: Robert Malz Date: Tue, 20 May 2025 10:31:51 +0200 Subject: [PATCH 2750/2924] i40e: return false from i40e_reset_vf if reset is in progress [ Upstream commit a2c90d63b71223d69a813333c1abf4fdacddbbe5 ] The function i40e_vc_reset_vf attempts, up to 20 times, to handle a VF reset request, using the return value of i40e_reset_vf as an indicator of whether the reset was successfully triggered. Currently, i40e_reset_vf always returns true, which causes new reset requests to be ignored if a different VF reset is already in progress. This patch updates the return value of i40e_reset_vf to reflect when another VF reset is in progress, allowing the caller to properly use the retry mechanism. Fixes: 52424f974bc5 ("i40e: Fix VF hang when reset is triggered on another VF") Signed-off-by: Robert Malz Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 1120f8e4bb6703..22d5b1ec2289f3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1546,8 +1546,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) * @vf: pointer to the VF structure * @flr: VFLR was issued or not * - * Returns true if the VF is in reset, resets successfully, or resets - * are disabled and false otherwise. + * Return: True if reset was performed successfully or if resets are disabled. + * False if reset is already in progress. **/ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) { @@ -1566,7 +1566,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) /* If VF is being reset already we don't need to continue. */ if (test_and_set_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - return true; + return false; i40e_trigger_vf_reset(vf, flr); From e172bbb296c6f82014f4d2d79b71c0be989a3ad3 Mon Sep 17 00:00:00 2001 From: Robert Malz Date: Tue, 20 May 2025 10:31:52 +0200 Subject: [PATCH 2751/2924] i40e: retry VFLR handling if there is ongoing VF reset [ Upstream commit fb4e9239e029954a37a00818b21e837cebf2aa10 ] When a VFLR interrupt is received during a VF reset initiated from a different source, the VFLR may be not fully handled. This can leave the VF in an undefined state. To address this, set the I40E_VFLR_EVENT_PENDING bit again during VFLR handling if the reset is not yet complete. This ensures the driver will properly complete the VF reset in such scenarios. Fixes: 52424f974bc5 ("i40e: Fix VF hang when reset is triggered on another VF") Signed-off-by: Robert Malz Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 22d5b1ec2289f3..88e6bef69342c2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4328,7 +4328,10 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) reg = rd32(hw, I40E_GLGEN_VFLRSTAT(reg_idx)); if (reg & BIT(bit_idx)) /* i40e_reset_vf will clear the bit in GLGEN_VFLRSTAT */ - i40e_reset_vf(vf, true); + if (!i40e_reset_vf(vf, true)) { + /* At least one VF did not finish resetting, retry next time */ + set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); + } } return 0; From 325d50130db62122ef7d8744e7552444c6b4a10d Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 24 Apr 2025 15:50:13 +0200 Subject: [PATCH 2752/2924] iavf: fix reset_task for early reset event [ Upstream commit 0c6f4631436ecea841f1583b98255aedd7495b18 ] If a reset event is received from the PF early in the init cycle, the state machine hangs for about 25 seconds. Reproducer: echo 1 > /sys/class/net/$PF0/device/sriov_numvfs ip link set dev $PF0 vf 0 mac $NEW_MAC The log shows: [792.620416] ice 0000:5e:00.0: Enabling 1 VFs [792.738812] iavf 0000:5e:01.0: enabling device (0000 -> 0002) [792.744182] ice 0000:5e:00.0: Enabling 1 VFs with 17 vectors and 16 queues per VF [792.839964] ice 0000:5e:00.0: Setting MAC 52:54:00:00:00:11 on VF 0. VF driver will be reinitialized [813.389684] iavf 0000:5e:01.0: Failed to communicate with PF; waiting before retry [818.635918] iavf 0000:5e:01.0: Hardware came out of reset. Attempting reinit. [818.766273] iavf 0000:5e:01.0: Multiqueue Enabled: Queue pair count = 16 Fix it by scheduling the reset task and making the reset task capable of resetting early in the init cycle. Fixes: ef8693eb90ae3 ("i40evf: refactor reset handling") Signed-off-by: Ahmed Zaki Tested-by: Przemek Kitszel Reviewed-by: Przemek Kitszel Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 11 +++++++++++ drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 17 +++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2c0bb41809a414..81d7249d1149c8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3209,6 +3209,17 @@ static void iavf_reset_task(struct work_struct *work) } continue_reset: + /* If we are still early in the state machine, just restart. */ + if (adapter->state <= __IAVF_INIT_FAILED) { + iavf_shutdown_adminq(hw); + iavf_change_state(adapter, __IAVF_STARTUP); + iavf_startup(adapter); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, + msecs_to_jiffies(30)); + netdev_unlock(netdev); + return; + } + /* We don't use netif_running() because it may be true prior to * ndo_open() returning, so we can't assume it means all our open * tasks have finished, since we're not holding the rtnl_lock here. diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index a6f0e5990be250..07f0d0a0f1e28a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -79,6 +79,23 @@ iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event, return iavf_status_to_errno(status); received_op = (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high); + + if (received_op == VIRTCHNL_OP_EVENT) { + struct iavf_adapter *adapter = hw->back; + struct virtchnl_pf_event *vpe = + (struct virtchnl_pf_event *)event->msg_buf; + + if (vpe->event != VIRTCHNL_EVENT_RESET_IMPENDING) + continue; + + dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n"); + if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) + iavf_schedule_reset(adapter, + IAVF_FLAG_RESET_PENDING); + + return -EIO; + } + if (op_to_poll == received_op) break; } From f20899b1a877e87defe88ffec31ae7de0613704c Mon Sep 17 00:00:00 2001 From: Anton Nadezhdin Date: Tue, 20 May 2025 10:42:16 +0200 Subject: [PATCH 2753/2924] ice/ptp: fix crosstimestamp reporting [ Upstream commit a5a441ae283d54ec329aadc7426991dc32786d52 ] Set use_nsecs=true as timestamp is reported in ns. Lack of this result in smaller timestamp error window which cause error during phc2sys execution on E825 NICs: phc2sys[1768.256]: ioctl PTP_SYS_OFFSET_PRECISE: Invalid argument This problem was introduced in the cited commit which omitted setting use_nsecs to true when converting the ice driver to use convert_base_to_cs(). Testing hints (ethX is PF netdev): phc2sys -s ethX -c CLOCK_REALTIME -O 37 -m phc2sys[1769.256]: CLOCK_REALTIME phc offset -5 s0 freq -0 delay 0 Fixes: d4bea547ebb57 ("ice/ptp: Remove convert_art_to_tsc()") Signed-off-by: Anton Nadezhdin Reviewed-by: Aleksandr Loktionov Reviewed-by: Arkadiusz Kubalewski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_ptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 1fd1ae03eb9096..11ed48a62b5360 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2307,6 +2307,7 @@ static int ice_capture_crosststamp(ktime_t *device, ts = ((u64)ts_hi << 32) | ts_lo; system->cycles = ts; system->cs_id = CSID_X86_ART; + system->use_nsecs = true; /* Read Device source clock time */ ts_lo = rd32(hw, cfg->dev_time_l[tmr_idx]); From 1fd4438ddcc4958ed24662d5125114299e19bae4 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 3 Jun 2025 16:34:01 +0000 Subject: [PATCH 2754/2924] e1000: Move cancel_work_sync to avoid deadlock [ Upstream commit b4a8085ceefb7bbb12c2b71c55e71fc946c6929f ] Previously, e1000_down called cancel_work_sync for the e1000 reset task (via e1000_down_and_stop), which takes RTNL. As reported by users and syzbot, a deadlock is possible in the following scenario: CPU 0: - RTNL is held - e1000_close - e1000_down - cancel_work_sync (cancel / wait for e1000_reset_task()) CPU 1: - process_one_work - e1000_reset_task - take RTNL To remedy this, avoid calling cancel_work_sync from e1000_down (e1000_reset_task does nothing if the device is down anyway). Instead, call cancel_work_sync for e1000_reset_task when the device is being removed. Fixes: e400c7444d84 ("e1000: Hold RTNL when e1000_down can be called") Reported-by: syzbot+846bb38dc67fe62cc733@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/683837bf.a00a0220.52848.0003.GAE@google.com/ Reported-by: John Closes: https://lore.kernel.org/netdev/CAP=Rh=OEsn4y_2LvkO3UtDWurKcGPnZ_NPSXK=FbgygNXL37Sw@mail.gmail.com/ Signed-off-by: Joe Damato Acked-by: Stanislav Fomichev Acked-by: Jacob Keller Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000/e1000_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3f089c3d47b23b..d8595e84326dbc 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -477,10 +477,6 @@ static void e1000_down_and_stop(struct e1000_adapter *adapter) cancel_delayed_work_sync(&adapter->phy_info_task); cancel_delayed_work_sync(&adapter->fifo_stall_task); - - /* Only kill reset task if adapter is not resetting */ - if (!test_bit(__E1000_RESETTING, &adapter->flags)) - cancel_work_sync(&adapter->reset_task); } void e1000_down(struct e1000_adapter *adapter) @@ -1266,6 +1262,10 @@ static void e1000_remove(struct pci_dev *pdev) unregister_netdev(netdev); + /* Only kill reset task if adapter is not resetting */ + if (!test_bit(__E1000_RESETTING, &adapter->flags)) + cancel_work_sync(&adapter->reset_task); + e1000_phy_hw_reset(hw); kfree(adapter->tx_ring); From 1a677d0ceb4a5d62117b711a8b2e0aee80d33015 Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Wed, 4 Jun 2025 10:30:36 +0800 Subject: [PATCH 2755/2924] ACPI: CPPC: Fix NULL pointer dereference when nosmp is used [ Upstream commit 15eece6c5b05e5f9db0711978c3e3b7f1a2cfe12 ] With nosmp in cmdline, other CPUs are not brought up, leaving their cpc_desc_ptr NULL. CPU0's iteration via for_each_possible_cpu() dereferences these NULL pointers, causing panic. Panic backtrace: [ 0.401123] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000b8 ... [ 0.403255] [] cppc_allow_fast_switch+0x6a/0xd4 ... Kernel panic - not syncing: Attempted to kill init! Fixes: 3cc30dd00a58 ("cpufreq: CPPC: Enable fast_switch") Reported-by: Xu Lu Signed-off-by: Yunhui Cui Link: https://patch.msgid.link/20250604023036.99553-1-cuiyunhui@bytedance.com [ rjw: New subject ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/cppc_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index f193e713825ac2..ff23b6edb2df37 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -463,7 +463,7 @@ bool cppc_allow_fast_switch(void) struct cpc_desc *cpc_ptr; int cpu; - for_each_possible_cpu(cpu) { + for_each_present_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF]; if (!CPC_IN_SYSTEM_MEMORY(desired_reg) && From 1b367ba2f94251822577daed031d6b9a9e11ba91 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 9 Jun 2025 19:08:03 +0200 Subject: [PATCH 2756/2924] net: Fix TOCTOU issue in sk_is_readable() [ Upstream commit 2660a544fdc0940bba15f70508a46cf9a6491230 ] sk->sk_prot->sock_is_readable is a valid function pointer when sk resides in a sockmap. After the last sk_psock_put() (which usually happens when socket is removed from sockmap), sk->sk_prot gets restored and sk->sk_prot->sock_is_readable becomes NULL. This makes sk_is_readable() racy, if the value of sk->sk_prot is reloaded after the initial check. Which in turn may lead to a null pointer dereference. Ensure the function pointer does not turn NULL after the check. Fixes: 8934ce2fd081 ("bpf: sockmap redirect ingress support") Suggested-by: Jakub Sitnicki Signed-off-by: Michal Luczaj Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250609-skisreadable-toctou-v1-1-d0dfb2d62c37@rbox.co Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/sock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 694f954258d437..99470c6d24de8b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2978,8 +2978,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd, int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); static inline bool sk_is_readable(struct sock *sk) { - if (sk->sk_prot->sock_is_readable) - return sk->sk_prot->sock_is_readable(sk); + const struct proto *prot = READ_ONCE(sk->sk_prot); + + if (prot->sock_is_readable) + return prot->sock_is_readable(sk); + return false; } #endif /* _SOCK_H */ From fc7d81b4a9582c5c271c84da18e4bffb0cae1d69 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Mon, 9 Jun 2025 11:24:20 -0700 Subject: [PATCH 2757/2924] netconsole: fix appending sysdata when sysdata_fields == SYSDATA_RELEASE [ Upstream commit c85bf1975108d2e2431c11d1cb7e95aca587dfbe ] Before appending sysdata, prepare_extradata() checks if any feature is enabled in sysdata_fields (and exits early if none is enabled). When SYSDATA_RELEASE was introduced, we missed adding it to the list of features being checked against sysdata_fields in prepare_extradata(). The result was that, if only SYSDATA_RELEASE is enabled in sysdata_fields, we incorreclty exit early and fail to append the release. Instead of checking specific bits in sysdata_fields, check if sysdata_fields has ALL bit zeroed and exit early if true. This fixes case when only SYSDATA_RELEASE enabled and makes the code more general / less error prone in future feature implementation. Signed-off-by: Gustavo Luiz Duarte Reviewed-by: Breno Leitao Fixes: cfcc9239e78a ("netconsole: append release to sysdata") Link: https://patch.msgid.link/20250609-netconsole-fix-v1-1-17543611ae31@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/netconsole.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 4289ccd3e41bff..176935a8645ff1 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1252,7 +1252,6 @@ static int sysdata_append_release(struct netconsole_target *nt, int offset) */ static int prepare_extradata(struct netconsole_target *nt) { - u32 fields = SYSDATA_CPU_NR | SYSDATA_TASKNAME; int extradata_len; /* userdata was appended when configfs write helper was called @@ -1260,7 +1259,7 @@ static int prepare_extradata(struct netconsole_target *nt) */ extradata_len = nt->userdata_length; - if (!(nt->sysdata_fields & fields)) + if (!nt->sysdata_fields) goto out; if (nt->sysdata_fields & SYSDATA_CPU_NR) From 7a4b2ff36cf9fe5947a8971fdf55b5d100fd70ad Mon Sep 17 00:00:00 2001 From: Carlos Fernandez Date: Mon, 9 Jun 2025 09:26:26 +0200 Subject: [PATCH 2758/2924] macsec: MACsec SCI assignment for ES = 0 [ Upstream commit d9816ec74e6d6aa29219d010bba3f780ba1d9d75 ] According to 802.1AE standard, when ES and SC flags in TCI are zero, used SCI should be the current active SC_RX. Current code uses the header MAC address. Without this patch, when ES flag is 0 (using a bridge or switch), header MAC will not fit the SCI and MACSec frames will be discarted. In order to test this issue, MACsec link should be stablished between two interfaces, setting SC and ES flags to zero and a port identifier different than one. For example, using ip macsec tools: ip link add link $ETH0 macsec0 type macsec port 11 send_sci off end_station off ip macsec add macsec0 tx sa 0 pn 2 on key 01 $ETH1_KEY ip macsec add macsec0 rx port 11 address $ETH1_MAC ip macsec add macsec0 rx port 11 address $ETH1_MAC sa 0 pn 2 on key 02 ip link set dev macsec0 up ip link add link $ETH1 macsec1 type macsec port 11 send_sci off end_station off ip macsec add macsec1 tx sa 0 pn 2 on key 01 $ETH0_KEY ip macsec add macsec1 rx port 11 address $ETH0_MAC ip macsec add macsec1 rx port 11 address $ETH0_MAC sa 0 pn 2 on key 02 ip link set dev macsec1 up Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Co-developed-by: Andreu Montiel Signed-off-by: Andreu Montiel Signed-off-by: Carlos Fernandez Reviewed-by: Subbaraya Sundeep Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3d315e30ee4725..7edbe76b5455a8 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -247,15 +247,39 @@ static sci_t make_sci(const u8 *addr, __be16 port) return sci; } -static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present) +static sci_t macsec_active_sci(struct macsec_secy *secy) { - sci_t sci; + struct macsec_rx_sc *rx_sc = rcu_dereference_bh(secy->rx_sc); + + /* Case single RX SC */ + if (rx_sc && !rcu_dereference_bh(rx_sc->next)) + return (rx_sc->active) ? rx_sc->sci : 0; + /* Case no RX SC or multiple */ + else + return 0; +} + +static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present, + struct macsec_rxh_data *rxd) +{ + struct macsec_dev *macsec; + sci_t sci = 0; - if (sci_present) + /* SC = 1 */ + if (sci_present) { memcpy(&sci, hdr->secure_channel_id, sizeof(hdr->secure_channel_id)); - else + /* SC = 0; ES = 0 */ + } else if ((!(hdr->tci_an & (MACSEC_TCI_ES | MACSEC_TCI_SC))) && + (list_is_singular(&rxd->secys))) { + /* Only one SECY should exist on this scenario */ + macsec = list_first_or_null_rcu(&rxd->secys, struct macsec_dev, + secys); + if (macsec) + return macsec_active_sci(&macsec->secy); + } else { sci = make_sci(hdr->eth.h_source, MACSEC_PORT_ES); + } return sci; } @@ -1109,7 +1133,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) struct macsec_rxh_data *rxd; struct macsec_dev *macsec; unsigned int len; - sci_t sci; + sci_t sci = 0; u32 hdr_pn; bool cbit; struct pcpu_rx_sc_stats *rxsc_stats; @@ -1156,11 +1180,14 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) macsec_skb_cb(skb)->has_sci = !!(hdr->tci_an & MACSEC_TCI_SC); macsec_skb_cb(skb)->assoc_num = hdr->tci_an & MACSEC_AN_MASK; - sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci); rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); + sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci, rxd); + if (!sci) + goto drop_nosc; + list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct macsec_rx_sc *sc = find_rx_sc(&macsec->secy, sci); @@ -1283,6 +1310,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) macsec_rxsa_put(rx_sa); drop_nosa: macsec_rxsc_put(rx_sc); +drop_nosc: rcu_read_unlock(); drop_direct: kfree_skb(skb); From 049af7ac45a6b407748ee0995278fd861e36df8f Mon Sep 17 00:00:00 2001 From: Jakub Raczynski Date: Mon, 9 Jun 2025 17:31:46 +0200 Subject: [PATCH 2759/2924] net/mdiobus: Fix potential out-of-bounds read/write access [ Upstream commit 0e629694126ca388916f059453a1c36adde219c4 ] When using publicly available tools like 'mdio-tools' to read/write data from/to network interface and its PHY via mdiobus, there is no verification of parameters passed to the ioctl and it accepts any mdio address. Currently there is support for 32 addresses in kernel via PHY_MAX_ADDR define, but it is possible to pass higher value than that via ioctl. While read/write operation should generally fail in this case, mdiobus provides stats array, where wrong address may allow out-of-bounds read/write. Fix that by adding address verification before read/write operation. While this excludes this access from any statistics, it improves security of read/write operation. Fixes: 080bb352fad00 ("net: phy: Maintain MDIO device and bus statistics") Signed-off-by: Jakub Raczynski Reported-by: Wenjing Shan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/mdio_bus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index ede596c1a69d1b..adb17ec9371517 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -903,6 +903,9 @@ int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->read) retval = bus->read(bus, addr, regnum); else @@ -932,6 +935,9 @@ int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->write) err = bus->write(bus, addr, regnum, val); else From 4ded22f7f3ce9714ed72c3e9c68fea1cb9388ae7 Mon Sep 17 00:00:00 2001 From: Jakub Raczynski Date: Mon, 9 Jun 2025 17:31:47 +0200 Subject: [PATCH 2760/2924] net/mdiobus: Fix potential out-of-bounds clause 45 read/write access [ Upstream commit 260388f79e94fb3026c419a208ece8358bb7b555 ] When using publicly available tools like 'mdio-tools' to read/write data from/to network interface and its PHY via C45 (clause 45) mdiobus, there is no verification of parameters passed to the ioctl and it accepts any mdio address. Currently there is support for 32 addresses in kernel via PHY_MAX_ADDR define, but it is possible to pass higher value than that via ioctl. While read/write operation should generally fail in this case, mdiobus provides stats array, where wrong address may allow out-of-bounds read/write. Fix that by adding address verification before C45 read/write operation. While this excludes this access from any statistics, it improves security of read/write operation. Fixes: 4e4aafcddbbf ("net: mdio: Add dedicated C45 API to MDIO bus drivers") Signed-off-by: Jakub Raczynski Reported-by: Wenjing Shan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/mdio_bus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index adb17ec9371517..909b4d53fdacdc 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -999,6 +999,9 @@ int __mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->read_c45) retval = bus->read_c45(bus, addr, devad, regnum); else @@ -1030,6 +1033,9 @@ int __mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->write_c45) err = bus->write_c45(bus, addr, devad, regnum, val); else From 7d99cc0f8e6fa0f35570887899f178122a61d44e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Jun 2025 11:14:25 -0400 Subject: [PATCH 2761/2924] Bluetooth: Fix NULL pointer deference on eir_get_service_data [ Upstream commit 20a2aa01f5aeb6daad9aeaa7c33dd512c58d81eb ] The len parameter is considered optional so it can be NULL so it cannot be used for skipping to next entry of EIR_SERVICE_DATA. Fixes: 8f9ae5b3ae80 ("Bluetooth: eir: Add helpers for managing service data") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/eir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 1bc51e2b05a347..3e1713673ecc93 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -366,17 +366,19 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr) void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len) { - while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) { + size_t dlen; + + while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, &dlen))) { u16 value = get_unaligned_le16(eir); if (uuid == value) { if (len) - *len -= 2; + *len = dlen - 2; return &eir[2]; } - eir += *len; - eir_len -= *len; + eir += dlen; + eir_len -= dlen; } return NULL; From 15da883c010cbc2a84aec1738b6ad6ee477846de Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Jun 2025 11:15:16 -0400 Subject: [PATCH 2762/2924] Bluetooth: hci_sync: Fix broadcast/PA when using an existing instance [ Upstream commit 5725bc608252050ed8a4d47d59225b7dd73474c8 ] When using and existing adv_info instance for broadcast source it needs to be updated to periodic first before it can be reused, also in case the existing instance already have data hci_set_adv_instance_data cannot be used directly since it would overwrite the existing data so this reappend the original data after the Broadcast ID, if one was generated. Example: bluetoothctl># Add PBP to EA so it can be later referenced as the BIS ID bluetoothctl> advertise.service 0x1856 0x00 0x00 bluetoothctl> advertise on ... < HCI Command: LE Set Extended Advertising Data (0x08|0x0037) plen 13 Handle: 0x01 Operation: Complete extended advertising data (0x03) Fragment preference: Minimize fragmentation (0x01) Data length: 0x09 Service Data: Public Broadcast Announcement (0x1856) Data[2]: 0000 Flags: 0x06 LE General Discoverable Mode BR/EDR Not Supported ... bluetoothctl># Attempt to acquire Broadcast Source transport bluetoothctl>transport.acquire /org/bluez/hci0/pac_bcast0/fd0 ... < HCI Command: LE Set Extended Advertising Data (0x08|0x0037) plen 255 Handle: 0x01 Operation: Complete extended advertising data (0x03) Fragment preference: Minimize fragmentation (0x01) Data length: 0x0e Service Data: Broadcast Audio Announcement (0x1852) Broadcast ID: 11371620 (0xad8464) Service Data: Public Broadcast Announcement (0x1856) Data[2]: 0000 Flags: 0x06 LE General Discoverable Mode BR/EDR Not Supported Link: https://github.com/bluez/bluez/issues/1117 Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_sync.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 62d1ff951ebe60..8ba1c3aa7801a4 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1559,7 +1559,8 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) { u8 bid[3]; - u8 ad[4 + 3]; + u8 ad[HCI_MAX_EXT_AD_LENGTH]; + u8 len; /* Skip if NULL adv as instance 0x00 is used for general purpose * advertising so it cannot used for the likes of Broadcast Announcement @@ -1585,8 +1586,10 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) /* Generate Broadcast ID */ get_random_bytes(bid, sizeof(bid)); - eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); - hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL); + len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); + memcpy(ad + len, adv->adv_data, adv->adv_data_len); + hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len, + ad, 0, NULL); return hci_update_adv_data_sync(hdev, adv->instance); } @@ -1603,8 +1606,15 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, if (instance) { adv = hci_find_adv_instance(hdev, instance); - /* Create an instance if that could not be found */ - if (!adv) { + if (adv) { + /* Turn it into periodic advertising */ + adv->periodic = true; + adv->per_adv_data_len = data_len; + if (data) + memcpy(adv->per_adv_data, data, data_len); + adv->flags = flags; + } else if (!adv) { + /* Create an instance if that could not be found */ adv = hci_add_per_instance(hdev, instance, flags, data_len, data, sync_interval, From b9db0c27e73b7c8a19384a44af527edfda74ff3d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 10 Jun 2025 10:14:35 -0400 Subject: [PATCH 2763/2924] Bluetooth: eir: Fix possible crashes on eir_create_adv_data [ Upstream commit 47c03902269aff377f959dc3fd94a9733aa31d6e ] eir_create_adv_data may attempt to add EIR_FLAGS and EIR_TX_POWER without checking if that would fit. Link: https://github.com/bluez/bluez/issues/1117#issuecomment-2958244066 Fixes: 01ce70b0a274 ("Bluetooth: eir: Move EIR/Adv Data functions to its own file") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/eir.c | 7 ++++--- net/bluetooth/eir.h | 2 +- net/bluetooth/hci_sync.c | 5 +++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 3e1713673ecc93..3f72111ba651f9 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -242,7 +242,7 @@ u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) return ad_len; } -u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size) { struct adv_info *adv = NULL; u8 ad_len = 0, flags = 0; @@ -286,7 +286,7 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) /* If flags would still be empty, then there is no need to * include the "Flags" AD field". */ - if (flags) { + if (flags && (ad_len + eir_precalc_len(1) <= size)) { ptr[0] = 0x02; ptr[1] = EIR_FLAGS; ptr[2] = flags; @@ -316,7 +316,8 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) } /* Provide Tx Power only if we can provide a valid value for it */ - if (adv_tx_power != HCI_TX_POWER_INVALID) { + if (adv_tx_power != HCI_TX_POWER_INVALID && + (ad_len + eir_precalc_len(1) <= size)) { ptr[0] = 0x02; ptr[1] = EIR_TX_POWER; ptr[2] = (u8)adv_tx_power; diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h index 5c89a05e8b2905..9372db83f912fa 100644 --- a/net/bluetooth/eir.h +++ b/net/bluetooth/eir.h @@ -9,7 +9,7 @@ void eir_create(struct hci_dev *hdev, u8 *data); -u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size); u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 8ba1c3aa7801a4..83de3847c8eaf7 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1822,7 +1822,8 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) return 0; } - len = eir_create_adv_data(hdev, instance, pdu->data); + len = eir_create_adv_data(hdev, instance, pdu->data, + HCI_MAX_EXT_AD_LENGTH); pdu->length = len; pdu->handle = adv ? adv->handle : instance; @@ -1853,7 +1854,7 @@ static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) memset(&cp, 0, sizeof(cp)); - len = eir_create_adv_data(hdev, instance, cp.data); + len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && From 6bf4723a19f133d853ea43e0f37bf266f591da43 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 11 Jun 2025 16:36:27 -0400 Subject: [PATCH 2764/2924] Bluetooth: MGMT: Fix sparse errors [ Upstream commit 7dd38ba4acbea9875b4ee061e20a26413e39d9f4 ] This fixes the following errors: net/bluetooth/mgmt.c:5400:59: sparse: sparse: incorrect type in argument 3 (different base types) @@ expected unsigned short [usertype] handle @@ got restricted __le16 [usertype] monitor_handle @@ net/bluetooth/mgmt.c:5400:59: sparse: expected unsigned short [usertype] handle net/bluetooth/mgmt.c:5400:59: sparse: got restricted __le16 [usertype] monitor_handle Fixes: e6ed54e86aae ("Bluetooth: MGMT: Fix UAF on mgmt_remove_adv_monitor_complete") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506060347.ux2O1p7L-lkp@intel.com/ Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index de7adb9a47f972..d540f7b4f75fbf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5102,11 +5102,11 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, } static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev, - u16 handle) + __le16 handle) { struct mgmt_ev_adv_monitor_removed ev; - ev.monitor_handle = cpu_to_le16(handle); + ev.monitor_handle = handle; mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk); } From abb9fb1a70d24988d2234838d4822b9676e3de94 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 10 Jun 2025 18:15:06 +0300 Subject: [PATCH 2765/2924] net/mlx5: Ensure fw pages are always allocated on same NUMA [ Upstream commit f37258133c1e95e61db532e14067e28b4881bf24 ] When firmware asks the driver to allocate more pages, using event of give_pages, the driver should always allocate it from same NUMA, the original device NUMA. Current code uses dev_to_node() which can result in different NUMA as it is changed by other driver flows, such as mlx5_dma_zalloc_coherent_node(). Instead, use saved numa node for allocating firmware pages. Fixes: 311c7c71c9bb ("net/mlx5e: Allocate DMA coherent memory on reader NUMA node") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 972e8e9df585ba..9bc9bd83c2324c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -291,7 +291,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) { struct device *device = mlx5_core_dma_dev(dev); - int nid = dev_to_node(device); + int nid = dev->priv.numa_node; struct page *page; u64 zero_addr = 1; u64 addr; From 24db585d369f949f698e03d7d8017e5ae19d0497 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Tue, 10 Jun 2025 18:15:07 +0300 Subject: [PATCH 2766/2924] net/mlx5: Fix ECVF vports unload on shutdown flow [ Upstream commit 687560d8a9a2d654829ad0da1ec24242f1de711d ] Fix shutdown flow UAF when a virtual function is created on the embedded chip (ECVF) of a BlueField device. In such case the vport acl ingress table is not properly destroyed. ECVF functionality is independent of ecpf_vport_exists capability and thus functions mlx5_eswitch_(enable|disable)_pf_vf_vports() should not test it when enabling/disabling ECVF vports. kernel log: [] refcount_t: underflow; use-after-free. [] WARNING: CPU: 3 PID: 1 at lib/refcount.c:28 refcount_warn_saturate+0x124/0x220 ---------------- [] Call trace: [] refcount_warn_saturate+0x124/0x220 [] tree_put_node+0x164/0x1e0 [mlx5_core] [] mlx5_destroy_flow_table+0x98/0x2c0 [mlx5_core] [] esw_acl_ingress_table_destroy+0x28/0x40 [mlx5_core] [] esw_acl_ingress_lgcy_cleanup+0x80/0xf4 [mlx5_core] [] esw_legacy_vport_acl_cleanup+0x44/0x60 [mlx5_core] [] esw_vport_cleanup+0x64/0x90 [mlx5_core] [] mlx5_esw_vport_disable+0xc0/0x1d0 [mlx5_core] [] mlx5_eswitch_unload_ec_vf_vports+0xcc/0x150 [mlx5_core] [] mlx5_eswitch_disable_sriov+0x198/0x2a0 [mlx5_core] [] mlx5_device_disable_sriov+0xb8/0x1e0 [mlx5_core] [] mlx5_sriov_detach+0x40/0x50 [mlx5_core] [] mlx5_unload+0x40/0xc4 [mlx5_core] [] mlx5_unload_one_devl_locked+0x6c/0xe4 [mlx5_core] [] mlx5_unload_one+0x3c/0x60 [mlx5_core] [] shutdown+0x7c/0xa4 [mlx5_core] [] pci_device_shutdown+0x3c/0xa0 [] device_shutdown+0x170/0x340 [] __do_sys_reboot+0x1f4/0x2a0 [] __arm64_sys_reboot+0x2c/0x40 [] invoke_syscall+0x78/0x100 [] el0_svc_common.constprop.0+0x54/0x184 [] do_el0_svc+0x30/0xac [] el0_svc+0x48/0x160 [] el0t_64_sync_handler+0xa4/0x12c [] el0t_64_sync+0x1a4/0x1a8 [] --[ end trace 9c4601d68c70030e ]--- Fixes: a7719b29a821 ("net/mlx5: Add management of EC VF vports") Reviewed-by: Daniel Jurgens Reviewed-by: Moshe Shemesh Signed-off-by: Amir Tzin Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 7fb8a3381f849e..4917d185d0c352 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1295,12 +1295,15 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, - enabled_events); - if (ret) - goto ec_vf_err; - } + } + + /* Enable ECVF vports */ + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; } /* Enable VF vports */ @@ -1331,9 +1334,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs); + if (mlx5_ecpf_vport_exists(esw->dev)) { - if (mlx5_core_ec_sriov_enabled(esw->dev)) - mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } From ec4bcb7add9e4e91c239bf639a89e8fc4e54f296 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 10 Jun 2025 18:15:08 +0300 Subject: [PATCH 2767/2924] net/mlx5: Fix return value when searching for existing flow group [ Upstream commit 8ec40e3f1f72bf8f8accf18020d487caa99f46a4 ] When attempting to add a rule to an existing flow group, if a matching flow group exists but is not active, the error code returned should be EAGAIN, so that the rule can be added to the matching flow group once it is active, rather than ENOENT, which indicates that no matching flow group was found. Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Gavi Teitz Signed-off-by: Roi Dayan Signed-off-by: Patrisious Haddad Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6163bc98d94a94..445301ea70426d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2207,6 +2207,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; struct match_list *iter; bool take_write = false; + bool try_again = false; struct fs_fte *fte; u64 version = 0; int err; @@ -2271,6 +2272,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); if (!g->node.active) { + try_again = true; up_write_ref_node(&g->node, false); continue; } @@ -2292,7 +2294,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, tree_put_node(&fte->node, false); return rule; } - rule = ERR_PTR(-ENOENT); + err = try_again ? -EAGAIN : -ENOENT; + rule = ERR_PTR(err); out: kmem_cache_free(steering->ftes_cache, fte); return rule; From 84e4582bab4ce7bd75a676c147d18d8b1f609d80 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 10 Jun 2025 18:15:10 +0300 Subject: [PATCH 2768/2924] net/mlx5: HWS, fix missing ip_version handling in definer [ Upstream commit b5e3c76f35ee7e814c2469c73406c5bbf110d89c ] Fix missing field handling in definer - outer IP version. Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index c8cc0c8115f537..293459458cc5f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -559,6 +559,9 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IP_PROTOCOL_O, outer_headers.ip_protocol, eth_l3_outer.protocol_next_header); + HWS_SET_HDR(fc, match_param, IP_VERSION_O, + outer_headers.ip_version, + eth_l3_outer.ip_version); HWS_SET_HDR(fc, match_param, IP_TTL_O, outer_headers.ttl_hoplimit, eth_l3_outer.time_to_live_hop_limit); From 82d42826bf55f7c33f4468fb44b2f9c7b03ee5c6 Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Tue, 10 Jun 2025 18:15:11 +0300 Subject: [PATCH 2769/2924] net/mlx5: HWS, make sure the uplink is the last destination [ Upstream commit b8335829518ec5988294280e37d735799209d70d ] When there are more than one destinations, we create a FW flow table and provide it with all the destinations. FW requires to have wire as the last destination in the list (if it exists), otherwise the operation fails with FW syndrome. This patch fixes the destination array action creation: if it contains a wire destination, it is moved to the end. Fixes: 504e536d9010 ("net/mlx5: HWS, added actions handling") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-7-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/steering/hws/action.c | 14 +++++++------- .../mellanox/mlx5/core/steering/hws/fs_hws.c | 3 +++ .../mellanox/mlx5/core/steering/hws/mlx5hws.h | 1 + 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index b5332c54d4fb0f..17b8a3beb11732 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -1361,8 +1361,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; struct mlx5hws_cmd_forward_tbl *fw_island; struct mlx5hws_action *action; - u32 i /*, packet_reformat_id*/; - int ret; + int ret, last_dest_idx = -1; + u32 i; if (num_dest <= 1) { mlx5hws_err(ctx, "Action must have multiple dests\n"); @@ -1392,11 +1392,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id; fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; fte_attr.ignore_flow_level = ignore_flow_level; - /* ToDo: In SW steering we have a handling of 'go to WIRE' - * destination here by upper layer setting 'is_wire_ft' flag - * if the destination is wire. - * This is because uplink should be last dest in the list. - */ + if (dests[i].is_wire_ft) + last_dest_idx = i; break; case MLX5HWS_ACTION_TYP_VPORT: dest_list[i].destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -1420,6 +1417,9 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, } } + if (last_dest_idx != -1) + swap(dest_list[last_dest_idx], dest_list[num_dest - 1]); + fte_attr.dests_num = num_dest; fte_attr.dests = dest_list; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 1b787cd66e6fd3..29c5e00af1aa06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -966,6 +966,9 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, switch (attr->type) { case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: dest_action = mlx5_fs_get_dest_action_ft(fs_ctx, dst); + if (dst->dest_attr.ft->flags & + MLX5_FLOW_TABLE_UPLINK_VPORT) + dest_actions[num_dest_actions].is_wire_ft = true; break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: dest_action = mlx5_fs_get_dest_action_table_num(fs_ctx, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 8ed8a715a2eb26..173f7ed1c17c3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -211,6 +211,7 @@ struct mlx5hws_action_dest_attr { struct mlx5hws_action *dest; /* Optional reformat action */ struct mlx5hws_action *reformat; + bool is_wire_ft; }; /** From b74a34d3095db53095b1bb031b267affded9bf3e Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 10 Jun 2025 18:15:13 +0300 Subject: [PATCH 2770/2924] net/mlx5e: Fix leak of Geneve TLV option object [ Upstream commit aa9c44b842096c553871bc68a8cebc7861fa192b ] Previously, a unique tunnel id was added for the matching on TC non-zero chains, to support inner header rewrite with goto action. Later, it was used to support VF tunnel offload for vxlan, then for Geneve and GRE. To support VF tunnel, a temporary mlx5_flow_spec is used to parse tunnel options. For Geneve, if there is TLV option, a object is created, or refcnt is added if already exists. But the temporary mlx5_flow_spec is directly freed after parsing, which causes the leak because no information regarding the object is saved in flow's mlx5_flow_spec, which is used to free the object when deleting the flow. To fix the leak, call mlx5_geneve_tlv_option_del() before free the temporary spec if it has TLV object. Fixes: 521933cdc4aa ("net/mlx5e: Support Geneve and GRE with VF tunnel offload") Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Reviewed-by: Alex Lazar Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-9-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f1d908f611349f..fef418e1ed1a08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2028,9 +2028,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return err; } -static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +static bool mlx5_flow_has_geneve_opt(struct mlx5_flow_spec *spec) { - struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); @@ -2069,7 +2068,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } complete_all(&flow->del_hw_done); - if (mlx5_flow_has_geneve_opt(flow)) + if (mlx5_flow_has_geneve_opt(&attr->parse_attr->spec)) mlx5_geneve_tlv_option_del(priv->mdev->geneve); if (flow->decap_route) @@ -2574,12 +2573,13 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); if (err) { - kvfree(tmp_spec); NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); - return err; + } else { + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); } - err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + if (mlx5_flow_has_geneve_opt(tmp_spec)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); kvfree(tmp_spec); if (err) return err; From e2c67651b2030e0a039f8aca75e97bcf4664d56f Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Tue, 10 Jun 2025 18:15:14 +0300 Subject: [PATCH 2771/2924] net/mlx5e: Fix number of lanes to UNKNOWN when using data_rate_oper [ Upstream commit 875d7c160d60616ff06dedb70470ad199661efe7 ] When the link is up, either eth_proto_oper or ext_eth_proto_oper typically reports the active link protocol, from which both speed and number of lanes can be retrieved. However, in certain cases, such as when a NIC is connected via a non-standard cable, the firmware may not report the protocol. In such scenarios, the speed can still be obtained from the data_rate_oper field in PTYS register. Since data_rate_oper provides only speed information and lacks lane details, it is incorrect to derive the number of lanes from it. This patch corrects the behavior by setting the number of lanes to UNKNOWN instead of incorrectly using MAX_LANES when relying on data_rate_oper. Fixes: 7e959797f021 ("net/mlx5e: Enable lanes configuration when auto-negotiation is off") Signed-off-by: Shahar Shitrit Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-10-mbloch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index fdf9e9bb99ace6..6253ea4e99a44f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -43,7 +43,6 @@ #include "en/fs_ethtool.h" #define LANES_UNKNOWN 0 -#define MAX_LANES 8 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) @@ -1098,10 +1097,8 @@ static void get_link_properties(struct net_device *netdev, speed = info->speed; lanes = info->lanes; duplex = DUPLEX_FULL; - } else if (data_rate_oper) { + } else if (data_rate_oper) speed = 100 * data_rate_oper; - lanes = MAX_LANES; - } out: link_ksettings->base.duplex = duplex; From 6cfc38a8bf5d1b808c72b88ea0619962378303d6 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 6 Jun 2025 11:43:20 +0200 Subject: [PATCH 2772/2924] net: phy: phy_caps: Don't skip better duplex macth on non-exact match [ Upstream commit d4e6cb324dcc952618fec6b25aa3fc7bfc2750b4 ] When performing a non-exact phy_caps lookup, we are looking for a supported mode that matches as closely as possible the passed speed/duplex. Blamed patch broke that logic by returning a match too early in case the caller asks for half-duplex, as a full-duplex linkmode may match first, and returned as a non-exact match without even trying to mach on half-duplex modes. Reported-by: Jijie Shao Closes: https://lore.kernel.org/netdev/20250603102500.4ec743cf@fedora/T/#m22ed60ca635c67dc7d9cbb47e8995b2beb5c1576 Tested-by: Jijie Shao Reviewed-by: Larysa Zaremba Fixes: fc81e257d19f ("net: phy: phy_caps: Allow looking-up link caps based on speed and duplex") Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20250606094321.483602-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/phy_caps.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy_caps.c b/drivers/net/phy/phy_caps.c index 7033216897264e..38417e2886118c 100644 --- a/drivers/net/phy/phy_caps.c +++ b/drivers/net/phy/phy_caps.c @@ -188,6 +188,9 @@ phy_caps_lookup_by_linkmode_rev(const unsigned long *linkmodes, bool fdx_only) * When @exact is not set, we return either an exact match, or matching capabilities * at lower speed, or the lowest matching speed, or NULL. * + * Non-exact matches will try to return an exact speed and duplex match, but may + * return matching capabilities with same speed but a different duplex. + * * Returns: a matched link_capabilities according to the above process, NULL * otherwise. */ @@ -195,7 +198,7 @@ const struct link_capabilities * phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, bool exact) { - const struct link_capabilities *lcap, *last = NULL; + const struct link_capabilities *lcap, *match = NULL, *last = NULL; for_each_link_caps_desc_speed(lcap) { if (linkmode_intersects(lcap->linkmodes, supported)) { @@ -204,16 +207,19 @@ phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, if (lcap->speed == speed && lcap->duplex == duplex) { return lcap; } else if (!exact) { - if (lcap->speed <= speed) - return lcap; + if (!match && lcap->speed <= speed) + match = lcap; + + if (lcap->speed < speed) + break; } } } - if (!exact) - return last; + if (!match && !exact) + match = last; - return NULL; + return match; } EXPORT_SYMBOL_GPL(phy_caps_lookup); From 93f9eeb678d4c9c1abf720b3615fa8299a490845 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:11 +0000 Subject: [PATCH 2773/2924] net_sched: prio: fix a race in prio_tune() [ Upstream commit d35acc1be3480505b5931f17e4ea9b7617fea4d3 ] Gerrard Tai reported a race condition in PRIO, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: 7b8e0b6e6599 ("net: sched: prio: delay destroying child qdiscs on change") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-2-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_prio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index cc30f7a32f1a78..9e2b9a490db23d 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -211,7 +211,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i = q->bands; i < oldbands; i++) - qdisc_tree_flush_backlog(q->queues[i]); + qdisc_purge_queue(q->queues[i]); for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; From 444ad445df5496a785705019268a8a84b84484bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:12 +0000 Subject: [PATCH 2774/2924] net_sched: red: fix a race in __red_change() [ Upstream commit 85a3e0ede38450ea3053b8c45d28cf55208409b8 ] Gerrard Tai reported a race condition in RED, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: 0c8d13ac9607 ("net: sched: red: delay destroying child qdisc on replace") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-3-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_red.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 1ba3e0bba54f0c..4696c893cf553c 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -285,7 +285,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, q->userbits = userbits; q->limit = ctl->limit; if (child) { - qdisc_tree_flush_backlog(q->qdisc); + qdisc_purge_queue(q->qdisc); old_child = q->qdisc; q->qdisc = child; } From 6e82433ca8f996ef3b79983f645690202d7da482 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:13 +0000 Subject: [PATCH 2775/2924] net_sched: tbf: fix a race in tbf_change() [ Upstream commit 43eb466041216d25dedaef1c383ad7bd89929cbc ] Gerrard Tai reported a race condition in TBF, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: b05972f01e7d ("net: sched: tbf: don't call qdisc_put() while holding tree lock") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Cc: Zhengchao Shao Link: https://patch.msgid.link/20250611111515.1983366-4-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_tbf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index dc26b22d53c734..4c977f049670a6 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -452,7 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { - qdisc_tree_flush_backlog(q->qdisc); + qdisc_purge_queue(q->qdisc); old = q->qdisc; q->qdisc = child; } From fed94bd51d62d2e0e006aa61480e94e5cd0582b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:14 +0000 Subject: [PATCH 2776/2924] net_sched: ets: fix a race in ets_qdisc_change() [ Upstream commit d92adacdd8c2960be856e0b82acc5b7c5395fddb ] Gerrard Tai reported a race condition in ETS, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: b05972f01e7d ("net: sched: tbf: don't call qdisc_put() while holding tree lock") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-5-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_ets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 2c069f0181c62b..037f764822b965 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -661,7 +661,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, for (i = q->nbands; i < oldbands; i++) { if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) list_del_init(&q->classes[i].alist); - qdisc_tree_flush_backlog(q->classes[i].qdisc); + qdisc_purge_queue(q->classes[i].qdisc); } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); From 6837dd877270c57689bd866de9f3de14172c2439 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jun 2025 10:46:43 -0700 Subject: [PATCH 2777/2924] net: drv: netdevsim: don't napi_complete() from netpoll [ Upstream commit 1264971017b4d7141352a7fe29021bdfce5d885d ] netdevsim supports netpoll. Make sure we don't call napi_complete() from it, since it may not be scheduled. Breno reports hitting a warning in napi_complete_done(): WARNING: CPU: 14 PID: 104 at net/core/dev.c:6592 napi_complete_done+0x2cc/0x560 __napi_poll+0x2d8/0x3a0 handle_softirqs+0x1fe/0x710 This is presumably after netpoll stole the SCHED bit prematurely. Reported-by: Breno Leitao Fixes: 3762ec05a9fb ("netdevsim: add NAPI support") Tested-by: Breno Leitao Link: https://patch.msgid.link/20250611174643.2769263-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/netdevsim/netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 0e0321a7ddd710..31a06e71be25bb 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -369,7 +369,8 @@ static int nsim_poll(struct napi_struct *napi, int budget) int done; done = nsim_rcv(rq, budget); - napi_complete(napi); + if (done < budget) + napi_complete_done(napi, done); return done; } From bdb0bd6e4fef56d958e9e27455cebff423fa9c73 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 12 Jun 2025 10:19:57 +0300 Subject: [PATCH 2778/2924] net: ethtool: Don't check if RSS context exists in case of context 0 [ Upstream commit d78ebc772c7ceccf6e655ddb93099f49a1268af4 ] Context 0 (default context) always exists, there is no need to check whether it exists or not when adding a flow steering rule. The existing check fails when creating a flow steering rule for context 0 as it is not stored in the rss_ctx xarray. For example: $ ethtool --config-ntuple eth2 flow-type tcp4 dst-ip 194.237.147.23 dst-port 19983 context 0 loc 618 rmgr: Cannot insert RX class rule: Invalid argument Cannot insert classification rule An example usecase for this could be: - A high-priority rule (loc 0) directing specific port traffic to context 0. - A low-priority rule (loc 1) directing all other TCP traffic to context 1. This is a user-visible regression that was caught in our testing environment, it was not reported by a user yet. Fixes: de7f7582dff2 ("net: ethtool: prevent flow steering to RSS contexts which don't exist") Reviewed-by: Tariq Toukan Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Reviewed-by: Joe Damato Reviewed-by: Edward Cree Link: https://patch.msgid.link/20250612071958.1696361-2-gal@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ethtool/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 8262cc10f98db7..4b1badeebc741c 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1001,7 +1001,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; - if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context)) + if (info.rss_context && + !xa_load(&dev->ethtool->rss_ctx, info.rss_context)) return -EINVAL; } From 857e6b8b0d27350ed1c4d457a10382649e41df16 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 4 Jun 2025 08:03:05 -0700 Subject: [PATCH 2779/2924] drm/xe/lrc: Use a temporary buffer for WA BB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9c7632faad434c98f1f2cc06f3647a5a5d05ddbf ] In case the BO is in iomem, we can't simply take the vaddr and write to it. Instead, prepare a separate buffer that is later copied into io memory. Right now it's just a few words that could be using xe_map_write32(), but the intention is to grow the WA BB for other uses. Fixes: 617d824c5323 ("drm/xe: Add WA BB to capture active context utilization") Cc: Umesh Nerlige Ramappa Cc: Tvrtko Ursulin Reviewed-by: Matthew Brost Reviewed-by: Umesh Nerlige Ramappa Link: https://lore.kernel.org/r/20250604-wa-bb-fix-v1-1-0dfc5dafcef0@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit ef48715b2d3df17c060e23b9aa636af3d95652f8) Signed-off-by: Thomas Hellström Signed-off-by: Sasha Levin --- drivers/gpu/drm/xe/xe_lrc.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 03bfba696b3789..16e20b5ad325f9 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -941,11 +941,18 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. */ #define CONTEXT_ACTIVE 1ULL -static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +static int xe_lrc_setup_utilization(struct xe_lrc *lrc) { - u32 *cmd; + u32 *cmd, *buf = NULL; - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + if (lrc->bb_per_ctx_bo->vmap.is_iomem) { + buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + cmd = buf; + } else { + cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + } *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; *cmd++ = ENGINE_ID(0).addr; @@ -966,9 +973,16 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = MI_BATCH_BUFFER_END; + if (buf) { + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0, + buf, (cmd - buf) * sizeof(*cmd)); + kfree(buf); + } + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + return 0; } #define PVC_CTX_ASID (0x2e + 1) @@ -1123,7 +1137,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - xe_lrc_setup_utilization(lrc); + err = xe_lrc_setup_utilization(lrc); + if (err) + goto err_lrc_finish; return 0; From 8d9d32088e304e2bc444a3087cab0bbbd9951866 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 10 Apr 2025 17:11:14 +0100 Subject: [PATCH 2780/2924] btrfs: exit after state insertion failure at btrfs_convert_extent_bit() [ Upstream commit 3bf179e36da917c5d9bec71c714573ed1649b7c1 ] If insert_state() state failed it returns an error pointer and we call extent_io_tree_panic() which will trigger a BUG() call. However if CONFIG_BUG is disabled, which is an uncommon and exotic scenario, then we fallthrough and call cache_state() which will dereference the error pointer, resulting in an invalid memory access. So jump to the 'out' label after calling extent_io_tree_panic(), it also makes the code more clear besides dealing with the exotic scenario where CONFIG_BUG is disabled. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent-io-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 13de6af279e526..92cfde37b1d339 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1456,6 +1456,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (IS_ERR(inserted_state)) { ret = PTR_ERR(inserted_state); extent_io_tree_panic(tree, prealloc, "insert", ret); + goto out; } cache_state(inserted_state, cached_state); if (inserted_state == prealloc) From e680aadc16f24cd8963d03ac4d51e9b0c3f54e5b Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Thu, 10 Apr 2025 19:45:27 +0800 Subject: [PATCH 2781/2924] fs/filesystems: Fix potential unsigned integer underflow in fs_name() [ Upstream commit 1363c134ade81e425873b410566e957fecebb261 ] fs_name() has @index as unsigned int, so there is underflow risk for operation '@index--'. Fix by breaking the for loop when '@index == 0' which is also more proper than '@index <= 0' for unsigned integer comparison. Signed-off-by: Zijun Hu Link: https://lore.kernel.org/20250410-fix_fs-v1-1-7c14ccc8ebaa@quicinc.com Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/filesystems.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/filesystems.c b/fs/filesystems.c index 58b9067b2391ce..95e5256821a534 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -156,15 +156,19 @@ static int fs_index(const char __user * __name) static int fs_name(unsigned int index, char __user * buf) { struct file_system_type * tmp; - int len, res; + int len, res = -EINVAL; read_lock(&file_systems_lock); - for (tmp = file_systems; tmp; tmp = tmp->next, index--) - if (index <= 0 && try_module_get(tmp->owner)) + for (tmp = file_systems; tmp; tmp = tmp->next, index--) { + if (index == 0) { + if (try_module_get(tmp->owner)) + res = 0; break; + } + } read_unlock(&file_systems_lock); - if (!tmp) - return -EINVAL; + if (res) + return res; /* OK, we got the reference, so we can safely block */ len = strlen(tmp->name) + 1; From effdcec367669b151f6077f046b4d2c3203a7ec0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 20 Mar 2025 16:05:50 +0000 Subject: [PATCH 2782/2924] btrfs: fix fsync of files with no hard links not persisting deletion [ Upstream commit 5e85262e542d6da8898bb8563a724ad98f6fc936 ] If we fsync a file (or directory) that has no more hard links, because while a process had a file descriptor open on it, the file's last hard link was removed and then the process did an fsync against the file descriptor, after a power failure or crash the file still exists after replaying the log. This behaviour is incorrect since once an inode has no more hard links it's not accessible anymore and we insert an orphan item into its subvolume's tree so that the deletion of all its items is not missed in case of a power failure or crash. So after log replay the file shouldn't exist anymore, which is also the behaviour on ext4, xfs, f2fs and other filesystems. Fix this by not ignoring inodes with zero hard links at btrfs_log_inode_parent() and by committing an inode's delayed inode when we are not doing a fast fsync (either BTRFS_INODE_COPY_EVERYTHING or BTRFS_INODE_NEEDS_FULL_SYNC is set in the inode's runtime flags). This last step is necessary because when removing the last hard link we don't delete the corresponding ref (or extref) item, instead we record the change in the inode's delayed inode with the BTRFS_DELAYED_NODE_DEL_IREF flag, so that when the delayed inode is committed we delete the ref/extref item from the inode's subvolume tree - otherwise the logging code will log the last hard link and therefore upon log replay the inode is not deleted. The base code for a fstests test case that reproduces this bug is the following: . ./common/dmflakey _require_scratch _require_dm_target flakey _require_mknod _scratch_mkfs >>$seqres.full 2>&1 || _fail "mkfs failed" _require_metadata_journaling $SCRATCH_DEV _init_flakey _mount_flakey touch $SCRATCH_MNT/foo # Commit the current transaction and persist the file. _scratch_sync # A fifo to communicate with a background xfs_io process that will # fsync the file after we deleted its hard link while it's open by # xfs_io. mkfifo $SCRATCH_MNT/fifo tail -f $SCRATCH_MNT/fifo | \ $XFS_IO_PROG $SCRATCH_MNT/foo >>$seqres.full & XFS_IO_PID=$! # Give some time for the xfs_io process to open a file descriptor for # the file. sleep 1 # Now while the file is open by the xfs_io process, delete its only # hard link. rm -f $SCRATCH_MNT/foo # Now that it has no more hard links, make the xfs_io process fsync it. echo "fsync" > $SCRATCH_MNT/fifo # Terminate the xfs_io process so that we can unmount. echo "quit" > $SCRATCH_MNT/fifo wait $XFS_IO_PID unset XFS_IO_PID # Simulate a power failure and then mount again the filesystem to # replay the journal/log. _flakey_drop_and_remount # We don't expect the file to exist anymore, since it was fsynced when # it had no more hard links. [ -f $SCRATCH_MNT/foo ] && echo "file foo still exists" _unmount_flakey # success, all done echo "Silence is golden" status=0 exit A test case for fstests will be submitted soon. Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/tree-log.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 90dc094cfa5e5a..f5af11565b8760 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6583,6 +6583,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, btrfs_log_get_delayed_items(inode, &delayed_ins_list, &delayed_del_list); + /* + * If we are fsyncing a file with 0 hard links, then commit the delayed + * inode because the last inode ref (or extref) item may still be in the + * subvolume tree and if we log it the file will still exist after a log + * replay. So commit the delayed inode to delete that last ref and we + * skip logging it. + */ + if (inode->vfs_inode.i_nlink == 0) { + ret = btrfs_commit_inode_delayed_inode(inode); + if (ret) + goto out_unlock; + } + ret = copy_inode_items_to_log(trans, inode, &min_key, &max_key, path, dst_path, logged_isize, inode_only, ctx, @@ -7051,14 +7064,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (btrfs_root_generation(&root->root_item) == trans->transid) return BTRFS_LOG_FORCE_COMMIT; - /* - * Skip already logged inodes or inodes corresponding to tmpfiles - * (since logging them is pointless, a link count of 0 means they - * will never be accessible). - */ - if ((btrfs_inode_in_log(inode, trans->transid) && - list_empty(&ctx->ordered_extents)) || - inode->vfs_inode.i_nlink == 0) + /* Skip already logged inodes and without new extents. */ + if (btrfs_inode_in_log(inode, trans->transid) && + list_empty(&ctx->ordered_extents)) return BTRFS_NO_LOG_SYNC; ret = start_log_trans(trans, root, ctx); From 2df944842ecdaa72b5d9124247f3b0de030ae06a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 4 Apr 2025 21:02:28 +0200 Subject: [PATCH 2783/2924] gfs2: pass through holder from the VFS for freeze/thaw [ Upstream commit 62a2175ddf7e72941868f164b7c1f92e00f213bd ] The filesystem's freeze/thaw functions can be called from contexts where the holder isn't userspace but the kernel, e.g., during systemd suspend/hibernate. So pass through the freeze/thaw flags from the VFS instead of hard-coding them. Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/gfs2/super.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 54f1efd47a3e5f..0bd7827e6371e2 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -674,7 +674,7 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) return sdp->sd_log_error; } -static int gfs2_do_thaw(struct gfs2_sbd *sdp) +static int gfs2_do_thaw(struct gfs2_sbd *sdp, enum freeze_holder who) { struct super_block *sb = sdp->sd_vfs; int error; @@ -682,7 +682,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp) error = gfs2_freeze_lock_shared(sdp); if (error) goto fail; - error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); + error = thaw_super(sb, who); if (!error) return 0; @@ -710,7 +710,7 @@ void gfs2_freeze_func(struct work_struct *work) gfs2_freeze_unlock(sdp); set_bit(SDF_FROZEN, &sdp->sd_flags); - error = gfs2_do_thaw(sdp); + error = gfs2_do_thaw(sdp, FREEZE_HOLDER_USERSPACE); if (error) goto out; @@ -728,6 +728,7 @@ void gfs2_freeze_func(struct work_struct *work) /** * gfs2_freeze_super - prevent further writes to the filesystem * @sb: the VFS structure for the filesystem + * @who: freeze flags * */ @@ -744,7 +745,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who) } for (;;) { - error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); + error = freeze_super(sb, who); if (error) { fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error); @@ -758,7 +759,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who) break; } - error = gfs2_do_thaw(sdp); + error = gfs2_do_thaw(sdp, who); if (error) goto out; @@ -796,6 +797,7 @@ static int gfs2_freeze_fs(struct super_block *sb) /** * gfs2_thaw_super - reallow writes to the filesystem * @sb: the VFS structure for the filesystem + * @who: freeze flags * */ @@ -814,7 +816,7 @@ static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who) atomic_inc(&sb->s_active); gfs2_freeze_unlock(sdp); - error = gfs2_do_thaw(sdp); + error = gfs2_do_thaw(sdp, who); if (!error) { clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); From e41b74ffc7cd6607ed8506b8b53b2eba29209658 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 16 Apr 2025 16:00:28 +0100 Subject: [PATCH 2784/2924] btrfs: exit after state split error at set_extent_bit() [ Upstream commit 41d69d4d78d8b179bf3bcdfc56d28a12b3a608d2 ] If split_state() returned an error we call extent_io_tree_panic() which will trigger a BUG() call. However if CONFIG_BUG is disabled, which is an uncommon and exotic scenario, then we fallthrough and hit a use after free when calling set_state_bits() since the extent state record which the local variable 'prealloc' points to was freed by split_state(). So jump to the label 'out' after calling extent_io_tree_panic() and set the 'prealloc' pointer to NULL since split_state() has already freed it when it hit an error. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent-io-tree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 92cfde37b1d339..b5b44ea91f9996 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1252,8 +1252,11 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (!prealloc) goto search_again; ret = split_state(tree, state, prealloc, end + 1); - if (ret) + if (ret) { extent_io_tree_panic(tree, state, "split", ret); + prealloc = NULL; + goto out; + } set_state_bits(tree, prealloc, bits, changeset); cache_state(prealloc, cached_state); From 87457ff0c4f198ab6db3f225b4be8963497ffaae Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 7 May 2025 14:23:03 +0200 Subject: [PATCH 2785/2924] nvmet-fcloop: access fcpreq only when holding reqlock [ Upstream commit 47a827cd7929d0550c3496d70b417fcb5649b27b ] The abort handling logic expects that the state and the fcpreq are only accessed when holding the reqlock lock. While at it, only handle the aborts in the abort handler. Signed-off-by: Daniel Wagner Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/target/fcloop.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 641201e62c1baf..20becea1ad9683 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -618,12 +618,13 @@ fcloop_fcp_recv_work(struct work_struct *work) { struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, fcp_rcv_work); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; unsigned long flags; int ret = 0; bool aborted = false; spin_lock_irqsave(&tfcp_req->reqlock, flags); + fcpreq = tfcp_req->fcpreq; switch (tfcp_req->inistate) { case INI_IO_START: tfcp_req->inistate = INI_IO_ACTIVE; @@ -638,16 +639,19 @@ fcloop_fcp_recv_work(struct work_struct *work) } spin_unlock_irqrestore(&tfcp_req->reqlock, flags); - if (unlikely(aborted)) - ret = -ECANCELED; - else { - if (likely(!check_for_drop(tfcp_req))) - ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, - &tfcp_req->tgt_fcp_req, - fcpreq->cmdaddr, fcpreq->cmdlen); - else - pr_info("%s: dropped command ********\n", __func__); + if (unlikely(aborted)) { + /* the abort handler will call fcloop_call_host_done */ + return; + } + + if (unlikely(check_for_drop(tfcp_req))) { + pr_info("%s: dropped command ********\n", __func__); + return; } + + ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, + &tfcp_req->tgt_fcp_req, + fcpreq->cmdaddr, fcpreq->cmdlen); if (ret) fcloop_call_host_done(fcpreq, tfcp_req, ret); } @@ -662,9 +666,10 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) unsigned long flags; spin_lock_irqsave(&tfcp_req->reqlock, flags); - fcpreq = tfcp_req->fcpreq; switch (tfcp_req->inistate) { case INI_IO_ABORTED: + fcpreq = tfcp_req->fcpreq; + tfcp_req->fcpreq = NULL; break; case INI_IO_COMPLETED: completed = true; @@ -686,10 +691,6 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, &tfcp_req->tgt_fcp_req); - spin_lock_irqsave(&tfcp_req->reqlock, flags); - tfcp_req->fcpreq = NULL; - spin_unlock_irqrestore(&tfcp_req->reqlock, flags); - fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); /* call_host_done releases reference for abort downcall */ } From 78ef650ab3b7ae94894a5ce396b5a14a0987a9bd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 9 May 2025 12:12:48 +0100 Subject: [PATCH 2786/2924] io_uring: fix spurious drain flushing [ Upstream commit fde04c7e2775feb0746301e0ef86a04d3598c3fe ] io_queue_deferred() is not tolerant to spurious calls not completing some requests. You can have an inflight drain-marked request and another request that came after and got queued into the drain list. Now, if io_queue_deferred() is called before the first request completes, it'll check the 2nd req with req_need_defer(), find that there is no drain flag set, and queue it for execution. To make io_queue_deferred() work, it should at least check sequences for the first request, and then we need also need to check if there is another drain request creating another bubble. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/972bde11b7d4ef25b3f5e3fd34f80e4d2aa345b8.1746788718.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/io_uring.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index edda31a15c6e65..9266d4f2016ad3 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -537,18 +537,30 @@ void io_req_queue_iowq(struct io_kiocb *req) io_req_task_work_add(req); } +static bool io_drain_defer_seq(struct io_kiocb *req, u32 seq) +{ + struct io_ring_ctx *ctx = req->ctx; + + return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail; +} + static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx) { + bool drain_seen = false, first = true; + spin_lock(&ctx->completion_lock); while (!list_empty(&ctx->defer_list)) { struct io_defer_entry *de = list_first_entry(&ctx->defer_list, struct io_defer_entry, list); - if (req_need_defer(de->req, de->seq)) + drain_seen |= de->req->flags & REQ_F_IO_DRAIN; + if ((drain_seen || first) && io_drain_defer_seq(de->req, de->seq)) break; + list_del_init(&de->list); io_req_task_queue(de->req); kfree(de); + first = false; } spin_unlock(&ctx->completion_lock); } From f957ad4c6822196ce0fd5c6e0ae03961b6e089e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jan 2025 10:54:50 +0100 Subject: [PATCH 2787/2924] perf: Ensure bpf_perf_link path is properly serialized [ Upstream commit 7ed9138a72829d2035ecbd8dbd35b1bc3c137c40 ] Ravi reported that the bpf_perf_link_attach() usage of perf_event_set_bpf_prog() is not serialized by ctx->mutex, unlike the PERF_EVENT_IOC_SET_BPF case. Reported-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ravi Bangoria Link: https://lkml.kernel.org/r/20250307193305.486326750@infradead.org Signed-off-by: Sasha Levin --- kernel/events/core.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 881d768e455642..e97bc9220fd1a8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6239,6 +6239,9 @@ static int perf_event_set_output(struct perf_event *event, static int perf_event_set_filter(struct perf_event *event, void __user *arg); static int perf_copy_attr(struct perf_event_attr __user *uattr, struct perf_event_attr *attr); +static int __perf_event_set_bpf_prog(struct perf_event *event, + struct bpf_prog *prog, + u64 bpf_cookie); static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) { @@ -6301,7 +6304,7 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon if (IS_ERR(prog)) return PTR_ERR(prog); - err = perf_event_set_bpf_prog(event, prog, 0); + err = __perf_event_set_bpf_prog(event, prog, 0); if (err) { bpf_prog_put(prog); return err; @@ -11069,8 +11072,9 @@ static inline bool perf_event_is_tracing(struct perf_event *event) return false; } -int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, - u64 bpf_cookie) +static int __perf_event_set_bpf_prog(struct perf_event *event, + struct bpf_prog *prog, + u64 bpf_cookie) { bool is_kprobe, is_uprobe, is_tracepoint, is_syscall_tp; @@ -11108,6 +11112,20 @@ int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, return perf_event_attach_bpf_prog(event, prog, bpf_cookie); } +int perf_event_set_bpf_prog(struct perf_event *event, + struct bpf_prog *prog, + u64 bpf_cookie) +{ + struct perf_event_context *ctx; + int ret; + + ctx = perf_event_ctx_lock(event); + ret = __perf_event_set_bpf_prog(event, prog, bpf_cookie); + perf_event_ctx_unlock(event, ctx); + + return ret; +} + void perf_event_free_bpf_prog(struct perf_event *event) { if (!event->prog) @@ -11130,7 +11148,15 @@ static void perf_event_free_filter(struct perf_event *event) { } -int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, +static int __perf_event_set_bpf_prog(struct perf_event *event, + struct bpf_prog *prog, + u64 bpf_cookie) +{ + return -ENOENT; +} + +int perf_event_set_bpf_prog(struct perf_event *event, + struct bpf_prog *prog, u64 bpf_cookie) { return -ENOENT; From 6314f004f1e88d231c9234c943b3ac480b0423d9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 5 May 2025 22:17:42 +0800 Subject: [PATCH 2788/2924] block: use q->elevator with ->elevator_lock held in elv_iosched_show() [ Upstream commit 94209d27d14104ed828ca88cd5403a99162fe51a ] Use q->elevator with ->elevator_lock held in elv_iosched_show(), since the local cached elevator reference may become stale after getting ->elevator_lock. Reviewed-by: Hannes Reinecke Reviewed-by: Nilay Shroff Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250505141805.2751237-5-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/elevator.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index b4d08026b02cef..dc4cadef728e55 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -744,7 +744,6 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, ssize_t elv_iosched_show(struct gendisk *disk, char *name) { struct request_queue *q = disk->queue; - struct elevator_queue *eq = q->elevator; struct elevator_type *cur = NULL, *e; int len = 0; @@ -753,7 +752,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name) len += sprintf(name+len, "[none] "); } else { len += sprintf(name+len, "none "); - cur = eq->type; + cur = q->elevator->type; } spin_lock(&elv_list_lock); From d0932758a0a77b38ba1b39564f3b7aba12407061 Mon Sep 17 00:00:00 2001 From: Penglei Jiang Date: Tue, 10 Jun 2025 10:18:01 -0700 Subject: [PATCH 2789/2924] io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo() [ Upstream commit ac0b8b327a5677dc6fecdf353d808161525b1ff0 ] syzbot reports: BUG: KASAN: slab-use-after-free in getrusage+0x1109/0x1a60 Read of size 8 at addr ffff88810de2d2c8 by task a.out/304 CPU: 0 UID: 0 PID: 304 Comm: a.out Not tainted 6.16.0-rc1 #1 PREEMPT(voluntary) Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 Call Trace: dump_stack_lvl+0x53/0x70 print_report+0xd0/0x670 ? __pfx__raw_spin_lock_irqsave+0x10/0x10 ? getrusage+0x1109/0x1a60 kasan_report+0xce/0x100 ? getrusage+0x1109/0x1a60 getrusage+0x1109/0x1a60 ? __pfx_getrusage+0x10/0x10 __io_uring_show_fdinfo+0x9fe/0x1790 ? ksys_read+0xf7/0x1c0 ? do_syscall_64+0xa4/0x260 ? vsnprintf+0x591/0x1100 ? __pfx___io_uring_show_fdinfo+0x10/0x10 ? __pfx_vsnprintf+0x10/0x10 ? mutex_trylock+0xcf/0x130 ? __pfx_mutex_trylock+0x10/0x10 ? __pfx_show_fd_locks+0x10/0x10 ? io_uring_show_fdinfo+0x57/0x80 io_uring_show_fdinfo+0x57/0x80 seq_show+0x38c/0x690 seq_read_iter+0x3f7/0x1180 ? inode_set_ctime_current+0x160/0x4b0 seq_read+0x271/0x3e0 ? __pfx_seq_read+0x10/0x10 ? __pfx__raw_spin_lock+0x10/0x10 ? __mark_inode_dirty+0x402/0x810 ? selinux_file_permission+0x368/0x500 ? file_update_time+0x10f/0x160 vfs_read+0x177/0xa40 ? __pfx___handle_mm_fault+0x10/0x10 ? __pfx_vfs_read+0x10/0x10 ? mutex_lock+0x81/0xe0 ? __pfx_mutex_lock+0x10/0x10 ? fdget_pos+0x24d/0x4b0 ksys_read+0xf7/0x1c0 ? __pfx_ksys_read+0x10/0x10 ? do_user_addr_fault+0x43b/0x9c0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f0f74170fc9 Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 8 RSP: 002b:00007fffece049e8 EFLAGS: 00000206 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f0f74170fc9 RDX: 0000000000001000 RSI: 00007fffece049f0 RDI: 0000000000000004 RBP: 00007fffece05ad0 R08: 0000000000000000 R09: 00007fffece04d90 R10: 0000000000000000 R11: 0000000000000206 R12: 00005651720a1100 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 298: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_slab_alloc+0x6e/0x70 kmem_cache_alloc_node_noprof+0xe8/0x330 copy_process+0x376/0x5e00 create_io_thread+0xab/0xf0 io_sq_offload_create+0x9ed/0xf20 io_uring_setup+0x12b0/0x1cc0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 22: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x37/0x50 kmem_cache_free+0xc4/0x360 rcu_core+0x5ff/0x19f0 handle_softirqs+0x18c/0x530 run_ksoftirqd+0x20/0x30 smpboot_thread_fn+0x287/0x6c0 kthread+0x30d/0x630 ret_from_fork+0xef/0x1a0 ret_from_fork_asm+0x1a/0x30 Last potentially related work creation: kasan_save_stack+0x33/0x60 kasan_record_aux_stack+0x8c/0xa0 __call_rcu_common.constprop.0+0x68/0x940 __schedule+0xff2/0x2930 __cond_resched+0x4c/0x80 mutex_lock+0x5c/0xe0 io_uring_del_tctx_node+0xe1/0x2b0 io_uring_clean_tctx+0xb7/0x160 io_uring_cancel_generic+0x34e/0x760 do_exit+0x240/0x2350 do_group_exit+0xab/0x220 __x64_sys_exit_group+0x39/0x40 x64_sys_call+0x1243/0x1840 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f The buggy address belongs to the object at ffff88810de2cb00 which belongs to the cache task_struct of size 3712 The buggy address is located 1992 bytes inside of freed 3712-byte region [ffff88810de2cb00, ffff88810de2d980) which is caused by the task_struct pointed to by sq->thread being released while it is being used in the function __io_uring_show_fdinfo(). Holding ctx->uring_lock does not prevent ehre relase or exit of sq->thread. Fix this by assigning and looking up ->thread under RCU, and grabbing a reference to the task_struct. This ensures that it cannot get released while fdinfo is using it. Reported-by: syzbot+531502bbbe51d2f769f4@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/682b06a5.a70a0220.3849cf.00b3.GAE@google.com Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads") Signed-off-by: Penglei Jiang Link: https://lore.kernel.org/r/20250610171801.70960-1-superman.xpt@gmail.com [axboe: massage commit message] Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/fdinfo.c | 12 ++++++++++-- io_uring/sqpoll.c | 9 ++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index e0d6a59a89fa1b..f948917f7f7071 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -172,18 +172,26 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) if (ctx->flags & IORING_SETUP_SQPOLL) { struct io_sq_data *sq = ctx->sq_data; + struct task_struct *tsk; + rcu_read_lock(); + tsk = rcu_dereference(sq->thread); /* * sq->thread might be NULL if we raced with the sqpoll * thread termination. */ - if (sq->thread) { + if (tsk) { + get_task_struct(tsk); + rcu_read_unlock(); + getrusage(tsk, RUSAGE_SELF, &sq_usage); + put_task_struct(tsk); sq_pid = sq->task_pid; sq_cpu = sq->sq_cpu; - getrusage(sq->thread, RUSAGE_SELF, &sq_usage); sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec); sq_work_time = sq->work_time; + } else { + rcu_read_unlock(); } } diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 03c699493b5ab6..0625a421626f4c 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -270,7 +270,8 @@ static int io_sq_thread(void *data) /* offload context creation failed, just exit */ if (!current->io_uring) { mutex_lock(&sqd->lock); - sqd->thread = NULL; + rcu_assign_pointer(sqd->thread, NULL); + put_task_struct(current); mutex_unlock(&sqd->lock); goto err_out; } @@ -379,7 +380,8 @@ static int io_sq_thread(void *data) io_sq_tw(&retry_list, UINT_MAX); io_uring_cancel_generic(true, sqd); - sqd->thread = NULL; + rcu_assign_pointer(sqd->thread, NULL); + put_task_struct(current); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags); io_run_task_work(); @@ -495,9 +497,6 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, ret = -EINVAL; goto err; } - - if (task_to_put) - put_task_struct(task_to_put); return 0; err_sqpoll: complete(&ctx->sq_data->exited); From 6ffae5d53f704d300cc73b06b4ea99e4507f7cf1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2025 06:44:16 +0200 Subject: [PATCH 2790/2924] block: don't use submit_bio_noacct_nocheck in blk_zone_wplug_bio_work [ Upstream commit cf625013d8741c01407bbb4a60c111b61b9fa69d ] Bios queued up in the zone write plug have already gone through all all preparation in the submit_bio path, including the freeze protection. Submitting them through submit_bio_noacct_nocheck duplicates the work and can can cause deadlocks when freezing a queue with pending bio write plugs. Go straight to ->submit_bio or blk_mq_submit_bio to bypass the superfluous extra freeze protection and checks. Fixes: 9b1ce7f0c6f8 ("block: Implement zone append emulation") Reported-by: Bart Van Assche Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20250611044416.2351850-1-hch@lst.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-zoned.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 8f15d1aa6eb89a..45c91016cef38a 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1306,7 +1306,6 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) spin_unlock_irqrestore(&zwplug->lock, flags); bdev = bio->bi_bdev; - submit_bio_noacct_nocheck(bio); /* * blk-mq devices will reuse the extra reference on the request queue @@ -1314,8 +1313,12 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) * path for BIO-based devices will not do that. So drop this extra * reference here. */ - if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) + if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) { + bdev->bd_disk->fops->submit_bio(bio); blk_queue_exit(bdev->bd_disk->queue); + } else { + blk_mq_submit_bio(bio); + } put_zwplug: /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */ From f73dfd9d7f4b48dd16cdef2d34b24c321926fcdb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 11 Jun 2025 13:53:43 -0700 Subject: [PATCH 2791/2924] io_uring: consistently use rcu semantics with sqpoll thread [ Upstream commit c538f400fae22725580842deb2bef546701b64bd ] The sqpoll thread is dereferenced with rcu read protection in one place, so it needs to be annotated as an __rcu type, and should consistently use rcu helpers for access and assignment to make sparse happy. Since most of the accesses occur under the sqd->lock, we can use rcu_dereference_protected() without declaring an rcu read section. Provide a simple helper to get the thread from a locked context. Fixes: ac0b8b327a5677d ("io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo()") Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20250611205343.1821117-1-kbusch@meta.com [axboe: fold in fix for register.c] Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/io_uring.c | 4 ++-- io_uring/register.c | 7 +++++-- io_uring/sqpoll.c | 34 ++++++++++++++++++++++++---------- io_uring/sqpoll.h | 8 +++++++- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 9266d4f2016ad3..e5466f65682699 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2913,7 +2913,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) struct task_struct *tsk; io_sq_thread_park(sqd); - tsk = sqd->thread; + tsk = sqpoll_task_locked(sqd); if (tsk && tsk->io_uring && tsk->io_uring->io_wq) io_wq_cancel_cb(tsk->io_uring->io_wq, io_cancel_ctx_cb, ctx, true); @@ -3150,7 +3150,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) s64 inflight; DEFINE_WAIT(wait); - WARN_ON_ONCE(sqd && sqd->thread != current); + WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current); if (!current->io_uring) return; diff --git a/io_uring/register.c b/io_uring/register.c index cc23a4c205cd43..a59589249fce7a 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -273,6 +273,8 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (ctx->flags & IORING_SETUP_SQPOLL) { sqd = ctx->sq_data; if (sqd) { + struct task_struct *tsk; + /* * Observe the correct sqd->lock -> ctx->uring_lock * ordering. Fine to drop uring_lock here, we hold @@ -282,8 +284,9 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, mutex_unlock(&ctx->uring_lock); mutex_lock(&sqd->lock); mutex_lock(&ctx->uring_lock); - if (sqd->thread) - tctx = sqd->thread->io_uring; + tsk = sqpoll_task_locked(sqd); + if (tsk) + tctx = tsk->io_uring; } } else { tctx = current->io_uring; diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 0625a421626f4c..268d2fbe6160c2 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -30,7 +30,7 @@ enum { void io_sq_thread_unpark(struct io_sq_data *sqd) __releases(&sqd->lock) { - WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(sqpoll_task_locked(sqd) == current); /* * Do the dance but not conditional clear_bit() because it'd race with @@ -46,24 +46,32 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { - WARN_ON_ONCE(data_race(sqd->thread) == current); + struct task_struct *tsk; atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); + + tsk = sqpoll_task_locked(sqd); + if (tsk) { + WARN_ON_ONCE(tsk == current); + wake_up_process(tsk); + } } void io_sq_thread_stop(struct io_sq_data *sqd) { - WARN_ON_ONCE(sqd->thread == current); + struct task_struct *tsk; + WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); + tsk = sqpoll_task_locked(sqd); + if (tsk) { + WARN_ON_ONCE(tsk == current); + wake_up_process(tsk); + } mutex_unlock(&sqd->lock); wait_for_completion(&sqd->exited); } @@ -486,7 +494,10 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, goto err_sqpoll; } - sqd->thread = tsk; + mutex_lock(&sqd->lock); + rcu_assign_pointer(sqd->thread, tsk); + mutex_unlock(&sqd->lock); + task_to_put = get_task_struct(tsk); ret = io_uring_alloc_task_context(tsk, ctx); wake_up_new_task(tsk); @@ -514,10 +525,13 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, int ret = -EINVAL; if (sqd) { + struct task_struct *tsk; + io_sq_thread_park(sqd); /* Don't set affinity for a dying thread */ - if (sqd->thread) - ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask); + tsk = sqpoll_task_locked(sqd); + if (tsk) + ret = io_wq_cpu_affinity(tsk->io_uring, mask); io_sq_thread_unpark(sqd); } diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h index 4171666b1cf4cc..b83dcdec9765fd 100644 --- a/io_uring/sqpoll.h +++ b/io_uring/sqpoll.h @@ -8,7 +8,7 @@ struct io_sq_data { /* ctx's that are using this sqd */ struct list_head ctx_list; - struct task_struct *thread; + struct task_struct __rcu *thread; struct wait_queue_head wait; unsigned sq_thread_idle; @@ -29,3 +29,9 @@ void io_sq_thread_unpark(struct io_sq_data *sqd); void io_put_sq_data(struct io_sq_data *sqd); void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); + +static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) +{ + return rcu_dereference_protected(sqd->thread, + lockdep_is_held(&sqd->lock)); +} From 112356b1804ce5659d327baf74f60c3b5b275dae Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 12 Jun 2025 12:45:04 -0300 Subject: [PATCH 2792/2924] smb: client: fix perf regression with deferred closes [ Upstream commit b64af6bcd3b0f3fc633d6a70adb0991737abfef4 ] Customer reported that one of their applications started failing to open files with STATUS_INSUFFICIENT_RESOURCES due to NetApp server hitting the maximum number of opens to same file that it would allow for a single client connection. It turned out the client was failing to reuse open handles with deferred closes because matching ->f_flags directly without masking off O_CREAT|O_EXCL|O_TRUNC bits first broke the comparision and then client ended up with thousands of deferred closes to same file. Those bits are already satisfied on the original open, so no need to check them against existing open handles. Reproducer: #include #include #include #include #include #include #define NR_THREADS 4 #define NR_ITERATIONS 2500 #define TEST_FILE "/mnt/1/test/dir/foo" static char buf[64]; static void *worker(void *arg) { int i, j; int fd; for (i = 0; i < NR_ITERATIONS; i++) { fd = open(TEST_FILE, O_WRONLY|O_CREAT|O_APPEND, 0666); for (j = 0; j < 16; j++) write(fd, buf, sizeof(buf)); close(fd); } } int main(int argc, char *argv[]) { pthread_t t[NR_THREADS]; int fd; int i; fd = open(TEST_FILE, O_WRONLY|O_CREAT|O_TRUNC, 0666); close(fd); memset(buf, 'a', sizeof(buf)); for (i = 0; i < NR_THREADS; i++) pthread_create(&t[i], NULL, worker, NULL); for (i = 0; i < NR_THREADS; i++) pthread_join(t[i], NULL); return 0; } Before patch: $ mount.cifs //srv/share /mnt/1 -o ... $ mkdir -p /mnt/1/test/dir $ gcc repro.c && ./a.out ... number of opens: 1391 After patch: $ mount.cifs //srv/share /mnt/1 -o ... $ mkdir -p /mnt/1/test/dir $ gcc repro.c && ./a.out ... number of opens: 1 Cc: linux-cifs@vger.kernel.org Cc: David Howells Cc: Jay Shin Cc: Pierguido Lambri Fixes: b8ea3b1ff544 ("smb: enable reuse of deferred file handles for write operations") Acked-by: Shyam Prasad N Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/file.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index d2df10b8e6fd88..9835672267d277 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -999,15 +999,18 @@ int cifs_open(struct inode *inode, struct file *file) rc = cifs_get_readable_path(tcon, full_path, &cfile); } if (rc == 0) { - if (file->f_flags == cfile->f_flags) { + unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + + if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) && + (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) { file->private_data = cfile; spin_lock(&CIFS_I(inode)->deferred_lock); cifs_del_deferred_close(cfile); spin_unlock(&CIFS_I(inode)->deferred_lock); goto use_cache; - } else { - _cifsFileInfo_put(cfile, true, false); } + _cifsFileInfo_put(cfile, true, false); } else { /* hard link on the defeered close file */ rc = cifs_get_hardlink_path(tcon, inode, file); From 50cf0197d12eae21538fa29b1227a3cabb7db269 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:41:25 +0100 Subject: [PATCH 2793/2924] bio: Fix bio_first_folio() for SPARSEMEM without VMEMMAP [ Upstream commit f826ec7966a63d48e16e0868af4e038bf9a1a3ae ] It is possible for physically contiguous folios to have discontiguous struct pages if SPARSEMEM is enabled and SPARSEMEM_VMEMMAP is not. This is correctly handled by folio_page_idx(), so remove this open-coded implementation. Fixes: 640d1930bef4 (block: Add bio_for_each_folio_all()) Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20250612144126.2849931-1-willy@infradead.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index b786ec5bcc81d0..b474a47ec7eefe 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -291,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + - PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); fi->_next = folio_next(fi->folio); From 02b3d0ea4b04bbc36768bcecdef031f2f3b07512 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:42:53 +0100 Subject: [PATCH 2794/2924] block: Fix bvec_set_folio() for very large folios [ Upstream commit 5e223e06ee7c6d8f630041a0645ac90e39a42cc6 ] Similarly to 26064d3e2b4d ("block: fix adding folio to bio"), if we attempt to add a folio that is larger than 4GB, we'll silently truncate the offset and len. Widen the parameters to size_t, assert that the length is less than 4GB and set the first page that contains the interesting data rather than the first page of the folio. Fixes: 26db5ee15851 (block: add a bvec_set_folio helper) Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20250612144255.2850278-1-willy@infradead.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- include/linux/bvec.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 204b22a99c4ba6..0a80e1f9aa201c 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, * @offset: offset into the folio */ static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, - unsigned int len, unsigned int offset) + size_t len, size_t offset) { - bvec_set_page(bv, &folio->page, len, offset); + unsigned long nr = offset / PAGE_SIZE; + + WARN_ON_ONCE(len > UINT_MAX); + bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE); } /** From 44c805e9d9c9a2101ea783979c1f571883830f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 6 Jun 2025 10:23:57 +0200 Subject: [PATCH 2795/2924] uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 11fcf368506d347088e613edf6cd2604d70c454f upstream. Commit 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") did not take in account that the usage of BITS_PER_LONG in __GENMASK() was changed to __BITS_PER_LONG for UAPI-safety in commit 3c7a8e190bc5 ("uapi: introduce uapi-friendly macros for GENMASK"). BITS_PER_LONG can not be used in UAPI headers as it derives from the kernel configuration and not from the current compiler invocation. When building compat userspace code or a compat vDSO its value will be incorrect. Switch back to __BITS_PER_LONG. Fixes: 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/bits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 682b406e10679d..a04afef9efca42 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) From 25c80d7052ecd12c22799878a6e88f9a19c70308 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 20 May 2025 20:55:55 +0200 Subject: [PATCH 2796/2924] objtool/rust: relax slice condition to cover more `noreturn` Rust functions commit cbeaa41dfe26b72639141e87183cb23e00d4b0dd upstream. Developers are indeed hitting other of the `noreturn` slice symbols in Nova [1], thus relax the last check in the list so that we catch all of them, i.e. *_4core5slice5index22slice_index_order_fail *_4core5slice5index24slice_end_index_len_fail *_4core5slice5index26slice_start_index_len_fail *_4core5slice5index29slice_end_index_overflow_fail *_4core5slice5index31slice_start_index_overflow_fail These all exist since at least Rust 1.78.0, thus backport it too. See commit 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") for more details. Cc: stable@vger.kernel.org # Needed in 6.12.y and later. Cc: John Hubbard Cc: Timur Tabi Cc: Kane York Cc: Josh Poimboeuf Cc: Peter Zijlstra Reported-by: Joel Fernandes Fixes: 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") Closes: https://lore.kernel.org/rust-for-linux/20250513180757.GA1295002@joelnvbox/ [1] Tested-by: Joel Fernandes Link: https://lore.kernel.org/r/20250520185555.825242-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- tools/objtool/check.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b21b12ec88d960..f23bdda737aaa5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -230,7 +230,8 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_7___rustc17rust_begin_unwind") || strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || - (strstr(func->name, "_4core5slice5index24slice_") && + (strstr(func->name, "_4core5slice5index") && + strstr(func->name, "slice_") && str_ends_with(func->name, "_fail")); } From 6545df4edb2cea7607887898da5130748f1b2d94 Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Sat, 17 May 2025 09:55:59 +0100 Subject: [PATCH 2797/2924] rust: compile libcore with edition 2024 for 1.87+ commit f4daa80d6be7d3c55ca72a8e560afc4e21f886aa upstream. Rust 1.87 (released on 2025-05-15) compiles core library with edition 2024 instead of 2021 [1]. Ensure that the edition matches libcore's expectation to avoid potential breakage. [ J3m3 reported in Zulip [2] that the `rust-analyzer` target was broken after this patch -- indeed, we need to avoid `core-cfgs` since those are passed to the `rust-analyzer` target. So, instead, I tweaked the patch to create a new `core-edition` variable and explicitly mention the `--edition` flag instead of reusing `core-cfg`s. In addition, pass a new argument using this new variable to `generate_rust_analyzer.py` so that we set the right edition there. By the way, for future reference: the `filter-out` change is needed for Rust < 1.87, since otherwise we would skip the `--edition=2021` we just added, ending up with no edition flag, and thus the compiler would default to the 2015 one. [2] https://rust-for-linux.zulipchat.com/#narrow/channel/291565/topic/x/near/520206547 - Miguel ] Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust/pull/138162 [1] Reported-by: est31 Closes: https://github.com/Rust-for-Linux/linux/issues/1163 Signed-off-by: Gary Guo Link: https://lore.kernel.org/r/20250517085600.2857460-1-gary@garyguo.net Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- rust/Makefile | 14 ++++++++------ scripts/generate_rust_analyzer.py | 13 ++++++++----- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/rust/Makefile b/rust/Makefile index 3aca903a7d08cf..313a200112ce18 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -60,6 +60,8 @@ endif core-cfgs = \ --cfg no_fp_fmt_parse +core-edition := $(if $(call rustc-min-version,108700),2024,2021) + # `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only # since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust # 1.82.0 (https://github.com/rust-lang/rust/issues/138520). Thus workaround both @@ -106,8 +108,8 @@ rustdoc-macros: $(src)/macros/lib.rs FORCE # Starting with Rust 1.82.0, skipping `-Wrustdoc::unescaped_backticks` should # not be needed -- see https://github.com/rust-lang/rust/pull/128307. -rustdoc-core: private skip_flags = -Wrustdoc::unescaped_backticks -rustdoc-core: private rustc_target_flags = $(core-cfgs) +rustdoc-core: private skip_flags = --edition=2021 -Wrustdoc::unescaped_backticks +rustdoc-core: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs) rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs FORCE +$(call if_changed,rustdoc) @@ -416,7 +418,7 @@ quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L cmd_rustc_library = \ OBJTREE=$(abspath $(objtree)) \ $(if $(skip_clippy),$(RUSTC),$(RUSTC_OR_CLIPPY)) \ - $(filter-out $(skip_flags),$(rust_flags) $(rustc_target_flags)) \ + $(filter-out $(skip_flags),$(rust_flags)) $(rustc_target_flags) \ --emit=dep-info=$(depfile) --emit=obj=$@ \ --emit=metadata=$(dir $@)$(patsubst %.o,lib%.rmeta,$(notdir $@)) \ --crate-type rlib -L$(objtree)/$(obj) \ @@ -427,7 +429,7 @@ quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L rust-analyzer: $(Q)MAKEFLAGS= $(srctree)/scripts/generate_rust_analyzer.py \ - --cfgs='core=$(core-cfgs)' \ + --cfgs='core=$(core-cfgs)' $(core-edition) \ $(realpath $(srctree)) $(realpath $(objtree)) \ $(rustc_sysroot) $(RUST_LIB_SRC) $(if $(KBUILD_EXTMOD),$(srcroot)) \ > rust-project.json @@ -483,9 +485,9 @@ $(obj)/helpers/helpers.o: $(src)/helpers/helpers.c $(recordmcount_source) FORCE $(obj)/exports.o: private skip_gendwarfksyms = 1 $(obj)/core.o: private skip_clippy = 1 -$(obj)/core.o: private skip_flags = -Wunreachable_pub +$(obj)/core.o: private skip_flags = --edition=2021 -Wunreachable_pub $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym)) -$(obj)/core.o: private rustc_target_flags = $(core-cfgs) +$(obj)/core.o: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs) $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs \ $(wildcard $(objtree)/include/config/RUSTC_VERSION_TEXT) FORCE +$(call if_changed_rule,rustc_library) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index fe663dd0c43b04..7c3ea2b55041f8 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -19,7 +19,7 @@ def args_crates_cfgs(cfgs): return crates_cfgs -def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs): +def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edition): # Generate the configuration list. cfg = [] with open(objtree / "include" / "generated" / "rustc_cfg") as fd: @@ -35,7 +35,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs): crates_indexes = {} crates_cfgs = args_crates_cfgs(cfgs) - def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=True, is_proc_macro=False): + def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=True, is_proc_macro=False, edition="2021"): crate = { "display_name": display_name, "root_module": str(root_module), @@ -43,7 +43,7 @@ def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=Tr "is_proc_macro": is_proc_macro, "deps": [{"crate": crates_indexes[dep], "name": dep} for dep in deps], "cfg": cfg, - "edition": "2021", + "edition": edition, "env": { "RUST_MODFILE": "This is only for rust-analyzer" } @@ -61,6 +61,7 @@ def append_sysroot_crate( display_name, deps, cfg=[], + edition="2021", ): append_crate( display_name, @@ -68,12 +69,13 @@ def append_sysroot_crate( deps, cfg, is_workspace_member=False, + edition=edition, ) # NB: sysroot crates reexport items from one another so setting up our transitive dependencies # here is important for ensuring that rust-analyzer can resolve symbols. The sources of truth # for this dependency graph are `(sysroot_src / crate / "Cargo.toml" for crate in crates)`. - append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", [])) + append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", []), edition=core_edition) append_sysroot_crate("alloc", ["core"]) append_sysroot_crate("std", ["alloc", "core"]) append_sysroot_crate("proc_macro", ["core", "std"]) @@ -177,6 +179,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('--verbose', '-v', action='store_true') parser.add_argument('--cfgs', action='append', default=[]) + parser.add_argument("core_edition") parser.add_argument("srctree", type=pathlib.Path) parser.add_argument("objtree", type=pathlib.Path) parser.add_argument("sysroot", type=pathlib.Path) @@ -193,7 +196,7 @@ def main(): assert args.sysroot in args.sysroot_src.parents rust_project = { - "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs), + "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs, args.core_edition), "sysroot": str(args.sysroot), } From 98af4b6b983cd227a95c7677ca268c3b41c82b47 Mon Sep 17 00:00:00 2001 From: Benno Lossin Date: Sun, 25 May 2025 19:34:45 +0200 Subject: [PATCH 2798/2924] rust: list: fix path of `assert_pinned!` commit eb71feaacaaca227ae8f91c8578cf831553c5ab5 upstream. Commit dbd5058ba60c ("rust: make pin-init its own crate") moved all items from pin-init into the pin-init crate, including the `assert_pinned!` macro. Thus fix the path of the sole user of the `assert_pinned!` macro. This occurrence was missed in the commit above, since it is in a macro rule that has no current users (although binder is a future user). Cc: stable@kernel.org Fixes: dbd5058ba60c ("rust: make pin-init its own crate") Signed-off-by: Benno Lossin Link: https://lore.kernel.org/r/20250525173450.853413-1-lossin@kernel.org [ Reworded slightly as discussed in the list. - Miguel ] Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- rust/kernel/list/arc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/list/arc.rs b/rust/kernel/list/arc.rs index 13c50df37b89d1..a88a2dc65aa7cf 100644 --- a/rust/kernel/list/arc.rs +++ b/rust/kernel/list/arc.rs @@ -96,7 +96,7 @@ macro_rules! impl_list_arc_safe { } $($rest:tt)*) => { impl$(<$($generics)*>)? $crate::list::ListArcSafe<$num> for $t { unsafe fn on_create_list_arc_from_unique(self: ::core::pin::Pin<&mut Self>) { - $crate::assert_pinned!($t, $field, $fty, inline); + ::pin_init::assert_pinned!($t, $field, $fty, inline); // SAFETY: This field is structurally pinned as per the above assertion. let field = unsafe { From 473fa7a12d038fe8ccf2a6f708c4fe5ce8621d32 Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Wed, 4 Jun 2025 15:03:42 +0000 Subject: [PATCH 2799/2924] pidfs: never refuse ppid == 0 in PIDFD_GET_INFO commit b55eb6eb2a7427428c59b293a0900131fc849595 upstream. In systemd we spotted an issue after switching to ioctl(PIDFD_GET_INFO) for obtaining pid number the pidfd refers to, that for processes with a parent from outer pidns PIDFD_GET_INFO unexpectedly yields -ESRCH [1]. It turned out that there's an arbitrary check blocking this, which is not really sensible given getppid() happily returns 0 for such processes. Just drop the spurious check and userspace ought to handle ppid == 0 properly everywhere. [1] https://github.com/systemd/systemd/issues/37715 Fixes: cdda1f26e74b ("pidfd: add ioctl to retrieve pid info") Signed-off-by: Mike Yuan Link: https://lore.kernel.org/20250604150238.42664-1-me@yhndnzj.com Cc: Christian Brauner Cc: Luca Boccassi Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/pidfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 50e69a9e104a60..87a53d2ae4bb78 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -336,7 +336,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) kinfo.pid = task_pid_vnr(task); kinfo.mask |= PIDFD_INFO_PID; - if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) + if (kinfo.pid == 0 || kinfo.tgid == 0) return -ESRCH; copy_out: From 0ea714e49b1dea6e3cf3c1efb405103ea026cbb7 Mon Sep 17 00:00:00 2001 From: Suleiman Souhlal Date: Fri, 6 Jun 2025 16:45:38 +0900 Subject: [PATCH 2800/2924] tools/resolve_btfids: Fix build when cross compiling kernel with clang. commit a298bbab903e3fb4cbe16d36d6195e68fad1b776 upstream. When cross compiling the kernel with clang, we need to override CLANG_CROSS_FLAGS when preparing the step libraries. Prior to commit d1d096312176 ("tools: fix annoying "mkdir -p ..." logs when building tools in parallel"), MAKEFLAGS would have been set to a value that wouldn't set a value for CLANG_CROSS_FLAGS, hiding the fact that we weren't properly overriding it. Fixes: 56a2df7615fa ("tools/resolve_btfids: Compile resolve_btfids as host program") Signed-off-by: Suleiman Souhlal Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20250606074538.1608546-1-suleiman@google.com Signed-off-by: Greg Kroah-Hartman --- tools/bpf/resolve_btfids/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index afbddea3a39c64..ce1b556dfa90f1 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -17,7 +17,7 @@ endif # Overrides for the prepare step libraries. HOST_OVERRIDES := AR="$(HOSTAR)" CC="$(HOSTCC)" LD="$(HOSTLD)" ARCH="$(HOSTARCH)" \ - CROSS_COMPILE="" EXTRA_CFLAGS="$(HOSTCFLAGS)" + CROSS_COMPILE="" CLANG_CROSS_FLAGS="" EXTRA_CFLAGS="$(HOSTCFLAGS)" RM ?= rm HOSTCC ?= gcc From e97340a70b13fc392fd367c050722087d5deadd6 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 5 Jun 2025 15:03:02 +0200 Subject: [PATCH 2801/2924] Revert "wifi: mwifiex: Fix HT40 bandwidth issue." commit 570896604f47d44d4ff6882d2a588428d2a6ef17 upstream. This reverts commit 4fcfcbe45734 ("wifi: mwifiex: Fix HT40 bandwidth issue.") That commit introduces a regression, when HT40 mode is enabled, received packets are lost, this was experience with W8997 with both SDIO-UART and SDIO-SDIO variants. From an initial investigation the issue solves on its own after some time, but it's not clear what is the reason. Given that this was just a performance optimization, let's revert it till we have a better understanding of the issue and a proper fix. Cc: Jeff Chen Cc: stable@vger.kernel.org Fixes: 4fcfcbe45734 ("wifi: mwifiex: Fix HT40 bandwidth issue.") Closes: https://lore.kernel.org/all/20250603203337.GA109929@francesco-nb/ Signed-off-by: Francesco Dolcini Link: https://patch.msgid.link/20250605130302.55555-1-francesco@dolcini.it [fix commit reference format] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/11n.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c index 738bafc3749b0a..66f0f5377ac181 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.c +++ b/drivers/net/wireless/marvell/mwifiex/11n.c @@ -403,14 +403,12 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv, if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 && bss_desc->bcn_ht_oper->ht_param & - IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) { - chan_list->chan_scan_param[0].radio_type |= - CHAN_BW_40MHZ << 2; + IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) SET_SECONDARYCHAN(chan_list->chan_scan_param[0]. radio_type, (bss_desc->bcn_ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET)); - } + *buffer += struct_size(chan_list, chan_scan_param, 1); ret_len += struct_size(chan_list, chan_scan_param, 1); } From 62066758d2ae169278e5d6aea5995b1b6f6ddeb5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 19 May 2025 23:20:30 +0200 Subject: [PATCH 2802/2924] ALSA: usb-audio: Kill timer properly at removal commit 0718a78f6a9f04b88d0dc9616cc216b31c5f3cf1 upstream. The USB-audio MIDI code initializes the timer, but in a rare case, the driver might be freed without the disconnect call. This leaves the timer in an active state while the assigned object is released via snd_usbmidi_free(), which ends up with a kernel warning when the debug configuration is enabled, as spotted by fuzzer. For avoiding the problem, put timer_shutdown_sync() at snd_usbmidi_free(), so that the timer can be killed properly. While we're at it, replace the existing timer_delete_sync() at the disconnect callback with timer_shutdown_sync(), too. Reported-by: syzbot+d8f72178ab6783a7daea@syzkaller.appspotmail.com Closes: https://lore.kernel.org/681c70d7.050a0220.a19a9.00c6.GAE@google.com Cc: Link: https://patch.msgid.link/20250519212031.14436-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index cfed000f243ab9..c3de2b13743500 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1530,6 +1530,7 @@ static void snd_usbmidi_free(struct snd_usb_midi *umidi) snd_usbmidi_in_endpoint_delete(ep->in); } mutex_destroy(&umidi->mutex); + timer_shutdown_sync(&umidi->error_timer); kfree(umidi); } @@ -1553,7 +1554,7 @@ void snd_usbmidi_disconnect(struct list_head *p) spin_unlock_irq(&umidi->disc_lock); up_write(&umidi->disc_rwsem); - timer_delete_sync(&umidi->error_timer); + timer_shutdown_sync(&umidi->error_timer); for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i]; From 362d8a268505120db68d66f3550c28f0c3e0bfa4 Mon Sep 17 00:00:00 2001 From: David Heimann Date: Sun, 1 Jun 2025 12:41:16 -0400 Subject: [PATCH 2803/2924] ALSA: usb-audio: Add implicit feedback quirk for RODE AI-1 commit 6a3439a417b910e662c666993798e0691bc81147 upstream. The RODE AI-1 audio interface requires implicit feedback sync between playback endpoint 0x03 and feedback endpoint 0x84 on interface 3, but doesn't advertise this in its USB descriptors. Without this quirk, the device receives audio data but produces no output. Signed-off-by: David Heimann Cc: Link: https://patch.msgid.link/084dc88c-1193-4a94-a002-5599adff936c@app.fastmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/implicit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 4727043fd74580..77f06da93151e8 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -57,6 +57,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0002, 0x81, 2), /* Solid State Logic SSL2+ */ IMPLICIT_FB_FIXED_DEV(0x0499, 0x172f, 0x81, 2), /* Steinberg UR22C */ IMPLICIT_FB_FIXED_DEV(0x0d9a, 0x00df, 0x81, 2), /* RTX6001 */ + IMPLICIT_FB_FIXED_DEV(0x19f7, 0x000a, 0x84, 3), /* RODE AI-1 */ IMPLICIT_FB_FIXED_DEV(0x22f0, 0x0006, 0x81, 3), /* Allen&Heath Qu-16 */ IMPLICIT_FB_FIXED_DEV(0x1686, 0xf029, 0x82, 2), /* Zoom UAC-2 */ IMPLICIT_FB_FIXED_DEV(0x2466, 0x8003, 0x86, 2), /* Fractal Audio Axe-Fx II */ From 485e1b741eb838cbe1d6b0e81e5ab62ae6c095cf Mon Sep 17 00:00:00 2001 From: Terry Junge Date: Wed, 12 Mar 2025 15:23:31 -0700 Subject: [PATCH 2804/2924] HID: usbhid: Eliminate recurrent out-of-bounds bug in usbhid_parse() commit fe7f7ac8e0c708446ff017453add769ffc15deed upstream. Update struct hid_descriptor to better reflect the mandatory and optional parts of the HID Descriptor as per USB HID 1.11 specification. Note: the kernel currently does not parse any optional HID class descriptors, only the mandatory report descriptor. Update all references to member element desc[0] to rpt_desc. Add test to verify bLength and bNumDescriptors values are valid. Replace the for loop with direct access to the mandatory HID class descriptor member for the report descriptor. This eliminates the possibility of getting an out-of-bounds fault. Add a warning message if the HID descriptor contains any unsupported optional HID class descriptors. Reported-by: syzbot+c52569baf0c843f35495@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c52569baf0c843f35495 Fixes: f043bfc98c19 ("HID: usbhid: fix out-of-bounds bug") Cc: stable@vger.kernel.org Signed-off-by: Terry Junge Reviewed-by: Michael Kelley Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-hyperv.c | 4 ++-- drivers/hid/usbhid/hid-core.c | 25 ++++++++++++++----------- drivers/usb/gadget/function/f_hid.c | 12 ++++++------ include/linux/hid.h | 3 ++- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 0fb210e40a4127..9eafff0b6ea4c3 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -192,7 +192,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device, goto cleanup; input_device->report_desc_size = le16_to_cpu( - desc->desc[0].wDescriptorLength); + desc->rpt_desc.wDescriptorLength); if (input_device->report_desc_size == 0) { input_device->dev_info_status = -EINVAL; goto cleanup; @@ -210,7 +210,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device, memcpy(input_device->report_desc, ((unsigned char *)desc) + desc->bLength, - le16_to_cpu(desc->desc[0].wDescriptorLength)); + le16_to_cpu(desc->rpt_desc.wDescriptorLength)); /* Send the ack */ memset(&ack, 0, sizeof(struct mousevsc_prt_msg)); diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 7d9297fad90ea7..d4cbecc668ec02 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -984,12 +984,11 @@ static int usbhid_parse(struct hid_device *hid) struct usb_host_interface *interface = intf->cur_altsetting; struct usb_device *dev = interface_to_usbdev (intf); struct hid_descriptor *hdesc; + struct hid_class_descriptor *hcdesc; u32 quirks = 0; unsigned int rsize = 0; char *rdesc; - int ret, n; - int num_descriptors; - size_t offset = offsetof(struct hid_descriptor, desc); + int ret; quirks = hid_lookup_quirk(hid); @@ -1011,20 +1010,19 @@ static int usbhid_parse(struct hid_device *hid) return -ENODEV; } - if (hdesc->bLength < sizeof(struct hid_descriptor)) { - dbg_hid("hid descriptor is too short\n"); + if (!hdesc->bNumDescriptors || + hdesc->bLength != sizeof(*hdesc) + + (hdesc->bNumDescriptors - 1) * sizeof(*hcdesc)) { + dbg_hid("hid descriptor invalid, bLen=%hhu bNum=%hhu\n", + hdesc->bLength, hdesc->bNumDescriptors); return -EINVAL; } hid->version = le16_to_cpu(hdesc->bcdHID); hid->country = hdesc->bCountryCode; - num_descriptors = min_t(int, hdesc->bNumDescriptors, - (hdesc->bLength - offset) / sizeof(struct hid_class_descriptor)); - - for (n = 0; n < num_descriptors; n++) - if (hdesc->desc[n].bDescriptorType == HID_DT_REPORT) - rsize = le16_to_cpu(hdesc->desc[n].wDescriptorLength); + if (hdesc->rpt_desc.bDescriptorType == HID_DT_REPORT) + rsize = le16_to_cpu(hdesc->rpt_desc.wDescriptorLength); if (!rsize || rsize > HID_MAX_DESCRIPTOR_SIZE) { dbg_hid("weird size of report descriptor (%u)\n", rsize); @@ -1052,6 +1050,11 @@ static int usbhid_parse(struct hid_device *hid) goto err; } + if (hdesc->bNumDescriptors > 1) + hid_warn(intf, + "%u unsupported optional hid class descriptors\n", + (int)(hdesc->bNumDescriptors - 1)); + hid->quirks |= quirks; return 0; diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 740311c4fa2496..c7a05f842745bc 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -144,8 +144,8 @@ static struct hid_descriptor hidg_desc = { .bcdHID = cpu_to_le16(0x0101), .bCountryCode = 0x00, .bNumDescriptors = 0x1, - /*.desc[0].bDescriptorType = DYNAMIC */ - /*.desc[0].wDescriptorLenght = DYNAMIC */ + /*.rpt_desc.bDescriptorType = DYNAMIC */ + /*.rpt_desc.wDescriptorLength = DYNAMIC */ }; /* Super-Speed Support */ @@ -939,8 +939,8 @@ static int hidg_setup(struct usb_function *f, struct hid_descriptor hidg_desc_copy = hidg_desc; VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: HID\n"); - hidg_desc_copy.desc[0].bDescriptorType = HID_DT_REPORT; - hidg_desc_copy.desc[0].wDescriptorLength = + hidg_desc_copy.rpt_desc.bDescriptorType = HID_DT_REPORT; + hidg_desc_copy.rpt_desc.wDescriptorLength = cpu_to_le16(hidg->report_desc_length); length = min_t(unsigned short, length, @@ -1210,8 +1210,8 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) * We can use hidg_desc struct here but we should not relay * that its content won't change after returning from this function. */ - hidg_desc.desc[0].bDescriptorType = HID_DT_REPORT; - hidg_desc.desc[0].wDescriptorLength = + hidg_desc.rpt_desc.bDescriptorType = HID_DT_REPORT; + hidg_desc.rpt_desc.wDescriptorLength = cpu_to_le16(hidg->report_desc_length); hidg_hs_in_ep_desc.bEndpointAddress = diff --git a/include/linux/hid.h b/include/linux/hid.h index ef9a90ca0fbd6a..daae1d6d11a744 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -740,8 +740,9 @@ struct hid_descriptor { __le16 bcdHID; __u8 bCountryCode; __u8 bNumDescriptors; + struct hid_class_descriptor rpt_desc; - struct hid_class_descriptor desc[1]; + struct hid_class_descriptor opt_descs[]; } __attribute__ ((packed)); #define HID_DEVICE(b, g, ven, prod) \ From 601a8601caa6c1c7291eda3dd72d37ae30a08f60 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 11 May 2025 09:41:11 +0530 Subject: [PATCH 2805/2924] powerpc/kernel: Fix ppc_save_regs inclusion in build commit 93bd4a80efeb521314485a06d8c21157240497bb upstream. Recent patch fixed an old commit 'fc2a5a6161a2 ("powerpc/64s: ppc_save_regs is now needed for all 64s builds")' which is to include building of ppc_save_reg.c only when XMON and KEXEC_CORE and PPC_BOOK3S are enabled. This was valid, since ppc_save_regs was called only in replay_system_reset() of old irq.c which was under BOOK3S. But there has been multiple refactoring of irq.c and have added call to ppc_save_regs() from __replay_soft_interrupts -> replay_soft_interrupts which is part of irq_64.c included under CONFIG_PPC64. And since ppc_save_regs is called in CRASH_DUMP path as part of crash_setup_regs in kexec.h, CONFIG_PPC32 also needs it. So with this recent patch which enabled the building of ppc_save_regs.c caused a build break when none of these (XMON, KEXEC_CORE, BOOK3S) where enabled as part of config. Patch to enable building of ppc_save_regs.c by defaults. Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250511041111.841158-1-maddy@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0c26b2412d1730..9d1ab3971694ae 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -160,9 +160,7 @@ endif obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o -ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)$(CONFIG_PPC_BOOK3S),) obj-y += ppc_save_regs.o -endif obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o From 460188bc042a3f40f72d34b9f7fc6ee66b0b757b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 13 Jun 2025 19:26:50 +0200 Subject: [PATCH 2806/2924] posix-cpu-timers: fix race between handle_posix_cpu_timers() and posix_cpu_timer_del() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f90fff1e152dedf52b932240ebbd670d83330eca upstream. If an exiting non-autoreaping task has already passed exit_notify() and calls handle_posix_cpu_timers() from IRQ, it can be reaped by its parent or debugger right after unlock_task_sighand(). If a concurrent posix_cpu_timer_del() runs at that moment, it won't be able to detect timer->it.cpu.firing != 0: cpu_timer_task_rcu() and/or lock_task_sighand() will fail. Add the tsk->exit_state check into run_posix_cpu_timers() to fix this. This fix is not needed if CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y, because exit_task_work() is called before exit_notify(). But the check still makes sense, task_work_add(&tsk->posix_cputimers_work.work) will fail anyway in this case. Cc: stable@vger.kernel.org Reported-by: Benoît Sevens Fixes: 0bdd2ed4138e ("sched: run_posix_cpu_timers: Don't check ->exit_state, use lock_task_sighand()") Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/time/posix-cpu-timers.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 50e8d04ab661f4..2e5b89d7d86605 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1405,6 +1405,15 @@ void run_posix_cpu_timers(void) lockdep_assert_irqs_disabled(); + /* + * Ensure that release_task(tsk) can't happen while + * handle_posix_cpu_timers() is running. Otherwise, a concurrent + * posix_cpu_timer_del() may fail to lock_task_sighand(tsk) and + * miss timer->it.cpu.firing != 0. + */ + if (tsk->exit_state) + return; + /* * If the actual expiry is deferred to task work context and the * work is already scheduled there is no point to do anything here. From 3728101f56ef54425a11027a3ddc2c3941d60b71 Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Fri, 9 May 2025 13:24:07 +0100 Subject: [PATCH 2807/2924] nvmem: zynqmp_nvmem: unbreak driver after cleanup commit fe8abdd175d7b547ae1a612757e7902bcd62e9cf upstream. Commit 29be47fcd6a0 ("nvmem: zynqmp_nvmem: zynqmp_nvmem_probe cleanup") changed the driver to expect the device pointer to be passed as the "context", but in nvmem the context parameter comes from nvmem_config.priv which is never set - Leading to null pointer exceptions when the device is accessed. Fixes: 29be47fcd6a0 ("nvmem: zynqmp_nvmem: zynqmp_nvmem_probe cleanup") Cc: stable Signed-off-by: Peter Korsgaard Reviewed-by: Michal Simek Tested-by: Michal Simek Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20250509122407.11763-3-srini@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/zynqmp_nvmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvmem/zynqmp_nvmem.c b/drivers/nvmem/zynqmp_nvmem.c index 8682adaacd692d..7da717d6c7faf3 100644 --- a/drivers/nvmem/zynqmp_nvmem.c +++ b/drivers/nvmem/zynqmp_nvmem.c @@ -213,6 +213,7 @@ static int zynqmp_nvmem_probe(struct platform_device *pdev) econfig.word_size = 1; econfig.size = ZYNQMP_NVMEM_SIZE; econfig.dev = dev; + econfig.priv = dev; econfig.add_legacy_fixed_of_cells = true; econfig.reg_read = zynqmp_nvmem_read; econfig.reg_write = zynqmp_nvmem_write; From 8062676015c0def219685d3b7e2b8441fde2212b Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Wed, 21 May 2025 14:16:55 +0200 Subject: [PATCH 2808/2924] usb: usbtmc: Fix read_stb function and get_stb ioctl commit acb3dac2805d3342ded7dbbd164add32bbfdf21c upstream. The usbtmc488_ioctl_read_stb function relied on a positive return from usbtmc_get_stb to reset the srq condition in the driver. The USBTMC_IOCTL_GET_STB case tested for a positive return to return the stb to the user. Commit: ("usb: usbtmc: Fix erroneous get_stb ioctl error returns") changed the return value of usbtmc_get_stb to 0 on success instead of returning the value of usb_control_msg which is positive in the normal case. This change caused the function usbtmc488_ioctl_read_stb and the USBTMC_IOCTL_GET_STB ioctl to no longer function correctly. Change the test in usbtmc488_ioctl_read_stb to test for failure first and return the failure code immediately. Change the test for the USBTMC_IOCTL_GET_STB ioctl to test for 0 instead of a positive value. Fixes: cac01bd178d6 ("usb: usbtmc: Fix erroneous get_stb ioctl error returns") Cc: stable@vger.kernel.org Signed-off-by: Dave Penkler Link: https://lore.kernel.org/r/20250521121656.18174-3-dpenkler@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 66f3d9324ba2f3..75de29725a450c 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -565,14 +565,15 @@ static int usbtmc488_ioctl_read_stb(struct usbtmc_file_data *file_data, rv = usbtmc_get_stb(file_data, &stb); - if (rv > 0) { - srq_asserted = atomic_xchg(&file_data->srq_asserted, - srq_asserted); - if (srq_asserted) - stb |= 0x40; /* Set RQS bit */ + if (rv < 0) + return rv; + + srq_asserted = atomic_xchg(&file_data->srq_asserted, srq_asserted); + if (srq_asserted) + stb |= 0x40; /* Set RQS bit */ + + rv = put_user(stb, (__u8 __user *)arg); - rv = put_user(stb, (__u8 __user *)arg); - } return rv; } @@ -2201,7 +2202,7 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case USBTMC_IOCTL_GET_STB: retval = usbtmc_get_stb(file_data, &tmp_byte); - if (retval > 0) + if (!retval) retval = put_user(tmp_byte, (__u8 __user *)arg); break; From 75b5313c80c39a26d27cbb602f968a05576c36f9 Mon Sep 17 00:00:00 2001 From: Wupeng Ma Date: Sat, 10 May 2025 11:30:40 +0800 Subject: [PATCH 2809/2924] VMCI: fix race between vmci_host_setup_notify and vmci_ctx_unset_notify commit 1bd6406fb5f36c2bb1e96e27d4c3e9f4d09edde4 upstream. During our test, it is found that a warning can be trigger in try_grab_folio as follow: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1678 at mm/gup.c:147 try_grab_folio+0x106/0x130 Modules linked in: CPU: 0 UID: 0 PID: 1678 Comm: syz.3.31 Not tainted 6.15.0-rc5 #163 PREEMPT(undef) RIP: 0010:try_grab_folio+0x106/0x130 Call Trace: follow_huge_pmd+0x240/0x8e0 follow_pmd_mask.constprop.0.isra.0+0x40b/0x5c0 follow_pud_mask.constprop.0.isra.0+0x14a/0x170 follow_page_mask+0x1c2/0x1f0 __get_user_pages+0x176/0x950 __gup_longterm_locked+0x15b/0x1060 ? gup_fast+0x120/0x1f0 gup_fast_fallback+0x17e/0x230 get_user_pages_fast+0x5f/0x80 vmci_host_unlocked_ioctl+0x21c/0xf80 RIP: 0033:0x54d2cd ---[ end trace 0000000000000000 ]--- Digging into the source, context->notify_page may init by get_user_pages_fast and can be seen in vmci_ctx_unset_notify which will try to put_page. However get_user_pages_fast is not finished here and lead to following try_grab_folio warning. The race condition is shown as follow: cpu0 cpu1 vmci_host_do_set_notify vmci_host_setup_notify get_user_pages_fast(uva, 1, FOLL_WRITE, &context->notify_page); lockless_pages_from_mm gup_pgd_range gup_huge_pmd // update &context->notify_page vmci_host_do_set_notify vmci_ctx_unset_notify notify_page = context->notify_page; if (notify_page) put_page(notify_page); // page is freed __gup_longterm_locked __get_user_pages follow_trans_huge_pmd try_grab_folio // warn here To slove this, use local variable page to make notify_page can be seen after finish get_user_pages_fast. Fixes: a1d88436d53a ("VMCI: Fix two UVA mapping bugs") Cc: stable Closes: https://lore.kernel.org/all/e91da589-ad57-3969-d979-879bbd10dddd@huawei.com/ Signed-off-by: Wupeng Ma Link: https://lore.kernel.org/r/20250510033040.901582-1-mawupeng1@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_host.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c index abe79f6fd2a79b..b64944367ac533 100644 --- a/drivers/misc/vmw_vmci/vmci_host.c +++ b/drivers/misc/vmw_vmci/vmci_host.c @@ -227,6 +227,7 @@ static int drv_cp_harray_to_user(void __user *user_buf_uva, static int vmci_host_setup_notify(struct vmci_ctx *context, unsigned long uva) { + struct page *page; int retval; if (context->notify_page) { @@ -243,13 +244,11 @@ static int vmci_host_setup_notify(struct vmci_ctx *context, /* * Lock physical page backing a given user VA. */ - retval = get_user_pages_fast(uva, 1, FOLL_WRITE, &context->notify_page); - if (retval != 1) { - context->notify_page = NULL; + retval = get_user_pages_fast(uva, 1, FOLL_WRITE, &page); + if (retval != 1) return VMCI_ERROR_GENERIC; - } - if (context->notify_page == NULL) - return VMCI_ERROR_UNAVAILABLE; + + context->notify_page = page; /* * Map the locked page and set up notify pointer. From 115cf028ae4cc2045867ab667eb2a32b91a5fd9f Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 22 May 2025 07:38:35 +0200 Subject: [PATCH 2810/2924] tty: serial: 8250_omap: fix TX with DMA for am33xx commit b495021a973e2468497689bd3e29b736747b896f upstream. Commit 1788cf6a91d9 ("tty: serial: switch from circ_buf to kfifo") introduced an error in the TX DMA handling for 8250_omap. When the OMAP_DMA_TX_KICK flag is set, the "skip_byte" is pulled from the kfifo and emitted directly in order to start the DMA. While the kfifo is updated, dma->tx_size is not decreased. This leads to uart_xmit_advance() called in omap_8250_dma_tx_complete() advancing the kfifo by one too much. In practice, transmitting N bytes has been seen to result in the last N-1 bytes being sent repeatedly. This change fixes the problem by moving all of the dma setup after the OMAP_DMA_TX_KICK handling and using kfifo_len() instead of the DMA size for the 4-byte cutoff check. This slightly changes the behaviour at buffer wraparound, but it still transmits the correct bytes somehow. Now, the "skip_byte" would no longer be accounted to the stats. As previously, dma->tx_size included also this skip byte, up->icount.tx was updated by aforementioned uart_xmit_advance() in omap_8250_dma_tx_complete(). Fix this by using the uart_fifo_out() helper instead of bare kfifo_get(). Based on patch by Mans Rullgard Signed-off-by: "Jiri Slaby (SUSE)" Fixes: 1788cf6a91d9 ("tty: serial: switch from circ_buf to kfifo") Link: https://lore.kernel.org/all/20250506150748.3162-1-mans@mansr.com/ Reported-by: Mans Rullgard Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250522053835.3495975-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 2a0ce11f405d2d..72ae08d6204ff4 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1173,16 +1173,6 @@ static int omap_8250_tx_dma(struct uart_8250_port *p) return 0; } - sg_init_table(&sg, 1); - ret = kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, - UART_XMIT_SIZE, dma->tx_addr); - if (ret != 1) { - serial8250_clear_THRI(p); - return 0; - } - - dma->tx_size = sg_dma_len(&sg); - if (priv->habit & OMAP_DMA_TX_KICK) { unsigned char c; u8 tx_lvl; @@ -1207,18 +1197,22 @@ static int omap_8250_tx_dma(struct uart_8250_port *p) ret = -EBUSY; goto err; } - if (dma->tx_size < 4) { + if (kfifo_len(&tport->xmit_fifo) < 4) { ret = -EINVAL; goto err; } - if (!kfifo_get(&tport->xmit_fifo, &c)) { + if (!uart_fifo_out(&p->port, &c, 1)) { ret = -EINVAL; goto err; } skip_byte = c; - /* now we need to recompute due to kfifo_get */ - kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, - UART_XMIT_SIZE, dma->tx_addr); + } + + sg_init_table(&sg, 1); + ret = kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, UART_XMIT_SIZE, dma->tx_addr); + if (ret != 1) { + ret = -EINVAL; + goto err; } desc = dmaengine_prep_slave_sg(dma->txchan, &sg, 1, DMA_MEM_TO_DEV, @@ -1228,6 +1222,7 @@ static int omap_8250_tx_dma(struct uart_8250_port *p) goto err; } + dma->tx_size = sg_dma_len(&sg); dma->tx_running = 1; desc->callback = omap_8250_dma_tx_complete; From ec490d9e3278b83b1d0735bfbb40d0a7566f187e Mon Sep 17 00:00:00 2001 From: Jonathan Stroud Date: Fri, 16 May 2025 18:02:40 +0530 Subject: [PATCH 2811/2924] usb: misc: onboard_usb_dev: Fix usb5744 initialization sequence commit 1143d41922c0f87504f095417ba1870167970143 upstream. Introduce i2c APIs to read/write for proper configuration register programming. It ensures that read-modify-write sequence is performed and reserved bit in Runtime Flags 2 register are not touched. Also legacy smbus block write inserted an extra count value into the i2c data stream which breaks the register write on the usb5744. Switching to new read/write i2c APIs fixes both issues. Fixes: 6782311d04df ("usb: misc: onboard_usb_dev: add Microchip usb5744 SMBus programming support") Cc: stable Signed-off-by: Jonathan Stroud Co-developed-by: Radhey Shyam Pandey Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1747398760-284021-1-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_dev.c | 100 +++++++++++++++++++++++++---- 1 file changed, 87 insertions(+), 13 deletions(-) diff --git a/drivers/usb/misc/onboard_usb_dev.c b/drivers/usb/misc/onboard_usb_dev.c index f5372dfa241a9c..553f95ccb1c8e5 100644 --- a/drivers/usb/misc/onboard_usb_dev.c +++ b/drivers/usb/misc/onboard_usb_dev.c @@ -36,9 +36,10 @@ #define USB5744_CMD_CREG_ACCESS 0x99 #define USB5744_CMD_CREG_ACCESS_LSB 0x37 #define USB5744_CREG_MEM_ADDR 0x00 +#define USB5744_CREG_MEM_RD_ADDR 0x04 #define USB5744_CREG_WRITE 0x00 -#define USB5744_CREG_RUNTIMEFLAGS2 0x41 -#define USB5744_CREG_RUNTIMEFLAGS2_LSB 0x1D +#define USB5744_CREG_READ 0x01 +#define USB5744_CREG_RUNTIMEFLAGS2 0x411D #define USB5744_CREG_BYPASS_UDC_SUSPEND BIT(3) static void onboard_dev_attach_usb_driver(struct work_struct *work); @@ -309,11 +310,88 @@ static void onboard_dev_attach_usb_driver(struct work_struct *work) pr_err("Failed to attach USB driver: %pe\n", ERR_PTR(err)); } +static int onboard_dev_5744_i2c_read_byte(struct i2c_client *client, u16 addr, u8 *data) +{ + struct i2c_msg msg[2]; + u8 rd_buf[3]; + int ret; + + u8 wr_buf[7] = {0, USB5744_CREG_MEM_ADDR, 4, + USB5744_CREG_READ, 1, + addr >> 8 & 0xff, + addr & 0xff}; + msg[0].addr = client->addr; + msg[0].flags = 0; + msg[0].len = sizeof(wr_buf); + msg[0].buf = wr_buf; + + ret = i2c_transfer(client->adapter, msg, 1); + if (ret < 0) + return ret; + + wr_buf[0] = USB5744_CMD_CREG_ACCESS; + wr_buf[1] = USB5744_CMD_CREG_ACCESS_LSB; + wr_buf[2] = 0; + msg[0].len = 3; + + ret = i2c_transfer(client->adapter, msg, 1); + if (ret < 0) + return ret; + + wr_buf[0] = 0; + wr_buf[1] = USB5744_CREG_MEM_RD_ADDR; + msg[0].len = 2; + + msg[1].addr = client->addr; + msg[1].flags = I2C_M_RD; + msg[1].len = 2; + msg[1].buf = rd_buf; + + ret = i2c_transfer(client->adapter, msg, 2); + if (ret < 0) + return ret; + *data = rd_buf[1]; + + return 0; +} + +static int onboard_dev_5744_i2c_write_byte(struct i2c_client *client, u16 addr, u8 data) +{ + struct i2c_msg msg[2]; + int ret; + + u8 wr_buf[8] = {0, USB5744_CREG_MEM_ADDR, 5, + USB5744_CREG_WRITE, 1, + addr >> 8 & 0xff, + addr & 0xff, + data}; + msg[0].addr = client->addr; + msg[0].flags = 0; + msg[0].len = sizeof(wr_buf); + msg[0].buf = wr_buf; + + ret = i2c_transfer(client->adapter, msg, 1); + if (ret < 0) + return ret; + + msg[0].len = 3; + wr_buf[0] = USB5744_CMD_CREG_ACCESS; + wr_buf[1] = USB5744_CMD_CREG_ACCESS_LSB; + wr_buf[2] = 0; + + ret = i2c_transfer(client->adapter, msg, 1); + if (ret < 0) + return ret; + + return 0; +} + static int onboard_dev_5744_i2c_init(struct i2c_client *client) { #if IS_ENABLED(CONFIG_USB_ONBOARD_DEV_USB5744) struct device *dev = &client->dev; int ret; + u8 reg; /* * Set BYPASS_UDC_SUSPEND bit to ensure MCU is always enabled @@ -321,20 +399,16 @@ static int onboard_dev_5744_i2c_init(struct i2c_client *client) * The command writes 5 bytes to memory and single data byte in * configuration register. */ - char wr_buf[7] = {USB5744_CREG_MEM_ADDR, 5, - USB5744_CREG_WRITE, 1, - USB5744_CREG_RUNTIMEFLAGS2, - USB5744_CREG_RUNTIMEFLAGS2_LSB, - USB5744_CREG_BYPASS_UDC_SUSPEND}; - - ret = i2c_smbus_write_block_data(client, 0, sizeof(wr_buf), wr_buf); + ret = onboard_dev_5744_i2c_read_byte(client, + USB5744_CREG_RUNTIMEFLAGS2, ®); if (ret) - return dev_err_probe(dev, ret, "BYPASS_UDC_SUSPEND bit configuration failed\n"); + return dev_err_probe(dev, ret, "CREG_RUNTIMEFLAGS2 read failed\n"); - ret = i2c_smbus_write_word_data(client, USB5744_CMD_CREG_ACCESS, - USB5744_CMD_CREG_ACCESS_LSB); + reg |= USB5744_CREG_BYPASS_UDC_SUSPEND; + ret = onboard_dev_5744_i2c_write_byte(client, + USB5744_CREG_RUNTIMEFLAGS2, reg); if (ret) - return dev_err_probe(dev, ret, "Configuration Register Access Command failed\n"); + return dev_err_probe(dev, ret, "BYPASS_UDC_SUSPEND bit configuration failed\n"); /* Send SMBus command to boot hub. */ ret = i2c_smbus_write_word_data(client, USB5744_CMD_ATTACH, From f7cbf42ac5d6549f7df535686f229700c9cd155a Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 13 May 2025 05:30:09 +0000 Subject: [PATCH 2812/2924] usb: cdnsp: Fix issue with detecting command completion event commit f4ecdc352646f7d23f348e5c544dbe3212c94fc8 upstream. In some cases, there is a small-time gap in which CMD_RING_BUSY can be cleared by controller but adding command completion event to event ring will be delayed. As the result driver will return error code. This behavior has been detected on usbtest driver (test 9) with configuration including ep1in/ep1out bulk and ep2in/ep2out isoc endpoint. Probably this gap occurred because controller was busy with adding some other events to event ring. The CMD_RING_BUSY is cleared to '0' when the Command Descriptor has been executed and not when command completion event has been added to event ring. To fix this issue for this test the small delay is sufficient less than 10us) but to make sure the problem doesn't happen again in the future the patch introduces 10 retries to check with delay about 20us before returning error code. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB9538AA45362ACCF1B94EE9B7DD96A@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 4824a10df07e7c..58650b7f417341 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -547,6 +547,7 @@ int cdnsp_wait_for_cmd_compl(struct cdnsp_device *pdev) dma_addr_t cmd_deq_dma; union cdnsp_trb *event; u32 cycle_state; + u32 retry = 10; int ret, val; u64 cmd_dma; u32 flags; @@ -578,8 +579,23 @@ int cdnsp_wait_for_cmd_compl(struct cdnsp_device *pdev) flags = le32_to_cpu(event->event_cmd.flags); /* Check the owner of the TRB. */ - if ((flags & TRB_CYCLE) != cycle_state) + if ((flags & TRB_CYCLE) != cycle_state) { + /* + * Give some extra time to get chance controller + * to finish command before returning error code. + * Checking CMD_RING_BUSY is not sufficient because + * this bit is cleared to '0' when the Command + * Descriptor has been executed by controller + * and not when command completion event has + * be added to event ring. + */ + if (retry--) { + udelay(20); + continue; + } + return -EINVAL; + } cmd_dma = le64_to_cpu(event->event_cmd.cmd_trb); From 918f7d19d91d839c6efd92c6e289f35e187030e3 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 13 May 2025 06:54:03 +0000 Subject: [PATCH 2813/2924] usb: cdnsp: Fix issue with detecting USB 3.2 speed commit 2852788cfbe9ca1ab68509d65804413871f741f9 upstream. Patch adds support for detecting SuperSpeedPlus Gen1 x2 and SuperSpeedPlus Gen2 x2 speed. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Cc: stable Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/PH7PR07MB95387AD98EDCA695FECE52BADD96A@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 3 ++- drivers/usb/cdns3/cdnsp-gadget.h | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 58650b7f417341..55f95f41b3b4dd 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -29,7 +29,8 @@ unsigned int cdnsp_port_speed(unsigned int port_status) { /*Detect gadget speed based on PORTSC register*/ - if (DEV_SUPERSPEEDPLUS(port_status)) + if (DEV_SUPERSPEEDPLUS(port_status) || + DEV_SSP_GEN1x2(port_status) || DEV_SSP_GEN2x2(port_status)) return USB_SPEED_SUPER_PLUS; else if (DEV_SUPERSPEED(port_status)) return USB_SPEED_SUPER; diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 12534be52f39df..2afa3e558f85ca 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -285,11 +285,15 @@ struct cdnsp_port_regs { #define XDEV_HS (0x3 << 10) #define XDEV_SS (0x4 << 10) #define XDEV_SSP (0x5 << 10) +#define XDEV_SSP1x2 (0x6 << 10) +#define XDEV_SSP2x2 (0x7 << 10) #define DEV_UNDEFSPEED(p) (((p) & DEV_SPEED_MASK) == (0x0 << 10)) #define DEV_FULLSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_FS) #define DEV_HIGHSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_HS) #define DEV_SUPERSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_SS) #define DEV_SUPERSPEEDPLUS(p) (((p) & DEV_SPEED_MASK) == XDEV_SSP) +#define DEV_SSP_GEN1x2(p) (((p) & DEV_SPEED_MASK) == XDEV_SSP1x2) +#define DEV_SSP_GEN2x2(p) (((p) & DEV_SPEED_MASK) == XDEV_SSP2x2) #define DEV_SUPERSPEED_ANY(p) (((p) & DEV_SPEED_MASK) >= XDEV_SS) #define DEV_PORT_SPEED(p) (((p) >> 10) & 0x0f) /* Port Link State Write Strobe - set this when changing link state */ From 1d494a703c160a8ddba75dc6be45dce66716419c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Wed, 14 May 2025 16:25:20 +0300 Subject: [PATCH 2814/2924] usb: Flush altsetting 0 endpoints before reinitializating them after reset. commit 89bb3dc13ac29a563f4e4c555e422882f64742bd upstream. usb core avoids sending a Set-Interface altsetting 0 request after device reset, and instead relies on calling usb_disable_interface() and usb_enable_interface() to flush and reset host-side of those endpoints. xHCI hosts allocate and set up endpoint ring buffers and host_ep->hcpriv during usb_hcd_alloc_bandwidth() callback, which in this case is called before flushing the endpoint in usb_disable_interface(). Call usb_disable_interface() before usb_hcd_alloc_bandwidth() to ensure URBs are flushed before new ring buffers for the endpoints are allocated. Otherwise host driver will attempt to find and remove old stale URBs from a freshly allocated new ringbuffer. Cc: stable Fixes: 4fe0387afa89 ("USB: don't send Set-Interface after reset") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20250514132520.225345-1-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 0e1dd6ef60a719..9f19fc7494e022 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -6133,6 +6133,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) struct usb_hub *parent_hub; struct usb_hcd *hcd = bus_to_hcd(udev->bus); struct usb_device_descriptor descriptor; + struct usb_interface *intf; struct usb_host_bos *bos; int i, j, ret = 0; int port1 = udev->portnum; @@ -6190,6 +6191,18 @@ static int usb_reset_and_verify_device(struct usb_device *udev) if (!udev->actconfig) goto done; + /* + * Some devices can't handle setting default altsetting 0 with a + * Set-Interface request. Disable host-side endpoints of those + * interfaces here. Enable and reset them back after host has set + * its internal endpoint structures during usb_hcd_alloc_bandwith() + */ + for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { + intf = udev->actconfig->interface[i]; + if (intf->cur_altsetting->desc.bAlternateSetting == 0) + usb_disable_interface(udev, intf, true); + } + mutex_lock(hcd->bandwidth_mutex); ret = usb_hcd_alloc_bandwidth(udev, udev->actconfig, NULL, NULL); if (ret < 0) { @@ -6221,12 +6234,11 @@ static int usb_reset_and_verify_device(struct usb_device *udev) */ for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { struct usb_host_config *config = udev->actconfig; - struct usb_interface *intf = config->interface[i]; struct usb_interface_descriptor *desc; + intf = config->interface[i]; desc = &intf->cur_altsetting->desc; if (desc->bAlternateSetting == 0) { - usb_disable_interface(udev, intf, true); usb_enable_interface(udev, intf, true); ret = 0; } else { From 8b19df96cd08abba6672a855b5adcea1e9c26dde Mon Sep 17 00:00:00 2001 From: Amit Sunil Dhamne Date: Fri, 2 May 2025 16:57:03 -0700 Subject: [PATCH 2815/2924] usb: typec: tcpm/tcpci_maxim: Fix bounds check in process_rx() commit 0736299d090f5c6a1032678705c4bc0a9511a3db upstream. Register read of TCPC_RX_BYTE_CNT returns the total size consisting of: PD message (pending read) size + 1 Byte for Frame Type (SOP*) This is validated against the max PD message (`struct pd_message`) size without accounting for the extra byte for the frame type. Note that the struct pd_message does not contain a field for the frame_type. This results in false negatives when the "PD message (pending read)" is equal to the max PD message size. Fixes: 6f413b559f86 ("usb: typec: tcpci_maxim: Chip level TCPC driver") Signed-off-by: Amit Sunil Dhamne Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Kyle Tso Cc: stable Link: https://lore.kernel.org/stable/20250502-b4-new-fix-pd-rx-count-v1-1-e5711ed09b3d%40google.com Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20250502-b4-new-fix-pd-rx-count-v1-1-e5711ed09b3d@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpci_maxim_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpci_maxim_core.c b/drivers/usb/typec/tcpm/tcpci_maxim_core.c index fd1b8059336764..648311f5e3cf13 100644 --- a/drivers/usb/typec/tcpm/tcpci_maxim_core.c +++ b/drivers/usb/typec/tcpm/tcpci_maxim_core.c @@ -166,7 +166,8 @@ static void process_rx(struct max_tcpci_chip *chip, u16 status) return; } - if (count > sizeof(struct pd_message) || count + 1 > TCPC_RECEIVE_BUFFER_LEN) { + if (count > sizeof(struct pd_message) + 1 || + count + 1 > TCPC_RECEIVE_BUFFER_LEN) { dev_err(chip->dev, "Invalid TCPC_RX_BYTE_CNT %d\n", count); return; } From 1970d34b48cbeceb0c765984c9a6bb204c77f16a Mon Sep 17 00:00:00 2001 From: RD Babiera Date: Tue, 6 May 2025 23:28:53 +0000 Subject: [PATCH 2816/2924] usb: typec: tcpm: move tcpm_queue_vdm_unlocked to asynchronous work commit 324d45e53f1a36c88bc649dc39e0c8300a41be0a upstream. A state check was previously added to tcpm_queue_vdm_unlocked to prevent a deadlock where the DisplayPort Alt Mode driver would be executing work and attempting to grab the tcpm_lock while the TCPM was holding the lock and attempting to unregister the altmode, blocking on the altmode driver's cancel_work_sync call. Because the state check isn't protected, there is a small window where the Alt Mode driver could determine that the TCPM is in a ready state and attempt to grab the lock while the TCPM grabs the lock and changes the TCPM state to one that causes the deadlock. The callstack is provided below: [110121.667392][ C7] Call trace: [110121.667396][ C7] __switch_to+0x174/0x338 [110121.667406][ C7] __schedule+0x608/0x9f0 [110121.667414][ C7] schedule+0x7c/0xe8 [110121.667423][ C7] kernfs_drain+0xb0/0x114 [110121.667431][ C7] __kernfs_remove+0x16c/0x20c [110121.667436][ C7] kernfs_remove_by_name_ns+0x74/0xe8 [110121.667442][ C7] sysfs_remove_group+0x84/0xe8 [110121.667450][ C7] sysfs_remove_groups+0x34/0x58 [110121.667458][ C7] device_remove_groups+0x10/0x20 [110121.667464][ C7] device_release_driver_internal+0x164/0x2e4 [110121.667475][ C7] device_release_driver+0x18/0x28 [110121.667484][ C7] bus_remove_device+0xec/0x118 [110121.667491][ C7] device_del+0x1e8/0x4ac [110121.667498][ C7] device_unregister+0x18/0x38 [110121.667504][ C7] typec_unregister_altmode+0x30/0x44 [110121.667515][ C7] tcpm_reset_port+0xac/0x370 [110121.667523][ C7] tcpm_snk_detach+0x84/0xb8 [110121.667529][ C7] run_state_machine+0x4c0/0x1b68 [110121.667536][ C7] tcpm_state_machine_work+0x94/0xe4 [110121.667544][ C7] kthread_worker_fn+0x10c/0x244 [110121.667552][ C7] kthread+0x104/0x1d4 [110121.667557][ C7] ret_from_fork+0x10/0x20 [110121.667689][ C7] Workqueue: events dp_altmode_work [110121.667697][ C7] Call trace: [110121.667701][ C7] __switch_to+0x174/0x338 [110121.667710][ C7] __schedule+0x608/0x9f0 [110121.667717][ C7] schedule+0x7c/0xe8 [110121.667725][ C7] schedule_preempt_disabled+0x24/0x40 [110121.667733][ C7] __mutex_lock+0x408/0xdac [110121.667741][ C7] __mutex_lock_slowpath+0x14/0x24 [110121.667748][ C7] mutex_lock+0x40/0xec [110121.667757][ C7] tcpm_altmode_enter+0x78/0xb4 [110121.667764][ C7] typec_altmode_enter+0xdc/0x10c [110121.667769][ C7] dp_altmode_work+0x68/0x164 [110121.667775][ C7] process_one_work+0x1e4/0x43c [110121.667783][ C7] worker_thread+0x25c/0x430 [110121.667789][ C7] kthread+0x104/0x1d4 [110121.667794][ C7] ret_from_fork+0x10/0x20 Change tcpm_queue_vdm_unlocked to queue for tcpm_queue_vdm_work, which can perform the state check while holding the TCPM lock while the Alt Mode lock is no longer held. This requires a new struct to hold the vdm data, altmode_vdm_event. Fixes: cdc9946ea637 ("usb: typec: tcpm: enforce ready state when queueing alt mode vdm") Cc: stable Signed-off-by: RD Babiera Reviewed-by: Heikki Krogerus Reviewed-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20250506232853.1968304-2-rdbabiera@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 91 +++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 20 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 8adf6f95463304..214d45f8e55c21 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -596,6 +596,15 @@ struct pd_rx_event { enum tcpm_transmit_type rx_sop_type; }; +struct altmode_vdm_event { + struct kthread_work work; + struct tcpm_port *port; + u32 header; + u32 *data; + int cnt; + enum tcpm_transmit_type tx_sop_type; +}; + static const char * const pd_rev[] = { [PD_REV10] = "rev1", [PD_REV20] = "rev2", @@ -1608,18 +1617,68 @@ static void tcpm_queue_vdm(struct tcpm_port *port, const u32 header, mod_vdm_delayed_work(port, 0); } -static void tcpm_queue_vdm_unlocked(struct tcpm_port *port, const u32 header, - const u32 *data, int cnt, enum tcpm_transmit_type tx_sop_type) +static void tcpm_queue_vdm_work(struct kthread_work *work) { - if (port->state != SRC_READY && port->state != SNK_READY && - port->state != SRC_VDM_IDENTITY_REQUEST) - return; + struct altmode_vdm_event *event = container_of(work, + struct altmode_vdm_event, + work); + struct tcpm_port *port = event->port; mutex_lock(&port->lock); - tcpm_queue_vdm(port, header, data, cnt, tx_sop_type); + if (port->state != SRC_READY && port->state != SNK_READY && + port->state != SRC_VDM_IDENTITY_REQUEST) { + tcpm_log_force(port, "dropping altmode_vdm_event"); + goto port_unlock; + } + + tcpm_queue_vdm(port, event->header, event->data, event->cnt, event->tx_sop_type); + +port_unlock: + kfree(event->data); + kfree(event); mutex_unlock(&port->lock); } +static int tcpm_queue_vdm_unlocked(struct tcpm_port *port, const u32 header, + const u32 *data, int cnt, enum tcpm_transmit_type tx_sop_type) +{ + struct altmode_vdm_event *event; + u32 *data_cpy; + int ret = -ENOMEM; + + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) + goto err_event; + + data_cpy = kcalloc(cnt, sizeof(u32), GFP_KERNEL); + if (!data_cpy) + goto err_data; + + kthread_init_work(&event->work, tcpm_queue_vdm_work); + event->port = port; + event->header = header; + memcpy(data_cpy, data, sizeof(u32) * cnt); + event->data = data_cpy; + event->cnt = cnt; + event->tx_sop_type = tx_sop_type; + + ret = kthread_queue_work(port->wq, &event->work); + if (!ret) { + ret = -EBUSY; + goto err_queue; + } + + return 0; + +err_queue: + kfree(data_cpy); +err_data: + kfree(event); +err_event: + tcpm_log_force(port, "failed to queue altmode vdm, err:%d", ret); + return ret; +} + static void svdm_consume_identity(struct tcpm_port *port, const u32 *p, int cnt) { u32 vdo = p[VDO_INDEX_IDH]; @@ -2830,8 +2889,7 @@ static int tcpm_altmode_enter(struct typec_altmode *altmode, u32 *vdo) header = VDO(altmode->svid, vdo ? 2 : 1, svdm_version, CMD_ENTER_MODE); header |= VDO_OPOS(altmode->mode); - tcpm_queue_vdm_unlocked(port, header, vdo, vdo ? 1 : 0, TCPC_TX_SOP); - return 0; + return tcpm_queue_vdm_unlocked(port, header, vdo, vdo ? 1 : 0, TCPC_TX_SOP); } static int tcpm_altmode_exit(struct typec_altmode *altmode) @@ -2847,8 +2905,7 @@ static int tcpm_altmode_exit(struct typec_altmode *altmode) header = VDO(altmode->svid, 1, svdm_version, CMD_EXIT_MODE); header |= VDO_OPOS(altmode->mode); - tcpm_queue_vdm_unlocked(port, header, NULL, 0, TCPC_TX_SOP); - return 0; + return tcpm_queue_vdm_unlocked(port, header, NULL, 0, TCPC_TX_SOP); } static int tcpm_altmode_vdm(struct typec_altmode *altmode, @@ -2856,9 +2913,7 @@ static int tcpm_altmode_vdm(struct typec_altmode *altmode, { struct tcpm_port *port = typec_altmode_get_drvdata(altmode); - tcpm_queue_vdm_unlocked(port, header, data, count - 1, TCPC_TX_SOP); - - return 0; + return tcpm_queue_vdm_unlocked(port, header, data, count - 1, TCPC_TX_SOP); } static const struct typec_altmode_ops tcpm_altmode_ops = { @@ -2882,8 +2937,7 @@ static int tcpm_cable_altmode_enter(struct typec_altmode *altmode, enum typec_pl header = VDO(altmode->svid, vdo ? 2 : 1, svdm_version, CMD_ENTER_MODE); header |= VDO_OPOS(altmode->mode); - tcpm_queue_vdm_unlocked(port, header, vdo, vdo ? 1 : 0, TCPC_TX_SOP_PRIME); - return 0; + return tcpm_queue_vdm_unlocked(port, header, vdo, vdo ? 1 : 0, TCPC_TX_SOP_PRIME); } static int tcpm_cable_altmode_exit(struct typec_altmode *altmode, enum typec_plug_index sop) @@ -2899,8 +2953,7 @@ static int tcpm_cable_altmode_exit(struct typec_altmode *altmode, enum typec_plu header = VDO(altmode->svid, 1, svdm_version, CMD_EXIT_MODE); header |= VDO_OPOS(altmode->mode); - tcpm_queue_vdm_unlocked(port, header, NULL, 0, TCPC_TX_SOP_PRIME); - return 0; + return tcpm_queue_vdm_unlocked(port, header, NULL, 0, TCPC_TX_SOP_PRIME); } static int tcpm_cable_altmode_vdm(struct typec_altmode *altmode, enum typec_plug_index sop, @@ -2908,9 +2961,7 @@ static int tcpm_cable_altmode_vdm(struct typec_altmode *altmode, enum typec_plug { struct tcpm_port *port = typec_altmode_get_drvdata(altmode); - tcpm_queue_vdm_unlocked(port, header, data, count - 1, TCPC_TX_SOP_PRIME); - - return 0; + return tcpm_queue_vdm_unlocked(port, header, data, count - 1, TCPC_TX_SOP_PRIME); } static const struct typec_cable_ops tcpm_cable_ops = { From 38363fcc3e133648ce1d44aaa8cdae5add7a4bd5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 2 Apr 2025 15:59:55 +0100 Subject: [PATCH 2817/2924] 9p: Add a migrate_folio method commit 03ddd7725ed1b39cf9251e1a420559f25dac49b3 upstream. The migration code used to be able to migrate dirty 9p folios by writing them back using writepage. When the writepage method was removed, we neglected to add a migrate_folio method, which means that dirty 9p folios have been unmovable ever since. This reduced our success at defragmenting memory on machines which use 9p heavily. Fixes: 80105ed2fd27 (9p: Use netfslib read/write_iter) Cc: stable@vger.kernel.org Cc: David Howells Cc: v9fs@lists.linux.dev Signed-off-by: "Matthew Wilcox (Oracle)" Link: https://lore.kernel.org/r/20250402150005.2309458-2-willy@infradead.org Acked-by: Dominique Martinet Reviewed-by: David Howells Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_addr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index e4420591cf3543..862164181baca1 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -165,4 +165,5 @@ const struct address_space_operations v9fs_addr_operations = { .invalidate_folio = netfs_invalidate_folio, .direct_IO = noop_direct_IO, .writepages = netfs_writepages, + .migrate_folio = filemap_migrate_folio, }; From 02689a8961869170600321a9de819351cc9e69be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 23 May 2025 19:20:36 -0400 Subject: [PATCH 2818/2924] Don't propagate mounts into detached trees commit 3b5260d12b1fe76b566fe182de8abc586b827ed0 upstream. All versions up to 6.14 did not propagate mount events into detached tree. Shortly after 6.14 a merge of vfs-6.15-rc1.mount.namespace (130e696aa68b) has changed that. Unfortunately, that has caused userland regressions (reported in https://lore.kernel.org/all/CAOYeF9WQhFDe+BGW=Dp5fK8oRy5AgZ6zokVyTj1Wp4EUiYgt4w@mail.gmail.com/) Straight revert wouldn't be an option - in particular, the variant in 6.14 had a bug that got fixed in d1ddc6f1d9f0 ("fix IS_MNT_PROPAGATING uses") and we don't want to bring the bug back. This is a modification of manual revert posted by Christian, with changes needed to avoid reintroducing the breakage in scenario described in d1ddc6f1d9f0. Cc: stable@vger.kernel.org Reported-by: Allison Karlitskaya Tested-by: Allison Karlitskaya Acked-by: Christian Brauner Co-developed-by: Christian Brauner Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/mount.h | 5 ----- fs/namespace.c | 15 ++------------- fs/pnode.c | 4 ++-- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index 7aecf2a6047232..ad7173037924a8 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -7,10 +7,6 @@ extern struct list_head notify_list; -typedef __u32 __bitwise mntns_flags_t; - -#define MNTNS_PROPAGATING ((__force mntns_flags_t)(1 << 0)) - struct mnt_namespace { struct ns_common ns; struct mount * root; @@ -37,7 +33,6 @@ struct mnt_namespace { struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */ refcount_t passive; /* number references not pinning @mounts */ - mntns_flags_t mntns_flags; } __randomize_layout; struct mnt_pcp { diff --git a/fs/namespace.c b/fs/namespace.c index 2de4b7ad1dc5d6..a32f43b7eb08d4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3673,7 +3673,7 @@ static int do_move_mount(struct path *old_path, if (!(attached ? check_mnt(old) : is_anon_ns(ns))) goto out; - if (is_anon_ns(ns)) { + if (is_anon_ns(ns) && ns == p->mnt_ns) { /* * Ending up with two files referring to the root of the * same anonymous mount namespace would cause an error @@ -3681,16 +3681,7 @@ static int do_move_mount(struct path *old_path, * twice into the mount tree which would be rejected * later. But be explicit about it right here. */ - if ((is_anon_ns(p->mnt_ns) && ns == p->mnt_ns)) - goto out; - - /* - * If this is an anonymous mount tree ensure that mount - * propagation can detect mounts that were just - * propagated to the target mount tree so we don't - * propagate onto them. - */ - ns->mntns_flags |= MNTNS_PROPAGATING; + goto out; } else if (is_anon_ns(p->mnt_ns)) { /* * Don't allow moving an attached mount tree to an @@ -3747,8 +3738,6 @@ static int do_move_mount(struct path *old_path, if (attached) put_mountpoint(old_mp); out: - if (is_anon_ns(ns)) - ns->mntns_flags &= ~MNTNS_PROPAGATING; unlock_mount(mp); if (!err) { if (attached) { diff --git a/fs/pnode.c b/fs/pnode.c index fb77427df39e2e..ffd429b760d5d4 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -231,8 +231,8 @@ static int propagate_one(struct mount *m, struct mountpoint *dest_mp) /* skip if mountpoint isn't visible in m */ if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) return 0; - /* skip if m is in the anon_ns we are emptying */ - if (m->mnt_ns->mntns_flags & MNTNS_PROPAGATING) + /* skip if m is in the anon_ns */ + if (is_anon_ns(m->mnt_ns)) return 0; if (peers(m, last_dest)) { From 0a471b280481dd63f03801d3b122ff5f6092b981 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:52 -0600 Subject: [PATCH 2819/2924] mm/filemap: gate dropbehind invalidate on folio !dirty && !writeback commit 095f627add86a6ddda2c2cfd563b0ee05d0172b2 upstream. It's possible for the folio to either get marked for writeback or redirtied. Add a helper, filemap_end_dropbehind(), which guards the folio_unmap_invalidate() call behind check for the folio being both non-dirty and not under writeback AFTER the folio lock has been acquired. Use this helper folio_end_dropbehind_write(). Cc: stable@vger.kernel.org Reported-by: Al Viro Fixes: fb7d3bc41493 ("mm/filemap: drop streaming/uncached pages when writeback completes") Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/ Signed-off-by: Jens Axboe Link: https://lore.kernel.org/20250527133255.452431-2-axboe@kernel.dk Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 7b90cbeb4a1adf..008a55290f34a9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1589,6 +1589,16 @@ int folio_wait_private_2_killable(struct folio *folio) } EXPORT_SYMBOL(folio_wait_private_2_killable); +static void filemap_end_dropbehind(struct folio *folio) +{ + struct address_space *mapping = folio->mapping; + + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + + if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio)) + folio_unmap_invalidate(mapping, folio, 0); +} + /* * If folio was marked as dropbehind, then pages should be dropped when writeback * completes. Do that now. If we fail, it's likely because of a big folio - @@ -1604,8 +1614,7 @@ static void folio_end_dropbehind_write(struct folio *folio) * invalidation in that case. */ if (in_task() && folio_trylock(folio)) { - if (folio->mapping) - folio_unmap_invalidate(folio->mapping, folio, 0); + filemap_end_dropbehind(folio); folio_unlock(folio); } } From 411bf2a07a19b9f531ae5e43b05a036b41f59946 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 May 2025 07:28:53 -0600 Subject: [PATCH 2820/2924] mm/filemap: use filemap_end_dropbehind() for read invalidation commit 25b065a744ff0c1099bb357be1c40030b5a14c07 upstream. Use the filemap_end_dropbehind() helper rather than calling folio_unmap_invalidate() directly, as we need to check if the folio has been redirtied or marked for writeback once the folio lock has been re-acquired. Cc: stable@vger.kernel.org Reported-by: Trond Myklebust Fixes: 8026e49bff9b ("mm/filemap: add read support for RWF_DONTCACHE") Link: https://lore.kernel.org/linux-fsdevel/ba8a9805331ce258a622feaca266b163db681a10.camel@hammerspace.com/ Signed-off-by: Jens Axboe Link: https://lore.kernel.org/20250527133255.452431-3-axboe@kernel.dk Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 008a55290f34a9..6af6d8f2929ce4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2644,8 +2644,7 @@ static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio) return (pos1 >> shift == pos2 >> shift); } -static void filemap_end_dropbehind_read(struct address_space *mapping, - struct folio *folio) +static void filemap_end_dropbehind_read(struct folio *folio) { if (!folio_test_dropbehind(folio)) return; @@ -2653,7 +2652,7 @@ static void filemap_end_dropbehind_read(struct address_space *mapping, return; if (folio_trylock(folio)) { if (folio_test_clear_dropbehind(folio)) - folio_unmap_invalidate(mapping, folio, 0); + filemap_end_dropbehind(folio); folio_unlock(folio); } } @@ -2774,7 +2773,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; - filemap_end_dropbehind_read(mapping, folio); + filemap_end_dropbehind_read(folio); folio_put(folio); } folio_batch_init(&fbatch); From e018053632bad8ee0752242c7d2cffb0bbf45404 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 28 May 2025 12:15:55 -0400 Subject: [PATCH 2821/2924] ring-buffer: Do not trigger WARN_ON() due to a commit_overrun commit 4fc78a7c9ca994e1da5d3940704d4e8f0ea8c5e4 upstream. When reading a memory mapped buffer the reader page is just swapped out with the last page written in the write buffer. If the reader page is the same as the commit buffer (the buffer that is currently being written to) it was assumed that it should never have missed events. If it does, it triggers a WARN_ON_ONCE(). But there just happens to be one scenario where this can legitimately happen. That is on a commit_overrun. A commit overrun is when an interrupt preempts an event being written to the buffer and then the interrupt adds so many new events that it fills and wraps the buffer back to the commit. Any new events would then be dropped and be reported as "missed_events". In this case, the next page to read is the commit buffer and after the swap of the reader page, the reader page will be the commit buffer, but this time there will be missed events and this triggers the following warning: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1127 at kernel/trace/ring_buffer.c:7357 ring_buffer_map_get_reader+0x49a/0x780 Modules linked in: kvm_intel kvm irqbypass CPU: 2 UID: 0 PID: 1127 Comm: trace-cmd Not tainted 6.15.0-rc7-test-00004-g478bc2824b45-dirty #564 PREEMPT Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:ring_buffer_map_get_reader+0x49a/0x780 Code: 00 00 00 48 89 fe 48 c1 ee 03 80 3c 2e 00 0f 85 ec 01 00 00 4d 3b a6 a8 00 00 00 0f 85 8a fd ff ff 48 85 c0 0f 84 55 fe ff ff <0f> 0b e9 4e fe ff ff be 08 00 00 00 4c 89 54 24 58 48 89 54 24 50 RSP: 0018:ffff888121787dc0 EFLAGS: 00010002 RAX: 00000000000006a2 RBX: ffff888100062800 RCX: ffffffff8190cb49 RDX: ffff888126934c00 RSI: 1ffff11020200a15 RDI: ffff8881010050a8 RBP: dffffc0000000000 R08: 0000000000000000 R09: ffffed1024d26982 R10: ffff888126934c17 R11: ffff8881010050a8 R12: ffff888126934c00 R13: ffff8881010050b8 R14: ffff888101005000 R15: ffff888126930008 FS: 00007f95c8cd7540(0000) GS:ffff8882b576e000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f95c8de4dc0 CR3: 0000000128452002 CR4: 0000000000172ef0 Call Trace: ? __pfx_ring_buffer_map_get_reader+0x10/0x10 tracing_buffers_ioctl+0x283/0x370 __x64_sys_ioctl+0x134/0x190 do_syscall_64+0x79/0x1c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f95c8de48db Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1c 48 8b 44 24 18 64 48 2b 04 25 28 00 00 RSP: 002b:00007ffe037ba110 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffe037bb2b0 RCX: 00007f95c8de48db RDX: 0000000000000000 RSI: 0000000000005220 RDI: 0000000000000006 RBP: 00007ffe037ba180 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffe037bb6f8 R14: 00007f95c9065000 R15: 00005575c7492c90 irq event stamp: 5080 hardirqs last enabled at (5079): [] _raw_spin_unlock_irqrestore+0x50/0x70 hardirqs last disabled at (5080): [] _raw_spin_lock_irqsave+0x63/0x70 softirqs last enabled at (4182): [] handle_softirqs+0x552/0x710 softirqs last disabled at (4159): [] __irq_exit_rcu+0x107/0x210 ---[ end trace 0000000000000000 ]--- The above was triggered by running on a kernel with both lockdep and KASAN as well as kmemleak enabled and executing the following command: # perf record -o perf-test.dat -a -- trace-cmd record --nosplice -e all -p function hackbench 50 With perf interjecting a lot of interrupts and trace-cmd enabling all events as well as function tracing, with lockdep, KASAN and kmemleak enabled, it could cause an interrupt preempting an event being written to add enough events to wrap the buffer. trace-cmd was modified to have --nosplice use mmap instead of reading the buffer. The way to differentiate this case from the normal case of there only being one page written to where the swap of the reader page received that one page (which is the commit page), check if the tail page is on the reader page. The difference between the commit page and the tail page is that the tail page is where new writes go to, and the commit page holds the first write that hasn't been committed yet. In the case of an interrupt preempting the write of an event and filling the buffer, it would move the tail page but not the commit page. Have the warning only trigger if the tail page is also on the reader page, and also print out the number of events dropped by a commit overrun as that can not yet be safely added to the page so that the reader can see there were events dropped. Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Cc: Vincent Donnefort Link: https://lore.kernel.org/20250528121555.2066527e@gandalf.local.home Fixes: fe832be05a8ee ("ring-buffer: Have mmapped ring buffer keep track of missed events") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3f9bf562beea23..36ca5e7b3c59b2 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -7284,8 +7284,8 @@ int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu) /* Check if any events were dropped */ missed_events = cpu_buffer->lost_events; - if (cpu_buffer->reader_page != cpu_buffer->commit_page) { - if (missed_events) { + if (missed_events) { + if (cpu_buffer->reader_page != cpu_buffer->commit_page) { struct buffer_data_page *bpage = reader->page; unsigned int commit; /* @@ -7306,13 +7306,23 @@ int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu) local_add(RB_MISSED_STORED, &bpage->commit); } local_add(RB_MISSED_EVENTS, &bpage->commit); + } else if (!WARN_ONCE(cpu_buffer->reader_page == cpu_buffer->tail_page, + "Reader on commit with %ld missed events", + missed_events)) { + /* + * There shouldn't be any missed events if the tail_page + * is on the reader page. But if the tail page is not on the + * reader page and the commit_page is, that would mean that + * there's a commit_overrun (an interrupt preempted an + * addition of an event and then filled the buffer + * with new events). In this case it's not an + * error, but it should still be reported. + * + * TODO: Add missed events to the page for user space to know. + */ + pr_info("Ring buffer [%d] commit overrun lost %ld events at timestamp:%lld\n", + cpu, missed_events, cpu_buffer->reader_page->page->time_stamp); } - } else { - /* - * There really shouldn't be any missed events if the commit - * is on the reader page. - */ - WARN_ON_ONCE(missed_events); } cpu_buffer->lost_events = 0; From 0fc9a295cd8e59c3636e97395e7c74a9c89fee42 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 6 Jun 2025 14:22:42 +0300 Subject: [PATCH 2822/2924] ring-buffer: Fix buffer locking in ring_buffer_subbuf_order_set() commit 40ee2afafc1d9fe3aa44a6fbe440d78a5c96a72e upstream. Enlarge the critical section in ring_buffer_subbuf_order_set() to ensure that error handling takes place with per-buffer mutex held, thus preventing list corruption and other concurrency-related issues. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Tzvetomir Stoyanov Link: https://lore.kernel.org/20250606112242.1510605-1-dmantipov@yandex.ru Reported-by: syzbot+05d673e83ec640f0ced9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=05d673e83ec640f0ced9 Fixes: f9b94daa542a8 ("ring-buffer: Set new size of the ring buffer sub page") Signed-off-by: Dmitry Antipov Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 36ca5e7b3c59b2..03bb0ee7bfe9f8 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -6764,7 +6764,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) old_size = buffer->subbuf_size; /* prevent another thread from changing buffer sizes */ - mutex_lock(&buffer->mutex); + guard(mutex)(&buffer->mutex); atomic_inc(&buffer->record_disabled); /* Make sure all commits have finished */ @@ -6869,7 +6869,6 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) } atomic_dec(&buffer->record_disabled); - mutex_unlock(&buffer->mutex); return 0; @@ -6878,7 +6877,6 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) buffer->subbuf_size = old_size; atomic_dec(&buffer->record_disabled); - mutex_unlock(&buffer->mutex); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; From ea97c04a29dc9f6e4d8623d445eb3cf63abf4f4c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 27 May 2025 10:58:20 -0400 Subject: [PATCH 2823/2924] ring-buffer: Move cpus_read_lock() outside of buffer->mutex commit c98cc9797b7009308fff73d41bc1d08642dab77a upstream. Running a modified trace-cmd record --nosplice where it does a mmap of the ring buffer when '--nosplice' is set, caused the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 6.15.0-rc7-test-00002-gfb7d03d8a82f #551 Not tainted ------------------------------------------------------ trace-cmd/1113 is trying to acquire lock: ffff888100062888 (&buffer->mutex){+.+.}-{4:4}, at: ring_buffer_map+0x11c/0xe70 but task is already holding lock: ffff888100a5f9f8 (&cpu_buffer->mapping_lock){+.+.}-{4:4}, at: ring_buffer_map+0xcf/0xe70 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #5 (&cpu_buffer->mapping_lock){+.+.}-{4:4}: __mutex_lock+0x192/0x18c0 ring_buffer_map+0xcf/0xe70 tracing_buffers_mmap+0x1c4/0x3b0 __mmap_region+0xd8d/0x1f70 do_mmap+0x9d7/0x1010 vm_mmap_pgoff+0x20b/0x390 ksys_mmap_pgoff+0x2e9/0x440 do_syscall_64+0x79/0x1c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #4 (&mm->mmap_lock){++++}-{4:4}: __might_fault+0xa5/0x110 _copy_to_user+0x22/0x80 _perf_ioctl+0x61b/0x1b70 perf_ioctl+0x62/0x90 __x64_sys_ioctl+0x134/0x190 do_syscall_64+0x79/0x1c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #3 (&cpuctx_mutex){+.+.}-{4:4}: __mutex_lock+0x192/0x18c0 perf_event_init_cpu+0x325/0x7c0 perf_event_init+0x52a/0x5b0 start_kernel+0x263/0x3e0 x86_64_start_reservations+0x24/0x30 x86_64_start_kernel+0x95/0xa0 common_startup_64+0x13e/0x141 -> #2 (pmus_lock){+.+.}-{4:4}: __mutex_lock+0x192/0x18c0 perf_event_init_cpu+0xb7/0x7c0 cpuhp_invoke_callback+0x2c0/0x1030 __cpuhp_invoke_callback_range+0xbf/0x1f0 _cpu_up+0x2e7/0x690 cpu_up+0x117/0x170 cpuhp_bringup_mask+0xd5/0x120 bringup_nonboot_cpus+0x13d/0x170 smp_init+0x2b/0xf0 kernel_init_freeable+0x441/0x6d0 kernel_init+0x1e/0x160 ret_from_fork+0x34/0x70 ret_from_fork_asm+0x1a/0x30 -> #1 (cpu_hotplug_lock){++++}-{0:0}: cpus_read_lock+0x2a/0xd0 ring_buffer_resize+0x610/0x14e0 __tracing_resize_ring_buffer.part.0+0x42/0x120 tracing_set_tracer+0x7bd/0xa80 tracing_set_trace_write+0x132/0x1e0 vfs_write+0x21c/0xe80 ksys_write+0xf9/0x1c0 do_syscall_64+0x79/0x1c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (&buffer->mutex){+.+.}-{4:4}: __lock_acquire+0x1405/0x2210 lock_acquire+0x174/0x310 __mutex_lock+0x192/0x18c0 ring_buffer_map+0x11c/0xe70 tracing_buffers_mmap+0x1c4/0x3b0 __mmap_region+0xd8d/0x1f70 do_mmap+0x9d7/0x1010 vm_mmap_pgoff+0x20b/0x390 ksys_mmap_pgoff+0x2e9/0x440 do_syscall_64+0x79/0x1c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e other info that might help us debug this: Chain exists of: &buffer->mutex --> &mm->mmap_lock --> &cpu_buffer->mapping_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&cpu_buffer->mapping_lock); lock(&mm->mmap_lock); lock(&cpu_buffer->mapping_lock); lock(&buffer->mutex); *** DEADLOCK *** 2 locks held by trace-cmd/1113: #0: ffff888106b847e0 (&mm->mmap_lock){++++}-{4:4}, at: vm_mmap_pgoff+0x192/0x390 #1: ffff888100a5f9f8 (&cpu_buffer->mapping_lock){+.+.}-{4:4}, at: ring_buffer_map+0xcf/0xe70 stack backtrace: CPU: 5 UID: 0 PID: 1113 Comm: trace-cmd Not tainted 6.15.0-rc7-test-00002-gfb7d03d8a82f #551 PREEMPT Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 Call Trace: dump_stack_lvl+0x6e/0xa0 print_circular_bug.cold+0x178/0x1be check_noncircular+0x146/0x160 __lock_acquire+0x1405/0x2210 lock_acquire+0x174/0x310 ? ring_buffer_map+0x11c/0xe70 ? ring_buffer_map+0x11c/0xe70 ? __mutex_lock+0x169/0x18c0 __mutex_lock+0x192/0x18c0 ? ring_buffer_map+0x11c/0xe70 ? ring_buffer_map+0x11c/0xe70 ? function_trace_call+0x296/0x370 ? __pfx___mutex_lock+0x10/0x10 ? __pfx_function_trace_call+0x10/0x10 ? __pfx___mutex_lock+0x10/0x10 ? _raw_spin_unlock+0x2d/0x50 ? ring_buffer_map+0x11c/0xe70 ? ring_buffer_map+0x11c/0xe70 ? __mutex_lock+0x5/0x18c0 ring_buffer_map+0x11c/0xe70 ? do_raw_spin_lock+0x12d/0x270 ? find_held_lock+0x2b/0x80 ? _raw_spin_unlock+0x2d/0x50 ? rcu_is_watching+0x15/0xb0 ? _raw_spin_unlock+0x2d/0x50 ? trace_preempt_on+0xd0/0x110 tracing_buffers_mmap+0x1c4/0x3b0 __mmap_region+0xd8d/0x1f70 ? ring_buffer_lock_reserve+0x99/0xff0 ? __pfx___mmap_region+0x10/0x10 ? ring_buffer_lock_reserve+0x99/0xff0 ? __pfx_ring_buffer_lock_reserve+0x10/0x10 ? __pfx_ring_buffer_lock_reserve+0x10/0x10 ? bpf_lsm_mmap_addr+0x4/0x10 ? security_mmap_addr+0x46/0xd0 ? lock_is_held_type+0xd9/0x130 do_mmap+0x9d7/0x1010 ? 0xffffffffc0370095 ? __pfx_do_mmap+0x10/0x10 vm_mmap_pgoff+0x20b/0x390 ? __pfx_vm_mmap_pgoff+0x10/0x10 ? 0xffffffffc0370095 ksys_mmap_pgoff+0x2e9/0x440 do_syscall_64+0x79/0x1c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7fb0963a7de2 Code: 00 00 00 0f 1f 44 00 00 41 f7 c1 ff 0f 00 00 75 27 55 89 cd 53 48 89 fb 48 85 ff 74 3b 41 89 ea 48 89 df b8 09 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 5b 5d c3 0f 1f 00 48 8b 05 e1 9f 0d 00 64 RSP: 002b:00007ffdcc8fb878 EFLAGS: 00000246 ORIG_RAX: 0000000000000009 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fb0963a7de2 RDX: 0000000000000001 RSI: 0000000000001000 RDI: 0000000000000000 RBP: 0000000000000001 R08: 0000000000000006 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffdcc8fbe68 R14: 00007fb096628000 R15: 00005633e01a5c90 The issue is that cpus_read_lock() is taken within buffer->mutex. The memory mapped pages are taken with the mmap_lock held. The buffer->mutex is taken within the cpu_buffer->mapping_lock. There's quite a chain with all these locks, where the deadlock can be fixed by moving the cpus_read_lock() outside the taking of the buffer->mutex. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Link: https://lore.kernel.org/20250527105820.0f45d045@gandalf.local.home Fixes: 117c39200d9d7 ("ring-buffer: Introducing ring-buffer mapping functions") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 03bb0ee7bfe9f8..67707ff28fc519 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2849,6 +2849,12 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, if (nr_pages < 2) nr_pages = 2; + /* + * Keep CPUs from coming online while resizing to synchronize + * with new per CPU buffers being created. + */ + guard(cpus_read_lock)(); + /* prevent another thread from changing buffer sizes */ mutex_lock(&buffer->mutex); atomic_inc(&buffer->resizing); @@ -2893,7 +2899,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, cond_resched(); } - cpus_read_lock(); /* * Fire off all the required work handlers * We can't schedule on offline CPUs, but it's not necessary @@ -2933,7 +2938,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, cpu_buffer->nr_pages_to_update = 0; } - cpus_read_unlock(); } else { cpu_buffer = buffer->buffers[cpu_id]; @@ -2961,8 +2965,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, goto out_err; } - cpus_read_lock(); - /* Can't run something on an offline CPU. */ if (!cpu_online(cpu_id)) rb_update_pages(cpu_buffer); @@ -2981,7 +2983,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, } cpu_buffer->nr_pages_to_update = 0; - cpus_read_unlock(); } out: From 53244759c5ba564e6be6863c64f6c88045305bcb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 1 May 2025 09:27:24 +1000 Subject: [PATCH 2824/2924] xfs: don't assume perags are initialised when trimming AGs commit 23be716b1c4f3f3a6c00ee38d51a57ef7db9ef7d upstream. When running fstrim immediately after mounting a V4 filesystem, the fstrim fails to trim all the free space in the filesystem. It only trims the first extent in the by-size free space tree in each AG and then returns. If a second fstrim is then run, it runs correctly and the entire free space in the filesystem is iterated and discarded correctly. The problem lies in the setup of the trim cursor - it assumes that pag->pagf_longest is valid without either reading the AGF first or checking if xfs_perag_initialised_agf(pag) is true or not. As a result, when a filesystem is mounted without reading the AGF (e.g. a clean mount on a v4 filesystem) and the first operation is a fstrim call, pag->pagf_longest is zero and so the free extent search starts at the wrong end of the by-size btree and exits after discarding the first record in the tree. Fix this by deferring the initialisation of tcur->count to after we have locked the AGF and guaranteed that the perag is properly initialised. We trigger this on tcur->count == 0 after locking the AGF, as this will only occur on the first call to xfs_trim_gather_extents() for each AG. If we need to iterate, tcur->count will be set to the length of the record we need to restart at, so we can use this to ensure we only sample a valid pag->pagf_longest value for the iteration. Signed-off-by: Dave Chinner Reviewed-by: Bill O'Donnell Reviewed-by: Darrick J. Wong Fixes: 89cfa899608f ("xfs: reduce AGF hold times during fstrim operations") Cc: # v6.6 Signed-off-by: Carlos Maiolino Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_discard.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index c1a306268ae439..94d0873bcd6289 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -167,6 +167,14 @@ xfs_discard_extents( return error; } +/* + * Care must be taken setting up the trim cursor as the perags may not have been + * initialised when the cursor is initialised. e.g. a clean mount which hasn't + * read in AGFs and the first operation run on the mounted fs is a trim. This + * can result in perag fields that aren't initialised until + * xfs_trim_gather_extents() calls xfs_alloc_read_agf() to lock down the AG for + * the free space search. + */ struct xfs_trim_cur { xfs_agblock_t start; xfs_extlen_t count; @@ -204,6 +212,14 @@ xfs_trim_gather_extents( if (error) goto out_trans_cancel; + /* + * First time through tcur->count will not have been initialised as + * pag->pagf_longest is not guaranteed to be valid before we read + * the AGF buffer above. + */ + if (!tcur->count) + tcur->count = pag->pagf_longest; + if (tcur->by_bno) { /* sub-AG discard request always starts at tcur->start */ cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag); @@ -350,7 +366,6 @@ xfs_trim_perag_extents( { struct xfs_trim_cur tcur = { .start = start, - .count = pag->pagf_longest, .end = end, .minlen = minlen, }; From dd7ff7ae4de4d56942d1b504a43724c4342b7325 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Mon, 12 May 2025 14:54:52 -0700 Subject: [PATCH 2825/2924] xen/arm: call uaccess_ttbr0_enable for dm_op hypercall commit 7f9bbc1140ff8796230bc2634055763e271fd692 upstream. dm_op hypercalls might come from userspace and pass memory addresses as parameters. The memory addresses typically correspond to buffers allocated in userspace to hold extra hypercall parameters. On ARM, when CONFIG_ARM64_SW_TTBR0_PAN is enabled, they might not be accessible by Xen, as a result ioreq hypercalls might fail. See the existing comment in arch/arm64/xen/hypercall.S regarding privcmd_call for reference. For privcmd_call, Linux calls uaccess_ttbr0_enable before issuing the hypercall thanks to commit 9cf09d68b89a. We need to do the same for dm_op. This resolves the problem. Cc: stable@kernel.org Fixes: 9cf09d68b89a ("arm64: xen: Enable user access before a privcmd hvc call") Signed-off-by: Stefano Stabellini Reviewed-by: Juergen Gross Message-ID: Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- arch/arm64/xen/hypercall.S | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 9d01361696a145..ae551b8571374f 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -83,7 +83,26 @@ HYPERCALL3(vcpu_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); HYPERCALL2(vm_assist); -HYPERCALL3(dm_op); + +SYM_FUNC_START(HYPERVISOR_dm_op) + mov x16, #__HYPERVISOR_dm_op; \ + /* + * dm_op hypercalls are issued by the userspace. The kernel needs to + * enable access to TTBR0_EL1 as the hypervisor would issue stage 1 + * translations to user memory via AT instructions. Since AT + * instructions are not affected by the PAN bit (ARMv8.1), we only + * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation + * is enabled (it implies that hardware UAO and PAN disabled). + */ + uaccess_ttbr0_enable x6, x7, x8 + hvc XEN_IMM + + /* + * Disable userspace access from kernel once the hyp call completed. + */ + uaccess_ttbr0_disable x6, x7 + ret +SYM_FUNC_END(HYPERVISOR_dm_op); SYM_FUNC_START(privcmd_call) mov x16, x0 From 73cfcc8445585b8af7e18be3c9246b851fdf336c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Feb 2025 16:01:57 +0100 Subject: [PATCH 2826/2924] x86/iopl: Cure TIF_IO_BITMAP inconsistencies commit 8b68e978718f14fdcb080c2a7791c52a0d09bc6d upstream. io_bitmap_exit() is invoked from exit_thread() when a task exists or when a fork fails. In the latter case the exit_thread() cleans up resources which were allocated during fork(). io_bitmap_exit() invokes task_update_io_bitmap(), which in turn ends up in tss_update_io_bitmap(). tss_update_io_bitmap() operates on the current task. If current has TIF_IO_BITMAP set, but no bitmap installed, tss_update_io_bitmap() crashes with a NULL pointer dereference. There are two issues, which lead to that problem: 1) io_bitmap_exit() should not invoke task_update_io_bitmap() when the task, which is cleaned up, is not the current task. That's a clear indicator for a cleanup after a failed fork(). 2) A task should not have TIF_IO_BITMAP set and neither a bitmap installed nor IOPL emulation level 3 activated. This happens when a kernel thread is created in the context of a user space thread, which has TIF_IO_BITMAP set as the thread flags are copied and the IO bitmap pointer is cleared. Other than in the failed fork() case this has no impact because kernel threads including IO workers never return to user space and therefore never invoke tss_update_io_bitmap(). Cure this by adding the missing cleanups and checks: 1) Prevent io_bitmap_exit() to invoke task_update_io_bitmap() if the to be cleaned up task is not the current task. 2) Clear TIF_IO_BITMAP in copy_thread() unconditionally. For user space forks it is set later, when the IO bitmap is inherited in io_bitmap_share(). For paranoia sake, add a warning into tss_update_io_bitmap() to catch the case, when that code is invoked with inconsistent state. Fixes: ea5f1cd7ab49 ("x86/ioperm: Remove bitmap if all permissions dropped") Reported-by: syzbot+e2b1803445d236442e54@syzkaller.appspotmail.com Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/87wmdceom2.ffs@tglx Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/ioport.c | 13 +++++++++---- arch/x86/kernel/process.c | 6 ++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 6290dd120f5e45..ff40f09ad9116c 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } -static void task_update_io_bitmap(struct task_struct *tsk) +static void task_update_io_bitmap(void) { + struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; if (t->iopl_emul == 3 || t->io_bitmap) { @@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct *tsk) struct io_bitmap *iobm = tsk->thread.io_bitmap; tsk->thread.io_bitmap = NULL; - task_update_io_bitmap(tsk); + /* + * Don't touch the TSS when invoked on a failed fork(). TSS + * reflects the state of @current and not the state of @tsk. + */ + if (tsk == current) + task_update_io_bitmap(); if (iobm && refcount_dec_and_test(&iobm->refcnt)) kfree(iobm); } @@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) } t->iopl_emul = level; - task_update_io_bitmap(current); - + task_update_io_bitmap(); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 12948380cdd06c..4940fcd409251c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -181,6 +181,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame->ret_addr = (unsigned long) ret_from_fork_asm; p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap = NULL; + clear_tsk_thread_flag(p, TIF_IO_BITMAP); p->thread.iopl_warn = 0; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -469,6 +470,11 @@ void native_tss_update_io_bitmap(void) } else { struct io_bitmap *iobm = t->io_bitmap; + if (WARN_ON_ONCE(!iobm)) { + clear_thread_flag(TIF_IO_BITMAP); + native_tss_invalidate_io_bitmap(); + } + /* * Only copy bitmap data when the sequence number differs. The * update time is accounted to the incoming task. From cb002d6c8f06d6061e84b2109cb84416107a9fb9 Mon Sep 17 00:00:00 2001 From: Roman Kisel Date: Wed, 7 May 2025 11:22:25 -0700 Subject: [PATCH 2827/2924] x86/hyperv: Fix APIC ID and VP index confusion in hv_snp_boot_ap() commit 86c48271e0d60c82665e9fd61277002391efcef7 upstream. To start an application processor in SNP-isolated guest, a hypercall is used that takes a virtual processor index. The hv_snp_boot_ap() function uses that START_VP hypercall but passes as VP index to it what it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. As those two aren't generally interchangeable, that may lead to hung APs if the VP index and the APIC ID don't match up. Update the parameter names to avoid confusion as to what the parameter is. Use the APIC ID to the VP index conversion to provide the correct input to the hypercall. Cc: stable@vger.kernel.org Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest") Signed-off-by: Roman Kisel Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20250507182227.7421-2-romank@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20250507182227.7421-2-romank@linux.microsoft.com> Signed-off-by: Greg Kroah-Hartman --- arch/x86/hyperv/hv_init.c | 33 +++++++++++++++++++++++++ arch/x86/hyperv/hv_vtl.c | 44 +++++---------------------------- arch/x86/hyperv/ivm.c | 22 +++++++++++++++-- arch/x86/include/asm/mshyperv.h | 6 +++-- include/hyperv/hvgdk_mini.h | 2 +- 5 files changed, 64 insertions(+), 43 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index ddeb40930bc802..3ca16e1dbbb833 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -706,3 +706,36 @@ bool hv_is_hyperv_initialized(void) return hypercall_msr.enable; } EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); + +int hv_apicid_to_vp_index(u32 apic_id) +{ + u64 control; + u64 status; + unsigned long irq_flags; + struct hv_get_vp_from_apic_id_in *input; + u32 *output, ret; + + local_irq_save(irq_flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + memset(input, 0, sizeof(*input)); + input->partition_id = HV_PARTITION_ID_SELF; + input->apic_ids[0] = apic_id; + + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_INDEX_FROM_APIC_ID; + status = hv_do_hypercall(control, input, output); + ret = output[0]; + + local_irq_restore(irq_flags); + + if (!hv_result_success(status)) { + pr_err("failed to get vp index from apic id %d, status %#llx\n", + apic_id, status); + return -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(hv_apicid_to_vp_index); diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 13242ed8ff16fe..5d59e1e05e6491 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -206,41 +206,9 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored) return ret; } -static int hv_vtl_apicid_to_vp_id(u32 apic_id) -{ - u64 control; - u64 status; - unsigned long irq_flags; - struct hv_get_vp_from_apic_id_in *input; - u32 *output, ret; - - local_irq_save(irq_flags); - - input = *this_cpu_ptr(hyperv_pcpu_input_arg); - memset(input, 0, sizeof(*input)); - input->partition_id = HV_PARTITION_ID_SELF; - input->apic_ids[0] = apic_id; - - output = *this_cpu_ptr(hyperv_pcpu_output_arg); - - control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; - status = hv_do_hypercall(control, input, output); - ret = output[0]; - - local_irq_restore(irq_flags); - - if (!hv_result_success(status)) { - pr_err("failed to get vp id from apic id %d, status %#llx\n", - apic_id, status); - return -EINVAL; - } - - return ret; -} - static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) { - int vp_id, cpu; + int vp_index, cpu; /* Find the logical CPU for the APIC ID */ for_each_present_cpu(cpu) { @@ -251,18 +219,18 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) return -EINVAL; pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid); - vp_id = hv_vtl_apicid_to_vp_id(apicid); + vp_index = hv_apicid_to_vp_index(apicid); - if (vp_id < 0) { + if (vp_index < 0) { pr_err("Couldn't find CPU with APIC ID %d\n", apicid); return -EINVAL; } - if (vp_id > ms_hyperv.max_vp_index) { - pr_err("Invalid CPU id %d for APIC ID %d\n", vp_id, apicid); + if (vp_index > ms_hyperv.max_vp_index) { + pr_err("Invalid CPU id %d for APIC ID %d\n", vp_index, apicid); return -EINVAL; } - return hv_vtl_bringup_vcpu(vp_id, cpu, start_eip); + return hv_vtl_bringup_vcpu(vp_index, cpu, start_eip); } int __init hv_vtl_early_init(void) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 77bf05f06b9efa..0cc239cdb4dad8 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -288,7 +289,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) { struct sev_es_save_area *vmsa = (struct sev_es_save_area *) __get_free_page(GFP_KERNEL | __GFP_ZERO); @@ -297,10 +298,27 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) u64 ret, retry = 5; struct hv_enable_vp_vtl *start_vp_input; unsigned long flags; + int cpu, vp_index; if (!vmsa) return -ENOMEM; + /* Find the Hyper-V VP index which might be not the same as APIC ID */ + vp_index = hv_apicid_to_vp_index(apic_id); + if (vp_index < 0 || vp_index > ms_hyperv.max_vp_index) + return -EINVAL; + + /* + * Find the Linux CPU number for addressing the per-CPU data, and it + * might not be the same as APIC ID. + */ + for_each_present_cpu(cpu) { + if (arch_match_cpu_phys_id(cpu, apic_id)) + break; + } + if (cpu >= nr_cpu_ids) + return -EINVAL; + native_store_gdt(&gdtr); vmsa->gdtr.base = gdtr.address; @@ -348,7 +366,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; memset(start_vp_input, 0, sizeof(*start_vp_input)); start_vp_input->partition_id = -1; - start_vp_input->vp_index = cpu; + start_vp_input->vp_index = vp_index; start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index bab5ccfc60a748..0b9a3a307d0655 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -268,11 +268,11 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); #ifdef CONFIG_AMD_MEM_ENCRYPT bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip); +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip); #else static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} -static inline int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) { return 0; } +static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) { return 0; } #endif #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) @@ -306,6 +306,7 @@ static __always_inline u64 hv_raw_get_msr(unsigned int reg) { return __rdmsr(reg); } +int hv_apicid_to_vp_index(u32 apic_id); #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} @@ -327,6 +328,7 @@ static inline void hv_set_msr(unsigned int reg, u64 value) { } static inline u64 hv_get_msr(unsigned int reg) { return 0; } static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) { } static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; } +static inline int hv_apicid_to_vp_index(u32 apic_id) { return -EINVAL; } #endif /* CONFIG_HYPERV */ diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index abf0bd76e3703a..6f5976aca3e860 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -475,7 +475,7 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_CREATE_PORT 0x0095 #define HVCALL_CONNECT_PORT 0x0096 #define HVCALL_START_VP 0x0099 -#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a +#define HVCALL_GET_VP_INDEX_FROM_APIC_ID 0x009a #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 #define HVCALL_SIGNAL_EVENT_DIRECT 0x00c0 From d70d0373f92b3b1bf8c777ad68462e8d9b2d936b Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Mon, 9 Jun 2025 01:40:53 -0700 Subject: [PATCH 2828/2924] x86/fred/signal: Prevent immediate repeat of single step trap on return from SIGTRAP handler commit e34dbbc85d64af59176fe59fad7b4122f4330fe2 upstream. Clear the software event flag in the augmented SS to prevent immediate repeat of single step trap on return from SIGTRAP handler if the trap flag (TF) is set without an external debugger attached. Following is a typical single-stepping flow for a user process: 1) The user process is prepared for single-stepping by setting RFLAGS.TF = 1. 2) When any instruction in user space completes, a #DB is triggered. 3) The kernel handles the #DB and returns to user space, invoking the SIGTRAP handler with RFLAGS.TF = 0. 4) After the SIGTRAP handler finishes, the user process performs a sigreturn syscall, restoring the original state, including RFLAGS.TF = 1. 5) Goto step 2. According to the FRED specification: A) Bit 17 in the augmented SS is designated as the software event flag, which is set to 1 for FRED event delivery of SYSCALL, SYSENTER, or INT n. B) If bit 17 of the augmented SS is 1 and ERETU would result in RFLAGS.TF = 1, a single-step trap will be pending upon completion of ERETU. In step 4) above, the software event flag is set upon the sigreturn syscall, and its corresponding ERETU would restore RFLAGS.TF = 1. This combination causes a pending single-step trap upon completion of ERETU. Therefore, another #DB is triggered before any user space instruction is executed, which leads to an infinite loop in which the SIGTRAP handler keeps being invoked on the same user space IP. Fixes: 14619d912b65 ("x86/fred: FRED entry/exit and dispatch code") Suggested-by: H. Peter Anvin (Intel) Signed-off-by: Xin Li (Intel) Signed-off-by: Dave Hansen Tested-by: Sohil Mehta Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20250609084054.2083189-2-xin%40zytor.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/sighandling.h | 22 ++++++++++++++++++++++ arch/x86/kernel/signal_32.c | 4 ++++ arch/x86/kernel/signal_64.c | 4 ++++ 3 files changed, 30 insertions(+) diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index e770c4fc47f4c5..8727c7e21dd1e6 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -24,4 +24,26 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); +/* + * To prevent immediate repeat of single step trap on return from SIGTRAP + * handler if the trap flag (TF) is set without an external debugger attached, + * clear the software event flag in the augmented SS, ensuring no single-step + * trap is pending upon ERETU completion. + * + * Note, this function should be called in sigreturn() before the original + * state is restored to make sure the TF is read from the entry frame. + */ +static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs) +{ + /* + * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction + * is being single-stepped, do not clear the software event flag in the + * augmented SS, thus a debugger won't skip over the following instruction. + */ +#ifdef CONFIG_X86_FRED + if (!(regs->flags & X86_EFLAGS_TF)) + regs->fred_ss.swevent = 0; +#endif +} + #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 98123ff10506c6..42bbc42bd3503c 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn) struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); sigset_t set; + prevent_single_step_upon_eretu(regs); + if (!access_ok(frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) @@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn) struct rt_sigframe_ia32 __user *frame; sigset_t set; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ee9453891901b7..d483b585c6c604 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); if (!access_ok(frame, sizeof(*frame))) From c3e3a011adebe5cfb286c7bdc5ff2f202b21e568 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Jun 2025 13:38:26 +0000 Subject: [PATCH 2829/2924] calipso: unlock rcu before returning -EAFNOSUPPORT commit 3cae906e1a6184cdc9e4d260e4dbdf9a118d94ad upstream. syzbot reported that a recent patch forgot to unlock rcu in the error path. Adopt the convention that netlbl_conn_setattr() is already using. Fixes: 6e9f2df1c550 ("calipso: Don't call calipso functions for AF_INET sk.") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Kuniyuki Iwashima Acked-by: Paul Moore Link: https://patch.msgid.link/20250604133826.1667664-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/netlabel/netlabel_kapi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 6ea16138582c0b..33b77084a4e5f3 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -1165,8 +1165,10 @@ int netlbl_conn_setattr(struct sock *sk, break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - if (sk->sk_family != AF_INET6) - return -EAFNOSUPPORT; + if (sk->sk_family != AF_INET6) { + ret_val = -EAFNOSUPPORT; + goto conn_setattr_return; + } addr6 = (struct sockaddr_in6 *)addr; entry = netlbl_domhsh_getentry_af6(secattr->domain, From 917677d599e269bf0495e1a314f16232ed586b7b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 6 Jun 2025 18:31:03 -0400 Subject: [PATCH 2830/2924] do_move_mount(): split the checks in subtree-of-our-ns and entire-anon cases commit 290da20e333955637f00647d9fff7c6e3c0b61e0 upstream. ... and fix the breakage in anon-to-anon case. There are two cases acceptable for do_move_mount() and mixing checks for those is making things hard to follow. One case is move of a subtree in caller's namespace. * source and destination must be in caller's namespace * source must be detachable from parent Another is moving the entire anon namespace elsewhere * source must be the root of anon namespace * target must either in caller's namespace or in a suitable anon namespace (see may_use_mount() for details). * target must not be in the same namespace as source. It's really easier to follow if tests are *not* mixed together... Reviewed-by: Christian Brauner Fixes: 3b5260d12b1f ("Don't propagate mounts into detached trees") Reported-by: Allison Karlitskaya Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index a32f43b7eb08d4..d6ac7e533b0212 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3662,37 +3662,41 @@ static int do_move_mount(struct path *old_path, ns = old->mnt_ns; err = -EINVAL; - if (!may_use_mount(p)) - goto out; - /* The thing moved must be mounted... */ if (!is_mounted(&old->mnt)) goto out; - /* ... and either ours or the root of anon namespace */ - if (!(attached ? check_mnt(old) : is_anon_ns(ns))) - goto out; - - if (is_anon_ns(ns) && ns == p->mnt_ns) { + if (check_mnt(old)) { + /* if the source is in our namespace... */ + /* ... it should be detachable from parent */ + if (!mnt_has_parent(old) || IS_MNT_LOCKED(old)) + goto out; + /* ... and the target should be in our namespace */ + if (!check_mnt(p)) + goto out; + } else { /* - * Ending up with two files referring to the root of the - * same anonymous mount namespace would cause an error - * as this would mean trying to move the same mount - * twice into the mount tree which would be rejected - * later. But be explicit about it right here. + * otherwise the source must be the root of some anon namespace. + * AV: check for mount being root of an anon namespace is worth + * an inlined predicate... */ - goto out; - } else if (is_anon_ns(p->mnt_ns)) { + if (!is_anon_ns(ns) || mnt_has_parent(old)) + goto out; /* - * Don't allow moving an attached mount tree to an - * anonymous mount tree. + * Bail out early if the target is within the same namespace - + * subsequent checks would've rejected that, but they lose + * some corner cases if we check it early. */ - goto out; + if (ns == p->mnt_ns) + goto out; + /* + * Target should be either in our namespace or in an acceptable + * anon namespace, sensu check_anonymous_mnt(). + */ + if (!may_use_mount(p)) + goto out; } - if (old->mnt.mnt_flags & MNT_LOCKED) - goto out; - if (!path_mounted(old_path)) goto out; From 523e4e0ade60b4d8e36c91062a882c36d7ab5b6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Wed, 14 May 2025 08:36:06 -0400 Subject: [PATCH 2831/2924] regulator: dt-bindings: mt6357: Drop fixed compatible requirement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9cfdd7752ba5f8cc9b8191e8c9aeeec246241fa4 upstream. Some of the regulators on the MT6357 PMIC currently reference the fixed-regulator dt-binding, which enforces the presence of a regulator-fixed compatible. However since all regulators on the MT6357 PMIC are handled by a single mt6357-regulator driver, probed through MFD, the compatibles don't serve any purpose. In fact they cause failures in the DT kselftest since they aren't probed by the fixed regulator driver as would be expected. Furthermore this is the only dt-binding in this family like this: mt6359-regulator and mt6358-regulator don't require those compatibles. Commit d77e89b7b03f ("arm64: dts: mediatek: mt6357: Drop regulator-fixed compatibles") removed the compatibles from Devicetree, but missed updating the binding, which still requires them, introducing dt-binding errors. Remove the compatible requirement by referencing the plain regulator dt-binding instead to fix the dt-binding errors. Fixes: d77e89b7b03f ("arm64: dts: mediatek: mt6357: Drop regulator-fixed compatibles") Signed-off-by: Nícolas F. R. A. Prado Link: https://patch.msgid.link/20250514-mt6357-regulator-fixed-compatibles-removal-bindings-v1-1-2421e9cc6cc7@collabora.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- .../regulator/mediatek,mt6357-regulator.yaml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml b/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml index 6327bb2f6ee080..698266c09e2535 100644 --- a/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml @@ -33,7 +33,7 @@ patternProperties: "^ldo-v(camio18|aud28|aux18|io18|io28|rf12|rf18|cn18|cn28|fe28)$": type: object - $ref: fixed-regulator.yaml# + $ref: regulator.yaml# unevaluatedProperties: false description: Properties for single fixed LDO regulator. @@ -112,7 +112,6 @@ examples: regulator-enable-ramp-delay = <220>; }; mt6357_vfe28_reg: ldo-vfe28 { - compatible = "regulator-fixed"; regulator-name = "vfe28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; @@ -125,14 +124,12 @@ examples: regulator-enable-ramp-delay = <110>; }; mt6357_vrf18_reg: ldo-vrf18 { - compatible = "regulator-fixed"; regulator-name = "vrf18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-enable-ramp-delay = <110>; }; mt6357_vrf12_reg: ldo-vrf12 { - compatible = "regulator-fixed"; regulator-name = "vrf12"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; @@ -157,14 +154,12 @@ examples: regulator-enable-ramp-delay = <264>; }; mt6357_vcn28_reg: ldo-vcn28 { - compatible = "regulator-fixed"; regulator-name = "vcn28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; regulator-enable-ramp-delay = <264>; }; mt6357_vcn18_reg: ldo-vcn18 { - compatible = "regulator-fixed"; regulator-name = "vcn18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -183,7 +178,6 @@ examples: regulator-enable-ramp-delay = <264>; }; mt6357_vcamio_reg: ldo-vcamio18 { - compatible = "regulator-fixed"; regulator-name = "vcamio"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -212,28 +206,24 @@ examples: regulator-always-on; }; mt6357_vaux18_reg: ldo-vaux18 { - compatible = "regulator-fixed"; regulator-name = "vaux18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-enable-ramp-delay = <264>; }; mt6357_vaud28_reg: ldo-vaud28 { - compatible = "regulator-fixed"; regulator-name = "vaud28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; regulator-enable-ramp-delay = <264>; }; mt6357_vio28_reg: ldo-vio28 { - compatible = "regulator-fixed"; regulator-name = "vio28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; regulator-enable-ramp-delay = <264>; }; mt6357_vio18_reg: ldo-vio18 { - compatible = "regulator-fixed"; regulator-name = "vio18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; From bb0507bbb1be74896666dbed71c8cdf1eb277436 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 23 May 2025 14:09:43 +0200 Subject: [PATCH 2832/2924] usb: misc: onboard_usb_dev: fix build warning for CONFIG_USB_ONBOARD_DEV_USB5744=n commit 662a9ece32add94469138ae66999ee16cb37a531 upstream. When the USB5744 option is disabled, the onboard_usb driver warns about unused functions: drivers/usb/misc/onboard_usb_dev.c:358:12: error: 'onboard_dev_5744_i2c_write_byte' defined but not used [-Werror=unused-function] 358 | static int onboard_dev_5744_i2c_write_byte(struct i2c_client *client, u16 addr, u8 data) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/usb/misc/onboard_usb_dev.c:313:12: error: 'onboard_dev_5744_i2c_read_byte' defined but not used [-Werror=unused-function] 313 | static int onboard_dev_5744_i2c_read_byte(struct i2c_client *client, u16 addr, u8 *data) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Extend the #ifdef block a little further to cover all of these functions. Ideally we'd use use if(IS_ENABLED()) instead, but that doesn't currently work because the i2c_transfer() and i2c_smbus_write_word_data() function declarations are hidden when CONFIG_I2C is disabled. Fixes: 1143d41922c0 ("usb: misc: onboard_usb_dev: Fix usb5744 initialization sequence") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250523120947.2170302-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/misc/onboard_usb_dev.c b/drivers/usb/misc/onboard_usb_dev.c index 553f95ccb1c8e5..86f25bcb64253c 100644 --- a/drivers/usb/misc/onboard_usb_dev.c +++ b/drivers/usb/misc/onboard_usb_dev.c @@ -310,6 +310,7 @@ static void onboard_dev_attach_usb_driver(struct work_struct *work) pr_err("Failed to attach USB driver: %pe\n", ERR_PTR(err)); } +#if IS_ENABLED(CONFIG_USB_ONBOARD_DEV_USB5744) static int onboard_dev_5744_i2c_read_byte(struct i2c_client *client, u16 addr, u8 *data) { struct i2c_msg msg[2]; @@ -388,7 +389,6 @@ static int onboard_dev_5744_i2c_write_byte(struct i2c_client *client, u16 addr, static int onboard_dev_5744_i2c_init(struct i2c_client *client) { -#if IS_ENABLED(CONFIG_USB_ONBOARD_DEV_USB5744) struct device *dev = &client->dev; int ret; u8 reg; @@ -417,10 +417,13 @@ static int onboard_dev_5744_i2c_init(struct i2c_client *client) return dev_err_probe(dev, ret, "USB Attach with SMBus command failed\n"); return ret; +} #else +static int onboard_dev_5744_i2c_init(struct i2c_client *client) +{ return -ENODEV; -#endif } +#endif static int onboard_dev_probe(struct platform_device *pdev) { From 4c6b328f96de18346abb06bc9fe0b8312a7366b9 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 28 May 2025 13:03:54 +0200 Subject: [PATCH 2833/2924] net: usb: aqc111: debug info before sanitation commit d3faab9b5a6a0477d69c38bd11c43aa5e936f929 upstream. If we sanitize error returns, the debug statements need to come before that so that we don't lose information. Signed-off-by: Oliver Neukum Fixes: 405b0d610745 ("net: usb: aqc111: fix error handling of usbnet read calls") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/aqc111.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index 453a2cf82753fb..9201ee10a13f78 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -31,11 +31,11 @@ static int aqc111_read_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value, USB_RECIP_DEVICE, value, index, data, size); if (unlikely(ret < size)) { - ret = ret < 0 ? ret : -ENODATA; - netdev_warn(dev->net, "Failed to read(0x%x) reg index 0x%04x: %d\n", cmd, index, ret); + + ret = ret < 0 ? ret : -ENODATA; } return ret; @@ -50,11 +50,11 @@ static int aqc111_read_cmd(struct usbnet *dev, u8 cmd, u16 value, USB_RECIP_DEVICE, value, index, data, size); if (unlikely(ret < size)) { - ret = ret < 0 ? ret : -ENODATA; - netdev_warn(dev->net, "Failed to read(0x%x) reg index 0x%04x: %d\n", cmd, index, ret); + + ret = ret < 0 ? ret : -ENODATA; } return ret; From a0c3c47f6b1bb613c74d4d1e64c1dd2d31bb60e5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 30 May 2025 12:06:47 -0700 Subject: [PATCH 2834/2924] overflow: Introduce __DEFINE_FLEX for having no initializer commit 5c78e793f78732b60276401f75cc1a101f9ad121 upstream. While not yet in the tree, there is a proposed patch[1] that was depending on the prior behavior of _DEFINE_FLEX, which did not have an explicit initializer. Provide this via __DEFINE_FLEX now, which can also have attributes applied (e.g. __uninitialized). Examples of the resulting initializer behaviors can be seen here: https://godbolt.org/z/P7Go8Tr33 Link: https://lore.kernel.org/netdev/20250520205920.2134829-9-anthony.l.nguyen@intel.com [1] Fixes: 47e36ed78406 ("overflow: Fix direct struct member initialization in _DEFINE_FLEX()") Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/overflow.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 823a53cd9a1935..89e9d604988351 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -389,24 +389,37 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) struct_size((type *)NULL, member, count) /** - * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. - * Enables caller macro to pass (different) initializer. + * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass arbitrary trailing expressions * * @type: structure type name, including "struct" keyword. * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. - * @initializer: Initializer expression (e.g., pass `= { }` at minimum). + * @trailer: Trailing expressions for attributes and/or initializers. */ -#define _DEFINE_FLEX(type, name, member, count, initializer...) \ +#define __DEFINE_FLEX(type, name, member, count, trailer...) \ _Static_assert(__builtin_constant_p(count), \ "onstack flex array members require compile-time const count"); \ union { \ u8 bytes[struct_size_t(type, member, count)]; \ type obj; \ - } name##_u = { .obj initializer }; \ + } name##_u trailer; \ type *name = (type *)&name##_u +/** + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: Initializer expression (e.g., pass `= { }` at minimum). + */ +#define _DEFINE_FLEX(type, name, member, count, initializer...) \ + __DEFINE_FLEX(type, name, member, count, = { .obj initializer }) + /** * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing * flexible array member, when it does not have a __counted_by annotation. @@ -421,7 +434,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Use __struct_size(@name) to get compile-time size of it afterwards. */ #define DEFINE_RAW_FLEX(type, name, member, count) \ - _DEFINE_FLEX(type, name, member, count, = {}) + __DEFINE_FLEX(type, name, member, count, = { }) /** * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing From 41ccd0006344dbe67c5c1fbdd5c6efc622e66314 Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Wed, 28 May 2025 16:02:37 +0100 Subject: [PATCH 2835/2924] gfs2: Don't clear sb->s_fs_info in gfs2_sys_fs_add commit 9126d2754c5e5d1818765811a10af0a14cf1fa0a upstream. When gfs2_sys_fs_add() fails, it sets sb->s_fs_info to NULL on its error path (see commit 0d515210b696 ("GFS2: Add kobject release method")). The intention seems to be to prevent dereferencing sb->s_fs_info once the object pointed to has been deallocated, but that would be better achieved by setting the pointer to NULL in free_sbd(). As a consequence, when the call to gfs2_sys_fs_add() fails in gfs2_fill_super(), sdp = GFS2_SB(inode) will evaluate to NULL in iput() -> gfs2_drop_inode(), and accessing sdp->sd_flags will be a NULL pointer dereference. Fix that by only setting sb->s_fs_info to NULL when actually freeing the object pointed to in free_sbd(). Fixes: ae9f3bd8259a ("gfs2: replace sd_aspace with sd_inode") Reported-by: syzbot+b12826218502df019f9d@syzkaller.appspotmail.com Signed-off-by: Andrew Price Signed-off-by: Andreas Gruenbacher Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/ops_fstype.c | 4 +++- fs/gfs2/sys.c | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 6ce475e1c6d64c..4a0f7de41b2b2f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -64,8 +64,11 @@ static void gfs2_tune_init(struct gfs2_tune *gt) void free_sbd(struct gfs2_sbd *sdp) { + struct super_block *sb = sdp->sd_vfs; + if (sdp->sd_lkstats) free_percpu(sdp->sd_lkstats); + sb->s_fs_info = NULL; kfree(sdp); } @@ -1316,7 +1319,6 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) iput(sdp->sd_inode); fail_free: free_sbd(sdp); - sb->s_fs_info = NULL; return error; } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index ecc699f8d9fcaa..6286183021022a 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -764,7 +764,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) fs_err(sdp, "error %d adding sysfs files\n", error); kobject_put(&sdp->sd_kobj); wait_for_completion(&sdp->sd_kobj_unregister); - sb->s_fs_info = NULL; return error; } From c12905482b621a0fa041ce4c4abe5e0cb0d2f744 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 5 May 2025 07:24:52 +0200 Subject: [PATCH 2836/2924] thermal/drivers/mediatek/lvts: Remove unused lvts_debugfs_exit commit 3159c96ac2cb3a104ce8ee5b8a4afe98e4b8fcad upstream. When debugfs is disabled, the function has no reference any more: drivers/thermal/mediatek/lvts_thermal.c:266:13: error: 'lvts_debugfs_exit' defined but not used [-Werror=unused-function] 266 | static void lvts_debugfs_exit(struct lvts_domain *lvts_td) { } | ^~~~~~~~~~~~~~~~~ Fixes: ef280c17a840 ("thermal/drivers/mediatek/lvts: Fix debugfs unregister on failure") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250505052502.1812867-1-arnd@kernel.org Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/mediatek/lvts_thermal.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index c0be4ca55c7b84..985925147ac068 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ b/drivers/thermal/mediatek/lvts_thermal.c @@ -263,8 +263,6 @@ static inline int lvts_debugfs_init(struct device *dev, return 0; } -static void lvts_debugfs_exit(struct lvts_domain *lvts_td) { } - #endif static int lvts_raw_to_temp(u32 raw_temp, int temp_factor) From a2b47f77e740a21dbdcb12e2f2ca3c840299545a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 19 Jun 2025 15:41:08 +0200 Subject: [PATCH 2837/2924] Linux 6.15.3 Link: https://lore.kernel.org/r/20250617152451.485330293@linuxfoundation.org Tested-by: Christian Heusel Tested-by: Florian Fainelli Tested-by: Ronald Warsow Tested-by: Salvatore Bonaccorso Tested-by: Luna Jernberg Tested-by: Shuah Khan Tested-By: Achill Gilgenast Tested-by: Ron Economos Tested-by: Jon Hunter Tested-by: Takeshi Ogasawara Tested-by: Peter Schneider Tested-by: Linux Kernel Functional Testing Tested-by: Hardik Garg Tested-by: Justin M. Forbes Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7138d1fabfa4ae..01ddb4eb3659f4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 15 -SUBLEVEL = 2 +SUBLEVEL = 3 EXTRAVERSION = NAME = Baby Opossum Posse From 2de93dcb075043c3a03b5533278c0ed1311b6e27 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 18 Jul 2022 01:34:46 +0200 Subject: [PATCH 2838/2924] Cachy: Add sysctl and CONFIG to disallow unprivileged C Signed-off-by: Peter Jung --- include/linux/user_namespace.h | 4 ++++ init/Kconfig | 16 ++++++++++++++++ kernel/fork.c | 14 ++++++++++++++ kernel/sysctl.c | 12 ++++++++++++ kernel/user_namespace.c | 7 +++++++ 5 files changed, 53 insertions(+) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index a0bb6d01213780..93129fea552eaa 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -168,6 +168,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, #ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) @@ -201,6 +203,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else +#define unprivileged_userns_clone 0 + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { return &init_user_ns; diff --git a/init/Kconfig b/init/Kconfig index bf3a920064bec1..312219efb53c52 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1330,6 +1330,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + default y + depends on USER_NS + help + When disabled, unprivileged users will not be able to create + new namespaces. Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say Y. + config PID_NS bool "PID Namespaces" default y diff --git a/kernel/fork.c b/kernel/fork.c index 168681fc4b25a9..39e47bfaea5894 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,6 +106,10 @@ #include #include +#ifdef CONFIG_USER_NS +#include +#endif + #include #include #include @@ -2194,6 +2198,10 @@ __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); + if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. @@ -3354,6 +3362,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto bad_unshare_out; + } + err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3b7a7308e35b09..fe7bec454345fd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -70,6 +70,9 @@ #ifdef CONFIG_RT_MUTEXES #include #endif +#ifdef CONFIG_USER_NS +#include +#endif /* shared constants to be used in various sysctls */ const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 }; @@ -1595,6 +1598,15 @@ static const struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 682f40d5632d44..434a25f7b2edb2 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -22,6 +22,13 @@ #include #include +/* sysctl */ +#ifdef CONFIG_USER_NS_UNPRIVILEGED +int unprivileged_userns_clone = 1; +#else +int unprivileged_userns_clone; +#endif + static struct kmem_cache *user_ns_cachep __ro_after_init; static DEFINE_MUTEX(userns_state_mutex); From 263b3ffef4b1391cd38f7dc0071882ac436ed5d5 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 13 Nov 2023 13:21:27 +0100 Subject: [PATCH 2839/2924] Cachy: Tweaks This adds several tweaks for the Scheduler, dm oops, swap and vmscan.c Signed-off-by: Peter Jung Signed-off-by: Eric Naim --- drivers/md/dm-crypt.c | 5 +++++ include/linux/pagemap.h | 2 +- init/Kconfig | 4 ++++ kernel/Kconfig.hz | 24 ++++++++++++++++++++++++ kernel/sched/fair.c | 15 ++++++++++++++- mm/page-writeback.c | 8 ++++++++ mm/swap.c | 5 +++++ mm/vmpressure.c | 4 ++++ mm/vmscan.c | 4 ++++ 9 files changed, 69 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9dfdb63220d746..91aeee72a15e92 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3284,6 +3284,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } +#ifdef CONFIG_CACHY + set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); + set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); +#endif + ret = crypt_ctr_cipher(ti, argv[0], argv[1]); if (ret < 0) goto bad; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 26baa78f1ca7d8..8fa2ff2682bce5 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1341,7 +1341,7 @@ struct readahead_control { ._index = i, \ } -#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) +#define VM_READAHEAD_PAGES (SZ_8M / PAGE_SIZE) void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); diff --git a/init/Kconfig b/init/Kconfig index 312219efb53c52..7c7f471304416b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -163,6 +163,10 @@ config THREAD_INFO_IN_TASK menu "General setup" +config CACHY + bool "Some kernel tweaks by CachyOS" + default y + config BROKEN bool diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index ce1435cb08b1ec..e1359db5561e67 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -40,6 +40,27 @@ choice on SMP and NUMA systems and exactly dividing by both PAL and NTSC frame rates for video and multimedia work. + config HZ_500 + bool "500 HZ" + help + 500 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + + config HZ_600 + bool "600 HZ" + help + 600 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + + config HZ_750 + bool "750 HZ" + help + 750 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + config HZ_1000 bool "1000 HZ" help @@ -53,6 +74,9 @@ config HZ default 100 if HZ_100 default 250 if HZ_250 default 300 if HZ_300 + default 500 if HZ_500 + default 600 if HZ_600 + default 750 if HZ_750 default 1000 if HZ_1000 config SCHED_HRTICK diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0c04ed41485259..a6117ccf77f870 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -76,10 +76,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; * * (default: 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds) */ +#ifdef CONFIG_CACHY +unsigned int sysctl_sched_base_slice = 350000ULL; +static unsigned int normalized_sysctl_sched_base_slice = 350000ULL; +#else unsigned int sysctl_sched_base_slice = 700000ULL; static unsigned int normalized_sysctl_sched_base_slice = 700000ULL; +#endif /* CONFIG_CACHY */ -__read_mostly unsigned int sysctl_sched_migration_cost = 500000UL; +#ifdef CONFIG_CACHY +const_debug unsigned int sysctl_sched_migration_cost = 300000UL; +#else +const_debug unsigned int sysctl_sched_migration_cost = 500000UL; +#endif static int __init setup_sched_thermal_decay_shift(char *str) { @@ -124,8 +133,12 @@ int __weak arch_asym_cpu_priority(int cpu) * * (default: 5 msec, units: microseconds) */ +#ifdef CONFIG_CACHY +static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; +#else static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; #endif +#endif #ifdef CONFIG_NUMA_BALANCING /* Restrict the NUMA promotion throughput (MB/s) for each target node. */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c81624bc396971..f19314209557b6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -71,7 +71,11 @@ static long ratelimit_pages = 32; /* * Start background writeback (via writeback threads) at this percentage */ +#ifdef CONFIG_CACHY +static int dirty_background_ratio = 5; +#else static int dirty_background_ratio = 10; +#endif /* * dirty_background_bytes starts at 0 (disabled) so that it is a function of @@ -99,7 +103,11 @@ static unsigned long vm_dirty_bytes; /* * The interval between `kupdate'-style writebacks */ +#ifdef CONFIG_CACHY +unsigned int dirty_writeback_interval = 10 * 100; /* centiseconds */ +#else unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */ +#endif EXPORT_SYMBOL_GPL(dirty_writeback_interval); diff --git a/mm/swap.c b/mm/swap.c index 77b2d599787342..443eb4a8b42b8e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -1091,6 +1091,10 @@ static const struct ctl_table swap_sysctl_table[] = { */ void __init swap_setup(void) { +#ifdef CONFIG_CACHY + /* Only swap-in pages requested, avoid readahead */ + page_cluster = 0; +#else unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT); /* Use a smaller cluster for small-memory machines */ @@ -1098,6 +1102,7 @@ void __init swap_setup(void) page_cluster = 2; else page_cluster = 3; +#endif /* CONFIG_CACHY */ /* * Right now other parts of the system means that we * _really_ don't want to cluster much more diff --git a/mm/vmpressure.c b/mm/vmpressure.c index bd5183dfd8791f..3a410f53a07ca4 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -43,7 +43,11 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16; * essence, they are percents: the higher the value, the more number * unsuccessful reclaims there were. */ +#ifdef CONFIG_CACHY +static const unsigned int vmpressure_level_med = 65; +#else static const unsigned int vmpressure_level_med = 60; +#endif static const unsigned int vmpressure_level_critical = 95; /* diff --git a/mm/vmscan.c b/mm/vmscan.c index 3783e45bfc92e2..8e6fa42ad584fd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -200,7 +200,11 @@ struct scan_control { /* * From 0 .. MAX_SWAPPINESS. Higher means more swappy. */ +#ifdef CONFIG_CACHY +int vm_swappiness = 100; +#else int vm_swappiness = 60; +#endif #ifdef CONFIG_MEMCG From d10747b42dd732a65f1b3dac312819883e3e5b45 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 17 Aug 2023 13:53:54 +0200 Subject: [PATCH 2840/2924] Cachy: Restore -O3 Optimization Level Signed-off-by: Peter Jung --- Makefile | 3 +++ init/Kconfig | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/Makefile b/Makefile index 01ddb4eb3659f4..b5851009d45693 100644 --- a/Makefile +++ b/Makefile @@ -857,6 +857,9 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE KBUILD_CFLAGS += -O2 KBUILD_RUSTFLAGS += -Copt-level=2 +else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 +KBUILD_CFLAGS += -O3 +KBUILD_RUSTFLAGS += -Copt-level=3 else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os KBUILD_RUSTFLAGS += -Copt-level=s diff --git a/init/Kconfig b/init/Kconfig index 7c7f471304416b..c524858118d27e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1499,6 +1499,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE with the "-O2" compiler flag for best performance and most helpful compile-time warnings. +config CC_OPTIMIZE_FOR_PERFORMANCE_O3 + bool "Optimize more for performance (-O3)" + help + Choosing this option will pass "-O3" to your compiler to optimize + the kernel yet more for performance. + config CC_OPTIMIZE_FOR_SIZE bool "Optimize for size (-Os)" help From 3fa418775dd60bd653c81f2f8c0e62d3fe7b6917 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Tue, 24 Sep 2024 19:35:31 +0200 Subject: [PATCH 2841/2924] Cachy: Add GCC SMS-based modulo scheduling Signed-off-by: Peter Jung --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index b5851009d45693..2d9d46275c4bf9 100644 --- a/Makefile +++ b/Makefile @@ -865,6 +865,11 @@ KBUILD_CFLAGS += -Os KBUILD_RUSTFLAGS += -Copt-level=s endif +# Perform swing modulo scheduling immediately before the first scheduling pass. +# This pass looks at innermost loops and reorders their instructions by +# overlapping different iterations. +KBUILD_CFLAGS += $(call cc-option,-fmodulo-sched -fmodulo-sched-allow-regmoves -fivopts -fmodulo-sched) + # Always set `debug-assertions` and `overflow-checks` because their default # depends on `opt-level` and `debug-assertions`, respectively. KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n) From 4866800d2a3f7d5d8568ea79212283c2cb2ad92a Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 6 Mar 2023 18:34:25 +0100 Subject: [PATCH 2842/2924] Cachy: Add ACS override support Source: https://gitlab.com/Queuecumber/linux-acs-override/-/raw/master/workspaces/5.10.4/acso.patch Signed-off-by: Peter Jung --- .../admin-guide/kernel-parameters.txt | 9 ++ drivers/pci/quirks.c | 101 ++++++++++++++++++ 2 files changed, 110 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8f75ec17739944..d7bee42b709043 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4689,6 +4689,15 @@ nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. + pcie_acs_override = + [PCIE] Override missing PCIe ACS support for: + downstream + All downstream ports - full ACS capabilities + multfunction + All multifunction devices - multifunction ACS subset + id:nnnn:nnnn + Specfic device - full ACS capabilities + Specified as vid:did (vendor/device ID) in hex noioapicquirk [APIC] Disable all boot interrupt quirks. Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 94daca15a096f7..4822a23dd357d4 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3747,6 +3747,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; } +static bool acs_on_downstream; +static bool acs_on_multifunction; + +#define NUM_ACS_IDS 16 +struct acs_on_id { + unsigned short vendor; + unsigned short device; +}; +static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; +static u8 max_acs_id; + +static __init int pcie_acs_override_setup(char *p) +{ + if (!p) + return -EINVAL; + + while (*p) { + if (!strncmp(p, "downstream", 10)) + acs_on_downstream = true; + if (!strncmp(p, "multifunction", 13)) + acs_on_multifunction = true; + if (!strncmp(p, "id:", 3)) { + char opt[5]; + int ret; + long val; + + if (max_acs_id >= NUM_ACS_IDS - 1) { + pr_warn("Out of PCIe ACS override slots (%d)\n", + NUM_ACS_IDS); + goto next; + } + + p += 3; + snprintf(opt, 5, "%s", p); + ret = kstrtol(opt, 16, &val); + if (ret) { + pr_warn("PCIe ACS ID parse error %d\n", ret); + goto next; + } + acs_on_ids[max_acs_id].vendor = val; + + p += strcspn(p, ":"); + if (*p != ':') { + pr_warn("PCIe ACS invalid ID\n"); + goto next; + } + + p++; + snprintf(opt, 5, "%s", p); + ret = kstrtol(opt, 16, &val); + if (ret) { + pr_warn("PCIe ACS ID parse error %d\n", ret); + goto next; + } + acs_on_ids[max_acs_id].device = val; + max_acs_id++; + } +next: + p += strcspn(p, ","); + if (*p == ',') + p++; + } + + if (acs_on_downstream || acs_on_multifunction || max_acs_id) + pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); + + return 0; +} +early_param("pcie_acs_override", pcie_acs_override_setup); + +static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) +{ + int i; + + /* Never override ACS for legacy devices or devices with ACS caps */ + if (!pci_is_pcie(dev) || + pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) + return -ENOTTY; + + for (i = 0; i < max_acs_id; i++) + if (acs_on_ids[i].vendor == dev->vendor && + acs_on_ids[i].device == dev->device) + return 1; + + switch (pci_pcie_type(dev)) { + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_ROOT_PORT: + if (acs_on_downstream) + return 1; + break; + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + if (acs_on_multifunction && dev->multifunction) + return 1; + } + + return -ENOTTY; +} /* * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be * prevented for those affected devices. @@ -5171,6 +5271,7 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, /* Wangxun nics */ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, + { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, { 0 } }; From 8c86182c7a98b588bc56ff8a1e2880c2e49ec495 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 29 Dec 2022 12:12:32 +0100 Subject: [PATCH 2843/2924] Cachy: mm: Disable unevictable compaction Signed-off-by: Peter Jung --- mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index e113f713b49387..0f76cc26473d58 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -654,7 +654,7 @@ config COMPACTION config COMPACT_UNEVICTABLE_DEFAULT int depends on COMPACTION - default 0 if PREEMPT_RT + default 0 if PREEMPT_RT || CACHY default 1 # From fbcc28e5065be4420ea6b923df2fb19fedae13d9 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 4 Jun 2019 14:51:21 +0800 Subject: [PATCH 2844/2924] Cachy: PCI: Add Intel remapped NVMe device support Contains: - PCI: Add Intel remapped NVMe device support Consumer products that are configured by default to run the Intel SATA AHCI controller in "RAID" or "Intel RST Premium With Intel Optane System Acceleration" mode are becoming increasingly prevalent. Unde this mode, NVMe devices are remapped into the SATA device and become hidden from the PCI bus, which means that Linux users cannot access their storage devices unless they go into the firmware setup menu to revert back to AHCI mode - assuming such option is available. Lack of support for this mode is also causing complications for vendors who distribute Linux. Add support for the remapped NVMe mode by creating a virtual PCI bus, where the AHCI and NVMe devices are presented separately, allowing the ahci and nvme drivers to bind in the normal way. Unfortunately the NVMe device configuration space is inaccesible under this scheme, so we provide a fake one, and hope that no DeviceID-based quirks are needed. The interrupt is shared between the AHCI and NVMe devices. Allow pci_real_dma_dev() to traverse back to the real DMA device from the PCI devices created on our virtual bus, in case the iommu driver will be involved with data transfers here. The existing ahci driver is modified to not claim devices where remapped NVMe devices are present, allowing this new driver to step in. The details of the remapping scheme came from patches previously posted by Dan Williams and the resulting discussion. https://phabricator.endlessm.com/T24358 https://phabricator.endlessm.com/T29119 Signed-off-by: Daniel Drake - PCI: Fix order of remapped NVMe devices --- arch/x86/include/asm/pci.h | 6 + arch/x86/pci/common.c | 7 +- drivers/ata/ahci.c | 23 +- drivers/pci/controller/Makefile | 6 + drivers/pci/controller/intel-nvme-remap.c | 462 ++++++++++++++++++++++ 5 files changed, 488 insertions(+), 16 deletions(-) create mode 100644 drivers/pci/controller/intel-nvme-remap.c diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index b3ab80a03365cf..5e883b397ff3f2 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -26,6 +26,7 @@ struct pci_sysdata { #if IS_ENABLED(CONFIG_VMD) struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ #endif + struct pci_dev *nvme_remap_dev; /* AHCI Device if NVME remapped bus */ }; extern int pci_routeirq; @@ -69,6 +70,11 @@ static inline bool is_vmd(struct pci_bus *bus) #define is_vmd(bus) false #endif /* CONFIG_VMD */ +static inline bool is_nvme_remap(struct pci_bus *bus) +{ + return to_pci_sysdata(bus)->nvme_remap_dev != NULL; +} + /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index ddb798603201ef..7c20387d82029a 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -723,12 +723,15 @@ int pci_ext_cfg_avail(void) return 0; } -#if IS_ENABLED(CONFIG_VMD) struct pci_dev *pci_real_dma_dev(struct pci_dev *dev) { +#if IS_ENABLED(CONFIG_VMD) if (is_vmd(dev->bus)) return to_pci_sysdata(dev->bus)->vmd_dev; +#endif + + if (is_nvme_remap(dev->bus)) + return to_pci_sysdata(dev->bus)->nvme_remap_dev; return dev; } -#endif diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 163ac909bd0689..83a1e8b5443c9e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1629,7 +1629,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) } #endif -static void ahci_remap_check(struct pci_dev *pdev, int bar, +static int ahci_remap_check(struct pci_dev *pdev, int bar, struct ahci_host_priv *hpriv) { int i; @@ -1642,7 +1642,7 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, pci_resource_len(pdev, bar) < SZ_512K || bar != AHCI_PCI_BAR_STANDARD || !(readl(hpriv->mmio + AHCI_VSCAP) & 1)) - return; + return 0; cap = readq(hpriv->mmio + AHCI_REMAP_CAP); for (i = 0; i < AHCI_MAX_REMAP; i++) { @@ -1657,18 +1657,11 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, } if (!hpriv->remapped_nvme) - return; - - dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n", - hpriv->remapped_nvme); - dev_warn(&pdev->dev, - "Switch your BIOS from RAID to AHCI mode to use them.\n"); + return 0; - /* - * Don't rely on the msi-x capability in the remap case, - * share the legacy interrupt across ahci and remapped devices. - */ - hpriv->flags |= AHCI_HFLAG_NO_MSI; + /* Abort probe, allowing intel-nvme-remap to step in when available */ + dev_info(&pdev->dev, "Device will be handled by intel-nvme-remap.\n"); + return -ENODEV; } static int ahci_get_irq_vector(struct ata_host *host, int port) @@ -1912,7 +1905,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; /* detect remapped nvme devices */ - ahci_remap_check(pdev, ahci_pci_bar, hpriv); + rc = ahci_remap_check(pdev, ahci_pci_bar, hpriv); + if (rc) + return rc; sysfs_add_file_to_group(&pdev->dev.kobj, &dev_attr_remapped_nvme.attr, diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile index 038ccbd9e3ba23..de5e4f5145af8d 100644 --- a/drivers/pci/controller/Makefile +++ b/drivers/pci/controller/Makefile @@ -1,4 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 +ifdef CONFIG_X86_64 +ifdef CONFIG_SATA_AHCI +obj-y += intel-nvme-remap.o +endif +endif + obj-$(CONFIG_PCIE_CADENCE) += cadence/ obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o diff --git a/drivers/pci/controller/intel-nvme-remap.c b/drivers/pci/controller/intel-nvme-remap.c new file mode 100644 index 00000000000000..e105e6f5cc91d1 --- /dev/null +++ b/drivers/pci/controller/intel-nvme-remap.c @@ -0,0 +1,462 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel remapped NVMe device support. + * + * Copyright (c) 2019 Endless Mobile, Inc. + * Author: Daniel Drake + * + * Some products ship by default with the SATA controller in "RAID" or + * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this + * mode, which we refer to as "remapped NVMe" mode, any installed NVMe + * devices disappear from the PCI bus, and instead their I/O memory becomes + * available within the AHCI device BARs. + * + * This scheme is understood to be a way of avoiding usage of the standard + * Windows NVMe driver under that OS, instead mandating usage of Intel's + * driver instead, which has better power management, and presumably offers + * some RAID/disk-caching solutions too. + * + * Here in this driver, we support the remapped NVMe mode by claiming the + * AHCI device and creating a fake PCIe root port. On the new bus, the + * original AHCI device is exposed with only minor tweaks. Then, fake PCI + * devices corresponding to the remapped NVMe devices are created. The usual + * ahci and nvme drivers are then expected to bind to these devices and + * operate as normal. + * + * The PCI configuration space for the NVMe devices is completely + * unavailable, so we fake a minimal one and hope for the best. + * + * Interrupts are shared between the AHCI and NVMe devices. For simplicity, + * we only support the legacy interrupt here, although MSI support + * could potentially be added later. + */ + +#define MODULE_NAME "intel-nvme-remap" + +#include +#include +#include +#include +#include + +#define AHCI_PCI_BAR_STANDARD 5 + +struct nvme_remap_dev { + struct pci_dev *dev; /* AHCI device */ + struct pci_bus *bus; /* our fake PCI bus */ + struct pci_sysdata sysdata; + int irq_base; /* our fake interrupts */ + + /* + * When we detect an all-ones write to a BAR register, this flag + * is set, so that we return the BAR size on the next read (a + * standard PCI behaviour). + * This includes the assumption that an all-ones BAR write is + * immediately followed by a read of the same register. + */ + bool bar_sizing; + + /* + * Resources copied from the AHCI device, to be regarded as + * resources on our fake bus. + */ + struct resource ahci_resources[PCI_NUM_RESOURCES]; + + /* Resources corresponding to the NVMe devices. */ + struct resource remapped_dev_mem[AHCI_MAX_REMAP]; + + /* Number of remapped NVMe devices found. */ + int num_remapped_devices; +}; + +static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus) +{ + return container_of(bus->sysdata, struct nvme_remap_dev, sysdata); +} + + +/******** PCI configuration space **********/ + +/* + * Helper macros for tweaking returned contents of PCI configuration space. + * + * value contains len bytes of data read from reg. + * If fixup_reg is included in that range, fix up the contents of that + * register to fixed_value. + */ +#define NR_FIX8(fixup_reg, fixed_value) do { \ + if (reg <= fixup_reg && fixup_reg < reg + len) \ + ((u8 *) value)[fixup_reg - reg] = (u8) (fixed_value); \ + } while (0) + +#define NR_FIX16(fixup_reg, fixed_value) do { \ + NR_FIX8(fixup_reg, fixed_value); \ + NR_FIX8(fixup_reg + 1, fixed_value >> 8); \ + } while (0) + +#define NR_FIX24(fixup_reg, fixed_value) do { \ + NR_FIX8(fixup_reg, fixed_value); \ + NR_FIX8(fixup_reg + 1, fixed_value >> 8); \ + NR_FIX8(fixup_reg + 2, fixed_value >> 16); \ + } while (0) + +#define NR_FIX32(fixup_reg, fixed_value) do { \ + NR_FIX16(fixup_reg, (u16) fixed_value); \ + NR_FIX16(fixup_reg + 2, fixed_value >> 16); \ + } while (0) + +/* + * Read PCI config space of the slot 0 (AHCI) device. + * We pass through the read request to the underlying device, but + * tweak the results in some cases. + */ +static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg, + int len, u32 *value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct pci_bus *ahci_dev_bus = nrdev->dev->bus; + int ret; + + ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn, + reg, len, value); + if (ret) + return ret; + + /* + * Adjust the device class, to prevent this driver from attempting to + * additionally probe the device we're simulating here. + */ + NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI); + + /* + * Unset interrupt pin, otherwise ACPI tries to find routing + * info for our virtual IRQ, fails, and complains. + */ + NR_FIX8(PCI_INTERRUPT_PIN, 0); + + /* + * Truncate the AHCI BAR to not include the region that covers the + * hidden devices. This will cause the ahci driver to successfully + * probe th new device (instead of handing it over to this driver). + */ + if (nrdev->bar_sizing) { + NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1)); + nrdev->bar_sizing = false; + } + + return PCIBIOS_SUCCESSFUL; +} + +/* + * Read PCI config space of a remapped device. + * Since the original PCI config space is inaccessible, we provide a minimal, + * fake config space instead. + */ +static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port, + int reg, int len, u32 *value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct resource *remapped_mem; + + if (port > nrdev->num_remapped_devices) + return PCIBIOS_DEVICE_NOT_FOUND; + + *value = 0; + remapped_mem = &nrdev->remapped_dev_mem[port - 1]; + + /* Set a Vendor ID, otherwise Linux assumes no device is present */ + NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL); + + /* Always appear on & bus mastering */ + NR_FIX16(PCI_COMMAND, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + /* Set class so that nvme driver probes us */ + NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS); + + if (nrdev->bar_sizing) { + NR_FIX32(PCI_BASE_ADDRESS_0, + ~(resource_size(remapped_mem) - 1)); + nrdev->bar_sizing = false; + } else { + resource_size_t mem_start = remapped_mem->start; + + mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64; + NR_FIX32(PCI_BASE_ADDRESS_0, mem_start); + mem_start >>= 32; + NR_FIX32(PCI_BASE_ADDRESS_1, mem_start); + } + + return PCIBIOS_SUCCESSFUL; +} + +/* Read PCI configuration space. */ +static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 *value) +{ + if (PCI_SLOT(devfn) == 0) + return nvme_remap_pci_read_slot0(bus, reg, len, value); + else + return nvme_remap_pci_read_remapped(bus, PCI_SLOT(devfn), + reg, len, value); +} + +/* + * Write PCI config space of the slot 0 (AHCI) device. + * Apart from the special case of BAR sizing, we disable all writes. + * Otherwise, the ahci driver could make changes (e.g. unset PCI bus master) + * that would affect the operation of the NVMe devices. + */ +static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg, + int len, u32 value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct pci_bus *ahci_dev_bus = nrdev->dev->bus; + + if (reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5) { + /* + * Writing all-ones to a BAR means that the size of the + * memory region is being checked. Flag this so that we can + * reply with an appropriate size on the next read. + */ + if (value == ~0) + nrdev->bar_sizing = true; + + return ahci_dev_bus->ops->write(ahci_dev_bus, + nrdev->dev->devfn, + reg, len, value); + } + + return PCIBIOS_SET_FAILED; +} + +/* + * Write PCI config space of a remapped device. + * Since the original PCI config space is inaccessible, we reject all + * writes, except for the special case of BAR probing. + */ +static int nvme_remap_pci_write_remapped(struct pci_bus *bus, + unsigned int port, + int reg, int len, u32 value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + + if (port > nrdev->num_remapped_devices) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* + * Writing all-ones to a BAR means that the size of the memory + * region is being checked. Flag this so that we can reply with + * an appropriate size on the next read. + */ + if (value == ~0 && reg >= PCI_BASE_ADDRESS_0 + && reg <= PCI_BASE_ADDRESS_5) { + nrdev->bar_sizing = true; + return PCIBIOS_SUCCESSFUL; + } + + return PCIBIOS_SET_FAILED; +} + +/* Write PCI configuration space. */ +static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 value) +{ + if (PCI_SLOT(devfn) == 0) + return nvme_remap_pci_write_slot0(bus, reg, len, value); + else + return nvme_remap_pci_write_remapped(bus, PCI_SLOT(devfn), + reg, len, value); +} + +static struct pci_ops nvme_remap_pci_ops = { + .read = nvme_remap_pci_read, + .write = nvme_remap_pci_write, +}; + + +/******** Initialization & exit **********/ + +/* + * Find a PCI domain ID to use for our fake bus. + * Start at 0x10000 to not clash with ACPI _SEG domains (16 bits). + */ +static int find_free_domain(void) +{ + int domain = 0xffff; + struct pci_bus *bus = NULL; + + while ((bus = pci_find_next_bus(bus)) != NULL) + domain = max_t(int, domain, pci_domain_nr(bus)); + + return domain + 1; +} + +static int find_remapped_devices(struct nvme_remap_dev *nrdev, + struct list_head *resources) +{ + void __iomem *mmio; + int i, count = 0; + u32 cap; + + mmio = pcim_iomap(nrdev->dev, AHCI_PCI_BAR_STANDARD, + pci_resource_len(nrdev->dev, + AHCI_PCI_BAR_STANDARD)); + if (!mmio) + return -ENODEV; + + /* Check if this device might have remapped nvme devices. */ + if (pci_resource_len(nrdev->dev, AHCI_PCI_BAR_STANDARD) < SZ_512K || + !(readl(mmio + AHCI_VSCAP) & 1)) + return -ENODEV; + + cap = readq(mmio + AHCI_REMAP_CAP); + for (i = AHCI_MAX_REMAP-1; i >= 0; i--) { + struct resource *remapped_mem; + + if ((cap & (1 << i)) == 0) + continue; + if (readl(mmio + ahci_remap_dcc(i)) + != PCI_CLASS_STORAGE_EXPRESS) + continue; + + /* We've found a remapped device */ + remapped_mem = &nrdev->remapped_dev_mem[count++]; + remapped_mem->start = + pci_resource_start(nrdev->dev, AHCI_PCI_BAR_STANDARD) + + ahci_remap_base(i); + remapped_mem->end = remapped_mem->start + + AHCI_REMAP_N_SIZE - 1; + remapped_mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED; + pci_add_resource(resources, remapped_mem); + } + + pcim_iounmap(nrdev->dev, mmio); + + if (count == 0) + return -ENODEV; + + nrdev->num_remapped_devices = count; + dev_info(&nrdev->dev->dev, "Found %d remapped NVMe devices\n", + nrdev->num_remapped_devices); + return 0; +} + +static void nvme_remap_remove_root_bus(void *data) +{ + struct pci_bus *bus = data; + + pci_stop_root_bus(bus); + pci_remove_root_bus(bus); +} + +static int nvme_remap_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct nvme_remap_dev *nrdev; + LIST_HEAD(resources); + int i; + int ret; + struct pci_dev *child; + + nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL); + nrdev->sysdata.domain = find_free_domain(); + nrdev->sysdata.nvme_remap_dev = dev; + nrdev->dev = dev; + pci_set_drvdata(dev, nrdev); + + ret = pcim_enable_device(dev); + if (ret < 0) + return ret; + + pci_set_master(dev); + + ret = find_remapped_devices(nrdev, &resources); + if (ret) + return ret; + + /* Add resources from the original AHCI device */ + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + if (res->start) { + struct resource *nr_res = &nrdev->ahci_resources[i]; + + nr_res->start = res->start; + nr_res->end = res->end; + nr_res->flags = res->flags; + pci_add_resource(&resources, nr_res); + } + } + + /* Create virtual interrupts */ + nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0, + nrdev->num_remapped_devices + 1, + 0); + if (nrdev->irq_base < 0) + return nrdev->irq_base; + + /* Create and populate PCI bus */ + nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops, + &nrdev->sysdata, &resources); + if (!nrdev->bus) + return -ENODEV; + + if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus, + nrdev->bus)) + return -ENOMEM; + + /* We don't support sharing MSI interrupts between these devices */ + nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI; + + pci_scan_child_bus(nrdev->bus); + + list_for_each_entry(child, &nrdev->bus->devices, bus_list) { + /* + * Prevent PCI core from trying to move memory BARs around. + * The hidden NVMe devices are at fixed locations. + */ + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &child->resource[i]; + + if (res->flags & IORESOURCE_MEM) + res->flags |= IORESOURCE_PCI_FIXED; + } + + /* Share the legacy IRQ between all devices */ + child->irq = dev->irq; + } + + pci_assign_unassigned_bus_resources(nrdev->bus); + pci_bus_add_devices(nrdev->bus); + + return 0; +} + +static const struct pci_device_id nvme_remap_ids[] = { + /* + * Match all Intel RAID controllers. + * + * There's overlap here with the set of devices detected by the ahci + * driver, but ahci will only successfully probe when there + * *aren't* any remapped NVMe devices, and this driver will only + * successfully probe when there *are* remapped NVMe devices that + * need handling. + */ + { + PCI_VDEVICE(INTEL, PCI_ANY_ID), + .class = PCI_CLASS_STORAGE_RAID << 8, + .class_mask = 0xffffff00, + }, + {0,} +}; +MODULE_DEVICE_TABLE(pci, nvme_remap_ids); + +static struct pci_driver nvme_remap_drv = { + .name = MODULE_NAME, + .id_table = nvme_remap_ids, + .probe = nvme_remap_probe, +}; +module_pci_driver(nvme_remap_drv); + +MODULE_AUTHOR("Daniel Drake "); +MODULE_LICENSE("GPL v2"); From f2455e345fbd6eff788dfa1b9e4cfb8a370550b0 Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Mon, 11 Jul 2022 19:10:30 -0500 Subject: [PATCH 2845/2924] Cachy: cpufreq: Remove schedutil dependency on Intel/AMD P-State drivers Although both P-State drivers depend on schedutil in Kconfig, both code bases do not use any schedutil code. This arbitrarily enables schedutil when unwanted in some configurations. --- drivers/cpufreq/Kconfig.x86 | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 2c5c228408bf28..918e2bebfe788f 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -9,7 +9,6 @@ config X86_INTEL_PSTATE select ACPI_PROCESSOR if ACPI select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO select CPU_FREQ_GOV_PERFORMANCE - select CPU_FREQ_GOV_SCHEDUTIL if SMP help This driver provides a P state for Intel core processors. The driver implements an internal governor and will become @@ -39,7 +38,6 @@ config X86_AMD_PSTATE depends on X86 && ACPI select ACPI_PROCESSOR select ACPI_CPPC_LIB if X86_64 - select CPU_FREQ_GOV_SCHEDUTIL if SMP help This driver adds a CPUFreq driver which utilizes a fine grain processor performance frequency control range instead of legacy From a717f0f99bcabe6a5da29e5e605a3e660de1865f Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Mon, 22 Jan 2024 15:21:12 +0100 Subject: [PATCH 2846/2924] Cachy: Migrate Instead of increasing the number of tasks that migrate at once, migrate the amount acceptable for PREEMPT_RT, but reduce the cost so migrations occur more often. Signed-off-by: Peter Jung --- kernel/sched/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 47972f34ea7014..0892942cf913d4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2790,7 +2790,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); -#ifdef CONFIG_PREEMPT_RT +#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_CACHY) # define SCHED_NR_MIGRATE_BREAK 8 #else # define SCHED_NR_MIGRATE_BREAK 32 From c79800c89498fb10f194ae86a2a040b5eea9b229 Mon Sep 17 00:00:00 2001 From: Kenny Levinsen Date: Sun, 27 Dec 2020 14:43:13 +0000 Subject: [PATCH 2847/2924] Cachy: evdev - use call_rcu when detaching client Significant time was spent on synchronize_rcu in evdev_detach_client when applications closed evdev devices. Switching VT away from a graphical environment commonly leads to mass input device closures, which could lead to noticable delays on systems with many input devices. Replace synchronize_rcu with call_rcu, deferring reclaim of the evdev client struct till after the RCU grace period instead of blocking the calling application. While this does not solve all slow evdev fd closures, it takes care of a good portion of them, including this simple test: #include #include int main(int argc, char *argv[]) { int idx, fd; const char *path = "/dev/input/event0"; for (idx = 0; idx < 1000; idx++) { if ((fd = open(path, O_RDWR)) == -1) { return -1; } close(fd); } return 0; } Time to completion of above test when run locally: Before: 0m27.111s After: 0m0.018s Signed-off-by: Kenny Levinsen --- drivers/input/evdev.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index b5cbb57ee5f600..a0f7fa1518c660 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -46,6 +46,7 @@ struct evdev_client { struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; + struct rcu_head rcu; enum input_clock_type clk_type; bool revoked; unsigned long *evmasks[EV_CNT]; @@ -368,13 +369,22 @@ static void evdev_attach_client(struct evdev *evdev, spin_unlock(&evdev->client_lock); } +static void evdev_reclaim_client(struct rcu_head *rp) +{ + struct evdev_client *client = container_of(rp, struct evdev_client, rcu); + unsigned int i; + for (i = 0; i < EV_CNT; ++i) + bitmap_free(client->evmasks[i]); + kvfree(client); +} + static void evdev_detach_client(struct evdev *evdev, struct evdev_client *client) { spin_lock(&evdev->client_lock); list_del_rcu(&client->node); spin_unlock(&evdev->client_lock); - synchronize_rcu(); + call_rcu(&client->rcu, evdev_reclaim_client); } static int evdev_open_device(struct evdev *evdev) @@ -427,7 +437,6 @@ static int evdev_release(struct inode *inode, struct file *file) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - unsigned int i; mutex_lock(&evdev->mutex); @@ -439,11 +448,6 @@ static int evdev_release(struct inode *inode, struct file *file) evdev_detach_client(evdev, client); - for (i = 0; i < EV_CNT; ++i) - bitmap_free(client->evmasks[i]); - - kvfree(client); - evdev_close_device(evdev); return 0; @@ -486,7 +490,6 @@ static int evdev_open(struct inode *inode, struct file *file) err_free_client: evdev_detach_client(evdev, client); - kvfree(client); return error; } From 9fb9d794ba3b93e1ca0a83be3ef1615450b15702 Mon Sep 17 00:00:00 2001 From: "Jan Alexander Steffens (heftig)" Date: Mon, 27 Jan 2020 18:21:09 +0100 Subject: [PATCH 2848/2924] Cachy: Enable background reclaim of hugepages Use [defer+madvise] as default khugepaged defrag strategy: For some reason, the default strategy to respond to THP fault fallbacks is still just madvise, meaning stall if the program wants transparent hugepages, but don't trigger a background reclaim / compaction if THP begins to fail allocations. This creates a snowball affect where we still use the THP code paths, but we almost always fail once a system has been active and busy for a while. The option "defer" was created for interactive systems where THP can still improve performance. If we have to fallback to a regular page due to an allocation failure or anything else, we will trigger a background reclaim and compaction so future THP attempts succeed and previous attempts eventually have their smaller pages combined without stalling running applications. We still want madvise to stall applications that explicitely want THP, so defer+madvise _does_ make a ton of sense. Make it the default for interactive systems, especially if the kernel maintainer left transparent hugepages on "always". Reasoning and details in the original patch: https://lwn.net/Articles/711248/ --- mm/huge_memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 47d76d03ce30b4..48615051a8cf16 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -64,7 +64,11 @@ unsigned long transparent_hugepage_flags __read_mostly = #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE (1< Date: Sat, 28 Mar 2020 13:06:28 -0700 Subject: [PATCH 2849/2924] Cachy: Disable watermark boosting by default What watermark boosting does is preemptively fire up kswapd to free memory when there hasn't been an allocation failure. It does this by increasing kswapd's high watermark goal and then firing up kswapd. The reason why this causes freezes is because, with the increased high watermark goal, kswapd will steal memory from processes that need it in order to make forward progress. These processes will, in turn, try to allocate memory again, which will cause kswapd to steal necessary pages from those processes again, in a positive feedback loop known as page thrashing. When page thrashing occurs, your system is essentially livelocked until the necessary forward progress can be made to stop processes from trying to continuously allocate memory and trigger kswapd to steal it back. This problem already occurs with kswapd *without* watermark boosting, but it's usually only encountered on machines with a small amount of memory and/or a slow CPU. Watermark boosting just makes the existing problem worse enough to notice on higher spec'd machines. Disable watermark boosting by default since it's a total dumpster fire. I can't imagine why anyone would want to explicitly enable it, but the option is there in case someone does. Signed-off-by: Sultan Alsawaf --- mm/page_alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4f29e393f6af1c..4632022638a5ae 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -274,7 +274,11 @@ const char * const migratetype_names[MIGRATE_TYPES] = { int min_free_kbytes = 1024; int user_min_free_kbytes = -1; +#ifdef CONFIG_CACHY +static int watermark_boost_factor __read_mostly; +#else static int watermark_boost_factor __read_mostly = 15000; +#endif static int watermark_scale_factor = 10; int defrag_mode; From f618bb50814da55576d2cf35f0242e5ca70f50c3 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Sat, 24 Oct 2020 22:17:49 -0700 Subject: [PATCH 2850/2924] Cachy: Disable proactive compaction by default On-demand compaction works fine assuming that you don't have a need to spam the page allocator nonstop for large order page allocations. Signed-off-by: Sultan Alsawaf --- mm/compaction.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/compaction.c b/mm/compaction.c index ca71fd3c31815c..2070b1e5106f92 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1923,7 +1923,11 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE * aggressively the kernel should compact memory in the * background. It takes values in the range [0, 100]. */ +#ifdef CONFIG_CACHY +static unsigned int __read_mostly sysctl_compaction_proactiveness; +#else static unsigned int __read_mostly sysctl_compaction_proactiveness = 20; +#endif static int sysctl_extfrag_threshold = 500; static int __read_mostly sysctl_compact_memory; From 2ee6f425d3354e72660dcee51ce69b63b53adacb Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Sun, 28 Jul 2024 18:55:38 +0200 Subject: [PATCH 2851/2924] cachy: move AMD_PRIVATE_COLOR to Kconfig Co-authored-by: PedroHLC Signed-off-by: Peter Jung --- drivers/gpu/drm/amd/display/Kconfig | 6 ++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 6 +++--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 6 +++--- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index abd3b6564373a4..46937e6fa78d43 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -56,4 +56,10 @@ config DRM_AMD_SECURE_DISPLAY This option enables the calculation of crc of specific region via debugfs. Cooperate with specific DMCU FW. +config AMD_PRIVATE_COLOR + bool "Enable KMS color management by AMD for AMD" + default n + help + This option extends the KMS color management API with AMD driver-specific properties to enhance the color management support on AMD Steam Deck. + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0fe8bd19ecd13e..37b14f31cc4679 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4720,7 +4720,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR if (amdgpu_dm_create_color_properties(adev)) { dc_state_release(state->context); kfree(state); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ebabfe3a512f49..4d3ebcaacca1ba 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -97,7 +97,7 @@ static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x) return val; } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR /* Pre-defined Transfer Functions (TF) * * AMD driver supports pre-defined mathematical functions for transferring diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index e8bdd7f0c46079..b4aafe6103bc47 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -481,7 +481,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) } #endif -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR /** * dm_crtc_additional_color_mgmt - enable additional color properties * @crtc: DRM CRTC @@ -563,7 +563,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { #if defined(CONFIG_DEBUG_FS) .late_register = amdgpu_dm_crtc_late_register, #endif -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, .atomic_get_property = amdgpu_dm_atomic_crtc_get_property, #endif @@ -742,7 +742,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR dm_crtc_additional_color_mgmt(&acrtc->base); #endif return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 3e0f45f1711c1b..f645cb7831a0ae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1601,7 +1601,7 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, drm_atomic_helper_plane_destroy_state(plane, state); } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR static void dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, struct drm_plane *plane) @@ -1792,7 +1792,7 @@ static const struct drm_plane_funcs dm_plane_funcs = { .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, .format_mod_supported = amdgpu_dm_plane_format_mod_supported, -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR .atomic_set_property = dm_atomic_plane_set_property, .atomic_get_property = dm_atomic_plane_get_property, #endif @@ -1888,7 +1888,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, else drm_plane_helper_add(plane, &dm_plane_helper_funcs); -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR dm_atomic_plane_attach_color_mgmt_properties(dm, plane); #endif /* Create (reset) the plane state */ From 04c6b26f8c4de3beb8ed1f45f06c8c8717f163a1 Mon Sep 17 00:00:00 2001 From: Steven Barrett Date: Fri, 15 Mar 2024 12:36:51 -0500 Subject: [PATCH 2852/2924] Cachy: drm/amdgpu/pm: Allow override of min_power_limit with ignore_min_pcap --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 ++++++++++ drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 +++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 14 ++++++++++++-- 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c3641331d4de73..ce1072fe492187 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -161,6 +161,7 @@ struct amdgpu_watchdog_timer { */ extern int amdgpu_modeset; extern unsigned int amdgpu_vram_limit; +extern int amdgpu_ignore_min_pcap; extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; extern int amdgpu_gtt_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 72c807f5822e34..6ef3fedc123321 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -143,6 +143,7 @@ enum AMDGPU_DEBUG_MASK { }; unsigned int amdgpu_vram_limit = UINT_MAX; +int amdgpu_ignore_min_pcap = 0; /* do not ignore by default */ int amdgpu_vis_vram_limit; int amdgpu_gart_size = -1; /* auto */ int amdgpu_gtt_size = -1; /* auto */ @@ -262,6 +263,15 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { .period = 0x0, /* default to 0x0 (timeout disable) */ }; +/** + * DOC: ignore_min_pcap (int) + * Ignore the minimum power cap. + * Useful on graphics cards where the minimum power cap is very high. + * The default is 0 (Do not ignore). + */ +MODULE_PARM_DESC(ignore_min_pcap, "Ignore the minimum power cap"); +module_param_named(ignore_min_pcap, amdgpu_ignore_min_pcap, int, 0600); + /** * DOC: vramlimit (int) * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 922def51685b0a..da76611edb107d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3055,6 +3055,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, struct device_attribute *attr, char *buf) { + if (amdgpu_ignore_min_pcap) + return sysfs_emit(buf, "%i\n", 0); + return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 46cce1d2aaf357..e3adb7aea969d0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2854,7 +2854,10 @@ int smu_get_power_limit(void *handle, *limit = smu->max_power_limit; break; case SMU_PPT_LIMIT_MIN: - *limit = smu->min_power_limit; + if (amdgpu_ignore_min_pcap) + *limit = 0; + else + *limit = smu->min_power_limit; break; default: return -EINVAL; @@ -2878,7 +2881,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit) if (smu->ppt_funcs->set_power_limit) return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); - if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { + if (amdgpu_ignore_min_pcap) { + if ((limit > smu->max_power_limit)) { + dev_err(smu->adev->dev, + "New power limit (%d) is over the max allowed %d\n", + limit, smu->max_power_limit); + return -EINVAL; + } + } else if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { dev_err(smu->adev->dev, "New power limit (%d) is out of range [%d,%d]\n", limit, smu->min_power_limit, smu->max_power_limit); From 410ff52906102769a3315b15053184b65ad5f017 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Thu, 4 Apr 2024 21:40:32 +0200 Subject: [PATCH 2853/2924] Cachy: v4l2-core: add v4l2loopback Signed-off-by: Peter Jung Signed-off-by: Eric Naim --- drivers/media/v4l2-core/Kconfig | 5 + drivers/media/v4l2-core/Makefile | 2 + drivers/media/v4l2-core/v4l2loopback.c | 3292 +++++++++++++++++ drivers/media/v4l2-core/v4l2loopback.h | 98 + .../media/v4l2-core/v4l2loopback_formats.h | 445 +++ 5 files changed, 3842 insertions(+) create mode 100644 drivers/media/v4l2-core/v4l2loopback.c create mode 100644 drivers/media/v4l2-core/v4l2loopback.h create mode 100644 drivers/media/v4l2-core/v4l2loopback_formats.h diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig index 331b8e535e5bbf..80dabeebf58048 100644 --- a/drivers/media/v4l2-core/Kconfig +++ b/drivers/media/v4l2-core/Kconfig @@ -40,6 +40,11 @@ config VIDEO_TUNER config V4L2_JPEG_HELPER tristate +config V4L2_LOOPBACK + tristate "V4L2 loopback device" + help + V4L2 loopback device + # Used by drivers that need v4l2-h264.ko config V4L2_H264 tristate diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index 2177b9d63a8ffc..c179507cedc490 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -33,5 +33,7 @@ obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o obj-$(CONFIG_V4L2_MEM2MEM_DEV) += v4l2-mem2mem.o obj-$(CONFIG_V4L2_VP9) += v4l2-vp9.o +obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o + obj-$(CONFIG_VIDEO_TUNER) += tuner.o obj-$(CONFIG_VIDEO_DEV) += v4l2-dv-timings.o videodev.o diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c new file mode 100644 index 00000000000000..468d47f4950b33 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.c @@ -0,0 +1,3292 @@ +/* -*- c-file-style: "linux" -*- */ +/* + * v4l2loopback.c -- video4linux2 loopback driver + * + * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com) + * Copyright (C) 2010-2023 IOhannes m zmoelnig (zmoelnig@iem.at) + * Copyright (C) 2011 Stefan Diewald (stefan.diewald@mytum.de) + * Copyright (C) 2012 Anton Novikov (random.plant@gmail.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "v4l2loopback.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) +#error This module is not supported on kernels before 4.0.0. +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +#define strscpy strlcpy +#endif + +#if defined(timer_setup) && defined(from_timer) +#define HAVE_TIMER_SETUP +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) +#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER +#endif + +#define V4L2LOOPBACK_VERSION_CODE \ + KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \ + V4L2LOOPBACK_VERSION_BUGFIX) + +MODULE_DESCRIPTION("V4L2 loopback video device"); +MODULE_AUTHOR("Vasily Levin, " + "IOhannes m zmoelnig ," + "Stefan Diewald," + "Anton Novikov" + "et al."); +#ifdef SNAPSHOT_VERSION +MODULE_VERSION(__stringify(SNAPSHOT_VERSION)); +#else +MODULE_VERSION("" __stringify(V4L2LOOPBACK_VERSION_MAJOR) "." __stringify( + V4L2LOOPBACK_VERSION_MINOR) "." __stringify(V4L2LOOPBACK_VERSION_BUGFIX)); +#endif +MODULE_LICENSE("GPL"); + +/* + * helpers + */ +#define dprintk(fmt, args...) \ + do { \ + if (debug > 0) { \ + printk(KERN_INFO "v4l2-loopback[" __stringify( \ + __LINE__) "], pid(%d): " fmt, \ + task_pid_nr(current), ##args); \ + } \ + } while (0) + +#define MARK() \ + do { \ + if (debug > 1) { \ + printk(KERN_INFO "%s:%d[%s], pid(%d)\n", __FILE__, \ + __LINE__, __func__, task_pid_nr(current)); \ + } \ + } while (0) + +#define dprintkrw(fmt, args...) \ + do { \ + if (debug > 2) { \ + printk(KERN_INFO "v4l2-loopback[" __stringify( \ + __LINE__) "], pid(%d): " fmt, \ + task_pid_nr(current), ##args); \ + } \ + } while (0) + +static inline void v4l2l_get_timestamp(struct v4l2_buffer *b) +{ + struct timespec64 ts; + ktime_get_ts64(&ts); + + b->timestamp.tv_sec = ts.tv_sec; + b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC); + b->flags |= V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + b->flags &= ~V4L2_BUF_FLAG_TIMESTAMP_COPY; +} + +#if BITS_PER_LONG == 32 +#include /* do_div() for 64bit division */ +static inline int v4l2l_mod64(const s64 A, const u32 B) +{ + u64 a = (u64)A; + u32 b = B; + + if (A > 0) + return do_div(a, b); + a = -A; + return -do_div(a, b); +} +#else +static inline int v4l2l_mod64(const s64 A, const u32 B) +{ + return A % B; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) +typedef unsigned __poll_t; +#endif + +/* module constants + * can be overridden during he build process using something like + * make KCPPFLAGS="-DMAX_DEVICES=100" + */ + +/* maximum number of v4l2loopback devices that can be created */ +#ifndef MAX_DEVICES +#define MAX_DEVICES 8 +#endif + +/* whether the default is to announce capabilities exclusively or not */ +#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0 +#endif + +/* when a producer is considered to have gone stale */ +#ifndef MAX_TIMEOUT +#define MAX_TIMEOUT (100 * 1000) /* in msecs */ +#endif + +/* max buffers that can be mapped, actually they + * are all mapped to max_buffers buffers */ +#ifndef MAX_BUFFERS +#define MAX_BUFFERS 32 +#endif + +/* module parameters */ +static int debug = 0; +module_param(debug, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)"); + +#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2 +static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS; +module_param(max_buffers, int, S_IRUGO); +MODULE_PARM_DESC(max_buffers, + "how many buffers should be allocated [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]"); + +/* how many times a device can be opened + * the per-module default value can be overridden on a per-device basis using + * the /sys/devices interface + * + * note that max_openers should be at least 2 in order to get a working system: + * one opener for the producer and one opener for the consumer + * however, we leave that to the user + */ +#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10 +static int max_openers = V4L2LOOPBACK_DEFAULT_MAX_OPENERS; +module_param(max_openers, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC( + max_openers, + "how many users can open the loopback device [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]"); + +static int devices = -1; +module_param(devices, int, 0); +MODULE_PARM_DESC(devices, "how many devices should be created"); + +static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 }; +module_param_array(video_nr, int, NULL, 0444); +MODULE_PARM_DESC(video_nr, + "video device numbers (-1=auto, 0=/dev/video0, etc.)"); + +static char *card_label[MAX_DEVICES]; +module_param_array(card_label, charp, NULL, 0000); +MODULE_PARM_DESC(card_label, "card labels for each device"); + +static bool exclusive_caps[MAX_DEVICES] = { + [0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +}; +module_param_array(exclusive_caps, bool, NULL, 0444); +/* FIXXME: wording */ +MODULE_PARM_DESC( + exclusive_caps, + "whether to announce OUTPUT/CAPTURE capabilities exclusively or not [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]"); + +/* format specifications */ +#define V4L2LOOPBACK_SIZE_MIN_WIDTH 2 +#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 1 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192 + +#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640 +#define V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480 + +static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; +module_param(max_width, int, S_IRUGO); +MODULE_PARM_DESC(max_width, + "maximum allowed frame width [DEFAULT: " __stringify( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]"); +static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; +module_param(max_height, int, S_IRUGO); +MODULE_PARM_DESC(max_height, + "maximum allowed frame height [DEFAULT: " __stringify( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]"); + +static DEFINE_IDR(v4l2loopback_index_idr); +static DEFINE_MUTEX(v4l2loopback_ctl_mutex); + +/* frame intervals */ +#define V4L2LOOPBACK_FRAME_INTERVAL_MAX __UINT32_MAX__ +#define V4L2LOOPBACK_FPS_DEFAULT 30 +#define V4L2LOOPBACK_FPS_MAX 1000 + +/* control IDs */ +#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000) +#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0) +#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1) +#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2) +#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3) + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl); +static const struct v4l2_ctrl_ops v4l2loopback_ctrl_ops = { + .s_ctrl = v4l2loopback_s_ctrl, +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_KEEP_FORMAT, + .name = "keep_format", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_SUSTAIN_FRAMERATE, + .name = "sustain_framerate", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT, + .name = "timeout", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = MAX_TIMEOUT, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeoutimageio = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT_IMAGE_IO, + .name = "timeout_image_io", + .type = V4L2_CTRL_TYPE_BUTTON, + .min = 0, + .max = 0, + .step = 0, + .def = 0, + // clang-format on +}; + +/* module structures */ +struct v4l2loopback_private { + int device_nr; +}; + +/* TODO(vasaka) use typenames which are common to kernel, but first find out if + * it is needed */ +/* struct keeping state and settings of loopback device */ + +struct v4l2l_buffer { + struct v4l2_buffer buffer; + struct list_head list_head; + atomic_t use_count; +}; + +struct v4l2_loopback_device { + struct v4l2_device v4l2_dev; + struct v4l2_ctrl_handler ctrl_handler; + struct video_device *vdev; + + /* loopback device-specific parameters */ + char card_label[32]; + bool announce_all_caps; /* announce both OUTPUT and CAPTURE capabilities + * when true; else announce OUTPUT when no + * writer is streaming, otherwise CAPTURE. */ + int max_openers; /* how many times can this device be opened */ + int min_width, max_width; + int min_height, max_height; + + /* pixel and stream format */ + struct v4l2_pix_format pix_format; + bool pix_format_has_valid_sizeimage; + struct v4l2_captureparm capture_param; + unsigned long frame_jiffies; + + /* ctrls */ + int keep_format; /* CID_KEEP_FORMAT; lock the format, do not free + * on close(), and when `!announce_all_caps` do NOT + * fall back to OUTPUT when no writers attached (clear + * `keep_format` to attach a new writer) */ + int sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain + (close to) nominal framerate */ + unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */ + int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will + * queue/dequeue the timeout image buffer */ + + /* buffers for OUTPUT and CAPTURE */ + u8 *image; /* pointer to actual buffers data */ + unsigned long image_size; /* number of bytes alloc'd for all buffers */ + struct v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */ + u32 buffer_count; /* should not be big, 4 is a good choice */ + u32 buffer_size; /* number of bytes alloc'd per buffer */ + u32 used_buffer_count; /* number of buffers allocated to openers */ + struct list_head outbufs_list; /* FIFO queue for OUTPUT buffers */ + u32 bufpos2index[MAX_BUFFERS]; /* mapping of `(position % used_buffers)` + * to `buffers[index]` */ + s64 write_position; /* sequence number of last 'displayed' buffer plus + * one */ + + /* synchronization between openers */ + atomic_t open_count; + struct mutex image_mutex; /* mutex for allocating image(s) and + * exchanging format tokens */ + spinlock_t lock; /* lock for the timeout and framerate timers */ + spinlock_t list_lock; /* lock for the OUTPUT buffer queue */ + wait_queue_head_t read_event; + u32 format_tokens; /* tokens to 'set format' for OUTPUT, CAPTURE, or + * timeout buffers */ + u32 stream_tokens; /* tokens to 'start' OUTPUT, CAPTURE, or timeout + * stream */ + + /* sustain framerate */ + struct timer_list sustain_timer; + unsigned int reread_count; + + /* timeout */ + u8 *timeout_image; /* copied to outgoing buffers when timeout passes */ + struct v4l2l_buffer timeout_buffer; + u32 timeout_buffer_size; /* number bytes alloc'd for timeout buffer */ + struct timer_list timeout_timer; + int timeout_happened; +}; + +enum v4l2l_io_method { + V4L2L_IO_NONE = 0, + V4L2L_IO_MMAP = 1, + V4L2L_IO_FILE = 2, + V4L2L_IO_TIMEOUT = 3, +}; + +/* struct keeping state and type of opener */ +struct v4l2_loopback_opener { + u32 format_token; /* token (if any) for type used in call to S_FMT or + * REQBUFS */ + u32 stream_token; /* token (if any) for type used in call to STREAMON */ + u32 buffer_count; /* number of buffers (if any) that opener acquired via + * REQBUFS */ + s64 read_position; /* sequence number of the next 'captured' frame */ + unsigned int reread_count; + enum v4l2l_io_method io_method; + + struct v4l2_fh fh; +}; + +#define fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh) + +/* this is heavily inspired by the bttv driver found in the linux kernel */ +struct v4l2l_format { + char *name; + int fourcc; /* video4linux 2 */ + int depth; /* bit/pixel */ + int flags; +}; +/* set the v4l2l_format.flags to PLANAR for non-packed formats */ +#define FORMAT_FLAGS_PLANAR 0x01 +#define FORMAT_FLAGS_COMPRESSED 0x02 + +#include "v4l2loopback_formats.h" + +#ifndef V4L2_TYPE_IS_CAPTURE +#define V4L2_TYPE_IS_CAPTURE(type) \ + ((type) == V4L2_BUF_TYPE_VIDEO_CAPTURE || \ + (type) == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) +#endif /* V4L2_TYPE_IS_CAPTURE */ +#ifndef V4L2_TYPE_IS_OUTPUT +#define V4L2_TYPE_IS_OUTPUT(type) \ + ((type) == V4L2_BUF_TYPE_VIDEO_OUTPUT || \ + (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) +#endif /* V4L2_TYPE_IS_OUTPUT */ + +/* token values for privilege to set format or start/stop stream */ +#define V4L2L_TOKEN_CAPTURE 0x01 +#define V4L2L_TOKEN_OUTPUT 0x02 +#define V4L2L_TOKEN_TIMEOUT 0x04 +#define V4L2L_TOKEN_MASK \ + (V4L2L_TOKEN_CAPTURE | V4L2L_TOKEN_OUTPUT | V4L2L_TOKEN_TIMEOUT) + +/* helpers for token exchange and token status */ +#define token_from_type(type) \ + (V4L2_TYPE_IS_CAPTURE(type) ? V4L2L_TOKEN_CAPTURE : V4L2L_TOKEN_OUTPUT) +#define acquire_token(dev, opener, label, token) \ + do { \ + (opener)->label##_token = token; \ + (dev)->label##_tokens &= ~token; \ + } while (0) +#define release_token(dev, opener, label) \ + do { \ + (dev)->label##_tokens |= (opener)->label##_token; \ + (opener)->label##_token = 0; \ + } while (0) +#define has_output_token(token) (token & V4L2L_TOKEN_OUTPUT) +#define has_capture_token(token) (token & V4L2L_TOKEN_CAPTURE) +#define has_no_owners(dev) ((~((dev)->format_tokens) & V4L2L_TOKEN_MASK) == 0) +#define has_other_owners(opener, dev) \ + (~((dev)->format_tokens ^ (opener)->format_token) & V4L2L_TOKEN_MASK) +#define need_timeout_buffer(dev, token) \ + ((dev)->timeout_jiffies > 0 || (token) & V4L2L_TOKEN_TIMEOUT) + +static const unsigned int FORMATS = ARRAY_SIZE(formats); + +static char *fourcc2str(unsigned int fourcc, char buf[5]) +{ + buf[0] = (fourcc >> 0) & 0xFF; + buf[1] = (fourcc >> 8) & 0xFF; + buf[2] = (fourcc >> 16) & 0xFF; + buf[3] = (fourcc >> 24) & 0xFF; + buf[4] = 0; + + return buf; +} + +static const struct v4l2l_format *format_by_fourcc(int fourcc) +{ + unsigned int i; + char buf[5]; + + for (i = 0; i < FORMATS; i++) { + if (formats[i].fourcc == fourcc) + return formats + i; + } + + dprintk("unsupported format '%4s'\n", fourcc2str(fourcc, buf)); + return NULL; +} + +static void pix_format_set_size(struct v4l2_pix_format *f, + const struct v4l2l_format *fmt, + unsigned int width, unsigned int height) +{ + f->width = width; + f->height = height; + + if (fmt->flags & FORMAT_FLAGS_PLANAR) { + f->bytesperline = width; /* Y plane */ + f->sizeimage = (width * height * fmt->depth) >> 3; + } else if (fmt->flags & FORMAT_FLAGS_COMPRESSED) { + /* doesn't make sense for compressed formats */ + f->bytesperline = 0; + f->sizeimage = (width * height * fmt->depth) >> 3; + } else { + f->bytesperline = (width * fmt->depth) >> 3; + f->sizeimage = height * f->bytesperline; + } +} + +static int v4l2l_fill_format(struct v4l2_format *fmt, const u32 minwidth, + const u32 maxwidth, const u32 minheight, + const u32 maxheight) +{ + u32 width = fmt->fmt.pix.width, height = fmt->fmt.pix.height; + u32 pixelformat = fmt->fmt.pix.pixelformat; + struct v4l2_format fmt0 = *fmt; + u32 bytesperline = 0, sizeimage = 0; + + if (!width) + width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; + if (!height) + height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; + width = clamp_val(width, minwidth, maxwidth); + height = clamp_val(height, minheight, maxheight); + + /* sets: width,height,pixelformat,bytesperline,sizeimage */ + if (!(V4L2_TYPE_IS_MULTIPLANAR(fmt0.type))) { + fmt0.fmt.pix.bytesperline = 0; + fmt0.fmt.pix.sizeimage = 0; + } + + if (0) { + ; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + } else if (!v4l2_fill_pixfmt(&fmt0.fmt.pix, pixelformat, width, + height)) { + ; + } else if (!v4l2_fill_pixfmt_mp(&fmt0.fmt.pix_mp, pixelformat, width, + height)) { + ; +#endif + } else { + const struct v4l2l_format *format = + format_by_fourcc(pixelformat); + if (!format) + return -EINVAL; + pix_format_set_size(&fmt0.fmt.pix, format, width, height); + fmt0.fmt.pix.pixelformat = format->fourcc; + } + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt0.type)) { + *fmt = fmt0; + + if ((fmt->fmt.pix_mp.colorspace == V4L2_COLORSPACE_DEFAULT) || + (fmt->fmt.pix_mp.colorspace > V4L2_COLORSPACE_DCI_P3)) + fmt->fmt.pix_mp.colorspace = V4L2_COLORSPACE_SRGB; + if (V4L2_FIELD_ANY == fmt->fmt.pix_mp.field) + fmt->fmt.pix_mp.field = V4L2_FIELD_NONE; + } else { + bytesperline = fmt->fmt.pix.bytesperline; + sizeimage = fmt->fmt.pix.sizeimage; + + *fmt = fmt0; + + if (!fmt->fmt.pix.bytesperline) + fmt->fmt.pix.bytesperline = bytesperline; + if (!fmt->fmt.pix.sizeimage) + fmt->fmt.pix.sizeimage = sizeimage; + + if ((fmt->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) || + (fmt->fmt.pix.colorspace > V4L2_COLORSPACE_DCI_P3)) + fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; + if (V4L2_FIELD_ANY == fmt->fmt.pix.field) + fmt->fmt.pix.field = V4L2_FIELD_NONE; + } + + return 0; +} + +/* Checks if v4l2l_fill_format() has set a valid, fixed sizeimage val. */ +static bool v4l2l_pix_format_has_valid_sizeimage(struct v4l2_format *fmt) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + const struct v4l2_format_info *info; + + info = v4l2_format_info(fmt->fmt.pix.pixelformat); + if (info && info->mem_planes == 1) + return true; +#endif + + return false; +} + +static int pix_format_eq(const struct v4l2_pix_format *ref, + const struct v4l2_pix_format *tgt, int strict) +{ + /* check if the two formats are equivalent. + * ANY fields are handled gracefully + */ +#define _pix_format_eq0(x) \ + if (ref->x != tgt->x) \ + result = 0 +#define _pix_format_eq1(x, def) \ + do { \ + if ((def != tgt->x) && (ref->x != tgt->x)) { \ + printk(KERN_INFO #x " failed"); \ + result = 0; \ + } \ + } while (0) + int result = 1; + _pix_format_eq0(width); + _pix_format_eq0(height); + _pix_format_eq0(pixelformat); + if (!strict) + return result; + _pix_format_eq1(field, V4L2_FIELD_ANY); + _pix_format_eq0(bytesperline); + _pix_format_eq0(sizeimage); + _pix_format_eq1(colorspace, V4L2_COLORSPACE_DEFAULT); + return result; +} + +static void set_timeperframe(struct v4l2_loopback_device *dev, + struct v4l2_fract *tpf) +{ + if (!tpf->denominator && !tpf->numerator) { + tpf->numerator = 1; + tpf->denominator = V4L2LOOPBACK_FPS_DEFAULT; + } else if (tpf->numerator > + V4L2LOOPBACK_FRAME_INTERVAL_MAX * tpf->denominator) { + /* divide-by-zero or greater than maximum interval => min FPS */ + tpf->numerator = V4L2LOOPBACK_FRAME_INTERVAL_MAX; + tpf->denominator = 1; + } else if (tpf->numerator * V4L2LOOPBACK_FPS_MAX < tpf->denominator) { + /* zero or lower than minimum interval => max FPS */ + tpf->numerator = 1; + tpf->denominator = V4L2LOOPBACK_FPS_MAX; + } + + dev->capture_param.timeperframe = *tpf; + dev->frame_jiffies = + max(1UL, (msecs_to_jiffies(1000) * tpf->numerator) / + tpf->denominator); +} + +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd); + +/* device attributes */ +/* available via sysfs: /sys/devices/virtual/video4linux/video* */ + +static ssize_t attr_show_format(struct device *cd, + struct device_attribute *attr, char *buf) +{ + /* gets the current format as "FOURCC:WxH@f/s", e.g. "YUYV:320x240@1000/30" */ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + const struct v4l2_fract *tpf; + char buf4cc[5], buf_fps[32]; + + if (!dev || (has_no_owners(dev) && !dev->keep_format)) + return 0; + tpf = &dev->capture_param.timeperframe; + + fourcc2str(dev->pix_format.pixelformat, buf4cc); + if (tpf->numerator == 1) + snprintf(buf_fps, sizeof(buf_fps), "%u", tpf->denominator); + else + snprintf(buf_fps, sizeof(buf_fps), "%u/%u", tpf->denominator, + tpf->numerator); + return sprintf(buf, "%4s:%ux%u@%s\n", buf4cc, dev->pix_format.width, + dev->pix_format.height, buf_fps); +} + +static ssize_t attr_store_format(struct device *cd, + struct device_attribute *attr, const char *buf, + size_t len) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + int fps_num = 0, fps_den = 1; + + if (!dev) + return -ENODEV; + + /* only fps changing is supported */ + if (sscanf(buf, "@%u/%u", &fps_num, &fps_den) > 0) { + struct v4l2_fract f = { .numerator = fps_den, + .denominator = fps_num }; + set_timeperframe(dev, &f); + return len; + } + return -EINVAL; +} + +static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format, + attr_store_format); + +static ssize_t attr_show_buffers(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + return sprintf(buf, "%u\n", dev->used_buffer_count); +} + +static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL); + +static ssize_t attr_show_maxopeners(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + return sprintf(buf, "%d\n", dev->max_openers); +} + +static ssize_t attr_store_maxopeners(struct device *cd, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct v4l2_loopback_device *dev = NULL; + unsigned long curr = 0; + + if (kstrtoul(buf, 0, &curr)) + return -EINVAL; + + dev = v4l2loopback_cd2dev(cd); + if (!dev) + return -ENODEV; + + if (dev->max_openers == curr) + return len; + + if (curr > __INT_MAX__ || dev->open_count.counter > curr) { + /* request to limit to less openers as are currently attached to us */ + return -EINVAL; + } + + dev->max_openers = (int)curr; + + return len; +} + +static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners, + attr_store_maxopeners); + +static ssize_t attr_show_state(struct device *cd, struct device_attribute *attr, + char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + if (!has_output_token(dev->stream_tokens) || dev->keep_format) { + return sprintf(buf, "capture\n"); + } else + return sprintf(buf, "output\n"); + + return -EAGAIN; +} + +static DEVICE_ATTR(state, S_IRUGO, attr_show_state, NULL); + +static void v4l2loopback_remove_sysfs(struct video_device *vdev) +{ +#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x) + + if (vdev) { + V4L2_SYSFS_DESTROY(format); + V4L2_SYSFS_DESTROY(buffers); + V4L2_SYSFS_DESTROY(max_openers); + V4L2_SYSFS_DESTROY(state); + /* ... */ + } +} + +static void v4l2loopback_create_sysfs(struct video_device *vdev) +{ + int res = 0; + +#define V4L2_SYSFS_CREATE(x) \ + res = device_create_file(&vdev->dev, &dev_attr_##x); \ + if (res < 0) \ + break + if (!vdev) + return; + do { + V4L2_SYSFS_CREATE(format); + V4L2_SYSFS_CREATE(buffers); + V4L2_SYSFS_CREATE(max_openers); + V4L2_SYSFS_CREATE(state); + /* ... */ + } while (0); + + if (res >= 0) + return; + dev_err(&vdev->dev, "%s error: %d\n", __func__, res); +} + +/* Event APIs */ + +#define V4L2LOOPBACK_EVENT_BASE (V4L2_EVENT_PRIVATE_START) +#define V4L2LOOPBACK_EVENT_OFFSET 0x08E00000 +#define V4L2_EVENT_PRI_CLIENT_USAGE \ + (V4L2LOOPBACK_EVENT_BASE + V4L2LOOPBACK_EVENT_OFFSET + 1) + +struct v4l2_event_client_usage { + __u32 count; +}; + +/* global module data */ +/* find a device based on it's device-number (e.g. '3' for /dev/video3) */ +struct v4l2loopback_lookup_cb_data { + int device_nr; + struct v4l2_loopback_device *device; +}; +static int v4l2loopback_lookup_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *device = ptr; + struct v4l2loopback_lookup_cb_data *cbdata = data; + if (cbdata && device && device->vdev) { + if (device->vdev->num == cbdata->device_nr) { + cbdata->device = device; + cbdata->device_nr = id; + return 1; + } + } + return 0; +} +static int v4l2loopback_lookup(int device_nr, + struct v4l2_loopback_device **device) +{ + struct v4l2loopback_lookup_cb_data data = { + .device_nr = device_nr, + .device = NULL, + }; + int err = idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb, + &data); + if (1 == err) { + if (device) + *device = data.device; + return data.device_nr; + } + return -ENODEV; +} +#define v4l2loopback_get_vdev_nr(vdev) \ + ((struct v4l2loopback_private *)video_get_drvdata(vdev))->device_nr +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd) +{ + struct video_device *loopdev = to_video_device(cd); + int device_nr = v4l2loopback_get_vdev_nr(loopdev); + + return idr_find(&v4l2loopback_index_idr, device_nr); +} + +static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f) +{ + struct v4l2loopback_private *ptr = video_drvdata(f); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +/* forward declarations */ +static void client_usage_queue_event(struct video_device *vdev); +static bool any_buffers_mapped(struct v4l2_loopback_device *dev); +static int allocate_buffers(struct v4l2_loopback_device *dev, + struct v4l2_pix_format *pix_format); +static void init_buffers(struct v4l2_loopback_device *dev, u32 bytes_used, + u32 buffer_size); +static void free_buffers(struct v4l2_loopback_device *dev); +static int allocate_timeout_buffer(struct v4l2_loopback_device *dev); +static void free_timeout_buffer(struct v4l2_loopback_device *dev); +static void check_timers(struct v4l2_loopback_device *dev); +static const struct v4l2_file_operations v4l2_loopback_fops; +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops; + +/* V4L2 ioctl caps and params calls */ +/* returns device capabilities + * called on VIDIOC_QUERYCAP + */ +static int vidioc_querycap(struct file *file, void *fh, + struct v4l2_capability *cap) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + int device_nr = v4l2loopback_get_vdev_nr(dev->vdev); + __u32 capabilities = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; + + strscpy(cap->driver, "v4l2 loopback", sizeof(cap->driver)); + snprintf(cap->card, sizeof(cap->card), "%s", dev->card_label); + snprintf(cap->bus_info, sizeof(cap->bus_info), + "platform:v4l2loopback-%03d", device_nr); + + if (dev->announce_all_caps) { + capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT; + } else { + if (opener->io_method == V4L2L_IO_TIMEOUT || + (has_output_token(dev->stream_tokens) && + !dev->keep_format)) { + capabilities |= V4L2_CAP_VIDEO_OUTPUT; + } else + capabilities |= V4L2_CAP_VIDEO_CAPTURE; + } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + dev->vdev->device_caps = +#endif /* >=linux-4.7.0 */ + cap->device_caps = cap->capabilities = capabilities; + + cap->capabilities |= V4L2_CAP_DEVICE_CAPS; + + memset(cap->reserved, 0, sizeof(cap->reserved)); + return 0; +} + +static int vidioc_enum_framesizes(struct file *file, void *fh, + struct v4l2_frmsizeenum *argp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + /* there can be only one... */ + if (argp->index) + return -EINVAL; + + if (dev->keep_format || has_other_owners(opener, dev)) { + /* only current frame size supported */ + if (argp->pixel_format != dev->pix_format.pixelformat) + return -EINVAL; + + argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; + + argp->discrete.width = dev->pix_format.width; + argp->discrete.height = dev->pix_format.height; + } else { + /* return continuous sizes if pixel format is supported */ + if (NULL == format_by_fourcc(argp->pixel_format)) + return -EINVAL; + + if (dev->min_width == dev->max_width && + dev->min_height == dev->max_height) { + argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; + + argp->discrete.width = dev->min_width; + argp->discrete.height = dev->min_height; + } else { + argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; + + argp->stepwise.min_width = dev->min_width; + argp->stepwise.min_height = dev->min_height; + + argp->stepwise.max_width = dev->max_width; + argp->stepwise.max_height = dev->max_height; + + argp->stepwise.step_width = 1; + argp->stepwise.step_height = 1; + } + } + return 0; +} + +/* Test if the device is currently 'capable' of the buffer (stream) type when + * the `exclusive_caps` parameter is set. `keep_format` should lock the format + * and prevent free of buffers */ +static int check_buffer_capability(struct v4l2_loopback_device *dev, + struct v4l2_loopback_opener *opener, + enum v4l2_buf_type type) +{ + /* short-circuit for (non-compliant) timeout image mode */ + if (opener->io_method == V4L2L_IO_TIMEOUT) + return 0; + if (dev->announce_all_caps) + return (type == V4L2_BUF_TYPE_VIDEO_CAPTURE || + type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ? + 0 : + -EINVAL; + /* CAPTURE if opener has a capture format or a writer is streaming; + * else OUTPUT. */ + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (!(has_capture_token(opener->format_token) || + !has_output_token(dev->stream_tokens))) + return -EINVAL; + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (!(has_output_token(opener->format_token) || + has_output_token(dev->stream_tokens))) + return -EINVAL; + break; + default: + return -EINVAL; + } + return 0; +} +/* returns frameinterval (fps) for the set resolution + * called on VIDIOC_ENUM_FRAMEINTERVALS + */ +static int vidioc_enum_frameintervals(struct file *file, void *fh, + struct v4l2_frmivalenum *argp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + /* there can be only one... */ + if (argp->index) + return -EINVAL; + + if (dev->keep_format || has_other_owners(opener, dev)) { + /* keep_format also locks the frame rate */ + if (argp->width != dev->pix_format.width || + argp->height != dev->pix_format.height || + argp->pixel_format != dev->pix_format.pixelformat) + return -EINVAL; + + argp->type = V4L2_FRMIVAL_TYPE_DISCRETE; + argp->discrete = dev->capture_param.timeperframe; + } else { + if (argp->width < dev->min_width || + argp->width > dev->max_width || + argp->height < dev->min_height || + argp->height > dev->max_height || + !format_by_fourcc(argp->pixel_format)) + return -EINVAL; + + argp->type = V4L2_FRMIVAL_TYPE_CONTINUOUS; + argp->stepwise.min.numerator = 1; + argp->stepwise.min.denominator = V4L2LOOPBACK_FPS_MAX; + argp->stepwise.max.numerator = V4L2LOOPBACK_FRAME_INTERVAL_MAX; + argp->stepwise.max.denominator = 1; + argp->stepwise.step.numerator = 1; + argp->stepwise.step.denominator = 1; + } + + return 0; +} + +/* Enumerate device formats + * Returns: + * - EINVAL the index is out of bounds; or if non-zero when format is fixed + * - EFAULT unexpected null pointer */ +static int vidioc_enum_fmt_vid(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + int fixed = dev->keep_format || has_other_owners(opener, dev); + const struct v4l2l_format *fmt; + + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + + if (!(f->index < FORMATS)) + return -EINVAL; + /* TODO: Support 6.14 V4L2_FMTDESC_FLAG_ENUM_ALL */ + if (fixed && f->index) + return -EINVAL; + + fmt = fixed ? format_by_fourcc(dev->pix_format.pixelformat) : + &formats[f->index]; + if (!fmt) + return -EFAULT; + + f->flags = 0; + if (fmt->flags & FORMAT_FLAGS_COMPRESSED) + f->flags |= V4L2_FMT_FLAG_COMPRESSED; + snprintf(f->description, sizeof(f->description), fmt->name); + f->pixelformat = fmt->fourcc; + return 0; +} + +/* Tests (or tries) the format. + * Returns: + * - EINVAL if the buffer type or format is not supported + */ +static int vidioc_try_fmt_vid(struct file *file, void *fh, + struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + if (v4l2l_fill_format(f, dev->min_width, dev->max_width, + dev->min_height, dev->max_height) != 0) + return -EINVAL; + if (dev->keep_format || has_other_owners(opener, dev)) + /* use existing format - including colorspace info */ + f->fmt.pix = dev->pix_format; + + return 0; +} + +/* Sets new format. Fills 'f' argument with the requested or existing format. + * Side-effect: buffers are allocated for the (returned) format. + * Returns: + * - EINVAL if the type is not supported + * - EBUSY if buffers are already allocated + * TODO: (vasaka) set subregions of input + */ +static int vidioc_s_fmt_vid(struct file *file, void *fh, struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 token = opener->io_method == V4L2L_IO_TIMEOUT ? + V4L2L_TOKEN_TIMEOUT : + token_from_type(f->type); + int changed, result; + char buf[5]; + + result = vidioc_try_fmt_vid(file, fh, f); + if (result < 0) + return result; + + if (opener->buffer_count > 0) + /* must free buffers before format can be set */ + return -EBUSY; + + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + + if (opener->format_token) + release_token(dev, opener, format); + if (!(dev->format_tokens & token)) { + result = -EBUSY; + goto exit_s_fmt_unlock; + } + + dprintk("S_FMT[%s] %4s:%ux%u size=%u\n", + V4L2_TYPE_IS_CAPTURE(f->type) ? "CAPTURE" : "OUTPUT", + fourcc2str(f->fmt.pix.pixelformat, buf), f->fmt.pix.width, + f->fmt.pix.height, f->fmt.pix.sizeimage); + changed = !pix_format_eq(&dev->pix_format, &f->fmt.pix, 0); + if (changed || has_no_owners(dev)) { + result = allocate_buffers(dev, &f->fmt.pix); + if (result < 0) + goto exit_s_fmt_unlock; + } + if ((dev->timeout_image && changed) || + (!dev->timeout_image && need_timeout_buffer(dev, token))) { + result = allocate_timeout_buffer(dev); + if (result < 0) + goto exit_s_fmt_free; + } + if (changed) { + dev->pix_format = f->fmt.pix; + dev->pix_format_has_valid_sizeimage = + v4l2l_pix_format_has_valid_sizeimage(f); + } + acquire_token(dev, opener, format, token); + if (opener->io_method == V4L2L_IO_TIMEOUT) + dev->timeout_image_io = 0; + goto exit_s_fmt_unlock; +exit_s_fmt_free: + free_buffers(dev); +exit_s_fmt_unlock: + mutex_unlock(&dev->image_mutex); + return result; +} + +/* ------------------ CAPTURE ----------------------- */ +/* ioctl for VIDIOC_ENUM_FMT, _G_FMT, _S_FMT, and _TRY_FMT when buffer type + * is V4L2_BUF_TYPE_VIDEO_CAPTURE */ + +static int vidioc_enum_fmt_cap(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt_vid(file, fh, f); +} + +static int vidioc_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + f->fmt.pix = dev->pix_format; + return 0; +} + +static int vidioc_try_fmt_cap(struct file *file, void *fh, + struct v4l2_format *f) +{ + return vidioc_try_fmt_vid(file, fh, f); +} + +static int vidioc_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) +{ + return vidioc_s_fmt_vid(file, fh, f); +} + +/* ------------------ OUTPUT ----------------------- */ +/* ioctl for VIDIOC_ENUM_FMT, _G_FMT, _S_FMT, and _TRY_FMT when buffer type + * is V4L2_BUF_TYPE_VIDEO_OUTPUT */ + +static int vidioc_enum_fmt_out(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt_vid(file, fh, f); +} + +static int vidioc_g_fmt_out(struct file *file, void *fh, struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + /* + * LATER: this should return the currently valid format + * gstreamer doesn't like it, if this returns -EINVAL, as it + * then concludes that there is _no_ valid format + * CHECK whether this assumption is wrong, + * or whether we have to always provide a valid format + */ + f->fmt.pix = dev->pix_format; + return 0; +} + +static int vidioc_try_fmt_out(struct file *file, void *fh, + struct v4l2_format *f) +{ + return vidioc_try_fmt_vid(file, fh, f); +} + +static int vidioc_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f) +{ + return vidioc_s_fmt_vid(file, fh, f); +} + +// #define V4L2L_OVERLAY +#ifdef V4L2L_OVERLAY +/* ------------------ OVERLAY ----------------------- */ +/* currently unsupported */ +/* GSTreamer's v4l2sink is buggy, as it requires the overlay to work + * while it should only require it, if overlay is requested + * once the gstreamer element is fixed, remove the overlay dummies + */ +#warning OVERLAY dummies +static int vidioc_g_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} + +static int vidioc_s_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} +#endif /* V4L2L_OVERLAY */ + +/* ------------------ PARAMs ----------------------- */ + +/* get some data flow parameters, only capability, fps and readbuffers has + * effect on this driver + * called on VIDIOC_G_PARM + */ +static int vidioc_g_parm(struct file *file, void *fh, + struct v4l2_streamparm *parm) +{ + /* do not care about type of opener, hope these enums would always be + * compatible */ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, parm->type) < 0) + return -EINVAL; + parm->parm.capture = dev->capture_param; + return 0; +} + +/* get some data flow parameters, only capability, fps and readbuffers has + * effect on this driver + * called on VIDIOC_S_PARM + */ +static int vidioc_s_parm(struct file *file, void *fh, + struct v4l2_streamparm *parm) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + dprintk("S_PARM(frame-time=%u/%u)\n", + parm->parm.capture.timeperframe.numerator, + parm->parm.capture.timeperframe.denominator); + if (check_buffer_capability(dev, opener, parm->type) < 0) + return -EINVAL; + + switch (parm->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + set_timeperframe(dev, &parm->parm.capture.timeperframe); + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + set_timeperframe(dev, &parm->parm.output.timeperframe); + break; + default: + return -EINVAL; + } + + parm->parm.capture = dev->capture_param; + return 0; +} + +#ifdef V4L2LOOPBACK_WITH_STD +/* sets a tv standard, actually we do not need to handle this any special way + * added to support effecttv + * called on VIDIOC_S_STD + */ +static int vidioc_s_std(struct file *file, void *fh, v4l2_std_id *_std) +{ + v4l2_std_id req_std = 0, supported_std = 0; + const v4l2_std_id all_std = V4L2_STD_ALL, no_std = 0; + + if (_std) { + req_std = *_std; + *_std = all_std; + } + + /* we support everything in V4L2_STD_ALL, but not more... */ + supported_std = (all_std & req_std); + if (no_std == supported_std) + return -EINVAL; + + return 0; +} + +/* gets a fake video standard + * called on VIDIOC_G_STD + */ +static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm) +{ + if (norm) + *norm = V4L2_STD_ALL; + return 0; +} +/* gets a fake video standard + * called on VIDIOC_QUERYSTD + */ +static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm) +{ + if (norm) + *norm = V4L2_STD_ALL; + return 0; +} +#endif /* V4L2LOOPBACK_WITH_STD */ + +static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32 id, + s64 val) +{ + int result = 0; + switch (id) { + case CID_KEEP_FORMAT: + if (val < 0 || val > 1) + return -EINVAL; + dev->keep_format = val; + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + if (!dev->keep_format) { + if (has_no_owners(dev) && !any_buffers_mapped(dev)) + free_buffers(dev); + } + mutex_unlock(&dev->image_mutex); + break; + case CID_SUSTAIN_FRAMERATE: + if (val < 0 || val > 1) + return -EINVAL; + spin_lock_bh(&dev->lock); + dev->sustain_framerate = val; + check_timers(dev); + spin_unlock_bh(&dev->lock); + break; + case CID_TIMEOUT: + if (val < 0 || val > MAX_TIMEOUT) + return -EINVAL; + if (val > 0) { + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + /* on-the-fly allocate if device is owned; else + * allocate occurs on next S_FMT or REQBUFS */ + if (!has_no_owners(dev)) + result = allocate_timeout_buffer(dev); + mutex_unlock(&dev->image_mutex); + if (result < 0) { + /* disable timeout as buffer not alloc'd */ + spin_lock_bh(&dev->lock); + dev->timeout_jiffies = 0; + spin_unlock_bh(&dev->lock); + return result; + } + } + spin_lock_bh(&dev->lock); + dev->timeout_jiffies = msecs_to_jiffies(val); + check_timers(dev); + spin_unlock_bh(&dev->lock); + break; + case CID_TIMEOUT_IMAGE_IO: + dev->timeout_image_io = 1; + break; + default: + return -EINVAL; + } + return 0; +} + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct v4l2_loopback_device *dev = container_of( + ctrl->handler, struct v4l2_loopback_device, ctrl_handler); + return v4l2loopback_set_ctrl(dev, ctrl->id, ctrl->val); +} + +/* returns set of device outputs, in our case there is only one + * called on VIDIOC_ENUMOUTPUT + */ +static int vidioc_enum_output(struct file *file, void *fh, + struct v4l2_output *outp) +{ + __u32 index = outp->index; + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -ENOTTY; + if (index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(outp, 0, sizeof(*outp)); + + outp->index = index; + strscpy(outp->name, "loopback in", sizeof(outp->name)); + outp->type = V4L2_OUTPUT_TYPE_ANALOG; + outp->audioset = 0; + outp->modulator = 0; +#ifdef V4L2LOOPBACK_WITH_STD + outp->std = V4L2_STD_ALL; +#ifdef V4L2_OUT_CAP_STD + outp->capabilities |= V4L2_OUT_CAP_STD; +#endif /* V4L2_OUT_CAP_STD */ +#endif /* V4L2LOOPBACK_WITH_STD */ + + return 0; +} + +/* which output is currently active, + * called on VIDIOC_G_OUTPUT + */ +static int vidioc_g_output(struct file *file, void *fh, unsigned int *index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -ENOTTY; + if (index) + *index = 0; + return 0; +} + +/* set output, can make sense if we have more than one video src, + * called on VIDIOC_S_OUTPUT + */ +static int vidioc_s_output(struct file *file, void *fh, unsigned int index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -ENOTTY; + return index == 0 ? index : -EINVAL; +} + +/* returns set of device inputs, in our case there is only one, + * but later I may add more + * called on VIDIOC_ENUMINPUT + */ +static int vidioc_enum_input(struct file *file, void *fh, + struct v4l2_input *inp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + __u32 index = inp->index; + + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) + return -ENOTTY; + if (index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(inp, 0, sizeof(*inp)); + + inp->index = index; + strscpy(inp->name, "loopback", sizeof(inp->name)); + inp->type = V4L2_INPUT_TYPE_CAMERA; + inp->audioset = 0; + inp->tuner = 0; + inp->status = 0; + +#ifdef V4L2LOOPBACK_WITH_STD + inp->std = V4L2_STD_ALL; +#ifdef V4L2_IN_CAP_STD + inp->capabilities |= V4L2_IN_CAP_STD; +#endif +#endif /* V4L2LOOPBACK_WITH_STD */ + + if (has_output_token(dev->stream_tokens) && !dev->keep_format) + /* if no outputs attached; pretend device is powered off */ + inp->status |= V4L2_IN_ST_NO_SIGNAL; + + return 0; +} + +/* which input is currently active, + * called on VIDIOC_G_INPUT + */ +static int vidioc_g_input(struct file *file, void *fh, unsigned int *index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) + return -ENOTTY; /* NOTE: -EAGAIN might be more informative */ + if (index) + *index = 0; + return 0; +} + +/* set input, can make sense if we have more than one video src, + * called on VIDIOC_S_INPUT + */ +static int vidioc_s_input(struct file *file, void *fh, unsigned int index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (index != 0) + return -EINVAL; + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) + return -ENOTTY; /* NOTE: -EAGAIN might be more informative */ + return 0; +} + +/* --------------- V4L2 ioctl buffer related calls ----------------- */ + +#define is_allocated(opener, type, index) \ + (opener->format_token & (opener->io_method == V4L2L_IO_TIMEOUT ? \ + V4L2L_TOKEN_TIMEOUT : \ + token_from_type(type)) && \ + (index) < (opener)->buffer_count) +#define BUFFER_DEBUG_FMT_STR \ + "buffer#%u @ %p type=%u bytesused=%u length=%u flags=%x " \ + "field=%u timestamp= %lld.%06lldsequence=%u\n" +#define BUFFER_DEBUG_FMT_ARGS(buf) \ + (buf)->index, (buf), (buf)->type, (buf)->bytesused, (buf)->length, \ + (buf)->flags, (buf)->field, \ + (long long)(buf)->timestamp.tv_sec, \ + (long long)(buf)->timestamp.tv_usec, (buf)->sequence +/* Buffer flag helpers */ +#define unset_flags(flags) \ + do { \ + flags &= ~V4L2_BUF_FLAG_QUEUED; \ + flags &= ~V4L2_BUF_FLAG_DONE; \ + } while (0) +#define set_queued(flags) \ + do { \ + flags |= V4L2_BUF_FLAG_QUEUED; \ + flags &= ~V4L2_BUF_FLAG_DONE; \ + } while (0) +#define set_done(flags) \ + do { \ + flags &= ~V4L2_BUF_FLAG_QUEUED; \ + flags |= V4L2_BUF_FLAG_DONE; \ + } while (0) + +static bool any_buffers_mapped(struct v4l2_loopback_device *dev) +{ + u32 index; + for (index = 0; index < dev->buffer_count; ++index) + if (dev->buffers[index].buffer.flags & V4L2_BUF_FLAG_MAPPED) + return true; + return false; +} + +static void prepare_buffer_queue(struct v4l2_loopback_device *dev, int count) +{ + struct v4l2l_buffer *bufd, *n; + u32 pos; + + spin_lock_bh(&dev->list_lock); + + /* ensure sufficient number of buffers in queue */ + for (pos = 0; pos < count; ++pos) { + bufd = &dev->buffers[pos]; + if (list_empty(&bufd->list_head)) + list_add_tail(&bufd->list_head, &dev->outbufs_list); + } + if (list_empty(&dev->outbufs_list)) + goto exit_prepare_queue_unlock; + + /* remove any excess buffers */ + list_for_each_entry_safe(bufd, n, &dev->outbufs_list, list_head) { + if (bufd->buffer.index >= count) + list_del_init(&bufd->list_head); + } + + /* buffers are no longer queued; and `write_position` will correspond + * to the first item of `outbufs_list`. */ + pos = v4l2l_mod64(dev->write_position, count); + list_for_each_entry(bufd, &dev->outbufs_list, list_head) { + unset_flags(bufd->buffer.flags); + dev->bufpos2index[pos % count] = bufd->buffer.index; + ++pos; + } +exit_prepare_queue_unlock: + spin_unlock_bh(&dev->list_lock); +} + +/* forward declaration */ +static int vidioc_streamoff(struct file *file, void *fh, + enum v4l2_buf_type type); +/* negotiate buffer type + * only mmap streaming supported + * called on VIDIOC_REQBUFS + */ +static int vidioc_reqbufs(struct file *file, void *fh, + struct v4l2_requestbuffers *reqbuf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 token = opener->io_method == V4L2L_IO_TIMEOUT ? + V4L2L_TOKEN_TIMEOUT : + token_from_type(reqbuf->type); + u32 req_count = reqbuf->count; + int result = 0; + + dprintk("REQBUFS(memory=%u, req_count=%u) and device-bufs=%u/%u " + "[used/max]\n", + reqbuf->memory, req_count, dev->used_buffer_count, + dev->buffer_count); + + switch (reqbuf->memory) { + case V4L2_MEMORY_MMAP: +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) + reqbuf->capabilities = 0; /* only guarantee MMAP support */ +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + reqbuf->flags = 0; /* no memory consistency support */ +#endif + break; + default: + return -EINVAL; + } + + if (opener->format_token & ~token) + /* different (buffer) type already assigned to descriptor by + * S_FMT or REQBUFS */ + return -EINVAL; + + MARK(); + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; /* -EINTR */ + + /* CASE queue/dequeue timeout-buffer only: */ + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + opener->buffer_count = req_count; + if (req_count == 0) + release_token(dev, opener, format); + goto exit_reqbufs_unlock; + } + + MARK(); + /* CASE count is zero: streamoff, free buffers, release their token */ + if (req_count == 0) { + if (dev->format_tokens & token) { + acquire_token(dev, opener, format, token); + opener->io_method = V4L2L_IO_MMAP; + } + result = vidioc_streamoff(file, fh, reqbuf->type); + opener->buffer_count = 0; + /* undocumented requirement - REQBUFS with count zero should + * ALSO release lock on logical stream */ + if (opener->format_token) + release_token(dev, opener, format); + if (has_no_owners(dev)) + dev->used_buffer_count = 0; + goto exit_reqbufs_unlock; + } + + /* CASE count non-zero: allocate buffers and acquire token for them */ + MARK(); + switch (reqbuf->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (!(dev->format_tokens & token || + opener->format_token & token)) + /* only exclusive ownership for each stream */ + result = -EBUSY; + break; + default: + result = -EINVAL; + } + if (result < 0) + goto exit_reqbufs_unlock; + + if (has_other_owners(opener, dev) && dev->used_buffer_count > 0) { + /* allow 'allocation' of existing number of buffers */ + req_count = dev->used_buffer_count; + } else if (any_buffers_mapped(dev)) { + /* do not allow re-allocation if buffers are mapped */ + result = -EBUSY; + goto exit_reqbufs_unlock; + } + + MARK(); + opener->buffer_count = 0; + + if (req_count > dev->buffer_count) + req_count = dev->buffer_count; + + if (has_no_owners(dev)) { + result = allocate_buffers(dev, &dev->pix_format); + if (result < 0) + goto exit_reqbufs_unlock; + } + if (!dev->timeout_image && need_timeout_buffer(dev, token)) { + result = allocate_timeout_buffer(dev); + if (result < 0) + goto exit_reqbufs_unlock; + } + acquire_token(dev, opener, format, token); + + MARK(); + switch (opener->io_method) { + case V4L2L_IO_TIMEOUT: + dev->timeout_image_io = 0; + opener->buffer_count = req_count; + break; + default: + opener->io_method = V4L2L_IO_MMAP; + prepare_buffer_queue(dev, req_count); + dev->used_buffer_count = opener->buffer_count = req_count; + } +exit_reqbufs_unlock: + mutex_unlock(&dev->image_mutex); + reqbuf->count = opener->buffer_count; + return result; +} + +/* returns buffer asked for; + * give app as many buffers as it wants, if it less than MAX, + * but map them in our inner buffers + * called on VIDIOC_QUERYBUF + */ +static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 type = buf->type; + u32 index = buf->index; + + if ((type != V4L2_BUF_TYPE_VIDEO_CAPTURE) && + (type != V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -EINVAL; + if (!is_allocated(opener, type, index)) + return -EINVAL; + + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + *buf = dev->timeout_buffer.buffer; + buf->index = index; + } else + *buf = dev->buffers[index].buffer; + + buf->type = type; + + if (!(buf->flags & (V4L2_BUF_FLAG_DONE | V4L2_BUF_FLAG_QUEUED))) { + /* v4l2-compliance requires these to be zero */ + buf->sequence = 0; + buf->timestamp.tv_sec = buf->timestamp.tv_usec = 0; + } else if (V4L2_TYPE_IS_CAPTURE(type)) { + /* guess flags based on sequence values */ + if (buf->sequence >= opener->read_position) { + set_done(buf->flags); + } else if (buf->flags & V4L2_BUF_FLAG_DONE) { + set_queued(buf->flags); + } + } + dprintkrw("QUERYBUF(%s, index=%u) -> " BUFFER_DEBUG_FMT_STR, + V4L2_TYPE_IS_CAPTURE(type) ? "CAPTURE" : "OUTPUT", index, + BUFFER_DEBUG_FMT_ARGS(buf)); + return 0; +} + +static void buffer_written(struct v4l2_loopback_device *dev, + struct v4l2l_buffer *buf) +{ + del_timer_sync(&dev->sustain_timer); + del_timer_sync(&dev->timeout_timer); + + spin_lock_bh(&dev->list_lock); + list_move_tail(&buf->list_head, &dev->outbufs_list); + spin_unlock_bh(&dev->list_lock); + + spin_lock_bh(&dev->lock); + dev->bufpos2index[v4l2l_mod64(dev->write_position, + dev->used_buffer_count)] = + buf->buffer.index; + ++dev->write_position; + dev->reread_count = 0; + + check_timers(dev); + spin_unlock_bh(&dev->lock); +} + +/* put buffer to queue + * called on VIDIOC_QBUF + */ +static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + struct v4l2l_buffer *bufd; + u32 index = buf->index; + u32 type = buf->type; + + if (!is_allocated(opener, type, index)) + return -EINVAL; + bufd = &dev->buffers[index]; + + switch (buf->memory) { + case V4L2_MEMORY_MMAP: + if (!(bufd->buffer.flags & V4L2_BUF_FLAG_MAPPED)) + dprintkrw("QBUF() unmapped buffer [index=%u]\n", index); + break; + default: + return -EINVAL; + } + + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + set_queued(buf->flags); + return 0; + } + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + dprintkrw("QBUF(CAPTURE, index=%u) -> " BUFFER_DEBUG_FMT_STR, + index, BUFFER_DEBUG_FMT_ARGS(buf)); + set_queued(buf->flags); + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + dprintkrw("QBUF(OUTPUT, index=%u) -> " BUFFER_DEBUG_FMT_STR, + index, BUFFER_DEBUG_FMT_ARGS(buf)); + if (!(bufd->buffer.flags & V4L2_BUF_FLAG_TIMESTAMP_COPY) && + (buf->timestamp.tv_sec == 0 && + buf->timestamp.tv_usec == 0)) { + v4l2l_get_timestamp(&bufd->buffer); + } else { + bufd->buffer.timestamp = buf->timestamp; + bufd->buffer.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY; + bufd->buffer.flags &= + ~V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + } + if (dev->pix_format_has_valid_sizeimage) { + if (buf->bytesused >= dev->pix_format.sizeimage) { + bufd->buffer.bytesused = + dev->pix_format.sizeimage; + } else { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) + dev_warn_ratelimited( + &dev->vdev->dev, +#else + dprintkrw( +#endif + "warning queued output buffer bytesused too small %u < %u\n", + buf->bytesused, + dev->pix_format.sizeimage); + bufd->buffer.bytesused = buf->bytesused; + } + } else { + bufd->buffer.bytesused = buf->bytesused; + } + bufd->buffer.sequence = dev->write_position; + set_queued(bufd->buffer.flags); + *buf = bufd->buffer; + buffer_written(dev, bufd); + set_done(bufd->buffer.flags); + wake_up_all(&dev->read_event); + break; + default: + return -EINVAL; + } + buf->type = type; + return 0; +} + +static int can_read(struct v4l2_loopback_device *dev, + struct v4l2_loopback_opener *opener) +{ + int ret; + + spin_lock_bh(&dev->lock); + check_timers(dev); + ret = dev->write_position > opener->read_position || + dev->reread_count > opener->reread_count || dev->timeout_happened; + spin_unlock_bh(&dev->lock); + return ret; +} + +static int get_capture_buffer(struct file *file) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + int pos, timeout_happened; + u32 index; + + if ((file->f_flags & O_NONBLOCK) && + (dev->write_position <= opener->read_position && + dev->reread_count <= opener->reread_count && + !dev->timeout_happened)) + return -EAGAIN; + wait_event_interruptible(dev->read_event, can_read(dev, opener)); + + spin_lock_bh(&dev->lock); + if (dev->write_position == opener->read_position) { + if (dev->reread_count > opener->reread_count + 2) + opener->reread_count = dev->reread_count - 1; + ++opener->reread_count; + pos = v4l2l_mod64(opener->read_position + + dev->used_buffer_count - 1, + dev->used_buffer_count); + } else { + opener->reread_count = 0; + if (dev->write_position > + opener->read_position + dev->used_buffer_count) + opener->read_position = dev->write_position - 1; + pos = v4l2l_mod64(opener->read_position, + dev->used_buffer_count); + ++opener->read_position; + } + timeout_happened = dev->timeout_happened && (dev->timeout_jiffies > 0); + dev->timeout_happened = 0; + spin_unlock_bh(&dev->lock); + + index = dev->bufpos2index[pos]; + if (timeout_happened) { + if (index >= dev->used_buffer_count) { + dprintkrw("get_capture_buffer() read position is at " + "an unallocated buffer [index=%u]\n", + index); + return -EFAULT; + } + /* although allocated on-demand, timeout_image is freed only + * in free_buffers(), so we don't need to worry about it being + * deallocated suddenly */ + memcpy(dev->image + dev->buffers[index].buffer.m.offset, + dev->timeout_image, dev->buffer_size); + } + return (int)index; +} + +/* put buffer to dequeue + * called on VIDIOC_DQBUF + */ +static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 type = buf->type; + int index; + struct v4l2l_buffer *bufd; + + if (buf->memory != V4L2_MEMORY_MMAP) + return -EINVAL; + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + *buf = dev->timeout_buffer.buffer; + buf->type = type; + unset_flags(buf->flags); + return 0; + } + if ((opener->buffer_count == 0) || + !(opener->format_token & token_from_type(type))) + return -EINVAL; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + index = get_capture_buffer(file); + if (index < 0) + return index; + *buf = dev->buffers[index].buffer; + unset_flags(buf->flags); + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + spin_lock_bh(&dev->list_lock); + + bufd = list_first_entry_or_null(&dev->outbufs_list, + struct v4l2l_buffer, list_head); + if (bufd) + list_move_tail(&bufd->list_head, &dev->outbufs_list); + + spin_unlock_bh(&dev->list_lock); + if (!bufd) + return -EFAULT; + unset_flags(bufd->buffer.flags); + *buf = bufd->buffer; + break; + default: + return -EINVAL; + } + + buf->type = type; + dprintkrw("DQBUF(%s, index=%u) -> " BUFFER_DEBUG_FMT_STR, + V4L2_TYPE_IS_CAPTURE(type) ? "CAPTURE" : "OUTPUT", index, + BUFFER_DEBUG_FMT_ARGS(buf)); + return 0; +} + +/* ------------- STREAMING ------------------- */ + +/* start streaming + * called on VIDIOC_STREAMON + */ +static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 token = token_from_type(type); + + /* short-circuit when using timeout buffer set */ + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) + return 0; + /* opener must have claimed (same) buffer set via REQBUFS */ + if (!opener->buffer_count || !(opener->format_token & token)) + return -EINVAL; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (has_output_token(dev->stream_tokens) && !dev->keep_format) + return -EIO; + if (dev->stream_tokens & token) { + acquire_token(dev, opener, stream, token); + client_usage_queue_event(dev->vdev); + } + return 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (dev->stream_tokens & token) + acquire_token(dev, opener, stream, token); + return 0; + default: + return -EINVAL; + } +} + +/* stop streaming + * called on VIDIOC_STREAMOFF + */ +static int vidioc_streamoff(struct file *file, void *fh, + enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 token = token_from_type(type); + + /* short-circuit when using timeout buffer set */ + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) + return 0; + /* short-circuit when buffer set has no owner */ + if (dev->format_tokens & token) + return 0; + /* opener needs a claim to buffer set */ + if (!opener->format_token) + return -EBUSY; + if (opener->format_token & ~token) + return -EINVAL; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (opener->stream_token & token) + release_token(dev, opener, stream); + /* reset output queue */ + if (dev->used_buffer_count > 0) + prepare_buffer_queue(dev, dev->used_buffer_count); + return 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (opener->stream_token & token) { + release_token(dev, opener, stream); + client_usage_queue_event(dev->vdev); + } + return 0; + default: + return -EINVAL; + } +} + +#ifdef CONFIG_VIDEO_V4L1_COMPAT +static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + p->frames = dev->buffer_count; + p->offsets[0] = 0; + p->offsets[1] = 0; + p->size = dev->buffer_size; + return 0; +} +#endif + +static void client_usage_queue_event(struct video_device *vdev) +{ + struct v4l2_event ev; + struct v4l2_loopback_device *dev; + + dev = container_of(vdev->v4l2_dev, struct v4l2_loopback_device, + v4l2_dev); + + memset(&ev, 0, sizeof(ev)); + ev.type = V4L2_EVENT_PRI_CLIENT_USAGE; + ((struct v4l2_event_client_usage *)&ev.u)->count = + !has_capture_token(dev->stream_tokens); + + v4l2_event_queue(vdev, &ev); +} + +static int client_usage_ops_add(struct v4l2_subscribed_event *sev, + unsigned elems) +{ + if (!(sev->flags & V4L2_EVENT_SUB_FL_SEND_INITIAL)) + return 0; + + client_usage_queue_event(sev->fh->vdev); + return 0; +} + +static void client_usage_ops_replace(struct v4l2_event *old, + const struct v4l2_event *new) +{ + *((struct v4l2_event_client_usage *)&old->u) = + *((struct v4l2_event_client_usage *)&new->u); +} + +static void client_usage_ops_merge(const struct v4l2_event *old, + struct v4l2_event *new) +{ + *((struct v4l2_event_client_usage *)&new->u) = + *((struct v4l2_event_client_usage *)&old->u); +} + +const struct v4l2_subscribed_event_ops client_usage_ops = { + .add = client_usage_ops_add, + .replace = client_usage_ops_replace, + .merge = client_usage_ops_merge, +}; + +static int vidioc_subscribe_event(struct v4l2_fh *fh, + const struct v4l2_event_subscription *sub) +{ + switch (sub->type) { + case V4L2_EVENT_CTRL: + return v4l2_ctrl_subscribe_event(fh, sub); + case V4L2_EVENT_PRI_CLIENT_USAGE: + return v4l2_event_subscribe(fh, sub, 0, &client_usage_ops); + } + + return -EINVAL; +} + +/* file operations */ +static void vm_open(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + atomic_inc(&buf->use_count); + buf->buffer.flags |= V4L2_BUF_FLAG_MAPPED; +} + +static void vm_close(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + if (atomic_dec_and_test(&buf->use_count)) + buf->buffer.flags &= ~V4L2_BUF_FLAG_MAPPED; +} + +static struct vm_operations_struct vm_ops = { + .open = vm_open, + .close = vm_close, +}; + +static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma) +{ + u8 *addr; + unsigned long start, size, offset; + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + struct v4l2l_buffer *buffer = NULL; + int result = 0; + MARK(); + + offset = (unsigned long)vma->vm_pgoff << PAGE_SHIFT; + start = (unsigned long)vma->vm_start; + size = (unsigned long)(vma->vm_end - vma->vm_start); /* always != 0 */ + + /* ensure buffer size, count, and allocated image(s) are not altered by + * other file descriptors */ + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + + if (size > dev->buffer_size) { + dprintk("mmap() attempt to map %lubytes when %ubytes are " + "allocated to buffers\n", + size, dev->buffer_size); + result = -EINVAL; + goto exit_mmap_unlock; + } + if (offset % dev->buffer_size != 0) { + dprintk("mmap() offset does not match start of any buffer\n"); + result = -EINVAL; + goto exit_mmap_unlock; + } + switch (opener->format_token) { + case V4L2L_TOKEN_TIMEOUT: + if (offset != (unsigned long)dev->buffer_size * MAX_BUFFERS) { + dprintk("mmap() incorrect offset for timeout image\n"); + result = -EINVAL; + goto exit_mmap_unlock; + } + buffer = &dev->timeout_buffer; + addr = dev->timeout_image; + break; + default: + if (offset >= dev->image_size) { + dprintk("mmap() attempt to map beyond all buffers\n"); + result = -EINVAL; + goto exit_mmap_unlock; + } + u32 index = offset / dev->buffer_size; + buffer = &dev->buffers[index]; + addr = dev->image + offset; + break; + } + + while (size > 0) { + struct page *page = vmalloc_to_page(addr); + + result = vm_insert_page(vma, start, page); + if (result < 0) + goto exit_mmap_unlock; + + start += PAGE_SIZE; + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + vma->vm_ops = &vm_ops; + vma->vm_private_data = buffer; + + vm_open(vma); +exit_mmap_unlock: + mutex_unlock(&dev->image_mutex); + return result; +} + +static unsigned int v4l2_loopback_poll(struct file *file, + struct poll_table_struct *pts) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + __poll_t req_events = poll_requested_events(pts); + int ret_mask = 0; + + /* call poll_wait in first call, regardless, to ensure that the + * wait-queue is not null */ + poll_wait(file, &dev->read_event, pts); + poll_wait(file, &opener->fh.wait, pts); + + if (req_events & POLLPRI) { + if (v4l2_event_pending(&opener->fh)) { + ret_mask |= POLLPRI; + if (!(req_events & DEFAULT_POLLMASK)) + return ret_mask; + } + } + + switch (opener->format_token) { + case V4L2L_TOKEN_OUTPUT: + if (opener->stream_token != 0 || + opener->io_method == V4L2L_IO_NONE) + ret_mask |= POLLOUT | POLLWRNORM; + break; + case V4L2L_TOKEN_CAPTURE: + if ((opener->io_method == V4L2L_IO_NONE || + opener->stream_token != 0) && + can_read(dev, opener)) + ret_mask |= POLLIN | POLLWRNORM; + break; + case V4L2L_TOKEN_TIMEOUT: + ret_mask |= POLLOUT | POLLWRNORM; + break; + default: + break; + } + + return ret_mask; +} + +/* do not want to limit device opens, it can be as many readers as user want, + * writers are limited by means of setting writer field */ +static int v4l2_loopback_open(struct file *file) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + + dev = v4l2loopback_getdevice(file); + if (dev->open_count.counter >= dev->max_openers) + return -EBUSY; + /* kfree on close */ + opener = kzalloc(sizeof(*opener), GFP_KERNEL); + if (opener == NULL) + return -ENOMEM; + + atomic_inc(&dev->open_count); + if (dev->timeout_image_io && dev->format_tokens & V4L2L_TOKEN_TIMEOUT) + /* will clear timeout_image_io once buffer set acquired */ + opener->io_method = V4L2L_IO_TIMEOUT; + + v4l2_fh_init(&opener->fh, video_devdata(file)); + file->private_data = &opener->fh; + + v4l2_fh_add(&opener->fh); + dprintk("open() -> dev@%p with image@%p\n", dev, + dev ? dev->image : NULL); + return 0; +} + +static int v4l2_loopback_close(struct file *file) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + int result = 0; + dprintk("close() -> dev@%p with image@%p\n", dev, + dev ? dev->image : NULL); + + if (opener->format_token) { + struct v4l2_requestbuffers reqbuf = { + .count = 0, .memory = V4L2_MEMORY_MMAP, .type = 0 + }; + switch (opener->format_token) { + case V4L2L_TOKEN_CAPTURE: + reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + break; + case V4L2L_TOKEN_OUTPUT: + case V4L2L_TOKEN_TIMEOUT: + reqbuf.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + break; + } + if (reqbuf.type) + result = vidioc_reqbufs(file, file->private_data, + &reqbuf); + if (result < 0) + dprintk("failed to free buffers REQBUFS(count=0) " + " returned %d\n", + result); + mutex_lock(&dev->image_mutex); + release_token(dev, opener, format); + mutex_unlock(&dev->image_mutex); + } + + if (atomic_dec_and_test(&dev->open_count)) { + del_timer_sync(&dev->sustain_timer); + del_timer_sync(&dev->timeout_timer); + if (!dev->keep_format) { + mutex_lock(&dev->image_mutex); + free_buffers(dev); + mutex_unlock(&dev->image_mutex); + } + } + + v4l2_fh_del(&opener->fh); + v4l2_fh_exit(&opener->fh); + + kfree(opener); + return 0; +} + +static int start_fileio(struct file *file, void *fh, enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + struct v4l2_requestbuffers reqbuf = { .count = dev->buffer_count, + .memory = V4L2_MEMORY_MMAP, + .type = type }; + int token = token_from_type(type); + int result; + + if (opener->format_token & V4L2L_TOKEN_TIMEOUT || + opener->format_token & ~token) + return -EBUSY; /* NOTE: -EBADF might be more informative */ + + /* short-circuit if already have stream token */ + if (opener->stream_token && opener->io_method == V4L2L_IO_FILE) + return 0; + + /* otherwise attempt to acquire stream token and assign IO method */ + if (!(dev->stream_tokens & token) || opener->io_method != V4L2L_IO_NONE) + return -EBUSY; + + result = vidioc_reqbufs(file, fh, &reqbuf); + if (result < 0) + return result; + result = vidioc_streamon(file, fh, type); + if (result < 0) + return result; + + opener->io_method = V4L2L_IO_FILE; + return 0; +} + +static ssize_t v4l2_loopback_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_buffer *b; + int index, result; + + dprintkrw("read() %zu bytes\n", count); + result = start_fileio(file, file->private_data, + V4L2_BUF_TYPE_VIDEO_CAPTURE); + if (result < 0) + return result; + + index = get_capture_buffer(file); + if (index < 0) + return index; + b = &dev->buffers[index].buffer; + if (count > b->bytesused) + count = b->bytesused; + if (copy_to_user((void *)buf, (void *)(dev->image + b->m.offset), + count)) { + printk(KERN_ERR "v4l2-loopback read() failed copy_to_user()\n"); + return -EFAULT; + } + return count; +} + +static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_buffer *b; + int index, result; + + dprintkrw("write() %zu bytes\n", count); + result = start_fileio(file, file->private_data, + V4L2_BUF_TYPE_VIDEO_OUTPUT); + if (result < 0) + return result; + + if (count > dev->buffer_size) + count = dev->buffer_size; + index = v4l2l_mod64(dev->write_position, dev->used_buffer_count); + b = &dev->buffers[index].buffer; + + if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf, + count)) { + printk(KERN_ERR + "v4l2-loopback write() failed copy_from_user()\n"); + return -EFAULT; + } + b->bytesused = count; + + v4l2l_get_timestamp(b); + b->sequence = dev->write_position; + set_queued(b->flags); + buffer_written(dev, &dev->buffers[index]); + set_done(b->flags); + wake_up_all(&dev->read_event); + + return count; +} + +/* init functions */ +/* frees buffers, if allocated */ +static void free_buffers(struct v4l2_loopback_device *dev) +{ + dprintk("free_buffers() with image@%p\n", dev->image); + if (!dev->image) + return; + if (!has_no_owners(dev) || any_buffers_mapped(dev)) + /* maybe an opener snuck in before image_mutex was acquired */ + printk(KERN_WARNING + "v4l2-loopback free_buffers() buffers of video device " + "#%u freed while still mapped to userspace\n", + dev->vdev->num); + vfree(dev->image); + dev->image = NULL; + dev->image_size = 0; + dev->buffer_size = 0; +} + +static void free_timeout_buffer(struct v4l2_loopback_device *dev) +{ + dprintk("free_timeout_buffer() with timeout_image@%p\n", + dev->timeout_image); + if (!dev->timeout_image) + return; + + if ((dev->timeout_jiffies > 0 && !has_no_owners(dev)) || + dev->timeout_buffer.buffer.flags & V4L2_BUF_FLAG_MAPPED) + printk(KERN_WARNING + "v4l2-loopback free_timeout_buffer() timeout image " + "of device #%u freed while still mapped to userspace\n", + dev->vdev->num); + + vfree(dev->timeout_image); + dev->timeout_image = NULL; + dev->timeout_buffer_size = 0; +} +/* allocates buffers if no (other) openers are already using them */ +static int allocate_buffers(struct v4l2_loopback_device *dev, + struct v4l2_pix_format *pix_format) +{ + u32 buffer_size = PAGE_ALIGN(pix_format->sizeimage); + unsigned long image_size = + (unsigned long)buffer_size * (unsigned long)dev->buffer_count; + /* vfree on close file operation in case no open handles left */ + + if (buffer_size == 0 || dev->buffer_count == 0 || + buffer_size < pix_format->sizeimage) + return -EINVAL; + + if ((__LONG_MAX__ / buffer_size) < dev->buffer_count) + return -ENOSPC; + + dprintk("allocate_buffers() size %lubytes = %ubytes x %ubuffers\n", + image_size, buffer_size, dev->buffer_count); + if (dev->image) { + /* check that no buffers are expected in user-space */ + if (!has_no_owners(dev) || any_buffers_mapped(dev)) + return -EBUSY; + dprintk("allocate_buffers() existing size=%lubytes\n", + dev->image_size); + /* FIXME: prevent double allocation more intelligently! */ + if (image_size == dev->image_size) { + dprintk("allocate_buffers() keep existing\n"); + return 0; + } + free_buffers(dev); + } + + /* FIXME: set buffers to 0 */ + dev->image = vmalloc(image_size); + if (dev->image == NULL) { + dev->buffer_size = dev->image_size = 0; + return -ENOMEM; + } + init_buffers(dev, pix_format->sizeimage, buffer_size); + dev->buffer_size = buffer_size; + dev->image_size = image_size; + dprintk("allocate_buffers() -> vmalloc'd %lubytes\n", dev->image_size); + return 0; +} +static int allocate_timeout_buffer(struct v4l2_loopback_device *dev) +{ + /* device's `buffer_size` and `buffers` must be initialised in + * allocate_buffers() */ + + dprintk("allocate_timeout_buffer() size %ubytes\n", dev->buffer_size); + if (dev->buffer_size == 0) + return -EINVAL; + + if (dev->timeout_image) { + if (dev->timeout_buffer.buffer.flags & V4L2_BUF_FLAG_MAPPED) + return -EBUSY; + if (dev->buffer_size == dev->timeout_buffer_size) + return 0; + free_timeout_buffer(dev); + } + + dev->timeout_image = vzalloc(dev->buffer_size); + if (!dev->timeout_image) { + dev->timeout_buffer_size = 0; + return -ENOMEM; + } + dev->timeout_buffer_size = dev->buffer_size; + return 0; +} +/* init inner buffers, they are capture mode and flags are set as for capture + * mode buffers */ +static void init_buffers(struct v4l2_loopback_device *dev, u32 bytes_used, + u32 buffer_size) +{ + u32 i; + + for (i = 0; i < dev->buffer_count; ++i) { + struct v4l2_buffer *b = &dev->buffers[i].buffer; + b->index = i; + b->bytesused = bytes_used; + b->length = buffer_size; + b->field = V4L2_FIELD_NONE; + b->flags = 0; + b->m.offset = i * buffer_size; + b->memory = V4L2_MEMORY_MMAP; + b->sequence = 0; + b->timestamp.tv_sec = 0; + b->timestamp.tv_usec = 0; + b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + v4l2l_get_timestamp(b); + } + dev->timeout_buffer = dev->buffers[0]; + dev->timeout_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size; +} + +/* fills and register video device */ +static void init_vdev(struct video_device *vdev, int nr) +{ +#ifdef V4L2LOOPBACK_WITH_STD + vdev->tvnorms = V4L2_STD_ALL; +#endif /* V4L2LOOPBACK_WITH_STD */ + + vdev->vfl_type = VFL_TYPE_VIDEO; + vdev->fops = &v4l2_loopback_fops; + vdev->ioctl_ops = &v4l2_loopback_ioctl_ops; + vdev->release = &video_device_release; + vdev->minor = -1; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + vdev->device_caps = V4L2_CAP_DEVICE_CAPS | V4L2_CAP_VIDEO_CAPTURE | + V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_READWRITE | + V4L2_CAP_STREAMING; +#endif + + if (debug > 1) + vdev->dev_debug = V4L2_DEV_DEBUG_IOCTL | + V4L2_DEV_DEBUG_IOCTL_ARG; + + vdev->vfl_dir = VFL_DIR_M2M; +} + +/* init default capture parameters, only fps may be changed in future */ +static void init_capture_param(struct v4l2_captureparm *capture_param) +{ + capture_param->capability = V4L2_CAP_TIMEPERFRAME; /* since 2.16 */ + capture_param->capturemode = 0; + capture_param->extendedmode = 0; + capture_param->readbuffers = max_buffers; + capture_param->timeperframe.numerator = 1; + capture_param->timeperframe.denominator = V4L2LOOPBACK_FPS_DEFAULT; +} + +static void check_timers(struct v4l2_loopback_device *dev) +{ + if (has_output_token(dev->stream_tokens)) + return; + + if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer)) + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer)) + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies * 3 / 2); +} +#ifdef HAVE_TIMER_SETUP +static void sustain_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = from_timer(dev, t, sustain_timer); +#else +static void sustain_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->sustain_framerate) { + dev->reread_count++; + dprintkrw("sustain_timer_clb() write_pos=%lld reread=%u\n", + (long long)dev->write_position, dev->reread_count); + if (dev->reread_count == 1) + mod_timer(&dev->sustain_timer, + jiffies + max(1UL, dev->frame_jiffies / 2)); + else + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} +#ifdef HAVE_TIMER_SETUP +static void timeout_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = from_timer(dev, t, timeout_timer); +#else +static void timeout_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->timeout_jiffies > 0) { + dev->timeout_happened = 1; + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} + +/* init loopback main structure */ +#define DEFAULT_FROM_CONF(confmember, default_condition, default_value) \ + ((conf) ? \ + ((conf->confmember default_condition) ? (default_value) : \ + (conf->confmember)) : \ + default_value) + +static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr) +{ + struct v4l2_loopback_device *dev; + struct v4l2_ctrl_handler *hdl; + struct v4l2loopback_private *vdev_priv = NULL; + int err; + + u32 _width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; + u32 _height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; + + u32 _min_width = DEFAULT_FROM_CONF(min_width, + < V4L2LOOPBACK_SIZE_MIN_WIDTH, + V4L2LOOPBACK_SIZE_MIN_WIDTH); + u32 _min_height = DEFAULT_FROM_CONF(min_height, + < V4L2LOOPBACK_SIZE_MIN_HEIGHT, + V4L2LOOPBACK_SIZE_MIN_HEIGHT); + u32 _max_width = DEFAULT_FROM_CONF(max_width, < _min_width, max_width); + u32 _max_height = + DEFAULT_FROM_CONF(max_height, < _min_height, max_height); + bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ? + (bool)(conf->announce_all_caps) : + !(V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS); + int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers); + int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers); + struct v4l2_format _fmt; + + int nr = -1; + + if (conf) { + const int output_nr = conf->output_nr; +#ifdef SPLIT_DEVICES + const int capture_nr = conf->capture_nr; +#else + const int capture_nr = output_nr; +#endif + if (capture_nr >= 0 && output_nr == capture_nr) { + nr = output_nr; + } else if (capture_nr < 0 && output_nr < 0) { + nr = -1; + } else if (capture_nr < 0) { + nr = output_nr; + } else if (output_nr < 0) { + nr = capture_nr; + } else { + printk(KERN_ERR + "v4l2-loopback add() split OUTPUT and CAPTURE " + "devices not yet supported.\n"); + printk(KERN_INFO + "v4l2-loopback add() both devices must have the " + "same number (%d != %d).\n", + output_nr, capture_nr); + return -EINVAL; + } + } + + if (idr_find(&v4l2loopback_index_idr, nr)) + return -EEXIST; + + /* initialisation of a new device */ + dprintk("add() creating device #%d\n", nr); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + /* allocate id, if @id >= 0, we're requesting that specific id */ + if (nr >= 0) { + err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1, + GFP_KERNEL); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL); + } + if (err < 0) + goto out_free_dev; + + /* register new device */ + MARK(); + nr = err; + + if (conf && conf->card_label[0]) { + snprintf(dev->card_label, sizeof(dev->card_label), "%s", + conf->card_label); + } else { + snprintf(dev->card_label, sizeof(dev->card_label), + "Dummy video device (0x%04X)", nr); + } + snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), + "v4l2loopback-%03d", nr); + + err = v4l2_device_register(NULL, &dev->v4l2_dev); + if (err) + goto out_free_idr; + + /* initialise the _video_ device */ + MARK(); + err = -ENOMEM; + dev->vdev = video_device_alloc(); + if (dev->vdev == NULL) + goto out_unregister; + + vdev_priv = kzalloc(sizeof(struct v4l2loopback_private), GFP_KERNEL); + if (vdev_priv == NULL) + goto out_unregister; + + video_set_drvdata(dev->vdev, vdev_priv); + if (video_get_drvdata(dev->vdev) == NULL) + goto out_unregister; + + snprintf(dev->vdev->name, sizeof(dev->vdev->name), "%s", + dev->card_label); + vdev_priv->device_nr = nr; + init_vdev(dev->vdev, nr); + dev->vdev->v4l2_dev = &dev->v4l2_dev; + + /* initialise v4l2-loopback specific parameters */ + MARK(); + dev->announce_all_caps = _announce_all_caps; + dev->min_width = _min_width; + dev->min_height = _min_height; + dev->max_width = _max_width; + dev->max_height = _max_height; + dev->max_openers = _max_openers; + + /* set (initial) pixel and stream format */ + _width = clamp_val(_width, _min_width, _max_width); + _height = clamp_val(_height, _min_height, _max_height); + _fmt = (struct v4l2_format){ + .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, + .fmt.pix = { .width = _width, + .height = _height, + .pixelformat = formats[0].fourcc, + .colorspace = V4L2_COLORSPACE_DEFAULT, + .field = V4L2_FIELD_NONE } + }; + + err = v4l2l_fill_format(&_fmt, _min_width, _max_width, _min_height, + _max_height); + if (err) + /* highly unexpected failure to assign default format */ + goto out_unregister; + dev->pix_format = _fmt.fmt.pix; + init_capture_param(&dev->capture_param); + set_timeperframe(dev, &dev->capture_param.timeperframe); + + /* ctrls parameters */ + dev->keep_format = 0; + dev->sustain_framerate = 0; + dev->timeout_jiffies = 0; + dev->timeout_image_io = 0; + + /* initialise OUTPUT and CAPTURE buffer values */ + dev->image = NULL; + dev->image_size = 0; + dev->buffer_count = _max_buffers; + dev->buffer_size = 0; + dev->used_buffer_count = 0; + INIT_LIST_HEAD(&dev->outbufs_list); + do { + u32 index; + for (index = 0; index < dev->buffer_count; ++index) + INIT_LIST_HEAD(&dev->buffers[index].list_head); + + } while (0); + memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index)); + dev->write_position = 0; + + /* initialise synchronisation data */ + atomic_set(&dev->open_count, 0); + mutex_init(&dev->image_mutex); + spin_lock_init(&dev->lock); + spin_lock_init(&dev->list_lock); + init_waitqueue_head(&dev->read_event); + dev->format_tokens = V4L2L_TOKEN_MASK; + dev->stream_tokens = V4L2L_TOKEN_MASK; + + /* initialise sustain frame rate and timeout parameters, and timers */ + dev->reread_count = 0; + dev->timeout_image = NULL; + dev->timeout_happened = 0; +#ifdef HAVE_TIMER_SETUP + timer_setup(&dev->sustain_timer, sustain_timer_clb, 0); + timer_setup(&dev->timeout_timer, timeout_timer_clb, 0); +#else + setup_timer(&dev->sustain_timer, sustain_timer_clb, nr); + setup_timer(&dev->timeout_timer, timeout_timer_clb, nr); +#endif + + /* initialise the control handler and add controls */ + MARK(); + hdl = &dev->ctrl_handler; + err = v4l2_ctrl_handler_init(hdl, 4); + if (err) + goto out_unregister; + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL); + if (hdl->error) { + err = hdl->error; + goto out_free_handler; + } + dev->v4l2_dev.ctrl_handler = hdl; + + err = v4l2_ctrl_handler_setup(hdl); + if (err) + goto out_free_handler; + + /* register the device (creates /dev/video*) */ + MARK(); + if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) { + printk(KERN_ERR + "v4l2-loopback add() failed video_register_device()\n"); + err = -EFAULT; + goto out_free_device; + } + v4l2loopback_create_sysfs(dev->vdev); + /* NOTE: ambivalent if sysfs entries fail */ + + if (ret_nr) + *ret_nr = dev->vdev->num; + return 0; + +out_free_device: + video_device_release(dev->vdev); +out_free_handler: + v4l2_ctrl_handler_free(&dev->ctrl_handler); +out_unregister: + video_set_drvdata(dev->vdev, NULL); + if (vdev_priv != NULL) + kfree(vdev_priv); + v4l2_device_unregister(&dev->v4l2_dev); +out_free_idr: + idr_remove(&v4l2loopback_index_idr, nr); +out_free_dev: + kfree(dev); + return err; +} + +static void v4l2_loopback_remove(struct v4l2_loopback_device *dev) +{ + int device_nr = v4l2loopback_get_vdev_nr(dev->vdev); + mutex_lock(&dev->image_mutex); + free_buffers(dev); + free_timeout_buffer(dev); + mutex_unlock(&dev->image_mutex); + v4l2loopback_remove_sysfs(dev->vdev); + v4l2_ctrl_handler_free(&dev->ctrl_handler); + kfree(video_get_drvdata(dev->vdev)); + video_unregister_device(dev->vdev); + v4l2_device_unregister(&dev->v4l2_dev); + idr_remove(&v4l2loopback_index_idr, device_nr); + kfree(dev); +} + +static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_config conf; + struct v4l2_loopback_config *confptr = &conf; + int device_nr, capture_nr, output_nr; + int ret; + + ret = mutex_lock_killable(&v4l2loopback_ctl_mutex); + if (ret) + return ret; + + ret = -EINVAL; + switch (cmd) { + default: + ret = -ENOSYS; + break; + /* add a v4l2loopback device (pair), based on the user-provided specs */ + case V4L2LOOPBACK_CTL_ADD: + if (parm) { + if ((ret = copy_from_user(&conf, (void *)parm, + sizeof(conf))) < 0) + break; + } else + confptr = NULL; + ret = v4l2_loopback_add(confptr, &device_nr); + if (ret >= 0) + ret = device_nr; + break; + /* remove a v4l2loopback device (both capture and output) */ + case V4L2LOOPBACK_CTL_REMOVE: + ret = v4l2loopback_lookup((int)parm, &dev); + if (ret >= 0 && dev) { + ret = -EBUSY; + if (dev->open_count.counter > 0) + break; + v4l2_loopback_remove(dev); + ret = 0; + }; + break; + /* get information for a loopback device. + * this is mostly about limits (which cannot be queried directly with VIDIOC_G_FMT and friends + */ + case V4L2LOOPBACK_CTL_QUERY: + if (!parm) + break; + if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) < + 0) + break; + capture_nr = output_nr = conf.output_nr; +#ifdef SPLIT_DEVICES + capture_nr = conf.capture_nr; +#endif + device_nr = (output_nr < 0) ? capture_nr : output_nr; + MARK(); + /* get the device from either capture_nr or output_nr (whatever is valid) */ + if ((ret = v4l2loopback_lookup(device_nr, &dev)) < 0) + break; + MARK(); + /* if we got the device from output_nr and there is a valid capture_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != capture_nr) && (capture_nr >= 0) && + ((ret = v4l2loopback_lookup(capture_nr, 0)) < 0)) + break; + MARK(); + /* if otoh, we got the device from capture_nr and there is a valid output_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != output_nr) && (output_nr >= 0) && + ((ret = v4l2loopback_lookup(output_nr, 0)) < 0)) + break; + + /* v4l2_loopback_config identified a single device, so fetch the data */ + snprintf(conf.card_label, sizeof(conf.card_label), "%s", + dev->card_label); + + conf.output_nr = dev->vdev->num; +#ifdef SPLIT_DEVICES + conf.capture_nr = dev->vdev->num; +#endif + conf.min_width = dev->min_width; + conf.min_height = dev->min_height; + conf.max_width = dev->max_width; + conf.max_height = dev->max_height; + conf.announce_all_caps = dev->announce_all_caps; + conf.max_buffers = dev->buffer_count; + conf.max_openers = dev->max_openers; + conf.debug = debug; + MARK(); + if (copy_to_user((void *)parm, &conf, sizeof(conf))) { + ret = -EFAULT; + break; + } + ret = 0; + break; + } + + mutex_unlock(&v4l2loopback_ctl_mutex); + MARK(); + return ret; +} + +/* LINUX KERNEL */ + +static const struct file_operations v4l2loopback_ctl_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = v4l2loopback_control_ioctl, + .compat_ioctl = v4l2loopback_control_ioctl, + .llseek = noop_llseek, + // clang-format on +}; + +static struct miscdevice v4l2loopback_misc = { + // clang-format off + .minor = MISC_DYNAMIC_MINOR, + .name = "v4l2loopback", + .fops = &v4l2loopback_ctl_fops, + // clang-format on +}; + +static const struct v4l2_file_operations v4l2_loopback_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = v4l2_loopback_open, + .release = v4l2_loopback_close, + .read = v4l2_loopback_read, + .write = v4l2_loopback_write, + .poll = v4l2_loopback_poll, + .mmap = v4l2_loopback_mmap, + .unlocked_ioctl = video_ioctl2, + // clang-format on +}; + +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = { + // clang-format off + .vidioc_querycap = &vidioc_querycap, + .vidioc_enum_framesizes = &vidioc_enum_framesizes, + .vidioc_enum_frameintervals = &vidioc_enum_frameintervals, + + .vidioc_enum_output = &vidioc_enum_output, + .vidioc_g_output = &vidioc_g_output, + .vidioc_s_output = &vidioc_s_output, + + .vidioc_enum_input = &vidioc_enum_input, + .vidioc_g_input = &vidioc_g_input, + .vidioc_s_input = &vidioc_s_input, + + .vidioc_enum_fmt_vid_cap = &vidioc_enum_fmt_cap, + .vidioc_g_fmt_vid_cap = &vidioc_g_fmt_cap, + .vidioc_s_fmt_vid_cap = &vidioc_s_fmt_cap, + .vidioc_try_fmt_vid_cap = &vidioc_try_fmt_cap, + + .vidioc_enum_fmt_vid_out = &vidioc_enum_fmt_out, + .vidioc_s_fmt_vid_out = &vidioc_s_fmt_out, + .vidioc_g_fmt_vid_out = &vidioc_g_fmt_out, + .vidioc_try_fmt_vid_out = &vidioc_try_fmt_out, + +#ifdef V4L2L_OVERLAY + .vidioc_s_fmt_vid_overlay = &vidioc_s_fmt_overlay, + .vidioc_g_fmt_vid_overlay = &vidioc_g_fmt_overlay, +#endif + +#ifdef V4L2LOOPBACK_WITH_STD + .vidioc_s_std = &vidioc_s_std, + .vidioc_g_std = &vidioc_g_std, + .vidioc_querystd = &vidioc_querystd, +#endif /* V4L2LOOPBACK_WITH_STD */ + + .vidioc_g_parm = &vidioc_g_parm, + .vidioc_s_parm = &vidioc_s_parm, + + .vidioc_reqbufs = &vidioc_reqbufs, + .vidioc_querybuf = &vidioc_querybuf, + .vidioc_qbuf = &vidioc_qbuf, + .vidioc_dqbuf = &vidioc_dqbuf, + + .vidioc_streamon = &vidioc_streamon, + .vidioc_streamoff = &vidioc_streamoff, + +#ifdef CONFIG_VIDEO_V4L1_COMPAT + .vidiocgmbuf = &vidiocgmbuf, +#endif + + .vidioc_subscribe_event = &vidioc_subscribe_event, + .vidioc_unsubscribe_event = &v4l2_event_unsubscribe, + // clang-format on +}; + +static int free_device_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *dev = ptr; + v4l2_loopback_remove(dev); + return 0; +} +static void free_devices(void) +{ + idr_for_each(&v4l2loopback_index_idr, &free_device_cb, NULL); + idr_destroy(&v4l2loopback_index_idr); +} + +static int __init v4l2loopback_init_module(void) +{ + const u32 min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH; + const u32 min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT; + int err; + int i; + MARK(); + + err = misc_register(&v4l2loopback_misc); + if (err < 0) + return err; + + if (devices < 0) { + devices = 1; + + /* try guessing the devices from the "video_nr" parameter */ + for (i = MAX_DEVICES - 1; i >= 0; i--) { + if (video_nr[i] >= 0) { + devices = i + 1; + break; + } + } + } + + if (devices > MAX_DEVICES) { + devices = MAX_DEVICES; + printk(KERN_INFO + "v4l2-loopback init() number of initial devices is " + "limited to: %d\n", + MAX_DEVICES); + } + + if (max_buffers > MAX_BUFFERS) { + max_buffers = MAX_BUFFERS; + printk(KERN_INFO + "v4l2-loopback init() number of buffers is limited " + "to: %d\n", + MAX_BUFFERS); + } + + if (max_openers < 0) { + printk(KERN_INFO + "v4l2-loopback init() allowing %d openers rather " + "than %d\n", + 2, max_openers); + max_openers = 2; + } + + if (max_width < min_width) { + max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; + printk(KERN_INFO "v4l2-loopback init() using max_width %d\n", + max_width); + } + if (max_height < min_height) { + max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; + printk(KERN_INFO "v4l2-loopback init() using max_height %d\n", + max_height); + } + + for (i = 0; i < devices; i++) { + struct v4l2_loopback_config cfg = { + // clang-format off + .output_nr = video_nr[i], +#ifdef SPLIT_DEVICES + .capture_nr = video_nr[i], +#endif + .min_width = min_width, + .min_height = min_height, + .max_width = max_width, + .max_height = max_height, + .announce_all_caps = (!exclusive_caps[i]), + .max_buffers = max_buffers, + .max_openers = max_openers, + .debug = debug, + // clang-format on + }; + cfg.card_label[0] = 0; + if (card_label[i]) + snprintf(cfg.card_label, sizeof(cfg.card_label), "%s", + card_label[i]); + err = v4l2_loopback_add(&cfg, 0); + if (err) { + free_devices(); + goto error; + } + } + + dprintk("module installed\n"); + + printk(KERN_INFO "v4l2-loopback driver version %d.%d.%d%s loaded\n", + // clang-format off + (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff, + (V4L2LOOPBACK_VERSION_CODE >> 8) & 0xff, + (V4L2LOOPBACK_VERSION_CODE ) & 0xff, +#ifdef SNAPSHOT_VERSION + " (" __stringify(SNAPSHOT_VERSION) ")" +#else + "" +#endif + ); + // clang-format on + + return 0; +error: + misc_deregister(&v4l2loopback_misc); + return err; +} + +static void v4l2loopback_cleanup_module(void) +{ + MARK(); + /* unregister the device -> it deletes /dev/video* */ + free_devices(); + /* and get rid of /dev/v4l2loopback */ + misc_deregister(&v4l2loopback_misc); + dprintk("module removed\n"); +} + +MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); + +module_init(v4l2loopback_init_module); +module_exit(v4l2loopback_cleanup_module); diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h new file mode 100644 index 00000000000000..ec0be6cbe97d1b --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * v4l2loopback.h + * + * Written by IOhannes m zmölnig, 7/1/20. + * + * Copyright 2020 by IOhannes m zmölnig. Redistribution of this file is + * permitted under the GNU General Public License. + */ +#ifndef _V4L2LOOPBACK_H +#define _V4L2LOOPBACK_H + +#define V4L2LOOPBACK_VERSION_MAJOR 0 +#define V4L2LOOPBACK_VERSION_MINOR 14 +#define V4L2LOOPBACK_VERSION_BUGFIX 0 + +/* /dev/v4l2loopback interface */ + +struct v4l2_loopback_config { + /** + * the device-number (/dev/video) + * V4L2LOOPBACK_CTL_ADD: + * setting this to a value<0, will allocate an available one + * if nr>=0 and the device already exists, the ioctl will EEXIST + * if output_nr and capture_nr are the same, only a single device will be created + * NOTE: currently split-devices (where output_nr and capture_nr differ) + * are not implemented yet. + * until then, requesting different device-IDs will result in EINVAL. + * + * V4L2LOOPBACK_CTL_QUERY: + * either both output_nr and capture_nr must refer to the same loopback, + * or one (and only one) of them must be -1 + * + */ + int output_nr; + int unused; /*capture_nr;*/ + + /** + * a nice name for your device + * if (*card_label)==0, an automatic name is assigned + */ + char card_label[32]; + + /** + * allowed frame size + * if too low, default values are used + */ + unsigned int min_width; + unsigned int max_width; + unsigned int min_height; + unsigned int max_height; + + /** + * number of buffers to allocate for the queue + * if set to <=0, default values are used + */ + int max_buffers; + + /** + * how many consumers are allowed to open this device concurrently + * if set to <=0, default values are used + */ + int max_openers; + + /** + * set the debugging level for this device + */ + int debug; + + /** + * whether to announce OUTPUT/CAPTURE capabilities exclusively + * for this device or not + * (!exclusive_caps) + * NOTE: this is going to be removed once separate output/capture + * devices are implemented + */ + int announce_all_caps; +}; + +/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the + * to-be-created device set. + * if the ptr is NULL, a new device is created with default values at the driver's discretion. + * + * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY, + * to get more information on the device) + */ +#define V4L2LOOPBACK_CTL_ADD 0x4C80 + +/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set + * (the two values must either refer to video-devices associated with the same loopback device + * or exactly one of them must be <0 + */ +#define V4L2LOOPBACK_CTL_QUERY 0x4C82 + +/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ +#define V4L2LOOPBACK_CTL_REMOVE 0x4C81 + +#endif /* _V4L2LOOPBACK_H */ diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h new file mode 100644 index 00000000000000..d855a379655419 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback_formats.h @@ -0,0 +1,445 @@ +static const struct v4l2l_format formats[] = { +#ifndef V4L2_PIX_FMT_VP9 +#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') +#endif +#ifndef V4L2_PIX_FMT_HEVC +#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') +#endif + + /* here come the packed formats */ + { + .name = "32 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR32, + .depth = 32, + .flags = 0, + }, + { + .name = "32 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB32, + .depth = 32, + .flags = 0, + }, + { + .name = "24 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR24, + .depth = 24, + .flags = 0, + }, + { + .name = "24 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB24, + .depth = 24, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_ABGR32 + { + .name = "32 bpp RGBA, le", + .fourcc = V4L2_PIX_FMT_ABGR32, + .depth = 32, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_RGBA32 + { + .name = "32 bpp RGBA", + .fourcc = V4L2_PIX_FMT_RGBA32, + .depth = 32, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_RGB332 + { + .name = "8 bpp RGB-3-3-2", + .fourcc = V4L2_PIX_FMT_RGB332, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB332 */ +#ifdef V4L2_PIX_FMT_RGB444 + { + .name = "16 bpp RGB (xxxxrrrr ggggbbbb)", + .fourcc = V4L2_PIX_FMT_RGB444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB444 */ +#ifdef V4L2_PIX_FMT_RGB555 + { + .name = "16 bpp RGB-5-5-5", + .fourcc = V4L2_PIX_FMT_RGB555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555 */ +#ifdef V4L2_PIX_FMT_RGB565 + { + .name = "16 bpp RGB-5-6-5", + .fourcc = V4L2_PIX_FMT_RGB565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565 */ +#ifdef V4L2_PIX_FMT_RGB555X + { + .name = "16 bpp RGB-5-5-5 BE", + .fourcc = V4L2_PIX_FMT_RGB555X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555X */ +#ifdef V4L2_PIX_FMT_RGB565X + { + .name = "16 bpp RGB-5-6-5 BE", + .fourcc = V4L2_PIX_FMT_RGB565X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565X */ +#ifdef V4L2_PIX_FMT_BGR666 + { + .name = "18 bpp BGR-6-6-6", + .fourcc = V4L2_PIX_FMT_BGR666, + .depth = 18, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_BGR666 */ + { + .name = "4:2:2, packed, YUYV", + .fourcc = V4L2_PIX_FMT_YUYV, + .depth = 16, + .flags = 0, + }, + { + .name = "4:2:2, packed, UYVY", + .fourcc = V4L2_PIX_FMT_UYVY, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YVYU + { + .name = "4:2:2, packed YVYU", + .fourcc = V4L2_PIX_FMT_YVYU, + .depth = 16, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_VYUY + { + .name = "4:2:2, packed VYUY", + .fourcc = V4L2_PIX_FMT_VYUY, + .depth = 16, + .flags = 0, + }, +#endif + { + .name = "4:2:2, packed YYUV", + .fourcc = V4L2_PIX_FMT_YYUV, + .depth = 16, + .flags = 0, + }, + { + .name = "YUV-8-8-8-8", + .fourcc = V4L2_PIX_FMT_YUV32, + .depth = 32, + .flags = 0, + }, + { + .name = "8 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_GREY, + .depth = 8, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_Y4 + { + .name = "4 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y4, + .depth = 4, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y4 */ +#ifdef V4L2_PIX_FMT_Y6 + { + .name = "6 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y6, + .depth = 6, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y6 */ +#ifdef V4L2_PIX_FMT_Y10 + { + .name = "10 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y10, + .depth = 10, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y10 */ +#ifdef V4L2_PIX_FMT_Y12 + { + .name = "12 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y12, + .depth = 12, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y12 */ + { + .name = "16 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_Y16, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YUV444 + { + .name = "16 bpp xxxxyyyy uuuuvvvv", + .fourcc = V4L2_PIX_FMT_YUV444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV444 */ +#ifdef V4L2_PIX_FMT_YUV555 + { + .name = "16 bpp YUV-5-5-5", + .fourcc = V4L2_PIX_FMT_YUV555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV555 */ +#ifdef V4L2_PIX_FMT_YUV565 + { + .name = "16 bpp YUV-5-6-5", + .fourcc = V4L2_PIX_FMT_YUV565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV565 */ + +/* bayer formats */ +#ifdef V4L2_PIX_FMT_SRGGB8 + { + .name = "Bayer RGGB 8bit", + .fourcc = V4L2_PIX_FMT_SRGGB8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SRGGB8 */ +#ifdef V4L2_PIX_FMT_SGRBG8 + { + .name = "Bayer GRBG 8bit", + .fourcc = V4L2_PIX_FMT_SGRBG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGRBG8 */ +#ifdef V4L2_PIX_FMT_SGBRG8 + { + .name = "Bayer GBRG 8bit", + .fourcc = V4L2_PIX_FMT_SGBRG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGBRG8 */ +#ifdef V4L2_PIX_FMT_SBGGR8 + { + .name = "Bayer BA81 8bit", + .fourcc = V4L2_PIX_FMT_SBGGR8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SBGGR8 */ + + /* here come the planar formats */ + { + .name = "4:1:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:1:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#ifdef V4L2_PIX_FMT_YUV422P + { + .name = "16 bpp YVU422 planar", + .fourcc = V4L2_PIX_FMT_YUV422P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV422P */ +#ifdef V4L2_PIX_FMT_YUV411P + { + .name = "16 bpp YVU411 planar", + .fourcc = V4L2_PIX_FMT_YUV411P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV411P */ +#ifdef V4L2_PIX_FMT_Y41P + { + .name = "12 bpp YUV 4:1:1", + .fourcc = V4L2_PIX_FMT_Y41P, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_Y41P */ +#ifdef V4L2_PIX_FMT_NV12 + { + .name = "12 bpp Y/CbCr 4:2:0 ", + .fourcc = V4L2_PIX_FMT_NV12, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_NV12 */ + +/* here come the compressed formats */ + +#ifdef V4L2_PIX_FMT_MJPEG + { + .name = "Motion-JPEG", + .fourcc = V4L2_PIX_FMT_MJPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MJPEG */ +#ifdef V4L2_PIX_FMT_JPEG + { + .name = "JFIF JPEG", + .fourcc = V4L2_PIX_FMT_JPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_JPEG */ +#ifdef V4L2_PIX_FMT_DV + { + .name = "DV1394", + .fourcc = V4L2_PIX_FMT_DV, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_DV */ +#ifdef V4L2_PIX_FMT_MPEG + { + .name = "MPEG-1/2/4 Multiplexed", + .fourcc = V4L2_PIX_FMT_MPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG */ +#ifdef V4L2_PIX_FMT_H264 + { + .name = "H264 with start codes", + .fourcc = V4L2_PIX_FMT_H264, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264 */ +#ifdef V4L2_PIX_FMT_H264_NO_SC + { + .name = "H264 without start codes", + .fourcc = V4L2_PIX_FMT_H264_NO_SC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_NO_SC */ +#ifdef V4L2_PIX_FMT_H264_MVC + { + .name = "H264 MVC", + .fourcc = V4L2_PIX_FMT_H264_MVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_MVC */ +#ifdef V4L2_PIX_FMT_H263 + { + .name = "H263", + .fourcc = V4L2_PIX_FMT_H263, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H263 */ +#ifdef V4L2_PIX_FMT_MPEG1 + { + .name = "MPEG-1 ES", + .fourcc = V4L2_PIX_FMT_MPEG1, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG1 */ +#ifdef V4L2_PIX_FMT_MPEG2 + { + .name = "MPEG-2 ES", + .fourcc = V4L2_PIX_FMT_MPEG2, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG2 */ +#ifdef V4L2_PIX_FMT_MPEG4 + { + .name = "MPEG-4 part 2 ES", + .fourcc = V4L2_PIX_FMT_MPEG4, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG4 */ +#ifdef V4L2_PIX_FMT_XVID + { + .name = "Xvid", + .fourcc = V4L2_PIX_FMT_XVID, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_XVID */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_G + { + .name = "SMPTE 421M Annex G compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_G, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_L + { + .name = "SMPTE 421M Annex L compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_L, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */ +#ifdef V4L2_PIX_FMT_VP8 + { + .name = "VP8", + .fourcc = V4L2_PIX_FMT_VP8, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP8 */ +#ifdef V4L2_PIX_FMT_VP9 + { + .name = "VP9", + .fourcc = V4L2_PIX_FMT_VP9, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP9 */ +#ifdef V4L2_PIX_FMT_HEVC + { + .name = "HEVC", + .fourcc = V4L2_PIX_FMT_HEVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_HEVC */ +}; From 942227070fd461e8043f3ddb76a1427085f66602 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Sun, 5 May 2024 10:49:06 +0200 Subject: [PATCH 2854/2924] Cachy: Initialize ata before graphics ATA init is the long pole in the boot process, and its asynchronous. move the graphics init after it so that ata and graphics initialize in parallel Signed-off-by: Peter Jung --- drivers/Makefile | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/Makefile b/drivers/Makefile index b5749cf67044ce..5beba9f57254cd 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -64,14 +64,8 @@ obj-y += char/ # iommu/ comes before gpu as gpu are using iommu controllers obj-y += iommu/ -# gpu/ comes after char for AGP vs DRM startup and after iommu -obj-y += gpu/ - obj-$(CONFIG_CONNECTOR) += connector/ -# i810fb depends on char/agp/ -obj-$(CONFIG_FB_I810) += video/fbdev/i810/ - obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ @@ -83,6 +77,13 @@ obj-y += macintosh/ obj-y += scsi/ obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ + +# gpu/ comes after char for AGP vs DRM startup and after iommu +obj-y += gpu/ + +# i810fb depends on char/agp/ +obj-$(CONFIG_FB_I810) += video/fbdev/i810/ + obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ obj-$(CONFIG_SPI) += spi/ From f396f94c02be55394ae8e57b8be49a8987d4e96f Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Sun, 5 May 2024 10:49:42 +0200 Subject: [PATCH 2855/2924] Cachy: intel-pstate: Implement "enable" parameter If intel-pstate is compiled into the kernel, it will preempt the loading of acpi-cpufreq so you can take advantage of hardware p-states without any friction. However, intel-pstate is not completely superior to cpufreq's ondemand for one reason. There's no concept of an up_threshold property. In ondemand, up_threshold essentially reduces the maximum utilization to compare against, allowing you to hit max frequencies and turbo boost from a much lower core utilization. With intel-pstate, you have the concept of minimum and maximum performance, but no tunable that lets you define, maximum frequency means 50% core utilization. For just this oversight, there's reasons you may want ondemand. Lets support setting "enable" in kernel boot parameters. This lets kernel maintainers include "intel_pstate=disable" statically in the static boot parameters, but let users of the kernel override this selection. Signed-off-by: Peter Jung --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ drivers/cpufreq/intel_pstate.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d7bee42b709043..c865280efa8ebb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2318,6 +2318,9 @@ disable Do not enable intel_pstate as the default scaling driver for the supported processors + enable + Enable intel_pstate in-case "disable" was passed + previously in the kernel boot parameters active Use intel_pstate driver to bypass the scaling governors layer of cpufreq and provides it own diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ba9bf06f1c7736..b8ceac594acd77 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3828,6 +3828,8 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; + else if (!strcmp(str, "enable")) + no_load = 0; else if (!strcmp(str, "active")) default_driver = &intel_pstate; else if (!strcmp(str, "passive")) From b7e5564e57b52acb3f822d1343ee574f6806ec23 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 13 Dec 2018 01:00:49 +0000 Subject: [PATCH 2856/2924] Cachy: sched/wait: Do accept() in LIFO order for cache efficiency Signed-off-by: Alexandre Frade --- include/linux/wait.h | 2 ++ kernel/sched/wait.c | 24 ++++++++++++++++++++++++ net/ipv4/inet_connection_sock.c | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/linux/wait.h b/include/linux/wait.h index 965a19809c7e56..3d442267a2565c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -163,6 +163,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); @@ -1195,6 +1196,7 @@ do { \ */ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 51e38f5f47018c..c5cc616484badd 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -47,6 +47,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_ } EXPORT_SYMBOL_GPL(add_wait_queue_priority); +void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + __add_wait_queue(wq_head, wq_entry); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(add_wait_queue_exclusive_lifo); + void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; @@ -258,6 +269,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent } EXPORT_SYMBOL(prepare_to_wait_exclusive); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + if (list_empty(&wq_entry->entry)) + __add_wait_queue(wq_head, wq_entry); + set_current_state(state); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo); + void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) { wq_entry->flags = flags; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dd5cf8914a28d1..b216088a6abd30 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -632,7 +632,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) * having to remove and re-insert us on the wait queue. */ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, + prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) From 109c490641721f9acc97f70aadc09cf4bb8a0ad3 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 18 Feb 2018 23:35:41 +0000 Subject: [PATCH 2857/2924] locking: rwsem: spin faster tweak rwsem owner spinning a bit Signed-off-by: Alexandre Frade --- kernel/locking/rwsem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 2ddb827e3bea03..464049c4af3f4c 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -747,6 +747,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) struct task_struct *new, *owner; unsigned long flags, new_flags; enum owner_state state; + int i = 0; lockdep_assert_preemption_disabled(); @@ -783,7 +784,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) break; } - cpu_relax(); + if (i++ > 1000) + cpu_relax(); } return state; From b48961ba2aa6b81be8384daad481bd7ac9952153 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Fri, 22 Nov 2024 00:30:36 +0800 Subject: [PATCH 2858/2924] CACHY: Add VHBA Driver Signed-off-by: Eric Naim --- drivers/scsi/Kconfig | 2 + drivers/scsi/Makefile | 1 + drivers/scsi/vhba/Kconfig | 9 + drivers/scsi/vhba/Makefile | 4 + drivers/scsi/vhba/vhba.c | 1132 ++++++++++++++++++++++++++++++++++++ 5 files changed, 1148 insertions(+) create mode 100644 drivers/scsi/vhba/Kconfig create mode 100644 drivers/scsi/vhba/Makefile create mode 100644 drivers/scsi/vhba/vhba.c diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 5a3c670aec27d6..831ffcf7c7308a 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1521,4 +1521,6 @@ endif # SCSI_LOWLEVEL source "drivers/scsi/device_handler/Kconfig" +source "drivers/scsi/vhba/Kconfig" + endmenu diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 16de3e41f94c40..4e88f6e3e67bac 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -152,6 +152,7 @@ obj-$(CONFIG_CHR_DEV_SCH) += ch.o obj-$(CONFIG_SCSI_ENCLOSURE) += ses.o obj-$(CONFIG_SCSI_HISI_SAS) += hisi_sas/ +obj-$(CONFIG_VHBA) += vhba/ # This goes last, so that "real" scsi devices probe earlier obj-$(CONFIG_SCSI_DEBUG) += scsi_debug.o diff --git a/drivers/scsi/vhba/Kconfig b/drivers/scsi/vhba/Kconfig new file mode 100644 index 00000000000000..e70a381fe3dff8 --- /dev/null +++ b/drivers/scsi/vhba/Kconfig @@ -0,0 +1,9 @@ +config VHBA + tristate "Virtual (SCSI) Host Bus Adapter" + depends on SCSI + help + This is the in-kernel part of CDEmu, a CD/DVD-ROM device + emulator. + + This driver can also be built as a module. If so, the module + will be called vhba. diff --git a/drivers/scsi/vhba/Makefile b/drivers/scsi/vhba/Makefile new file mode 100644 index 00000000000000..2d7524b661990a --- /dev/null +++ b/drivers/scsi/vhba/Makefile @@ -0,0 +1,4 @@ +VHBA_VERSION := 20240917 + +obj-$(CONFIG_VHBA) += vhba.o +ccflags-y := -DVHBA_VERSION=\"$(VHBA_VERSION)\" -Werror diff --git a/drivers/scsi/vhba/vhba.c b/drivers/scsi/vhba/vhba.c new file mode 100644 index 00000000000000..878a3be0ba2b2c --- /dev/null +++ b/drivers/scsi/vhba/vhba.c @@ -0,0 +1,1132 @@ +/* + * vhba.c + * + * Copyright (C) 2007-2012 Chia-I Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#define pr_fmt(fmt) "vhba: " fmt + +#include + +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#ifdef CONFIG_COMPAT +#include +#endif +#include +#include +#include +#include +#include +#include + + +MODULE_AUTHOR("Chia-I Wu"); +MODULE_VERSION(VHBA_VERSION); +MODULE_DESCRIPTION("Virtual SCSI HBA"); +MODULE_LICENSE("GPL"); + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) +#define sdev_dbg(sdev, fmt, a...) \ + dev_dbg(&(sdev)->sdev_gendev, fmt, ##a) +#define scmd_dbg(scmd, fmt, a...) \ + dev_dbg(&(scmd)->device->sdev_gendev, fmt, ##a) +#endif + +#define VHBA_MAX_SECTORS_PER_IO 256 +#define VHBA_MAX_BUS 16 +#define VHBA_MAX_ID 16 +#define VHBA_MAX_DEVICES (VHBA_MAX_BUS * (VHBA_MAX_ID-1)) +#define VHBA_KBUF_SIZE PAGE_SIZE + +#define DATA_TO_DEVICE(dir) ((dir) == DMA_TO_DEVICE || (dir) == DMA_BIDIRECTIONAL) +#define DATA_FROM_DEVICE(dir) ((dir) == DMA_FROM_DEVICE || (dir) == DMA_BIDIRECTIONAL) + + +static int vhba_can_queue = 32; +module_param_named(can_queue, vhba_can_queue, int, 0); + + +enum vhba_req_state { + VHBA_REQ_FREE, + VHBA_REQ_PENDING, + VHBA_REQ_READING, + VHBA_REQ_SENT, + VHBA_REQ_WRITING, +}; + +struct vhba_command { + struct scsi_cmnd *cmd; + /* metatags are per-host. not to be confused with + queue tags that are usually per-lun */ + unsigned long metatag; + int status; + struct list_head entry; +}; + +struct vhba_device { + unsigned int num; + spinlock_t cmd_lock; + struct list_head cmd_list; + wait_queue_head_t cmd_wq; + atomic_t refcnt; + + unsigned char *kbuf; + size_t kbuf_size; +}; + +struct vhba_host { + struct Scsi_Host *shost; + spinlock_t cmd_lock; + int cmd_next; + struct vhba_command *commands; + spinlock_t dev_lock; + struct vhba_device *devices[VHBA_MAX_DEVICES]; + int num_devices; + DECLARE_BITMAP(chgmap, VHBA_MAX_DEVICES); + int chgtype[VHBA_MAX_DEVICES]; + struct work_struct scan_devices; +}; + +#define MAX_COMMAND_SIZE 16 + +struct vhba_request { + __u32 metatag; + __u32 lun; + __u8 cdb[MAX_COMMAND_SIZE]; + __u8 cdb_len; + __u32 data_len; +}; + +struct vhba_response { + __u32 metatag; + __u32 status; + __u32 data_len; +}; + + + +static struct vhba_command *vhba_alloc_command (void); +static void vhba_free_command (struct vhba_command *vcmd); + +static struct platform_device vhba_platform_device; + + + +/* These functions define a symmetric 1:1 mapping between device numbers and + the bus and id. We have reserved the last id per bus for the host itself. */ +static void devnum_to_bus_and_id(unsigned int devnum, unsigned int *bus, unsigned int *id) +{ + *bus = devnum / (VHBA_MAX_ID-1); + *id = devnum % (VHBA_MAX_ID-1); +} + +static unsigned int bus_and_id_to_devnum(unsigned int bus, unsigned int id) +{ + return (bus * (VHBA_MAX_ID-1)) + id; +} + +static struct vhba_device *vhba_device_alloc (void) +{ + struct vhba_device *vdev; + + vdev = kzalloc(sizeof(struct vhba_device), GFP_KERNEL); + if (!vdev) { + return NULL; + } + + spin_lock_init(&vdev->cmd_lock); + INIT_LIST_HEAD(&vdev->cmd_list); + init_waitqueue_head(&vdev->cmd_wq); + atomic_set(&vdev->refcnt, 1); + + vdev->kbuf = NULL; + vdev->kbuf_size = 0; + + return vdev; +} + +static void vhba_device_put (struct vhba_device *vdev) +{ + if (atomic_dec_and_test(&vdev->refcnt)) { + kfree(vdev); + } +} + +static struct vhba_device *vhba_device_get (struct vhba_device *vdev) +{ + atomic_inc(&vdev->refcnt); + + return vdev; +} + +static int vhba_device_queue (struct vhba_device *vdev, struct scsi_cmnd *cmd) +{ + struct vhba_host *vhost; + struct vhba_command *vcmd; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + vcmd = vhba_alloc_command(); + if (!vcmd) { + return SCSI_MLQUEUE_HOST_BUSY; + } + + vcmd->cmd = cmd; + + spin_lock_irqsave(&vdev->cmd_lock, flags); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) + vcmd->metatag = scsi_cmd_to_rq(vcmd->cmd)->tag; +#else + vcmd->metatag = vcmd->cmd->request->tag; +#endif + list_add_tail(&vcmd->entry, &vdev->cmd_list); + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + wake_up_interruptible(&vdev->cmd_wq); + + return 0; +} + +static int vhba_device_dequeue (struct vhba_device *vdev, struct scsi_cmnd *cmd) +{ + struct vhba_command *vcmd; + int retval; + unsigned long flags; + + spin_lock_irqsave(&vdev->cmd_lock, flags); + list_for_each_entry(vcmd, &vdev->cmd_list, entry) { + if (vcmd->cmd == cmd) { + list_del_init(&vcmd->entry); + break; + } + } + + /* command not found */ + if (&vcmd->entry == &vdev->cmd_list) { + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + return SUCCESS; + } + + while (vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING) { + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + scmd_dbg(cmd, "wait for I/O before aborting\n"); + schedule_timeout(1); + spin_lock_irqsave(&vdev->cmd_lock, flags); + } + + retval = (vcmd->status == VHBA_REQ_SENT) ? FAILED : SUCCESS; + + vhba_free_command(vcmd); + + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + return retval; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) +static int vhba_slave_alloc(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = sdev->host; + + sdev_dbg(sdev, "enabling tagging (queue depth: %i).\n", sdev->queue_depth); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) + if (!shost_use_blk_mq(shost) && shost->bqt) { +#else + if (shost->bqt) { +#endif + blk_queue_init_tags(sdev->request_queue, sdev->queue_depth, shost->bqt); + } + scsi_adjust_queue_depth(sdev, 0, sdev->queue_depth); + + return 0; +} +#endif + +static void vhba_scan_devices_add (struct vhba_host *vhost, int bus, int id) +{ + struct scsi_device *sdev; + + sdev = scsi_device_lookup(vhost->shost, bus, id, 0); + if (!sdev) { + scsi_add_device(vhost->shost, bus, id, 0); + } else { + dev_warn(&vhost->shost->shost_gendev, "tried to add an already-existing device %d:%d:0!\n", bus, id); + scsi_device_put(sdev); + } +} + +static void vhba_scan_devices_remove (struct vhba_host *vhost, int bus, int id) +{ + struct scsi_device *sdev; + + sdev = scsi_device_lookup(vhost->shost, bus, id, 0); + if (sdev) { + scsi_remove_device(sdev); + scsi_device_put(sdev); + } else { + dev_warn(&vhost->shost->shost_gendev, "tried to remove non-existing device %d:%d:0!\n", bus, id); + } +} + +static void vhba_scan_devices (struct work_struct *work) +{ + struct vhba_host *vhost = container_of(work, struct vhba_host, scan_devices); + unsigned long flags; + int change, exists; + unsigned int devnum; + unsigned int bus, id; + + for (;;) { + spin_lock_irqsave(&vhost->dev_lock, flags); + + devnum = find_first_bit(vhost->chgmap, VHBA_MAX_DEVICES); + if (devnum >= VHBA_MAX_DEVICES) { + spin_unlock_irqrestore(&vhost->dev_lock, flags); + break; + } + change = vhost->chgtype[devnum]; + exists = vhost->devices[devnum] != NULL; + + vhost->chgtype[devnum] = 0; + clear_bit(devnum, vhost->chgmap); + + spin_unlock_irqrestore(&vhost->dev_lock, flags); + + devnum_to_bus_and_id(devnum, &bus, &id); + + if (change < 0) { + dev_dbg(&vhost->shost->shost_gendev, "trying to remove target %d:%d:0\n", bus, id); + vhba_scan_devices_remove(vhost, bus, id); + } else if (change > 0) { + dev_dbg(&vhost->shost->shost_gendev, "trying to add target %d:%d:0\n", bus, id); + vhba_scan_devices_add(vhost, bus, id); + } else { + /* quick sequence of add/remove or remove/add; we determine + which one it was by checking if device structure exists */ + if (exists) { + /* remove followed by add: remove and (re)add */ + dev_dbg(&vhost->shost->shost_gendev, "trying to (re)add target %d:%d:0\n", bus, id); + vhba_scan_devices_remove(vhost, bus, id); + vhba_scan_devices_add(vhost, bus, id); + } else { + /* add followed by remove: no-op */ + dev_dbg(&vhost->shost->shost_gendev, "no-op for target %d:%d:0\n", bus, id); + } + } + } +} + +static int vhba_add_device (struct vhba_device *vdev) +{ + struct vhba_host *vhost; + unsigned int devnum; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + vhba_device_get(vdev); + + spin_lock_irqsave(&vhost->dev_lock, flags); + if (vhost->num_devices >= VHBA_MAX_DEVICES) { + spin_unlock_irqrestore(&vhost->dev_lock, flags); + vhba_device_put(vdev); + return -EBUSY; + } + + for (devnum = 0; devnum < VHBA_MAX_DEVICES; devnum++) { + if (vhost->devices[devnum] == NULL) { + vdev->num = devnum; + vhost->devices[devnum] = vdev; + vhost->num_devices++; + set_bit(devnum, vhost->chgmap); + vhost->chgtype[devnum]++; + break; + } + } + spin_unlock_irqrestore(&vhost->dev_lock, flags); + + schedule_work(&vhost->scan_devices); + + return 0; +} + +static int vhba_remove_device (struct vhba_device *vdev) +{ + struct vhba_host *vhost; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + spin_lock_irqsave(&vhost->dev_lock, flags); + set_bit(vdev->num, vhost->chgmap); + vhost->chgtype[vdev->num]--; + vhost->devices[vdev->num] = NULL; + vhost->num_devices--; + spin_unlock_irqrestore(&vhost->dev_lock, flags); + + vhba_device_put(vdev); + + schedule_work(&vhost->scan_devices); + + return 0; +} + +static struct vhba_device *vhba_lookup_device (int devnum) +{ + struct vhba_host *vhost; + struct vhba_device *vdev = NULL; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + if (likely(devnum < VHBA_MAX_DEVICES)) { + spin_lock_irqsave(&vhost->dev_lock, flags); + vdev = vhost->devices[devnum]; + if (vdev) { + vdev = vhba_device_get(vdev); + } + + spin_unlock_irqrestore(&vhost->dev_lock, flags); + } + + return vdev; +} + +static struct vhba_command *vhba_alloc_command (void) +{ + struct vhba_host *vhost; + struct vhba_command *vcmd; + unsigned long flags; + int i; + + vhost = platform_get_drvdata(&vhba_platform_device); + + spin_lock_irqsave(&vhost->cmd_lock, flags); + + vcmd = vhost->commands + vhost->cmd_next++; + if (vcmd->status != VHBA_REQ_FREE) { + for (i = 0; i < vhba_can_queue; i++) { + vcmd = vhost->commands + i; + + if (vcmd->status == VHBA_REQ_FREE) { + vhost->cmd_next = i + 1; + break; + } + } + + if (i == vhba_can_queue) { + vcmd = NULL; + } + } + + if (vcmd) { + vcmd->status = VHBA_REQ_PENDING; + } + + vhost->cmd_next %= vhba_can_queue; + + spin_unlock_irqrestore(&vhost->cmd_lock, flags); + + return vcmd; +} + +static void vhba_free_command (struct vhba_command *vcmd) +{ + struct vhba_host *vhost; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + spin_lock_irqsave(&vhost->cmd_lock, flags); + vcmd->status = VHBA_REQ_FREE; + spin_unlock_irqrestore(&vhost->cmd_lock, flags); +} + +static int vhba_queuecommand (struct Scsi_Host *shost, struct scsi_cmnd *cmd) +{ + struct vhba_device *vdev; + int retval; + unsigned int devnum; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) + scmd_dbg(cmd, "queue %p tag %i\n", cmd, scsi_cmd_to_rq(cmd)->tag); +#else + scmd_dbg(cmd, "queue %p tag %i\n", cmd, cmd->request->tag); +#endif + + devnum = bus_and_id_to_devnum(cmd->device->channel, cmd->device->id); + vdev = vhba_lookup_device(devnum); + if (!vdev) { + scmd_dbg(cmd, "no such device\n"); + + cmd->result = DID_NO_CONNECT << 16; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + scsi_done(cmd); +#else + cmd->scsi_done(cmd); +#endif + + return 0; + } + + retval = vhba_device_queue(vdev, cmd); + + vhba_device_put(vdev); + + return retval; +} + +static int vhba_abort (struct scsi_cmnd *cmd) +{ + struct vhba_device *vdev; + int retval = SUCCESS; + unsigned int devnum; + + scmd_dbg(cmd, "abort %p\n", cmd); + + devnum = bus_and_id_to_devnum(cmd->device->channel, cmd->device->id); + vdev = vhba_lookup_device(devnum); + if (vdev) { + retval = vhba_device_dequeue(vdev, cmd); + vhba_device_put(vdev); + } else { + cmd->result = DID_NO_CONNECT << 16; + } + + return retval; +} + +static struct scsi_host_template vhba_template = { + .module = THIS_MODULE, + .name = "vhba", + .proc_name = "vhba", + .queuecommand = vhba_queuecommand, + .eh_abort_handler = vhba_abort, + .this_id = -1, + .max_sectors = VHBA_MAX_SECTORS_PER_IO, + .sg_tablesize = 256, +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) + .slave_alloc = vhba_slave_alloc, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(6, 14, 0) + .tag_alloc_policy = BLK_TAG_ALLOC_RR, +#else + .tag_alloc_policy_rr = true, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + .use_blk_tags = 1, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0) + .max_segment_size = VHBA_KBUF_SIZE, +#endif +}; + +static ssize_t do_request (struct vhba_device *vdev, unsigned long metatag, struct scsi_cmnd *cmd, char __user *buf, size_t buf_len) +{ + struct vhba_request vreq; + ssize_t ret; + + scmd_dbg(cmd, "request %lu (%p), cdb 0x%x, bufflen %d, sg count %d\n", + metatag, cmd, cmd->cmnd[0], scsi_bufflen(cmd), scsi_sg_count(cmd)); + + ret = sizeof(vreq); + if (DATA_TO_DEVICE(cmd->sc_data_direction)) { + ret += scsi_bufflen(cmd); + } + + if (ret > buf_len) { + scmd_dbg(cmd, "buffer too small (%zd < %zd) for a request\n", buf_len, ret); + return -EIO; + } + + vreq.metatag = metatag; + vreq.lun = cmd->device->lun; + memcpy(vreq.cdb, cmd->cmnd, MAX_COMMAND_SIZE); + vreq.cdb_len = cmd->cmd_len; + vreq.data_len = scsi_bufflen(cmd); + + if (copy_to_user(buf, &vreq, sizeof(vreq))) { + return -EFAULT; + } + + if (DATA_TO_DEVICE(cmd->sc_data_direction) && vreq.data_len) { + buf += sizeof(vreq); + + if (scsi_sg_count(cmd)) { + unsigned char *kaddr, *uaddr; + struct scatterlist *sglist = scsi_sglist(cmd); + struct scatterlist *sg; + int i; + + uaddr = (unsigned char *) buf; + + for_each_sg(sglist, sg, scsi_sg_count(cmd), i) { + size_t len = sg->length; + + if (len > vdev->kbuf_size) { + scmd_dbg(cmd, "segment size (%zu) exceeds kbuf size (%zu)!", len, vdev->kbuf_size); + len = vdev->kbuf_size; + } + + kaddr = kmap_atomic(sg_page(sg)); + memcpy(vdev->kbuf, kaddr + sg->offset, len); + kunmap_atomic(kaddr); + + if (copy_to_user(uaddr, vdev->kbuf, len)) { + return -EFAULT; + } + uaddr += len; + } + } else { + if (copy_to_user(buf, scsi_sglist(cmd), vreq.data_len)) { + return -EFAULT; + } + } + } + + return ret; +} + +static ssize_t do_response (struct vhba_device *vdev, unsigned long metatag, struct scsi_cmnd *cmd, const char __user *buf, size_t buf_len, struct vhba_response *res) +{ + ssize_t ret = 0; + + scmd_dbg(cmd, "response %lu (%p), status %x, data len %d, sg count %d\n", + metatag, cmd, res->status, res->data_len, scsi_sg_count(cmd)); + + if (res->status) { + if (res->data_len > SCSI_SENSE_BUFFERSIZE) { + scmd_dbg(cmd, "truncate sense (%d < %d)", SCSI_SENSE_BUFFERSIZE, res->data_len); + res->data_len = SCSI_SENSE_BUFFERSIZE; + } + + if (copy_from_user(cmd->sense_buffer, buf, res->data_len)) { + return -EFAULT; + } + + cmd->result = res->status; + + ret += res->data_len; + } else if (DATA_FROM_DEVICE(cmd->sc_data_direction) && scsi_bufflen(cmd)) { + size_t to_read; + + if (res->data_len > scsi_bufflen(cmd)) { + scmd_dbg(cmd, "truncate data (%d < %d)\n", scsi_bufflen(cmd), res->data_len); + res->data_len = scsi_bufflen(cmd); + } + + to_read = res->data_len; + + if (scsi_sg_count(cmd)) { + unsigned char *kaddr, *uaddr; + struct scatterlist *sglist = scsi_sglist(cmd); + struct scatterlist *sg; + int i; + + uaddr = (unsigned char *)buf; + + for_each_sg(sglist, sg, scsi_sg_count(cmd), i) { + size_t len = (sg->length < to_read) ? sg->length : to_read; + + if (len > vdev->kbuf_size) { + scmd_dbg(cmd, "segment size (%zu) exceeds kbuf size (%zu)!", len, vdev->kbuf_size); + len = vdev->kbuf_size; + } + + if (copy_from_user(vdev->kbuf, uaddr, len)) { + return -EFAULT; + } + uaddr += len; + + kaddr = kmap_atomic(sg_page(sg)); + memcpy(kaddr + sg->offset, vdev->kbuf, len); + kunmap_atomic(kaddr); + + to_read -= len; + if (to_read == 0) { + break; + } + } + } else { + if (copy_from_user(scsi_sglist(cmd), buf, res->data_len)) { + return -EFAULT; + } + + to_read -= res->data_len; + } + + scsi_set_resid(cmd, to_read); + + ret += res->data_len - to_read; + } + + return ret; +} + +static struct vhba_command *next_command (struct vhba_device *vdev) +{ + struct vhba_command *vcmd; + + list_for_each_entry(vcmd, &vdev->cmd_list, entry) { + if (vcmd->status == VHBA_REQ_PENDING) { + break; + } + } + + if (&vcmd->entry == &vdev->cmd_list) { + vcmd = NULL; + } + + return vcmd; +} + +static struct vhba_command *match_command (struct vhba_device *vdev, __u32 metatag) +{ + struct vhba_command *vcmd; + + list_for_each_entry(vcmd, &vdev->cmd_list, entry) { + if (vcmd->metatag == metatag) { + break; + } + } + + if (&vcmd->entry == &vdev->cmd_list) { + vcmd = NULL; + } + + return vcmd; +} + +static struct vhba_command *wait_command (struct vhba_device *vdev, unsigned long flags) +{ + struct vhba_command *vcmd; + DEFINE_WAIT(wait); + + while (!(vcmd = next_command(vdev))) { + if (signal_pending(current)) { + break; + } + + prepare_to_wait(&vdev->cmd_wq, &wait, TASK_INTERRUPTIBLE); + + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + schedule(); + + spin_lock_irqsave(&vdev->cmd_lock, flags); + } + + finish_wait(&vdev->cmd_wq, &wait); + if (vcmd) { + vcmd->status = VHBA_REQ_READING; + } + + return vcmd; +} + +static ssize_t vhba_ctl_read (struct file *file, char __user *buf, size_t buf_len, loff_t *offset) +{ + struct vhba_device *vdev; + struct vhba_command *vcmd; + ssize_t ret; + unsigned long flags; + + vdev = file->private_data; + + /* Get next command */ + if (file->f_flags & O_NONBLOCK) { + /* Non-blocking variant */ + spin_lock_irqsave(&vdev->cmd_lock, flags); + vcmd = next_command(vdev); + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + if (!vcmd) { + return -EWOULDBLOCK; + } + } else { + /* Blocking variant */ + spin_lock_irqsave(&vdev->cmd_lock, flags); + vcmd = wait_command(vdev, flags); + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + if (!vcmd) { + return -ERESTARTSYS; + } + } + + ret = do_request(vdev, vcmd->metatag, vcmd->cmd, buf, buf_len); + + spin_lock_irqsave(&vdev->cmd_lock, flags); + if (ret >= 0) { + vcmd->status = VHBA_REQ_SENT; + *offset += ret; + } else { + vcmd->status = VHBA_REQ_PENDING; + } + + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + return ret; +} + +static ssize_t vhba_ctl_write (struct file *file, const char __user *buf, size_t buf_len, loff_t *offset) +{ + struct vhba_device *vdev; + struct vhba_command *vcmd; + struct vhba_response res; + ssize_t ret; + unsigned long flags; + + if (buf_len < sizeof(res)) { + return -EIO; + } + + if (copy_from_user(&res, buf, sizeof(res))) { + return -EFAULT; + } + + vdev = file->private_data; + + spin_lock_irqsave(&vdev->cmd_lock, flags); + vcmd = match_command(vdev, res.metatag); + if (!vcmd || vcmd->status != VHBA_REQ_SENT) { + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + pr_debug("ctl dev #%u not expecting response\n", vdev->num); + return -EIO; + } + vcmd->status = VHBA_REQ_WRITING; + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + ret = do_response(vdev, vcmd->metatag, vcmd->cmd, buf + sizeof(res), buf_len - sizeof(res), &res); + + spin_lock_irqsave(&vdev->cmd_lock, flags); + if (ret >= 0) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + scsi_done(vcmd->cmd); +#else + vcmd->cmd->scsi_done(vcmd->cmd); +#endif + ret += sizeof(res); + + /* don't compete with vhba_device_dequeue */ + if (!list_empty(&vcmd->entry)) { + list_del_init(&vcmd->entry); + vhba_free_command(vcmd); + } + } else { + vcmd->status = VHBA_REQ_SENT; + } + + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + return ret; +} + +static long vhba_ctl_ioctl (struct file *file, unsigned int cmd, unsigned long arg) +{ + struct vhba_device *vdev = file->private_data; + struct vhba_host *vhost = platform_get_drvdata(&vhba_platform_device); + + switch (cmd) { + case 0xBEEF001: { + unsigned int ident[4]; /* host, channel, id, lun */ + + ident[0] = vhost->shost->host_no; + devnum_to_bus_and_id(vdev->num, &ident[1], &ident[2]); + ident[3] = 0; /* lun */ + + if (copy_to_user((void *) arg, ident, sizeof(ident))) { + return -EFAULT; + } + + return 0; + } + case 0xBEEF002: { + unsigned int devnum = vdev->num; + + if (copy_to_user((void *) arg, &devnum, sizeof(devnum))) { + return -EFAULT; + } + + return 0; + } + } + + return -ENOTTY; +} + +#ifdef CONFIG_COMPAT +static long vhba_ctl_compat_ioctl (struct file *file, unsigned int cmd, unsigned long arg) +{ + unsigned long compat_arg = (unsigned long)compat_ptr(arg); + return vhba_ctl_ioctl(file, cmd, compat_arg); +} +#endif + +static unsigned int vhba_ctl_poll (struct file *file, poll_table *wait) +{ + struct vhba_device *vdev = file->private_data; + unsigned int mask = 0; + unsigned long flags; + + poll_wait(file, &vdev->cmd_wq, wait); + + spin_lock_irqsave(&vdev->cmd_lock, flags); + if (next_command(vdev)) { + mask |= POLLIN | POLLRDNORM; + } + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + return mask; +} + +static int vhba_ctl_open (struct inode *inode, struct file *file) +{ + struct vhba_device *vdev; + int retval; + + pr_debug("ctl dev open\n"); + + /* check if vhba is probed */ + if (!platform_get_drvdata(&vhba_platform_device)) { + return -ENODEV; + } + + vdev = vhba_device_alloc(); + if (!vdev) { + return -ENOMEM; + } + + vdev->kbuf_size = VHBA_KBUF_SIZE; + vdev->kbuf = kzalloc(vdev->kbuf_size, GFP_KERNEL); + if (!vdev->kbuf) { + return -ENOMEM; + } + + if (!(retval = vhba_add_device(vdev))) { + file->private_data = vdev; + } + + vhba_device_put(vdev); + + return retval; +} + +static int vhba_ctl_release (struct inode *inode, struct file *file) +{ + struct vhba_device *vdev; + struct vhba_command *vcmd; + unsigned long flags; + + vdev = file->private_data; + + pr_debug("ctl dev release\n"); + + vhba_device_get(vdev); + vhba_remove_device(vdev); + + spin_lock_irqsave(&vdev->cmd_lock, flags); + list_for_each_entry(vcmd, &vdev->cmd_list, entry) { + WARN_ON(vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING); + + scmd_dbg(vcmd->cmd, "device released with command %lu (%p)\n", vcmd->metatag, vcmd->cmd); + vcmd->cmd->result = DID_NO_CONNECT << 16; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + scsi_done(vcmd->cmd); +#else + vcmd->cmd->scsi_done(vcmd->cmd); +#endif + vhba_free_command(vcmd); + } + INIT_LIST_HEAD(&vdev->cmd_list); + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + kfree(vdev->kbuf); + vdev->kbuf = NULL; + + vhba_device_put(vdev); + + return 0; +} + +static struct file_operations vhba_ctl_fops = { + .owner = THIS_MODULE, + .open = vhba_ctl_open, + .release = vhba_ctl_release, + .read = vhba_ctl_read, + .write = vhba_ctl_write, + .poll = vhba_ctl_poll, + .unlocked_ioctl = vhba_ctl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vhba_ctl_compat_ioctl, +#endif +}; + +static struct miscdevice vhba_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "vhba_ctl", + .fops = &vhba_ctl_fops, +}; + +static int vhba_probe (struct platform_device *pdev) +{ + struct Scsi_Host *shost; + struct vhba_host *vhost; + int i; + + vhba_can_queue = clamp(vhba_can_queue, 1, 256); + + shost = scsi_host_alloc(&vhba_template, sizeof(struct vhba_host)); + if (!shost) { + return -ENOMEM; + } + + shost->max_channel = VHBA_MAX_BUS-1; + shost->max_id = VHBA_MAX_ID; + /* we don't support lun > 0 */ + shost->max_lun = 1; + shost->max_cmd_len = MAX_COMMAND_SIZE; + shost->can_queue = vhba_can_queue; + shost->cmd_per_lun = vhba_can_queue; + + vhost = (struct vhba_host *)shost->hostdata; + memset(vhost, 0, sizeof(struct vhba_host)); + + vhost->shost = shost; + vhost->num_devices = 0; + spin_lock_init(&vhost->dev_lock); + spin_lock_init(&vhost->cmd_lock); + INIT_WORK(&vhost->scan_devices, vhba_scan_devices); + vhost->cmd_next = 0; + vhost->commands = kzalloc(vhba_can_queue * sizeof(struct vhba_command), GFP_KERNEL); + if (!vhost->commands) { + return -ENOMEM; + } + + for (i = 0; i < vhba_can_queue; i++) { + vhost->commands[i].status = VHBA_REQ_FREE; + } + + platform_set_drvdata(pdev, vhost); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + i = scsi_init_shared_tag_map(shost, vhba_can_queue); + if (i) return i; +#endif + + if (scsi_add_host(shost, &pdev->dev)) { + scsi_host_put(shost); + return -ENOMEM; + } + + return 0; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0) +static int vhba_remove (struct platform_device *pdev) +#else +static void vhba_remove (struct platform_device *pdev) +#endif +{ + struct vhba_host *vhost; + struct Scsi_Host *shost; + + vhost = platform_get_drvdata(pdev); + shost = vhost->shost; + + scsi_remove_host(shost); + scsi_host_put(shost); + + kfree(vhost->commands); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0) + return 0; +#endif +} + +static void vhba_release (struct device * dev) +{ + return; +} + +static struct platform_device vhba_platform_device = { + .name = "vhba", + .id = -1, + .dev = { + .release = vhba_release, + }, +}; + +static struct platform_driver vhba_platform_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "vhba", + }, + .probe = vhba_probe, + .remove = vhba_remove, +}; + +static int __init vhba_init (void) +{ + int ret; + + ret = platform_device_register(&vhba_platform_device); + if (ret < 0) { + return ret; + } + + ret = platform_driver_register(&vhba_platform_driver); + if (ret < 0) { + platform_device_unregister(&vhba_platform_device); + return ret; + } + + ret = misc_register(&vhba_miscdev); + if (ret < 0) { + platform_driver_unregister(&vhba_platform_driver); + platform_device_unregister(&vhba_platform_device); + return ret; + } + + return 0; +} + +static void __exit vhba_exit(void) +{ + misc_deregister(&vhba_miscdev); + platform_driver_unregister(&vhba_platform_driver); + platform_device_unregister(&vhba_platform_device); +} + +module_init(vhba_init); +module_exit(vhba_exit); + From 18ff7a3932e99618172aae097a1cdebc6b137861 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Sun, 1 Dec 2024 03:41:35 +0800 Subject: [PATCH 2859/2924] CACHY: Use BFQ as the elevator for SQ devices Signed-off-by: Eric Naim --- block/elevator.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/elevator.c b/block/elevator.c index dc4cadef728e55..dadd5abca3a173 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -560,7 +560,11 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) !blk_mq_is_shared_tags(q->tag_set->flags)) return NULL; +#if defined(CONFIG_CACHY) && defined(CONFIG_IOSCHED_BFQ) + return elevator_find_get("bfq"); +#else return elevator_find_get("mq-deadline"); +#endif } /* From 2ea2f38ea34395f52780add2c0924320ce25085b Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Sun, 1 Dec 2024 03:42:36 +0800 Subject: [PATCH 2860/2924] CACHY: Use mq-deadline as the elevator for MQ devices Signed-off-by: Eric Naim --- block/elevator.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/elevator.c b/block/elevator.c index dadd5abca3a173..d82ce6f13478e9 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -558,7 +558,11 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) if (q->nr_hw_queues != 1 && !blk_mq_is_shared_tags(q->tag_set->flags)) +#if defined(CONFIG_CACHY) + return elevator_find_get("mq-deadline"); +#else return NULL; +#endif #if defined(CONFIG_CACHY) && defined(CONFIG_IOSCHED_BFQ) return elevator_find_get("bfq"); From 5bcd704defbfdccc2bde2a1f55b9601514d11f65 Mon Sep 17 00:00:00 2001 From: Vasiliy Stelmachenok Date: Tue, 31 Oct 2023 11:04:44 +0300 Subject: [PATCH 2861/2924] Kconfig: Remove CONFIG_EXPERT dependency from PREEMPT_RT It's just a headache to have a bunch of garbage in your config for the sake of enabling one option. It makes it much easier to keep our config and maintain it relative to other cachy kernels. Signed-off-by: Eric Naim --- kernel/Kconfig.preempt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 54ea59ff8fbeb6..18f87e0dd137be 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -88,7 +88,7 @@ endchoice config PREEMPT_RT bool "Fully Preemptible Kernel (Real-Time)" - depends on EXPERT && ARCH_SUPPORTS_RT && !COMPILE_TEST + depends on ARCH_SUPPORTS_RT && !COMPILE_TEST select PREEMPTION help This option turns the kernel into a real-time kernel by replacing From e885dcf144dda80197fcb3638d921ac5fae96fee Mon Sep 17 00:00:00 2001 From: Masahito S Date: Tue, 4 Feb 2025 19:13:20 +0900 Subject: [PATCH 2862/2924] ADIOS v1.5.4 --- block/Kconfig.iosched | 9 + block/Makefile | 8 + block/adios.c | 1342 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1359 insertions(+) create mode 100644 block/adios.c diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 27f11320b8d12d..79fd5da5dd169a 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -16,6 +16,15 @@ config MQ_IOSCHED_KYBER synchronous writes, it will self-tune queue depths to achieve that goal. +config MQ_IOSCHED_ADIOS + tristate "Adaptive Deadline I/O scheduler" + default m + help + ADIOS is a multi-queue I/O scheduler for the Linux kernel, based on + mq-deadline and Kyber, with learning-based adaptive latency control. + It aims to provide low latency for synchronous requests while + maintaining high throughput for asynchronous requests and bulk I/O. + config IOSCHED_BFQ tristate "BFQ I/O scheduler" select BLK_ICQ diff --git a/block/Makefile b/block/Makefile index 3a941dc0d27fb2..94e3bf608bd2d1 100644 --- a/block/Makefile +++ b/block/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o +obj-$(CONFIG_MQ_IOSCHED_ADIOS) += adios.o bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o @@ -37,3 +38,10 @@ obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \ blk-crypto-sysfs.o obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o + +all: + make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules + +clean: + make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean + diff --git a/block/adios.c b/block/adios.c new file mode 100644 index 00000000000000..a35fcf6e45697c --- /dev/null +++ b/block/adios.c @@ -0,0 +1,1342 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The Adaptive Deadline I/O Scheduler (ADIOS) + * Based on mq-deadline and Kyber, + * with learning-based adaptive latency control + * + * Copyright (C) 2025 Masahito Suzuki + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "elevator.h" +#include "blk.h" +#include "blk-mq.h" +#include "blk-mq-sched.h" + +#define ADIOS_VERSION "1.5.4" + +// Define operation types supported by ADIOS +enum adios_op_type { + ADIOS_READ = 0, + ADIOS_WRITE = 1, + ADIOS_DISCARD = 2, + ADIOS_OTHER = 3, + ADIOS_OPTYPES = 4, +}; + +// Global variable to control the latency +static u64 default_global_latency_window = 16000000ULL; +// Ratio below which batch queues should be refilled +static u8 default_bq_refill_below_ratio = 15; + +// Dynamic thresholds for shrinkage +static u32 default_lm_shrink_at_kreqs = 10000; +static u32 default_lm_shrink_at_gbytes = 100; +static u32 default_lm_shrink_resist = 2; + +// Latency targets for each operation type +static u64 default_latency_target[ADIOS_OPTYPES] = { + [ADIOS_READ] = 1ULL * NSEC_PER_MSEC, + [ADIOS_WRITE] = 2000ULL * NSEC_PER_MSEC, + [ADIOS_DISCARD] = 8000ULL * NSEC_PER_MSEC, + [ADIOS_OTHER] = 0ULL * NSEC_PER_MSEC, +}; + +// Maximum batch size limits for each operation type +static u32 default_batch_limit[ADIOS_OPTYPES] = { + [ADIOS_READ] = 24, + [ADIOS_WRITE] = 48, + [ADIOS_DISCARD] = 1, + [ADIOS_OTHER] = 1, +}; + +static u32 default_dl_prio[2] = {7, 0}; + +// Thresholds for latency model control +#define LM_BLOCK_SIZE_THRESHOLD 4096 +#define LM_SAMPLES_THRESHOLD 1024 +#define LM_INTERVAL_THRESHOLD 1500 +#define LM_OUTLIER_PERCENTILE 99 +#define LM_LAT_BUCKET_COUNT 64 + +// Structure to hold latency bucket data for small requests +struct latency_bucket_small { + u64 sum_latency; + u32 count; +}; + +// Structure to hold latency bucket data for large requests +struct latency_bucket_large { + u64 sum_latency; + u64 sum_block_size; + u32 count; +}; + +// Structure to hold the latency model context data +struct latency_model { + spinlock_t lock; + u64 base; + u64 slope; + u64 small_sum_delay; + u64 small_count; + u64 large_sum_delay; + u64 large_sum_bsize; + u64 last_update_jiffies; + + spinlock_t buckets_lock; + struct latency_bucket_small small_bucket[LM_LAT_BUCKET_COUNT]; + struct latency_bucket_large large_bucket[LM_LAT_BUCKET_COUNT]; + + u32 lm_shrink_at_kreqs; + u32 lm_shrink_at_gbytes; + u8 lm_shrink_resist; +}; + +#define ADIOS_BQ_PAGES 2 + +// Adios scheduler data +struct adios_data { + spinlock_t pq_lock; + struct list_head prio_queue; + + struct rb_root_cached dl_tree[2]; + spinlock_t lock; + u8 dl_queued; + s64 dl_bias; + s32 dl_prio[2]; + + u64 global_latency_window; + u64 latency_target[ADIOS_OPTYPES]; + u32 batch_limit[ADIOS_OPTYPES]; + u32 batch_actual_max_size[ADIOS_OPTYPES]; + u32 batch_actual_max_total; + u32 async_depth; + u8 bq_refill_below_ratio; + + u8 bq_page; + bool more_bq_ready; + struct list_head batch_queue[ADIOS_BQ_PAGES][ADIOS_OPTYPES]; + u32 batch_count[ADIOS_BQ_PAGES][ADIOS_OPTYPES]; + spinlock_t bq_lock; + + struct latency_model latency_model[ADIOS_OPTYPES]; + struct timer_list update_timer; + + atomic64_t total_pred_lat; + + struct kmem_cache *rq_data_pool; + struct kmem_cache *dl_group_pool; +}; + +// List of requests with the same deadline in the deadline-sorted tree +struct dl_group { + struct rb_node node; + struct list_head rqs; + u64 deadline; +} __attribute__((aligned(64))); + +// Structure to hold scheduler-specific data for each request +struct adios_rq_data { + struct list_head *dl_group; + struct list_head dl_node; + + struct request *rq; + u64 deadline; + u64 pred_lat; + u32 block_size; +} __attribute__((aligned(64))); + +static const int adios_prio_to_weight[40] = { + /* -20 */ 88761, 71755, 56483, 46273, 36291, + /* -15 */ 29154, 23254, 18705, 14949, 11916, + /* -10 */ 9548, 7620, 6100, 4904, 3906, + /* -5 */ 3121, 2501, 1991, 1586, 1277, + /* 0 */ 1024, 820, 655, 526, 423, + /* 5 */ 335, 272, 215, 172, 137, + /* 10 */ 110, 87, 70, 56, 45, + /* 15 */ 36, 29, 23, 18, 15, +}; + +// Count the number of entries in small buckets +static u32 lm_count_small_entries(struct latency_model *model) { + u32 total_count = 0; + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) + total_count += model->small_bucket[i].count; + return total_count; +} + +// Update the small buckets in the latency model +static bool lm_update_small_buckets(struct latency_model *model, + u32 total_count, bool count_all) { + u64 sum_latency = 0; + u32 sum_count = 0; + u32 cumulative_count = 0, threshold_count = 0; + u8 outlier_threshold_bucket = 0; + u8 outlier_percentile = LM_OUTLIER_PERCENTILE; + u8 reduction; + + if (count_all) + outlier_percentile = 100; + + // Calculate the threshold count for outlier detection + threshold_count = (total_count * outlier_percentile) / 100; + + // Identify the bucket that corresponds to the outlier threshold + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { + cumulative_count += model->small_bucket[i].count; + if (cumulative_count >= threshold_count) { + outlier_threshold_bucket = i; + break; + } + } + + // Calculate the average latency, excluding outliers + for (u8 i = 0; i <= outlier_threshold_bucket; i++) { + struct latency_bucket_small *bucket = &model->small_bucket[i]; + if (i < outlier_threshold_bucket) { + sum_latency += bucket->sum_latency; + sum_count += bucket->count; + } else { + // The threshold bucket's contribution is proportional + u64 remaining_count = + threshold_count - (cumulative_count - bucket->count); + if (bucket->count > 0) { + sum_latency += + (bucket->sum_latency * remaining_count) / bucket->count; + sum_count += remaining_count; + } + } + } + + // Shrink the model if it reaches at the readjustment threshold + if (model->small_count >= 1000ULL * model->lm_shrink_at_kreqs) { + reduction = model->lm_shrink_resist; + if (model->small_count >> reduction) { + model->small_sum_delay -= model->small_sum_delay >> reduction; + model->small_count -= model->small_count >> reduction; + } + } + + // Accumulate the average latency into the statistics + model->small_sum_delay += sum_latency; + model->small_count += sum_count; + + // Reset small bucket information + memset(model->small_bucket, 0, + sizeof(model->small_bucket[0]) * LM_LAT_BUCKET_COUNT); + + return true; +} + +// Count the number of entries in large buckets +static u32 lm_count_large_entries(struct latency_model *model) { + u32 total_count = 0; + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) + total_count += model->large_bucket[i].count; + return total_count; +} + +// Update the large buckets in the latency model +static bool lm_update_large_buckets( + struct latency_model *model, + u32 total_count, bool count_all) { + s64 sum_latency = 0; + u64 sum_block_size = 0, intercept; + u32 cumulative_count = 0, threshold_count = 0; + u8 outlier_threshold_bucket = 0; + u8 outlier_percentile = LM_OUTLIER_PERCENTILE; + u8 reduction; + + if (count_all) + outlier_percentile = 100; + + // Calculate the threshold count for outlier detection + threshold_count = (total_count * outlier_percentile) / 100; + + // Identify the bucket that corresponds to the outlier threshold + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { + cumulative_count += model->large_bucket[i].count; + if (cumulative_count >= threshold_count) { + outlier_threshold_bucket = i; + break; + } + } + + // Calculate the average latency and block size, excluding outliers + for (u8 i = 0; i <= outlier_threshold_bucket; i++) { + struct latency_bucket_large *bucket = &model->large_bucket[i]; + if (i < outlier_threshold_bucket) { + sum_latency += bucket->sum_latency; + sum_block_size += bucket->sum_block_size; + } else { + // The threshold bucket's contribution is proportional + u64 remaining_count = + threshold_count - (cumulative_count - bucket->count); + if (bucket->count > 0) { + sum_latency += + (bucket->sum_latency * remaining_count) / bucket->count; + sum_block_size += + (bucket->sum_block_size * remaining_count) / bucket->count; + } + } + } + + // Shrink the model if it reaches at the readjustment threshold + if (model->large_sum_bsize >= 0x40000000ULL * model->lm_shrink_at_gbytes) { + reduction = model->lm_shrink_resist; + if (model->large_sum_bsize >> reduction) { + model->large_sum_delay -= model->large_sum_delay >> reduction; + model->large_sum_bsize -= model->large_sum_bsize >> reduction; + } + } + + // Accumulate the average delay into the statistics + intercept = model->base * threshold_count; + if (sum_latency > intercept) + sum_latency -= intercept; + + model->large_sum_delay += sum_latency; + model->large_sum_bsize += sum_block_size; + + // Reset large bucket information + memset(model->large_bucket, 0, + sizeof(model->large_bucket[0]) * LM_LAT_BUCKET_COUNT); + + return true; +} + +// Update the latency model parameters and statistics +static void latency_model_update(struct latency_model *model) { + unsigned long flags; + u64 now; + u32 small_count, large_count; + bool time_elapsed; + bool small_processed = false, large_processed = false; + + guard(spinlock_irqsave)(&model->lock); + + spin_lock_irqsave(&model->buckets_lock, flags); + + // Whether enough time has elapsed since the last update + now = jiffies; + time_elapsed = unlikely(!model->base) || model->last_update_jiffies + + msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; + + // Count the number of entries in buckets + small_count = lm_count_small_entries(model); + large_count = lm_count_large_entries(model); + + // Update small buckets + if (small_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= small_count || !model->base)) + small_processed = lm_update_small_buckets( + model, small_count, !model->base); + // Update large buckets + if (large_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= large_count || !model->slope)) + large_processed = lm_update_large_buckets( + model, large_count, !model->slope); + + spin_unlock_irqrestore(&model->buckets_lock, flags); + + // Update the base parameter if small bucket was processed + if (small_processed && likely(model->small_count)) + model->base = div_u64(model->small_sum_delay, model->small_count); + + // Update the slope parameter if large bucket was processed + if (large_processed && likely(model->large_sum_bsize)) + model->slope = div_u64(model->large_sum_delay, + DIV_ROUND_UP_ULL(model->large_sum_bsize, 1024)); + + // Reset statistics and update last updated jiffies if time has elapsed + if (time_elapsed) + model->last_update_jiffies = now; +} + +// Determine the bucket index for a given measured and predicted latency +static u8 lm_input_bucket_index( + struct latency_model *model, u64 measured, u64 predicted) { + u8 bucket_index; + + if (measured < predicted * 2) + bucket_index = (measured * 20) / predicted; + else if (measured < predicted * 5) + bucket_index = (measured * 10) / predicted + 20; + else + bucket_index = (measured * 3) / predicted + 40; + + return bucket_index; +} + +// Input latency data into the latency model +static void latency_model_input(struct latency_model *model, + u32 block_size, u64 latency, u64 pred_lat) { + unsigned long flags; + u8 bucket_index; + + spin_lock_irqsave(&model->buckets_lock, flags); + + if (block_size <= LM_BLOCK_SIZE_THRESHOLD) { + // Handle small requests + bucket_index = lm_input_bucket_index(model, latency, model->base ?: 1); + + if (bucket_index >= LM_LAT_BUCKET_COUNT) + bucket_index = LM_LAT_BUCKET_COUNT - 1; + + model->small_bucket[bucket_index].count++; + model->small_bucket[bucket_index].sum_latency += latency; + + if (unlikely(!model->base)) { + spin_unlock_irqrestore(&model->buckets_lock, flags); + latency_model_update(model); + return; + } + } else { + // Handle large requests + if (!model->base || !pred_lat) { + spin_unlock_irqrestore(&model->buckets_lock, flags); + return; + } + + bucket_index = lm_input_bucket_index(model, latency, pred_lat); + + if (bucket_index >= LM_LAT_BUCKET_COUNT) + bucket_index = LM_LAT_BUCKET_COUNT - 1; + + model->large_bucket[bucket_index].count++; + model->large_bucket[bucket_index].sum_latency += latency; + model->large_bucket[bucket_index].sum_block_size += block_size; + } + + spin_unlock_irqrestore(&model->buckets_lock, flags); +} + +// Predict the latency for a given block size using the latency model +static u64 latency_model_predict(struct latency_model *model, u32 block_size) { + u64 result; + + guard(spinlock_irqsave)(&model->lock); + // Predict latency based on the model + result = model->base; + if (block_size > LM_BLOCK_SIZE_THRESHOLD) + result += model->slope * + DIV_ROUND_UP_ULL(block_size - LM_BLOCK_SIZE_THRESHOLD, 1024); + + return result; +} + +// Determine the type of operation based on request flags +static u8 adios_optype(struct request *rq) { + switch (rq->cmd_flags & REQ_OP_MASK) { + case REQ_OP_READ: + return ADIOS_READ; + case REQ_OP_WRITE: + return ADIOS_WRITE; + case REQ_OP_DISCARD: + return ADIOS_DISCARD; + default: + return ADIOS_OTHER; + } +} + +static inline u8 adios_optype_not_read(struct request *rq) { + return (rq->cmd_flags & REQ_OP_MASK) != REQ_OP_READ; +} + +// Helper function to retrieve adios_rq_data from a request +static inline struct adios_rq_data *get_rq_data(struct request *rq) { + return rq->elv.priv[0]; +} + +// Add a request to the deadline-sorted red-black tree +static void add_to_dl_tree( + struct adios_data *ad, bool dl_idx, struct request *rq) { + struct rb_root_cached *root = &ad->dl_tree[dl_idx]; + struct rb_node **link = &(root->rb_root.rb_node), *parent = NULL; + bool leftmost = true; + struct adios_rq_data *rd = get_rq_data(rq); + struct dl_group *dlg; + + rd->block_size = blk_rq_bytes(rq); + u8 optype = adios_optype(rq); + rd->pred_lat = + latency_model_predict(&ad->latency_model[optype], rd->block_size); + rd->deadline = + rq->start_time_ns + ad->latency_target[optype] + rd->pred_lat; + + while (*link) { + dlg = rb_entry(*link, struct dl_group, node); + s64 diff = rd->deadline - dlg->deadline; + + parent = *link; + if (diff < 0) { + link = &((*link)->rb_left); + } else if (diff > 0) { + link = &((*link)->rb_right); + leftmost = false; + } else { // diff == 0 + goto found; + } + } + + dlg = rb_entry_safe(parent, struct dl_group, node); + if (!dlg || dlg->deadline != rd->deadline) { + dlg = kmem_cache_zalloc(ad->dl_group_pool, GFP_ATOMIC); + if (!dlg) + return; + dlg->deadline = rd->deadline; + INIT_LIST_HEAD(&dlg->rqs); + rb_link_node(&dlg->node, parent, link); + rb_insert_color_cached(&dlg->node, root, leftmost); + } +found: + list_add_tail(&rd->dl_node, &dlg->rqs); + rd->dl_group = &dlg->rqs; + ad->dl_queued |= 1 << dl_idx; +} + +// Remove a request from the deadline-sorted red-black tree +static void del_from_dl_tree( + struct adios_data *ad, bool dl_idx, struct request *rq) { + struct rb_root_cached *root = &ad->dl_tree[dl_idx]; + struct adios_rq_data *rd = get_rq_data(rq); + struct dl_group *dlg = container_of(rd->dl_group, struct dl_group, rqs); + + list_del_init(&rd->dl_node); + if (list_empty(&dlg->rqs)) { + rb_erase_cached(&dlg->node, root); + kmem_cache_free(ad->dl_group_pool, dlg); + } + rd->dl_group = NULL; + + if (RB_EMPTY_ROOT(&ad->dl_tree[dl_idx].rb_root)) + ad->dl_queued &= ~(1 << dl_idx); +} + +// Remove a request from the scheduler +static void remove_request(struct adios_data *ad, struct request *rq) { + bool dl_idx = adios_optype_not_read(rq); + struct request_queue *q = rq->q; + struct adios_rq_data *rd = get_rq_data(rq); + + list_del_init(&rq->queuelist); + + // We might not be on the rbtree, if we are doing an insert merge + if (rd->dl_group) + del_from_dl_tree(ad, dl_idx, rq); + + elv_rqhash_del(q, rq); + if (q->last_merge == rq) + q->last_merge = NULL; +} + +// Convert a queue depth to the corresponding word depth for shallow allocation +static int to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth) { + struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags; + const unsigned int nrr = hctx->queue->nr_requests; + + return ((qdepth << bt->sb.shift) + nrr - 1) / nrr; +} + +// Limit the depth of request allocation for asynchronous and write requests +static void adios_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { + struct adios_data *ad = data->q->elevator->elevator_data; + + // Do not throttle synchronous reads + if (op_is_sync(opf) && !op_is_write(opf)) + return; + + data->shallow_depth = to_word_depth(data->hctx, ad->async_depth); +} + +// Update async_depth when the number of requests in the queue changes +static void adios_depth_updated(struct blk_mq_hw_ctx *hctx) { + struct request_queue *q = hctx->queue; + struct adios_data *ad = q->elevator->elevator_data; + struct blk_mq_tags *tags = hctx->sched_tags; + + ad->async_depth = q->nr_requests; + + sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1); +} + +// Handle request merging after a merge operation +static void adios_request_merged(struct request_queue *q, struct request *req, + enum elv_merge type) { + bool dl_idx = adios_optype_not_read(req); + struct adios_data *ad = q->elevator->elevator_data; + + // if the merge was a front merge, we need to reposition request + if (type == ELEVATOR_FRONT_MERGE) { + del_from_dl_tree(ad, dl_idx, req); + add_to_dl_tree(ad, dl_idx, req); + } +} + +// Handle merging of requests after one has been merged into another +static void adios_merged_requests(struct request_queue *q, struct request *req, + struct request *next) { + struct adios_data *ad = q->elevator->elevator_data; + + lockdep_assert_held(&ad->lock); + + // kill knowledge of next, this one is a goner + remove_request(ad, next); +} + +// Try to merge a bio into an existing rq before associating it with an rq +static bool adios_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs) { + unsigned long flags; + struct adios_data *ad = q->elevator->elevator_data; + struct request *free = NULL; + bool ret; + + spin_lock_irqsave(&ad->lock, flags); + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); + spin_unlock_irqrestore(&ad->lock, flags); + + if (free) + blk_mq_free_request(free); + + return ret; +} + +// Insert a request into the scheduler +static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + blk_insert_t insert_flags, struct list_head *free) { + unsigned long flags; + bool dl_idx = adios_optype_not_read(rq); + struct request_queue *q = hctx->queue; + struct adios_data *ad = q->elevator->elevator_data; + + lockdep_assert_held(&ad->lock); + + if (insert_flags & BLK_MQ_INSERT_AT_HEAD) { + spin_lock_irqsave(&ad->pq_lock, flags); + list_add(&rq->queuelist, &ad->prio_queue); + spin_unlock_irqrestore(&ad->pq_lock, flags); + return; + } + + if (blk_mq_sched_try_insert_merge(q, rq, free)) + return; + + add_to_dl_tree(ad, dl_idx, rq); + + if (rq_mergeable(rq)) { + elv_rqhash_add(q, rq); + if (!q->last_merge) + q->last_merge = rq; + } +} + +// Insert multiple requests into the scheduler +static void adios_insert_requests(struct blk_mq_hw_ctx *hctx, + struct list_head *list, + blk_insert_t insert_flags) { + unsigned long flags; + struct request_queue *q = hctx->queue; + struct adios_data *ad = q->elevator->elevator_data; + LIST_HEAD(free); + + spin_lock_irqsave(&ad->lock, flags); + while (!list_empty(list)) { + struct request *rq; + + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + insert_request(hctx, rq, insert_flags, &free); + } + spin_unlock_irqrestore(&ad->lock, flags); + + blk_mq_free_requests(&free); +} + +// Prepare a request before it is inserted into the scheduler +static void adios_prepare_request(struct request *rq) { + struct adios_data *ad = rq->q->elevator->elevator_data; + struct adios_rq_data *rd; + + rq->elv.priv[0] = NULL; + + /* Allocate adios_rq_data from the memory pool */ + rd = kmem_cache_zalloc(ad->rq_data_pool, GFP_ATOMIC); + if (WARN(!rd, "adios_prepare_request: " + "Failed to allocate memory from rq_data_pool. rd is NULL\n")) + return; + + rd->rq = rq; + rq->elv.priv[0] = rd; +} + +static struct adios_rq_data *get_dl_first_rd(struct adios_data *ad, bool idx) { + struct rb_root_cached *root = &ad->dl_tree[idx]; + struct rb_node *first = rb_first_cached(root); + struct dl_group *dl_group = rb_entry(first, struct dl_group, node); + + return list_first_entry(&dl_group->rqs, struct adios_rq_data, dl_node); +} + +// Select the next request to dispatch from the deadline-sorted red-black tree +static struct request *next_request(struct adios_data *ad) { + struct adios_rq_data *rd; + bool dl_idx, bias_idx, reduce_bias; + + if (!ad->dl_queued) + return NULL; + + dl_idx = ad->dl_queued >> 1; + rd = get_dl_first_rd(ad, dl_idx); + + bias_idx = ad->dl_bias < 0; + reduce_bias = (bias_idx == dl_idx); + + if (ad->dl_queued == 0x3) { + struct adios_rq_data *trd[2]; + trd[0] = get_dl_first_rd(ad, 0); + trd[1] = rd; + + rd = trd[bias_idx]; + + reduce_bias = + (trd[bias_idx]->deadline > trd[((u8)bias_idx + 1) % 2]->deadline); + } + + if (reduce_bias) { + s64 sign = ((int)bias_idx << 1) - 1; + if (unlikely(!rd->pred_lat)) + ad->dl_bias = sign; + else { + ad->dl_bias += sign * (s64)((rd->pred_lat * + adios_prio_to_weight[ad->dl_prio[bias_idx] + 20]) >> 10); + } + } + + return rd->rq; +} + +// Reset the batch queue counts for a given page +static void reset_batch_counts(struct adios_data *ad, u8 page) { + memset(&ad->batch_count[page], 0, sizeof(ad->batch_count[page])); +} + +// Initialize all batch queues +static void init_batch_queues(struct adios_data *ad) { + for (u8 page = 0; page < ADIOS_BQ_PAGES; page++) { + reset_batch_counts(ad, page); + + for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) + INIT_LIST_HEAD(&ad->batch_queue[page][optype]); + } +} + +// Fill the batch queues with requests from the deadline-sorted red-black tree +static bool fill_batch_queues(struct adios_data *ad, u64 current_lat) { + unsigned long flags; + u32 count = 0; + u32 optype_count[ADIOS_OPTYPES] = {0}; + u8 page = (ad->bq_page + 1) % ADIOS_BQ_PAGES; + + reset_batch_counts(ad, page); + + spin_lock_irqsave(&ad->lock, flags); + while (true) { + struct request *rq = next_request(ad); + if (!rq) + break; + + struct adios_rq_data *rd = get_rq_data(rq); + u8 optype = adios_optype(rq); + current_lat += rd->pred_lat; + + // Check batch size and total predicted latency + if (count && (!ad->latency_model[optype].base || + ad->batch_count[page][optype] >= ad->batch_limit[optype] || + current_lat > ad->global_latency_window)) { + break; + } + + remove_request(ad, rq); + + // Add request to the corresponding batch queue + list_add_tail(&rq->queuelist, &ad->batch_queue[page][optype]); + ad->batch_count[page][optype]++; + atomic64_add(rd->pred_lat, &ad->total_pred_lat); + optype_count[optype]++; + count++; + } + spin_unlock_irqrestore(&ad->lock, flags); + + if (count) { + ad->more_bq_ready = true; + for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) { + if (ad->batch_actual_max_size[optype] < optype_count[optype]) + ad->batch_actual_max_size[optype] = optype_count[optype]; + } + if (ad->batch_actual_max_total < count) + ad->batch_actual_max_total = count; + } + return count; +} + +// Flip to the next batch queue page +static void flip_bq_page(struct adios_data *ad) { + ad->more_bq_ready = false; + ad->bq_page = (ad->bq_page + 1) % ADIOS_BQ_PAGES; +} + +// Dispatch a request from the batch queues +static struct request *dispatch_from_bq(struct adios_data *ad) { + struct request *rq = NULL; + u64 tpl; + + guard(spinlock_irqsave)(&ad->bq_lock); + + tpl = atomic64_read(&ad->total_pred_lat); + + if (!ad->more_bq_ready && (!tpl || + tpl < ad->global_latency_window * ad->bq_refill_below_ratio / 100)) + fill_batch_queues(ad, tpl); + +again: + // Check if there are any requests in the batch queues + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { + if (!list_empty(&ad->batch_queue[ad->bq_page][i])) { + rq = list_first_entry(&ad->batch_queue[ad->bq_page][i], + struct request, queuelist); + list_del_init(&rq->queuelist); + return rq; + } + } + + // If there's more batch queue page available, flip to it and retry + if (ad->more_bq_ready) { + flip_bq_page(ad); + goto again; + } + + return NULL; +} + +// Dispatch a request from the priority queue +static struct request *dispatch_from_pq(struct adios_data *ad) { + struct request *rq = NULL; + + guard(spinlock_irqsave)(&ad->pq_lock); + + if (!list_empty(&ad->prio_queue)) { + rq = list_first_entry(&ad->prio_queue, struct request, queuelist); + list_del_init(&rq->queuelist); + } + return rq; +} + +// Dispatch a request to the hardware queue +static struct request *adios_dispatch_request(struct blk_mq_hw_ctx *hctx) { + struct adios_data *ad = hctx->queue->elevator->elevator_data; + struct request *rq; + + rq = dispatch_from_pq(ad); + if (rq) goto found; + rq = dispatch_from_bq(ad); + if (!rq) return NULL; +found: + rq->rq_flags |= RQF_STARTED; + return rq; +} + +// Timer callback function to periodically update latency models +static void update_timer_callback(struct timer_list *t) { + struct adios_data *ad = from_timer(ad, t, update_timer); + + for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) + latency_model_update(&ad->latency_model[optype]); +} + +// Handle the completion of a request +static void adios_completed_request(struct request *rq, u64 now) { + struct adios_data *ad = rq->q->elevator->elevator_data; + struct adios_rq_data *rd = get_rq_data(rq); + + atomic64_sub(rd->pred_lat, &ad->total_pred_lat); + + if (!rq->io_start_time_ns || !rd->block_size) + return; + u64 latency = now - rq->io_start_time_ns; + u8 optype = adios_optype(rq); + latency_model_input(&ad->latency_model[optype], + rd->block_size, latency, rd->pred_lat); + timer_reduce(&ad->update_timer, jiffies + msecs_to_jiffies(100)); +} + +// Clean up after a request is finished +static void adios_finish_request(struct request *rq) { + struct adios_data *ad = rq->q->elevator->elevator_data; + + if (rq->elv.priv[0]) { + // Free adios_rq_data back to the memory pool + kmem_cache_free(ad->rq_data_pool, get_rq_data(rq)); + rq->elv.priv[0] = NULL; + } +} + +static inline bool pq_has_work(struct adios_data *ad) { + guard(spinlock_irqsave)(&ad->pq_lock); + return !list_empty(&ad->prio_queue); +} + +static inline bool bq_has_work(struct adios_data *ad) { + guard(spinlock_irqsave)(&ad->bq_lock); + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) + if (!list_empty(&ad->batch_queue[ad->bq_page][i])) + return true; + + return ad->more_bq_ready; +} + +static inline bool dl_tree_has_work(struct adios_data *ad) { + guard(spinlock_irqsave)(&ad->lock); + return ad->dl_queued; +} + +// Check if there are any requests available for dispatch +static bool adios_has_work(struct blk_mq_hw_ctx *hctx) { + struct adios_data *ad = hctx->queue->elevator->elevator_data; + + return pq_has_work(ad) || bq_has_work(ad) || dl_tree_has_work(ad); +} + +// Initialize the scheduler-specific data for a hardware queue +static int adios_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { + adios_depth_updated(hctx); + return 0; +} + +// Initialize the scheduler-specific data when initializing the request queue +static int adios_init_sched(struct request_queue *q, struct elevator_type *e) { + struct adios_data *ad; + struct elevator_queue *eq; + int ret = -ENOMEM; + + eq = elevator_alloc(q, e); + if (!eq) + return ret; + + ad = kzalloc_node(sizeof(*ad), GFP_KERNEL, q->node); + if (!ad) + goto put_eq; + + // Create a memory pool for adios_rq_data + ad->rq_data_pool = kmem_cache_create("rq_data_pool", + sizeof(struct adios_rq_data), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ad->rq_data_pool) { + pr_err("adios: Failed to create rq_data_pool\n"); + goto free_ad; + } + + /* Create a memory pool for dl_group */ + ad->dl_group_pool = kmem_cache_create("dl_group_pool", + sizeof(struct dl_group), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ad->dl_group_pool) { + pr_err("adios: Failed to create dl_group_pool\n"); + goto destroy_rq_data_pool; + } + + eq->elevator_data = ad; + + ad->global_latency_window = default_global_latency_window; + ad->bq_refill_below_ratio = default_bq_refill_below_ratio; + + INIT_LIST_HEAD(&ad->prio_queue); + for (u8 i = 0; i < 2; i++) + ad->dl_tree[i] = RB_ROOT_CACHED; + ad->dl_bias = 0; + ad->dl_queued = 0x0; + for (u8 i = 0; i < 2; i++) + ad->dl_prio[i] = default_dl_prio[i]; + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { + struct latency_model *model = &ad->latency_model[i]; + spin_lock_init(&model->lock); + spin_lock_init(&model->buckets_lock); + memset(model->small_bucket, 0, + sizeof(model->small_bucket[0]) * LM_LAT_BUCKET_COUNT); + memset(model->large_bucket, 0, + sizeof(model->large_bucket[0]) * LM_LAT_BUCKET_COUNT); + model->last_update_jiffies = jiffies; + model->lm_shrink_at_kreqs = default_lm_shrink_at_kreqs; + model->lm_shrink_at_gbytes = default_lm_shrink_at_gbytes; + model->lm_shrink_resist = default_lm_shrink_resist; + + ad->latency_target[i] = default_latency_target[i]; + ad->batch_limit[i] = default_batch_limit[i]; + } + timer_setup(&ad->update_timer, update_timer_callback, 0); + init_batch_queues(ad); + + spin_lock_init(&ad->lock); + spin_lock_init(&ad->pq_lock); + spin_lock_init(&ad->bq_lock); + + /* We dispatch from request queue wide instead of hw queue */ + blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); + + q->elevator = eq; + return 0; + +destroy_rq_data_pool: + kmem_cache_destroy(ad->rq_data_pool); +free_ad: + kfree(ad); +put_eq: + kobject_put(&eq->kobj); + return ret; +} + +// Clean up and free resources when exiting the scheduler +static void adios_exit_sched(struct elevator_queue *e) { + struct adios_data *ad = e->elevator_data; + + timer_shutdown_sync(&ad->update_timer); + + WARN_ON_ONCE(!list_empty(&ad->prio_queue)); + + if (ad->rq_data_pool) + kmem_cache_destroy(ad->rq_data_pool); + + if (ad->dl_group_pool) + kmem_cache_destroy(ad->dl_group_pool); + + kfree(ad); +} + +// Define sysfs attributes for read operation latency model +#define SYSFS_OPTYPE_DECL(name, optype) \ +static ssize_t adios_lat_model_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + struct latency_model *model = &ad->latency_model[optype]; \ + ssize_t len = 0; \ + guard(spinlock_irqsave)(&model->lock); \ + len += sprintf(page, "base : %llu ns\n", model->base); \ + len += sprintf(page + len, "slope: %llu ns/KiB\n", model->slope);\ + return len; \ +} \ +static ssize_t adios_lat_target_##name##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + struct adios_data *ad = e->elevator_data; \ + unsigned long nsec; \ + int ret; \ + ret = kstrtoul(page, 10, &nsec); \ + if (ret) \ + return ret; \ + ad->latency_model[optype].base = 0ULL; \ + ad->latency_target[optype] = nsec; \ + return count; \ +} \ +static ssize_t adios_lat_target_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + return sprintf(page, "%llu\n", ad->latency_target[optype]); \ +} \ +static ssize_t adios_batch_limit_##name##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + unsigned long max_batch; \ + int ret; \ + ret = kstrtoul(page, 10, &max_batch); \ + if (ret || max_batch == 0) \ + return -EINVAL; \ + struct adios_data *ad = e->elevator_data; \ + ad->batch_limit[optype] = max_batch; \ + return count; \ +} \ +static ssize_t adios_batch_limit_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + return sprintf(page, "%u\n", ad->batch_limit[optype]); \ +} + +SYSFS_OPTYPE_DECL(read, ADIOS_READ); +SYSFS_OPTYPE_DECL(write, ADIOS_WRITE); +SYSFS_OPTYPE_DECL(discard, ADIOS_DISCARD); + +// Show the maximum batch size actually achieved for each operation type +static ssize_t adios_batch_actual_max_show( + struct elevator_queue *e, char *page) { + struct adios_data *ad = e->elevator_data; + u32 total_count, read_count, write_count, discard_count; + + total_count = ad->batch_actual_max_total; + read_count = ad->batch_actual_max_size[ADIOS_READ]; + write_count = ad->batch_actual_max_size[ADIOS_WRITE]; + discard_count = ad->batch_actual_max_size[ADIOS_DISCARD]; + + return sprintf(page, + "Total : %u\nDiscard: %u\nRead : %u\nWrite : %u\n", + total_count, discard_count, read_count, write_count); +} + +// Set the global latency window +static ssize_t adios_global_latency_window_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + unsigned long nsec; + int ret; + + ret = kstrtoul(page, 10, &nsec); + if (ret) + return ret; + + ad->global_latency_window = nsec; + + return count; +} + +// Show the global latency window +static ssize_t adios_global_latency_window_show( + struct elevator_queue *e, char *page) { + struct adios_data *ad = e->elevator_data; + return sprintf(page, "%llu\n", ad->global_latency_window); +} + +// Show the bq_refill_below_ratio +static ssize_t adios_bq_refill_below_ratio_show( + struct elevator_queue *e, char *page) { + struct adios_data *ad = e->elevator_data; + return sprintf(page, "%d\n", ad->bq_refill_below_ratio); +} + +// Set the bq_refill_below_ratio +static ssize_t adios_bq_refill_below_ratio_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + int ratio; + int ret; + + ret = kstrtoint(page, 10, &ratio); + if (ret || ratio < 0 || ratio > 100) + return -EINVAL; + + ad->bq_refill_below_ratio = ratio; + + return count; +} + +// Show the read priority +static ssize_t adios_read_priority_show( + struct elevator_queue *e, char *page) { + struct adios_data *ad = e->elevator_data; + return sprintf(page, "%d\n", ad->dl_prio[0]); +} + +// Set the read priority +static ssize_t adios_read_priority_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + int prio; + int ret; + + ret = kstrtoint(page, 10, &prio); + if (ret || prio < -20 || prio > 19) + return -EINVAL; + + guard(spinlock_irqsave)(&ad->lock); + ad->dl_prio[0] = prio; + ad->dl_bias = 0; + + return count; +} + +// Reset batch queue statistics +static ssize_t adios_reset_bq_stats_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + unsigned long val; + int ret; + + ret = kstrtoul(page, 10, &val); + if (ret || val != 1) + return -EINVAL; + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) + ad->batch_actual_max_size[i] = 0; + + ad->batch_actual_max_total = 0; + + return count; +} + +// Reset the latency model parameters +static ssize_t adios_reset_lat_model_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + unsigned long val; + int ret; + + ret = kstrtoul(page, 10, &val); + if (ret || val != 1) + return -EINVAL; + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { + struct latency_model *model = &ad->latency_model[i]; + unsigned long flags; + spin_lock_irqsave(&model->lock, flags); + model->base = 0ULL; + model->slope = 0ULL; + model->small_sum_delay = 0ULL; + model->small_count = 0ULL; + model->large_sum_delay = 0ULL; + model->large_sum_bsize = 0ULL; + spin_unlock_irqrestore(&model->lock, flags); + } + + return count; +} + +// Show the ADIOS version +static ssize_t adios_version_show(struct elevator_queue *e, char *page) { + return sprintf(page, "%s\n", ADIOS_VERSION); +} + +// Define sysfs attributes for dynamic thresholds +#define SHRINK_THRESHOLD_ATTR_RW(name, model_field, min_value, max_value) \ +static ssize_t adios_shrink_##name##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + struct adios_data *ad = e->elevator_data; \ + unsigned long val; \ + int ret; \ + ret = kstrtoul(page, 10, &val); \ + if (ret || val < min_value || val > max_value) \ + return -EINVAL; \ + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ + struct latency_model *model = &ad->latency_model[i]; \ + unsigned long flags; \ + spin_lock_irqsave(&model->lock, flags); \ + model->model_field = val; \ + spin_unlock_irqrestore(&model->lock, flags); \ + } \ + return count; \ +} \ +static ssize_t adios_shrink_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + u32 val = 0; \ + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ + struct latency_model *model = &ad->latency_model[i]; \ + unsigned long flags; \ + spin_lock_irqsave(&model->lock, flags); \ + val = model->model_field; \ + spin_unlock_irqrestore(&model->lock, flags); \ + } \ + return sprintf(page, "%u\n", val); \ +} + +SHRINK_THRESHOLD_ATTR_RW(at_kreqs, lm_shrink_at_kreqs, 1, 100000) +SHRINK_THRESHOLD_ATTR_RW(at_gbytes, lm_shrink_at_gbytes, 1, 1000) +SHRINK_THRESHOLD_ATTR_RW(resist, lm_shrink_resist, 1, 3) + +// Define sysfs attributes +#define AD_ATTR(name, show_func, store_func) \ + __ATTR(name, 0644, show_func, store_func) +#define AD_ATTR_RW(name) \ + __ATTR(name, 0644, adios_##name##_show, adios_##name##_store) +#define AD_ATTR_RO(name) \ + __ATTR(name, 0644, adios_##name##_show, NULL) +#define AD_ATTR_WO(name) \ + __ATTR(name, 0644, NULL, adios_##name##_store) + +// Define sysfs attributes for ADIOS scheduler +static struct elv_fs_entry adios_sched_attrs[] = { + AD_ATTR_RO(batch_actual_max), + AD_ATTR_RW(bq_refill_below_ratio), + AD_ATTR_RW(global_latency_window), + + AD_ATTR_RW(batch_limit_read), + AD_ATTR_RW(batch_limit_write), + AD_ATTR_RW(batch_limit_discard), + + AD_ATTR_RO(lat_model_read), + AD_ATTR_RO(lat_model_write), + AD_ATTR_RO(lat_model_discard), + + AD_ATTR_RW(lat_target_read), + AD_ATTR_RW(lat_target_write), + AD_ATTR_RW(lat_target_discard), + + AD_ATTR_RW(shrink_at_kreqs), + AD_ATTR_RW(shrink_at_gbytes), + AD_ATTR_RW(shrink_resist), + + AD_ATTR_RW(read_priority), + + AD_ATTR_WO(reset_bq_stats), + AD_ATTR_WO(reset_lat_model), + AD_ATTR(adios_version, adios_version_show, NULL), + + __ATTR_NULL +}; + +// Define the ADIOS scheduler type +static struct elevator_type mq_adios = { + .ops = { + .next_request = elv_rb_latter_request, + .former_request = elv_rb_former_request, + .limit_depth = adios_limit_depth, + .depth_updated = adios_depth_updated, + .request_merged = adios_request_merged, + .requests_merged = adios_merged_requests, + .bio_merge = adios_bio_merge, + .insert_requests = adios_insert_requests, + .prepare_request = adios_prepare_request, + .dispatch_request = adios_dispatch_request, + .completed_request = adios_completed_request, + .finish_request = adios_finish_request, + .has_work = adios_has_work, + .init_hctx = adios_init_hctx, + .init_sched = adios_init_sched, + .exit_sched = adios_exit_sched, + }, +#ifdef CONFIG_BLK_DEBUG_FS +#endif + .elevator_attrs = adios_sched_attrs, + .elevator_name = "adios", + .elevator_owner = THIS_MODULE, +}; +MODULE_ALIAS("mq-adios-iosched"); + +#define ADIOS_PROGNAME "Adaptive Deadline I/O Scheduler" +#define ADIOS_AUTHOR "Masahito Suzuki" + +// Initialize the ADIOS scheduler module +static int __init adios_init(void) { + printk(KERN_INFO "%s %s by %s\n", + ADIOS_PROGNAME, ADIOS_VERSION, ADIOS_AUTHOR); + return elv_register(&mq_adios); +} + +// Exit the ADIOS scheduler module +static void __exit adios_exit(void) { + elv_unregister(&mq_adios); +} + +module_init(adios_init); +module_exit(adios_exit); + +MODULE_AUTHOR(ADIOS_AUTHOR); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(ADIOS_PROGNAME); \ No newline at end of file From cf8ccf5e82136c2e60222c640e5b8163ea8b44ab Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 7 Apr 2025 12:40:15 +0800 Subject: [PATCH 2863/2924] le9uo v1.10 Signed-off-by: Eric Naim --- Documentation/admin-guide/sysctl/vm.rst | 72 ++++++++++++ include/linux/mm.h | 8 ++ kernel/sysctl.c | 34 ++++++ mm/Kconfig | 63 ++++++++++ mm/mm_init.c | 1 + mm/vmscan.c | 147 +++++++++++++++++++++++- 6 files changed, 322 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 8290177b4f7589..f2e601171a3e24 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -25,6 +25,9 @@ files can be found in mm/swap.c. Currently, these files are in /proc/sys/vm: - admin_reserve_kbytes +- anon_min_ratio +- clean_low_ratio +- clean_min_ratio - compact_memory - compaction_proactiveness - compact_unevictable_allowed @@ -109,6 +112,67 @@ On x86_64 this is about 128MB. Changing this takes effect whenever an application requests memory. +anon_min_ratio +============== + +This knob provides *hard* protection of anonymous pages. The anonymous pages +on the current node won't be reclaimed under any conditions when their amount +is below vm.anon_min_ratio. + +This knob may be used to prevent excessive swap thrashing when anonymous +memory is low (for example, when memory is going to be overfilled by +compressed data of zram module). + +Setting this value too high (close to 100) can result in inability to +swap and can lead to early OOM under memory pressure. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 15. + + +clean_low_ratio +================ + +This knob provides *best-effort* protection of clean file pages. The file pages +on the current node won't be reclaimed under memory pressure when the amount of +clean file pages is below vm.clean_low_ratio *unless* we threaten to OOM. + +Protection of clean file pages using this knob may be used when swapping is +still possible to + - prevent disk I/O thrashing under memory pressure; + - improve performance in disk cache-bound tasks under memory pressure. + +Setting it to a high value may result in a early eviction of anonymous pages +into the swap space by attempting to hold the protected amount of clean file +pages in memory. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 0. + + +clean_min_ratio +================ + +This knob provides *hard* protection of clean file pages. The file pages on the +current node won't be reclaimed under memory pressure when the amount of clean +file pages is below vm.clean_min_ratio. + +Hard protection of clean file pages using this knob may be used to + - prevent disk I/O thrashing under memory pressure even with no free swap space; + - improve performance in disk cache-bound tasks under memory pressure; + - avoid high latency and prevent livelock in near-OOM conditions. + +Setting it to a high value may result in a early out-of-memory condition due to +the inability to reclaim the protected amount of clean file pages when other +types of pages cannot be reclaimed. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 15. + + compact_memory ============== @@ -973,6 +1037,14 @@ be 133 (x + 2x = 200, 2x = 133.33). At 0, the kernel will not initiate swap until the amount of free and file-backed pages is less than the high watermark in a zone. +This knob has no effect if the amount of clean file pages on the current +node is below vm.clean_low_ratio or vm.clean_min_ratio. In this case, +only anonymous pages can be reclaimed. + +If the number of anonymous pages on the current node is below +vm.anon_min_ratio, then only file pages can be reclaimed with +any vm.swappiness value. + unprivileged_userfaultfd ======================== diff --git a/include/linux/mm.h b/include/linux/mm.h index e51dba8398f747..ece96e871a3b62 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -193,6 +193,14 @@ static inline void __mm_zero_struct_page(struct page *page) extern int sysctl_max_map_count; +extern bool sysctl_workingset_protection; +extern u8 sysctl_anon_min_ratio; +extern u8 sysctl_clean_low_ratio; +extern u8 sysctl_clean_min_ratio; +int vm_workingset_protection_update_handler( + const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fe7bec454345fd..abbb553218cd3d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1849,6 +1849,40 @@ static const struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif + { + .procname = "workingset_protection", + .data = &sysctl_workingset_protection, + .maxlen = sizeof(bool), + .mode = 0644, + .proc_handler = &proc_dobool, + }, + { + .procname = "anon_min_ratio", + .data = &sysctl_anon_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_low_ratio", + .data = &sysctl_clean_low_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_min_ratio", + .data = &sysctl_clean_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, { .procname = "panic_on_warn", .data = &panic_on_warn, diff --git a/mm/Kconfig b/mm/Kconfig index 0f76cc26473d58..5fc1934a30957a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -462,6 +462,69 @@ config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP config ARCH_WANT_HUGETLB_VMEMMAP_PREINIT bool +config ANON_MIN_RATIO + int "Default value for vm.anon_min_ratio" + depends on SYSCTL + range 0 100 + default 15 + help + This option sets the default value for vm.anon_min_ratio sysctl knob. + + The vm.anon_min_ratio sysctl knob provides *hard* protection of + anonymous pages. The anonymous pages on the current node won't be + reclaimed under any conditions when their amount is below + vm.anon_min_ratio. This knob may be used to prevent excessive swap + thrashing when anonymous memory is low (for example, when memory is + going to be overfilled by compressed data of zram module). + + Setting this value too high (close to MemTotal) can result in + inability to swap and can lead to early OOM under memory pressure. + +config CLEAN_LOW_RATIO + int "Default value for vm.clean_low_ratio" + depends on SYSCTL + range 0 100 + default 0 + help + This option sets the default value for vm.clean_low_ratio sysctl knob. + + The vm.clean_low_ratio sysctl knob provides *best-effort* + protection of clean file pages. The file pages on the current node + won't be reclaimed under memory pressure when the amount of clean file + pages is below vm.clean_low_ratio *unless* we threaten to OOM. + Protection of clean file pages using this knob may be used when + swapping is still possible to + - prevent disk I/O thrashing under memory pressure; + - improve performance in disk cache-bound tasks under memory + pressure. + + Setting it to a high value may result in a early eviction of anonymous + pages into the swap space by attempting to hold the protected amount + of clean file pages in memory. + +config CLEAN_MIN_RATIO + int "Default value for vm.clean_min_ratio" + depends on SYSCTL + range 0 100 + default 15 + help + This option sets the default value for vm.clean_min_ratio sysctl knob. + + The vm.clean_min_ratio sysctl knob provides *hard* protection of + clean file pages. The file pages on the current node won't be + reclaimed under memory pressure when the amount of clean file pages is + below vm.clean_min_ratio. Hard protection of clean file pages using + this knob may be used to + - prevent disk I/O thrashing under memory pressure even with no free + swap space; + - improve performance in disk cache-bound tasks under memory + pressure; + - avoid high latency and prevent livelock in near-OOM conditions. + + Setting it to a high value may result in a early out-of-memory condition + due to the inability to reclaim the protected amount of clean file pages + when other types of pages cannot be reclaimed. + config HAVE_MEMBLOCK_PHYS_MAP bool diff --git a/mm/mm_init.c b/mm/mm_init.c index eedce9321e13dc..110d99319ff6f7 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2732,6 +2732,7 @@ static void __init mem_init_print_info(void) , K(totalhigh_pages()) #endif ); + printk(KERN_INFO "le9 Unofficial (le9uo) working set protection 1.10 by Masahito Suzuki (forked from hakavlad's original le9 patch)"); } void __init __weak arch_mm_preinit(void) diff --git a/mm/vmscan.c b/mm/vmscan.c index 8e6fa42ad584fd..8a73850e0f992d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,6 +148,15 @@ struct scan_control { /* The file folios on the current node are dangerously low */ unsigned int file_is_tiny:1; + /* The anonymous pages on the current node are below vm.anon_min_ratio */ + unsigned int anon_below_min:1; + + /* The clean file pages on the current node are below vm.clean_low_ratio */ + unsigned int clean_below_low:1; + + /* The clean file pages on the current node are below vm.clean_min_ratio */ + unsigned int clean_below_min:1; + /* Always discard instead of demoting to lower tier memory */ unsigned int no_demotion:1; @@ -197,6 +206,15 @@ struct scan_control { #define prefetchw_prev_lru_folio(_folio, _base, _field) do { } while (0) #endif +bool sysctl_workingset_protection __read_mostly = true; +u8 sysctl_anon_min_ratio __read_mostly = CONFIG_ANON_MIN_RATIO; +u8 sysctl_clean_low_ratio __read_mostly = CONFIG_CLEAN_LOW_RATIO; +u8 sysctl_clean_min_ratio __read_mostly = CONFIG_CLEAN_MIN_RATIO; +static u64 sysctl_anon_min_ratio_kb __read_mostly = 0; +static u64 sysctl_clean_low_ratio_kb __read_mostly = 0; +static u64 sysctl_clean_min_ratio_kb __read_mostly = 0; +static u64 workingset_protection_prev_totalram __read_mostly = 0; + /* * From 0 .. MAX_SWAPPINESS. Higher means more swappy. */ @@ -1151,6 +1169,10 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, if (!sc->may_unmap && folio_mapped(folio)) goto keep_locked; + if (folio_is_file_lru(folio) ? sc->clean_below_min : + (sc->anon_below_min && !sc->clean_below_min)) + goto keep_locked; + /* * The number of dirty pages determines if a node is marked * reclaim_congested. kswapd will stall and start writing @@ -2525,6 +2547,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } + /* + * Force-scan anon if clean file pages is under vm.clean_low_ratio + * or vm.clean_min_ratio. + */ + if (sc->clean_below_low || sc->clean_below_min) { + scan_balance = SCAN_ANON; + goto out; + } + /* * If there is enough inactive page cache, we do not reclaim * anything from the anonymous working right now. @@ -2642,6 +2673,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, BUG(); } + /* + * Hard protection of the working set. + * Don't reclaim anon/file pages when the amount is + * below the watermark of the same type. + */ + if (file ? sc->clean_below_min : sc->anon_below_min) + scan = 0; + nr[lru] = scan; } } @@ -2661,6 +2700,96 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, return can_demote(pgdat->node_id, sc); } +int vm_workingset_protection_update_handler(const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + workingset_protection_prev_totalram = 0; + + return 0; +} + +static void prepare_workingset_protection(pg_data_t *pgdat, struct scan_control *sc) +{ + unsigned long node_mem_total; + struct sysinfo i; + + if (!(sysctl_workingset_protection)) { + sc->anon_below_min = 0; + sc->clean_below_low = 0; + sc->clean_below_min = 0; + return; + } + + if (likely(sysctl_anon_min_ratio || + sysctl_clean_low_ratio || + sysctl_clean_min_ratio)) { +#ifdef CONFIG_NUMA + si_meminfo_node(&i, pgdat->node_id); +#else //CONFIG_NUMA + si_meminfo(&i); +#endif //CONFIG_NUMA + node_mem_total = i.totalram; + + if (unlikely(workingset_protection_prev_totalram != node_mem_total)) { + sysctl_anon_min_ratio_kb = + node_mem_total * sysctl_anon_min_ratio / 100; + sysctl_clean_low_ratio_kb = + node_mem_total * sysctl_clean_low_ratio / 100; + sysctl_clean_min_ratio_kb = + node_mem_total * sysctl_clean_min_ratio / 100; + workingset_protection_prev_totalram = node_mem_total; + } + } + + /* + * Check the number of anonymous pages to protect them from + * reclaiming if their amount is below the specified. + */ + if (sysctl_anon_min_ratio) { + unsigned long reclaimable_anon; + + reclaimable_anon = + node_page_state(pgdat, NR_ACTIVE_ANON) + + node_page_state(pgdat, NR_INACTIVE_ANON) + + node_page_state(pgdat, NR_ISOLATED_ANON); + + sc->anon_below_min = reclaimable_anon < sysctl_anon_min_ratio_kb; + } else + sc->anon_below_min = 0; + + /* + * Check the number of clean file pages to protect them from + * reclaiming if their amount is below the specified. + */ + if (sysctl_clean_low_ratio || sysctl_clean_min_ratio) { + unsigned long reclaimable_file, dirty, clean; + + reclaimable_file = + node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE) + + node_page_state(pgdat, NR_ISOLATED_FILE); + dirty = node_page_state(pgdat, NR_FILE_DIRTY); + /* + * node_page_state() sum can go out of sync since + * all the values are not read at once. + */ + if (likely(reclaimable_file > dirty)) + clean = reclaimable_file - dirty; + else + clean = 0; + + sc->clean_below_low = clean < sysctl_clean_low_ratio_kb; + sc->clean_below_min = clean < sysctl_clean_min_ratio_kb; + } else { + sc->clean_below_low = 0; + sc->clean_below_min = 0; + } +} + #ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN_ENABLED @@ -4627,13 +4756,20 @@ static int get_tier_idx(struct lruvec *lruvec, int type) return tier - 1; } -static int get_type_to_scan(struct lruvec *lruvec, int swappiness) +static int get_type_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness) { struct ctrl_pos sp, pv; if (swappiness <= MIN_SWAPPINESS + 1) return LRU_GEN_FILE; + if (sc->clean_below_min) + return LRU_GEN_ANON; + if (sc->anon_below_min) + return LRU_GEN_FILE; + if (sc->clean_below_low) + return LRU_GEN_ANON; + if (swappiness >= MAX_SWAPPINESS) return LRU_GEN_ANON; /* @@ -4650,7 +4786,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw int *type_scanned, struct list_head *list) { int i; - int type = get_type_to_scan(lruvec, swappiness); + int type = get_type_to_scan(lruvec, sc, swappiness); for_each_evictable_type(i, swappiness) { int scanned; @@ -4892,6 +5028,9 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) unsigned long reclaimed = sc->nr_reclaimed; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); + bool can_swap = get_swappiness(lruvec, sc); + + prepare_workingset_protection(pgdat, sc); /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) @@ -4918,7 +5057,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) if (success && mem_cgroup_online(memcg)) return MEMCG_LRU_YOUNG; - if (!success && lruvec_is_sizable(lruvec, sc)) + if (!success && (!can_swap || lruvec_is_sizable(lruvec, sc))) return 0; /* one retry if offlined or too small */ @@ -6031,6 +6170,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); + prepare_workingset_protection(pgdat, sc); + shrink_node_memcgs(pgdat, sc); flush_reclaim_state(sc); From 528d9c9e9ed6c9a4e09a9f9fec4c78347e8a4029 Mon Sep 17 00:00:00 2001 From: Tor Vic Date: Fri, 21 Mar 2025 15:28:58 +0100 Subject: [PATCH 2864/2924] x86/kbuild/64: Add the CONFIG_X86_NATIVE_CPU option to locally optimize the kernel with '-march=native' Add a 'native' option that allows users to build an optimized kernel for their local machine (i.e. the machine which is used to build the kernel) by passing '-march=native' to CFLAGS. The idea comes from Linus' reply to Arnd's initial proposal: https://lore.kernel.org/all/CAHk-=wji1sV93yKbc==Z7OSSHBiDE=LAdG_d5Y-zPBrnSs0k2A@mail.gmail.com/ Here are some numbers comparing 'generic' to 'native' on a Skylake dual-core laptop (generic --> native): - vmlinux and compressed modules size: 125'907'744 bytes --> 125'595'280 bytes (-0.248 %) 18'810 kilobytes --> 18'770 kilobytes (-0.213 %) - phoronix, average of 3 runs: ffmpeg: 130.99 --> 131.15 (+0.122 %) nginx: 10'650 --> 10'725 (+0.704 %) hackbench (lower is better): 102.27 --> 99.50 (-2.709 %) - xz compression of firefox tarball (lower is better): 319.57 seconds --> 317.34 seconds (-0.698 %) - stress-ng, bogoops, average of 3 15-second runs: fork: 111'744 --> 115'509 (+3.397 %) bsearch: 7'211 --> 7'436 (+3.120 %) memfd: 3'591 --> 3'604 (+0.362 %) mmapfork: 630 --> 629 (-0.159 %) schedmix: 42'715 --> 43'251 (+1.255 %) epoll: 2'443'767 --> 2'454'413 (+0.436 %) vm: 1'442'256 --> 1'486'615 (+3.076 %) - schbench (two message threads), 30-second runs: 304 rps --> 305 rps (+0.329 %) There is little difference both in terms of size and of performance, however the native build comes out on top ever so slightly. [ mingo: Renamed the option to CONFIG_X86_NATIVE_CPU, expanded the help text and added Linus's Suggested-by tag. ] Suggested-by: Linus Torvalds Signed-off-by: Tor Vic Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Kees Cook Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250321142859.13889-1-torvic9@mailbox.org --- arch/x86/Kconfig.cpu | 14 ++++++++++++++ arch/x86/Makefile | 5 +++++ 2 files changed, 19 insertions(+) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 753b8763abaeb9..9d108a54c30a61 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -245,6 +245,20 @@ config MATOM endchoice +config X86_NATIVE_CPU + bool "Build and optimize for local/native CPU" + depends on X86_64 + default n + help + Optimize for the current CPU used to compile the kernel. + Use this option if you intend to build the kernel for your + local machine. + + Note that such a kernel might not work optimally on a + different x86 machine. + + If unsure, say N. + config X86_GENERIC bool "Generic x86 support" depends on X86_32 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 594723005d95d9..9b76e77ff7f788 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -173,8 +173,13 @@ else # Use -mskip-rax-setup if supported. KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) +ifdef CONFIG_X86_NATIVE_CPU + KBUILD_CFLAGS += -march=native + KBUILD_RUSTFLAGS += -Ctarget-cpu=native +else KBUILD_CFLAGS += -march=x86-64 -mtune=generic KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic +endif KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel From 436c725e6edd8efd4539445d45d486a1bd311bfa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 24 Mar 2025 08:05:19 +0100 Subject: [PATCH 2865/2924] x86/kbuild/64: Test for the availability of the -mtune=native compiler flag Stephen reported this build failure when cross-compiling: cc1: error: bad value 'native' for '-march=' switch Test for the availability of the -march=native flag. Reported-by: Stephen Rothwell Signed-off-by: Ingo Molnar Tested-by: Stephen Rothwell # build test Cc: Tor Vic Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Kees Cook Cc: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324172723.49fb0416@canb.auug.org.au --- arch/x86/Kconfig.cpu | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 9d108a54c30a61..87bede96e8009e 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -245,10 +245,14 @@ config MATOM endchoice +config CC_HAS_MARCH_NATIVE + # This flag might not be available in cross-compilers: + def_bool $(cc-option, -march=native) + config X86_NATIVE_CPU bool "Build and optimize for local/native CPU" depends on X86_64 - default n + depends on CC_HAS_MARCH_NATIVE help Optimize for the current CPU used to compile the kernel. Use this option if you intend to build the kernel for your From 7007b063db80de218ae5dcfa2d76e8d290a4673d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Mar 2025 20:23:00 -0700 Subject: [PATCH 2866/2924] x86/kbuild/64: Restrict clang versions that can use '-march=native' There are two issues that can appear when building with clang and '-march=native'. The first issue is a compiler crash in the ipv6 stack with clang-18, such as when building allmodconfig. This was frequently reported on the LLVM issue tracker: # Link: https://github.com/llvm/llvm-project/issues?q=is%3Aissue%20ip6_rcv_core Stack dump: 0. Program arguments: clang ... -march=native ... net/ipv6/ip6_input.c 1. parser at end of file 2. Code generation 3. Running pass 'Function Pass Manager' on module 'net/ipv6/ip6_input.c'. 4. Running pass 'X86 DAG->DAG Instruction Selection' on function '@ip6_rcv_core' The second issue is certain -Wframe-larger-than warnings that appear with clang versions prior to 19, which introduced an optimization that produces much better code: # Link: https://github.com/llvm/llvm-project/commit/90ba33099cbb17e7c159e9ebc5a512037db99d6d [2] clang-18: drivers/media/pci/saa7164/saa7164-core.c:605:20: error: stack frame size (2392) exceeds limit (2048) in 'saa7164_irq' [-Werror,-Wframe-larger-than] 605 | static irqreturn_t saa7164_irq(int irq, void *dev_id) | ^ clang-19: drivers/media/pci/saa7164/saa7164-core.c:605:20: error: stack frame size (216) exceeds limit (128) in 'saa7164_irq' [-Werror,-Wframe-larger-than] 605 | static irqreturn_t saa7164_irq(int irq, void *dev_id) | ^ Restrict the testing of the availability of '-march=native' to all supported GCC versions and clang-19 and newer to avoid these known resolved issues. Fixes: 0480bc7e65dc ("x86/kbuild/64: Add the CONFIG_X86_NATIVE_CPU option to locally optimize the kernel with '-march=native'") Signed-off-by: Nathan Chancellor Signed-off-by: Ingo Molnar Cc: Nick Desaulniers Cc: Tor Vic Cc: Andy Lutomirski Cc: Brian Gerst Cc: Juergen Gross Cc: H. Peter Anvin Cc: Kees Cook Cc: Josh Poimboeuf Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324-x86-march-native-clang-19-and-newer-v1-1-3a05ed32a89e@kernel.org --- arch/x86/Kconfig.cpu | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 87bede96e8009e..f928cf6e32524e 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -248,6 +248,12 @@ endchoice config CC_HAS_MARCH_NATIVE # This flag might not be available in cross-compilers: def_bool $(cc-option, -march=native) + # LLVM 18 has an easily triggered internal compiler error in core + # networking code with '-march=native' on certain systems: + # https://github.com/llvm/llvm-project/issues/72026 + # LLVM 19 introduces an optimization that resolves some high stack + # usage warnings that only appear wth '-march=native'. + depends on CC_IS_GCC || CLANG_VERSION >= 190100 config X86_NATIVE_CPU bool "Build and optimize for local/native CPU" From 0ab0b3c3f2b29c0adfe03a396e90549859af9f8a Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 7 Apr 2025 13:12:53 +0800 Subject: [PATCH 2867/2924] CACHY: Add x86_64 ISA and Zen4 compiler optimizations Signed-off-by: Eric Naim --- arch/x86/Kconfig.cpu | 46 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/Makefile | 16 +++++++++++++-- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index f928cf6e32524e..d4ce964d97137b 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -255,6 +255,11 @@ config CC_HAS_MARCH_NATIVE # usage warnings that only appear wth '-march=native'. depends on CC_IS_GCC || CLANG_VERSION >= 190100 + +choice + prompt "x86_64 Compiler Build Optimization" + default GENERIC_CPU + config X86_NATIVE_CPU bool "Build and optimize for local/native CPU" depends on X86_64 @@ -269,6 +274,47 @@ config X86_NATIVE_CPU If unsure, say N. +config GENERIC_CPU + bool "Generic-x86-64" + depends on X86_64 + help + Generic x86-64 CPU. + Runs equally well on all x86-64 CPUs. + +config MZEN4 + bool "AMD Ryzen 4" + depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000) + help + Select this for AMD Family 19h Zen 4 processors. + + Enables -march=znver4 + +endchoice + +config X86_64_VERSION + int "x86-64 compiler ISA level" + range 1 4 + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 && GENERIC_CPU + help + Specify a specific x86-64 compiler ISA level. + + There are three x86-64 ISA levels that work on top of + the x86-64 baseline, namely: x86-64-v2 and x86-64-v3. + + x86-64-v2 brings support for vector instructions up to Streaming SIMD + Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3 + (SSSE3), the POPCNT instruction, and CMPXCHG16B. + + x86-64-v3 adds vector instructions up to AVX2, MOVBE, and additional + bit-manipulation instructions. + + x86-64-v4 is not included since the kernel does not use AVX512 instructions + + You can find the best version for your CPU by running one of the following: + /lib/ld-linux-x86-64.so.2 --help | grep supported + /lib64/ld-linux-x86-64.so.2 --help | grep supported + config X86_GENERIC bool "Generic x86 support" depends on X86_32 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 9b76e77ff7f788..cbd234e9e743fd 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -176,10 +176,22 @@ else ifdef CONFIG_X86_NATIVE_CPU KBUILD_CFLAGS += -march=native KBUILD_RUSTFLAGS += -Ctarget-cpu=native -else +endif + +ifdef CONFIG_MZEN4 + KBUILD_CFLAGS += -march=znver4 + KBUILD_RUSTFLAGS += -Ctarget-cpu=znver4 +endif + +ifdef CONFIG_GENERIC_CPU +ifeq ($(CONFIG_X86_64_VERSION),1) KBUILD_CFLAGS += -march=x86-64 -mtune=generic KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic -endif +else + KBUILD_CFLAGS +=-march=x86-64-v$(CONFIG_X86_64_VERSION) + KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION) +endif # CONFIG_X86_64_VERSION +endif # CONFIG_GENERIC_CPU KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel From 63b8823a1fa50bd243a3853c5068647d757694be Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 7 Apr 2025 15:09:58 +0800 Subject: [PATCH 2868/2924] v4l2loopback: Convert del_timer_sync to timer_delete_sync del_timer_sync is deprecated and not used anymore in the kernel. Signed-off-by: Eric Naim --- drivers/media/v4l2-core/v4l2loopback.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c index 468d47f4950b33..5c8b70981afa49 100644 --- a/drivers/media/v4l2-core/v4l2loopback.c +++ b/drivers/media/v4l2-core/v4l2loopback.c @@ -1788,8 +1788,8 @@ static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *buf) static void buffer_written(struct v4l2_loopback_device *dev, struct v4l2l_buffer *buf) { - del_timer_sync(&dev->sustain_timer); - del_timer_sync(&dev->timeout_timer); + timer_delete_sync(&dev->sustain_timer); + timer_delete_sync(&dev->timeout_timer); spin_lock_bh(&dev->list_lock); list_move_tail(&buf->list_head, &dev->outbufs_list); @@ -2366,8 +2366,8 @@ static int v4l2_loopback_close(struct file *file) } if (atomic_dec_and_test(&dev->open_count)) { - del_timer_sync(&dev->sustain_timer); - del_timer_sync(&dev->timeout_timer); + timer_delete_sync(&dev->sustain_timer); + timer_delete_sync(&dev->timeout_timer); if (!dev->keep_format) { mutex_lock(&dev->image_mutex); free_buffers(dev); From f484cbfcb2b91ad20ea6148efae023cc4547b251 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Tue, 8 Apr 2025 13:38:48 +0800 Subject: [PATCH 2869/2924] CACHY: sched/fair: Fix migration cost tweak Signed-off-by: Eric Naim --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a6117ccf77f870..1e8c138c797f22 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -85,9 +85,9 @@ static unsigned int normalized_sysctl_sched_base_slice = 700000ULL; #endif /* CONFIG_CACHY */ #ifdef CONFIG_CACHY -const_debug unsigned int sysctl_sched_migration_cost = 300000UL; +__read_mostly unsigned int sysctl_sched_migration_cost = 300000UL; #else -const_debug unsigned int sysctl_sched_migration_cost = 500000UL; +__read_mostly unsigned int sysctl_sched_migration_cost = 500000UL; #endif static int __init setup_sched_thermal_decay_shift(char *str) From 30bc7e0e1a8a9b2d751b0a7a439bdd11b35b05df Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Sun, 9 Mar 2025 20:49:34 +0100 Subject: [PATCH 2870/2924] HID: amd_sfh: Add support for tablet mode switch sensors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for the tablet mode switch sensors on convertible devices where that sensor is managed by AMD SFH, like the Asus Flow X13 and the Lenovo ThinkPad L13 Yoga Gen2 (AMD). Tested-by: Paweł Kotiuk Co-developed-by: Ivan Dovgal Signed-off-by: Ivan Dovgal Co-developed-by: Luke D. Jones Signed-off-by: Luke D. Jones Signed-off-by: Adrian Freund Signed-off-by: Denis Benato --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 2 ++ drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 4 +++ drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 1 + .../hid_descriptor/amd_sfh_hid_desc.c | 27 +++++++++++++++++++ .../hid_descriptor/amd_sfh_hid_desc.h | 8 ++++++ .../hid_descriptor/amd_sfh_hid_report_desc.h | 20 ++++++++++++++ 6 files changed, 62 insertions(+) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index 3438d392920fad..867019955b10f2 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -146,6 +146,8 @@ static const char *get_sensor_name(int idx) return "gyroscope"; case mag_idx: return "magnetometer"; + case tms_idx: + return "tablet-mode-switch"; case als_idx: case ACS_IDX: /* ambient color sensor */ return "ALS"; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index 1c1fd63330c939..7fd486a279b51b 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -29,6 +29,7 @@ #define ACEL_EN BIT(0) #define GYRO_EN BIT(1) #define MAGNO_EN BIT(2) +#define TMS_EN BIT(15) #define HPD_EN BIT(16) #define ALS_EN BIT(19) #define ACS_EN BIT(22) @@ -232,6 +233,9 @@ int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id) if (MAGNO_EN & activestatus) sensor_id[num_of_sensors++] = mag_idx; + if (TMS_EN & activestatus) + sensor_id[num_of_sensors++] = tms_idx; + if (ALS_EN & activestatus) sensor_id[num_of_sensors++] = als_idx; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index 05e400a4a83e40..617f0265b7073f 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -79,6 +79,7 @@ enum sensor_idx { accel_idx = 0, gyro_idx = 1, mag_idx = 2, + tms_idx = 15, als_idx = 19 }; diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c index ef1f9be8b89383..516a07bf3be6d9 100644 --- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c +++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c @@ -47,6 +47,11 @@ static int get_report_descriptor(int sensor_idx, u8 *rep_desc) memcpy(rep_desc, comp3_report_descriptor, sizeof(comp3_report_descriptor)); break; + case tms_idx: /* tablet mode switch */ + memset(rep_desc, 0, sizeof(tms_report_descriptor)); + memcpy(rep_desc, tms_report_descriptor, + sizeof(tms_report_descriptor)); + break; case als_idx: /* ambient light sensor */ case ACS_IDX: /* ambient color sensor */ memset(rep_desc, 0, sizeof(als_report_descriptor)); @@ -97,6 +102,16 @@ static u32 get_descr_sz(int sensor_idx, int descriptor_name) return sizeof(struct magno_feature_report); } break; + case tms_idx: + switch (descriptor_name) { + case descr_size: + return sizeof(tms_report_descriptor); + case input_size: + return sizeof(struct tms_input_report); + case feature_size: + return sizeof(struct tms_feature_report); + } + break; case als_idx: case ACS_IDX: /* ambient color sensor */ switch (descriptor_name) { @@ -140,6 +155,7 @@ static u8 get_feature_report(int sensor_idx, int report_id, u8 *feature_report) struct accel3_feature_report acc_feature; struct gyro_feature_report gyro_feature; struct magno_feature_report magno_feature; + struct tms_feature_report tms_feature; struct hpd_feature_report hpd_feature; struct als_feature_report als_feature; u8 report_size = 0; @@ -175,6 +191,11 @@ static u8 get_feature_report(int sensor_idx, int report_id, u8 *feature_report) memcpy(feature_report, &magno_feature, sizeof(magno_feature)); report_size = sizeof(magno_feature); break; + case tms_idx: /* tablet mode switch */ + get_common_features(&tms_feature.common_property, report_id); + memcpy(feature_report, &tms_feature, sizeof(tms_feature)); + report_size = sizeof(tms_feature); + break; case als_idx: /* ambient light sensor */ case ACS_IDX: /* ambient color sensor */ get_common_features(&als_feature.common_property, report_id); @@ -214,6 +235,7 @@ static u8 get_input_report(u8 current_index, int sensor_idx, int report_id, struct accel3_input_report acc_input; struct gyro_input_report gyro_input; struct hpd_input_report hpd_input; + struct tms_input_report tms_input; struct als_input_report als_input; struct hpd_status hpdstatus; u8 report_size = 0; @@ -247,6 +269,11 @@ static u8 get_input_report(u8 current_index, int sensor_idx, int report_id, memcpy(input_report, &magno_input, sizeof(magno_input)); report_size = sizeof(magno_input); break; +case tms_idx: /* tablet mode switch */ + get_common_inputs(&tms_input.common_property, report_id); + report_size = sizeof(tms_input); + memcpy(input_report, &tms_input, sizeof(tms_input)); + break; case als_idx: /* Als */ case ACS_IDX: /* ambient color sensor */ get_common_inputs(&als_input.common_property, report_id); diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h index 882434b1501ff1..afcdac989cb673 100644 --- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h +++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h @@ -114,4 +114,12 @@ struct hpd_input_report { u8 human_presence; } __packed; +struct tms_feature_report { + struct common_feature_property common_property; +} __packed; + +struct tms_input_report { + struct common_input_property common_property; +} __packed; + #endif diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h index 67ec2d6a417de5..4dc87d6847769f 100644 --- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h +++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h @@ -665,6 +665,26 @@ static const u8 als_report_descriptor[] = { 0xC0 /* HID end collection */ }; +/* TABLET MODE SWITCH */ +__maybe_unused // Used by sfh1.0, but not yet implemented in sfh1.1 +static const u8 tms_report_descriptor[] = { +0x06, 0x43, 0xFF, // Usage Page (Vendor Defined 0xFF43) +0x0A, 0x02, 0x02, // Usage (0x0202) +0xA1, 0x01, // Collection (Application) +0x85, 0x11, // Report ID (17) +0x15, 0x00, // Logical Minimum (0) +0x25, 0x01, // Logical Maximum (1) +0x35, 0x00, // Physical Minimum (0) +0x45, 0x01, // Physical Maximum (1) +0x65, 0x00, // Unit (None) +0x55, 0x00, // Unit Exponent (0) +0x75, 0x01, // Report Size (1) +0x95, 0x98, // Report Count (-104) +0x81, 0x03, // Input (Const,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) +0x91, 0x03, // Output (Const,Var,Abs,No Wrap,Linear,Preferred State,No Null Position,Non-volatile) +0xC1, 0x00, // End Collection +}; + /* BIOMETRIC PRESENCE*/ static const u8 hpd_report_descriptor[] = { 0x05, 0x20, /* Usage page */ From 9a1645673c74dee781e9a86c22fbca7d91e844e7 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Sat, 12 Apr 2025 18:05:53 +0800 Subject: [PATCH 2871/2924] Revert "le9uo v1.10" This reverts commit 1434e648603e88fc00cb80e7b57209bc26919f11. Signed-off-by: Eric Naim --- Documentation/admin-guide/sysctl/vm.rst | 72 ------------ include/linux/mm.h | 8 -- kernel/sysctl.c | 34 ------ mm/Kconfig | 63 ---------- mm/mm_init.c | 1 - mm/vmscan.c | 147 +----------------------- 6 files changed, 3 insertions(+), 322 deletions(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index f2e601171a3e24..8290177b4f7589 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -25,9 +25,6 @@ files can be found in mm/swap.c. Currently, these files are in /proc/sys/vm: - admin_reserve_kbytes -- anon_min_ratio -- clean_low_ratio -- clean_min_ratio - compact_memory - compaction_proactiveness - compact_unevictable_allowed @@ -112,67 +109,6 @@ On x86_64 this is about 128MB. Changing this takes effect whenever an application requests memory. -anon_min_ratio -============== - -This knob provides *hard* protection of anonymous pages. The anonymous pages -on the current node won't be reclaimed under any conditions when their amount -is below vm.anon_min_ratio. - -This knob may be used to prevent excessive swap thrashing when anonymous -memory is low (for example, when memory is going to be overfilled by -compressed data of zram module). - -Setting this value too high (close to 100) can result in inability to -swap and can lead to early OOM under memory pressure. - -The unit of measurement is the percentage of the total memory of the node. - -The default value is 15. - - -clean_low_ratio -================ - -This knob provides *best-effort* protection of clean file pages. The file pages -on the current node won't be reclaimed under memory pressure when the amount of -clean file pages is below vm.clean_low_ratio *unless* we threaten to OOM. - -Protection of clean file pages using this knob may be used when swapping is -still possible to - - prevent disk I/O thrashing under memory pressure; - - improve performance in disk cache-bound tasks under memory pressure. - -Setting it to a high value may result in a early eviction of anonymous pages -into the swap space by attempting to hold the protected amount of clean file -pages in memory. - -The unit of measurement is the percentage of the total memory of the node. - -The default value is 0. - - -clean_min_ratio -================ - -This knob provides *hard* protection of clean file pages. The file pages on the -current node won't be reclaimed under memory pressure when the amount of clean -file pages is below vm.clean_min_ratio. - -Hard protection of clean file pages using this knob may be used to - - prevent disk I/O thrashing under memory pressure even with no free swap space; - - improve performance in disk cache-bound tasks under memory pressure; - - avoid high latency and prevent livelock in near-OOM conditions. - -Setting it to a high value may result in a early out-of-memory condition due to -the inability to reclaim the protected amount of clean file pages when other -types of pages cannot be reclaimed. - -The unit of measurement is the percentage of the total memory of the node. - -The default value is 15. - - compact_memory ============== @@ -1037,14 +973,6 @@ be 133 (x + 2x = 200, 2x = 133.33). At 0, the kernel will not initiate swap until the amount of free and file-backed pages is less than the high watermark in a zone. -This knob has no effect if the amount of clean file pages on the current -node is below vm.clean_low_ratio or vm.clean_min_ratio. In this case, -only anonymous pages can be reclaimed. - -If the number of anonymous pages on the current node is below -vm.anon_min_ratio, then only file pages can be reclaimed with -any vm.swappiness value. - unprivileged_userfaultfd ======================== diff --git a/include/linux/mm.h b/include/linux/mm.h index ece96e871a3b62..e51dba8398f747 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -193,14 +193,6 @@ static inline void __mm_zero_struct_page(struct page *page) extern int sysctl_max_map_count; -extern bool sysctl_workingset_protection; -extern u8 sysctl_anon_min_ratio; -extern u8 sysctl_clean_low_ratio; -extern u8 sysctl_clean_min_ratio; -int vm_workingset_protection_update_handler( - const struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); - extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index abbb553218cd3d..fe7bec454345fd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1849,40 +1849,6 @@ static const struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "workingset_protection", - .data = &sysctl_workingset_protection, - .maxlen = sizeof(bool), - .mode = 0644, - .proc_handler = &proc_dobool, - }, - { - .procname = "anon_min_ratio", - .data = &sysctl_anon_min_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "clean_low_ratio", - .data = &sysctl_clean_low_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "clean_min_ratio", - .data = &sysctl_clean_min_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, { .procname = "panic_on_warn", .data = &panic_on_warn, diff --git a/mm/Kconfig b/mm/Kconfig index 5fc1934a30957a..0f76cc26473d58 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -462,69 +462,6 @@ config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP config ARCH_WANT_HUGETLB_VMEMMAP_PREINIT bool -config ANON_MIN_RATIO - int "Default value for vm.anon_min_ratio" - depends on SYSCTL - range 0 100 - default 15 - help - This option sets the default value for vm.anon_min_ratio sysctl knob. - - The vm.anon_min_ratio sysctl knob provides *hard* protection of - anonymous pages. The anonymous pages on the current node won't be - reclaimed under any conditions when their amount is below - vm.anon_min_ratio. This knob may be used to prevent excessive swap - thrashing when anonymous memory is low (for example, when memory is - going to be overfilled by compressed data of zram module). - - Setting this value too high (close to MemTotal) can result in - inability to swap and can lead to early OOM under memory pressure. - -config CLEAN_LOW_RATIO - int "Default value for vm.clean_low_ratio" - depends on SYSCTL - range 0 100 - default 0 - help - This option sets the default value for vm.clean_low_ratio sysctl knob. - - The vm.clean_low_ratio sysctl knob provides *best-effort* - protection of clean file pages. The file pages on the current node - won't be reclaimed under memory pressure when the amount of clean file - pages is below vm.clean_low_ratio *unless* we threaten to OOM. - Protection of clean file pages using this knob may be used when - swapping is still possible to - - prevent disk I/O thrashing under memory pressure; - - improve performance in disk cache-bound tasks under memory - pressure. - - Setting it to a high value may result in a early eviction of anonymous - pages into the swap space by attempting to hold the protected amount - of clean file pages in memory. - -config CLEAN_MIN_RATIO - int "Default value for vm.clean_min_ratio" - depends on SYSCTL - range 0 100 - default 15 - help - This option sets the default value for vm.clean_min_ratio sysctl knob. - - The vm.clean_min_ratio sysctl knob provides *hard* protection of - clean file pages. The file pages on the current node won't be - reclaimed under memory pressure when the amount of clean file pages is - below vm.clean_min_ratio. Hard protection of clean file pages using - this knob may be used to - - prevent disk I/O thrashing under memory pressure even with no free - swap space; - - improve performance in disk cache-bound tasks under memory - pressure; - - avoid high latency and prevent livelock in near-OOM conditions. - - Setting it to a high value may result in a early out-of-memory condition - due to the inability to reclaim the protected amount of clean file pages - when other types of pages cannot be reclaimed. - config HAVE_MEMBLOCK_PHYS_MAP bool diff --git a/mm/mm_init.c b/mm/mm_init.c index 110d99319ff6f7..eedce9321e13dc 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2732,7 +2732,6 @@ static void __init mem_init_print_info(void) , K(totalhigh_pages()) #endif ); - printk(KERN_INFO "le9 Unofficial (le9uo) working set protection 1.10 by Masahito Suzuki (forked from hakavlad's original le9 patch)"); } void __init __weak arch_mm_preinit(void) diff --git a/mm/vmscan.c b/mm/vmscan.c index 8a73850e0f992d..8e6fa42ad584fd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,15 +148,6 @@ struct scan_control { /* The file folios on the current node are dangerously low */ unsigned int file_is_tiny:1; - /* The anonymous pages on the current node are below vm.anon_min_ratio */ - unsigned int anon_below_min:1; - - /* The clean file pages on the current node are below vm.clean_low_ratio */ - unsigned int clean_below_low:1; - - /* The clean file pages on the current node are below vm.clean_min_ratio */ - unsigned int clean_below_min:1; - /* Always discard instead of demoting to lower tier memory */ unsigned int no_demotion:1; @@ -206,15 +197,6 @@ struct scan_control { #define prefetchw_prev_lru_folio(_folio, _base, _field) do { } while (0) #endif -bool sysctl_workingset_protection __read_mostly = true; -u8 sysctl_anon_min_ratio __read_mostly = CONFIG_ANON_MIN_RATIO; -u8 sysctl_clean_low_ratio __read_mostly = CONFIG_CLEAN_LOW_RATIO; -u8 sysctl_clean_min_ratio __read_mostly = CONFIG_CLEAN_MIN_RATIO; -static u64 sysctl_anon_min_ratio_kb __read_mostly = 0; -static u64 sysctl_clean_low_ratio_kb __read_mostly = 0; -static u64 sysctl_clean_min_ratio_kb __read_mostly = 0; -static u64 workingset_protection_prev_totalram __read_mostly = 0; - /* * From 0 .. MAX_SWAPPINESS. Higher means more swappy. */ @@ -1169,10 +1151,6 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, if (!sc->may_unmap && folio_mapped(folio)) goto keep_locked; - if (folio_is_file_lru(folio) ? sc->clean_below_min : - (sc->anon_below_min && !sc->clean_below_min)) - goto keep_locked; - /* * The number of dirty pages determines if a node is marked * reclaim_congested. kswapd will stall and start writing @@ -2547,15 +2525,6 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } - /* - * Force-scan anon if clean file pages is under vm.clean_low_ratio - * or vm.clean_min_ratio. - */ - if (sc->clean_below_low || sc->clean_below_min) { - scan_balance = SCAN_ANON; - goto out; - } - /* * If there is enough inactive page cache, we do not reclaim * anything from the anonymous working right now. @@ -2673,14 +2642,6 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, BUG(); } - /* - * Hard protection of the working set. - * Don't reclaim anon/file pages when the amount is - * below the watermark of the same type. - */ - if (file ? sc->clean_below_min : sc->anon_below_min) - scan = 0; - nr[lru] = scan; } } @@ -2700,96 +2661,6 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, return can_demote(pgdat->node_id, sc); } -int vm_workingset_protection_update_handler(const struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); - if (ret || !write) - return ret; - - workingset_protection_prev_totalram = 0; - - return 0; -} - -static void prepare_workingset_protection(pg_data_t *pgdat, struct scan_control *sc) -{ - unsigned long node_mem_total; - struct sysinfo i; - - if (!(sysctl_workingset_protection)) { - sc->anon_below_min = 0; - sc->clean_below_low = 0; - sc->clean_below_min = 0; - return; - } - - if (likely(sysctl_anon_min_ratio || - sysctl_clean_low_ratio || - sysctl_clean_min_ratio)) { -#ifdef CONFIG_NUMA - si_meminfo_node(&i, pgdat->node_id); -#else //CONFIG_NUMA - si_meminfo(&i); -#endif //CONFIG_NUMA - node_mem_total = i.totalram; - - if (unlikely(workingset_protection_prev_totalram != node_mem_total)) { - sysctl_anon_min_ratio_kb = - node_mem_total * sysctl_anon_min_ratio / 100; - sysctl_clean_low_ratio_kb = - node_mem_total * sysctl_clean_low_ratio / 100; - sysctl_clean_min_ratio_kb = - node_mem_total * sysctl_clean_min_ratio / 100; - workingset_protection_prev_totalram = node_mem_total; - } - } - - /* - * Check the number of anonymous pages to protect them from - * reclaiming if their amount is below the specified. - */ - if (sysctl_anon_min_ratio) { - unsigned long reclaimable_anon; - - reclaimable_anon = - node_page_state(pgdat, NR_ACTIVE_ANON) + - node_page_state(pgdat, NR_INACTIVE_ANON) + - node_page_state(pgdat, NR_ISOLATED_ANON); - - sc->anon_below_min = reclaimable_anon < sysctl_anon_min_ratio_kb; - } else - sc->anon_below_min = 0; - - /* - * Check the number of clean file pages to protect them from - * reclaiming if their amount is below the specified. - */ - if (sysctl_clean_low_ratio || sysctl_clean_min_ratio) { - unsigned long reclaimable_file, dirty, clean; - - reclaimable_file = - node_page_state(pgdat, NR_ACTIVE_FILE) + - node_page_state(pgdat, NR_INACTIVE_FILE) + - node_page_state(pgdat, NR_ISOLATED_FILE); - dirty = node_page_state(pgdat, NR_FILE_DIRTY); - /* - * node_page_state() sum can go out of sync since - * all the values are not read at once. - */ - if (likely(reclaimable_file > dirty)) - clean = reclaimable_file - dirty; - else - clean = 0; - - sc->clean_below_low = clean < sysctl_clean_low_ratio_kb; - sc->clean_below_min = clean < sysctl_clean_min_ratio_kb; - } else { - sc->clean_below_low = 0; - sc->clean_below_min = 0; - } -} - #ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN_ENABLED @@ -4756,20 +4627,13 @@ static int get_tier_idx(struct lruvec *lruvec, int type) return tier - 1; } -static int get_type_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness) +static int get_type_to_scan(struct lruvec *lruvec, int swappiness) { struct ctrl_pos sp, pv; if (swappiness <= MIN_SWAPPINESS + 1) return LRU_GEN_FILE; - if (sc->clean_below_min) - return LRU_GEN_ANON; - if (sc->anon_below_min) - return LRU_GEN_FILE; - if (sc->clean_below_low) - return LRU_GEN_ANON; - if (swappiness >= MAX_SWAPPINESS) return LRU_GEN_ANON; /* @@ -4786,7 +4650,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw int *type_scanned, struct list_head *list) { int i; - int type = get_type_to_scan(lruvec, sc, swappiness); + int type = get_type_to_scan(lruvec, swappiness); for_each_evictable_type(i, swappiness) { int scanned; @@ -5028,9 +4892,6 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) unsigned long reclaimed = sc->nr_reclaimed; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); - bool can_swap = get_swappiness(lruvec, sc); - - prepare_workingset_protection(pgdat, sc); /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) @@ -5057,7 +4918,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) if (success && mem_cgroup_online(memcg)) return MEMCG_LRU_YOUNG; - if (!success && (!can_swap || lruvec_is_sizable(lruvec, sc))) + if (!success && lruvec_is_sizable(lruvec, sc)) return 0; /* one retry if offlined or too small */ @@ -6170,8 +6031,6 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); - prepare_workingset_protection(pgdat, sc); - shrink_node_memcgs(pgdat, sc); flush_reclaim_state(sc); From 50148a95afa7e7a3e8250e7877132c681b931289 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Sat, 12 Apr 2025 18:06:10 +0800 Subject: [PATCH 2872/2924] le9uo 1.8 Signed-off-by: Eric Naim --- Documentation/admin-guide/sysctl/vm.rst | 72 ++++++++++++ include/linux/mm.h | 8 ++ kernel/sysctl.c | 34 ++++++ mm/Kconfig | 63 +++++++++++ mm/mm_init.c | 1 + mm/vmscan.c | 144 +++++++++++++++++++++++- 6 files changed, 320 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 8290177b4f7589..f2e601171a3e24 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -25,6 +25,9 @@ files can be found in mm/swap.c. Currently, these files are in /proc/sys/vm: - admin_reserve_kbytes +- anon_min_ratio +- clean_low_ratio +- clean_min_ratio - compact_memory - compaction_proactiveness - compact_unevictable_allowed @@ -109,6 +112,67 @@ On x86_64 this is about 128MB. Changing this takes effect whenever an application requests memory. +anon_min_ratio +============== + +This knob provides *hard* protection of anonymous pages. The anonymous pages +on the current node won't be reclaimed under any conditions when their amount +is below vm.anon_min_ratio. + +This knob may be used to prevent excessive swap thrashing when anonymous +memory is low (for example, when memory is going to be overfilled by +compressed data of zram module). + +Setting this value too high (close to 100) can result in inability to +swap and can lead to early OOM under memory pressure. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 15. + + +clean_low_ratio +================ + +This knob provides *best-effort* protection of clean file pages. The file pages +on the current node won't be reclaimed under memory pressure when the amount of +clean file pages is below vm.clean_low_ratio *unless* we threaten to OOM. + +Protection of clean file pages using this knob may be used when swapping is +still possible to + - prevent disk I/O thrashing under memory pressure; + - improve performance in disk cache-bound tasks under memory pressure. + +Setting it to a high value may result in a early eviction of anonymous pages +into the swap space by attempting to hold the protected amount of clean file +pages in memory. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 0. + + +clean_min_ratio +================ + +This knob provides *hard* protection of clean file pages. The file pages on the +current node won't be reclaimed under memory pressure when the amount of clean +file pages is below vm.clean_min_ratio. + +Hard protection of clean file pages using this knob may be used to + - prevent disk I/O thrashing under memory pressure even with no free swap space; + - improve performance in disk cache-bound tasks under memory pressure; + - avoid high latency and prevent livelock in near-OOM conditions. + +Setting it to a high value may result in a early out-of-memory condition due to +the inability to reclaim the protected amount of clean file pages when other +types of pages cannot be reclaimed. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 15. + + compact_memory ============== @@ -973,6 +1037,14 @@ be 133 (x + 2x = 200, 2x = 133.33). At 0, the kernel will not initiate swap until the amount of free and file-backed pages is less than the high watermark in a zone. +This knob has no effect if the amount of clean file pages on the current +node is below vm.clean_low_ratio or vm.clean_min_ratio. In this case, +only anonymous pages can be reclaimed. + +If the number of anonymous pages on the current node is below +vm.anon_min_ratio, then only file pages can be reclaimed with +any vm.swappiness value. + unprivileged_userfaultfd ======================== diff --git a/include/linux/mm.h b/include/linux/mm.h index e51dba8398f747..ece96e871a3b62 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -193,6 +193,14 @@ static inline void __mm_zero_struct_page(struct page *page) extern int sysctl_max_map_count; +extern bool sysctl_workingset_protection; +extern u8 sysctl_anon_min_ratio; +extern u8 sysctl_clean_low_ratio; +extern u8 sysctl_clean_min_ratio; +int vm_workingset_protection_update_handler( + const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fe7bec454345fd..abbb553218cd3d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1849,6 +1849,40 @@ static const struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif + { + .procname = "workingset_protection", + .data = &sysctl_workingset_protection, + .maxlen = sizeof(bool), + .mode = 0644, + .proc_handler = &proc_dobool, + }, + { + .procname = "anon_min_ratio", + .data = &sysctl_anon_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_low_ratio", + .data = &sysctl_clean_low_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_min_ratio", + .data = &sysctl_clean_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, { .procname = "panic_on_warn", .data = &panic_on_warn, diff --git a/mm/Kconfig b/mm/Kconfig index 0f76cc26473d58..5fc1934a30957a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -462,6 +462,69 @@ config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP config ARCH_WANT_HUGETLB_VMEMMAP_PREINIT bool +config ANON_MIN_RATIO + int "Default value for vm.anon_min_ratio" + depends on SYSCTL + range 0 100 + default 15 + help + This option sets the default value for vm.anon_min_ratio sysctl knob. + + The vm.anon_min_ratio sysctl knob provides *hard* protection of + anonymous pages. The anonymous pages on the current node won't be + reclaimed under any conditions when their amount is below + vm.anon_min_ratio. This knob may be used to prevent excessive swap + thrashing when anonymous memory is low (for example, when memory is + going to be overfilled by compressed data of zram module). + + Setting this value too high (close to MemTotal) can result in + inability to swap and can lead to early OOM under memory pressure. + +config CLEAN_LOW_RATIO + int "Default value for vm.clean_low_ratio" + depends on SYSCTL + range 0 100 + default 0 + help + This option sets the default value for vm.clean_low_ratio sysctl knob. + + The vm.clean_low_ratio sysctl knob provides *best-effort* + protection of clean file pages. The file pages on the current node + won't be reclaimed under memory pressure when the amount of clean file + pages is below vm.clean_low_ratio *unless* we threaten to OOM. + Protection of clean file pages using this knob may be used when + swapping is still possible to + - prevent disk I/O thrashing under memory pressure; + - improve performance in disk cache-bound tasks under memory + pressure. + + Setting it to a high value may result in a early eviction of anonymous + pages into the swap space by attempting to hold the protected amount + of clean file pages in memory. + +config CLEAN_MIN_RATIO + int "Default value for vm.clean_min_ratio" + depends on SYSCTL + range 0 100 + default 15 + help + This option sets the default value for vm.clean_min_ratio sysctl knob. + + The vm.clean_min_ratio sysctl knob provides *hard* protection of + clean file pages. The file pages on the current node won't be + reclaimed under memory pressure when the amount of clean file pages is + below vm.clean_min_ratio. Hard protection of clean file pages using + this knob may be used to + - prevent disk I/O thrashing under memory pressure even with no free + swap space; + - improve performance in disk cache-bound tasks under memory + pressure; + - avoid high latency and prevent livelock in near-OOM conditions. + + Setting it to a high value may result in a early out-of-memory condition + due to the inability to reclaim the protected amount of clean file pages + when other types of pages cannot be reclaimed. + config HAVE_MEMBLOCK_PHYS_MAP bool diff --git a/mm/mm_init.c b/mm/mm_init.c index eedce9321e13dc..78f87faf839478 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2732,6 +2732,7 @@ static void __init mem_init_print_info(void) , K(totalhigh_pages()) #endif ); + printk(KERN_INFO "le9 Unofficial (le9uo) working set protection 1.8 by Masahito Suzuki (forked from hakavlad's original le9 patch)"); } void __init __weak arch_mm_preinit(void) diff --git a/mm/vmscan.c b/mm/vmscan.c index 8e6fa42ad584fd..4f991a6ae689e9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,6 +148,15 @@ struct scan_control { /* The file folios on the current node are dangerously low */ unsigned int file_is_tiny:1; + /* The anonymous pages on the current node are below vm.anon_min_ratio */ + unsigned int anon_below_min:1; + + /* The clean file pages on the current node are below vm.clean_low_ratio */ + unsigned int clean_below_low:1; + + /* The clean file pages on the current node are below vm.clean_min_ratio */ + unsigned int clean_below_min:1; + /* Always discard instead of demoting to lower tier memory */ unsigned int no_demotion:1; @@ -197,6 +206,15 @@ struct scan_control { #define prefetchw_prev_lru_folio(_folio, _base, _field) do { } while (0) #endif +bool sysctl_workingset_protection __read_mostly = true; +u8 sysctl_anon_min_ratio __read_mostly = CONFIG_ANON_MIN_RATIO; +u8 sysctl_clean_low_ratio __read_mostly = CONFIG_CLEAN_LOW_RATIO; +u8 sysctl_clean_min_ratio __read_mostly = CONFIG_CLEAN_MIN_RATIO; +static u64 sysctl_anon_min_ratio_kb __read_mostly = 0; +static u64 sysctl_clean_low_ratio_kb __read_mostly = 0; +static u64 sysctl_clean_min_ratio_kb __read_mostly = 0; +static u64 workingset_protection_prev_totalram __read_mostly = 0; + /* * From 0 .. MAX_SWAPPINESS. Higher means more swappy. */ @@ -1151,6 +1169,10 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, if (!sc->may_unmap && folio_mapped(folio)) goto keep_locked; + if (folio_is_file_lru(folio) ? sc->clean_below_min : + (sc->anon_below_min && !sc->clean_below_min)) + goto keep_locked; + /* * The number of dirty pages determines if a node is marked * reclaim_congested. kswapd will stall and start writing @@ -2525,6 +2547,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } + /* + * Force-scan anon if clean file pages is under vm.clean_low_ratio + * or vm.clean_min_ratio. + */ + if (sc->clean_below_low || sc->clean_below_min) { + scan_balance = SCAN_ANON; + goto out; + } + /* * If there is enough inactive page cache, we do not reclaim * anything from the anonymous working right now. @@ -2642,6 +2673,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, BUG(); } + /* + * Hard protection of the working set. + * Don't reclaim anon/file pages when the amount is + * below the watermark of the same type. + */ + if (file ? sc->clean_below_min : sc->anon_below_min) + scan = 0; + nr[lru] = scan; } } @@ -2661,6 +2700,96 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, return can_demote(pgdat->node_id, sc); } +int vm_workingset_protection_update_handler(const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + workingset_protection_prev_totalram = 0; + + return 0; +} + +static void prepare_workingset_protection(pg_data_t *pgdat, struct scan_control *sc) +{ + unsigned long node_mem_total; + struct sysinfo i; + + if (!(sysctl_workingset_protection)) { + sc->anon_below_min = 0; + sc->clean_below_low = 0; + sc->clean_below_min = 0; + return; + } + + if (likely(sysctl_anon_min_ratio || + sysctl_clean_low_ratio || + sysctl_clean_min_ratio)) { +#ifdef CONFIG_NUMA + si_meminfo_node(&i, pgdat->node_id); +#else //CONFIG_NUMA + si_meminfo(&i); +#endif //CONFIG_NUMA + node_mem_total = i.totalram; + + if (unlikely(workingset_protection_prev_totalram != node_mem_total)) { + sysctl_anon_min_ratio_kb = + node_mem_total * sysctl_anon_min_ratio / 100; + sysctl_clean_low_ratio_kb = + node_mem_total * sysctl_clean_low_ratio / 100; + sysctl_clean_min_ratio_kb = + node_mem_total * sysctl_clean_min_ratio / 100; + workingset_protection_prev_totalram = node_mem_total; + } + } + + /* + * Check the number of anonymous pages to protect them from + * reclaiming if their amount is below the specified. + */ + if (sysctl_anon_min_ratio) { + unsigned long reclaimable_anon; + + reclaimable_anon = + node_page_state(pgdat, NR_ACTIVE_ANON) + + node_page_state(pgdat, NR_INACTIVE_ANON) + + node_page_state(pgdat, NR_ISOLATED_ANON); + + sc->anon_below_min = reclaimable_anon < sysctl_anon_min_ratio_kb; + } else + sc->anon_below_min = 0; + + /* + * Check the number of clean file pages to protect them from + * reclaiming if their amount is below the specified. + */ + if (sysctl_clean_low_ratio || sysctl_clean_min_ratio) { + unsigned long reclaimable_file, dirty, clean; + + reclaimable_file = + node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE) + + node_page_state(pgdat, NR_ISOLATED_FILE); + dirty = node_page_state(pgdat, NR_FILE_DIRTY); + /* + * node_page_state() sum can go out of sync since + * all the values are not read at once. + */ + if (likely(reclaimable_file > dirty)) + clean = reclaimable_file - dirty; + else + clean = 0; + + sc->clean_below_low = clean < sysctl_clean_low_ratio_kb; + sc->clean_below_min = clean < sysctl_clean_min_ratio_kb; + } else { + sc->clean_below_low = 0; + sc->clean_below_min = 0; + } +} + #ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN_ENABLED @@ -4627,13 +4756,20 @@ static int get_tier_idx(struct lruvec *lruvec, int type) return tier - 1; } -static int get_type_to_scan(struct lruvec *lruvec, int swappiness) +static int get_type_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness) { struct ctrl_pos sp, pv; if (swappiness <= MIN_SWAPPINESS + 1) return LRU_GEN_FILE; + if (sc->clean_below_min) + return LRU_GEN_ANON; + if (sc->anon_below_min) + return LRU_GEN_FILE; + if (sc->clean_below_low) + return LRU_GEN_ANON; + if (swappiness >= MAX_SWAPPINESS) return LRU_GEN_ANON; /* @@ -4650,7 +4786,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw int *type_scanned, struct list_head *list) { int i; - int type = get_type_to_scan(lruvec, swappiness); + int type = get_type_to_scan(lruvec, sc, swappiness); for_each_evictable_type(i, swappiness) { int scanned; @@ -4893,6 +5029,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); + prepare_workingset_protection(pgdat, sc); + /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; @@ -6031,6 +6169,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); + prepare_workingset_protection(pgdat, sc); + shrink_node_memcgs(pgdat, sc); flush_reclaim_state(sc); From 072e5fc948e7bf43d7858fb39276cea40c91a840 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Wed, 12 Mar 2025 14:29:09 +0530 Subject: [PATCH 2873/2924] drm/xe/hwmon: expose fan speed Add hwmon support for fan1_input, fan2_input and fan3_input attributes, which will expose fan speed of respective channels in RPM when supported by hardware. With this in place we can monitor fan speed using lm-sensors tool. v2: Rely on platform checks instead of mailbox error (Aravind, Rodrigo) v3: Introduce has_fan_control flag (Rodrigo) Signed-off-by: Raag Jadav Reviewed-by: Andi Shyti --- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 24 ++++ drivers/gpu/drm/xe/regs/xe_pcode_regs.h | 3 + drivers/gpu/drm/xe/xe_device_types.h | 2 + drivers/gpu/drm/xe/xe_hwmon.c | 125 +++++++++++++++++- drivers/gpu/drm/xe/xe_pci.c | 4 + drivers/gpu/drm/xe/xe_pcode_api.h | 3 + 6 files changed, 160 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index cb207c79680d9a..6fbab98fb639df 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -124,3 +124,27 @@ Contact: intel-xe@lists.freedesktop.org Description: RO. VRAM temperature in millidegree Celsius. Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan1_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 1 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan2_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 2 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan3_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 3 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h index 8846eb9ce2a40b..c7d5d782e3f95c 100644 --- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h @@ -21,6 +21,9 @@ #define BMG_PACKAGE_POWER_SKU XE_REG(0x138098) #define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc) #define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120) +#define BMG_FAN_1_SPEED XE_REG(0x138140) +#define BMG_FAN_2_SPEED XE_REG(0x138170) +#define BMG_FAN_3_SPEED XE_REG(0x1381a0) #define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0) #define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434) #define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 0482f26aa48033..0564164935c609 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -314,6 +314,8 @@ struct xe_device { u8 has_atomic_enable_pte_bit:1; /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ u8 has_device_atomics_on_smem:1; + /** @info.has_fan_control: Device supports fan control */ + u8 has_fan_control:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; /** @info.has_heci_cscfi: device has heci cscfi */ diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 48d80ffdf7bb9c..eb293aec36a0fb 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -27,6 +28,7 @@ enum xe_hwmon_reg { REG_PKG_POWER_SKU_UNIT, REG_GT_PERF_STATUS, REG_PKG_ENERGY_STATUS, + REG_FAN_SPEED, }; enum xe_hwmon_reg_operation { @@ -42,6 +44,13 @@ enum xe_hwmon_channel { CHANNEL_MAX, }; +enum xe_fan_channel { + FAN_1, + FAN_2, + FAN_3, + FAN_MAX, +}; + /* * SF_* - scale factors for particular quantities according to hwmon spec. */ @@ -61,6 +70,16 @@ struct xe_hwmon_energy_info { long accum_energy; }; +/** + * struct xe_hwmon_fan_info - to cache previous fan reading + */ +struct xe_hwmon_fan_info { + /** @reg_val_prev: previous fan reg val */ + u32 reg_val_prev; + /** @time_prev: previous timestamp */ + u64 time_prev; +}; + /** * struct xe_hwmon - xe hwmon data structure */ @@ -79,6 +98,8 @@ struct xe_hwmon { int scl_shift_time; /** @ei: Energy info for energyN_input */ struct xe_hwmon_energy_info ei[CHANNEL_MAX]; + /** @fi: Fan info for fanN_input */ + struct xe_hwmon_fan_info fi[FAN_MAX]; }; static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, @@ -144,6 +165,14 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return PCU_CR_PACKAGE_ENERGY_STATUS; } break; + case REG_FAN_SPEED: + if (channel == FAN_1) + return BMG_FAN_1_SPEED; + else if (channel == FAN_2) + return BMG_FAN_2_SPEED; + else if (channel == FAN_3) + return BMG_FAN_3_SPEED; + break; default: drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); break; @@ -454,6 +483,7 @@ static const struct hwmon_channel_info * const hwmon_info[] = { HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), + HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT), NULL }; @@ -480,6 +510,19 @@ static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval) (uval & POWER_SETUP_I1_DATA_MASK)); } +static int xe_hwmon_pcode_read_fan_control(const struct xe_hwmon *hwmon, u32 subcmd, u32 *uval) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + + /* Platforms that don't return correct value */ + if (hwmon->xe->info.platform == XE_DG2 && subcmd == FSC_READ_NUM_FANS) { + *uval = 2; + return 0; + } + + return xe_pcode_read(root_tile, PCODE_MBOX(FAN_SPEED_CONTROL, subcmd, 0), uval, NULL); +} + static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, long *value, u32 scale_factor) { @@ -705,6 +748,75 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) } } +static umode_t +xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) +{ + u32 uval; + + if (!hwmon->xe->info.has_fan_control) + return 0; + + switch (attr) { + case hwmon_fan_input: + if (xe_hwmon_pcode_read_fan_control(hwmon, FSC_READ_NUM_FANS, &uval)) + return 0; + + return channel < uval ? 0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_fan_input_read(struct xe_hwmon *hwmon, int channel, long *val) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); + struct xe_hwmon_fan_info *fi = &hwmon->fi[channel]; + u64 rotations, time_now, time; + u32 reg_val; + int ret = 0; + + mutex_lock(&hwmon->hwmon_lock); + + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_FAN_SPEED, channel)); + time_now = get_jiffies_64(); + + /* + * HW register value is accumulated count of pulses from PWM fan with the scale + * of 2 pulses per rotation. + */ + rotations = (reg_val - fi->reg_val_prev) / 2; + + time = jiffies_delta_to_msecs(time_now - fi->time_prev); + if (unlikely(!time)) { + ret = -EAGAIN; + goto unlock; + } + + /* + * Calculate fan speed in RPM by time averaging two subsequent readings in minutes. + * RPM = number of rotations * msecs per minute / time in msecs + */ + *val = DIV_ROUND_UP_ULL(rotations * (MSEC_PER_SEC * 60), time); + + fi->reg_val_prev = reg_val; + fi->time_prev = time_now; +unlock: + mutex_unlock(&hwmon->hwmon_lock); + return ret; +} + +static int +xe_hwmon_fan_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) +{ + switch (attr) { + case hwmon_fan_input: + return xe_hwmon_fan_input_read(hwmon, channel, val); + default: + return -EOPNOTSUPP; + } +} + static umode_t xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel) @@ -730,6 +842,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, case hwmon_energy: ret = xe_hwmon_energy_is_visible(hwmon, attr, channel); break; + case hwmon_fan: + ret = xe_hwmon_fan_is_visible(hwmon, attr, channel); + break; default: ret = 0; break; @@ -765,6 +880,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, case hwmon_energy: ret = xe_hwmon_energy_read(hwmon, attr, channel, val); break; + case hwmon_fan: + ret = xe_hwmon_fan_read(hwmon, attr, channel, val); + break; default: ret = -EOPNOTSUPP; break; @@ -842,7 +960,7 @@ static void xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - long energy; + long energy, fan_speed; u64 val_sku_unit = 0; int channel; struct xe_reg pkg_power_sku_unit; @@ -866,6 +984,11 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) for (channel = 0; channel < CHANNEL_MAX; channel++) if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel)) xe_hwmon_energy_get(hwmon, channel, &energy); + + /* Initialize 'struct xe_hwmon_fan_info' with initial fan register reading. */ + for (channel = 0; channel < FAN_MAX; channel++) + if (xe_hwmon_is_visible(hwmon, hwmon_fan, hwmon_fan_input, channel)) + xe_hwmon_fan_input_read(hwmon, channel, &fan_speed); } static void xe_hwmon_mutex_destroy(void *arg) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 30f7ce06c89690..57c607337848a3 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -62,6 +62,7 @@ struct xe_device_desc { u8 is_dgfx:1; u8 has_display:1; + u8 has_fan_control:1; u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; u8 has_llc:1; @@ -303,6 +304,7 @@ static const struct xe_device_desc dg2_desc = { DG2_FEATURES, .has_display = true, + .has_fan_control = true, }; static const __maybe_unused struct xe_device_desc pvc_desc = { @@ -337,6 +339,7 @@ static const struct xe_device_desc bmg_desc = { PLATFORM(BATTLEMAGE), .dma_mask_size = 46, .has_display = true, + .has_fan_control = true, .has_heci_cscfi = 1, }; @@ -576,6 +579,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.dma_mask_size = desc->dma_mask_size; xe->info.is_dgfx = desc->is_dgfx; + xe->info.has_fan_control = desc->has_fan_control; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 2bae9afdbd352a..e622ae17f08dd9 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -49,6 +49,9 @@ /* Domain IDs (param2) */ #define PCODE_MBOX_DOMAIN_HBM 0x2 +#define FAN_SPEED_CONTROL 0x7D +#define FSC_READ_NUM_FANS 0x4 + #define PCODE_SCRATCH(x) XE_REG(0x138320 + ((x) * 4)) /* PCODE_SCRATCH0 */ #define AUXINFO_REG_OFFSET REG_GENMASK(17, 15) From ad996adf00a3eb66e8e818ee05a4bbec7443f81b Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Sat, 19 Apr 2025 18:02:14 -0700 Subject: [PATCH 2874/2924] kbuild: distributed build support for Clang ThinLTO Add distributed ThinLTO build support for the Linux kernel. This new mode offers several advantages: (1) Increased flexibility in handling user-specified build options. (2) Improved user-friendliness for developers. (3) Greater convenience for integrating with objtool and livepatch. Note that "distributed" in this context refers to a term that differentiates in-process ThinLTO builds by invoking backend compilation through the linker, not necessarily building in distributed environments. Distributed ThinLTO is enabled via the `CONFIG_LTO_CLANG_THIN_DIST` Kconfig option. For example: > make LLVM=1 defconfig > scripts/config -e LTO_CLANG_THIN_DIST > make LLVM=1 oldconfig > make LLVM=1 vmlinux -j <..> The implementation changes the top-level Makefile with a macro for generating `vmlinux.o` for distributed ThinLTO builds. It uses the existing Kbuild infrastructure to perform two recursive passes through the subdirectories. The first pass generates LLVM IR object files, similar to in-process ThinLTO. Following the thin-link stage, a second pass compiles these IR files into the final native object files. The build rules and actions for this two-pass process are primarily implemented in `scripts/Makefile.build`. Currently, this patch focuses on building the main kernel image (`vmlinux`) only. Support for building kernel modules using this method is planned for a subsequent patch. Tested on the following arch: x86, arm64, loongarch, and riscv. Some implementation details can be found here: https://discourse.llvm.org/t/rfc-distributed-thinlto-build-for-kernel/85934 Signed-off-by: Rong Xu --- MAINTAINERS | 5 +++ Makefile | 40 ++++++++++++++++++++--- arch/Kconfig | 12 +++++++ scripts/Makefile.build | 52 +++++++++++++++++++++++++++--- scripts/Makefile.lib | 7 +++- scripts/Makefile.vmlinux_o | 16 +++++++--- scripts/Makefile.vmlinux_thinlink | 53 +++++++++++++++++++++++++++++++ scripts/head-object-list.txt | 1 + 8 files changed, 171 insertions(+), 15 deletions(-) create mode 100644 scripts/Makefile.vmlinux_thinlink diff --git a/MAINTAINERS b/MAINTAINERS index dd844ac8d9107b..4feb3b9f8f739d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5790,6 +5790,11 @@ F: scripts/Makefile.clang F: scripts/clang-tools/ K: \b(?i:clang|llvm)\b +CLANG/LLVM THINLTO DISTRIBUTED BUILD +M: Rong Xu +S: Supported +F: scripts/Makefile.vmlinux_thinlink + CLK API M: Russell King L: linux-clk@vger.kernel.org diff --git a/Makefile b/Makefile index 2d9d46275c4bf9..ca7a269a08e372 100644 --- a/Makefile +++ b/Makefile @@ -298,7 +298,8 @@ no-dot-config-targets := $(clean-targets) \ outputmakefile rustavailable rustfmt rustfmtcheck no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \ image_name -single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %/ +single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.final_o \ + %.final_a %.o.thinlto.bc %/ config-build := mixed-build := @@ -999,10 +1000,10 @@ export CC_FLAGS_SCS endif ifdef CONFIG_LTO_CLANG -ifdef CONFIG_LTO_CLANG_THIN -CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit -else +ifdef CONFIG_LTO_CLANG_FULL CC_FLAGS_LTO := -flto +else # for CONFIG_LTO_CLANG_THIN or CONFIG_LTO_CLANG_THIN_DIST +CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit endif CC_FLAGS_LTO += -fvisibility=hidden @@ -1221,8 +1222,34 @@ vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE $(call if_changed,ar_vmlinux.a) PHONY += vmlinux_o +ifdef CONFIG_LTO_CLANG_THIN_DIST +vmlinux.thinlink: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_thinlink +targets += vmlinux.thinlink + +vmlinux_final_a := $(patsubst %.a,%.final_a,$(KBUILD_VMLINUX_OBJS)) +quiet_cmd_ar_vmlinux.final_a = AR $@ + cmd_ar_vmlinux.final_a = \ + rm -f $@; \ + $(AR) cDPrST $@ $(vmlinux_final_a); \ + $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) + +define rule_gen_vmlinux_final_a + +$(Q)$(MAKE) $(build)=. need-builtin=1 thinlto_final_pass=1 need-modorder=1 built-in.final_a + $(call cmd_and_savecmd,ar_vmlinux.final_a) +endef + +vmlinux.final_a: vmlinux.thinlink scripts/head-object-list.txt FORCE + $(call if_changed_rule,gen_vmlinux_final_a) + +targets += vmlinux.final_a + +vmlinux_o: vmlinux.final_a + $(Q)$(MAKE) thinlto_final_pass=1 -f $(srctree)/scripts/Makefile.vmlinux_o +else vmlinux_o: vmlinux.a $(KBUILD_VMLINUX_LIBS) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_o +endif vmlinux.o modules.builtin.modinfo modules.builtin: vmlinux_o @: @@ -1580,7 +1607,8 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S + .builtin-dtbs-list .builtin-dtb.S \ + .vmlinux_thinlto_bc_files vmlinux.thinlink # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -2031,6 +2059,8 @@ clean: $(clean-dirs) -o -name '*.symtypes' -o -name 'modules.order' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ + -o -name '*.final_a' -o -name '*.final_o' \ + -o -name '*.o.thinlto.bc' \ -o -name '*.gcno' \ \) -type f -print \ -o -name '.tmp_*' -print \ diff --git a/arch/Kconfig b/arch/Kconfig index b0adb665041f17..cbeeeb9b076d8f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -810,6 +810,18 @@ config LTO_CLANG_THIN https://clang.llvm.org/docs/ThinLTO.html If unsure, say Y. + +config LTO_CLANG_THIN_DIST + bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)" + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN + select LTO_CLANG + help + This option enables Clang's ThinLTO in distributed build mode. + In this mode, the linker performs the thin-link, generating + ThinLTO index files. Subsequently, the build system explicitly + invokes ThinLTO backend compilation using these index files + and pre-linked IR objects. The resulting final object files + are with the .final_o suffix. endchoice config ARCH_SUPPORTS_AUTOFDO_CLANG diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 13dcd86e74ca83..87259b0e2bfc88 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -50,18 +50,23 @@ endif # =========================================================================== +builtin_suffix := $(if $(filter %.final_a, $(MAKECMDGOALS)),final_a,a) +ifeq ($(thinlto_final_pass),1) +builtin_suffix := final_a +endif + # subdir-builtin and subdir-modorder may contain duplications. Use $(sort ...) -subdir-builtin := $(sort $(filter %/built-in.a, $(real-obj-y))) +subdir-builtin := $(sort $(filter %/built-in.$(builtin_suffix), $(real-obj-y))) subdir-modorder := $(sort $(filter %/modules.order, $(obj-m))) targets-for-builtin := $(extra-y) ifneq ($(strip $(lib-y) $(lib-m) $(lib-)),) -targets-for-builtin += $(obj)/lib.a +targets-for-builtin += $(obj)/lib.$(builtin_suffix) endif ifdef need-builtin -targets-for-builtin += $(obj)/built-in.a +targets-for-builtin += $(obj)/built-in.$(builtin_suffix) endif targets-for-modules := $(foreach x, o mod, \ @@ -337,6 +342,10 @@ $(obj)/%.o: $(obj)/%.S FORCE targets += $(filter-out $(subdir-builtin), $(real-obj-y)) targets += $(filter-out $(subdir-modorder), $(real-obj-m)) targets += $(lib-y) $(always-y) +ifeq ($(builtin_suffix), final_a) +final_o_targets = $(patsubst,%.o,%.final_o,$(targets)) +targets += $(final_targets) +endif # Linker scripts preprocessor (.lds.S -> .lds) # --------------------------------------------------------------------------- @@ -347,6 +356,24 @@ quiet_cmd_cpp_lds_S = LDS $@ $(obj)/%.lds: $(src)/%.lds.S FORCE $(call if_changed_dep,cpp_lds_S) +ifdef CONFIG_LTO_CLANG_THIN_DIST +# Generate .final_o (obj) from .o (bitcode) file +# --------------------------------------------------------------------------- +quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@ + +cmd_cc_o_bc = $(if $(filter bitcode, $(shell file -b $<)),$(CC) \ + $(filter-out -Wp% $(LINUXINCLUDE) %.h.gch %.h -D% \ + -flto=thin, $(c_flags)) \ + -Wno-unused-command-line-argument \ + -x ir -fthinlto-index=$<.thinlto.bc -c -o $@ \ + $(if $(findstring ../,$<), \ + $$(realpath --relative-to=$(srcroot) $<), $<), \ + cp $< $@) + +$(obj)/%.final_o: $(obj)/%.o FORCE + $(call if_changed,cc_o_bc) +endif + # ASN.1 grammar # --------------------------------------------------------------------------- quiet_cmd_asn1_compiler = ASN.1 $(basename $@).[ch] @@ -360,7 +387,7 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler # --------------------------------------------------------------------------- # To build objects in subdirs, we need to descend into the directories -$(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ; +$(subdir-builtin): $(obj)/%/built-in.$(builtin_suffix): $(obj)/% ; $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ; # @@ -377,6 +404,12 @@ quiet_cmd_ar_builtin = AR $@ $(obj)/built-in.a: $(real-obj-y) FORCE $(call if_changed,ar_builtin) +ifdef CONFIG_LTO_CLANG_THIN_DIST +# Rule to compile a set of .final_o files into one .final_a file. +$(obj)/built-in.final_a: $(patsubst %.o,%.final_o,$(real-obj-y)) FORCE + $(call if_changed,ar_builtin) +endif + # This is a list of build artifacts from the current Makefile and its # sub-directories. The timestamp should be updated when any of the member files. @@ -394,6 +427,14 @@ $(obj)/modules.order: $(obj-m) FORCE $(obj)/lib.a: $(lib-y) FORCE $(call if_changed,ar) +ifdef CONFIG_LTO_CLANG_THIN_DIST +quiet_cmd_ar_final = AR $@ + cmd_ar_final = rm -f $@; $(AR) cDPrsT $@ $(patsubst %.o,%.final_o,$(real-prereqs)) + +$(obj)/lib.final_a: $(patsubst %.o,%.final_o,$(lib-y)) FORCE + $(call if_changed,ar_final) +endif + quiet_cmd_ld_multi_m = LD [M] $@ cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$< $(cmd_objtool) @@ -459,7 +500,8 @@ $(single-subdir-goals): $(single-subdirs) PHONY += $(subdir-ym) $(subdir-ym): $(Q)$(MAKE) $(build)=$@ \ - need-builtin=$(if $(filter $@/built-in.a, $(subdir-builtin)),1) \ + need-builtin=$(if $(filter $@/built-in.$(builtin_suffix), $(subdir-builtin)),1) \ + thinlto_final_pass=$(if $(filter final_a, $(builtin_suffix)),1) \ need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \ $(filter $@/%, $(single-subdir-goals)) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 2fe73cda0bddb9..ce341ed8d1df31 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -34,8 +34,13 @@ else obj-m := $(filter-out %/, $(obj-m)) endif +builtin_suffix := $(if $(filter %.final_a, $(MAKECMDGOALS)),final_a,a) +ifeq ($(thinlto_final_pass),1) + builtin_suffix := final_a +endif + ifdef need-builtin -obj-y := $(patsubst %/, %/built-in.a, $(obj-y)) +obj-y := $(patsubst %/, %/built-in.$(builtin_suffix), $(obj-y)) else obj-y := $(filter-out %/, $(obj-y)) endif diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index b024ffb3e20187..97de473c548dbb 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -9,6 +9,14 @@ include $(srctree)/scripts/Kbuild.include # for objtool include $(srctree)/scripts/Makefile.lib +ifeq ($(thinlto_final_pass),1) +vmlinux_a := vmlinux.final_a +vmlinux_libs := $(patsubst %.a,%.final_a,$(KBUILD_VMLINUX_LIBS)) +else +vmlinux_a := vmlinux.a +vmlinux_libs := $(KBUILD_VMLINUX_LIBS) +endif + # Generate a linker script to ensure correct ordering of initcalls for Clang LTO # --------------------------------------------------------------------------- @@ -18,7 +26,7 @@ quiet_cmd_gen_initcalls_lds = GEN $@ $(PERL) $(real-prereqs) > $@ .tmp_initcalls.lds: $(srctree)/scripts/generate_initcall_order.pl \ - vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE + $(vmlinux_a) $(vmlinux_libs) FORCE $(call if_changed,gen_initcalls_lds) targets := .tmp_initcalls.lds @@ -59,8 +67,8 @@ quiet_cmd_ld_vmlinux.o = LD $@ $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ $(vmlinux-o-ld-args-y) \ $(addprefix -T , $(initcalls-lds)) \ - --whole-archive vmlinux.a --no-whole-archive \ - --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ + --whole-archive $(vmlinux_a) --no-whole-archive \ + --start-group $(vmlinux_libs) --end-group \ $(cmd_objtool) define rule_ld_vmlinux.o @@ -68,7 +76,7 @@ define rule_ld_vmlinux.o $(call cmd,gen_objtooldep) endef -vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE +vmlinux.o: $(initcalls-lds) $(vmlinux_a) $(vmlinux_libs) FORCE $(call if_changed_rule,ld_vmlinux.o) targets += vmlinux.o diff --git a/scripts/Makefile.vmlinux_thinlink b/scripts/Makefile.vmlinux_thinlink new file mode 100644 index 00000000000000..13e4026c7d45bd --- /dev/null +++ b/scripts/Makefile.vmlinux_thinlink @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: GPL-2.0-only + +PHONY := __default +__default: vmlinux.thinlink + +include include/config/auto.conf +include $(srctree)/scripts/Kbuild.include + + +# Generate a linker script to ensure correct ordering of initcalls for Clang LTO +# --------------------------------------------------------------------------- + +quiet_cmd_gen_initcalls_lds = GEN $@ + cmd_gen_initcalls_lds = \ + $(PYTHON3) $(srctree)/scripts/jobserver-exec \ + $(PERL) $(real-prereqs) > $@ + +.tmp_initcalls_thinlink.lds: $(srctree)/scripts/generate_initcall_order.pl \ + vmlinux.a FORCE + $(call if_changed,gen_initcalls_lds) + +targets := .tmp_initcalls_thinlink.lds + +initcalls-lds := .tmp_initcalls_thinlink.lds + +quiet_cmd_ld_vmlinux.thinlink = LD $@ + cmd_ld_vmlinux.thinlink = \ + $(AR) t vmlinux.a > .vmlinux_thinlto_bc_files; \ + $(LD) ${KBUILD_LDFLAGS} -r $(addprefix -T , $(initcalls-lds)) \ + --thinlto-index-only @.vmlinux_thinlto_bc_files; \ + touch vmlinux.thinlink + +vmlinux.thinlink: vmlinux.a $(initcalls-lds) FORCE + $(call if_changed,ld_vmlinux.thinlink) + +targets += vmlinux.thinlink + +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. +# --------------------------------------------------------------------------- + +PHONY += FORCE +FORCE: + +# Read all saved command lines and dependencies for the $(targets) we +# may be building above, using $(if_changed{,_dep}). As an +# optimization, we don't need to read them if the target does not +# exist, we will rebuild anyway in that case. + +existing-targets := $(wildcard $(sort $(targets))) + +-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) + +.PHONY: $(PHONY) diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt index 7274dfc65af606..b300222e99a6b0 100644 --- a/scripts/head-object-list.txt +++ b/scripts/head-object-list.txt @@ -18,6 +18,7 @@ arch/arm/kernel/head.o arch/csky/kernel/head.o arch/hexagon/kernel/head.o arch/loongarch/kernel/head.o +arch/loongarch/kernel/head.final_o arch/m68k/68000/head.o arch/m68k/coldfire/head.o arch/m68k/kernel/head.o From 478b6a002fbcd9bbf29efda52caabbac23a46d5b Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Tue, 22 Apr 2025 22:52:01 +0800 Subject: [PATCH 2875/2924] Makefile.{autofdo,propeller}: Add support for Distributed ThinLTO Signed-off-by: Eric Naim --- scripts/Makefile.autofdo | 2 +- scripts/Makefile.propeller | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo index 1caf2457e585c0..daca19a1753342 100644 --- a/scripts/Makefile.autofdo +++ b/scripts/Makefile.autofdo @@ -13,7 +13,7 @@ ifdef CLANG_AUTOFDO_PROFILE CFLAGS_AUTOFDO_CLANG += -fsplit-machine-functions endif -ifdef CONFIG_LTO_CLANG_THIN +ifeq ($(or $(CONFIG_LTO_CLANG_THIN),$(CONFIG_LTO_CLANG_THIN_DIST)),y) ifdef CLANG_AUTOFDO_PROFILE KBUILD_LDFLAGS += --lto-sample-profile=$(CLANG_AUTOFDO_PROFILE) endif diff --git a/scripts/Makefile.propeller b/scripts/Makefile.propeller index 48a660128e256d..46a59e160c0362 100644 --- a/scripts/Makefile.propeller +++ b/scripts/Makefile.propeller @@ -24,7 +24,7 @@ ifndef CONFIG_DEBUG_INFO endif endif -ifdef CONFIG_LTO_CLANG_THIN +ifeq ($(or $(CONFIG_LTO_CLANG_THIN),$(CONFIG_LTO_CLANG_THIN_DIST)),y) ifdef CLANG_PROPELLER_PROFILE_PREFIX KBUILD_LDFLAGS += --lto-basic-block-sections=$(CLANG_PROPELLER_PROFILE_PREFIX)_cc_profile.txt else From 8842ec91b5cb20518d5856d1eeea3577d551e39b Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Wed, 30 Apr 2025 18:36:30 +0200 Subject: [PATCH 2876/2924] iosched-6.15: bump ADIOS to v1.5.5 Signed-off-by: Piotr Gorski --- block/adios.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/block/adios.c b/block/adios.c index a35fcf6e45697c..3e66b26b51ac10 100644 --- a/block/adios.c +++ b/block/adios.c @@ -24,7 +24,7 @@ #include "blk-mq.h" #include "blk-mq-sched.h" -#define ADIOS_VERSION "1.5.4" +#define ADIOS_VERSION "1.5.5" // Define operation types supported by ADIOS enum adios_op_type { @@ -213,7 +213,7 @@ static bool lm_update_small_buckets(struct latency_model *model, threshold_count - (cumulative_count - bucket->count); if (bucket->count > 0) { sum_latency += - (bucket->sum_latency * remaining_count) / bucket->count; + div_u64((bucket->sum_latency * remaining_count), bucket->count); sum_count += remaining_count; } } @@ -285,9 +285,9 @@ static bool lm_update_large_buckets( threshold_count - (cumulative_count - bucket->count); if (bucket->count > 0) { sum_latency += - (bucket->sum_latency * remaining_count) / bucket->count; + div_u64((bucket->sum_latency * remaining_count), bucket->count); sum_block_size += - (bucket->sum_block_size * remaining_count) / bucket->count; + div_u64((bucket->sum_block_size * remaining_count), bucket->count); } } } @@ -370,11 +370,11 @@ static u8 lm_input_bucket_index( u8 bucket_index; if (measured < predicted * 2) - bucket_index = (measured * 20) / predicted; + bucket_index = div_u64((measured * 20), predicted); else if (measured < predicted * 5) - bucket_index = (measured * 10) / predicted + 20; + bucket_index = div_u64((measured * 10), predicted) + 20; else - bucket_index = (measured * 3) / predicted + 40; + bucket_index = div_u64((measured * 3), predicted) + 40; return bucket_index; } @@ -807,7 +807,7 @@ static struct request *dispatch_from_bq(struct adios_data *ad) { tpl = atomic64_read(&ad->total_pred_lat); if (!ad->more_bq_ready && (!tpl || - tpl < ad->global_latency_window * ad->bq_refill_below_ratio / 100)) + tpl < div_u64(ad->global_latency_window * ad->bq_refill_below_ratio, 100) )) fill_batch_queues(ad, tpl); again: From 47faad2816dacb41a8dd235caa37af6543d8e16e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 9 Apr 2025 16:55:10 +0100 Subject: [PATCH 2877/2924] select: do_pollfd: add unlikely branch hint return path Adding an unlikely() hint on the fd < 0 comparison return path improves run-time performance of the poll() system call. gcov based coverage analysis based on running stress-ng and a kernel build shows that this path return path is highly unlikely. Benchmarking on an Debian based Intel(R) Core(TM) Ultra 9 285K with a 6.15-rc1 kernel and a poll of 1024 file descriptors with zero timeout shows an call reduction from 32818 ns down to 32635 ns, which is a ~0.5% performance improvement. Results based on running 25 tests with turbo disabled (to reduce clock freq turbo changes), with 30 second run per test and comparing the number of poll() calls per second. The % standard deviation of the 25 tests was 0.08%, so results are reliable. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/20250409155510.577490-1-colin.i.king@gmail.com Signed-off-by: Christian Brauner --- fs/select.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/select.c b/fs/select.c index 7da531b1cf6bec..0eaf3522abe9a2 100644 --- a/fs/select.c +++ b/fs/select.c @@ -857,7 +857,7 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait, int fd = pollfd->fd; __poll_t mask, filter; - if (fd < 0) + if (unlikely(fd < 0)) return 0; CLASS(fd, f)(fd); From bd79a697488868cf28a24cf4abf7f524530b40fa Mon Sep 17 00:00:00 2001 From: Masahito S Date: Sat, 10 May 2025 13:19:56 +0900 Subject: [PATCH 2878/2924] sched/fair: Prefer full-idle SMT cores When selecting an idle CPU for a task, always try to prioritize full-idle SMT cores (CPUs belonging to an SMT core where all its sibling are idle) over partially-idle cores. Signed-off-by: Andrea Righi Original patch at: https://web.git.kernel.org/pub/scm/linux/kernel/git/arighi/linux.git/commit/?h=sched-smt-idle --- kernel/sched/fair.c | 69 ++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1e8c138c797f22..76d50c5b024210 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7567,9 +7567,14 @@ static inline int sched_balance_find_dst_cpu(struct sched_domain *sd, struct tas return new_cpu; } +static inline bool is_idle_cpu(int cpu) +{ + return available_idle_cpu(cpu) || sched_idle_cpu(cpu); +} + static inline int __select_idle_cpu(int cpu, struct task_struct *p) { - if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) && + if (is_idle_cpu(cpu) && sched_cpu_cookie_match(cpu_rq(cpu), p)) return cpu; @@ -7580,6 +7585,24 @@ static inline int __select_idle_cpu(int cpu, struct task_struct *p) DEFINE_STATIC_KEY_FALSE(sched_smt_present); EXPORT_SYMBOL_GPL(sched_smt_present); +/* + * Return true if all the CPUs in the SMT core where @cpu belongs are idle, + * false otherwise. + */ +static bool is_idle_core(int cpu) +{ + int sibling; + + if (!sched_smt_active()) + return is_idle_cpu(cpu); + + for_each_cpu(sibling, cpu_smt_mask(cpu)) + if (!is_idle_cpu(sibling)) + return false; + + return true; +} + static inline void set_idle_cores(int cpu, int val) { struct sched_domain_shared *sds; @@ -7662,29 +7685,6 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu return -1; } -/* - * Scan the local SMT mask for idle CPUs. - */ -static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) -{ - int cpu; - - for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) { - if (cpu == target) - continue; - /* - * Check if the CPU is in the LLC scheduling domain of @target. - * Due to isolcpus, there is no guarantee that all the siblings are in the domain. - */ - if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) - continue; - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) - return cpu; - } - - return -1; -} - #else /* CONFIG_SCHED_SMT */ static inline void set_idle_cores(int cpu, int val) @@ -7701,9 +7701,9 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma return __select_idle_cpu(core, p); } -static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) +static inline bool is_idle_core(int cpu) { - return -1; + return is_idle_cpu(cpu); } #endif /* CONFIG_SCHED_SMT */ @@ -7800,7 +7800,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) for_each_cpu_wrap(cpu, cpus, target) { unsigned long cpu_cap = capacity_of(cpu); - if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu)) + if (!is_idle_cpu(cpu)) continue; fits = util_fits_cpu(task_util, util_min, util_max, cpu); @@ -7871,7 +7871,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) */ lockdep_assert_irqs_disabled(); - if ((available_idle_cpu(target) || sched_idle_cpu(target)) && + if (is_idle_core(target) && asym_fits_cpu(task_util, util_min, util_max, target)) return target; @@ -7879,7 +7879,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) * If the previous CPU is cache affine and idle, don't be stupid: */ if (prev != target && cpus_share_cache(prev, target) && - (available_idle_cpu(prev) || sched_idle_cpu(prev)) && + is_idle_core(prev) && asym_fits_cpu(task_util, util_min, util_max, prev)) { if (!static_branch_unlikely(&sched_cluster_active) || @@ -7911,7 +7911,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if (recent_used_cpu != prev && recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && - (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && + is_idle_core(recent_used_cpu) && cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) && asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) { @@ -7947,16 +7947,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if (!sd) return target; - if (sched_smt_active()) { + if (sched_smt_active()) has_idle_core = test_idle_cores(target); - if (!has_idle_core && cpus_share_cache(prev, target)) { - i = select_idle_smt(p, sd, prev); - if ((unsigned int)i < nr_cpumask_bits) - return i; - } - } - i = select_idle_cpu(p, sd, has_idle_core, target); if ((unsigned)i < nr_cpumask_bits) return i; From 1508f15ec0eba3f193514e06597a033159256acc Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Tue, 13 May 2025 10:38:10 +0800 Subject: [PATCH 2879/2924] ADIOS v1.5.7 --- block/Kconfig.iosched | 13 +++++++++---- block/adios.c | 11 ++++------- block/elevator.c | 5 +++++ 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 79fd5da5dd169a..e98585dd83e0f4 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -20,10 +20,15 @@ config MQ_IOSCHED_ADIOS tristate "Adaptive Deadline I/O scheduler" default m help - ADIOS is a multi-queue I/O scheduler for the Linux kernel, based on - mq-deadline and Kyber, with learning-based adaptive latency control. - It aims to provide low latency for synchronous requests while - maintaining high throughput for asynchronous requests and bulk I/O. + The Adaptive Deadline I/O Scheduler (ADIOS) is a multi-queue I/O + scheduler with learning-based adaptive latency control. + +config MQ_IOSCHED_DEFAULT_ADIOS + bool "Enable ADIOS I/O scheduler as default MQ I/O scheduler" + depends on MQ_IOSCHED_ADIOS=y + default n + help + Enable the ADIOS I/O scheduler as the default scheduler for MQ I/O. config IOSCHED_BFQ tristate "BFQ I/O scheduler" diff --git a/block/adios.c b/block/adios.c index 3e66b26b51ac10..4b869dfcc750f8 100644 --- a/block/adios.c +++ b/block/adios.c @@ -1,9 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * The Adaptive Deadline I/O Scheduler (ADIOS) - * Based on mq-deadline and Kyber, - * with learning-based adaptive latency control - * + * Adaptive Deadline I/O Scheduler (ADIOS) * Copyright (C) 2025 Masahito Suzuki */ #include @@ -24,7 +21,7 @@ #include "blk-mq.h" #include "blk-mq-sched.h" -#define ADIOS_VERSION "1.5.5" +#define ADIOS_VERSION "1.5.7" // Define operation types supported by ADIOS enum adios_op_type { @@ -762,7 +759,7 @@ static bool fill_batch_queues(struct adios_data *ad, u64 current_lat) { current_lat += rd->pred_lat; // Check batch size and total predicted latency - if (count && (!ad->latency_model[optype].base || + if (count && (!ad->latency_model[optype].base || ad->batch_count[page][optype] >= ad->batch_limit[optype] || current_lat > ad->global_latency_window)) { break; @@ -1339,4 +1336,4 @@ module_exit(adios_exit); MODULE_AUTHOR(ADIOS_AUTHOR); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION(ADIOS_PROGNAME); \ No newline at end of file +MODULE_DESCRIPTION(ADIOS_PROGNAME); diff --git a/block/elevator.c b/block/elevator.c index d82ce6f13478e9..fc167480b7399a 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -556,6 +556,10 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) return NULL; +#ifdef CONFIG_MQ_IOSCHED_DEFAULT_ADIOS + return elevator_find_get("adios"); +#else // !CONFIG_MQ_IOSCHED_DEFAULT_ADIOS + if (q->nr_hw_queues != 1 && !blk_mq_is_shared_tags(q->tag_set->flags)) #if defined(CONFIG_CACHY) @@ -569,6 +573,7 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) #else return elevator_find_get("mq-deadline"); #endif +#endif // CONFIG_MQ_IOSCHED_DEFAULT_ADIOS } /* From cb52eb93e1beec3956505ea6f602da5a77ceb46e Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Sun, 18 May 2025 22:43:08 +0800 Subject: [PATCH 2880/2924] Revert "sched/fair: Prefer full-idle SMT cores" This reverts commit 2681ac8da8dbb965c008c763131df22ce23182e4. Seems to be causing kernel panics. Signed-off-by: Eric Naim --- kernel/sched/fair.c | 69 +++++++++++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 76d50c5b024210..1e8c138c797f22 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7567,14 +7567,9 @@ static inline int sched_balance_find_dst_cpu(struct sched_domain *sd, struct tas return new_cpu; } -static inline bool is_idle_cpu(int cpu) -{ - return available_idle_cpu(cpu) || sched_idle_cpu(cpu); -} - static inline int __select_idle_cpu(int cpu, struct task_struct *p) { - if (is_idle_cpu(cpu) && + if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) && sched_cpu_cookie_match(cpu_rq(cpu), p)) return cpu; @@ -7585,24 +7580,6 @@ static inline int __select_idle_cpu(int cpu, struct task_struct *p) DEFINE_STATIC_KEY_FALSE(sched_smt_present); EXPORT_SYMBOL_GPL(sched_smt_present); -/* - * Return true if all the CPUs in the SMT core where @cpu belongs are idle, - * false otherwise. - */ -static bool is_idle_core(int cpu) -{ - int sibling; - - if (!sched_smt_active()) - return is_idle_cpu(cpu); - - for_each_cpu(sibling, cpu_smt_mask(cpu)) - if (!is_idle_cpu(sibling)) - return false; - - return true; -} - static inline void set_idle_cores(int cpu, int val) { struct sched_domain_shared *sds; @@ -7685,6 +7662,29 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu return -1; } +/* + * Scan the local SMT mask for idle CPUs. + */ +static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) +{ + int cpu; + + for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) { + if (cpu == target) + continue; + /* + * Check if the CPU is in the LLC scheduling domain of @target. + * Due to isolcpus, there is no guarantee that all the siblings are in the domain. + */ + if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) + continue; + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) + return cpu; + } + + return -1; +} + #else /* CONFIG_SCHED_SMT */ static inline void set_idle_cores(int cpu, int val) @@ -7701,9 +7701,9 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma return __select_idle_cpu(core, p); } -static inline bool is_idle_core(int cpu) +static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) { - return is_idle_cpu(cpu); + return -1; } #endif /* CONFIG_SCHED_SMT */ @@ -7800,7 +7800,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) for_each_cpu_wrap(cpu, cpus, target) { unsigned long cpu_cap = capacity_of(cpu); - if (!is_idle_cpu(cpu)) + if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu)) continue; fits = util_fits_cpu(task_util, util_min, util_max, cpu); @@ -7871,7 +7871,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) */ lockdep_assert_irqs_disabled(); - if (is_idle_core(target) && + if ((available_idle_cpu(target) || sched_idle_cpu(target)) && asym_fits_cpu(task_util, util_min, util_max, target)) return target; @@ -7879,7 +7879,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) * If the previous CPU is cache affine and idle, don't be stupid: */ if (prev != target && cpus_share_cache(prev, target) && - is_idle_core(prev) && + (available_idle_cpu(prev) || sched_idle_cpu(prev)) && asym_fits_cpu(task_util, util_min, util_max, prev)) { if (!static_branch_unlikely(&sched_cluster_active) || @@ -7911,7 +7911,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if (recent_used_cpu != prev && recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && - is_idle_core(recent_used_cpu) && + (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) && asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) { @@ -7947,9 +7947,16 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if (!sd) return target; - if (sched_smt_active()) + if (sched_smt_active()) { has_idle_core = test_idle_cores(target); + if (!has_idle_core && cpus_share_cache(prev, target)) { + i = select_idle_smt(p, sd, prev); + if ((unsigned int)i < nr_cpumask_bits) + return i; + } + } + i = select_idle_cpu(p, sd, has_idle_core, target); if ((unsigned)i < nr_cpumask_bits) return i; From 6ebfa0be541fd8b1e84ab76eea0f83ecd0907b47 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Tue, 20 May 2025 13:14:20 +0800 Subject: [PATCH 2881/2924] Revert "kbuild: distributed build support for Clang ThinLTO" This reverts commit dfd4c69f9f12f8fdd68e08cdba250e37cefc93ce. Signed-off-by: Eric Naim --- MAINTAINERS | 5 --- Makefile | 40 +++-------------------- arch/Kconfig | 12 ------- scripts/Makefile.build | 52 +++--------------------------- scripts/Makefile.lib | 7 +--- scripts/Makefile.vmlinux_o | 16 +++------- scripts/Makefile.vmlinux_thinlink | 53 ------------------------------- scripts/head-object-list.txt | 1 - 8 files changed, 15 insertions(+), 171 deletions(-) delete mode 100644 scripts/Makefile.vmlinux_thinlink diff --git a/MAINTAINERS b/MAINTAINERS index 4feb3b9f8f739d..dd844ac8d9107b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5790,11 +5790,6 @@ F: scripts/Makefile.clang F: scripts/clang-tools/ K: \b(?i:clang|llvm)\b -CLANG/LLVM THINLTO DISTRIBUTED BUILD -M: Rong Xu -S: Supported -F: scripts/Makefile.vmlinux_thinlink - CLK API M: Russell King L: linux-clk@vger.kernel.org diff --git a/Makefile b/Makefile index ca7a269a08e372..2d9d46275c4bf9 100644 --- a/Makefile +++ b/Makefile @@ -298,8 +298,7 @@ no-dot-config-targets := $(clean-targets) \ outputmakefile rustavailable rustfmt rustfmtcheck no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \ image_name -single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.final_o \ - %.final_a %.o.thinlto.bc %/ +single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %/ config-build := mixed-build := @@ -1000,10 +999,10 @@ export CC_FLAGS_SCS endif ifdef CONFIG_LTO_CLANG -ifdef CONFIG_LTO_CLANG_FULL -CC_FLAGS_LTO := -flto -else # for CONFIG_LTO_CLANG_THIN or CONFIG_LTO_CLANG_THIN_DIST +ifdef CONFIG_LTO_CLANG_THIN CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit +else +CC_FLAGS_LTO := -flto endif CC_FLAGS_LTO += -fvisibility=hidden @@ -1222,34 +1221,8 @@ vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE $(call if_changed,ar_vmlinux.a) PHONY += vmlinux_o -ifdef CONFIG_LTO_CLANG_THIN_DIST -vmlinux.thinlink: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_thinlink -targets += vmlinux.thinlink - -vmlinux_final_a := $(patsubst %.a,%.final_a,$(KBUILD_VMLINUX_OBJS)) -quiet_cmd_ar_vmlinux.final_a = AR $@ - cmd_ar_vmlinux.final_a = \ - rm -f $@; \ - $(AR) cDPrST $@ $(vmlinux_final_a); \ - $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) - -define rule_gen_vmlinux_final_a - +$(Q)$(MAKE) $(build)=. need-builtin=1 thinlto_final_pass=1 need-modorder=1 built-in.final_a - $(call cmd_and_savecmd,ar_vmlinux.final_a) -endef - -vmlinux.final_a: vmlinux.thinlink scripts/head-object-list.txt FORCE - $(call if_changed_rule,gen_vmlinux_final_a) - -targets += vmlinux.final_a - -vmlinux_o: vmlinux.final_a - $(Q)$(MAKE) thinlto_final_pass=1 -f $(srctree)/scripts/Makefile.vmlinux_o -else vmlinux_o: vmlinux.a $(KBUILD_VMLINUX_LIBS) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_o -endif vmlinux.o modules.builtin.modinfo modules.builtin: vmlinux_o @: @@ -1607,8 +1580,7 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S \ - .vmlinux_thinlto_bc_files vmlinux.thinlink + .builtin-dtbs-list .builtin-dtb.S # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -2059,8 +2031,6 @@ clean: $(clean-dirs) -o -name '*.symtypes' -o -name 'modules.order' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ - -o -name '*.final_a' -o -name '*.final_o' \ - -o -name '*.o.thinlto.bc' \ -o -name '*.gcno' \ \) -type f -print \ -o -name '.tmp_*' -print \ diff --git a/arch/Kconfig b/arch/Kconfig index cbeeeb9b076d8f..b0adb665041f17 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -810,18 +810,6 @@ config LTO_CLANG_THIN https://clang.llvm.org/docs/ThinLTO.html If unsure, say Y. - -config LTO_CLANG_THIN_DIST - bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)" - depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN - select LTO_CLANG - help - This option enables Clang's ThinLTO in distributed build mode. - In this mode, the linker performs the thin-link, generating - ThinLTO index files. Subsequently, the build system explicitly - invokes ThinLTO backend compilation using these index files - and pre-linked IR objects. The resulting final object files - are with the .final_o suffix. endchoice config ARCH_SUPPORTS_AUTOFDO_CLANG diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 87259b0e2bfc88..13dcd86e74ca83 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -50,23 +50,18 @@ endif # =========================================================================== -builtin_suffix := $(if $(filter %.final_a, $(MAKECMDGOALS)),final_a,a) -ifeq ($(thinlto_final_pass),1) -builtin_suffix := final_a -endif - # subdir-builtin and subdir-modorder may contain duplications. Use $(sort ...) -subdir-builtin := $(sort $(filter %/built-in.$(builtin_suffix), $(real-obj-y))) +subdir-builtin := $(sort $(filter %/built-in.a, $(real-obj-y))) subdir-modorder := $(sort $(filter %/modules.order, $(obj-m))) targets-for-builtin := $(extra-y) ifneq ($(strip $(lib-y) $(lib-m) $(lib-)),) -targets-for-builtin += $(obj)/lib.$(builtin_suffix) +targets-for-builtin += $(obj)/lib.a endif ifdef need-builtin -targets-for-builtin += $(obj)/built-in.$(builtin_suffix) +targets-for-builtin += $(obj)/built-in.a endif targets-for-modules := $(foreach x, o mod, \ @@ -342,10 +337,6 @@ $(obj)/%.o: $(obj)/%.S FORCE targets += $(filter-out $(subdir-builtin), $(real-obj-y)) targets += $(filter-out $(subdir-modorder), $(real-obj-m)) targets += $(lib-y) $(always-y) -ifeq ($(builtin_suffix), final_a) -final_o_targets = $(patsubst,%.o,%.final_o,$(targets)) -targets += $(final_targets) -endif # Linker scripts preprocessor (.lds.S -> .lds) # --------------------------------------------------------------------------- @@ -356,24 +347,6 @@ quiet_cmd_cpp_lds_S = LDS $@ $(obj)/%.lds: $(src)/%.lds.S FORCE $(call if_changed_dep,cpp_lds_S) -ifdef CONFIG_LTO_CLANG_THIN_DIST -# Generate .final_o (obj) from .o (bitcode) file -# --------------------------------------------------------------------------- -quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@ - -cmd_cc_o_bc = $(if $(filter bitcode, $(shell file -b $<)),$(CC) \ - $(filter-out -Wp% $(LINUXINCLUDE) %.h.gch %.h -D% \ - -flto=thin, $(c_flags)) \ - -Wno-unused-command-line-argument \ - -x ir -fthinlto-index=$<.thinlto.bc -c -o $@ \ - $(if $(findstring ../,$<), \ - $$(realpath --relative-to=$(srcroot) $<), $<), \ - cp $< $@) - -$(obj)/%.final_o: $(obj)/%.o FORCE - $(call if_changed,cc_o_bc) -endif - # ASN.1 grammar # --------------------------------------------------------------------------- quiet_cmd_asn1_compiler = ASN.1 $(basename $@).[ch] @@ -387,7 +360,7 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler # --------------------------------------------------------------------------- # To build objects in subdirs, we need to descend into the directories -$(subdir-builtin): $(obj)/%/built-in.$(builtin_suffix): $(obj)/% ; +$(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ; $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ; # @@ -404,12 +377,6 @@ quiet_cmd_ar_builtin = AR $@ $(obj)/built-in.a: $(real-obj-y) FORCE $(call if_changed,ar_builtin) -ifdef CONFIG_LTO_CLANG_THIN_DIST -# Rule to compile a set of .final_o files into one .final_a file. -$(obj)/built-in.final_a: $(patsubst %.o,%.final_o,$(real-obj-y)) FORCE - $(call if_changed,ar_builtin) -endif - # This is a list of build artifacts from the current Makefile and its # sub-directories. The timestamp should be updated when any of the member files. @@ -427,14 +394,6 @@ $(obj)/modules.order: $(obj-m) FORCE $(obj)/lib.a: $(lib-y) FORCE $(call if_changed,ar) -ifdef CONFIG_LTO_CLANG_THIN_DIST -quiet_cmd_ar_final = AR $@ - cmd_ar_final = rm -f $@; $(AR) cDPrsT $@ $(patsubst %.o,%.final_o,$(real-prereqs)) - -$(obj)/lib.final_a: $(patsubst %.o,%.final_o,$(lib-y)) FORCE - $(call if_changed,ar_final) -endif - quiet_cmd_ld_multi_m = LD [M] $@ cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$< $(cmd_objtool) @@ -500,8 +459,7 @@ $(single-subdir-goals): $(single-subdirs) PHONY += $(subdir-ym) $(subdir-ym): $(Q)$(MAKE) $(build)=$@ \ - need-builtin=$(if $(filter $@/built-in.$(builtin_suffix), $(subdir-builtin)),1) \ - thinlto_final_pass=$(if $(filter final_a, $(builtin_suffix)),1) \ + need-builtin=$(if $(filter $@/built-in.a, $(subdir-builtin)),1) \ need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \ $(filter $@/%, $(single-subdir-goals)) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index ce341ed8d1df31..2fe73cda0bddb9 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -34,13 +34,8 @@ else obj-m := $(filter-out %/, $(obj-m)) endif -builtin_suffix := $(if $(filter %.final_a, $(MAKECMDGOALS)),final_a,a) -ifeq ($(thinlto_final_pass),1) - builtin_suffix := final_a -endif - ifdef need-builtin -obj-y := $(patsubst %/, %/built-in.$(builtin_suffix), $(obj-y)) +obj-y := $(patsubst %/, %/built-in.a, $(obj-y)) else obj-y := $(filter-out %/, $(obj-y)) endif diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 97de473c548dbb..b024ffb3e20187 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -9,14 +9,6 @@ include $(srctree)/scripts/Kbuild.include # for objtool include $(srctree)/scripts/Makefile.lib -ifeq ($(thinlto_final_pass),1) -vmlinux_a := vmlinux.final_a -vmlinux_libs := $(patsubst %.a,%.final_a,$(KBUILD_VMLINUX_LIBS)) -else -vmlinux_a := vmlinux.a -vmlinux_libs := $(KBUILD_VMLINUX_LIBS) -endif - # Generate a linker script to ensure correct ordering of initcalls for Clang LTO # --------------------------------------------------------------------------- @@ -26,7 +18,7 @@ quiet_cmd_gen_initcalls_lds = GEN $@ $(PERL) $(real-prereqs) > $@ .tmp_initcalls.lds: $(srctree)/scripts/generate_initcall_order.pl \ - $(vmlinux_a) $(vmlinux_libs) FORCE + vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE $(call if_changed,gen_initcalls_lds) targets := .tmp_initcalls.lds @@ -67,8 +59,8 @@ quiet_cmd_ld_vmlinux.o = LD $@ $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ $(vmlinux-o-ld-args-y) \ $(addprefix -T , $(initcalls-lds)) \ - --whole-archive $(vmlinux_a) --no-whole-archive \ - --start-group $(vmlinux_libs) --end-group \ + --whole-archive vmlinux.a --no-whole-archive \ + --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ $(cmd_objtool) define rule_ld_vmlinux.o @@ -76,7 +68,7 @@ define rule_ld_vmlinux.o $(call cmd,gen_objtooldep) endef -vmlinux.o: $(initcalls-lds) $(vmlinux_a) $(vmlinux_libs) FORCE +vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE $(call if_changed_rule,ld_vmlinux.o) targets += vmlinux.o diff --git a/scripts/Makefile.vmlinux_thinlink b/scripts/Makefile.vmlinux_thinlink deleted file mode 100644 index 13e4026c7d45bd..00000000000000 --- a/scripts/Makefile.vmlinux_thinlink +++ /dev/null @@ -1,53 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -PHONY := __default -__default: vmlinux.thinlink - -include include/config/auto.conf -include $(srctree)/scripts/Kbuild.include - - -# Generate a linker script to ensure correct ordering of initcalls for Clang LTO -# --------------------------------------------------------------------------- - -quiet_cmd_gen_initcalls_lds = GEN $@ - cmd_gen_initcalls_lds = \ - $(PYTHON3) $(srctree)/scripts/jobserver-exec \ - $(PERL) $(real-prereqs) > $@ - -.tmp_initcalls_thinlink.lds: $(srctree)/scripts/generate_initcall_order.pl \ - vmlinux.a FORCE - $(call if_changed,gen_initcalls_lds) - -targets := .tmp_initcalls_thinlink.lds - -initcalls-lds := .tmp_initcalls_thinlink.lds - -quiet_cmd_ld_vmlinux.thinlink = LD $@ - cmd_ld_vmlinux.thinlink = \ - $(AR) t vmlinux.a > .vmlinux_thinlto_bc_files; \ - $(LD) ${KBUILD_LDFLAGS} -r $(addprefix -T , $(initcalls-lds)) \ - --thinlto-index-only @.vmlinux_thinlto_bc_files; \ - touch vmlinux.thinlink - -vmlinux.thinlink: vmlinux.a $(initcalls-lds) FORCE - $(call if_changed,ld_vmlinux.thinlink) - -targets += vmlinux.thinlink - -# Add FORCE to the prerequisites of a target to force it to be always rebuilt. -# --------------------------------------------------------------------------- - -PHONY += FORCE -FORCE: - -# Read all saved command lines and dependencies for the $(targets) we -# may be building above, using $(if_changed{,_dep}). As an -# optimization, we don't need to read them if the target does not -# exist, we will rebuild anyway in that case. - -existing-targets := $(wildcard $(sort $(targets))) - --include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) - -.PHONY: $(PHONY) diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt index b300222e99a6b0..7274dfc65af606 100644 --- a/scripts/head-object-list.txt +++ b/scripts/head-object-list.txt @@ -18,7 +18,6 @@ arch/arm/kernel/head.o arch/csky/kernel/head.o arch/hexagon/kernel/head.o arch/loongarch/kernel/head.o -arch/loongarch/kernel/head.final_o arch/m68k/68000/head.o arch/m68k/coldfire/head.o arch/m68k/kernel/head.o From 1acfa9f53cc598f79eda0adf6e3e0fed7077dfce Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Wed, 7 May 2025 13:55:04 -0700 Subject: [PATCH 2882/2924] kbuild: distributed build support for Clang ThinLTO Add distributed ThinLTO build support for the Linux kernel. This new mode offers several advantages: (1) Increased flexibility in handling user-specified build options. (2) Improved user-friendliness for developers. (3) Greater convenience for integrating with objtool and livepatch. Note that "distributed" in this context refers to a term that differentiates in-process ThinLTO builds by invoking backend compilation through the linker, not necessarily building in distributed environments. Distributed ThinLTO is enabled via the `CONFIG_LTO_CLANG_THIN_DIST` Kconfig option. For example: > make LLVM=1 defconfig > scripts/config -e LTO_CLANG_THIN_DIST > make LLVM=1 oldconfig > make LLVM=1 vmlinux -j <..> The implementation changes the top-level Makefile with a macro for generating `vmlinux.o` for distributed ThinLTO builds. It uses the existing Kbuild infrastructure to perform two recursive passes through the subdirectories. The first pass generates LLVM IR object files, similar to in-process ThinLTO. Following the thin-link stage, a second pass compiles these IR files into the final native object files. The build rules and actions for this two-pass process are primarily implemented in `scripts/Makefile.build`. Currently, this patch focuses on building the main kernel image (`vmlinux`) only. Support for building kernel modules using this method is planned for a subsequent patch. Tested on the following arch: x86, arm64, loongarch, and riscv. Some implementation details can be found here: https://discourse.llvm.org/t/rfc-distributed-thinlto-build-for-kernel/85934 Signed-off-by: Rong Xu --- .gitignore | 2 ++ MAINTAINERS | 5 +++ Makefile | 40 ++++++++++++++++++++--- arch/Kconfig | 19 +++++++++++ scripts/Makefile.build | 52 +++++++++++++++++++++++++++--- scripts/Makefile.lib | 7 +++- scripts/Makefile.vmlinux_o | 16 +++++++--- scripts/Makefile.vmlinux_thinlink | 53 +++++++++++++++++++++++++++++++ scripts/head-object-list.txt | 1 + 9 files changed, 180 insertions(+), 15 deletions(-) create mode 100644 scripts/Makefile.vmlinux_thinlink diff --git a/.gitignore b/.gitignore index f2f63e47fb8868..4ac41950311c8b 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ # .* *.a +*.a.thinlto_native *.asn1.[ch] *.bin *.bz2 @@ -64,6 +65,7 @@ modules.order /vmlinux /vmlinux.32 /vmlinux.map +/vmlinux.thinlink /vmlinux.symvers /vmlinux.unstripped /vmlinux-gdb.py diff --git a/MAINTAINERS b/MAINTAINERS index dd844ac8d9107b..4feb3b9f8f739d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5790,6 +5790,11 @@ F: scripts/Makefile.clang F: scripts/clang-tools/ K: \b(?i:clang|llvm)\b +CLANG/LLVM THINLTO DISTRIBUTED BUILD +M: Rong Xu +S: Supported +F: scripts/Makefile.vmlinux_thinlink + CLK API M: Russell King L: linux-clk@vger.kernel.org diff --git a/Makefile b/Makefile index 2d9d46275c4bf9..dca749cc3739b6 100644 --- a/Makefile +++ b/Makefile @@ -298,7 +298,8 @@ no-dot-config-targets := $(clean-targets) \ outputmakefile rustavailable rustfmt rustfmtcheck no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \ image_name -single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %/ +single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.o.thinlto.native \ + %.a.thinlto.native %.o.thinlto.bc %/ config-build := mixed-build := @@ -999,10 +1000,10 @@ export CC_FLAGS_SCS endif ifdef CONFIG_LTO_CLANG -ifdef CONFIG_LTO_CLANG_THIN -CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit -else +ifdef CONFIG_LTO_CLANG_FULL CC_FLAGS_LTO := -flto +else # for CONFIG_LTO_CLANG_THIN or CONFIG_LTO_CLANG_THIN_DIST +CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit endif CC_FLAGS_LTO += -fvisibility=hidden @@ -1221,8 +1222,34 @@ vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE $(call if_changed,ar_vmlinux.a) PHONY += vmlinux_o +ifdef CONFIG_LTO_CLANG_THIN_DIST +vmlinux.thinlink: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_thinlink +targets += vmlinux.thinlink + +vmlinux.a.thinlto.native := $(patsubst %.a,%.a.thinlto.native,$(KBUILD_VMLINUX_OBJS)) +quiet_cmd_ar_vmlinux.a.thinlto.native = AR $@ + cmd_ar_vmlinux.a.thinlto.native = \ + rm -f $@; \ + $(AR) cDPrST $@ $(vmlinux.a.thinlto.native); \ + $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) + +define rule_gen_vmlinux.a.thinlto.native + +$(Q)$(MAKE) $(build)=. need-builtin=1 thinlto_final_pass=1 need-modorder=1 built-in.a.thinlto.native + $(call cmd_and_savecmd,ar_vmlinux.a.thinlto.native) +endef + +vmlinux.a.thinlto.native: vmlinux.thinlink scripts/head-object-list.txt FORCE + $(call if_changed_rule,gen_vmlinux.a.thinlto.native) + +targets += vmlinux.a.thinlto.native + +vmlinux_o: vmlinux.a.thinlto.native + $(Q)$(MAKE) thinlto_final_pass=1 -f $(srctree)/scripts/Makefile.vmlinux_o +else vmlinux_o: vmlinux.a $(KBUILD_VMLINUX_LIBS) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_o +endif vmlinux.o modules.builtin.modinfo modules.builtin: vmlinux_o @: @@ -1580,7 +1607,8 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S + .builtin-dtbs-list .builtin-dtb.S \ + .vmlinux_thinlto_bc_files vmlinux.thinlink # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -2031,6 +2059,8 @@ clean: $(clean-dirs) -o -name '*.symtypes' -o -name 'modules.order' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ + -o -name '*.a.thinlto.native' -o -name '*.o.thinlto.native' \ + -o -name '*.o.thinlto.bc' \ -o -name '*.gcno' \ \) -type f -print \ -o -name '.tmp_*' -print \ diff --git a/arch/Kconfig b/arch/Kconfig index b0adb665041f17..5ed243ded6efaa 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -810,6 +810,25 @@ config LTO_CLANG_THIN https://clang.llvm.org/docs/ThinLTO.html If unsure, say Y. + +config LTO_CLANG_THIN_DIST + bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)" + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN + select LTO_CLANG + help + This option enables Clang's ThinLTO in distributed build mode. + In this mode, the linker performs the thin-link, generating + ThinLTO index files. Subsequently, the build system explicitly + invokes ThinLTO backend compilation using these index files + and pre-linked IR objects. The resulting native object files + are with the .o.thinlto.native suffix. + + This build mode offers improved visibility into the ThinLTO + process through explicit subcommand exposure. It also makes + final native object files directly available, benefiting + tools like objtool and kpatch. Additionally, it provides + crucial granular control over back-end options, enabling + module-specific compiler options, and simplifies debugging. endchoice config ARCH_SUPPORTS_AUTOFDO_CLANG diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 13dcd86e74ca83..49ea3934e80bc1 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -50,18 +50,23 @@ endif # =========================================================================== +builtin_suffix := $(if $(filter %.a.thinlto.native, $(MAKECMDGOALS)),.a.thinlto.native,.a) +ifeq ($(thinlto_final_pass),1) +builtin_suffix :=.a.thinlto.native +endif + # subdir-builtin and subdir-modorder may contain duplications. Use $(sort ...) -subdir-builtin := $(sort $(filter %/built-in.a, $(real-obj-y))) +subdir-builtin := $(sort $(filter %/built-in$(builtin_suffix), $(real-obj-y))) subdir-modorder := $(sort $(filter %/modules.order, $(obj-m))) targets-for-builtin := $(extra-y) ifneq ($(strip $(lib-y) $(lib-m) $(lib-)),) -targets-for-builtin += $(obj)/lib.a +targets-for-builtin += $(obj)/lib$(builtin_suffix) endif ifdef need-builtin -targets-for-builtin += $(obj)/built-in.a +targets-for-builtin += $(obj)/built-in$(builtin_suffix) endif targets-for-modules := $(foreach x, o mod, \ @@ -337,6 +342,10 @@ $(obj)/%.o: $(obj)/%.S FORCE targets += $(filter-out $(subdir-builtin), $(real-obj-y)) targets += $(filter-out $(subdir-modorder), $(real-obj-m)) targets += $(lib-y) $(always-y) +ifeq ($(builtin_suffix),.a.thinlto.native) +native_targets = $(patsubst,%.o,%.o.thinlto.native,$(targets)) +targets += $(native_targets) +endif # Linker scripts preprocessor (.lds.S -> .lds) # --------------------------------------------------------------------------- @@ -347,6 +356,24 @@ quiet_cmd_cpp_lds_S = LDS $@ $(obj)/%.lds: $(src)/%.lds.S FORCE $(call if_changed_dep,cpp_lds_S) +ifdef CONFIG_LTO_CLANG_THIN_DIST +# Generate .o.thinlto.native (obj) from .o (bitcode) file +# --------------------------------------------------------------------------- +quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@ + +cmd_cc_o_bc = $(if $(filter bitcode, $(shell file -b $<)),$(CC) \ + $(filter-out -Wp% $(LINUXINCLUDE) %.h.gch %.h -D% \ + -flto=thin, $(c_flags)) \ + -Wno-unused-command-line-argument \ + -x ir -fthinlto-index=$<.thinlto.bc -c -o $@ \ + $(if $(findstring ../,$<), \ + $$(realpath --relative-to=$(srcroot) $<), $<), \ + cp $< $@) + +$(obj)/%.o.thinlto.native: $(obj)/%.o FORCE + $(call if_changed,cc_o_bc) +endif + # ASN.1 grammar # --------------------------------------------------------------------------- quiet_cmd_asn1_compiler = ASN.1 $(basename $@).[ch] @@ -360,7 +387,7 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler # --------------------------------------------------------------------------- # To build objects in subdirs, we need to descend into the directories -$(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ; +$(subdir-builtin): $(obj)/%/built-in$(builtin_suffix): $(obj)/% ; $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ; # @@ -377,6 +404,12 @@ quiet_cmd_ar_builtin = AR $@ $(obj)/built-in.a: $(real-obj-y) FORCE $(call if_changed,ar_builtin) +ifdef CONFIG_LTO_CLANG_THIN_DIST +# Rule to compile a set of .o.thinlto.native files into one .a.thinlto.native file. +$(obj)/built-in.a.thinlto.native: $(patsubst %.o,%.o.thinlto.native,$(real-obj-y)) FORCE + $(call if_changed,ar_builtin) +endif + # This is a list of build artifacts from the current Makefile and its # sub-directories. The timestamp should be updated when any of the member files. @@ -394,6 +427,14 @@ $(obj)/modules.order: $(obj-m) FORCE $(obj)/lib.a: $(lib-y) FORCE $(call if_changed,ar) +ifdef CONFIG_LTO_CLANG_THIN_DIST +quiet_cmd_ar_native = AR $@ + cmd_ar_native = rm -f $@; $(AR) cDPrsT $@ $(patsubst %.o,%.o.thinlto.native,$(real-prereqs)) + +$(obj)/lib.a.thinlto.native: $(patsubst %.o,%.o.thinlto.native,$(lib-y)) FORCE + $(call if_changed,ar_native) +endif + quiet_cmd_ld_multi_m = LD [M] $@ cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$< $(cmd_objtool) @@ -459,7 +500,8 @@ $(single-subdir-goals): $(single-subdirs) PHONY += $(subdir-ym) $(subdir-ym): $(Q)$(MAKE) $(build)=$@ \ - need-builtin=$(if $(filter $@/built-in.a, $(subdir-builtin)),1) \ + need-builtin=$(if $(filter $@/built-in$(builtin_suffix), $(subdir-builtin)),1) \ + thinlto_final_pass=$(if $(filter .a.thinlto.native, $(builtin_suffix)),1) \ need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \ $(filter $@/%, $(single-subdir-goals)) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 2fe73cda0bddb9..8e146367f7d163 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -34,8 +34,13 @@ else obj-m := $(filter-out %/, $(obj-m)) endif +builtin_suffix := $(if $(filter %.a.thinlto.native, $(MAKECMDGOALS)),.a.thinlto.native,.a) +ifeq ($(thinlto_final_pass),1) + builtin_suffix :=.a.thinlto.native +endif + ifdef need-builtin -obj-y := $(patsubst %/, %/built-in.a, $(obj-y)) +obj-y := $(patsubst %/, %/built-in$(builtin_suffix), $(obj-y)) else obj-y := $(filter-out %/, $(obj-y)) endif diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index b024ffb3e20187..0ebdc2b22d3878 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -9,6 +9,14 @@ include $(srctree)/scripts/Kbuild.include # for objtool include $(srctree)/scripts/Makefile.lib +ifeq ($(thinlto_final_pass),1) +vmlinux_a := vmlinux.a.thinlto.native +vmlinux_libs := $(patsubst %.a,%.a.thinlto.native,$(KBUILD_VMLINUX_LIBS)) +else +vmlinux_a := vmlinux.a +vmlinux_libs := $(KBUILD_VMLINUX_LIBS) +endif + # Generate a linker script to ensure correct ordering of initcalls for Clang LTO # --------------------------------------------------------------------------- @@ -18,7 +26,7 @@ quiet_cmd_gen_initcalls_lds = GEN $@ $(PERL) $(real-prereqs) > $@ .tmp_initcalls.lds: $(srctree)/scripts/generate_initcall_order.pl \ - vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE + $(vmlinux_a) $(vmlinux_libs) FORCE $(call if_changed,gen_initcalls_lds) targets := .tmp_initcalls.lds @@ -59,8 +67,8 @@ quiet_cmd_ld_vmlinux.o = LD $@ $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ $(vmlinux-o-ld-args-y) \ $(addprefix -T , $(initcalls-lds)) \ - --whole-archive vmlinux.a --no-whole-archive \ - --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ + --whole-archive $(vmlinux_a) --no-whole-archive \ + --start-group $(vmlinux_libs) --end-group \ $(cmd_objtool) define rule_ld_vmlinux.o @@ -68,7 +76,7 @@ define rule_ld_vmlinux.o $(call cmd,gen_objtooldep) endef -vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE +vmlinux.o: $(initcalls-lds) $(vmlinux_a) $(vmlinux_libs) FORCE $(call if_changed_rule,ld_vmlinux.o) targets += vmlinux.o diff --git a/scripts/Makefile.vmlinux_thinlink b/scripts/Makefile.vmlinux_thinlink new file mode 100644 index 00000000000000..13e4026c7d45bd --- /dev/null +++ b/scripts/Makefile.vmlinux_thinlink @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: GPL-2.0-only + +PHONY := __default +__default: vmlinux.thinlink + +include include/config/auto.conf +include $(srctree)/scripts/Kbuild.include + + +# Generate a linker script to ensure correct ordering of initcalls for Clang LTO +# --------------------------------------------------------------------------- + +quiet_cmd_gen_initcalls_lds = GEN $@ + cmd_gen_initcalls_lds = \ + $(PYTHON3) $(srctree)/scripts/jobserver-exec \ + $(PERL) $(real-prereqs) > $@ + +.tmp_initcalls_thinlink.lds: $(srctree)/scripts/generate_initcall_order.pl \ + vmlinux.a FORCE + $(call if_changed,gen_initcalls_lds) + +targets := .tmp_initcalls_thinlink.lds + +initcalls-lds := .tmp_initcalls_thinlink.lds + +quiet_cmd_ld_vmlinux.thinlink = LD $@ + cmd_ld_vmlinux.thinlink = \ + $(AR) t vmlinux.a > .vmlinux_thinlto_bc_files; \ + $(LD) ${KBUILD_LDFLAGS} -r $(addprefix -T , $(initcalls-lds)) \ + --thinlto-index-only @.vmlinux_thinlto_bc_files; \ + touch vmlinux.thinlink + +vmlinux.thinlink: vmlinux.a $(initcalls-lds) FORCE + $(call if_changed,ld_vmlinux.thinlink) + +targets += vmlinux.thinlink + +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. +# --------------------------------------------------------------------------- + +PHONY += FORCE +FORCE: + +# Read all saved command lines and dependencies for the $(targets) we +# may be building above, using $(if_changed{,_dep}). As an +# optimization, we don't need to read them if the target does not +# exist, we will rebuild anyway in that case. + +existing-targets := $(wildcard $(sort $(targets))) + +-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) + +.PHONY: $(PHONY) diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt index 7274dfc65af606..094a4e7c075597 100644 --- a/scripts/head-object-list.txt +++ b/scripts/head-object-list.txt @@ -18,6 +18,7 @@ arch/arm/kernel/head.o arch/csky/kernel/head.o arch/hexagon/kernel/head.o arch/loongarch/kernel/head.o +arch/loongarch/kernel/head.o.thinlto.native arch/m68k/68000/head.o arch/m68k/coldfire/head.o arch/m68k/kernel/head.o From 37ea72a859d5969360adae41b8026d2aedfbbf6b Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Wed, 21 May 2025 22:24:18 +0800 Subject: [PATCH 2883/2924] ADIOS v1.5.8 --- block/adios.c | 15 ++++++++++----- block/elevator.c | 3 +-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/block/adios.c b/block/adios.c index 4b869dfcc750f8..f008d28cd059d7 100644 --- a/block/adios.c +++ b/block/adios.c @@ -21,7 +21,7 @@ #include "blk-mq.h" #include "blk-mq-sched.h" -#define ADIOS_VERSION "1.5.7" +#define ADIOS_VERSION "1.5.8" // Define operation types supported by ADIOS enum adios_op_type { @@ -134,6 +134,8 @@ struct adios_data { struct kmem_cache *rq_data_pool; struct kmem_cache *dl_group_pool; + + struct request_queue *queue; }; // List of requests with the same deadline in the deadline-sorted tree @@ -759,7 +761,7 @@ static bool fill_batch_queues(struct adios_data *ad, u64 current_lat) { current_lat += rd->pred_lat; // Check batch size and total predicted latency - if (count && (!ad->latency_model[optype].base || + if (count && (!ad->latency_model[optype].base || ad->batch_count[page][optype] >= ad->batch_limit[optype] || current_lat > ad->global_latency_window)) { break; @@ -993,6 +995,9 @@ static int adios_init_sched(struct request_queue *q, struct elevator_type *e) { /* We dispatch from request queue wide instead of hw queue */ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); + ad->queue = q; + blk_stat_enable_accounting(q); + q->elevator = eq; return 0; @@ -1019,6 +1024,8 @@ static void adios_exit_sched(struct elevator_queue *e) { if (ad->dl_group_pool) kmem_cache_destroy(ad->dl_group_pool); + blk_stat_disable_accounting(ad->queue); + kfree(ad); } @@ -1308,8 +1315,6 @@ static struct elevator_type mq_adios = { .init_sched = adios_init_sched, .exit_sched = adios_exit_sched, }, -#ifdef CONFIG_BLK_DEBUG_FS -#endif .elevator_attrs = adios_sched_attrs, .elevator_name = "adios", .elevator_owner = THIS_MODULE, @@ -1336,4 +1341,4 @@ module_exit(adios_exit); MODULE_AUTHOR(ADIOS_AUTHOR); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION(ADIOS_PROGNAME); +MODULE_DESCRIPTION(ADIOS_PROGNAME); \ No newline at end of file diff --git a/block/elevator.c b/block/elevator.c index fc167480b7399a..6757ed11bf608f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -558,7 +558,7 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) #ifdef CONFIG_MQ_IOSCHED_DEFAULT_ADIOS return elevator_find_get("adios"); -#else // !CONFIG_MQ_IOSCHED_DEFAULT_ADIOS +#endif // CONFIG_MQ_IOSCHED_DEFAULT_ADIOS if (q->nr_hw_queues != 1 && !blk_mq_is_shared_tags(q->tag_set->flags)) @@ -573,7 +573,6 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) #else return elevator_find_get("mq-deadline"); #endif -#endif // CONFIG_MQ_IOSCHED_DEFAULT_ADIOS } /* From 19ef04b9a97b4c647c0a7c4f3bd612f32c623993 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Thu, 22 May 2025 13:43:52 +0800 Subject: [PATCH 2884/2924] Revert "kbuild: distributed build support for Clang ThinLTO" This reverts commit 46283ff7f83391d121ea4cae5024b7729a540f69. Signed-off-by: Eric Naim --- .gitignore | 2 -- MAINTAINERS | 5 --- Makefile | 40 +++-------------------- arch/Kconfig | 19 ----------- scripts/Makefile.build | 52 +++--------------------------- scripts/Makefile.lib | 7 +--- scripts/Makefile.vmlinux_o | 16 +++------- scripts/Makefile.vmlinux_thinlink | 53 ------------------------------- scripts/head-object-list.txt | 1 - 9 files changed, 15 insertions(+), 180 deletions(-) delete mode 100644 scripts/Makefile.vmlinux_thinlink diff --git a/.gitignore b/.gitignore index 4ac41950311c8b..f2f63e47fb8868 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,6 @@ # .* *.a -*.a.thinlto_native *.asn1.[ch] *.bin *.bz2 @@ -65,7 +64,6 @@ modules.order /vmlinux /vmlinux.32 /vmlinux.map -/vmlinux.thinlink /vmlinux.symvers /vmlinux.unstripped /vmlinux-gdb.py diff --git a/MAINTAINERS b/MAINTAINERS index 4feb3b9f8f739d..dd844ac8d9107b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5790,11 +5790,6 @@ F: scripts/Makefile.clang F: scripts/clang-tools/ K: \b(?i:clang|llvm)\b -CLANG/LLVM THINLTO DISTRIBUTED BUILD -M: Rong Xu -S: Supported -F: scripts/Makefile.vmlinux_thinlink - CLK API M: Russell King L: linux-clk@vger.kernel.org diff --git a/Makefile b/Makefile index dca749cc3739b6..2d9d46275c4bf9 100644 --- a/Makefile +++ b/Makefile @@ -298,8 +298,7 @@ no-dot-config-targets := $(clean-targets) \ outputmakefile rustavailable rustfmt rustfmtcheck no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \ image_name -single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.o.thinlto.native \ - %.a.thinlto.native %.o.thinlto.bc %/ +single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %/ config-build := mixed-build := @@ -1000,10 +999,10 @@ export CC_FLAGS_SCS endif ifdef CONFIG_LTO_CLANG -ifdef CONFIG_LTO_CLANG_FULL -CC_FLAGS_LTO := -flto -else # for CONFIG_LTO_CLANG_THIN or CONFIG_LTO_CLANG_THIN_DIST +ifdef CONFIG_LTO_CLANG_THIN CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit +else +CC_FLAGS_LTO := -flto endif CC_FLAGS_LTO += -fvisibility=hidden @@ -1222,34 +1221,8 @@ vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE $(call if_changed,ar_vmlinux.a) PHONY += vmlinux_o -ifdef CONFIG_LTO_CLANG_THIN_DIST -vmlinux.thinlink: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_thinlink -targets += vmlinux.thinlink - -vmlinux.a.thinlto.native := $(patsubst %.a,%.a.thinlto.native,$(KBUILD_VMLINUX_OBJS)) -quiet_cmd_ar_vmlinux.a.thinlto.native = AR $@ - cmd_ar_vmlinux.a.thinlto.native = \ - rm -f $@; \ - $(AR) cDPrST $@ $(vmlinux.a.thinlto.native); \ - $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) - -define rule_gen_vmlinux.a.thinlto.native - +$(Q)$(MAKE) $(build)=. need-builtin=1 thinlto_final_pass=1 need-modorder=1 built-in.a.thinlto.native - $(call cmd_and_savecmd,ar_vmlinux.a.thinlto.native) -endef - -vmlinux.a.thinlto.native: vmlinux.thinlink scripts/head-object-list.txt FORCE - $(call if_changed_rule,gen_vmlinux.a.thinlto.native) - -targets += vmlinux.a.thinlto.native - -vmlinux_o: vmlinux.a.thinlto.native - $(Q)$(MAKE) thinlto_final_pass=1 -f $(srctree)/scripts/Makefile.vmlinux_o -else vmlinux_o: vmlinux.a $(KBUILD_VMLINUX_LIBS) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_o -endif vmlinux.o modules.builtin.modinfo modules.builtin: vmlinux_o @: @@ -1607,8 +1580,7 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S \ - .vmlinux_thinlto_bc_files vmlinux.thinlink + .builtin-dtbs-list .builtin-dtb.S # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -2059,8 +2031,6 @@ clean: $(clean-dirs) -o -name '*.symtypes' -o -name 'modules.order' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ - -o -name '*.a.thinlto.native' -o -name '*.o.thinlto.native' \ - -o -name '*.o.thinlto.bc' \ -o -name '*.gcno' \ \) -type f -print \ -o -name '.tmp_*' -print \ diff --git a/arch/Kconfig b/arch/Kconfig index 5ed243ded6efaa..b0adb665041f17 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -810,25 +810,6 @@ config LTO_CLANG_THIN https://clang.llvm.org/docs/ThinLTO.html If unsure, say Y. - -config LTO_CLANG_THIN_DIST - bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)" - depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN - select LTO_CLANG - help - This option enables Clang's ThinLTO in distributed build mode. - In this mode, the linker performs the thin-link, generating - ThinLTO index files. Subsequently, the build system explicitly - invokes ThinLTO backend compilation using these index files - and pre-linked IR objects. The resulting native object files - are with the .o.thinlto.native suffix. - - This build mode offers improved visibility into the ThinLTO - process through explicit subcommand exposure. It also makes - final native object files directly available, benefiting - tools like objtool and kpatch. Additionally, it provides - crucial granular control over back-end options, enabling - module-specific compiler options, and simplifies debugging. endchoice config ARCH_SUPPORTS_AUTOFDO_CLANG diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 49ea3934e80bc1..13dcd86e74ca83 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -50,23 +50,18 @@ endif # =========================================================================== -builtin_suffix := $(if $(filter %.a.thinlto.native, $(MAKECMDGOALS)),.a.thinlto.native,.a) -ifeq ($(thinlto_final_pass),1) -builtin_suffix :=.a.thinlto.native -endif - # subdir-builtin and subdir-modorder may contain duplications. Use $(sort ...) -subdir-builtin := $(sort $(filter %/built-in$(builtin_suffix), $(real-obj-y))) +subdir-builtin := $(sort $(filter %/built-in.a, $(real-obj-y))) subdir-modorder := $(sort $(filter %/modules.order, $(obj-m))) targets-for-builtin := $(extra-y) ifneq ($(strip $(lib-y) $(lib-m) $(lib-)),) -targets-for-builtin += $(obj)/lib$(builtin_suffix) +targets-for-builtin += $(obj)/lib.a endif ifdef need-builtin -targets-for-builtin += $(obj)/built-in$(builtin_suffix) +targets-for-builtin += $(obj)/built-in.a endif targets-for-modules := $(foreach x, o mod, \ @@ -342,10 +337,6 @@ $(obj)/%.o: $(obj)/%.S FORCE targets += $(filter-out $(subdir-builtin), $(real-obj-y)) targets += $(filter-out $(subdir-modorder), $(real-obj-m)) targets += $(lib-y) $(always-y) -ifeq ($(builtin_suffix),.a.thinlto.native) -native_targets = $(patsubst,%.o,%.o.thinlto.native,$(targets)) -targets += $(native_targets) -endif # Linker scripts preprocessor (.lds.S -> .lds) # --------------------------------------------------------------------------- @@ -356,24 +347,6 @@ quiet_cmd_cpp_lds_S = LDS $@ $(obj)/%.lds: $(src)/%.lds.S FORCE $(call if_changed_dep,cpp_lds_S) -ifdef CONFIG_LTO_CLANG_THIN_DIST -# Generate .o.thinlto.native (obj) from .o (bitcode) file -# --------------------------------------------------------------------------- -quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@ - -cmd_cc_o_bc = $(if $(filter bitcode, $(shell file -b $<)),$(CC) \ - $(filter-out -Wp% $(LINUXINCLUDE) %.h.gch %.h -D% \ - -flto=thin, $(c_flags)) \ - -Wno-unused-command-line-argument \ - -x ir -fthinlto-index=$<.thinlto.bc -c -o $@ \ - $(if $(findstring ../,$<), \ - $$(realpath --relative-to=$(srcroot) $<), $<), \ - cp $< $@) - -$(obj)/%.o.thinlto.native: $(obj)/%.o FORCE - $(call if_changed,cc_o_bc) -endif - # ASN.1 grammar # --------------------------------------------------------------------------- quiet_cmd_asn1_compiler = ASN.1 $(basename $@).[ch] @@ -387,7 +360,7 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler # --------------------------------------------------------------------------- # To build objects in subdirs, we need to descend into the directories -$(subdir-builtin): $(obj)/%/built-in$(builtin_suffix): $(obj)/% ; +$(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ; $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ; # @@ -404,12 +377,6 @@ quiet_cmd_ar_builtin = AR $@ $(obj)/built-in.a: $(real-obj-y) FORCE $(call if_changed,ar_builtin) -ifdef CONFIG_LTO_CLANG_THIN_DIST -# Rule to compile a set of .o.thinlto.native files into one .a.thinlto.native file. -$(obj)/built-in.a.thinlto.native: $(patsubst %.o,%.o.thinlto.native,$(real-obj-y)) FORCE - $(call if_changed,ar_builtin) -endif - # This is a list of build artifacts from the current Makefile and its # sub-directories. The timestamp should be updated when any of the member files. @@ -427,14 +394,6 @@ $(obj)/modules.order: $(obj-m) FORCE $(obj)/lib.a: $(lib-y) FORCE $(call if_changed,ar) -ifdef CONFIG_LTO_CLANG_THIN_DIST -quiet_cmd_ar_native = AR $@ - cmd_ar_native = rm -f $@; $(AR) cDPrsT $@ $(patsubst %.o,%.o.thinlto.native,$(real-prereqs)) - -$(obj)/lib.a.thinlto.native: $(patsubst %.o,%.o.thinlto.native,$(lib-y)) FORCE - $(call if_changed,ar_native) -endif - quiet_cmd_ld_multi_m = LD [M] $@ cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$< $(cmd_objtool) @@ -500,8 +459,7 @@ $(single-subdir-goals): $(single-subdirs) PHONY += $(subdir-ym) $(subdir-ym): $(Q)$(MAKE) $(build)=$@ \ - need-builtin=$(if $(filter $@/built-in$(builtin_suffix), $(subdir-builtin)),1) \ - thinlto_final_pass=$(if $(filter .a.thinlto.native, $(builtin_suffix)),1) \ + need-builtin=$(if $(filter $@/built-in.a, $(subdir-builtin)),1) \ need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \ $(filter $@/%, $(single-subdir-goals)) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 8e146367f7d163..2fe73cda0bddb9 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -34,13 +34,8 @@ else obj-m := $(filter-out %/, $(obj-m)) endif -builtin_suffix := $(if $(filter %.a.thinlto.native, $(MAKECMDGOALS)),.a.thinlto.native,.a) -ifeq ($(thinlto_final_pass),1) - builtin_suffix :=.a.thinlto.native -endif - ifdef need-builtin -obj-y := $(patsubst %/, %/built-in$(builtin_suffix), $(obj-y)) +obj-y := $(patsubst %/, %/built-in.a, $(obj-y)) else obj-y := $(filter-out %/, $(obj-y)) endif diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 0ebdc2b22d3878..b024ffb3e20187 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -9,14 +9,6 @@ include $(srctree)/scripts/Kbuild.include # for objtool include $(srctree)/scripts/Makefile.lib -ifeq ($(thinlto_final_pass),1) -vmlinux_a := vmlinux.a.thinlto.native -vmlinux_libs := $(patsubst %.a,%.a.thinlto.native,$(KBUILD_VMLINUX_LIBS)) -else -vmlinux_a := vmlinux.a -vmlinux_libs := $(KBUILD_VMLINUX_LIBS) -endif - # Generate a linker script to ensure correct ordering of initcalls for Clang LTO # --------------------------------------------------------------------------- @@ -26,7 +18,7 @@ quiet_cmd_gen_initcalls_lds = GEN $@ $(PERL) $(real-prereqs) > $@ .tmp_initcalls.lds: $(srctree)/scripts/generate_initcall_order.pl \ - $(vmlinux_a) $(vmlinux_libs) FORCE + vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE $(call if_changed,gen_initcalls_lds) targets := .tmp_initcalls.lds @@ -67,8 +59,8 @@ quiet_cmd_ld_vmlinux.o = LD $@ $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ $(vmlinux-o-ld-args-y) \ $(addprefix -T , $(initcalls-lds)) \ - --whole-archive $(vmlinux_a) --no-whole-archive \ - --start-group $(vmlinux_libs) --end-group \ + --whole-archive vmlinux.a --no-whole-archive \ + --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ $(cmd_objtool) define rule_ld_vmlinux.o @@ -76,7 +68,7 @@ define rule_ld_vmlinux.o $(call cmd,gen_objtooldep) endef -vmlinux.o: $(initcalls-lds) $(vmlinux_a) $(vmlinux_libs) FORCE +vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE $(call if_changed_rule,ld_vmlinux.o) targets += vmlinux.o diff --git a/scripts/Makefile.vmlinux_thinlink b/scripts/Makefile.vmlinux_thinlink deleted file mode 100644 index 13e4026c7d45bd..00000000000000 --- a/scripts/Makefile.vmlinux_thinlink +++ /dev/null @@ -1,53 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -PHONY := __default -__default: vmlinux.thinlink - -include include/config/auto.conf -include $(srctree)/scripts/Kbuild.include - - -# Generate a linker script to ensure correct ordering of initcalls for Clang LTO -# --------------------------------------------------------------------------- - -quiet_cmd_gen_initcalls_lds = GEN $@ - cmd_gen_initcalls_lds = \ - $(PYTHON3) $(srctree)/scripts/jobserver-exec \ - $(PERL) $(real-prereqs) > $@ - -.tmp_initcalls_thinlink.lds: $(srctree)/scripts/generate_initcall_order.pl \ - vmlinux.a FORCE - $(call if_changed,gen_initcalls_lds) - -targets := .tmp_initcalls_thinlink.lds - -initcalls-lds := .tmp_initcalls_thinlink.lds - -quiet_cmd_ld_vmlinux.thinlink = LD $@ - cmd_ld_vmlinux.thinlink = \ - $(AR) t vmlinux.a > .vmlinux_thinlto_bc_files; \ - $(LD) ${KBUILD_LDFLAGS} -r $(addprefix -T , $(initcalls-lds)) \ - --thinlto-index-only @.vmlinux_thinlto_bc_files; \ - touch vmlinux.thinlink - -vmlinux.thinlink: vmlinux.a $(initcalls-lds) FORCE - $(call if_changed,ld_vmlinux.thinlink) - -targets += vmlinux.thinlink - -# Add FORCE to the prerequisites of a target to force it to be always rebuilt. -# --------------------------------------------------------------------------- - -PHONY += FORCE -FORCE: - -# Read all saved command lines and dependencies for the $(targets) we -# may be building above, using $(if_changed{,_dep}). As an -# optimization, we don't need to read them if the target does not -# exist, we will rebuild anyway in that case. - -existing-targets := $(wildcard $(sort $(targets))) - --include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) - -.PHONY: $(PHONY) diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt index 094a4e7c075597..7274dfc65af606 100644 --- a/scripts/head-object-list.txt +++ b/scripts/head-object-list.txt @@ -18,7 +18,6 @@ arch/arm/kernel/head.o arch/csky/kernel/head.o arch/hexagon/kernel/head.o arch/loongarch/kernel/head.o -arch/loongarch/kernel/head.o.thinlto.native arch/m68k/68000/head.o arch/m68k/coldfire/head.o arch/m68k/kernel/head.o From 82a56cfa566cf64c84e979df5928b144e384e83b Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Thu, 22 May 2025 13:44:11 +0800 Subject: [PATCH 2885/2924] Revert "Makefile.{autofdo,propeller}: Add support for Distributed ThinLTO" This reverts commit d2785aae0ea96da2756bdd7aa7fd52dcfe324857. This is only needed for in-process ThinLTO builds Link: https://lore.kernel.org/lkml/CAF1bQ=TQSQ75qCyQYegW7kq8WdyB-SNViraOuBvgGgAyUPS54Q@mail.gmail.com/ Signed-off-by: Eric Naim --- scripts/Makefile.autofdo | 2 +- scripts/Makefile.propeller | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo index daca19a1753342..1caf2457e585c0 100644 --- a/scripts/Makefile.autofdo +++ b/scripts/Makefile.autofdo @@ -13,7 +13,7 @@ ifdef CLANG_AUTOFDO_PROFILE CFLAGS_AUTOFDO_CLANG += -fsplit-machine-functions endif -ifeq ($(or $(CONFIG_LTO_CLANG_THIN),$(CONFIG_LTO_CLANG_THIN_DIST)),y) +ifdef CONFIG_LTO_CLANG_THIN ifdef CLANG_AUTOFDO_PROFILE KBUILD_LDFLAGS += --lto-sample-profile=$(CLANG_AUTOFDO_PROFILE) endif diff --git a/scripts/Makefile.propeller b/scripts/Makefile.propeller index 46a59e160c0362..48a660128e256d 100644 --- a/scripts/Makefile.propeller +++ b/scripts/Makefile.propeller @@ -24,7 +24,7 @@ ifndef CONFIG_DEBUG_INFO endif endif -ifeq ($(or $(CONFIG_LTO_CLANG_THIN),$(CONFIG_LTO_CLANG_THIN_DIST)),y) +ifdef CONFIG_LTO_CLANG_THIN ifdef CLANG_PROPELLER_PROFILE_PREFIX KBUILD_LDFLAGS += --lto-basic-block-sections=$(CLANG_PROPELLER_PROFILE_PREFIX)_cc_profile.txt else From 91f2a36152062533df8e30cd52259bf932f549ed Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Wed, 21 May 2025 14:35:34 -0700 Subject: [PATCH 2886/2924] kbuild: distributed build support for Clang ThinLTO Add distributed ThinLTO build support for the Linux kernel. This new mode offers several advantages: (1) Increased flexibility in handling user-specified build options. (2) Improved user-friendliness for developers. (3) Greater convenience for integrating with objtool and livepatch. Note that "distributed" in this context refers to a term that differentiates in-process ThinLTO builds by invoking backend compilation through the linker, not necessarily building in distributed environments. Distributed ThinLTO is enabled via the `CONFIG_LTO_CLANG_THIN_DIST` Kconfig option. For example: > make LLVM=1 defconfig > scripts/config -e LTO_CLANG_THIN_DIST > make LLVM=1 oldconfig > make LLVM=1 vmlinux -j <..> The implementation changes the top-level Makefile with a macro for generating `vmlinux.o` for distributed ThinLTO builds. It uses the existing Kbuild infrastructure to perform two recursive passes through the subdirectories. The first pass generates LLVM IR object files, similar to in-process ThinLTO. Following the thin-link stage, a second pass compiles these IR files into the final native object files. The build rules and actions for this two-pass process are primarily implemented in `scripts/Makefile.build`. Currently, this patch focuses on building the main kernel image (`vmlinux`) only. Support for building kernel modules using this method is planned for a subsequent patch. Tested on the following arch: x86, arm64, loongarch, and riscv. Some implementation details can be found here: https://discourse.llvm.org/t/rfc-distributed-thinlto-build-for-kernel/85934 Signed-off-by: Rong Xu --- .gitignore | 3 ++ MAINTAINERS | 5 +++ Makefile | 40 ++++++++++++++++++++--- arch/Kconfig | 19 +++++++++++ scripts/Makefile.build | 52 +++++++++++++++++++++++++++--- scripts/Makefile.lib | 7 +++- scripts/Makefile.vmlinux_o | 16 +++++++--- scripts/Makefile.vmlinux_thinlink | 53 +++++++++++++++++++++++++++++++ scripts/head-object-list.txt | 1 + 9 files changed, 181 insertions(+), 15 deletions(-) create mode 100644 scripts/Makefile.vmlinux_thinlink diff --git a/.gitignore b/.gitignore index f2f63e47fb8868..b83a68185ef469 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ # .* *.a +*.a_thinlto_native *.asn1.[ch] *.bin *.bz2 @@ -39,6 +40,7 @@ *.mod.c *.o *.o.* +*.o_thinlto_native *.patch *.rmeta *.rpm @@ -64,6 +66,7 @@ modules.order /vmlinux /vmlinux.32 /vmlinux.map +/vmlinux.thinlink /vmlinux.symvers /vmlinux.unstripped /vmlinux-gdb.py diff --git a/MAINTAINERS b/MAINTAINERS index dd844ac8d9107b..4feb3b9f8f739d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5790,6 +5790,11 @@ F: scripts/Makefile.clang F: scripts/clang-tools/ K: \b(?i:clang|llvm)\b +CLANG/LLVM THINLTO DISTRIBUTED BUILD +M: Rong Xu +S: Supported +F: scripts/Makefile.vmlinux_thinlink + CLK API M: Russell King L: linux-clk@vger.kernel.org diff --git a/Makefile b/Makefile index 2d9d46275c4bf9..1c77bc7755b23b 100644 --- a/Makefile +++ b/Makefile @@ -298,7 +298,8 @@ no-dot-config-targets := $(clean-targets) \ outputmakefile rustavailable rustfmt rustfmtcheck no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \ image_name -single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %/ +single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.o_thinlto_native \ + %.a_thinlto_native %.o.thinlto.bc %/ config-build := mixed-build := @@ -999,10 +1000,10 @@ export CC_FLAGS_SCS endif ifdef CONFIG_LTO_CLANG -ifdef CONFIG_LTO_CLANG_THIN -CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit -else +ifdef CONFIG_LTO_CLANG_FULL CC_FLAGS_LTO := -flto +else # for CONFIG_LTO_CLANG_THIN or CONFIG_LTO_CLANG_THIN_DIST +CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit endif CC_FLAGS_LTO += -fvisibility=hidden @@ -1221,8 +1222,34 @@ vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE $(call if_changed,ar_vmlinux.a) PHONY += vmlinux_o +ifdef CONFIG_LTO_CLANG_THIN_DIST +vmlinux.thinlink: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_thinlink +targets += vmlinux.thinlink + +vmlinux.a_thinlto_native := $(patsubst %.a,%.a_thinlto_native,$(KBUILD_VMLINUX_OBJS)) +quiet_cmd_ar_vmlinux.a_thinlto_native = AR $@ + cmd_ar_vmlinux.a_thinlto_native = \ + rm -f $@; \ + $(AR) cDPrST $@ $(vmlinux.a_thinlto_native); \ + $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) + +define rule_gen_vmlinux.a_thinlto_native + +$(Q)$(MAKE) $(build)=. need-builtin=1 thinlto_final_pass=1 need-modorder=1 built-in.a_thinlto_native + $(call cmd_and_savecmd,ar_vmlinux.a_thinlto_native) +endef + +vmlinux.a_thinlto_native: vmlinux.thinlink scripts/head-object-list.txt FORCE + $(call if_changed_rule,gen_vmlinux.a_thinlto_native) + +targets += vmlinux.a_thinlto_native + +vmlinux_o: vmlinux.a_thinlto_native + $(Q)$(MAKE) thinlto_final_pass=1 -f $(srctree)/scripts/Makefile.vmlinux_o +else vmlinux_o: vmlinux.a $(KBUILD_VMLINUX_LIBS) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_o +endif vmlinux.o modules.builtin.modinfo modules.builtin: vmlinux_o @: @@ -1580,7 +1607,8 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S + .builtin-dtbs-list .builtin-dtb.S \ + .vmlinux_thinlto_bc_files vmlinux.thinlink # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -2031,6 +2059,8 @@ clean: $(clean-dirs) -o -name '*.symtypes' -o -name 'modules.order' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ + -o -name '*.a_thinlto_native' -o -name '*.o_thinlto_native' \ + -o -name '*.o.thinlto.bc' \ -o -name '*.gcno' \ \) -type f -print \ -o -name '.tmp_*' -print \ diff --git a/arch/Kconfig b/arch/Kconfig index b0adb665041f17..30dccda07c6716 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -810,6 +810,25 @@ config LTO_CLANG_THIN https://clang.llvm.org/docs/ThinLTO.html If unsure, say Y. + +config LTO_CLANG_THIN_DIST + bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)" + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN + select LTO_CLANG + help + This option enables Clang's ThinLTO in distributed build mode. + In this mode, the linker performs the thin-link, generating + ThinLTO index files. Subsequently, the build system explicitly + invokes ThinLTO backend compilation using these index files + and pre-linked IR objects. The resulting native object files + are with the .o_thinlto_native suffix. + + This build mode offers improved visibility into the ThinLTO + process through explicit subcommand exposure. It also makes + final native object files directly available, benefiting + tools like objtool and kpatch. Additionally, it provides + crucial granular control over back-end options, enabling + module-specific compiler options, and simplifies debugging. endchoice config ARCH_SUPPORTS_AUTOFDO_CLANG diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 13dcd86e74ca83..338e1aec0eaa3f 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -50,18 +50,23 @@ endif # =========================================================================== +builtin_suffix := $(if $(filter %.a_thinlto_native, $(MAKECMDGOALS)),.a_thinlto_native,.a) +ifeq ($(thinlto_final_pass),1) +builtin_suffix :=.a_thinlto_native +endif + # subdir-builtin and subdir-modorder may contain duplications. Use $(sort ...) -subdir-builtin := $(sort $(filter %/built-in.a, $(real-obj-y))) +subdir-builtin := $(sort $(filter %/built-in$(builtin_suffix), $(real-obj-y))) subdir-modorder := $(sort $(filter %/modules.order, $(obj-m))) targets-for-builtin := $(extra-y) ifneq ($(strip $(lib-y) $(lib-m) $(lib-)),) -targets-for-builtin += $(obj)/lib.a +targets-for-builtin += $(obj)/lib$(builtin_suffix) endif ifdef need-builtin -targets-for-builtin += $(obj)/built-in.a +targets-for-builtin += $(obj)/built-in$(builtin_suffix) endif targets-for-modules := $(foreach x, o mod, \ @@ -337,6 +342,10 @@ $(obj)/%.o: $(obj)/%.S FORCE targets += $(filter-out $(subdir-builtin), $(real-obj-y)) targets += $(filter-out $(subdir-modorder), $(real-obj-m)) targets += $(lib-y) $(always-y) +ifeq ($(builtin_suffix),.a_thinlto_native) +native_targets = $(patsubst,%.o,%.o_thinlto_native,$(targets)) +targets += $(native_targets) +endif # Linker scripts preprocessor (.lds.S -> .lds) # --------------------------------------------------------------------------- @@ -347,6 +356,24 @@ quiet_cmd_cpp_lds_S = LDS $@ $(obj)/%.lds: $(src)/%.lds.S FORCE $(call if_changed_dep,cpp_lds_S) +ifdef CONFIG_LTO_CLANG_THIN_DIST +# Generate .o_thinlto_native (obj) from .o (bitcode) file +# --------------------------------------------------------------------------- +quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@ + +cmd_cc_o_bc = $(if $(filter bitcode, $(shell file -b $<)),$(CC) \ + $(filter-out -Wp% $(LINUXINCLUDE) %.h.gch %.h -D% \ + -flto=thin, $(c_flags)) \ + -Wno-unused-command-line-argument \ + -x ir -fthinlto-index=$<.thinlto.bc -c -o $@ \ + $(if $(findstring ../,$<), \ + $$(realpath --relative-to=$(srcroot) $<), $<), \ + cp $< $@) + +$(obj)/%.o_thinlto_native: $(obj)/%.o FORCE + $(call if_changed,cc_o_bc) +endif + # ASN.1 grammar # --------------------------------------------------------------------------- quiet_cmd_asn1_compiler = ASN.1 $(basename $@).[ch] @@ -360,7 +387,7 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler # --------------------------------------------------------------------------- # To build objects in subdirs, we need to descend into the directories -$(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ; +$(subdir-builtin): $(obj)/%/built-in$(builtin_suffix): $(obj)/% ; $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ; # @@ -377,6 +404,12 @@ quiet_cmd_ar_builtin = AR $@ $(obj)/built-in.a: $(real-obj-y) FORCE $(call if_changed,ar_builtin) +ifdef CONFIG_LTO_CLANG_THIN_DIST +# Rule to compile a set of .o_thinlto_native files into one .a_thinlto_native file. +$(obj)/built-in.a_thinlto_native: $(patsubst %.o,%.o_thinlto_native,$(real-obj-y)) FORCE + $(call if_changed,ar_builtin) +endif + # This is a list of build artifacts from the current Makefile and its # sub-directories. The timestamp should be updated when any of the member files. @@ -394,6 +427,14 @@ $(obj)/modules.order: $(obj-m) FORCE $(obj)/lib.a: $(lib-y) FORCE $(call if_changed,ar) +ifdef CONFIG_LTO_CLANG_THIN_DIST +quiet_cmd_ar_native = AR $@ + cmd_ar_native = rm -f $@; $(AR) cDPrsT $@ $(patsubst %.o,%.o_thinlto_native,$(real-prereqs)) + +$(obj)/lib.a_thinlto_native: $(patsubst %.o,%.o_thinlto_native,$(lib-y)) FORCE + $(call if_changed,ar_native) +endif + quiet_cmd_ld_multi_m = LD [M] $@ cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$< $(cmd_objtool) @@ -459,7 +500,8 @@ $(single-subdir-goals): $(single-subdirs) PHONY += $(subdir-ym) $(subdir-ym): $(Q)$(MAKE) $(build)=$@ \ - need-builtin=$(if $(filter $@/built-in.a, $(subdir-builtin)),1) \ + need-builtin=$(if $(filter $@/built-in$(builtin_suffix), $(subdir-builtin)),1) \ + thinlto_final_pass=$(if $(filter .a_thinlto_native, $(builtin_suffix)),1) \ need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \ $(filter $@/%, $(single-subdir-goals)) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 2fe73cda0bddb9..9cfd23590334df 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -34,8 +34,13 @@ else obj-m := $(filter-out %/, $(obj-m)) endif +builtin_suffix := $(if $(filter %.a_thinlto_native, $(MAKECMDGOALS)),.a_thinlto_native,.a) +ifeq ($(thinlto_final_pass),1) + builtin_suffix :=.a_thinlto_native +endif + ifdef need-builtin -obj-y := $(patsubst %/, %/built-in.a, $(obj-y)) +obj-y := $(patsubst %/, %/built-in$(builtin_suffix), $(obj-y)) else obj-y := $(filter-out %/, $(obj-y)) endif diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index b024ffb3e20187..f9abc45a68b363 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -9,6 +9,14 @@ include $(srctree)/scripts/Kbuild.include # for objtool include $(srctree)/scripts/Makefile.lib +ifeq ($(thinlto_final_pass),1) +vmlinux_a := vmlinux.a_thinlto_native +vmlinux_libs := $(patsubst %.a,%.a_thinlto_native,$(KBUILD_VMLINUX_LIBS)) +else +vmlinux_a := vmlinux.a +vmlinux_libs := $(KBUILD_VMLINUX_LIBS) +endif + # Generate a linker script to ensure correct ordering of initcalls for Clang LTO # --------------------------------------------------------------------------- @@ -18,7 +26,7 @@ quiet_cmd_gen_initcalls_lds = GEN $@ $(PERL) $(real-prereqs) > $@ .tmp_initcalls.lds: $(srctree)/scripts/generate_initcall_order.pl \ - vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE + $(vmlinux_a) $(vmlinux_libs) FORCE $(call if_changed,gen_initcalls_lds) targets := .tmp_initcalls.lds @@ -59,8 +67,8 @@ quiet_cmd_ld_vmlinux.o = LD $@ $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ $(vmlinux-o-ld-args-y) \ $(addprefix -T , $(initcalls-lds)) \ - --whole-archive vmlinux.a --no-whole-archive \ - --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ + --whole-archive $(vmlinux_a) --no-whole-archive \ + --start-group $(vmlinux_libs) --end-group \ $(cmd_objtool) define rule_ld_vmlinux.o @@ -68,7 +76,7 @@ define rule_ld_vmlinux.o $(call cmd,gen_objtooldep) endef -vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE +vmlinux.o: $(initcalls-lds) $(vmlinux_a) $(vmlinux_libs) FORCE $(call if_changed_rule,ld_vmlinux.o) targets += vmlinux.o diff --git a/scripts/Makefile.vmlinux_thinlink b/scripts/Makefile.vmlinux_thinlink new file mode 100644 index 00000000000000..13e4026c7d45bd --- /dev/null +++ b/scripts/Makefile.vmlinux_thinlink @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: GPL-2.0-only + +PHONY := __default +__default: vmlinux.thinlink + +include include/config/auto.conf +include $(srctree)/scripts/Kbuild.include + + +# Generate a linker script to ensure correct ordering of initcalls for Clang LTO +# --------------------------------------------------------------------------- + +quiet_cmd_gen_initcalls_lds = GEN $@ + cmd_gen_initcalls_lds = \ + $(PYTHON3) $(srctree)/scripts/jobserver-exec \ + $(PERL) $(real-prereqs) > $@ + +.tmp_initcalls_thinlink.lds: $(srctree)/scripts/generate_initcall_order.pl \ + vmlinux.a FORCE + $(call if_changed,gen_initcalls_lds) + +targets := .tmp_initcalls_thinlink.lds + +initcalls-lds := .tmp_initcalls_thinlink.lds + +quiet_cmd_ld_vmlinux.thinlink = LD $@ + cmd_ld_vmlinux.thinlink = \ + $(AR) t vmlinux.a > .vmlinux_thinlto_bc_files; \ + $(LD) ${KBUILD_LDFLAGS} -r $(addprefix -T , $(initcalls-lds)) \ + --thinlto-index-only @.vmlinux_thinlto_bc_files; \ + touch vmlinux.thinlink + +vmlinux.thinlink: vmlinux.a $(initcalls-lds) FORCE + $(call if_changed,ld_vmlinux.thinlink) + +targets += vmlinux.thinlink + +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. +# --------------------------------------------------------------------------- + +PHONY += FORCE +FORCE: + +# Read all saved command lines and dependencies for the $(targets) we +# may be building above, using $(if_changed{,_dep}). As an +# optimization, we don't need to read them if the target does not +# exist, we will rebuild anyway in that case. + +existing-targets := $(wildcard $(sort $(targets))) + +-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) + +.PHONY: $(PHONY) diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt index 7274dfc65af606..90710b87a38779 100644 --- a/scripts/head-object-list.txt +++ b/scripts/head-object-list.txt @@ -18,6 +18,7 @@ arch/arm/kernel/head.o arch/csky/kernel/head.o arch/hexagon/kernel/head.o arch/loongarch/kernel/head.o +arch/loongarch/kernel/head.o_thinlto_native arch/m68k/68000/head.o arch/m68k/coldfire/head.o arch/m68k/kernel/head.o From 8846f1a3311b972706fbe096a8d985b3827caa89 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Fri, 23 May 2025 20:05:19 +0800 Subject: [PATCH 2887/2924] CACHY: le9uo: Fix sysctl location Signed-off-by: Eric Naim --- kernel/sysctl.c | 34 ---------------------------------- mm/util.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index abbb553218cd3d..fe7bec454345fd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1849,40 +1849,6 @@ static const struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "workingset_protection", - .data = &sysctl_workingset_protection, - .maxlen = sizeof(bool), - .mode = 0644, - .proc_handler = &proc_dobool, - }, - { - .procname = "anon_min_ratio", - .data = &sysctl_anon_min_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "clean_low_ratio", - .data = &sysctl_clean_low_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "clean_min_ratio", - .data = &sysctl_clean_min_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, { .procname = "panic_on_warn", .data = &panic_on_warn, diff --git a/mm/util.c b/mm/util.c index 448117da071f69..b9334775c51d43 100644 --- a/mm/util.c +++ b/mm/util.c @@ -857,6 +857,40 @@ static const struct ctl_table util_sysctl_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, + { + .procname = "workingset_protection", + .data = &sysctl_workingset_protection, + .maxlen = sizeof(bool), + .mode = 0644, + .proc_handler = &proc_dobool, + }, + { + .procname = "anon_min_ratio", + .data = &sysctl_anon_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_low_ratio", + .data = &sysctl_clean_low_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_min_ratio", + .data = &sysctl_clean_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, }; static int __init init_vm_util_sysctls(void) From daf836d8e4e00ab7c4572c495cd7a5138665f9c9 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Wed, 4 Jun 2025 16:40:02 +0200 Subject: [PATCH 2888/2924] Revert "CACHY: le9uo: Fix sysctl location" This reverts commit a65320bf6bb34575b159d9d834d169a2c5b574c2. --- kernel/sysctl.c | 34 ++++++++++++++++++++++++++++++++++ mm/util.c | 34 ---------------------------------- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fe7bec454345fd..abbb553218cd3d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1849,6 +1849,40 @@ static const struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif + { + .procname = "workingset_protection", + .data = &sysctl_workingset_protection, + .maxlen = sizeof(bool), + .mode = 0644, + .proc_handler = &proc_dobool, + }, + { + .procname = "anon_min_ratio", + .data = &sysctl_anon_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_low_ratio", + .data = &sysctl_clean_low_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_min_ratio", + .data = &sysctl_clean_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, { .procname = "panic_on_warn", .data = &panic_on_warn, diff --git a/mm/util.c b/mm/util.c index b9334775c51d43..448117da071f69 100644 --- a/mm/util.c +++ b/mm/util.c @@ -857,40 +857,6 @@ static const struct ctl_table util_sysctl_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, - { - .procname = "workingset_protection", - .data = &sysctl_workingset_protection, - .maxlen = sizeof(bool), - .mode = 0644, - .proc_handler = &proc_dobool, - }, - { - .procname = "anon_min_ratio", - .data = &sysctl_anon_min_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "clean_low_ratio", - .data = &sysctl_clean_low_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "clean_min_ratio", - .data = &sysctl_clean_min_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, }; static int __init init_vm_util_sysctls(void) From 9adb5a408d3427d2e56aa9fcdb07fa3443ec9331 Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Wed, 4 Jun 2025 16:40:10 +0200 Subject: [PATCH 2889/2924] Revert "le9uo 1.8" This reverts commit bd2b42843c996e845cf6d892165d4132b2f308fa. --- Documentation/admin-guide/sysctl/vm.rst | 72 ------------ include/linux/mm.h | 8 -- kernel/sysctl.c | 34 ------ mm/Kconfig | 63 ----------- mm/mm_init.c | 1 - mm/vmscan.c | 144 +----------------------- 6 files changed, 2 insertions(+), 320 deletions(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index f2e601171a3e24..8290177b4f7589 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -25,9 +25,6 @@ files can be found in mm/swap.c. Currently, these files are in /proc/sys/vm: - admin_reserve_kbytes -- anon_min_ratio -- clean_low_ratio -- clean_min_ratio - compact_memory - compaction_proactiveness - compact_unevictable_allowed @@ -112,67 +109,6 @@ On x86_64 this is about 128MB. Changing this takes effect whenever an application requests memory. -anon_min_ratio -============== - -This knob provides *hard* protection of anonymous pages. The anonymous pages -on the current node won't be reclaimed under any conditions when their amount -is below vm.anon_min_ratio. - -This knob may be used to prevent excessive swap thrashing when anonymous -memory is low (for example, when memory is going to be overfilled by -compressed data of zram module). - -Setting this value too high (close to 100) can result in inability to -swap and can lead to early OOM under memory pressure. - -The unit of measurement is the percentage of the total memory of the node. - -The default value is 15. - - -clean_low_ratio -================ - -This knob provides *best-effort* protection of clean file pages. The file pages -on the current node won't be reclaimed under memory pressure when the amount of -clean file pages is below vm.clean_low_ratio *unless* we threaten to OOM. - -Protection of clean file pages using this knob may be used when swapping is -still possible to - - prevent disk I/O thrashing under memory pressure; - - improve performance in disk cache-bound tasks under memory pressure. - -Setting it to a high value may result in a early eviction of anonymous pages -into the swap space by attempting to hold the protected amount of clean file -pages in memory. - -The unit of measurement is the percentage of the total memory of the node. - -The default value is 0. - - -clean_min_ratio -================ - -This knob provides *hard* protection of clean file pages. The file pages on the -current node won't be reclaimed under memory pressure when the amount of clean -file pages is below vm.clean_min_ratio. - -Hard protection of clean file pages using this knob may be used to - - prevent disk I/O thrashing under memory pressure even with no free swap space; - - improve performance in disk cache-bound tasks under memory pressure; - - avoid high latency and prevent livelock in near-OOM conditions. - -Setting it to a high value may result in a early out-of-memory condition due to -the inability to reclaim the protected amount of clean file pages when other -types of pages cannot be reclaimed. - -The unit of measurement is the percentage of the total memory of the node. - -The default value is 15. - - compact_memory ============== @@ -1037,14 +973,6 @@ be 133 (x + 2x = 200, 2x = 133.33). At 0, the kernel will not initiate swap until the amount of free and file-backed pages is less than the high watermark in a zone. -This knob has no effect if the amount of clean file pages on the current -node is below vm.clean_low_ratio or vm.clean_min_ratio. In this case, -only anonymous pages can be reclaimed. - -If the number of anonymous pages on the current node is below -vm.anon_min_ratio, then only file pages can be reclaimed with -any vm.swappiness value. - unprivileged_userfaultfd ======================== diff --git a/include/linux/mm.h b/include/linux/mm.h index ece96e871a3b62..e51dba8398f747 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -193,14 +193,6 @@ static inline void __mm_zero_struct_page(struct page *page) extern int sysctl_max_map_count; -extern bool sysctl_workingset_protection; -extern u8 sysctl_anon_min_ratio; -extern u8 sysctl_clean_low_ratio; -extern u8 sysctl_clean_min_ratio; -int vm_workingset_protection_update_handler( - const struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); - extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index abbb553218cd3d..fe7bec454345fd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1849,40 +1849,6 @@ static const struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "workingset_protection", - .data = &sysctl_workingset_protection, - .maxlen = sizeof(bool), - .mode = 0644, - .proc_handler = &proc_dobool, - }, - { - .procname = "anon_min_ratio", - .data = &sysctl_anon_min_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "clean_low_ratio", - .data = &sysctl_clean_low_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "clean_min_ratio", - .data = &sysctl_clean_min_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, { .procname = "panic_on_warn", .data = &panic_on_warn, diff --git a/mm/Kconfig b/mm/Kconfig index 5fc1934a30957a..0f76cc26473d58 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -462,69 +462,6 @@ config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP config ARCH_WANT_HUGETLB_VMEMMAP_PREINIT bool -config ANON_MIN_RATIO - int "Default value for vm.anon_min_ratio" - depends on SYSCTL - range 0 100 - default 15 - help - This option sets the default value for vm.anon_min_ratio sysctl knob. - - The vm.anon_min_ratio sysctl knob provides *hard* protection of - anonymous pages. The anonymous pages on the current node won't be - reclaimed under any conditions when their amount is below - vm.anon_min_ratio. This knob may be used to prevent excessive swap - thrashing when anonymous memory is low (for example, when memory is - going to be overfilled by compressed data of zram module). - - Setting this value too high (close to MemTotal) can result in - inability to swap and can lead to early OOM under memory pressure. - -config CLEAN_LOW_RATIO - int "Default value for vm.clean_low_ratio" - depends on SYSCTL - range 0 100 - default 0 - help - This option sets the default value for vm.clean_low_ratio sysctl knob. - - The vm.clean_low_ratio sysctl knob provides *best-effort* - protection of clean file pages. The file pages on the current node - won't be reclaimed under memory pressure when the amount of clean file - pages is below vm.clean_low_ratio *unless* we threaten to OOM. - Protection of clean file pages using this knob may be used when - swapping is still possible to - - prevent disk I/O thrashing under memory pressure; - - improve performance in disk cache-bound tasks under memory - pressure. - - Setting it to a high value may result in a early eviction of anonymous - pages into the swap space by attempting to hold the protected amount - of clean file pages in memory. - -config CLEAN_MIN_RATIO - int "Default value for vm.clean_min_ratio" - depends on SYSCTL - range 0 100 - default 15 - help - This option sets the default value for vm.clean_min_ratio sysctl knob. - - The vm.clean_min_ratio sysctl knob provides *hard* protection of - clean file pages. The file pages on the current node won't be - reclaimed under memory pressure when the amount of clean file pages is - below vm.clean_min_ratio. Hard protection of clean file pages using - this knob may be used to - - prevent disk I/O thrashing under memory pressure even with no free - swap space; - - improve performance in disk cache-bound tasks under memory - pressure; - - avoid high latency and prevent livelock in near-OOM conditions. - - Setting it to a high value may result in a early out-of-memory condition - due to the inability to reclaim the protected amount of clean file pages - when other types of pages cannot be reclaimed. - config HAVE_MEMBLOCK_PHYS_MAP bool diff --git a/mm/mm_init.c b/mm/mm_init.c index 78f87faf839478..eedce9321e13dc 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2732,7 +2732,6 @@ static void __init mem_init_print_info(void) , K(totalhigh_pages()) #endif ); - printk(KERN_INFO "le9 Unofficial (le9uo) working set protection 1.8 by Masahito Suzuki (forked from hakavlad's original le9 patch)"); } void __init __weak arch_mm_preinit(void) diff --git a/mm/vmscan.c b/mm/vmscan.c index 4f991a6ae689e9..8e6fa42ad584fd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,15 +148,6 @@ struct scan_control { /* The file folios on the current node are dangerously low */ unsigned int file_is_tiny:1; - /* The anonymous pages on the current node are below vm.anon_min_ratio */ - unsigned int anon_below_min:1; - - /* The clean file pages on the current node are below vm.clean_low_ratio */ - unsigned int clean_below_low:1; - - /* The clean file pages on the current node are below vm.clean_min_ratio */ - unsigned int clean_below_min:1; - /* Always discard instead of demoting to lower tier memory */ unsigned int no_demotion:1; @@ -206,15 +197,6 @@ struct scan_control { #define prefetchw_prev_lru_folio(_folio, _base, _field) do { } while (0) #endif -bool sysctl_workingset_protection __read_mostly = true; -u8 sysctl_anon_min_ratio __read_mostly = CONFIG_ANON_MIN_RATIO; -u8 sysctl_clean_low_ratio __read_mostly = CONFIG_CLEAN_LOW_RATIO; -u8 sysctl_clean_min_ratio __read_mostly = CONFIG_CLEAN_MIN_RATIO; -static u64 sysctl_anon_min_ratio_kb __read_mostly = 0; -static u64 sysctl_clean_low_ratio_kb __read_mostly = 0; -static u64 sysctl_clean_min_ratio_kb __read_mostly = 0; -static u64 workingset_protection_prev_totalram __read_mostly = 0; - /* * From 0 .. MAX_SWAPPINESS. Higher means more swappy. */ @@ -1169,10 +1151,6 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, if (!sc->may_unmap && folio_mapped(folio)) goto keep_locked; - if (folio_is_file_lru(folio) ? sc->clean_below_min : - (sc->anon_below_min && !sc->clean_below_min)) - goto keep_locked; - /* * The number of dirty pages determines if a node is marked * reclaim_congested. kswapd will stall and start writing @@ -2547,15 +2525,6 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } - /* - * Force-scan anon if clean file pages is under vm.clean_low_ratio - * or vm.clean_min_ratio. - */ - if (sc->clean_below_low || sc->clean_below_min) { - scan_balance = SCAN_ANON; - goto out; - } - /* * If there is enough inactive page cache, we do not reclaim * anything from the anonymous working right now. @@ -2673,14 +2642,6 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, BUG(); } - /* - * Hard protection of the working set. - * Don't reclaim anon/file pages when the amount is - * below the watermark of the same type. - */ - if (file ? sc->clean_below_min : sc->anon_below_min) - scan = 0; - nr[lru] = scan; } } @@ -2700,96 +2661,6 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, return can_demote(pgdat->node_id, sc); } -int vm_workingset_protection_update_handler(const struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); - if (ret || !write) - return ret; - - workingset_protection_prev_totalram = 0; - - return 0; -} - -static void prepare_workingset_protection(pg_data_t *pgdat, struct scan_control *sc) -{ - unsigned long node_mem_total; - struct sysinfo i; - - if (!(sysctl_workingset_protection)) { - sc->anon_below_min = 0; - sc->clean_below_low = 0; - sc->clean_below_min = 0; - return; - } - - if (likely(sysctl_anon_min_ratio || - sysctl_clean_low_ratio || - sysctl_clean_min_ratio)) { -#ifdef CONFIG_NUMA - si_meminfo_node(&i, pgdat->node_id); -#else //CONFIG_NUMA - si_meminfo(&i); -#endif //CONFIG_NUMA - node_mem_total = i.totalram; - - if (unlikely(workingset_protection_prev_totalram != node_mem_total)) { - sysctl_anon_min_ratio_kb = - node_mem_total * sysctl_anon_min_ratio / 100; - sysctl_clean_low_ratio_kb = - node_mem_total * sysctl_clean_low_ratio / 100; - sysctl_clean_min_ratio_kb = - node_mem_total * sysctl_clean_min_ratio / 100; - workingset_protection_prev_totalram = node_mem_total; - } - } - - /* - * Check the number of anonymous pages to protect them from - * reclaiming if their amount is below the specified. - */ - if (sysctl_anon_min_ratio) { - unsigned long reclaimable_anon; - - reclaimable_anon = - node_page_state(pgdat, NR_ACTIVE_ANON) + - node_page_state(pgdat, NR_INACTIVE_ANON) + - node_page_state(pgdat, NR_ISOLATED_ANON); - - sc->anon_below_min = reclaimable_anon < sysctl_anon_min_ratio_kb; - } else - sc->anon_below_min = 0; - - /* - * Check the number of clean file pages to protect them from - * reclaiming if their amount is below the specified. - */ - if (sysctl_clean_low_ratio || sysctl_clean_min_ratio) { - unsigned long reclaimable_file, dirty, clean; - - reclaimable_file = - node_page_state(pgdat, NR_ACTIVE_FILE) + - node_page_state(pgdat, NR_INACTIVE_FILE) + - node_page_state(pgdat, NR_ISOLATED_FILE); - dirty = node_page_state(pgdat, NR_FILE_DIRTY); - /* - * node_page_state() sum can go out of sync since - * all the values are not read at once. - */ - if (likely(reclaimable_file > dirty)) - clean = reclaimable_file - dirty; - else - clean = 0; - - sc->clean_below_low = clean < sysctl_clean_low_ratio_kb; - sc->clean_below_min = clean < sysctl_clean_min_ratio_kb; - } else { - sc->clean_below_low = 0; - sc->clean_below_min = 0; - } -} - #ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN_ENABLED @@ -4756,20 +4627,13 @@ static int get_tier_idx(struct lruvec *lruvec, int type) return tier - 1; } -static int get_type_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness) +static int get_type_to_scan(struct lruvec *lruvec, int swappiness) { struct ctrl_pos sp, pv; if (swappiness <= MIN_SWAPPINESS + 1) return LRU_GEN_FILE; - if (sc->clean_below_min) - return LRU_GEN_ANON; - if (sc->anon_below_min) - return LRU_GEN_FILE; - if (sc->clean_below_low) - return LRU_GEN_ANON; - if (swappiness >= MAX_SWAPPINESS) return LRU_GEN_ANON; /* @@ -4786,7 +4650,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw int *type_scanned, struct list_head *list) { int i; - int type = get_type_to_scan(lruvec, sc, swappiness); + int type = get_type_to_scan(lruvec, swappiness); for_each_evictable_type(i, swappiness) { int scanned; @@ -5029,8 +4893,6 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); - prepare_workingset_protection(pgdat, sc); - /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; @@ -6169,8 +6031,6 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); - prepare_workingset_protection(pgdat, sc); - shrink_node_memcgs(pgdat, sc); flush_reclaim_state(sc); From 855dc4416cb924cf8618de0ff1bd111277f81afa Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 9 Jun 2025 15:12:31 +0700 Subject: [PATCH 2890/2924] Reapply "le9uo 1.8" This reverts commit 419d3c5d64b46187bc3a43df270ec50241bf5136. Signed-off-by: Eric Naim --- Documentation/admin-guide/sysctl/vm.rst | 72 ++++++++++++ include/linux/mm.h | 8 ++ kernel/sysctl.c | 34 ++++++ mm/Kconfig | 63 +++++++++++ mm/mm_init.c | 1 + mm/vmscan.c | 144 +++++++++++++++++++++++- 6 files changed, 320 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 8290177b4f7589..f2e601171a3e24 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -25,6 +25,9 @@ files can be found in mm/swap.c. Currently, these files are in /proc/sys/vm: - admin_reserve_kbytes +- anon_min_ratio +- clean_low_ratio +- clean_min_ratio - compact_memory - compaction_proactiveness - compact_unevictable_allowed @@ -109,6 +112,67 @@ On x86_64 this is about 128MB. Changing this takes effect whenever an application requests memory. +anon_min_ratio +============== + +This knob provides *hard* protection of anonymous pages. The anonymous pages +on the current node won't be reclaimed under any conditions when their amount +is below vm.anon_min_ratio. + +This knob may be used to prevent excessive swap thrashing when anonymous +memory is low (for example, when memory is going to be overfilled by +compressed data of zram module). + +Setting this value too high (close to 100) can result in inability to +swap and can lead to early OOM under memory pressure. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 15. + + +clean_low_ratio +================ + +This knob provides *best-effort* protection of clean file pages. The file pages +on the current node won't be reclaimed under memory pressure when the amount of +clean file pages is below vm.clean_low_ratio *unless* we threaten to OOM. + +Protection of clean file pages using this knob may be used when swapping is +still possible to + - prevent disk I/O thrashing under memory pressure; + - improve performance in disk cache-bound tasks under memory pressure. + +Setting it to a high value may result in a early eviction of anonymous pages +into the swap space by attempting to hold the protected amount of clean file +pages in memory. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 0. + + +clean_min_ratio +================ + +This knob provides *hard* protection of clean file pages. The file pages on the +current node won't be reclaimed under memory pressure when the amount of clean +file pages is below vm.clean_min_ratio. + +Hard protection of clean file pages using this knob may be used to + - prevent disk I/O thrashing under memory pressure even with no free swap space; + - improve performance in disk cache-bound tasks under memory pressure; + - avoid high latency and prevent livelock in near-OOM conditions. + +Setting it to a high value may result in a early out-of-memory condition due to +the inability to reclaim the protected amount of clean file pages when other +types of pages cannot be reclaimed. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 15. + + compact_memory ============== @@ -973,6 +1037,14 @@ be 133 (x + 2x = 200, 2x = 133.33). At 0, the kernel will not initiate swap until the amount of free and file-backed pages is less than the high watermark in a zone. +This knob has no effect if the amount of clean file pages on the current +node is below vm.clean_low_ratio or vm.clean_min_ratio. In this case, +only anonymous pages can be reclaimed. + +If the number of anonymous pages on the current node is below +vm.anon_min_ratio, then only file pages can be reclaimed with +any vm.swappiness value. + unprivileged_userfaultfd ======================== diff --git a/include/linux/mm.h b/include/linux/mm.h index e51dba8398f747..ece96e871a3b62 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -193,6 +193,14 @@ static inline void __mm_zero_struct_page(struct page *page) extern int sysctl_max_map_count; +extern bool sysctl_workingset_protection; +extern u8 sysctl_anon_min_ratio; +extern u8 sysctl_clean_low_ratio; +extern u8 sysctl_clean_min_ratio; +int vm_workingset_protection_update_handler( + const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fe7bec454345fd..abbb553218cd3d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1849,6 +1849,40 @@ static const struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif + { + .procname = "workingset_protection", + .data = &sysctl_workingset_protection, + .maxlen = sizeof(bool), + .mode = 0644, + .proc_handler = &proc_dobool, + }, + { + .procname = "anon_min_ratio", + .data = &sysctl_anon_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_low_ratio", + .data = &sysctl_clean_low_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_min_ratio", + .data = &sysctl_clean_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, { .procname = "panic_on_warn", .data = &panic_on_warn, diff --git a/mm/Kconfig b/mm/Kconfig index 0f76cc26473d58..5fc1934a30957a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -462,6 +462,69 @@ config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP config ARCH_WANT_HUGETLB_VMEMMAP_PREINIT bool +config ANON_MIN_RATIO + int "Default value for vm.anon_min_ratio" + depends on SYSCTL + range 0 100 + default 15 + help + This option sets the default value for vm.anon_min_ratio sysctl knob. + + The vm.anon_min_ratio sysctl knob provides *hard* protection of + anonymous pages. The anonymous pages on the current node won't be + reclaimed under any conditions when their amount is below + vm.anon_min_ratio. This knob may be used to prevent excessive swap + thrashing when anonymous memory is low (for example, when memory is + going to be overfilled by compressed data of zram module). + + Setting this value too high (close to MemTotal) can result in + inability to swap and can lead to early OOM under memory pressure. + +config CLEAN_LOW_RATIO + int "Default value for vm.clean_low_ratio" + depends on SYSCTL + range 0 100 + default 0 + help + This option sets the default value for vm.clean_low_ratio sysctl knob. + + The vm.clean_low_ratio sysctl knob provides *best-effort* + protection of clean file pages. The file pages on the current node + won't be reclaimed under memory pressure when the amount of clean file + pages is below vm.clean_low_ratio *unless* we threaten to OOM. + Protection of clean file pages using this knob may be used when + swapping is still possible to + - prevent disk I/O thrashing under memory pressure; + - improve performance in disk cache-bound tasks under memory + pressure. + + Setting it to a high value may result in a early eviction of anonymous + pages into the swap space by attempting to hold the protected amount + of clean file pages in memory. + +config CLEAN_MIN_RATIO + int "Default value for vm.clean_min_ratio" + depends on SYSCTL + range 0 100 + default 15 + help + This option sets the default value for vm.clean_min_ratio sysctl knob. + + The vm.clean_min_ratio sysctl knob provides *hard* protection of + clean file pages. The file pages on the current node won't be + reclaimed under memory pressure when the amount of clean file pages is + below vm.clean_min_ratio. Hard protection of clean file pages using + this knob may be used to + - prevent disk I/O thrashing under memory pressure even with no free + swap space; + - improve performance in disk cache-bound tasks under memory + pressure; + - avoid high latency and prevent livelock in near-OOM conditions. + + Setting it to a high value may result in a early out-of-memory condition + due to the inability to reclaim the protected amount of clean file pages + when other types of pages cannot be reclaimed. + config HAVE_MEMBLOCK_PHYS_MAP bool diff --git a/mm/mm_init.c b/mm/mm_init.c index eedce9321e13dc..78f87faf839478 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2732,6 +2732,7 @@ static void __init mem_init_print_info(void) , K(totalhigh_pages()) #endif ); + printk(KERN_INFO "le9 Unofficial (le9uo) working set protection 1.8 by Masahito Suzuki (forked from hakavlad's original le9 patch)"); } void __init __weak arch_mm_preinit(void) diff --git a/mm/vmscan.c b/mm/vmscan.c index 8e6fa42ad584fd..4f991a6ae689e9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,6 +148,15 @@ struct scan_control { /* The file folios on the current node are dangerously low */ unsigned int file_is_tiny:1; + /* The anonymous pages on the current node are below vm.anon_min_ratio */ + unsigned int anon_below_min:1; + + /* The clean file pages on the current node are below vm.clean_low_ratio */ + unsigned int clean_below_low:1; + + /* The clean file pages on the current node are below vm.clean_min_ratio */ + unsigned int clean_below_min:1; + /* Always discard instead of demoting to lower tier memory */ unsigned int no_demotion:1; @@ -197,6 +206,15 @@ struct scan_control { #define prefetchw_prev_lru_folio(_folio, _base, _field) do { } while (0) #endif +bool sysctl_workingset_protection __read_mostly = true; +u8 sysctl_anon_min_ratio __read_mostly = CONFIG_ANON_MIN_RATIO; +u8 sysctl_clean_low_ratio __read_mostly = CONFIG_CLEAN_LOW_RATIO; +u8 sysctl_clean_min_ratio __read_mostly = CONFIG_CLEAN_MIN_RATIO; +static u64 sysctl_anon_min_ratio_kb __read_mostly = 0; +static u64 sysctl_clean_low_ratio_kb __read_mostly = 0; +static u64 sysctl_clean_min_ratio_kb __read_mostly = 0; +static u64 workingset_protection_prev_totalram __read_mostly = 0; + /* * From 0 .. MAX_SWAPPINESS. Higher means more swappy. */ @@ -1151,6 +1169,10 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, if (!sc->may_unmap && folio_mapped(folio)) goto keep_locked; + if (folio_is_file_lru(folio) ? sc->clean_below_min : + (sc->anon_below_min && !sc->clean_below_min)) + goto keep_locked; + /* * The number of dirty pages determines if a node is marked * reclaim_congested. kswapd will stall and start writing @@ -2525,6 +2547,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } + /* + * Force-scan anon if clean file pages is under vm.clean_low_ratio + * or vm.clean_min_ratio. + */ + if (sc->clean_below_low || sc->clean_below_min) { + scan_balance = SCAN_ANON; + goto out; + } + /* * If there is enough inactive page cache, we do not reclaim * anything from the anonymous working right now. @@ -2642,6 +2673,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, BUG(); } + /* + * Hard protection of the working set. + * Don't reclaim anon/file pages when the amount is + * below the watermark of the same type. + */ + if (file ? sc->clean_below_min : sc->anon_below_min) + scan = 0; + nr[lru] = scan; } } @@ -2661,6 +2700,96 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, return can_demote(pgdat->node_id, sc); } +int vm_workingset_protection_update_handler(const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + workingset_protection_prev_totalram = 0; + + return 0; +} + +static void prepare_workingset_protection(pg_data_t *pgdat, struct scan_control *sc) +{ + unsigned long node_mem_total; + struct sysinfo i; + + if (!(sysctl_workingset_protection)) { + sc->anon_below_min = 0; + sc->clean_below_low = 0; + sc->clean_below_min = 0; + return; + } + + if (likely(sysctl_anon_min_ratio || + sysctl_clean_low_ratio || + sysctl_clean_min_ratio)) { +#ifdef CONFIG_NUMA + si_meminfo_node(&i, pgdat->node_id); +#else //CONFIG_NUMA + si_meminfo(&i); +#endif //CONFIG_NUMA + node_mem_total = i.totalram; + + if (unlikely(workingset_protection_prev_totalram != node_mem_total)) { + sysctl_anon_min_ratio_kb = + node_mem_total * sysctl_anon_min_ratio / 100; + sysctl_clean_low_ratio_kb = + node_mem_total * sysctl_clean_low_ratio / 100; + sysctl_clean_min_ratio_kb = + node_mem_total * sysctl_clean_min_ratio / 100; + workingset_protection_prev_totalram = node_mem_total; + } + } + + /* + * Check the number of anonymous pages to protect them from + * reclaiming if their amount is below the specified. + */ + if (sysctl_anon_min_ratio) { + unsigned long reclaimable_anon; + + reclaimable_anon = + node_page_state(pgdat, NR_ACTIVE_ANON) + + node_page_state(pgdat, NR_INACTIVE_ANON) + + node_page_state(pgdat, NR_ISOLATED_ANON); + + sc->anon_below_min = reclaimable_anon < sysctl_anon_min_ratio_kb; + } else + sc->anon_below_min = 0; + + /* + * Check the number of clean file pages to protect them from + * reclaiming if their amount is below the specified. + */ + if (sysctl_clean_low_ratio || sysctl_clean_min_ratio) { + unsigned long reclaimable_file, dirty, clean; + + reclaimable_file = + node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE) + + node_page_state(pgdat, NR_ISOLATED_FILE); + dirty = node_page_state(pgdat, NR_FILE_DIRTY); + /* + * node_page_state() sum can go out of sync since + * all the values are not read at once. + */ + if (likely(reclaimable_file > dirty)) + clean = reclaimable_file - dirty; + else + clean = 0; + + sc->clean_below_low = clean < sysctl_clean_low_ratio_kb; + sc->clean_below_min = clean < sysctl_clean_min_ratio_kb; + } else { + sc->clean_below_low = 0; + sc->clean_below_min = 0; + } +} + #ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN_ENABLED @@ -4627,13 +4756,20 @@ static int get_tier_idx(struct lruvec *lruvec, int type) return tier - 1; } -static int get_type_to_scan(struct lruvec *lruvec, int swappiness) +static int get_type_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness) { struct ctrl_pos sp, pv; if (swappiness <= MIN_SWAPPINESS + 1) return LRU_GEN_FILE; + if (sc->clean_below_min) + return LRU_GEN_ANON; + if (sc->anon_below_min) + return LRU_GEN_FILE; + if (sc->clean_below_low) + return LRU_GEN_ANON; + if (swappiness >= MAX_SWAPPINESS) return LRU_GEN_ANON; /* @@ -4650,7 +4786,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw int *type_scanned, struct list_head *list) { int i; - int type = get_type_to_scan(lruvec, swappiness); + int type = get_type_to_scan(lruvec, sc, swappiness); for_each_evictable_type(i, swappiness) { int scanned; @@ -4893,6 +5029,8 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); + prepare_workingset_protection(pgdat, sc); + /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; @@ -6031,6 +6169,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); + prepare_workingset_protection(pgdat, sc); + shrink_node_memcgs(pgdat, sc); flush_reclaim_state(sc); From 2096a20408e681a2913b673a5d8d4f626fa99c58 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 9 Jun 2025 15:12:33 +0700 Subject: [PATCH 2891/2924] Reapply "CACHY: le9uo: Fix sysctl location" This reverts commit c955575e5ab1715c067ad3e379c49c4450f158e8. Signed-off-by: Eric Naim --- kernel/sysctl.c | 34 ---------------------------------- mm/util.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index abbb553218cd3d..fe7bec454345fd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1849,40 +1849,6 @@ static const struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "workingset_protection", - .data = &sysctl_workingset_protection, - .maxlen = sizeof(bool), - .mode = 0644, - .proc_handler = &proc_dobool, - }, - { - .procname = "anon_min_ratio", - .data = &sysctl_anon_min_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "clean_low_ratio", - .data = &sysctl_clean_low_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, - { - .procname = "clean_min_ratio", - .data = &sysctl_clean_min_ratio, - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = &vm_workingset_protection_update_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE_HUNDRED, - }, { .procname = "panic_on_warn", .data = &panic_on_warn, diff --git a/mm/util.c b/mm/util.c index 448117da071f69..b9334775c51d43 100644 --- a/mm/util.c +++ b/mm/util.c @@ -857,6 +857,40 @@ static const struct ctl_table util_sysctl_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, + { + .procname = "workingset_protection", + .data = &sysctl_workingset_protection, + .maxlen = sizeof(bool), + .mode = 0644, + .proc_handler = &proc_dobool, + }, + { + .procname = "anon_min_ratio", + .data = &sysctl_anon_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_low_ratio", + .data = &sysctl_clean_low_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_min_ratio", + .data = &sysctl_clean_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, }; static int __init init_vm_util_sysctls(void) From dafe03c3e9c459d98a7e7c96bcb859660e065357 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Mon, 9 Jun 2025 12:11:50 +0700 Subject: [PATCH 2892/2924] v4l2loopback: 0.15.0 Signed-off-by: Eric Naim --- drivers/media/v4l2-core/v4l2loopback.c | 23 +++++++++++++- drivers/media/v4l2-core/v4l2loopback.h | 42 ++++++++++++++++---------- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c index 5c8b70981afa49..74bd8125caa32c 100644 --- a/drivers/media/v4l2-core/v4l2loopback.c +++ b/drivers/media/v4l2-core/v4l2loopback.c @@ -33,6 +33,10 @@ #include #include "v4l2loopback.h" +#define V4L2LOOPBACK_CTL_ADD_legacy 0x4C80 +#define V4L2LOOPBACK_CTL_REMOVE_legacy 0x4C81 +#define V4L2LOOPBACK_CTL_QUERY_legacy 0x4C82 + #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) #error This module is not supported on kernels before 4.0.0. #endif @@ -49,6 +53,10 @@ #define VFL_TYPE_VIDEO VFL_TYPE_GRABBER #endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0) +#define timer_delete_sync del_timer_sync +#endif + #define V4L2LOOPBACK_VERSION_CODE \ KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \ V4L2LOOPBACK_VERSION_BUGFIX) @@ -2972,6 +2980,7 @@ static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, struct v4l2_loopback_config *confptr = &conf; int device_nr, capture_nr, output_nr; int ret; + const __u32 version = V4L2LOOPBACK_VERSION_CODE; ret = mutex_lock_killable(&v4l2loopback_ctl_mutex); if (ret) @@ -2984,6 +2993,7 @@ static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, break; /* add a v4l2loopback device (pair), based on the user-provided specs */ case V4L2LOOPBACK_CTL_ADD: + case V4L2LOOPBACK_CTL_ADD_legacy: if (parm) { if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) < 0) @@ -2996,7 +3006,8 @@ static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, break; /* remove a v4l2loopback device (both capture and output) */ case V4L2LOOPBACK_CTL_REMOVE: - ret = v4l2loopback_lookup((int)parm, &dev); + case V4L2LOOPBACK_CTL_REMOVE_legacy: + ret = v4l2loopback_lookup((__u32)parm, &dev); if (ret >= 0 && dev) { ret = -EBUSY; if (dev->open_count.counter > 0) @@ -3009,6 +3020,7 @@ static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, * this is mostly about limits (which cannot be queried directly with VIDIOC_G_FMT and friends */ case V4L2LOOPBACK_CTL_QUERY: + case V4L2LOOPBACK_CTL_QUERY_legacy: if (!parm) break; if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) < @@ -3061,6 +3073,15 @@ static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, } ret = 0; break; + case V4L2LOOPBACK_CTL_VERSION: + if (!parm) + break; + if (copy_to_user((void *)parm, &version, sizeof(version))) { + ret = -EFAULT; + break; + } + ret = 0; + break; } mutex_unlock(&v4l2loopback_ctl_mutex); diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h index ec0be6cbe97d1b..e48e0ce5949d6e 100644 --- a/drivers/media/v4l2-core/v4l2loopback.h +++ b/drivers/media/v4l2-core/v4l2loopback.h @@ -11,7 +11,7 @@ #define _V4L2LOOPBACK_H #define V4L2LOOPBACK_VERSION_MAJOR 0 -#define V4L2LOOPBACK_VERSION_MINOR 14 +#define V4L2LOOPBACK_VERSION_MINOR 15 #define V4L2LOOPBACK_VERSION_BUGFIX 0 /* /dev/v4l2loopback interface */ @@ -32,8 +32,8 @@ struct v4l2_loopback_config { * or one (and only one) of them must be -1 * */ - int output_nr; - int unused; /*capture_nr;*/ + __s32 output_nr; + __s32 unused; /*capture_nr;*/ /** * a nice name for your device @@ -45,27 +45,27 @@ struct v4l2_loopback_config { * allowed frame size * if too low, default values are used */ - unsigned int min_width; - unsigned int max_width; - unsigned int min_height; - unsigned int max_height; + __u32 min_width; + __u32 max_width; + __u32 min_height; + __u32 max_height; /** * number of buffers to allocate for the queue * if set to <=0, default values are used */ - int max_buffers; + __s32 max_buffers; /** * how many consumers are allowed to open this device concurrently * if set to <=0, default values are used */ - int max_openers; + __s32 max_openers; /** * set the debugging level for this device */ - int debug; + __s32 debug; /** * whether to announce OUTPUT/CAPTURE capabilities exclusively @@ -74,9 +74,17 @@ struct v4l2_loopback_config { * NOTE: this is going to be removed once separate output/capture * devices are implemented */ - int announce_all_caps; + __s32 announce_all_caps; }; +#define V4L2LOOPBACK_CTL_IOCTLMAGIC '~' + +/* a pointer to an (unsigned int) that - on success - will hold + * the version code of the v4l2loopback module + * as returned by KERNEL_VERSION(MAJOR, MINOR, BUGFIX) + */ +#define V4L2LOOPBACK_CTL_VERSION _IOR(V4L2LOOPBACK_CTL_IOCTLMAGIC, 0, __u32) + /* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the * to-be-created device set. * if the ptr is NULL, a new device is created with default values at the driver's discretion. @@ -84,15 +92,17 @@ struct v4l2_loopback_config { * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY, * to get more information on the device) */ -#define V4L2LOOPBACK_CTL_ADD 0x4C80 +#define V4L2LOOPBACK_CTL_ADD \ + _IOW(V4L2LOOPBACK_CTL_IOCTLMAGIC, 1, struct v4l2_loopback_config) + +/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ +#define V4L2LOOPBACK_CTL_REMOVE _IOW(V4L2LOOPBACK_CTL_IOCTLMAGIC, 2, __u32) /* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set * (the two values must either refer to video-devices associated with the same loopback device * or exactly one of them must be <0 */ -#define V4L2LOOPBACK_CTL_QUERY 0x4C82 - -/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ -#define V4L2LOOPBACK_CTL_REMOVE 0x4C81 +#define V4L2LOOPBACK_CTL_QUERY \ + _IOWR(V4L2LOOPBACK_CTL_IOCTLMAGIC, 3, struct v4l2_loopback_config) #endif /* _V4L2LOOPBACK_H */ From 908e270dc54352f31ef823c29892e2166a3897b6 Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Tue, 10 Jun 2025 18:45:08 +0700 Subject: [PATCH 2893/2924] linux6.15.y-le9uo-1.15a Signed-off-by: Eric Naim --- Documentation/admin-guide/sysctl/vm.rst | 6 +++--- mm/Kconfig | 6 +++--- mm/mm_init.c | 2 +- mm/vmscan.c | 9 ++++++++- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index f2e601171a3e24..2a94faa5a4cae3 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -128,7 +128,7 @@ swap and can lead to early OOM under memory pressure. The unit of measurement is the percentage of the total memory of the node. -The default value is 15. +The default value is 1. clean_low_ratio @@ -149,7 +149,7 @@ pages in memory. The unit of measurement is the percentage of the total memory of the node. -The default value is 0. +The default value is 15. clean_min_ratio @@ -170,7 +170,7 @@ types of pages cannot be reclaimed. The unit of measurement is the percentage of the total memory of the node. -The default value is 15. +The default value is 4. compact_memory diff --git a/mm/Kconfig b/mm/Kconfig index 5fc1934a30957a..825b5932debcd1 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -466,7 +466,7 @@ config ANON_MIN_RATIO int "Default value for vm.anon_min_ratio" depends on SYSCTL range 0 100 - default 15 + default 1 help This option sets the default value for vm.anon_min_ratio sysctl knob. @@ -484,7 +484,7 @@ config CLEAN_LOW_RATIO int "Default value for vm.clean_low_ratio" depends on SYSCTL range 0 100 - default 0 + default 15 help This option sets the default value for vm.clean_low_ratio sysctl knob. @@ -506,7 +506,7 @@ config CLEAN_MIN_RATIO int "Default value for vm.clean_min_ratio" depends on SYSCTL range 0 100 - default 15 + default 4 help This option sets the default value for vm.clean_min_ratio sysctl knob. diff --git a/mm/mm_init.c b/mm/mm_init.c index 78f87faf839478..8201039f1c1ccf 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2732,7 +2732,7 @@ static void __init mem_init_print_info(void) , K(totalhigh_pages()) #endif ); - printk(KERN_INFO "le9 Unofficial (le9uo) working set protection 1.8 by Masahito Suzuki (forked from hakavlad's original le9 patch)"); + printk(KERN_INFO "le9 Unofficial (le9uo) working set protection 1.15a by Masahito Suzuki (forked from hakavlad's original le9 patch)"); } void __init __weak arch_mm_preinit(void) diff --git a/mm/vmscan.c b/mm/vmscan.c index 4f991a6ae689e9..75f48ea2b33694 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4760,7 +4760,7 @@ static int get_type_to_scan(struct lruvec *lruvec, struct scan_control *sc, int { struct ctrl_pos sp, pv; - if (swappiness <= MIN_SWAPPINESS + 1) + if (swappiness == MIN_SWAPPINESS) return LRU_GEN_FILE; if (sc->clean_below_min) @@ -4770,6 +4770,9 @@ static int get_type_to_scan(struct lruvec *lruvec, struct scan_control *sc, int if (sc->clean_below_low) return LRU_GEN_ANON; + if (swappiness == MIN_SWAPPINESS + 1) + return LRU_GEN_FILE; + if (swappiness >= MAX_SWAPPINESS) return LRU_GEN_ANON; /* @@ -5031,6 +5034,10 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) prepare_workingset_protection(pgdat, sc); + if (sysctl_workingset_protection && sc->clean_below_min && + !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) + return 0; + /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; From f2e09c0daf044912f3ab743332043fe38c4d5cda Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Wed, 11 Jun 2025 18:03:22 +0200 Subject: [PATCH 2894/2924] iosched-6.15: bump ADIOS to v1.5.9 Signed-off-by: Piotr Gorski --- block/adios.c | 194 ++++++++++++++++++++++---------------------------- 1 file changed, 87 insertions(+), 107 deletions(-) diff --git a/block/adios.c b/block/adios.c index f008d28cd059d7..542b87b6a5be48 100644 --- a/block/adios.c +++ b/block/adios.c @@ -21,7 +21,7 @@ #include "blk-mq.h" #include "blk-mq-sched.h" -#define ADIOS_VERSION "1.5.8" +#define ADIOS_VERSION "1.5.9" // Define operation types supported by ADIOS enum adios_op_type { @@ -317,7 +317,6 @@ static bool lm_update_large_buckets( // Update the latency model parameters and statistics static void latency_model_update(struct latency_model *model) { - unsigned long flags; u64 now; u32 small_count, large_count; bool time_elapsed; @@ -325,29 +324,27 @@ static void latency_model_update(struct latency_model *model) { guard(spinlock_irqsave)(&model->lock); - spin_lock_irqsave(&model->buckets_lock, flags); - - // Whether enough time has elapsed since the last update - now = jiffies; - time_elapsed = unlikely(!model->base) || model->last_update_jiffies + - msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; - - // Count the number of entries in buckets - small_count = lm_count_small_entries(model); - large_count = lm_count_large_entries(model); - - // Update small buckets - if (small_count && (time_elapsed || - LM_SAMPLES_THRESHOLD <= small_count || !model->base)) - small_processed = lm_update_small_buckets( - model, small_count, !model->base); - // Update large buckets - if (large_count && (time_elapsed || - LM_SAMPLES_THRESHOLD <= large_count || !model->slope)) - large_processed = lm_update_large_buckets( - model, large_count, !model->slope); - - spin_unlock_irqrestore(&model->buckets_lock, flags); + scoped_guard(spinlock_irqsave, &model->buckets_lock) { + // Whether enough time has elapsed since the last update + now = jiffies; + time_elapsed = unlikely(!model->base) || model->last_update_jiffies + + msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; + + // Count the number of entries in buckets + small_count = lm_count_small_entries(model); + large_count = lm_count_large_entries(model); + + // Update small buckets + if (small_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= small_count || !model->base)) + small_processed = lm_update_small_buckets( + model, small_count, !model->base); + // Update large buckets + if (large_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= large_count || !model->slope)) + large_processed = lm_update_large_buckets( + model, large_count, !model->slope); + } // Update the base parameter if small bucket was processed if (small_processed && likely(model->small_count)) @@ -364,8 +361,7 @@ static void latency_model_update(struct latency_model *model) { } // Determine the bucket index for a given measured and predicted latency -static u8 lm_input_bucket_index( - struct latency_model *model, u64 measured, u64 predicted) { +static u8 lm_input_bucket_index(u64 measured, u64 predicted) { u8 bucket_index; if (measured < predicted * 2) @@ -381,44 +377,38 @@ static u8 lm_input_bucket_index( // Input latency data into the latency model static void latency_model_input(struct latency_model *model, u32 block_size, u64 latency, u64 pred_lat) { - unsigned long flags; u8 bucket_index; - spin_lock_irqsave(&model->buckets_lock, flags); - if (block_size <= LM_BLOCK_SIZE_THRESHOLD) { // Handle small requests - bucket_index = lm_input_bucket_index(model, latency, model->base ?: 1); + bucket_index = lm_input_bucket_index(latency, model->base ?: 1); if (bucket_index >= LM_LAT_BUCKET_COUNT) bucket_index = LM_LAT_BUCKET_COUNT - 1; - model->small_bucket[bucket_index].count++; - model->small_bucket[bucket_index].sum_latency += latency; + scoped_guard(spinlock_bh, &model->buckets_lock) { + model->small_bucket[bucket_index].count++; + model->small_bucket[bucket_index].sum_latency += latency; + } - if (unlikely(!model->base)) { - spin_unlock_irqrestore(&model->buckets_lock, flags); + if (unlikely(!model->base)) latency_model_update(model); - return; - } } else { // Handle large requests - if (!model->base || !pred_lat) { - spin_unlock_irqrestore(&model->buckets_lock, flags); - return; - } - - bucket_index = lm_input_bucket_index(model, latency, pred_lat); + bucket_index = lm_input_bucket_index(latency, pred_lat); if (bucket_index >= LM_LAT_BUCKET_COUNT) bucket_index = LM_LAT_BUCKET_COUNT - 1; - model->large_bucket[bucket_index].count++; - model->large_bucket[bucket_index].sum_latency += latency; - model->large_bucket[bucket_index].sum_block_size += block_size; - } + scoped_guard(spinlock_bh, &model->buckets_lock) { + if (!model->base || !pred_lat) + return; - spin_unlock_irqrestore(&model->buckets_lock, flags); + model->large_bucket[bucket_index].count++; + model->large_bucket[bucket_index].sum_latency += latency; + model->large_bucket[bucket_index].sum_block_size += block_size; + } + } } // Predict the latency for a given block size using the latency model @@ -597,14 +587,12 @@ static void adios_merged_requests(struct request_queue *q, struct request *req, // Try to merge a bio into an existing rq before associating it with an rq static bool adios_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { - unsigned long flags; struct adios_data *ad = q->elevator->elevator_data; struct request *free = NULL; bool ret; - spin_lock_irqsave(&ad->lock, flags); - ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); - spin_unlock_irqrestore(&ad->lock, flags); + scoped_guard(spinlock_irqsave, &ad->lock) + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); if (free) blk_mq_free_request(free); @@ -615,7 +603,6 @@ static bool adios_bio_merge(struct request_queue *q, struct bio *bio, // Insert a request into the scheduler static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_insert_t insert_flags, struct list_head *free) { - unsigned long flags; bool dl_idx = adios_optype_not_read(rq); struct request_queue *q = hctx->queue; struct adios_data *ad = q->elevator->elevator_data; @@ -623,9 +610,8 @@ static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, lockdep_assert_held(&ad->lock); if (insert_flags & BLK_MQ_INSERT_AT_HEAD) { - spin_lock_irqsave(&ad->pq_lock, flags); - list_add(&rq->queuelist, &ad->prio_queue); - spin_unlock_irqrestore(&ad->pq_lock, flags); + scoped_guard(spinlock_irqsave, &ad->pq_lock) + list_add_tail(&rq->queuelist, &ad->prio_queue); return; } @@ -645,20 +631,18 @@ static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, static void adios_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *list, blk_insert_t insert_flags) { - unsigned long flags; struct request_queue *q = hctx->queue; struct adios_data *ad = q->elevator->elevator_data; LIST_HEAD(free); - spin_lock_irqsave(&ad->lock, flags); - while (!list_empty(list)) { - struct request *rq; + scoped_guard(spinlock_irqsave, &ad->lock) + while (!list_empty(list)) { + struct request *rq; - rq = list_first_entry(list, struct request, queuelist); - list_del_init(&rq->queuelist); - insert_request(hctx, rq, insert_flags, &free); - } - spin_unlock_irqrestore(&ad->lock, flags); + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + insert_request(hctx, rq, insert_flags, &free); + } blk_mq_free_requests(&free); } @@ -743,43 +727,44 @@ static void init_batch_queues(struct adios_data *ad) { // Fill the batch queues with requests from the deadline-sorted red-black tree static bool fill_batch_queues(struct adios_data *ad, u64 current_lat) { - unsigned long flags; u32 count = 0; u32 optype_count[ADIOS_OPTYPES] = {0}; u8 page = (ad->bq_page + 1) % ADIOS_BQ_PAGES; reset_batch_counts(ad, page); - spin_lock_irqsave(&ad->lock, flags); - while (true) { - struct request *rq = next_request(ad); - if (!rq) - break; + scoped_guard(spinlock_irqsave, &ad->lock) { + while (true) { + struct request *rq = next_request(ad); + if (!rq) + break; + + struct adios_rq_data *rd = get_rq_data(rq); + u8 optype = adios_optype(rq); + current_lat += rd->pred_lat; + + // Check batch size and total predicted latency + if (count && (!ad->latency_model[optype].base || + ad->batch_count[page][optype] >= ad->batch_limit[optype] || + current_lat > ad->global_latency_window)) { + break; + } - struct adios_rq_data *rd = get_rq_data(rq); - u8 optype = adios_optype(rq); - current_lat += rd->pred_lat; + remove_request(ad, rq); - // Check batch size and total predicted latency - if (count && (!ad->latency_model[optype].base || - ad->batch_count[page][optype] >= ad->batch_limit[optype] || - current_lat > ad->global_latency_window)) { - break; + // Add request to the corresponding batch queue + list_add_tail(&rq->queuelist, &ad->batch_queue[page][optype]); + ad->batch_count[page][optype]++; + atomic64_add(rd->pred_lat, &ad->total_pred_lat); + optype_count[optype]++; + count++; } - remove_request(ad, rq); - - // Add request to the corresponding batch queue - list_add_tail(&rq->queuelist, &ad->batch_queue[page][optype]); - ad->batch_count[page][optype]++; - atomic64_add(rd->pred_lat, &ad->total_pred_lat); - optype_count[optype]++; - count++; + if (count) + ad->more_bq_ready = true; } - spin_unlock_irqrestore(&ad->lock, flags); if (count) { - ad->more_bq_ready = true; for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) { if (ad->batch_actual_max_size[optype] < optype_count[optype]) ad->batch_actual_max_size[optype] = optype_count[optype]; @@ -806,7 +791,7 @@ static struct request *dispatch_from_bq(struct adios_data *ad) { tpl = atomic64_read(&ad->total_pred_lat); if (!ad->more_bq_ready && (!tpl || - tpl < div_u64(ad->global_latency_window * ad->bq_refill_below_ratio, 100) )) + tpl < div_u64(ad->global_latency_window * ad->bq_refill_below_ratio, 100))) fill_batch_queues(ad, tpl); again: @@ -1198,15 +1183,14 @@ static ssize_t adios_reset_lat_model_store( for (u8 i = 0; i < ADIOS_OPTYPES; i++) { struct latency_model *model = &ad->latency_model[i]; - unsigned long flags; - spin_lock_irqsave(&model->lock, flags); - model->base = 0ULL; - model->slope = 0ULL; - model->small_sum_delay = 0ULL; - model->small_count = 0ULL; - model->large_sum_delay = 0ULL; - model->large_sum_bsize = 0ULL; - spin_unlock_irqrestore(&model->lock, flags); + scoped_guard(spinlock_irqsave, &model->lock) { + model->base = 0ULL; + model->slope = 0ULL; + model->small_sum_delay = 0ULL; + model->small_count = 0ULL; + model->large_sum_delay = 0ULL; + model->large_sum_bsize = 0ULL; + } } return count; @@ -1229,10 +1213,8 @@ static ssize_t adios_shrink_##name##_store( \ return -EINVAL; \ for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ struct latency_model *model = &ad->latency_model[i]; \ - unsigned long flags; \ - spin_lock_irqsave(&model->lock, flags); \ - model->model_field = val; \ - spin_unlock_irqrestore(&model->lock, flags); \ + scoped_guard(spinlock_irqsave, &model->lock) \ + model->model_field = val; \ } \ return count; \ } \ @@ -1242,10 +1224,8 @@ static ssize_t adios_shrink_##name##_show( \ u32 val = 0; \ for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ struct latency_model *model = &ad->latency_model[i]; \ - unsigned long flags; \ - spin_lock_irqsave(&model->lock, flags); \ - val = model->model_field; \ - spin_unlock_irqrestore(&model->lock, flags); \ + scoped_guard(spinlock_irqsave, &model->lock) \ + val = model->model_field; \ } \ return sprintf(page, "%u\n", val); \ } From 574553ee09aed3780cdf8b54cfb3300d85baa2ad Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Wed, 11 Jun 2025 22:02:45 +0200 Subject: [PATCH 2895/2924] iosched-6.15: bump ADIOS to v1.5.10 Signed-off-by: Piotr Gorski --- block/adios.c | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/block/adios.c b/block/adios.c index 542b87b6a5be48..5f9af958a8d2c0 100644 --- a/block/adios.c +++ b/block/adios.c @@ -21,7 +21,7 @@ #include "blk-mq.h" #include "blk-mq-sched.h" -#define ADIOS_VERSION "1.5.9" +#define ADIOS_VERSION "1.5.10" // Define operation types supported by ADIOS enum adios_op_type { @@ -727,44 +727,48 @@ static void init_batch_queues(struct adios_data *ad) { // Fill the batch queues with requests from the deadline-sorted red-black tree static bool fill_batch_queues(struct adios_data *ad, u64 current_lat) { + unsigned long flags; u32 count = 0; u32 optype_count[ADIOS_OPTYPES] = {0}; u8 page = (ad->bq_page + 1) % ADIOS_BQ_PAGES; reset_batch_counts(ad, page); - scoped_guard(spinlock_irqsave, &ad->lock) { - while (true) { - struct request *rq = next_request(ad); - if (!rq) - break; + while (true) { + spin_lock_irqsave(&ad->lock, flags); - struct adios_rq_data *rd = get_rq_data(rq); - u8 optype = adios_optype(rq); - current_lat += rd->pred_lat; + struct request *rq = next_request(ad); + if (!rq) { + spin_unlock_irqrestore(&ad->lock, flags); + break; + } - // Check batch size and total predicted latency - if (count && (!ad->latency_model[optype].base || + struct adios_rq_data *rd = get_rq_data(rq); + u8 optype = adios_optype(rq); + current_lat += rd->pred_lat; + + // Check batch size and total predicted latency + if (count && (!ad->latency_model[optype].base || ad->batch_count[page][optype] >= ad->batch_limit[optype] || current_lat > ad->global_latency_window)) { - break; - } + spin_unlock_irqrestore(&ad->lock, flags); + break; + } - remove_request(ad, rq); + remove_request(ad, rq); - // Add request to the corresponding batch queue - list_add_tail(&rq->queuelist, &ad->batch_queue[page][optype]); - ad->batch_count[page][optype]++; - atomic64_add(rd->pred_lat, &ad->total_pred_lat); - optype_count[optype]++; - count++; - } + spin_unlock_irqrestore(&ad->lock, flags); - if (count) - ad->more_bq_ready = true; + // Add request to the corresponding batch queue + list_add_tail(&rq->queuelist, &ad->batch_queue[page][optype]); + ad->batch_count[page][optype]++; + atomic64_add(rd->pred_lat, &ad->total_pred_lat); + optype_count[optype]++; + count++; } if (count) { + ad->more_bq_ready = true; for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) { if (ad->batch_actual_max_size[optype] < optype_count[optype]) ad->batch_actual_max_size[optype] = optype_count[optype]; From 05f81dec226b0093f660a6f54b87595e3b870dac Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Thu, 12 Jun 2025 15:11:29 +0200 Subject: [PATCH 2896/2924] iosched-6.15: back to v1.5.8 Signed-off-by: Piotr Gorski --- block/adios.c | 160 +++++++++++++++++++++++++++----------------------- 1 file changed, 88 insertions(+), 72 deletions(-) diff --git a/block/adios.c b/block/adios.c index 5f9af958a8d2c0..f008d28cd059d7 100644 --- a/block/adios.c +++ b/block/adios.c @@ -21,7 +21,7 @@ #include "blk-mq.h" #include "blk-mq-sched.h" -#define ADIOS_VERSION "1.5.10" +#define ADIOS_VERSION "1.5.8" // Define operation types supported by ADIOS enum adios_op_type { @@ -317,6 +317,7 @@ static bool lm_update_large_buckets( // Update the latency model parameters and statistics static void latency_model_update(struct latency_model *model) { + unsigned long flags; u64 now; u32 small_count, large_count; bool time_elapsed; @@ -324,27 +325,29 @@ static void latency_model_update(struct latency_model *model) { guard(spinlock_irqsave)(&model->lock); - scoped_guard(spinlock_irqsave, &model->buckets_lock) { - // Whether enough time has elapsed since the last update - now = jiffies; - time_elapsed = unlikely(!model->base) || model->last_update_jiffies + - msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; - - // Count the number of entries in buckets - small_count = lm_count_small_entries(model); - large_count = lm_count_large_entries(model); - - // Update small buckets - if (small_count && (time_elapsed || - LM_SAMPLES_THRESHOLD <= small_count || !model->base)) - small_processed = lm_update_small_buckets( - model, small_count, !model->base); - // Update large buckets - if (large_count && (time_elapsed || - LM_SAMPLES_THRESHOLD <= large_count || !model->slope)) - large_processed = lm_update_large_buckets( - model, large_count, !model->slope); - } + spin_lock_irqsave(&model->buckets_lock, flags); + + // Whether enough time has elapsed since the last update + now = jiffies; + time_elapsed = unlikely(!model->base) || model->last_update_jiffies + + msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; + + // Count the number of entries in buckets + small_count = lm_count_small_entries(model); + large_count = lm_count_large_entries(model); + + // Update small buckets + if (small_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= small_count || !model->base)) + small_processed = lm_update_small_buckets( + model, small_count, !model->base); + // Update large buckets + if (large_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= large_count || !model->slope)) + large_processed = lm_update_large_buckets( + model, large_count, !model->slope); + + spin_unlock_irqrestore(&model->buckets_lock, flags); // Update the base parameter if small bucket was processed if (small_processed && likely(model->small_count)) @@ -361,7 +364,8 @@ static void latency_model_update(struct latency_model *model) { } // Determine the bucket index for a given measured and predicted latency -static u8 lm_input_bucket_index(u64 measured, u64 predicted) { +static u8 lm_input_bucket_index( + struct latency_model *model, u64 measured, u64 predicted) { u8 bucket_index; if (measured < predicted * 2) @@ -377,38 +381,44 @@ static u8 lm_input_bucket_index(u64 measured, u64 predicted) { // Input latency data into the latency model static void latency_model_input(struct latency_model *model, u32 block_size, u64 latency, u64 pred_lat) { + unsigned long flags; u8 bucket_index; + spin_lock_irqsave(&model->buckets_lock, flags); + if (block_size <= LM_BLOCK_SIZE_THRESHOLD) { // Handle small requests - bucket_index = lm_input_bucket_index(latency, model->base ?: 1); + bucket_index = lm_input_bucket_index(model, latency, model->base ?: 1); if (bucket_index >= LM_LAT_BUCKET_COUNT) bucket_index = LM_LAT_BUCKET_COUNT - 1; - scoped_guard(spinlock_bh, &model->buckets_lock) { - model->small_bucket[bucket_index].count++; - model->small_bucket[bucket_index].sum_latency += latency; - } + model->small_bucket[bucket_index].count++; + model->small_bucket[bucket_index].sum_latency += latency; - if (unlikely(!model->base)) + if (unlikely(!model->base)) { + spin_unlock_irqrestore(&model->buckets_lock, flags); latency_model_update(model); + return; + } } else { // Handle large requests - bucket_index = lm_input_bucket_index(latency, pred_lat); + if (!model->base || !pred_lat) { + spin_unlock_irqrestore(&model->buckets_lock, flags); + return; + } + + bucket_index = lm_input_bucket_index(model, latency, pred_lat); if (bucket_index >= LM_LAT_BUCKET_COUNT) bucket_index = LM_LAT_BUCKET_COUNT - 1; - scoped_guard(spinlock_bh, &model->buckets_lock) { - if (!model->base || !pred_lat) - return; - - model->large_bucket[bucket_index].count++; - model->large_bucket[bucket_index].sum_latency += latency; - model->large_bucket[bucket_index].sum_block_size += block_size; - } + model->large_bucket[bucket_index].count++; + model->large_bucket[bucket_index].sum_latency += latency; + model->large_bucket[bucket_index].sum_block_size += block_size; } + + spin_unlock_irqrestore(&model->buckets_lock, flags); } // Predict the latency for a given block size using the latency model @@ -587,12 +597,14 @@ static void adios_merged_requests(struct request_queue *q, struct request *req, // Try to merge a bio into an existing rq before associating it with an rq static bool adios_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { + unsigned long flags; struct adios_data *ad = q->elevator->elevator_data; struct request *free = NULL; bool ret; - scoped_guard(spinlock_irqsave, &ad->lock) - ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); + spin_lock_irqsave(&ad->lock, flags); + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); + spin_unlock_irqrestore(&ad->lock, flags); if (free) blk_mq_free_request(free); @@ -603,6 +615,7 @@ static bool adios_bio_merge(struct request_queue *q, struct bio *bio, // Insert a request into the scheduler static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_insert_t insert_flags, struct list_head *free) { + unsigned long flags; bool dl_idx = adios_optype_not_read(rq); struct request_queue *q = hctx->queue; struct adios_data *ad = q->elevator->elevator_data; @@ -610,8 +623,9 @@ static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, lockdep_assert_held(&ad->lock); if (insert_flags & BLK_MQ_INSERT_AT_HEAD) { - scoped_guard(spinlock_irqsave, &ad->pq_lock) - list_add_tail(&rq->queuelist, &ad->prio_queue); + spin_lock_irqsave(&ad->pq_lock, flags); + list_add(&rq->queuelist, &ad->prio_queue); + spin_unlock_irqrestore(&ad->pq_lock, flags); return; } @@ -631,18 +645,20 @@ static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, static void adios_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *list, blk_insert_t insert_flags) { + unsigned long flags; struct request_queue *q = hctx->queue; struct adios_data *ad = q->elevator->elevator_data; LIST_HEAD(free); - scoped_guard(spinlock_irqsave, &ad->lock) - while (!list_empty(list)) { - struct request *rq; + spin_lock_irqsave(&ad->lock, flags); + while (!list_empty(list)) { + struct request *rq; - rq = list_first_entry(list, struct request, queuelist); - list_del_init(&rq->queuelist); - insert_request(hctx, rq, insert_flags, &free); - } + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + insert_request(hctx, rq, insert_flags, &free); + } + spin_unlock_irqrestore(&ad->lock, flags); blk_mq_free_requests(&free); } @@ -734,14 +750,11 @@ static bool fill_batch_queues(struct adios_data *ad, u64 current_lat) { reset_batch_counts(ad, page); + spin_lock_irqsave(&ad->lock, flags); while (true) { - spin_lock_irqsave(&ad->lock, flags); - struct request *rq = next_request(ad); - if (!rq) { - spin_unlock_irqrestore(&ad->lock, flags); + if (!rq) break; - } struct adios_rq_data *rd = get_rq_data(rq); u8 optype = adios_optype(rq); @@ -749,16 +762,13 @@ static bool fill_batch_queues(struct adios_data *ad, u64 current_lat) { // Check batch size and total predicted latency if (count && (!ad->latency_model[optype].base || - ad->batch_count[page][optype] >= ad->batch_limit[optype] || - current_lat > ad->global_latency_window)) { - spin_unlock_irqrestore(&ad->lock, flags); + ad->batch_count[page][optype] >= ad->batch_limit[optype] || + current_lat > ad->global_latency_window)) { break; } remove_request(ad, rq); - spin_unlock_irqrestore(&ad->lock, flags); - // Add request to the corresponding batch queue list_add_tail(&rq->queuelist, &ad->batch_queue[page][optype]); ad->batch_count[page][optype]++; @@ -766,6 +776,7 @@ static bool fill_batch_queues(struct adios_data *ad, u64 current_lat) { optype_count[optype]++; count++; } + spin_unlock_irqrestore(&ad->lock, flags); if (count) { ad->more_bq_ready = true; @@ -795,7 +806,7 @@ static struct request *dispatch_from_bq(struct adios_data *ad) { tpl = atomic64_read(&ad->total_pred_lat); if (!ad->more_bq_ready && (!tpl || - tpl < div_u64(ad->global_latency_window * ad->bq_refill_below_ratio, 100))) + tpl < div_u64(ad->global_latency_window * ad->bq_refill_below_ratio, 100) )) fill_batch_queues(ad, tpl); again: @@ -1187,14 +1198,15 @@ static ssize_t adios_reset_lat_model_store( for (u8 i = 0; i < ADIOS_OPTYPES; i++) { struct latency_model *model = &ad->latency_model[i]; - scoped_guard(spinlock_irqsave, &model->lock) { - model->base = 0ULL; - model->slope = 0ULL; - model->small_sum_delay = 0ULL; - model->small_count = 0ULL; - model->large_sum_delay = 0ULL; - model->large_sum_bsize = 0ULL; - } + unsigned long flags; + spin_lock_irqsave(&model->lock, flags); + model->base = 0ULL; + model->slope = 0ULL; + model->small_sum_delay = 0ULL; + model->small_count = 0ULL; + model->large_sum_delay = 0ULL; + model->large_sum_bsize = 0ULL; + spin_unlock_irqrestore(&model->lock, flags); } return count; @@ -1217,8 +1229,10 @@ static ssize_t adios_shrink_##name##_store( \ return -EINVAL; \ for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ struct latency_model *model = &ad->latency_model[i]; \ - scoped_guard(spinlock_irqsave, &model->lock) \ - model->model_field = val; \ + unsigned long flags; \ + spin_lock_irqsave(&model->lock, flags); \ + model->model_field = val; \ + spin_unlock_irqrestore(&model->lock, flags); \ } \ return count; \ } \ @@ -1228,8 +1242,10 @@ static ssize_t adios_shrink_##name##_show( \ u32 val = 0; \ for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ struct latency_model *model = &ad->latency_model[i]; \ - scoped_guard(spinlock_irqsave, &model->lock) \ - val = model->model_field; \ + unsigned long flags; \ + spin_lock_irqsave(&model->lock, flags); \ + val = model->model_field; \ + spin_unlock_irqrestore(&model->lock, flags); \ } \ return sprintf(page, "%u\n", val); \ } From 896330ef4a45fb43702ff62b801e90bbc955828c Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Thu, 12 Jun 2025 15:30:36 +0200 Subject: [PATCH 2897/2924] iosched-6.15: bump ADIOS to v1.5.12 Signed-off-by: Piotr Gorski --- block/adios.c | 161 ++++++++++++++++++++++---------------------------- 1 file changed, 72 insertions(+), 89 deletions(-) diff --git a/block/adios.c b/block/adios.c index f008d28cd059d7..8ab4675871863e 100644 --- a/block/adios.c +++ b/block/adios.c @@ -21,7 +21,7 @@ #include "blk-mq.h" #include "blk-mq-sched.h" -#define ADIOS_VERSION "1.5.8" +#define ADIOS_VERSION "1.5.12" // Define operation types supported by ADIOS enum adios_op_type { @@ -317,7 +317,6 @@ static bool lm_update_large_buckets( // Update the latency model parameters and statistics static void latency_model_update(struct latency_model *model) { - unsigned long flags; u64 now; u32 small_count, large_count; bool time_elapsed; @@ -325,29 +324,27 @@ static void latency_model_update(struct latency_model *model) { guard(spinlock_irqsave)(&model->lock); - spin_lock_irqsave(&model->buckets_lock, flags); - - // Whether enough time has elapsed since the last update - now = jiffies; - time_elapsed = unlikely(!model->base) || model->last_update_jiffies + - msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; - - // Count the number of entries in buckets - small_count = lm_count_small_entries(model); - large_count = lm_count_large_entries(model); - - // Update small buckets - if (small_count && (time_elapsed || - LM_SAMPLES_THRESHOLD <= small_count || !model->base)) - small_processed = lm_update_small_buckets( - model, small_count, !model->base); - // Update large buckets - if (large_count && (time_elapsed || - LM_SAMPLES_THRESHOLD <= large_count || !model->slope)) - large_processed = lm_update_large_buckets( - model, large_count, !model->slope); - - spin_unlock_irqrestore(&model->buckets_lock, flags); + scoped_guard(spinlock_irqsave, &model->buckets_lock) { + // Whether enough time has elapsed since the last update + now = jiffies; + time_elapsed = unlikely(!model->base) || model->last_update_jiffies + + msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; + + // Count the number of entries in buckets + small_count = lm_count_small_entries(model); + large_count = lm_count_large_entries(model); + + // Update small buckets + if (small_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= small_count || !model->base)) + small_processed = lm_update_small_buckets( + model, small_count, !model->base); + // Update large buckets + if (large_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= large_count || !model->slope)) + large_processed = lm_update_large_buckets( + model, large_count, !model->slope); + } // Update the base parameter if small bucket was processed if (small_processed && likely(model->small_count)) @@ -364,8 +361,7 @@ static void latency_model_update(struct latency_model *model) { } // Determine the bucket index for a given measured and predicted latency -static u8 lm_input_bucket_index( - struct latency_model *model, u64 measured, u64 predicted) { +static u8 lm_input_bucket_index(u64 measured, u64 predicted) { u8 bucket_index; if (measured < predicted * 2) @@ -388,7 +384,7 @@ static void latency_model_input(struct latency_model *model, if (block_size <= LM_BLOCK_SIZE_THRESHOLD) { // Handle small requests - bucket_index = lm_input_bucket_index(model, latency, model->base ?: 1); + bucket_index = lm_input_bucket_index(latency, model->base ?: 1); if (bucket_index >= LM_LAT_BUCKET_COUNT) bucket_index = LM_LAT_BUCKET_COUNT - 1; @@ -408,7 +404,7 @@ static void latency_model_input(struct latency_model *model, return; } - bucket_index = lm_input_bucket_index(model, latency, pred_lat); + bucket_index = lm_input_bucket_index(latency, pred_lat); if (bucket_index >= LM_LAT_BUCKET_COUNT) bucket_index = LM_LAT_BUCKET_COUNT - 1; @@ -597,14 +593,12 @@ static void adios_merged_requests(struct request_queue *q, struct request *req, // Try to merge a bio into an existing rq before associating it with an rq static bool adios_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { - unsigned long flags; struct adios_data *ad = q->elevator->elevator_data; struct request *free = NULL; bool ret; - spin_lock_irqsave(&ad->lock, flags); - ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); - spin_unlock_irqrestore(&ad->lock, flags); + scoped_guard(spinlock_irqsave, &ad->lock) + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); if (free) blk_mq_free_request(free); @@ -615,7 +609,6 @@ static bool adios_bio_merge(struct request_queue *q, struct bio *bio, // Insert a request into the scheduler static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_insert_t insert_flags, struct list_head *free) { - unsigned long flags; bool dl_idx = adios_optype_not_read(rq); struct request_queue *q = hctx->queue; struct adios_data *ad = q->elevator->elevator_data; @@ -623,9 +616,8 @@ static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, lockdep_assert_held(&ad->lock); if (insert_flags & BLK_MQ_INSERT_AT_HEAD) { - spin_lock_irqsave(&ad->pq_lock, flags); - list_add(&rq->queuelist, &ad->prio_queue); - spin_unlock_irqrestore(&ad->pq_lock, flags); + scoped_guard(spinlock_irqsave, &ad->pq_lock) + list_add_tail(&rq->queuelist, &ad->prio_queue); return; } @@ -645,20 +637,18 @@ static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, static void adios_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *list, blk_insert_t insert_flags) { - unsigned long flags; struct request_queue *q = hctx->queue; struct adios_data *ad = q->elevator->elevator_data; LIST_HEAD(free); - spin_lock_irqsave(&ad->lock, flags); - while (!list_empty(list)) { - struct request *rq; + scoped_guard(spinlock_irqsave, &ad->lock) + while (!list_empty(list)) { + struct request *rq; - rq = list_first_entry(list, struct request, queuelist); - list_del_init(&rq->queuelist); - insert_request(hctx, rq, insert_flags, &free); - } - spin_unlock_irqrestore(&ad->lock, flags); + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + insert_request(hctx, rq, insert_flags, &free); + } blk_mq_free_requests(&free); } @@ -743,41 +733,39 @@ static void init_batch_queues(struct adios_data *ad) { // Fill the batch queues with requests from the deadline-sorted red-black tree static bool fill_batch_queues(struct adios_data *ad, u64 current_lat) { - unsigned long flags; u32 count = 0; u32 optype_count[ADIOS_OPTYPES] = {0}; u8 page = (ad->bq_page + 1) % ADIOS_BQ_PAGES; reset_batch_counts(ad, page); - spin_lock_irqsave(&ad->lock, flags); - while (true) { - struct request *rq = next_request(ad); - if (!rq) - break; + scoped_guard(spinlock_irqsave, &ad->lock) + while (true) { + struct request *rq = next_request(ad); + if (!rq) + break; + + struct adios_rq_data *rd = get_rq_data(rq); + u8 optype = adios_optype(rq); + current_lat += rd->pred_lat; + + // Check batch size and total predicted latency + if (count && (!ad->latency_model[optype].base || + ad->batch_count[page][optype] >= ad->batch_limit[optype] || + current_lat > ad->global_latency_window)) { + break; + } - struct adios_rq_data *rd = get_rq_data(rq); - u8 optype = adios_optype(rq); - current_lat += rd->pred_lat; + remove_request(ad, rq); - // Check batch size and total predicted latency - if (count && (!ad->latency_model[optype].base || - ad->batch_count[page][optype] >= ad->batch_limit[optype] || - current_lat > ad->global_latency_window)) { - break; + // Add request to the corresponding batch queue + list_add_tail(&rq->queuelist, &ad->batch_queue[page][optype]); + ad->batch_count[page][optype]++; + atomic64_add(rd->pred_lat, &ad->total_pred_lat); + optype_count[optype]++; + count++; } - remove_request(ad, rq); - - // Add request to the corresponding batch queue - list_add_tail(&rq->queuelist, &ad->batch_queue[page][optype]); - ad->batch_count[page][optype]++; - atomic64_add(rd->pred_lat, &ad->total_pred_lat); - optype_count[optype]++; - count++; - } - spin_unlock_irqrestore(&ad->lock, flags); - if (count) { ad->more_bq_ready = true; for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) { @@ -806,7 +794,7 @@ static struct request *dispatch_from_bq(struct adios_data *ad) { tpl = atomic64_read(&ad->total_pred_lat); if (!ad->more_bq_ready && (!tpl || - tpl < div_u64(ad->global_latency_window * ad->bq_refill_below_ratio, 100) )) + tpl < div_u64(ad->global_latency_window * ad->bq_refill_below_ratio, 100))) fill_batch_queues(ad, tpl); again: @@ -1198,15 +1186,14 @@ static ssize_t adios_reset_lat_model_store( for (u8 i = 0; i < ADIOS_OPTYPES; i++) { struct latency_model *model = &ad->latency_model[i]; - unsigned long flags; - spin_lock_irqsave(&model->lock, flags); - model->base = 0ULL; - model->slope = 0ULL; - model->small_sum_delay = 0ULL; - model->small_count = 0ULL; - model->large_sum_delay = 0ULL; - model->large_sum_bsize = 0ULL; - spin_unlock_irqrestore(&model->lock, flags); + scoped_guard(spinlock_irqsave, &model->lock) { + model->base = 0ULL; + model->slope = 0ULL; + model->small_sum_delay = 0ULL; + model->small_count = 0ULL; + model->large_sum_delay = 0ULL; + model->large_sum_bsize = 0ULL; + } } return count; @@ -1229,10 +1216,8 @@ static ssize_t adios_shrink_##name##_store( \ return -EINVAL; \ for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ struct latency_model *model = &ad->latency_model[i]; \ - unsigned long flags; \ - spin_lock_irqsave(&model->lock, flags); \ - model->model_field = val; \ - spin_unlock_irqrestore(&model->lock, flags); \ + scoped_guard(spinlock_irqsave, &model->lock) \ + model->model_field = val; \ } \ return count; \ } \ @@ -1242,10 +1227,8 @@ static ssize_t adios_shrink_##name##_show( \ u32 val = 0; \ for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ struct latency_model *model = &ad->latency_model[i]; \ - unsigned long flags; \ - spin_lock_irqsave(&model->lock, flags); \ - val = model->model_field; \ - spin_unlock_irqrestore(&model->lock, flags); \ + scoped_guard(spinlock_irqsave, &model->lock) \ + val = model->model_field; \ } \ return sprintf(page, "%u\n", val); \ } From f7538cde11ba855d0caf59fe53028d0e587dbe7a Mon Sep 17 00:00:00 2001 From: Eric Naim Date: Fri, 13 Jun 2025 22:27:27 +0700 Subject: [PATCH 2898/2924] Revert "HID: amd_sfh: Add support for tablet mode switch sensors" This reverts commit 891fe0dfb1a8570fab522c52ade80eeacf7a05e2. Signed-off-by: Eric Naim --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 2 -- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 4 --- drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 1 - .../hid_descriptor/amd_sfh_hid_desc.c | 27 ------------------- .../hid_descriptor/amd_sfh_hid_desc.h | 8 ------ .../hid_descriptor/amd_sfh_hid_report_desc.h | 20 -------------- 6 files changed, 62 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index 867019955b10f2..3438d392920fad 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -146,8 +146,6 @@ static const char *get_sensor_name(int idx) return "gyroscope"; case mag_idx: return "magnetometer"; - case tms_idx: - return "tablet-mode-switch"; case als_idx: case ACS_IDX: /* ambient color sensor */ return "ALS"; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index 7fd486a279b51b..1c1fd63330c939 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -29,7 +29,6 @@ #define ACEL_EN BIT(0) #define GYRO_EN BIT(1) #define MAGNO_EN BIT(2) -#define TMS_EN BIT(15) #define HPD_EN BIT(16) #define ALS_EN BIT(19) #define ACS_EN BIT(22) @@ -233,9 +232,6 @@ int amd_mp2_get_sensor_num(struct amd_mp2_dev *privdata, u8 *sensor_id) if (MAGNO_EN & activestatus) sensor_id[num_of_sensors++] = mag_idx; - if (TMS_EN & activestatus) - sensor_id[num_of_sensors++] = tms_idx; - if (ALS_EN & activestatus) sensor_id[num_of_sensors++] = als_idx; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index 617f0265b7073f..05e400a4a83e40 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -79,7 +79,6 @@ enum sensor_idx { accel_idx = 0, gyro_idx = 1, mag_idx = 2, - tms_idx = 15, als_idx = 19 }; diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c index 516a07bf3be6d9..ef1f9be8b89383 100644 --- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c +++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c @@ -47,11 +47,6 @@ static int get_report_descriptor(int sensor_idx, u8 *rep_desc) memcpy(rep_desc, comp3_report_descriptor, sizeof(comp3_report_descriptor)); break; - case tms_idx: /* tablet mode switch */ - memset(rep_desc, 0, sizeof(tms_report_descriptor)); - memcpy(rep_desc, tms_report_descriptor, - sizeof(tms_report_descriptor)); - break; case als_idx: /* ambient light sensor */ case ACS_IDX: /* ambient color sensor */ memset(rep_desc, 0, sizeof(als_report_descriptor)); @@ -102,16 +97,6 @@ static u32 get_descr_sz(int sensor_idx, int descriptor_name) return sizeof(struct magno_feature_report); } break; - case tms_idx: - switch (descriptor_name) { - case descr_size: - return sizeof(tms_report_descriptor); - case input_size: - return sizeof(struct tms_input_report); - case feature_size: - return sizeof(struct tms_feature_report); - } - break; case als_idx: case ACS_IDX: /* ambient color sensor */ switch (descriptor_name) { @@ -155,7 +140,6 @@ static u8 get_feature_report(int sensor_idx, int report_id, u8 *feature_report) struct accel3_feature_report acc_feature; struct gyro_feature_report gyro_feature; struct magno_feature_report magno_feature; - struct tms_feature_report tms_feature; struct hpd_feature_report hpd_feature; struct als_feature_report als_feature; u8 report_size = 0; @@ -191,11 +175,6 @@ static u8 get_feature_report(int sensor_idx, int report_id, u8 *feature_report) memcpy(feature_report, &magno_feature, sizeof(magno_feature)); report_size = sizeof(magno_feature); break; - case tms_idx: /* tablet mode switch */ - get_common_features(&tms_feature.common_property, report_id); - memcpy(feature_report, &tms_feature, sizeof(tms_feature)); - report_size = sizeof(tms_feature); - break; case als_idx: /* ambient light sensor */ case ACS_IDX: /* ambient color sensor */ get_common_features(&als_feature.common_property, report_id); @@ -235,7 +214,6 @@ static u8 get_input_report(u8 current_index, int sensor_idx, int report_id, struct accel3_input_report acc_input; struct gyro_input_report gyro_input; struct hpd_input_report hpd_input; - struct tms_input_report tms_input; struct als_input_report als_input; struct hpd_status hpdstatus; u8 report_size = 0; @@ -269,11 +247,6 @@ static u8 get_input_report(u8 current_index, int sensor_idx, int report_id, memcpy(input_report, &magno_input, sizeof(magno_input)); report_size = sizeof(magno_input); break; -case tms_idx: /* tablet mode switch */ - get_common_inputs(&tms_input.common_property, report_id); - report_size = sizeof(tms_input); - memcpy(input_report, &tms_input, sizeof(tms_input)); - break; case als_idx: /* Als */ case ACS_IDX: /* ambient color sensor */ get_common_inputs(&als_input.common_property, report_id); diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h index afcdac989cb673..882434b1501ff1 100644 --- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h +++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h @@ -114,12 +114,4 @@ struct hpd_input_report { u8 human_presence; } __packed; -struct tms_feature_report { - struct common_feature_property common_property; -} __packed; - -struct tms_input_report { - struct common_input_property common_property; -} __packed; - #endif diff --git a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h index 4dc87d6847769f..67ec2d6a417de5 100644 --- a/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h +++ b/drivers/hid/amd-sfh-hid/hid_descriptor/amd_sfh_hid_report_desc.h @@ -665,26 +665,6 @@ static const u8 als_report_descriptor[] = { 0xC0 /* HID end collection */ }; -/* TABLET MODE SWITCH */ -__maybe_unused // Used by sfh1.0, but not yet implemented in sfh1.1 -static const u8 tms_report_descriptor[] = { -0x06, 0x43, 0xFF, // Usage Page (Vendor Defined 0xFF43) -0x0A, 0x02, 0x02, // Usage (0x0202) -0xA1, 0x01, // Collection (Application) -0x85, 0x11, // Report ID (17) -0x15, 0x00, // Logical Minimum (0) -0x25, 0x01, // Logical Maximum (1) -0x35, 0x00, // Physical Minimum (0) -0x45, 0x01, // Physical Maximum (1) -0x65, 0x00, // Unit (None) -0x55, 0x00, // Unit Exponent (0) -0x75, 0x01, // Report Size (1) -0x95, 0x98, // Report Count (-104) -0x81, 0x03, // Input (Const,Var,Abs,No Wrap,Linear,Preferred State,No Null Position) -0x91, 0x03, // Output (Const,Var,Abs,No Wrap,Linear,Preferred State,No Null Position,Non-volatile) -0xC1, 0x00, // End Collection -}; - /* BIOMETRIC PRESENCE*/ static const u8 hpd_report_descriptor[] = { 0x05, 0x20, /* Usage page */ From 37230c0c8ee7fc2c2d335b43359dcb4bdc979869 Mon Sep 17 00:00:00 2001 From: Piotr Gorski Date: Mon, 23 Jun 2025 07:21:04 +0200 Subject: [PATCH 2899/2924] iosched-6.15: bump ADIOS to v2.0.0 Signed-off-by: Piotr Gorski --- block/adios.c | 303 +++++++++++++++++++++++++++++--------------------- 1 file changed, 179 insertions(+), 124 deletions(-) diff --git a/block/adios.c b/block/adios.c index 8ab4675871863e..6f6fb237df69dd 100644 --- a/block/adios.c +++ b/block/adios.c @@ -15,13 +15,14 @@ #include #include #include +#include #include "elevator.h" #include "blk.h" #include "blk-mq.h" #include "blk-mq-sched.h" -#define ADIOS_VERSION "1.5.12" +#define ADIOS_VERSION "2.0.0" // Define operation types supported by ADIOS enum adios_op_type { @@ -33,13 +34,13 @@ enum adios_op_type { }; // Global variable to control the latency -static u64 default_global_latency_window = 16000000ULL; +static u64 default_global_latency_window = 32000000ULL; // Ratio below which batch queues should be refilled -static u8 default_bq_refill_below_ratio = 15; +static u8 default_bq_refill_below_ratio = 25; // Dynamic thresholds for shrinkage -static u32 default_lm_shrink_at_kreqs = 10000; -static u32 default_lm_shrink_at_gbytes = 100; +static u32 default_lm_shrink_at_kreqs = 5000; +static u32 default_lm_shrink_at_gbytes = 50; static u32 default_lm_shrink_resist = 2; // Latency targets for each operation type @@ -80,9 +81,15 @@ struct latency_bucket_large { u32 count; }; +// New structure to hold per-cpu buckets, improving data locality and code clarity. +struct per_cpu_lm_buckets { + struct latency_bucket_small small_bucket[LM_LAT_BUCKET_COUNT]; + struct latency_bucket_large large_bucket[LM_LAT_BUCKET_COUNT]; +}; + // Structure to hold the latency model context data struct latency_model { - spinlock_t lock; + seqlock_t lock; u64 base; u64 slope; u64 small_sum_delay; @@ -91,9 +98,8 @@ struct latency_model { u64 large_sum_bsize; u64 last_update_jiffies; - spinlock_t buckets_lock; - struct latency_bucket_small small_bucket[LM_LAT_BUCKET_COUNT]; - struct latency_bucket_large large_bucket[LM_LAT_BUCKET_COUNT]; + // Per-CPU buckets to avoid lock contention on the completion path + struct per_cpu_lm_buckets __percpu *pcpu_buckets; u32 lm_shrink_at_kreqs; u32 lm_shrink_at_gbytes; @@ -127,6 +133,8 @@ struct adios_data { u32 batch_count[ADIOS_BQ_PAGES][ADIOS_OPTYPES]; spinlock_t bq_lock; + struct per_cpu_lm_buckets *aggr_buckets; + struct latency_model latency_model[ADIOS_OPTYPES]; struct timer_list update_timer; @@ -167,16 +175,17 @@ static const int adios_prio_to_weight[40] = { /* 15 */ 36, 29, 23, 18, 15, }; -// Count the number of entries in small buckets -static u32 lm_count_small_entries(struct latency_model *model) { +// Count the number of entries in aggregated small buckets +static u32 lm_count_small_entries(struct latency_bucket_small *buckets) { u32 total_count = 0; for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) - total_count += model->small_bucket[i].count; + total_count += buckets[i].count; return total_count; } -// Update the small buckets in the latency model +// Update the small buckets in the latency model from aggregated data static bool lm_update_small_buckets(struct latency_model *model, + struct latency_bucket_small *buckets, u32 total_count, bool count_all) { u64 sum_latency = 0; u32 sum_count = 0; @@ -193,7 +202,7 @@ static bool lm_update_small_buckets(struct latency_model *model, // Identify the bucket that corresponds to the outlier threshold for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { - cumulative_count += model->small_bucket[i].count; + cumulative_count += buckets[i].count; if (cumulative_count >= threshold_count) { outlier_threshold_bucket = i; break; @@ -202,7 +211,7 @@ static bool lm_update_small_buckets(struct latency_model *model, // Calculate the average latency, excluding outliers for (u8 i = 0; i <= outlier_threshold_bucket; i++) { - struct latency_bucket_small *bucket = &model->small_bucket[i]; + struct latency_bucket_small *bucket = &buckets[i]; if (i < outlier_threshold_bucket) { sum_latency += bucket->sum_latency; sum_count += bucket->count; @@ -231,24 +240,20 @@ static bool lm_update_small_buckets(struct latency_model *model, model->small_sum_delay += sum_latency; model->small_count += sum_count; - // Reset small bucket information - memset(model->small_bucket, 0, - sizeof(model->small_bucket[0]) * LM_LAT_BUCKET_COUNT); - return true; } -// Count the number of entries in large buckets -static u32 lm_count_large_entries(struct latency_model *model) { +// Count the number of entries in aggregated large buckets +static u32 lm_count_large_entries(struct latency_bucket_large *buckets) { u32 total_count = 0; for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) - total_count += model->large_bucket[i].count; + total_count += buckets[i].count; return total_count; } -// Update the large buckets in the latency model -static bool lm_update_large_buckets( - struct latency_model *model, +// Update the large buckets in the latency model from aggregated data +static bool lm_update_large_buckets(struct latency_model *model, + struct latency_bucket_large *buckets, u32 total_count, bool count_all) { s64 sum_latency = 0; u64 sum_block_size = 0, intercept; @@ -265,7 +270,7 @@ static bool lm_update_large_buckets( // Identify the bucket that corresponds to the outlier threshold for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { - cumulative_count += model->large_bucket[i].count; + cumulative_count += buckets[i].count; if (cumulative_count >= threshold_count) { outlier_threshold_bucket = i; break; @@ -274,7 +279,7 @@ static bool lm_update_large_buckets( // Calculate the average latency and block size, excluding outliers for (u8 i = 0; i <= outlier_threshold_bucket; i++) { - struct latency_bucket_large *bucket = &model->large_bucket[i]; + struct latency_bucket_large *bucket = &buckets[i]; if (i < outlier_threshold_bucket) { sum_latency += bucket->sum_latency; sum_block_size += bucket->sum_block_size; @@ -308,44 +313,68 @@ static bool lm_update_large_buckets( model->large_sum_delay += sum_latency; model->large_sum_bsize += sum_block_size; - // Reset large bucket information - memset(model->large_bucket, 0, - sizeof(model->large_bucket[0]) * LM_LAT_BUCKET_COUNT); - return true; } // Update the latency model parameters and statistics -static void latency_model_update(struct latency_model *model) { +static void latency_model_update( + struct adios_data *ad, struct latency_model *model) { u64 now; u32 small_count, large_count; bool time_elapsed; bool small_processed = false, large_processed = false; + struct per_cpu_lm_buckets *aggr = ad->aggr_buckets; + struct latency_bucket_small *asb; + struct latency_bucket_large *alb; + struct per_cpu_lm_buckets *pcpu_b; + unsigned long flags; + int cpu; + + memset(aggr, 0, sizeof(*aggr)); + + write_seqlock_irqsave(&model->lock, flags); - guard(spinlock_irqsave)(&model->lock); - - scoped_guard(spinlock_irqsave, &model->buckets_lock) { - // Whether enough time has elapsed since the last update - now = jiffies; - time_elapsed = unlikely(!model->base) || model->last_update_jiffies + - msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; - - // Count the number of entries in buckets - small_count = lm_count_small_entries(model); - large_count = lm_count_large_entries(model); - - // Update small buckets - if (small_count && (time_elapsed || - LM_SAMPLES_THRESHOLD <= small_count || !model->base)) - small_processed = lm_update_small_buckets( - model, small_count, !model->base); - // Update large buckets - if (large_count && (time_elapsed || - LM_SAMPLES_THRESHOLD <= large_count || !model->slope)) - large_processed = lm_update_large_buckets( - model, large_count, !model->slope); + // Aggregate data from all CPUs and reset per-cpu buckets. + for_each_possible_cpu(cpu) { + pcpu_b = per_cpu_ptr(model->pcpu_buckets, cpu); + + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { + if (pcpu_b->small_bucket[i].count) { + asb = &aggr->small_bucket[i]; + asb->count += pcpu_b->small_bucket[i].count; + asb->sum_latency += pcpu_b->small_bucket[i].sum_latency; + } + if (pcpu_b->large_bucket[i].count) { + alb = &aggr->large_bucket[i]; + alb->count += pcpu_b->large_bucket[i].count; + alb->sum_latency += pcpu_b->large_bucket[i].sum_latency; + alb->sum_block_size += pcpu_b->large_bucket[i].sum_block_size; + } + } + // Reset per-cpu buckets after aggregating + memset(pcpu_b, 0, sizeof(*pcpu_b)); } + // Whether enough time has elapsed since the last update + now = jiffies; + time_elapsed = unlikely(!model->base) || model->last_update_jiffies + + msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; + + // Count the number of entries in aggregated buckets + small_count = lm_count_small_entries(aggr->small_bucket); + large_count = lm_count_large_entries(aggr->large_bucket); + + // Update small buckets + if (small_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= small_count || !model->base)) + small_processed = lm_update_small_buckets( + model, aggr->small_bucket, small_count, !model->base); + // Update large buckets + if (large_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= large_count || !model->slope)) + large_processed = lm_update_large_buckets( + model, aggr->large_bucket, large_count, !model->slope); + // Update the base parameter if small bucket was processed if (small_processed && likely(model->small_count)) model->base = div_u64(model->small_sum_delay, model->small_count); @@ -358,6 +387,8 @@ static void latency_model_update(struct latency_model *model) { // Reset statistics and update last updated jiffies if time has elapsed if (time_elapsed) model->last_update_jiffies = now; + + write_sequnlock_irqrestore(&model->lock, flags); } // Determine the bucket index for a given measured and predicted latency @@ -375,12 +406,12 @@ static u8 lm_input_bucket_index(u64 measured, u64 predicted) { } // Input latency data into the latency model -static void latency_model_input(struct latency_model *model, - u32 block_size, u64 latency, u64 pred_lat) { - unsigned long flags; +static void latency_model_input(struct adios_data *ad, + struct latency_model *model, u32 block_size, u64 latency, u64 pred_lat) { u8 bucket_index; + struct per_cpu_lm_buckets *buckets; - spin_lock_irqsave(&model->buckets_lock, flags); + buckets = this_cpu_ptr(model->pcpu_buckets); if (block_size <= LM_BLOCK_SIZE_THRESHOLD) { // Handle small requests @@ -389,43 +420,43 @@ static void latency_model_input(struct latency_model *model, if (bucket_index >= LM_LAT_BUCKET_COUNT) bucket_index = LM_LAT_BUCKET_COUNT - 1; - model->small_bucket[bucket_index].count++; - model->small_bucket[bucket_index].sum_latency += latency; + buckets->small_bucket[bucket_index].count++; + buckets->small_bucket[bucket_index].sum_latency += latency; if (unlikely(!model->base)) { - spin_unlock_irqrestore(&model->buckets_lock, flags); - latency_model_update(model); + latency_model_update(ad, model); return; } } else { // Handle large requests - if (!model->base || !pred_lat) { - spin_unlock_irqrestore(&model->buckets_lock, flags); + if (!model->base || !pred_lat) return; - } bucket_index = lm_input_bucket_index(latency, pred_lat); if (bucket_index >= LM_LAT_BUCKET_COUNT) bucket_index = LM_LAT_BUCKET_COUNT - 1; - model->large_bucket[bucket_index].count++; - model->large_bucket[bucket_index].sum_latency += latency; - model->large_bucket[bucket_index].sum_block_size += block_size; + buckets->large_bucket[bucket_index].count++; + buckets->large_bucket[bucket_index].sum_latency += latency; + buckets->large_bucket[bucket_index].sum_block_size += block_size; } - - spin_unlock_irqrestore(&model->buckets_lock, flags); } // Predict the latency for a given block size using the latency model static u64 latency_model_predict(struct latency_model *model, u32 block_size) { - u64 result; + u64 result, base, slope; + unsigned int seq; + + do { + seq = read_seqbegin(&model->lock); + base = model->base; + slope = model->slope; + } while (read_seqretry(&model->lock, seq)); - guard(spinlock_irqsave)(&model->lock); - // Predict latency based on the model - result = model->base; + result = base; if (block_size > LM_BLOCK_SIZE_THRESHOLD) - result += model->slope * + result += slope * DIV_ROUND_UP_ULL(block_size - LM_BLOCK_SIZE_THRESHOLD, 1024); return result; @@ -572,11 +603,9 @@ static void adios_request_merged(struct request_queue *q, struct request *req, bool dl_idx = adios_optype_not_read(req); struct adios_data *ad = q->elevator->elevator_data; - // if the merge was a front merge, we need to reposition request - if (type == ELEVATOR_FRONT_MERGE) { - del_from_dl_tree(ad, dl_idx, req); - add_to_dl_tree(ad, dl_idx, req); - } + // Reposition request in the deadline-sorted tree + del_from_dl_tree(ad, dl_idx, req); + add_to_dl_tree(ad, dl_idx, req); } // Handle merging of requests after one has been merged into another @@ -613,14 +642,14 @@ static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, struct request_queue *q = hctx->queue; struct adios_data *ad = q->elevator->elevator_data; - lockdep_assert_held(&ad->lock); - if (insert_flags & BLK_MQ_INSERT_AT_HEAD) { scoped_guard(spinlock_irqsave, &ad->pq_lock) list_add_tail(&rq->queuelist, &ad->prio_queue); return; } + guard(spinlock_irqsave)(&ad->lock); + if (blk_mq_sched_try_insert_merge(q, rq, free)) return; @@ -637,18 +666,14 @@ static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, static void adios_insert_requests(struct blk_mq_hw_ctx *hctx, struct list_head *list, blk_insert_t insert_flags) { - struct request_queue *q = hctx->queue; - struct adios_data *ad = q->elevator->elevator_data; + struct request *rq; LIST_HEAD(free); - scoped_guard(spinlock_irqsave, &ad->lock) - while (!list_empty(list)) { - struct request *rq; - - rq = list_first_entry(list, struct request, queuelist); - list_del_init(&rq->queuelist); - insert_request(hctx, rq, insert_flags, &free); - } + while (!list_empty(list)) { + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + insert_request(hctx, rq, insert_flags, &free); + } blk_mq_free_requests(&free); } @@ -849,7 +874,7 @@ static void update_timer_callback(struct timer_list *t) { struct adios_data *ad = from_timer(ad, t, update_timer); for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) - latency_model_update(&ad->latency_model[optype]); + latency_model_update(ad, &ad->latency_model[optype]); } // Handle the completion of a request @@ -863,7 +888,7 @@ static void adios_completed_request(struct request *rq, u64 now) { return; u64 latency = now - rq->io_start_time_ns; u8 optype = adios_optype(rq); - latency_model_input(&ad->latency_model[optype], + latency_model_input(ad, &ad->latency_model[optype], rd->block_size, latency, rd->pred_lat); timer_reduce(&ad->update_timer, jiffies + msecs_to_jiffies(100)); } @@ -917,6 +942,7 @@ static int adios_init_sched(struct request_queue *q, struct elevator_type *e) { struct adios_data *ad; struct elevator_queue *eq; int ret = -ENOMEM; + int cpu = 0; eq = elevator_alloc(q, e); if (!eq) @@ -957,21 +983,27 @@ static int adios_init_sched(struct request_queue *q, struct elevator_type *e) { for (u8 i = 0; i < 2; i++) ad->dl_prio[i] = default_dl_prio[i]; - for (u8 i = 0; i < ADIOS_OPTYPES; i++) { - struct latency_model *model = &ad->latency_model[i]; - spin_lock_init(&model->lock); - spin_lock_init(&model->buckets_lock); - memset(model->small_bucket, 0, - sizeof(model->small_bucket[0]) * LM_LAT_BUCKET_COUNT); - memset(model->large_bucket, 0, - sizeof(model->large_bucket[0]) * LM_LAT_BUCKET_COUNT); + ad->aggr_buckets = kzalloc(sizeof(*ad->aggr_buckets), GFP_KERNEL); + if (!ad->aggr_buckets) { + pr_err("adios: Failed to allocate aggregation buckets\n"); + goto destroy_dl_group_pool; + } + + for (cpu = 0; cpu < ADIOS_OPTYPES; cpu++) { + struct latency_model *model = &ad->latency_model[cpu]; + seqlock_init(&model->lock); + + model->pcpu_buckets = alloc_percpu(struct per_cpu_lm_buckets); + if (!model->pcpu_buckets) + goto free_buckets; + model->last_update_jiffies = jiffies; model->lm_shrink_at_kreqs = default_lm_shrink_at_kreqs; model->lm_shrink_at_gbytes = default_lm_shrink_at_gbytes; model->lm_shrink_resist = default_lm_shrink_resist; - ad->latency_target[i] = default_latency_target[i]; - ad->batch_limit[i] = default_batch_limit[i]; + ad->latency_target[cpu] = default_latency_target[cpu]; + ad->batch_limit[cpu] = default_batch_limit[cpu]; } timer_setup(&ad->update_timer, update_timer_callback, 0); init_batch_queues(ad); @@ -989,6 +1021,15 @@ static int adios_init_sched(struct request_queue *q, struct elevator_type *e) { q->elevator = eq; return 0; +free_buckets: + pr_err("adios: Failed to allocate per-cpu buckets\n"); + while (--cpu >= 0) { + struct latency_model *prev_model = &ad->latency_model[cpu]; + free_percpu(prev_model->pcpu_buckets); + } + kfree(ad->aggr_buckets); +destroy_dl_group_pool: + kmem_cache_destroy(ad->dl_group_pool); destroy_rq_data_pool: kmem_cache_destroy(ad->rq_data_pool); free_ad: @@ -1006,6 +1047,12 @@ static void adios_exit_sched(struct elevator_queue *e) { WARN_ON_ONCE(!list_empty(&ad->prio_queue)); + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { + struct latency_model *model = &ad->latency_model[i]; + free_percpu(model->pcpu_buckets); + } + kfree(ad->aggr_buckets); + if (ad->rq_data_pool) kmem_cache_destroy(ad->rq_data_pool); @@ -1024,9 +1071,15 @@ static ssize_t adios_lat_model_##name##_show( \ struct adios_data *ad = e->elevator_data; \ struct latency_model *model = &ad->latency_model[optype]; \ ssize_t len = 0; \ - guard(spinlock_irqsave)(&model->lock); \ - len += sprintf(page, "base : %llu ns\n", model->base); \ - len += sprintf(page + len, "slope: %llu ns/KiB\n", model->slope);\ + u64 base, slope; \ + unsigned int seq; \ + do { \ + seq = read_seqbegin(&model->lock); \ + base = model->base; \ + slope = model->slope; \ + } while (read_seqretry(&model->lock, seq)); \ + len += sprintf(page, "base : %llu ns\n", base); \ + len += sprintf(page + len, "slope: %llu ns/KiB\n", slope);\ return len; \ } \ static ssize_t adios_lat_target_##name##_store( \ @@ -1186,14 +1239,14 @@ static ssize_t adios_reset_lat_model_store( for (u8 i = 0; i < ADIOS_OPTYPES; i++) { struct latency_model *model = &ad->latency_model[i]; - scoped_guard(spinlock_irqsave, &model->lock) { - model->base = 0ULL; - model->slope = 0ULL; - model->small_sum_delay = 0ULL; - model->small_count = 0ULL; - model->large_sum_delay = 0ULL; - model->large_sum_bsize = 0ULL; - } + write_seqlock_bh(&model->lock); + model->base = 0ULL; + model->slope = 0ULL; + model->small_sum_delay = 0ULL; + model->small_count = 0ULL; + model->large_sum_delay = 0ULL; + model->large_sum_bsize = 0ULL; + write_sequnlock_bh(&model->lock); } return count; @@ -1216,8 +1269,9 @@ static ssize_t adios_shrink_##name##_store( \ return -EINVAL; \ for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ struct latency_model *model = &ad->latency_model[i]; \ - scoped_guard(spinlock_irqsave, &model->lock) \ - model->model_field = val; \ + write_seqlock_bh(&model->lock); \ + model->model_field = val; \ + write_sequnlock_bh(&model->lock); \ } \ return count; \ } \ @@ -1225,11 +1279,12 @@ static ssize_t adios_shrink_##name##_show( \ struct elevator_queue *e, char *page) { \ struct adios_data *ad = e->elevator_data; \ u32 val = 0; \ - for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ - struct latency_model *model = &ad->latency_model[i]; \ - scoped_guard(spinlock_irqsave, &model->lock) \ - val = model->model_field; \ - } \ + unsigned int seq; \ + struct latency_model *model = &ad->latency_model[0]; \ + do { \ + seq = read_seqbegin(&model->lock); \ + val = model->model_field; \ + } while (read_seqretry(&model->lock, seq)); \ return sprintf(page, "%u\n", val); \ } @@ -1243,9 +1298,9 @@ SHRINK_THRESHOLD_ATTR_RW(resist, lm_shrink_resist, 1, 3) #define AD_ATTR_RW(name) \ __ATTR(name, 0644, adios_##name##_show, adios_##name##_store) #define AD_ATTR_RO(name) \ - __ATTR(name, 0644, adios_##name##_show, NULL) + __ATTR(name, 0444, adios_##name##_show, NULL) #define AD_ATTR_WO(name) \ - __ATTR(name, 0644, NULL, adios_##name##_store) + __ATTR(name, 0200, NULL, adios_##name##_store) // Define sysfs attributes for ADIOS scheduler static struct elv_fs_entry adios_sched_attrs[] = { From 78d50e2cd2f862da4f87722eb4541e3c8ab5d25c Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 5 Mar 2025 14:05:51 +0100 Subject: [PATCH 2900/2924] drm/sched: Document run_job() refcount hazard drm_sched_backend_ops.run_job() returns a dma_fence for the scheduler. That fence is signalled by the driver once the hardware completed the associated job. The scheduler does not increment the reference count on that fence, but implicitly expects to inherit this fence from run_job(). This is relatively subtle and prone to misunderstandings. This implies that, to keep a reference for itself, a driver needs to call dma_fence_get() in addition to dma_fence_init() in that callback. It's further complicated by the fact that the scheduler even decrements the refcount in drm_sched_run_job_work() since it created a new reference in drm_sched_fence_scheduled(). It does, however, still use its pointer to the fence after calling dma_fence_put() - which is safe because of the aforementioned new reference, but actually still violates the refcounting rules. Move the call to dma_fence_put() to the position behind the last usage of the fence. Suggested-by: Danilo Krummrich Signed-off-by: Philipp Stanner Reviewed-by: Danilo Krummrich Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250305130551.136682-4-phasta@kernel.org --- drivers/gpu/drm/scheduler/sched_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index bfea608a7106e2..53e6aec37b4666 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1220,20 +1220,23 @@ static void drm_sched_run_job_work(struct work_struct *w) drm_sched_job_begin(sched_job); trace_drm_run_job(sched_job, entity); + /* + * The run_job() callback must by definition return a fence whose + * refcount has been incremented for the scheduler already. + */ fence = sched->ops->run_job(sched_job); complete_all(&entity->entity_idle); drm_sched_fence_scheduled(s_fence, fence); if (!IS_ERR_OR_NULL(fence)) { - /* Drop for original kref_init of the fence */ - dma_fence_put(fence); - r = dma_fence_add_callback(fence, &sched_job->cb, drm_sched_job_done_cb); if (r == -ENOENT) drm_sched_job_done(sched_job, fence->error); else if (r) DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); + + dma_fence_put(fence); } else { drm_sched_job_done(sched_job, IS_ERR(fence) ? PTR_ERR(fence) : 0); From a59418e9de260551b6c7553f826112ac0d27b2ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 12 Mar 2025 14:44:00 +0100 Subject: [PATCH 2901/2924] drm/sched: revert "drm_sched_job_cleanup(): correct false doc" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 44d2f310f008613c1dbe5e234c2cf2be90cbbfab. The function drm_sched_job_arm() is indeed the point of no return. The background is that it is nearly impossible for the driver to correctly retract the fence and signal it in the order enforced by the dma_fence framework. The code in drm_sched_job_cleanup() is for the purpose to cleanup after the job was armed through drm_sched_job_arm() *and* processed by the scheduler. We can certainly improve the documentation, but removing the warning is clearly not a good idea. Signed-off-by: Christian König Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250312134400.2176393-1-christian.koenig@amd.com --- drivers/gpu/drm/scheduler/sched_main.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 53e6aec37b4666..4d4219fbe49de6 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1015,13 +1015,11 @@ EXPORT_SYMBOL(drm_sched_job_has_dependency); * Cleans up the resources allocated with drm_sched_job_init(). * * Drivers should call this from their error unwind code if @job is aborted - * before it was submitted to an entity with drm_sched_entity_push_job(). + * before drm_sched_job_arm() is called. * - * Since calling drm_sched_job_arm() causes the job's fences to be initialized, - * it is up to the driver to ensure that fences that were exposed to external - * parties get signaled. drm_sched_job_cleanup() does not ensure this. - * - * This function must also be called in &struct drm_sched_backend_ops.free_job + * After that point of no return @job is committed to be executed by the + * scheduler, and this function should be called from the + * &drm_sched_backend_ops.free_job callback. */ void drm_sched_job_cleanup(struct drm_sched_job *job) { @@ -1032,7 +1030,7 @@ void drm_sched_job_cleanup(struct drm_sched_job *job) /* drm_sched_job_arm() has been called */ dma_fence_put(&job->s_fence->finished); } else { - /* aborted job before arming */ + /* aborted job before committing to run it */ drm_sched_fence_free(job->s_fence); } From 8179f2c4ec006ba09c36e6362fa9345f89b72c9b Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 13 Mar 2025 10:30:54 +0100 Subject: [PATCH 2902/2924] drm/sched: Clarify docu concerning drm_sched_job_arm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation for drm_sched_job_arm() and especially drm_sched_job_cleanup() does not make it very clear why drm_sched_job_arm() is a point of no return, which it indeed is. Make the nature of drm_sched_job_arm() in the docu as clear as possible. Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250313093053.65001-2-phasta@kernel.org --- drivers/gpu/drm/scheduler/sched_main.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 4d4219fbe49de6..829579c41c6b5d 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -828,11 +828,15 @@ EXPORT_SYMBOL(drm_sched_job_init); * * This arms a scheduler job for execution. Specifically it initializes the * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv - * or other places that need to track the completion of this job. + * or other places that need to track the completion of this job. It also + * initializes sequence numbers, which are fundamental for fence ordering. * * Refer to drm_sched_entity_push_job() documentation for locking * considerations. * + * Once this function was called, you *must* submit @job with + * drm_sched_entity_push_job(). + * * This can only be called if drm_sched_job_init() succeeded. */ void drm_sched_job_arm(struct drm_sched_job *job) @@ -1017,9 +1021,12 @@ EXPORT_SYMBOL(drm_sched_job_has_dependency); * Drivers should call this from their error unwind code if @job is aborted * before drm_sched_job_arm() is called. * - * After that point of no return @job is committed to be executed by the - * scheduler, and this function should be called from the - * &drm_sched_backend_ops.free_job callback. + * drm_sched_job_arm() is a point of no return since it initializes the fences + * and their sequence number etc. Once that function has been called, you *must* + * submit it with drm_sched_entity_push_job() and cannot simply abort it by + * calling drm_sched_job_cleanup(). + * + * This function should be called in the &drm_sched_backend_ops.free_job callback. */ void drm_sched_job_cleanup(struct drm_sched_job *job) { @@ -1027,10 +1034,15 @@ void drm_sched_job_cleanup(struct drm_sched_job *job) unsigned long index; if (kref_read(&job->s_fence->finished.refcount)) { - /* drm_sched_job_arm() has been called */ + /* The job has been processed by the scheduler, i.e., + * drm_sched_job_arm() and drm_sched_entity_push_job() have + * been called. + */ dma_fence_put(&job->s_fence->finished); } else { - /* aborted job before committing to run it */ + /* The job was aborted before it has been committed to be run; + * notably, drm_sched_job_arm() has not been called. + */ drm_sched_fence_free(job->s_fence); } From 4c44a1a8c6e36be6a1259017d841d9644889ba94 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 24 Mar 2025 09:26:29 +0000 Subject: [PATCH 2903/2924] drm/sched: Add scheduler unit testing infrastructure and some basic tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a mock scheduler backend and add some basic test to exercise the core scheduler code paths. Mock backend (kind of like a very simple mock GPU) can either process jobs by tests manually advancing the "timeline" job at a time, or alternatively jobs can be configured with a time duration in which case they get completed asynchronously from the unit test code. Core scheduler classes are subclassed to support this mock implementation. The tests added are just a few simple submission patterns. Signed-off-by: Tvrtko Ursulin Suggested-by: Philipp Stanner Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner Acked-by: Christian König Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250324092633.49746-3-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/Kconfig.debug | 116 ++++++ drivers/gpu/drm/scheduler/.kunitconfig | 12 + drivers/gpu/drm/scheduler/Makefile | 2 + drivers/gpu/drm/scheduler/tests/Makefile | 7 + .../gpu/drm/scheduler/tests/mock_scheduler.c | 354 ++++++++++++++++++ drivers/gpu/drm/scheduler/tests/sched_tests.h | 224 +++++++++++ drivers/gpu/drm/scheduler/tests/tests_basic.c | 198 ++++++++++ 7 files changed, 913 insertions(+) create mode 100644 drivers/gpu/drm/Kconfig.debug create mode 100644 drivers/gpu/drm/scheduler/.kunitconfig create mode 100644 drivers/gpu/drm/scheduler/tests/Makefile create mode 100644 drivers/gpu/drm/scheduler/tests/mock_scheduler.c create mode 100644 drivers/gpu/drm/scheduler/tests/sched_tests.h create mode 100644 drivers/gpu/drm/scheduler/tests/tests_basic.c diff --git a/drivers/gpu/drm/Kconfig.debug b/drivers/gpu/drm/Kconfig.debug new file mode 100644 index 00000000000000..c493743e8aca0e --- /dev/null +++ b/drivers/gpu/drm/Kconfig.debug @@ -0,0 +1,116 @@ +config DRM_USE_DYNAMIC_DEBUG + bool "use dynamic debug to implement drm.debug" + default n + depends on BROKEN + depends on DRM + depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE + depends on JUMP_LABEL + help + Use dynamic-debug to avoid drm_debug_enabled() runtime overheads. + Due to callsite counts in DRM drivers (~4k in amdgpu) and 56 + bytes per callsite, the .data costs can be substantial, and + are therefore configurable. + +config DRM_WERROR + bool "Compile the drm subsystem with warnings as errors" + depends on DRM && EXPERT + depends on !WERROR + default n + help + A kernel build should not cause any compiler warnings, and this + enables the '-Werror' flag to enforce that rule in the drm subsystem. + + The drm subsystem enables more warnings than the kernel default, so + this config option is disabled by default. + + If in doubt, say N. + +config DRM_HEADER_TEST + bool "Ensure DRM headers are self-contained and pass kernel-doc" + depends on DRM && EXPERT + default n + help + Ensure the DRM subsystem headers both under drivers/gpu/drm and + include/drm compile, are self-contained, have header guards, and have + no kernel-doc warnings. + + If in doubt, say N. + +config DRM_DEBUG_MM + bool "Insert extra checks and debug info into the DRM range managers" + default n + depends on DRM + depends on STACKTRACE_SUPPORT + select STACKDEPOT + help + Enable allocation tracking of memory manager and leak detection on + shutdown. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_KUNIT_TEST_HELPERS + tristate + depends on DRM && KUNIT + select DRM_KMS_HELPER + help + KUnit Helpers for KMS drivers. + +config DRM_KUNIT_TEST + tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS + depends on DRM && KUNIT && MMU + select DRM_BRIDGE_CONNECTOR + select DRM_BUDDY + select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HDMI_STATE_HELPER + select DRM_DISPLAY_HELPER + select DRM_EXEC + select DRM_EXPORT_FOR_TESTS if m + select DRM_GEM_SHMEM_HELPER + select DRM_KUNIT_TEST_HELPERS + select DRM_LIB_RANDOM + select PRIME_NUMBERS + default KUNIT_ALL_TESTS + help + This builds unit tests for DRM. This option is not useful for + distributions or general kernels, but only for kernel + developers working on DRM and associated drivers. + + For more information on KUnit and unit tests in general, + please refer to the KUnit documentation in + Documentation/dev-tools/kunit/. + + If in doubt, say "N". + +config DRM_TTM_KUNIT_TEST + tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS + default n + depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST) + select DRM_TTM + select DRM_BUDDY + select DRM_EXPORT_FOR_TESTS if m + select DRM_KUNIT_TEST_HELPERS + default KUNIT_ALL_TESTS + help + Enables unit tests for TTM, a GPU memory manager subsystem used + to manage memory buffers. This option is mostly useful for kernel + developers. It depends on (UML || COMPILE_TEST) since no other driver + which uses TTM can be loaded while running the tests. + + If in doubt, say "N". + +config DRM_SCHED_KUNIT_TEST + tristate "KUnit tests for the DRM scheduler" if !KUNIT_ALL_TESTS + select DRM_SCHED + depends on DRM && KUNIT + default KUNIT_ALL_TESTS + help + Choose this option to build unit tests for the DRM scheduler. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_EXPORT_FOR_TESTS + bool diff --git a/drivers/gpu/drm/scheduler/.kunitconfig b/drivers/gpu/drm/scheduler/.kunitconfig new file mode 100644 index 00000000000000..cece53609fcf66 --- /dev/null +++ b/drivers/gpu/drm/scheduler/.kunitconfig @@ -0,0 +1,12 @@ +CONFIG_KUNIT=y +CONFIG_DRM=y +CONFIG_DRM_SCHED_KUNIT_TEST=y +CONFIG_EXPERT=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_LOCK_DEBUGGING_SUPPORT=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_LIST=y diff --git a/drivers/gpu/drm/scheduler/Makefile b/drivers/gpu/drm/scheduler/Makefile index 53863621829f14..6e13e4c63e9d87 100644 --- a/drivers/gpu/drm/scheduler/Makefile +++ b/drivers/gpu/drm/scheduler/Makefile @@ -23,3 +23,5 @@ gpu-sched-y := sched_main.o sched_fence.o sched_entity.o obj-$(CONFIG_DRM_SCHED) += gpu-sched.o + +obj-$(CONFIG_DRM_SCHED_KUNIT_TEST) += tests/ diff --git a/drivers/gpu/drm/scheduler/tests/Makefile b/drivers/gpu/drm/scheduler/tests/Makefile new file mode 100644 index 00000000000000..5bf707bad37344 --- /dev/null +++ b/drivers/gpu/drm/scheduler/tests/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +drm-sched-tests-y := \ + mock_scheduler.o \ + tests_basic.o + +obj-$(CONFIG_DRM_SCHED_KUNIT_TEST) += drm-sched-tests.o diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c new file mode 100644 index 00000000000000..d039f873cc118a --- /dev/null +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Valve Corporation */ + +#include "sched_tests.h" + +/* + * Here we implement the mock "GPU" (or the scheduler backend) which is used by + * the DRM scheduler unit tests in order to exercise the core functionality. + * + * Test cases are implemented in a separate file. + */ + +/** + * drm_mock_sched_entity_new - Create a new mock scheduler entity + * + * @test: KUnit test owning the entity + * @priority: Scheduling priority + * @sched: Mock scheduler on which the entity can be scheduled + * + * Returns: New mock scheduler entity with allocation managed by the test + */ +struct drm_mock_sched_entity * +drm_mock_sched_entity_new(struct kunit *test, + enum drm_sched_priority priority, + struct drm_mock_scheduler *sched) +{ + struct drm_mock_sched_entity *entity; + struct drm_gpu_scheduler *drm_sched; + int ret; + + entity = kunit_kzalloc(test, sizeof(*entity), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, entity); + + drm_sched = &sched->base; + ret = drm_sched_entity_init(&entity->base, + priority, + &drm_sched, 1, + NULL); + KUNIT_ASSERT_EQ(test, ret, 0); + + entity->test = test; + + return entity; +} + +/** + * drm_mock_sched_entity_free - Destroys a mock scheduler entity + * + * @entity: Entity to destroy + * + * To be used from the test cases once done with the entity. + */ +void drm_mock_sched_entity_free(struct drm_mock_sched_entity *entity) +{ + drm_sched_entity_destroy(&entity->base); +} + +static void drm_mock_sched_job_complete(struct drm_mock_sched_job *job) +{ + struct drm_mock_scheduler *sched = + drm_sched_to_mock_sched(job->base.sched); + + lockdep_assert_held(&sched->lock); + + job->flags |= DRM_MOCK_SCHED_JOB_DONE; + list_move_tail(&job->link, &sched->done_list); + dma_fence_signal(&job->hw_fence); + complete(&job->done); +} + +static enum hrtimer_restart +drm_mock_sched_job_signal_timer(struct hrtimer *hrtimer) +{ + struct drm_mock_sched_job *job = + container_of(hrtimer, typeof(*job), timer); + struct drm_mock_scheduler *sched = + drm_sched_to_mock_sched(job->base.sched); + struct drm_mock_sched_job *next; + ktime_t now = ktime_get(); + unsigned long flags; + LIST_HEAD(signal); + + spin_lock_irqsave(&sched->lock, flags); + list_for_each_entry_safe(job, next, &sched->job_list, link) { + if (!job->duration_us) + break; + + if (ktime_before(now, job->finish_at)) + break; + + sched->hw_timeline.cur_seqno = job->hw_fence.seqno; + drm_mock_sched_job_complete(job); + } + spin_unlock_irqrestore(&sched->lock, flags); + + return HRTIMER_NORESTART; +} + +/** + * drm_mock_sched_job_new - Create a new mock scheduler job + * + * @test: KUnit test owning the job + * @entity: Scheduler entity of the job + * + * Returns: New mock scheduler job with allocation managed by the test + */ +struct drm_mock_sched_job * +drm_mock_sched_job_new(struct kunit *test, + struct drm_mock_sched_entity *entity) +{ + struct drm_mock_sched_job *job; + int ret; + + job = kunit_kzalloc(test, sizeof(*job), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, job); + + ret = drm_sched_job_init(&job->base, + &entity->base, + 1, + NULL); + KUNIT_ASSERT_EQ(test, ret, 0); + + job->test = test; + + init_completion(&job->done); + spin_lock_init(&job->lock); + INIT_LIST_HEAD(&job->link); + hrtimer_init(&job->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + job->timer.function = drm_mock_sched_job_signal_timer; + + return job; +} + +static const char *drm_mock_sched_hw_fence_driver_name(struct dma_fence *fence) +{ + return "drm_mock_sched"; +} + +static const char * +drm_mock_sched_hw_fence_timeline_name(struct dma_fence *fence) +{ + struct drm_mock_sched_job *job = + container_of(fence, typeof(*job), hw_fence); + + return (const char *)job->base.sched->name; +} + +static void drm_mock_sched_hw_fence_release(struct dma_fence *fence) +{ + struct drm_mock_sched_job *job = + container_of(fence, typeof(*job), hw_fence); + + hrtimer_cancel(&job->timer); + + /* Containing job is freed by the kunit framework */ +} + +static const struct dma_fence_ops drm_mock_sched_hw_fence_ops = { + .get_driver_name = drm_mock_sched_hw_fence_driver_name, + .get_timeline_name = drm_mock_sched_hw_fence_timeline_name, + .release = drm_mock_sched_hw_fence_release, +}; + +static struct dma_fence *mock_sched_run_job(struct drm_sched_job *sched_job) +{ + struct drm_mock_scheduler *sched = + drm_sched_to_mock_sched(sched_job->sched); + struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); + + dma_fence_init(&job->hw_fence, + &drm_mock_sched_hw_fence_ops, + &job->lock, + sched->hw_timeline.context, + atomic_inc_return(&sched->hw_timeline.next_seqno)); + + dma_fence_get(&job->hw_fence); /* Reference for the job_list */ + + spin_lock_irq(&sched->lock); + if (job->duration_us) { + ktime_t prev_finish_at = 0; + + if (!list_empty(&sched->job_list)) { + struct drm_mock_sched_job *prev = + list_last_entry(&sched->job_list, typeof(*prev), + link); + + prev_finish_at = prev->finish_at; + } + + if (!prev_finish_at) + prev_finish_at = ktime_get(); + + job->finish_at = ktime_add_us(prev_finish_at, job->duration_us); + } + list_add_tail(&job->link, &sched->job_list); + if (job->finish_at) + hrtimer_start(&job->timer, job->finish_at, HRTIMER_MODE_ABS); + spin_unlock_irq(&sched->lock); + + return &job->hw_fence; +} + +static enum drm_gpu_sched_stat +mock_sched_timedout_job(struct drm_sched_job *sched_job) +{ + return DRM_GPU_SCHED_STAT_ENODEV; +} + +static void mock_sched_free_job(struct drm_sched_job *sched_job) +{ + struct drm_mock_scheduler *sched = + drm_sched_to_mock_sched(sched_job->sched); + struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); + unsigned long flags; + + /* Remove from the scheduler done list. */ + spin_lock_irqsave(&sched->lock, flags); + list_del(&job->link); + spin_unlock_irqrestore(&sched->lock, flags); + dma_fence_put(&job->hw_fence); + + drm_sched_job_cleanup(sched_job); + + /* Mock job itself is freed by the kunit framework. */ +} + +static const struct drm_sched_backend_ops drm_mock_scheduler_ops = { + .run_job = mock_sched_run_job, + .timedout_job = mock_sched_timedout_job, + .free_job = mock_sched_free_job +}; + +/** + * drm_mock_sched_new - Create a new mock scheduler + * + * @test: KUnit test owning the job + * + * Returns: New mock scheduler with allocation managed by the test + */ +struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test) +{ + struct drm_sched_init_args args = { + .ops = &drm_mock_scheduler_ops, + .num_rqs = DRM_SCHED_PRIORITY_COUNT, + .credit_limit = U32_MAX, + .hang_limit = 1, + .timeout = MAX_SCHEDULE_TIMEOUT, + .name = "drm-mock-scheduler", + }; + struct drm_mock_scheduler *sched; + int ret; + + sched = kunit_kzalloc(test, sizeof(*sched), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, sched); + + ret = drm_sched_init(&sched->base, &args); + KUNIT_ASSERT_EQ(test, ret, 0); + + sched->test = test; + sched->hw_timeline.context = dma_fence_context_alloc(1); + atomic_set(&sched->hw_timeline.next_seqno, 0); + INIT_LIST_HEAD(&sched->job_list); + INIT_LIST_HEAD(&sched->done_list); + spin_lock_init(&sched->lock); + + return sched; +} + +/** + * drm_mock_sched_fini - Destroys a mock scheduler + * + * @sched: Scheduler to destroy + * + * To be used from the test cases once done with the scheduler. + */ +void drm_mock_sched_fini(struct drm_mock_scheduler *sched) +{ + struct drm_mock_sched_job *job, *next; + unsigned long flags; + LIST_HEAD(list); + + drm_sched_wqueue_stop(&sched->base); + + /* Force complete all unfinished jobs. */ + spin_lock_irqsave(&sched->lock, flags); + list_for_each_entry_safe(job, next, &sched->job_list, link) + list_move_tail(&job->link, &list); + spin_unlock_irqrestore(&sched->lock, flags); + + list_for_each_entry(job, &list, link) + hrtimer_cancel(&job->timer); + + spin_lock_irqsave(&sched->lock, flags); + list_for_each_entry_safe(job, next, &list, link) + drm_mock_sched_job_complete(job); + spin_unlock_irqrestore(&sched->lock, flags); + + /* + * Free completed jobs and jobs not yet processed by the DRM scheduler + * free worker. + */ + spin_lock_irqsave(&sched->lock, flags); + list_for_each_entry_safe(job, next, &sched->done_list, link) + list_move_tail(&job->link, &list); + spin_unlock_irqrestore(&sched->lock, flags); + + list_for_each_entry_safe(job, next, &list, link) + mock_sched_free_job(&job->base); + + drm_sched_fini(&sched->base); +} + +/** + * drm_mock_sched_advance - Advances the mock scheduler timeline + * + * @sched: Scheduler timeline to advance + * @num: By how many jobs to advance + * + * Advancing the scheduler timeline by a number of seqnos will trigger + * signalling of the hardware fences and unlinking the jobs from the internal + * scheduler tracking. + * + * This can be used from test cases which want complete control of the simulated + * job execution timing. For example submitting one job with no set duration + * would never complete it before test cases advances the timeline by one. + */ +unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched, + unsigned int num) +{ + struct drm_mock_sched_job *job, *next; + unsigned int found = 0; + unsigned long flags; + LIST_HEAD(signal); + + spin_lock_irqsave(&sched->lock, flags); + if (WARN_ON_ONCE(sched->hw_timeline.cur_seqno + num < + sched->hw_timeline.cur_seqno)) + goto unlock; + sched->hw_timeline.cur_seqno += num; + list_for_each_entry_safe(job, next, &sched->job_list, link) { + if (sched->hw_timeline.cur_seqno < job->hw_fence.seqno) + break; + + drm_mock_sched_job_complete(job); + found++; + } +unlock: + spin_unlock_irqrestore(&sched->lock, flags); + + return found; +} + +MODULE_DESCRIPTION("DRM mock scheduler and tests"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h new file mode 100644 index 00000000000000..31aaba3443fabd --- /dev/null +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Valve Corporation */ + +#ifndef _SCHED_TESTS_H_ +#define _SCHED_TESTS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * DOC: Mock DRM scheduler data structures + * + * drm_mock_* data structures are used to implement a mock "GPU". + * + * They subclass the core DRM scheduler objects and add their data on top, which + * enables tracking the submitted jobs and simulating their execution with the + * attributes as specified by the test case. + */ + +/** + * struct drm_mock_scheduler - implements a trivial mock GPU execution engine + * + * @base: DRM scheduler base class + * @test: Backpointer to owning the kunit test case + * @lock: Lock to protect the simulated @hw_timeline, @job_list and @done_list + * @job_list: List of jobs submitted to the mock GPU + * @done_list: List of jobs completed by the mock GPU + * @hw_timeline: Simulated hardware timeline has a @context, @next_seqno and + * @cur_seqno for implementing a struct dma_fence signaling the + * simulated job completion. + * + * Trivial mock GPU execution engine tracks submitted jobs and enables + * completing them strictly in submission order. + */ +struct drm_mock_scheduler { + struct drm_gpu_scheduler base; + + struct kunit *test; + + spinlock_t lock; + struct list_head job_list; + struct list_head done_list; + + struct { + u64 context; + atomic_t next_seqno; + unsigned int cur_seqno; + } hw_timeline; +}; + +/** + * struct drm_mock_sched_entity - implements a mock GPU sched entity + * + * @base: DRM scheduler entity base class + * @test: Backpointer to owning the kunit test case + * + * Mock GPU sched entity is used by the test cases to submit jobs to the mock + * scheduler. + */ +struct drm_mock_sched_entity { + struct drm_sched_entity base; + + struct kunit *test; +}; + +/** + * struct drm_mock_sched_job - implements a mock GPU job + * + * @base: DRM sched job base class + * @done: Completion signaling job completion. + * @flags: Flags designating job state. + * @link: List head element used by job tracking by the drm_mock_scheduler + * @timer: Timer used for simulating job execution duration + * @duration_us: Simulated job duration in micro seconds, or zero if in manual + * timeline advance mode + * @finish_at: Absolute time when the jobs with set duration will complete + * @lock: Lock used for @hw_fence + * @hw_fence: Fence returned to DRM scheduler as the hardware fence + * @test: Backpointer to owning the kunit test case + * + * Mock GPU sched job is used by the test cases to submit jobs to the mock + * scheduler. + */ +struct drm_mock_sched_job { + struct drm_sched_job base; + + struct completion done; + +#define DRM_MOCK_SCHED_JOB_DONE 0x1 + unsigned long flags; + + struct list_head link; + struct hrtimer timer; + + unsigned int duration_us; + ktime_t finish_at; + + spinlock_t lock; + struct dma_fence hw_fence; + + struct kunit *test; +}; + +static inline struct drm_mock_scheduler * +drm_sched_to_mock_sched(struct drm_gpu_scheduler *sched) +{ + return container_of(sched, struct drm_mock_scheduler, base); +}; + +static inline struct drm_mock_sched_entity * +drm_sched_entity_to_mock_entity(struct drm_sched_entity *sched_entity) +{ + return container_of(sched_entity, struct drm_mock_sched_entity, base); +}; + +static inline struct drm_mock_sched_job * +drm_sched_job_to_mock_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct drm_mock_sched_job, base); +}; + +struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test); +void drm_mock_sched_fini(struct drm_mock_scheduler *sched); +unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched, + unsigned int num); + +struct drm_mock_sched_entity * +drm_mock_sched_entity_new(struct kunit *test, + enum drm_sched_priority priority, + struct drm_mock_scheduler *sched); +void drm_mock_sched_entity_free(struct drm_mock_sched_entity *entity); + +struct drm_mock_sched_job * +drm_mock_sched_job_new(struct kunit *test, + struct drm_mock_sched_entity *entity); + +/** + * drm_mock_sched_job_submit - Arm and submit a job in one go + * + * @job: Job to arm and submit + */ +static inline void drm_mock_sched_job_submit(struct drm_mock_sched_job *job) +{ + drm_sched_job_arm(&job->base); + drm_sched_entity_push_job(&job->base); +} + +/** + * drm_mock_sched_job_set_duration_us - Set a job duration + * + * @job: Job to set the duration for + * @duration_us: Duration in micro seconds + * + * Jobs with duration set will be automatically completed by the mock scheduler + * as the timeline progresses, unless a job without a set duration is + * encountered in the timelime in which case calling drm_mock_sched_advance() + * will be required to bump the timeline. + */ +static inline void +drm_mock_sched_job_set_duration_us(struct drm_mock_sched_job *job, + unsigned int duration_us) +{ + job->duration_us = duration_us; +} + +/** + * drm_mock_sched_job_is_finished - Check if a job is finished + * + * @job: Job to check + * + * Returns: true if finished + */ +static inline bool +drm_mock_sched_job_is_finished(struct drm_mock_sched_job *job) +{ + return job->flags & DRM_MOCK_SCHED_JOB_DONE; +} + +/** + * drm_mock_sched_job_wait_finished - Wait until a job is finished + * + * @job: Job to wait for + * @timeout: Wait time in jiffies + * + * Returns: true if finished within the timeout provided, otherwise false + */ +static inline bool +drm_mock_sched_job_wait_finished(struct drm_mock_sched_job *job, long timeout) +{ + if (job->flags & DRM_MOCK_SCHED_JOB_DONE) + return true; + + return wait_for_completion_timeout(&job->done, timeout) != 0; +} + +/** + * drm_mock_sched_job_wait_scheduled - Wait until a job is scheduled + * + * @job: Job to wait for + * @timeout: Wait time in jiffies + * + * Returns: true if scheduled within the timeout provided, otherwise false + */ +static inline bool +drm_mock_sched_job_wait_scheduled(struct drm_mock_sched_job *job, long timeout) +{ + KUNIT_ASSERT_EQ(job->test, job->flags & DRM_MOCK_SCHED_JOB_DONE, 0); + + return dma_fence_wait_timeout(&job->base.s_fence->scheduled, + false, + timeout) != 0; +} + +#endif diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c new file mode 100644 index 00000000000000..c06672e13cf610 --- /dev/null +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Valve Corporation */ + +#include "sched_tests.h" + +/* + * DRM scheduler basic tests should check the basic functional correctness of + * the scheduler, including some very light smoke testing. More targeted tests, + * for example focusing on testing specific bugs and other more complicated test + * scenarios, should be implemented in separate source units. + */ + +static int drm_sched_basic_init(struct kunit *test) +{ + test->priv = drm_mock_sched_new(test); + + return 0; +} + +static void drm_sched_basic_exit(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + + drm_mock_sched_fini(sched); +} + +static void drm_sched_basic_submit(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity; + struct drm_mock_sched_job *job; + unsigned int i; + bool done; + + /* + * Submit one job to the scheduler and verify that it gets scheduled + * and completed only when the mock hw backend processes it. + */ + + entity = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + job = drm_mock_sched_job_new(test, entity); + + drm_mock_sched_job_submit(job); + + done = drm_mock_sched_job_wait_scheduled(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + done = drm_mock_sched_job_wait_finished(job, HZ / 2); + KUNIT_ASSERT_FALSE(test, done); + + i = drm_mock_sched_advance(sched, 1); + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + drm_mock_sched_entity_free(entity); +} + +struct drm_sched_basic_params { + const char *description; + unsigned int queue_depth; + unsigned int num_entities; + unsigned int job_us; + bool dep_chain; +}; + +static const struct drm_sched_basic_params drm_sched_basic_cases[] = { + { + .description = "A queue of jobs in a single entity", + .queue_depth = 100, + .job_us = 1000, + .num_entities = 1, + }, + { + .description = "A chain of dependent jobs across multiple entities", + .queue_depth = 100, + .job_us = 1000, + .num_entities = 1, + .dep_chain = true, + }, + { + .description = "Multiple independent job queues", + .queue_depth = 100, + .job_us = 1000, + .num_entities = 4, + }, + { + .description = "Multiple inter-dependent job queues", + .queue_depth = 100, + .job_us = 1000, + .num_entities = 4, + .dep_chain = true, + }, +}; + +static void +drm_sched_basic_desc(const struct drm_sched_basic_params *params, char *desc) +{ + strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(drm_sched_basic, drm_sched_basic_cases, drm_sched_basic_desc); + +static void drm_sched_basic_test(struct kunit *test) +{ + const struct drm_sched_basic_params *params = test->param_value; + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_job *job, *prev = NULL; + struct drm_mock_sched_entity **entity; + unsigned int i, cur_ent = 0; + bool done; + + entity = kunit_kcalloc(test, params->num_entities, sizeof(*entity), + GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, entity); + + for (i = 0; i < params->num_entities; i++) + entity[i] = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + for (i = 0; i < params->queue_depth; i++) { + job = drm_mock_sched_job_new(test, entity[cur_ent++]); + cur_ent %= params->num_entities; + drm_mock_sched_job_set_duration_us(job, params->job_us); + if (params->dep_chain && prev) + drm_sched_job_add_dependency(&job->base, + dma_fence_get(&prev->base.s_fence->finished)); + drm_mock_sched_job_submit(job); + prev = job; + } + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + for (i = 0; i < params->num_entities; i++) + drm_mock_sched_entity_free(entity[i]); +} + +static void drm_sched_basic_entity_cleanup(struct kunit *test) +{ + struct drm_mock_sched_job *job, *mid, *prev = NULL; + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity[4]; + const unsigned int qd = 100; + unsigned int i, cur_ent = 0; + bool done; + + /* + * Submit a queue of jobs across different entities with an explicit + * chain of dependencies between them and trigger entity cleanup while + * the queue is still being processed. + */ + + for (i = 0; i < ARRAY_SIZE(entity); i++) + entity[i] = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + for (i = 0; i < qd; i++) { + job = drm_mock_sched_job_new(test, entity[cur_ent++]); + cur_ent %= ARRAY_SIZE(entity); + drm_mock_sched_job_set_duration_us(job, 1000); + if (prev) + drm_sched_job_add_dependency(&job->base, + dma_fence_get(&prev->base.s_fence->finished)); + drm_mock_sched_job_submit(job); + if (i == qd / 2) + mid = job; + prev = job; + } + + done = drm_mock_sched_job_wait_finished(mid, HZ); + KUNIT_ASSERT_TRUE(test, done); + + /* Exit with half of the queue still pending to be executed. */ + for (i = 0; i < ARRAY_SIZE(entity); i++) + drm_mock_sched_entity_free(entity[i]); +} + +static struct kunit_case drm_sched_basic_tests[] = { + KUNIT_CASE(drm_sched_basic_submit), + KUNIT_CASE_PARAM(drm_sched_basic_test, drm_sched_basic_gen_params), + KUNIT_CASE(drm_sched_basic_entity_cleanup), + {} +}; + +static struct kunit_suite drm_sched_basic = { + .name = "drm_sched_basic_tests", + .init = drm_sched_basic_init, + .exit = drm_sched_basic_exit, + .test_cases = drm_sched_basic_tests, +}; + +kunit_test_suite(drm_sched_basic); From 9259b0488ad2e6f498da4382093f0a87f1dd41e5 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 24 Mar 2025 09:26:30 +0000 Subject: [PATCH 2904/2924] drm/sched: Add a simple timeout test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a very simple timeout test which submits a single job and verifies that the timeout handling will run if the backend failed to complete the job in time. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner Acked-by: Christian König Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250324092633.49746-4-tvrtko.ursulin@igalia.com --- .../gpu/drm/scheduler/tests/mock_scheduler.c | 11 +++- drivers/gpu/drm/scheduler/tests/sched_tests.h | 4 +- drivers/gpu/drm/scheduler/tests/tests_basic.c | 64 ++++++++++++++++++- 3 files changed, 73 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c index d039f873cc118a..61efc96e6e41c3 100644 --- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c @@ -203,7 +203,11 @@ static struct dma_fence *mock_sched_run_job(struct drm_sched_job *sched_job) static enum drm_gpu_sched_stat mock_sched_timedout_job(struct drm_sched_job *sched_job) { - return DRM_GPU_SCHED_STAT_ENODEV; + struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); + + job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT; + + return DRM_GPU_SCHED_STAT_NOMINAL; } static void mock_sched_free_job(struct drm_sched_job *sched_job) @@ -234,17 +238,18 @@ static const struct drm_sched_backend_ops drm_mock_scheduler_ops = { * drm_mock_sched_new - Create a new mock scheduler * * @test: KUnit test owning the job + * @timeout: Job timeout to set * * Returns: New mock scheduler with allocation managed by the test */ -struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test) +struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, long timeout) { struct drm_sched_init_args args = { .ops = &drm_mock_scheduler_ops, .num_rqs = DRM_SCHED_PRIORITY_COUNT, .credit_limit = U32_MAX, .hang_limit = 1, - .timeout = MAX_SCHEDULE_TIMEOUT, + .timeout = timeout, .name = "drm-mock-scheduler", }; struct drm_mock_scheduler *sched; diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h index 31aaba3443fabd..27caf8285fb74b 100644 --- a/drivers/gpu/drm/scheduler/tests/sched_tests.h +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h @@ -97,6 +97,7 @@ struct drm_mock_sched_job { struct completion done; #define DRM_MOCK_SCHED_JOB_DONE 0x1 +#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x2 unsigned long flags; struct list_head link; @@ -129,7 +130,8 @@ drm_sched_job_to_mock_job(struct drm_sched_job *sched_job) return container_of(sched_job, struct drm_mock_sched_job, base); }; -struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test); +struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, + long timeout); void drm_mock_sched_fini(struct drm_mock_scheduler *sched); unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched, unsigned int num); diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c index c06672e13cf610..0e1fa4767b0d88 100644 --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -12,7 +12,7 @@ static int drm_sched_basic_init(struct kunit *test) { - test->priv = drm_mock_sched_new(test); + test->priv = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); return 0; } @@ -24,6 +24,13 @@ static void drm_sched_basic_exit(struct kunit *test) drm_mock_sched_fini(sched); } +static int drm_sched_timeout_init(struct kunit *test) +{ + test->priv = drm_mock_sched_new(test, HZ); + + return 0; +} + static void drm_sched_basic_submit(struct kunit *test) { struct drm_mock_scheduler *sched = test->priv; @@ -195,4 +202,57 @@ static struct kunit_suite drm_sched_basic = { .test_cases = drm_sched_basic_tests, }; -kunit_test_suite(drm_sched_basic); +static void drm_sched_basic_timeout(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity; + struct drm_mock_sched_job *job; + bool done; + + /* + * Submit a single job against a scheduler with the timeout configured + * and verify that the timeout handling will run if the backend fails + * to complete it in time. + */ + + entity = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + job = drm_mock_sched_job_new(test, entity); + + drm_mock_sched_job_submit(job); + + done = drm_mock_sched_job_wait_scheduled(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + done = drm_mock_sched_job_wait_finished(job, HZ / 2); + KUNIT_ASSERT_FALSE(test, done); + + KUNIT_ASSERT_EQ(test, + job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT, + 0); + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_FALSE(test, done); + + KUNIT_ASSERT_EQ(test, + job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT, + DRM_MOCK_SCHED_JOB_TIMEDOUT); + + drm_mock_sched_entity_free(entity); +} + +static struct kunit_case drm_sched_timeout_tests[] = { + KUNIT_CASE(drm_sched_basic_timeout), + {} +}; + +static struct kunit_suite drm_sched_timeout = { + .name = "drm_sched_basic_timeout_tests", + .init = drm_sched_timeout_init, + .exit = drm_sched_basic_exit, + .test_cases = drm_sched_timeout_tests, +}; + +kunit_test_suites(&drm_sched_basic, + &drm_sched_timeout); From 862f5a57539fcd5658983aea1766e6f3fd1fe16d Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 24 Mar 2025 09:26:31 +0000 Subject: [PATCH 2905/2924] drm/sched: Add basic priority tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add some basic tests for exercising entity priority handling. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner Acked-by: Christian König Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250324092633.49746-5-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/scheduler/tests/tests_basic.c | 95 ++++++++++++++++++- 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c index 0e1fa4767b0d88..10378b7ca457de 100644 --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2025 Valve Corporation */ +#include + #include "sched_tests.h" /* @@ -254,5 +256,96 @@ static struct kunit_suite drm_sched_timeout = { .test_cases = drm_sched_timeout_tests, }; +static void drm_sched_priorities(struct kunit *test) +{ + struct drm_mock_sched_entity *entity[DRM_SCHED_PRIORITY_COUNT]; + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_job *job; + const unsigned int qd = 100; + unsigned int i, cur_ent = 0; + enum drm_sched_priority p; + bool done; + + /* + * Submit a bunch of jobs against entities configured with different + * priorities. + */ + + BUILD_BUG_ON(DRM_SCHED_PRIORITY_KERNEL > DRM_SCHED_PRIORITY_LOW); + BUILD_BUG_ON(ARRAY_SIZE(entity) != DRM_SCHED_PRIORITY_COUNT); + + for (p = DRM_SCHED_PRIORITY_KERNEL; p <= DRM_SCHED_PRIORITY_LOW; p++) + entity[p] = drm_mock_sched_entity_new(test, p, sched); + + for (i = 0; i < qd; i++) { + job = drm_mock_sched_job_new(test, entity[cur_ent++]); + cur_ent %= ARRAY_SIZE(entity); + drm_mock_sched_job_set_duration_us(job, 1000); + drm_mock_sched_job_submit(job); + } + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + drm_mock_sched_entity_free(entity[i]); +} + +static void drm_sched_change_priority(struct kunit *test) +{ + struct drm_mock_sched_entity *entity[DRM_SCHED_PRIORITY_COUNT]; + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_job *job; + const unsigned int qd = 1000; + unsigned int i, cur_ent = 0; + enum drm_sched_priority p; + + /* + * Submit a bunch of jobs against entities configured with different + * priorities and while waiting for them to complete, periodically keep + * changing their priorities. + * + * We set up the queue-depth (qd) and job duration so the priority + * changing loop has some time to interact with submissions to the + * backend and job completions as they progress. + */ + + for (p = DRM_SCHED_PRIORITY_KERNEL; p <= DRM_SCHED_PRIORITY_LOW; p++) + entity[p] = drm_mock_sched_entity_new(test, p, sched); + + for (i = 0; i < qd; i++) { + job = drm_mock_sched_job_new(test, entity[cur_ent++]); + cur_ent %= ARRAY_SIZE(entity); + drm_mock_sched_job_set_duration_us(job, 1000); + drm_mock_sched_job_submit(job); + } + + do { + drm_sched_entity_set_priority(&entity[cur_ent]->base, + (entity[cur_ent]->base.priority + 1) % + DRM_SCHED_PRIORITY_COUNT); + cur_ent++; + cur_ent %= ARRAY_SIZE(entity); + usleep_range(200, 500); + } while (!drm_mock_sched_job_is_finished(job)); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + drm_mock_sched_entity_free(entity[i]); +} + +static struct kunit_case drm_sched_priority_tests[] = { + KUNIT_CASE(drm_sched_priorities), + KUNIT_CASE(drm_sched_change_priority), + {} +}; + +static struct kunit_suite drm_sched_priority = { + .name = "drm_sched_basic_priority_tests", + .init = drm_sched_basic_init, + .exit = drm_sched_basic_exit, + .test_cases = drm_sched_priority_tests, +}; + kunit_test_suites(&drm_sched_basic, - &drm_sched_timeout); + &drm_sched_timeout, + &drm_sched_priority); From 8e24cdae039573ce3a2cfffe43b95a06d99ec791 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 24 Mar 2025 09:26:32 +0000 Subject: [PATCH 2906/2924] drm/sched: Add a basic test for modifying entities scheduler list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a basic test for exercising modifying the entities scheduler list at runtime. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner Acked-by: Christian König Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250324092633.49746-6-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/scheduler/tests/tests_basic.c | 69 ++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c index 10378b7ca457de..996cac00bb5235 100644 --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -346,6 +346,73 @@ static struct kunit_suite drm_sched_priority = { .test_cases = drm_sched_priority_tests, }; +static void drm_sched_test_modify_sched(struct kunit *test) +{ + unsigned int i, cur_ent = 0, cur_sched = 0; + struct drm_mock_sched_entity *entity[13]; + struct drm_mock_scheduler *sched[3]; + struct drm_mock_sched_job *job; + const unsigned int qd = 1000; + + /* + * Submit a bunch of jobs against entities configured with different + * schedulers and while waiting for them to complete, periodically keep + * changing schedulers associated with each entity. + * + * We set up the queue-depth (qd) and job duration so the sched modify + * loop has some time to interact with submissions to the backend and + * job completions as they progress. + * + * For the number of schedulers and entities we use primes in order to + * perturb the entity->sched assignments with less of a regular pattern. + */ + + for (i = 0; i < ARRAY_SIZE(sched); i++) + sched[i] = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + entity[i] = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched[i % ARRAY_SIZE(sched)]); + + for (i = 0; i < qd; i++) { + job = drm_mock_sched_job_new(test, entity[cur_ent++]); + cur_ent %= ARRAY_SIZE(entity); + drm_mock_sched_job_set_duration_us(job, 1000); + drm_mock_sched_job_submit(job); + } + + do { + struct drm_gpu_scheduler *modify; + + usleep_range(200, 500); + cur_ent++; + cur_ent %= ARRAY_SIZE(entity); + cur_sched++; + cur_sched %= ARRAY_SIZE(sched); + modify = &sched[cur_sched]->base; + drm_sched_entity_modify_sched(&entity[cur_ent]->base, &modify, + 1); + } while (!drm_mock_sched_job_is_finished(job)); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + drm_mock_sched_entity_free(entity[i]); + + for (i = 0; i < ARRAY_SIZE(sched); i++) + drm_mock_sched_fini(sched[i]); +} + +static struct kunit_case drm_sched_modify_sched_tests[] = { + KUNIT_CASE(drm_sched_test_modify_sched), + {} +}; + +static struct kunit_suite drm_sched_modify_sched = { + .name = "drm_sched_basic_modify_sched_tests", + .test_cases = drm_sched_modify_sched_tests, +}; + kunit_test_suites(&drm_sched_basic, &drm_sched_timeout, - &drm_sched_priority); + &drm_sched_priority, + &drm_sched_modify_sched); From a3deb12f7801739178fd15d95b59ed2b39cf644e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 24 Mar 2025 09:26:33 +0000 Subject: [PATCH 2907/2924] drm/sched: Add a basic test for checking credit limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a basic test for checking whether scheduler respects the configured credit limit. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner Acked-by: Christian König Signed-off-by: Philipp Stanner Link: https://patchwork.freedesktop.org/patch/msgid/20250324092633.49746-7-tvrtko.ursulin@igalia.com --- drivers/gpu/drm/scheduler/tests/tests_basic.c | 60 ++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c index 996cac00bb5235..7230057e0594c6 100644 --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -412,7 +412,65 @@ static struct kunit_suite drm_sched_modify_sched = { .test_cases = drm_sched_modify_sched_tests, }; +static void drm_sched_test_credits(struct kunit *test) +{ + struct drm_mock_sched_entity *entity; + struct drm_mock_scheduler *sched; + struct drm_mock_sched_job *job[2]; + bool done; + int i; + + /* + * Check that the configured credit limit is respected. + */ + + sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + sched->base.credit_limit = 1; + + entity = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + job[0] = drm_mock_sched_job_new(test, entity); + job[1] = drm_mock_sched_job_new(test, entity); + + drm_mock_sched_job_submit(job[0]); + drm_mock_sched_job_submit(job[1]); + + done = drm_mock_sched_job_wait_scheduled(job[0], HZ); + KUNIT_ASSERT_TRUE(test, done); + + done = drm_mock_sched_job_wait_scheduled(job[1], HZ); + KUNIT_ASSERT_FALSE(test, done); + + i = drm_mock_sched_advance(sched, 1); + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_scheduled(job[1], HZ); + KUNIT_ASSERT_TRUE(test, done); + + i = drm_mock_sched_advance(sched, 1); + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_finished(job[1], HZ); + KUNIT_ASSERT_TRUE(test, done); + + drm_mock_sched_entity_free(entity); + drm_mock_sched_fini(sched); +} + +static struct kunit_case drm_sched_credits_tests[] = { + KUNIT_CASE(drm_sched_test_credits), + {} +}; + +static struct kunit_suite drm_sched_credits = { + .name = "drm_sched_basic_credits_tests", + .test_cases = drm_sched_credits_tests, +}; + kunit_test_suites(&drm_sched_basic, &drm_sched_timeout, &drm_sched_priority, - &drm_sched_modify_sched); + &drm_sched_modify_sched, + &drm_sched_credits); From ae7835dfef74190ac79b4e81a6b2a2d3fbf936af Mon Sep 17 00:00:00 2001 From: "Lin.Cao" Date: Thu, 15 May 2025 10:07:13 +0800 Subject: [PATCH 2908/2924] drm/scheduler: signal scheduled fence when kill job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an entity from application B is killed, drm_sched_entity_kill() removes all jobs belonging to that entity through drm_sched_entity_kill_jobs_work(). If application A's job depends on a scheduled fence from application B's job, and that fence is not properly signaled during the killing process, application A's dependency cannot be cleared. This leads to application A hanging indefinitely while waiting for a dependency that will never be resolved. Fix this issue by ensuring that scheduled fences are properly signaled when an entity is killed, allowing dependent applications to continue execution. Signed-off-by: Lin.Cao Reviewed-by: Philipp Stanner Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250515020713.1110476-1-lincao12@amd.com --- drivers/gpu/drm/scheduler/sched_entity.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index bd39db7bb24087..e671aa24172068 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -176,6 +176,7 @@ static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) { struct drm_sched_job *job = container_of(wrk, typeof(*job), work); + drm_sched_fence_scheduled(job->s_fence, NULL); drm_sched_fence_finished(job->s_fence, -ESRCH); WARN_ON(job->s_fence->parent); job->sched->ops->free_job(job); From 00d2cb4c1af6a2a325eae9d0e5f7da8ff5f149bf Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:31 +0100 Subject: [PATCH 2909/2924] drm/sched: Add some scheduling quality unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make evaluating different scheduling policies easier (no need for external benchmarks) and perfectly repeatable, lets add some synthetic workloads built upon mock scheduler unit test infrastructure. Focus is on two parallel clients (two threads) submitting different job patterns and logging their progress and some overall metrics. This is repeated for both scheduler credit limit 1 and 2. Example test output: Normal and low: pct1 cps1 qd1; pct2 cps2 qd2 + 0ms: 0 0 0; 0 0 0 + 104ms: 100 1240 112; 100 1240 125 + 209ms: 100 0 99; 100 0 125 + 313ms: 100 0 86; 100 0 125 + 419ms: 100 0 73; 100 0 125 + 524ms: 100 0 60; 100 0 125 + 628ms: 100 0 47; 100 0 125 + 731ms: 100 0 34; 100 0 125 + 836ms: 100 0 21; 100 0 125 + 939ms: 100 0 8; 100 0 125 + 1043ms: ; 100 0 120 + 1147ms: ; 100 0 107 + 1252ms: ; 100 0 94 + 1355ms: ; 100 0 81 + 1459ms: ; 100 0 68 + 1563ms: ; 100 0 55 + 1667ms: ; 100 0 42 + 1771ms: ; 100 0 29 + 1875ms: ; 100 0 16 + 1979ms: ; 100 0 3 0: prio=normal sync=0 elapsed_ms=1015ms (ideal_ms=1000ms) cycle_time(min,avg,max)=134,222,978 us latency_time(min,avg,max)=134,222,978 us 1: prio=low sync=0 elapsed_ms=2009ms (ideal_ms=1000ms) cycle_time(min,avg,max)=134,215,806 us latency_time(min,avg,max)=134,215,806 us There we have two clients represented in the two respective columns, with their progress logged roughly every 100 milliseconds. The metrics are: - pct - Percentage progress of the job submit part - cps - Cycles per second - qd - Queue depth - number of submitted unfinished jobs The cycles per second metric is inherent to the fact that workload patterns are a data driven cycling sequence of: - Submit 1..N jobs - Wait for Nth job to finish (optional) - Sleep (optional) - Repeat from start In this particular example we have a normal priority and a low priority clients both spamming the scheduler with 8ms jobs with no sync and no sleeping. Hence they build a very deep queues and we can see how the low priority client is completely starved until the normal finishes. Note that the PCT and CPS metrics are irrelevant for "unsync" clients since they manage to complete all of their cycles instantaneously. A different example would be: Heavy and interactive: pct1 cps1 qd1; pct2 cps2 qd2 + 0ms: 0 0 0; 0 0 0 + 106ms: 5 40 3; 5 40 0 + 209ms: 9 40 0; 9 40 0 + 314ms: 14 50 3; 14 50 0 + 417ms: 18 40 0; 18 40 0 + 522ms: 23 50 3; 23 50 0 + 625ms: 27 40 0; 27 40 1 + 729ms: 32 50 0; 32 50 0 + 833ms: 36 40 1; 36 40 0 + 937ms: 40 40 0; 40 40 0 + 1041ms: 45 50 0; 45 50 0 + 1146ms: 49 40 1; 49 40 1 + 1249ms: 54 50 0; 54 50 0 + 1353ms: 58 40 1; 58 40 0 + 1457ms: 62 40 0; 62 40 1 + 1561ms: 67 50 0; 67 50 0 + 1665ms: 71 40 1; 71 40 0 + 1772ms: 76 50 0; 76 50 0 + 1877ms: 80 40 1; 80 40 0 + 1981ms: 84 40 0; 84 40 0 + 2085ms: 89 50 0; 89 50 0 + 2189ms: 93 40 1; 93 40 0 + 2293ms: 97 40 0; 97 40 1 In this case client one is submitting 3x 2.5ms jobs, waiting for the 3rd and then sleeping for 2.5ms (in effect causing 75% GPU load, minus the overheads). Second client is submitting 1ms jobs, waiting for each to finish and sleeping for 9ms (effective 10% GPU load). Here we can see the PCT and CPS reflecting real progress. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner Cc: Pierre-Eric Pelloux-Prayer Acked-by: Christian König --- drivers/gpu/drm/scheduler/tests/Makefile | 3 +- .../gpu/drm/scheduler/tests/tests_scheduler.c | 631 ++++++++++++++++++ 2 files changed, 633 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/scheduler/tests/tests_scheduler.c diff --git a/drivers/gpu/drm/scheduler/tests/Makefile b/drivers/gpu/drm/scheduler/tests/Makefile index 5bf707bad37344..9ec185fbbc153a 100644 --- a/drivers/gpu/drm/scheduler/tests/Makefile +++ b/drivers/gpu/drm/scheduler/tests/Makefile @@ -2,6 +2,7 @@ drm-sched-tests-y := \ mock_scheduler.o \ - tests_basic.o + tests_basic.o \ + tests_scheduler.o obj-$(CONFIG_DRM_SCHED_KUNIT_TEST) += drm-sched-tests.o diff --git a/drivers/gpu/drm/scheduler/tests/tests_scheduler.c b/drivers/gpu/drm/scheduler/tests/tests_scheduler.c new file mode 100644 index 00000000000000..b66321ef7abef2 --- /dev/null +++ b/drivers/gpu/drm/scheduler/tests/tests_scheduler.c @@ -0,0 +1,631 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Valve Corporation */ + +#include +#include +#include + +#include "sched_tests.h" + +/* + * DRM scheduler scheduler tests exercise load balancing decisions ie. entity + * selection logic. + */ + +static int drm_sched_scheduler_init(struct kunit *test) +{ + struct drm_mock_scheduler *sched; + + sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + sched->base.credit_limit = 1; + + test->priv = sched; + + return 0; +} + +static int drm_sched_scheduler_init2(struct kunit *test) +{ + struct drm_mock_scheduler *sched; + + sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + sched->base.credit_limit = 2; + + test->priv = sched; + + return 0; +} + +static void drm_sched_scheduler_exit(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + + drm_mock_sched_fini(sched); +} + +static void drm_sched_scheduler_queue_overhead(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity; + const unsigned int job_us = 1000; + const unsigned int jobs = 1000; + const unsigned int total_us = jobs * job_us; + struct drm_mock_sched_job *job, *first; + ktime_t start, end; + bool done; + int i; + + /* + * Deep queue job at a time processing (single credit). + * + * This measures the overhead of picking and processing a job at a time + * by comparing the ideal total "GPU" time of all submitted jobs versus + * the time actually taken. + */ + + KUNIT_ASSERT_EQ(test, sched->base.credit_limit, 1); + + entity = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + for (i = 0; i <= jobs; i++) { + job = drm_mock_sched_job_new(test, entity); + if (i == 0) + first = job; /* Extra first job blocks the queue */ + else + drm_mock_sched_job_set_duration_us(job, job_us); + drm_mock_sched_job_submit(job); + } + + done = drm_mock_sched_job_wait_scheduled(first, HZ); + KUNIT_ASSERT_TRUE(test, done); + + start = ktime_get(); + i = drm_mock_sched_advance(sched, 1); /* Release the queue */ + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_finished(job, + usecs_to_jiffies(total_us) * 5); + end = ktime_get(); + KUNIT_ASSERT_TRUE(test, done); + + pr_info("Expected %uus, actual %lldus\n", + total_us, + ktime_to_us(ktime_sub(end, start))); + + drm_mock_sched_entity_free(entity); +} + +static void drm_sched_scheduler_ping_pong(struct kunit *test) +{ + struct drm_mock_sched_job *job, *first, *prev = NULL; + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity[2]; + const unsigned int job_us = 1000; + const unsigned int jobs = 1000; + const unsigned int total_us = jobs * job_us; + ktime_t start, end; + bool done; + int i; + + /* + * Two entitites in inter-dependency chain. + * + * This measures the overhead of picking and processing a job at a time, + * where each job depends on the previous one from the diffferent + * entity, by comparing the ideal total "GPU" time of all submitted jobs + * versus the time actually taken. + */ + + KUNIT_ASSERT_EQ(test, sched->base.credit_limit, 1); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + entity[i] = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + for (i = 0; i <= jobs; i++) { + job = drm_mock_sched_job_new(test, entity[i & 1]); + if (i == 0) + first = job; /* Extra first job blocks the queue */ + else + drm_mock_sched_job_set_duration_us(job, job_us); + if (prev) + drm_sched_job_add_dependency(&job->base, + dma_fence_get(&prev->base.s_fence->finished)); + drm_mock_sched_job_submit(job); + prev = job; + } + + done = drm_mock_sched_job_wait_scheduled(first, HZ); + KUNIT_ASSERT_TRUE(test, done); + + start = ktime_get(); + i = drm_mock_sched_advance(sched, 1); /* Release the queue */ + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_finished(job, + usecs_to_jiffies(total_us) * 5); + end = ktime_get(); + KUNIT_ASSERT_TRUE(test, done); + + pr_info("Expected %uus, actual %lldus\n", + total_us, + ktime_to_us(ktime_sub(end, start))); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + drm_mock_sched_entity_free(entity[i]); +} + +static struct kunit_case drm_sched_scheduler_overhead_tests[] = { + KUNIT_CASE_SLOW(drm_sched_scheduler_queue_overhead), + KUNIT_CASE_SLOW(drm_sched_scheduler_ping_pong), + {} +}; + +static struct kunit_suite drm_sched_scheduler_overhead = { + .name = "drm_sched_scheduler_overhead_tests", + .init = drm_sched_scheduler_init, + .exit = drm_sched_scheduler_exit, + .test_cases = drm_sched_scheduler_overhead_tests, +}; + +struct drm_sched_client_params { + enum drm_sched_priority priority; + unsigned int job_cnt; + unsigned int job_us; + unsigned int wait_us; + bool sync; +}; + +struct drm_sched_test_params { + const char *description; + struct drm_sched_client_params client[2]; +}; + +static const struct drm_sched_test_params drm_sched_cases[] = { + { + .description = "Normal and normal", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + }, + { + .description = "Normal and low", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_LOW, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + }, + { + .description = "High and normal", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_HIGH, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + }, + { + .description = "High and low", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_HIGH, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_LOW, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + }, + { + .description = "50 and 50", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 1500, + .wait_us = 1500, + .sync = true, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 2500, + .wait_us = 2500, + .sync = true, + }, + }, + { + .description = "50 and 50 low", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 1500, + .wait_us = 1500, + .sync = true, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_LOW, + .job_cnt = 1, + .job_us = 2500, + .wait_us = 2500, + .sync = true, + }, + }, + { + .description = "50 high and 50", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_HIGH, + .job_cnt = 1, + .job_us = 1500, + .wait_us = 1500, + .sync = true, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 2500, + .wait_us = 2500, + .sync = true, + }, + }, + { + .description = "Low hog and interactive", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_LOW, + .job_cnt = 3, + .job_us = 2500, + .wait_us = 500, + .sync = false, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 500, + .wait_us = 10000, + .sync = true, + }, + }, + { + .description = "Heavy and interactive", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 3, + .job_us = 2500, + .wait_us = 2500, + .sync = true, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 1000, + .wait_us = 9000, + .sync = true, + }, + }, + { + .description = "Very heavy and interactive", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 4, + .job_us = 50000, + .wait_us = 1, + .sync = true, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 1000, + .wait_us = 9000, + .sync = true, + }, + }, +}; + +static void +drm_sched_desc(const struct drm_sched_test_params *params, char *desc) +{ + strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(drm_sched_scheduler_two_clients, + drm_sched_cases, + drm_sched_desc); + +struct test_client_stats { + unsigned long min_us; + unsigned long max_us; + unsigned long avg_us; +}; + +struct test_client { + struct kunit *test; + + struct drm_mock_sched_entity *entity; + + struct kthread_worker *worker; + struct kthread_work work; + + unsigned int id; + ktime_t duration; + + struct drm_sched_client_params params; + + ktime_t ideal_duration; + unsigned int cycles; + unsigned int cycle; + ktime_t start; + ktime_t end; + bool done; + + struct test_client_stats cycle_time; + struct test_client_stats latency_time; +}; + +static void +update_stats(struct test_client_stats *stats, unsigned int n, unsigned long us) +{ + if (us > stats->max_us) + stats->max_us = us; + if (us < stats->min_us) + stats->min_us = us; + stats->avg_us = DIV_ROUND_UP(n * stats->avg_us + us, n + 1); +} + +static void drm_sched_client_work(struct kthread_work *work) +{ + struct test_client *client = container_of(work, typeof(*client), work); + const long sync_wait = MAX_SCHEDULE_TIMEOUT; + unsigned int cycle, work_us, period_us; + struct drm_mock_sched_job *job = NULL; + + work_us = client->params.job_cnt * client->params.job_us; + period_us = work_us + client->params.wait_us; + client->cycles = DIV_ROUND_UP(ktime_to_us(client->duration), period_us); + client->ideal_duration = us_to_ktime(client->cycles * period_us); + + client->start = ktime_get(); + + for (cycle = 0; cycle < client->cycles; cycle++) { + unsigned int batch; + unsigned long us; + ktime_t t; + + if (READ_ONCE(client->done)) + break; + + t = ktime_get(); + for (batch = 0; batch < client->params.job_cnt; batch++) { + job = drm_mock_sched_job_new(client->test, + client->entity); + drm_mock_sched_job_set_duration_us(job, + client->params.job_us); + drm_mock_sched_job_submit(job); + } + + if (client->params.sync) + drm_mock_sched_job_wait_finished(job, sync_wait); + + t = ktime_sub(ktime_get(), t); + us = ktime_to_us(t); + update_stats(&client->cycle_time, cycle, us); + if (ktime_to_us(t) >= (long)work_us) + us = ktime_to_us(t) - work_us; + else if (WARN_ON_ONCE(client->params.sync)) + us = 0; + update_stats(&client->latency_time, cycle, us); + WRITE_ONCE(client->cycle, cycle); + + if (READ_ONCE(client->done)) + break; + + if (client->params.wait_us) + fsleep(client->params.wait_us); + else + cond_resched(); + } + + client->done = drm_mock_sched_job_wait_finished(job, sync_wait); + client->end = ktime_get(); +} + +static const char *prio_str(enum drm_sched_priority prio) +{ + switch (prio) { + case DRM_SCHED_PRIORITY_KERNEL: + return "kernel"; + case DRM_SCHED_PRIORITY_LOW: + return "low"; + case DRM_SCHED_PRIORITY_NORMAL: + return "normal"; + case DRM_SCHED_PRIORITY_HIGH: + return "high"; + default: + return "???"; + } +} + +static void drm_sched_scheduler_two_clients_test(struct kunit *test) +{ + const struct drm_sched_test_params *params = test->param_value; + struct drm_mock_scheduler *sched = test->priv; + struct test_client client[2] = { }; + unsigned int prev_cycle[2] = { }; + unsigned int i, j; + ktime_t start; + + /* + * Same job stream from from two clients. + */ + + for (i = 0; i < ARRAY_SIZE(client); i++) + client[i].entity = + drm_mock_sched_entity_new(test, + params->client[i].priority, + sched); + + for (i = 0; i < ARRAY_SIZE(client); i++) { + client[i].test = test; + client[i].id = i; + client[i].duration = ms_to_ktime(1000); + client[i].params = params->client[i]; + client[i].cycle_time.min_us = ~0UL; + client[i].latency_time.min_us = ~0UL; + client[i].worker = + kthread_create_worker(0, "%s-%u", __func__, i); + if (IS_ERR(client[i].worker)) { + for (j = 0; j < i; j++) + kthread_destroy_worker(client[j].worker); + KUNIT_FAIL(test, "Failed to create worker!\n"); + } + + kthread_init_work(&client[i].work, drm_sched_client_work); + } + + for (i = 0; i < ARRAY_SIZE(client); i++) + kthread_queue_work(client[i].worker, &client[i].work); + + /* + * The clients (workers) can be a mix of async (deep submission queue), + * sync (one job at a time), or something in between. Therefore it is + * difficult to display a single metric representing their progress. + * + * Each struct drm_sched_client_params describes the actual submission + * pattern which happens in the following steps: + * 1. Submit N jobs + * 2. Wait for last submitted job to finish + * 3. Sleep for U micro-seconds + * 4. Goto 1. for C cycles + * + * Where number of cycles is calculated to match the target client + * duration from the respective struct drm_sched_test_params. + * + * To asses scheduling behaviour what we output for both clients is: + * - pct: Percentage progress of the jobs submitted + * - cps: "Cycles" per second (where one cycle is one 1.-4. above) + * - qd: Number of outstanding jobs in the client/entity + */ + + start = ktime_get(); + pr_info("%s:\n\t pct1 cps1 qd1; pct2 cps2 qd2\n", + params->description); + while (!READ_ONCE(client[0].done) || !READ_ONCE(client[1].done)) { + unsigned int pct[2], qd[2], cycle[2], cps[2]; + + for (i = 0; i < ARRAY_SIZE(client); i++) { + qd[i] = spsc_queue_count(&client[i].entity->base.job_queue); + cycle[i] = READ_ONCE(client[i].cycle); + cps[i] = DIV_ROUND_UP(1000 * (cycle[i] - prev_cycle[i]), + 100); + if (client[i].cycles) + pct[i] = DIV_ROUND_UP(100 * (1 + cycle[i]), + client[i].cycles); + else + pct[i] = 0; + prev_cycle[i] = cycle[i]; + } + + if (READ_ONCE(client[0].done)) + pr_info("\t+%6lldms: ; %3u %5u %4u\n", + ktime_to_ms(ktime_sub(ktime_get(), start)), + pct[1], cps[1], qd[1]); + else if (READ_ONCE(client[1].done)) + pr_info("\t+%6lldms: %3u %5u %4u;\n", + ktime_to_ms(ktime_sub(ktime_get(), start)), + pct[0], cps[0], qd[0]); + else + pr_info("\t+%6lldms: %3u %5u %4u; %3u %5u %4u\n", + ktime_to_ms(ktime_sub(ktime_get(), start)), + pct[0], cps[0], qd[0], + pct[1], cps[1], qd[1]); + msleep(100); + } + + for (i = 0; i < ARRAY_SIZE(client); i++) { + kthread_flush_work(&client[i].work); + kthread_destroy_worker(client[i].worker); + } + + for (i = 0; i < ARRAY_SIZE(client); i++) + KUNIT_ASSERT_TRUE(test, client[i].done); + + for (i = 0; i < ARRAY_SIZE(client); i++) { + pr_info(" %u: prio=%s sync=%u elapsed_ms=%lldms (ideal_ms=%lldms) cycle_time(min,avg,max)=%lu,%lu,%lu us latency_time(min,avg,max)=%lu,%lu,%lu us", + i, + prio_str(params->client[i].priority), + params->client[i].sync, + ktime_to_ms(ktime_sub(client[i].end, client[i].start)), + ktime_to_ms(client[i].ideal_duration), + client[i].cycle_time.min_us, + client[i].cycle_time.avg_us, + client[i].cycle_time.max_us, + client[i].latency_time.min_us, + client[i].latency_time.avg_us, + client[i].latency_time.max_us); + drm_mock_sched_entity_free(client[i].entity); + } +} + +static const struct kunit_attributes drm_sched_scheduler_two_clients_attr = { + .speed = KUNIT_SPEED_SLOW, +}; + +static struct kunit_case drm_sched_scheduler_two_clients_tests[] = { + KUNIT_CASE_PARAM_ATTR(drm_sched_scheduler_two_clients_test, + drm_sched_scheduler_two_clients_gen_params, + drm_sched_scheduler_two_clients_attr), + {} +}; + +static struct kunit_suite drm_sched_scheduler_two_clients1 = { + .name = "drm_sched_scheduler_two_clients_one_credit_tests", + .init = drm_sched_scheduler_init, + .exit = drm_sched_scheduler_exit, + .test_cases = drm_sched_scheduler_two_clients_tests, +}; + +static struct kunit_suite drm_sched_scheduler_two_clients2 = { + .name = "drm_sched_scheduler_two_clients_two_credits_tests", + .init = drm_sched_scheduler_init2, + .exit = drm_sched_scheduler_exit, + .test_cases = drm_sched_scheduler_two_clients_tests, +}; + +kunit_test_suites(&drm_sched_scheduler_overhead, + &drm_sched_scheduler_two_clients1, + &drm_sched_scheduler_two_clients2); From 61d86637c9bc36449e49d5dd4b01e2bd9331e843 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:32 +0100 Subject: [PATCH 2910/2924] drm/sched: Add some more scheduling quality unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This time round we explore the rate of submitted job queue processing with multiple identical parallel clients. Example test output: 3 clients: t cycle: min avg max : ... + 0ms 0 0 0 : 0 0 0 + 102ms 2 2 2 : 2 2 2 + 208ms 5 6 6 : 6 5 5 + 310ms 8 9 9 : 9 9 8 ... + 2616ms 82 83 83 : 83 83 82 + 2717ms 83 83 83 : 83 83 83 avg_max_min_delta(x100)=60 Every 100ms for the duration of the test test logs how many jobs each client had completed, prefixed by minimum, average and maximum numbers. When finished overall average delta between max and min is output as a rough indicator to scheduling fairness. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner Cc: Pierre-Eric Pelloux-Prayer Acked-by: Christian König --- .../gpu/drm/scheduler/tests/tests_scheduler.c | 186 +++++++++++++++++- 1 file changed, 185 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/scheduler/tests/tests_scheduler.c b/drivers/gpu/drm/scheduler/tests/tests_scheduler.c index b66321ef7abef2..d70b47d7bf7a31 100644 --- a/drivers/gpu/drm/scheduler/tests/tests_scheduler.c +++ b/drivers/gpu/drm/scheduler/tests/tests_scheduler.c @@ -181,6 +181,7 @@ struct drm_sched_client_params { struct drm_sched_test_params { const char *description; + unsigned int num_clients; struct drm_sched_client_params client[2]; }; @@ -626,6 +627,189 @@ static struct kunit_suite drm_sched_scheduler_two_clients2 = { .test_cases = drm_sched_scheduler_two_clients_tests, }; + +static const struct drm_sched_test_params drm_sched_many_cases[] = { + { + .description = "2 clients", + .num_clients = 2, + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 4, + .job_us = 1000, + .wait_us = 0, + .sync = true, + }, + }, + { + .description = "3 clients", + .num_clients = 3, + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 4, + .job_us = 1000, + .wait_us = 0, + .sync = true, + }, + }, + { + .description = "7 clients", + .num_clients = 7, + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 4, + .job_us = 1000, + .wait_us = 0, + .sync = true, + }, + }, + { + .description = "13 clients", + .num_clients = 13, + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 4, + .job_us = 1000, + .wait_us = 0, + .sync = true, + }, + }, + { + .description = "31 clients", + .num_clients = 31, + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 2, + .job_us = 1000, + .wait_us = 0, + .sync = true, + }, + }, +}; + +KUNIT_ARRAY_PARAM(drm_sched_scheduler_many_clients, + drm_sched_many_cases, + drm_sched_desc); + +static void drm_sched_scheduler_many_clients_test(struct kunit *test) +{ + const struct drm_sched_test_params *params = test->param_value; + struct drm_mock_scheduler *sched = test->priv; + const unsigned int clients = params->num_clients; + unsigned int i, j, delta_total = 0, loops = 0; + struct test_client *client; + unsigned int *prev_cycle; + ktime_t start; + char *buf; + + /* + * Many clients with deep-ish async queues. + */ + + buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL); + client = kunit_kcalloc(test, clients, sizeof(*client), GFP_KERNEL); + prev_cycle = kunit_kcalloc(test, clients, sizeof(*prev_cycle), + GFP_KERNEL); + + for (i = 0; i < clients; i++) + client[i].entity = + drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + for (i = 0; i < clients; i++) { + client[i].test = test; + client[i].id = i; + client[i].params = params->client[0]; + client[i].duration = ms_to_ktime(1000 / clients); + client[i].cycle_time.min_us = ~0UL; + client[i].latency_time.min_us = ~0UL; + client[i].worker = + kthread_create_worker(0, "%s-%u", __func__, i); + if (IS_ERR(client[i].worker)) { + for (j = 0; j < i; j++) + kthread_destroy_worker(client[j].worker); + KUNIT_FAIL(test, "Failed to create worker!\n"); + } + + kthread_init_work(&client[i].work, drm_sched_client_work); + } + + for (i = 0; i < clients; i++) + kthread_queue_work(client[i].worker, &client[i].work); + + start = ktime_get(); + pr_info("%u clients:\n\tt\t\tcycle:\t min avg max : ...\n", clients); + for (;;) { + unsigned int min = ~0; + unsigned int max = 0; + unsigned int total = 0; + bool done = true; + char pbuf[16]; + + memset(buf, 0, PAGE_SIZE); + for (i = 0; i < clients; i++) { + unsigned int cycle, cycles; + + cycle = READ_ONCE(client[i].cycle); + cycles = READ_ONCE(client[i].cycles); + + snprintf(pbuf, sizeof(pbuf), " %3d", cycle); + strncat(buf, pbuf, PAGE_SIZE); + + total += cycle; + if (cycle < min) + min = cycle; + if (cycle > max) + max = cycle; + + if (!min || (cycle + 1) < cycles) + done = false; + } + + loops++; + delta_total += max - min; + + pr_info("\t+%6lldms\t\t %3u %3u %3u :%s\n", + ktime_to_ms(ktime_sub(ktime_get(), start)), + min, DIV_ROUND_UP(total, clients), max, buf); + + if (done) + break; + + msleep(100); + } + + pr_info(" avg_max_min_delta(x100)=%u\n", + loops ? DIV_ROUND_UP(delta_total * 100, loops) : 0); + + for (i = 0; i < clients; i++) { + kthread_flush_work(&client[i].work); + kthread_destroy_worker(client[i].worker); + } + + for (i = 0; i < clients; i++) + drm_mock_sched_entity_free(client[i].entity); +} + +static const struct kunit_attributes drm_sched_scheduler_many_clients_attr = { + .speed = KUNIT_SPEED_SLOW, +}; + +static struct kunit_case drm_sched_scheduler_many_clients_tests[] = { + KUNIT_CASE_PARAM_ATTR(drm_sched_scheduler_many_clients_test, + drm_sched_scheduler_many_clients_gen_params, + drm_sched_scheduler_many_clients_attr), + {} +}; + +static struct kunit_suite drm_sched_scheduler_many_clients = { + .name = "drm_sched_scheduler_many_clients_tests", + .init = drm_sched_scheduler_init2, + .exit = drm_sched_scheduler_exit, + .test_cases = drm_sched_scheduler_many_clients_tests, +}; + kunit_test_suites(&drm_sched_scheduler_overhead, &drm_sched_scheduler_two_clients1, - &drm_sched_scheduler_two_clients2); + &drm_sched_scheduler_two_clients2, + &drm_sched_scheduler_many_clients); From 9592517bad9dcf24da904f24e8cf100b39411c4a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:33 +0100 Subject: [PATCH 2911/2924] drm/sched: De-clutter drm_sched_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move work queue allocation into a helper for a more streamlined function body. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/sched_main.c | 33 ++++++++++++++++---------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 829579c41c6b5d..c485283ab2dd9a 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -83,12 +83,6 @@ #define CREATE_TRACE_POINTS #include "gpu_scheduler_trace.h" -#ifdef CONFIG_LOCKDEP -static struct lockdep_map drm_sched_lockdep_map = { - .name = "drm_sched_lockdep_map" -}; -#endif - int drm_sched_policy = DRM_SCHED_POLICY_FIFO; /** @@ -1256,6 +1250,25 @@ static void drm_sched_run_job_work(struct work_struct *w) drm_sched_run_job_queue(sched); } +static struct workqueue_struct *drm_sched_alloc_wq(const char *name) +{ +#if (IS_ENABLED(CONFIG_LOCKDEP)) + static struct lockdep_map map = { + .name = "drm_sched_lockdep_map" + }; + + /* + * Avoid leaking a lockdep map on each drm sched creation and + * destruction by using a single lockdep map for all drm sched + * allocated submit_wq. + */ + + return alloc_ordered_workqueue_lockdep_map(name, WQ_MEM_RECLAIM, &map); +#else + return alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); +#endif +} + /** * drm_sched_init - Init a gpu scheduler instance * @@ -1296,13 +1309,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->submit_wq = args->submit_wq; sched->own_submit_wq = false; } else { -#ifdef CONFIG_LOCKDEP - sched->submit_wq = alloc_ordered_workqueue_lockdep_map(args->name, - WQ_MEM_RECLAIM, - &drm_sched_lockdep_map); -#else - sched->submit_wq = alloc_ordered_workqueue(args->name, WQ_MEM_RECLAIM); -#endif + sched->submit_wq = drm_sched_alloc_wq(args->name); if (!sched->submit_wq) return -ENOMEM; From 788e782430a804913b05494f04ede5906d6a6e42 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:34 +0100 Subject: [PATCH 2912/2924] drm/sched: Avoid double re-lock on the job free path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the job free work item will lock sched->job_list_lock first time to see if there are any jobs, free a single job, and then lock again to decide whether to re-queue itself if there are more finished jobs. Since drm_sched_get_finished_job() already looks at the second job in the queue we can simply add the signaled check and have it return the presence of more jobs to free to the caller. That way the work item does not have to lock the list again and repeat the signaled check. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/sched_main.c | 39 +++++++++++--------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index c485283ab2dd9a..42f8623ec7eb25 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -365,22 +365,6 @@ static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) queue_work(sched->submit_wq, &sched->work_free_job); } -/** - * drm_sched_run_free_queue - enqueue free-job work if ready - * @sched: scheduler instance - */ -static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) -{ - struct drm_sched_job *job; - - spin_lock(&sched->job_list_lock); - job = list_first_entry_or_null(&sched->pending_list, - struct drm_sched_job, list); - if (job && dma_fence_is_signaled(&job->s_fence->finished)) - __drm_sched_run_free_queue(sched); - spin_unlock(&sched->job_list_lock); -} - /** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done @@ -1095,12 +1079,13 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) * drm_sched_get_finished_job - fetch the next finished job to be destroyed * * @sched: scheduler instance + * @have_more: are there more finished jobs on the list * * Returns the next finished job from the pending list (if there is one) * ready for it to be destroyed. */ static struct drm_sched_job * -drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) +drm_sched_get_finished_job(struct drm_gpu_scheduler *sched, bool *have_more) { struct drm_sched_job *job, *next; @@ -1108,22 +1093,27 @@ drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list); - if (job && dma_fence_is_signaled(&job->s_fence->finished)) { /* remove job from pending_list */ list_del_init(&job->list); /* cancel this job's TO timer */ cancel_delayed_work(&sched->work_tdr); - /* make the scheduled timestamp more accurate */ + + *have_more = false; next = list_first_entry_or_null(&sched->pending_list, typeof(*next), list); - if (next) { + /* make the scheduled timestamp more accurate */ if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &next->s_fence->scheduled.flags)) next->s_fence->scheduled.timestamp = dma_fence_timestamp(&job->s_fence->finished); + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &next->s_fence->finished.flags)) + *have_more = true; + /* start TO timer for next job */ drm_sched_start_timeout(sched); } @@ -1182,12 +1172,15 @@ static void drm_sched_free_job_work(struct work_struct *w) struct drm_gpu_scheduler *sched = container_of(w, struct drm_gpu_scheduler, work_free_job); struct drm_sched_job *job; + bool have_more; - job = drm_sched_get_finished_job(sched); - if (job) + job = drm_sched_get_finished_job(sched, &have_more); + if (job) { sched->ops->free_job(job); + if (have_more) + __drm_sched_run_free_queue(sched); + } - drm_sched_run_free_queue(sched); drm_sched_run_job_queue(sched); } From 41d4d211d7f919b9ae6834ad7145a56fe7217338 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:35 +0100 Subject: [PATCH 2913/2924] drm/sched: Consolidate drm_sched_job_timedout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce to one spin_unlock for hopefully a little bit clearer flow in the function. It may appear that there is a behavioural change with the drm_sched_start_timeout_unlocked() now not being called if there were initially no jobs on the pending list, and then some appeared after unlock, however if the code would rely on the TDR handler restarting itself then it would fail to do that if the job arrived on the pending list after the check. Also fix one stale comment while touching the function. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/sched_main.c | 33 +++++++++++++------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 42f8623ec7eb25..66509d1582a022 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -516,17 +516,15 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) static void drm_sched_job_timedout(struct work_struct *work) { - struct drm_gpu_scheduler *sched; + struct drm_gpu_scheduler *sched = + container_of(work, struct drm_gpu_scheduler, work_tdr.work); + enum drm_gpu_sched_stat status; struct drm_sched_job *job; - enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL; - - sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); /* Protects against concurrent deletion in drm_sched_get_finished_job */ spin_lock(&sched->job_list_lock); job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list); - if (job) { /* * Remove the bad job so it cannot be freed by concurrent @@ -534,20 +532,21 @@ static void drm_sched_job_timedout(struct work_struct *work) * is parked at which point it's safe. */ list_del_init(&job->list); - spin_unlock(&sched->job_list_lock); + } + spin_unlock(&sched->job_list_lock); + + if (!job) + return; - status = job->sched->ops->timedout_job(job); + status = job->sched->ops->timedout_job(job); - /* - * Guilty job did complete and hence needs to be manually removed - * See drm_sched_stop doc. - */ - if (sched->free_guilty) { - job->sched->ops->free_job(job); - sched->free_guilty = false; - } - } else { - spin_unlock(&sched->job_list_lock); + /* + * Guilty job did complete and hence needs to be manually removed. See + * documentation for drm_sched_stop. + */ + if (sched->free_guilty) { + job->sched->ops->free_job(job); + sched->free_guilty = false; } if (status != DRM_GPU_SCHED_STAT_ENODEV) From 73a7cc98c25f7a1f5149530db7916717ed4f74ab Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:36 +0100 Subject: [PATCH 2914/2924] drm/sched: Consolidate drm_sched_rq_select_entity_rr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract out two copies of the identical code to function epilogue to make it smaller and more readable. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/sched_main.c | 48 +++++++++++--------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 66509d1582a022..546d50cfeb27c3 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -262,38 +262,14 @@ drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, entity = rq->current_entity; if (entity) { list_for_each_entry_continue(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current - * entity in terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - - rq->current_entity = entity; - reinit_completion(&entity->entity_idle); - spin_unlock(&rq->lock); - return entity; - } + if (drm_sched_entity_is_ready(entity)) + goto found; } } list_for_each_entry(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current entity in - * terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - - rq->current_entity = entity; - reinit_completion(&entity->entity_idle); - spin_unlock(&rq->lock); - return entity; - } + if (drm_sched_entity_is_ready(entity)) + goto found; if (entity == rq->current_entity) break; @@ -302,6 +278,22 @@ drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, spin_unlock(&rq->lock); return NULL; + +found: + if (!drm_sched_can_queue(sched, entity)) { + /* + * If scheduler cannot take more jobs signal the caller to not + * consider lower priority queues. + */ + entity = ERR_PTR(-ENOSPC); + } else { + rq->current_entity = entity; + reinit_completion(&entity->entity_idle); + } + + spin_unlock(&rq->lock); + + return entity; } /** From 2e57ee890b2ee7628592b1cf932d02e081f00236 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:37 +0100 Subject: [PATCH 2915/2924] drm/sched: Implement RR via FIFO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round-robin being the non-default policy and unclear how much it is used, we can notice that it can be implemented using the FIFO data structures if we only invent a fake submit timestamp which is monotonically increasing inside drm_sched_rq instances. So instead of remembering which was the last entity the scheduler worker picked, we can bump the picked one to the bottom of the tree, achieving the same round-robin behaviour. Advantage is that we can consolidate to a single code path and remove a bunch of code. Downside is round-robin mode now needs to lock on the job pop path but that should not be visible. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/sched_entity.c | 49 +++++++++------ drivers/gpu/drm/scheduler/sched_main.c | 76 ++---------------------- include/drm/gpu_scheduler.h | 5 +- 3 files changed, 38 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index e671aa24172068..06cf882f234b7d 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -468,9 +468,19 @@ drm_sched_job_dependency(struct drm_sched_job *job, return NULL; } +static ktime_t +drm_sched_rq_get_rr_deadline(struct drm_sched_rq *rq) +{ + lockdep_assert_held(&rq->lock); + + rq->rr_deadline = ktime_add_ns(rq->rr_deadline, 1); + + return rq->rr_deadline; +} + struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) { - struct drm_sched_job *sched_job; + struct drm_sched_job *sched_job, *next_job; sched_job = drm_sched_entity_queue_peek(entity); if (!sched_job) @@ -505,21 +515,22 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) * Update the entity's location in the min heap according to * the timestamp of the next job, if any. */ - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) { - struct drm_sched_job *next; - - next = drm_sched_entity_queue_peek(entity); - if (next) { - struct drm_sched_rq *rq; - - spin_lock(&entity->lock); - rq = entity->rq; - spin_lock(&rq->lock); - drm_sched_rq_update_fifo_locked(entity, rq, - next->submit_ts); - spin_unlock(&rq->lock); - spin_unlock(&entity->lock); - } + next_job = drm_sched_entity_queue_peek(entity); + if (next_job) { + struct drm_sched_rq *rq; + ktime_t ts; + + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + ts = next_job->submit_ts; + else + ts = drm_sched_rq_get_rr_deadline(rq); + + spin_lock(&entity->lock); + rq = entity->rq; + spin_lock(&rq->lock); + drm_sched_rq_update_fifo_locked(entity, rq, ts); + spin_unlock(&rq->lock); + spin_unlock(&entity->lock); } /* Jobs and entities might have different lifecycles. Since we're @@ -618,9 +629,9 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) spin_lock(&rq->lock); drm_sched_rq_add_entity(rq, entity); - - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - drm_sched_rq_update_fifo_locked(entity, rq, submit_ts); + if (drm_sched_policy == DRM_SCHED_POLICY_RR) + submit_ts = drm_sched_rq_get_rr_deadline(rq); + drm_sched_rq_update_fifo_locked(entity, rq, submit_ts); spin_unlock(&rq->lock); spin_unlock(&entity->lock); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 546d50cfeb27c3..0bea630d02bdd2 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -184,7 +184,6 @@ static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, spin_lock_init(&rq->lock); INIT_LIST_HEAD(&rq->entities); rq->rb_tree_root = RB_ROOT_CACHED; - rq->current_entity = NULL; rq->sched = sched; } @@ -230,74 +229,13 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, atomic_dec(rq->sched->score); list_del_init(&entity->list); - if (rq->current_entity == entity) - rq->current_entity = NULL; - - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - drm_sched_rq_remove_fifo_locked(entity, rq); - - spin_unlock(&rq->lock); -} - -/** - * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run - * - * @sched: the gpu scheduler - * @rq: scheduler run queue to check. - * - * Try to find the next ready entity. - * - * Return an entity if one is found; return an error-pointer (!NULL) if an - * entity was ready, but the scheduler had insufficient credits to accommodate - * its job; return NULL, if no ready entity was found. - */ -static struct drm_sched_entity * -drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq) -{ - struct drm_sched_entity *entity; - - spin_lock(&rq->lock); - - entity = rq->current_entity; - if (entity) { - list_for_each_entry_continue(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) - goto found; - } - } - - list_for_each_entry(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) - goto found; - - if (entity == rq->current_entity) - break; - } - - spin_unlock(&rq->lock); - - return NULL; - -found: - if (!drm_sched_can_queue(sched, entity)) { - /* - * If scheduler cannot take more jobs signal the caller to not - * consider lower priority queues. - */ - entity = ERR_PTR(-ENOSPC); - } else { - rq->current_entity = entity; - reinit_completion(&entity->entity_idle); - } + drm_sched_rq_remove_fifo_locked(entity, rq); spin_unlock(&rq->lock); - - return entity; } /** - * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run + * drm_sched_rq_select_entity - Select an entity which provides a job to run * * @sched: the gpu scheduler * @rq: scheduler run queue to check. @@ -309,8 +247,8 @@ drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, * its job; return NULL, if no ready entity was found. */ static struct drm_sched_entity * -drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq) +drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, + struct drm_sched_rq *rq) { struct rb_node *rb; @@ -1050,15 +988,13 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched) static struct drm_sched_entity * drm_sched_select_entity(struct drm_gpu_scheduler *sched) { - struct drm_sched_entity *entity; + struct drm_sched_entity *entity = NULL; int i; /* Start with the highest priority. */ for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { - entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? - drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : - drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); + entity = drm_sched_rq_select_entity(sched, sched->sched_rq[i]); if (entity) break; } diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 50928a7ae98e3d..2554cc17bae5ba 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -239,8 +239,7 @@ struct drm_sched_entity { * struct drm_sched_rq - queue of entities to be scheduled. * * @sched: the scheduler to which this rq belongs to. - * @lock: protects @entities, @rb_tree_root and @current_entity. - * @current_entity: the entity which is to be scheduled. + * @lock: protects @entities, @rb_tree_root and @rr_deadline. * @entities: list of the entities to be scheduled. * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling * @@ -253,7 +252,7 @@ struct drm_sched_rq { spinlock_t lock; /* Following members are protected by the @lock: */ - struct drm_sched_entity *current_entity; + ktime_t rr_deadline; struct list_head entities; struct rb_root_cached rb_tree_root; }; From 3d8cd8959787158ca425303f24b7dc6f48ad4b89 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:38 +0100 Subject: [PATCH 2916/2924] drm/sched: Consolidate entity run queue management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the code dealing with entities entering and exiting run queues to helpers to logically separate it from jobs entering and exiting entities. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/sched_entity.c | 60 ++------------- drivers/gpu/drm/scheduler/sched_internal.h | 8 +- drivers/gpu/drm/scheduler/sched_main.c | 87 +++++++++++++++++++--- 3 files changed, 83 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 06cf882f234b7d..5de39cfc7887e9 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -468,19 +468,9 @@ drm_sched_job_dependency(struct drm_sched_job *job, return NULL; } -static ktime_t -drm_sched_rq_get_rr_deadline(struct drm_sched_rq *rq) -{ - lockdep_assert_held(&rq->lock); - - rq->rr_deadline = ktime_add_ns(rq->rr_deadline, 1); - - return rq->rr_deadline; -} - struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) { - struct drm_sched_job *sched_job, *next_job; + struct drm_sched_job *sched_job; sched_job = drm_sched_entity_queue_peek(entity); if (!sched_job) @@ -511,27 +501,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) spsc_queue_pop(&entity->job_queue); - /* - * Update the entity's location in the min heap according to - * the timestamp of the next job, if any. - */ - next_job = drm_sched_entity_queue_peek(entity); - if (next_job) { - struct drm_sched_rq *rq; - ktime_t ts; - - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - ts = next_job->submit_ts; - else - ts = drm_sched_rq_get_rr_deadline(rq); - - spin_lock(&entity->lock); - rq = entity->rq; - spin_lock(&rq->lock); - drm_sched_rq_update_fifo_locked(entity, rq, ts); - spin_unlock(&rq->lock); - spin_unlock(&entity->lock); - } + drm_sched_rq_pop_entity(entity); /* Jobs and entities might have different lifecycles. Since we're * removing the job from the entities queue, set the jobs entity pointer @@ -613,30 +583,10 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) /* first job wakes up scheduler */ if (first) { struct drm_gpu_scheduler *sched; - struct drm_sched_rq *rq; - - /* Add the entity to the run queue */ - spin_lock(&entity->lock); - if (entity->stopped) { - spin_unlock(&entity->lock); - - DRM_ERROR("Trying to push to a killed entity\n"); - return; - } - - rq = entity->rq; - sched = rq->sched; - - spin_lock(&rq->lock); - drm_sched_rq_add_entity(rq, entity); - if (drm_sched_policy == DRM_SCHED_POLICY_RR) - submit_ts = drm_sched_rq_get_rr_deadline(rq); - drm_sched_rq_update_fifo_locked(entity, rq, submit_ts); - - spin_unlock(&rq->lock); - spin_unlock(&entity->lock); - drm_sched_wakeup(sched); + sched = drm_sched_rq_add_entity(entity, submit_ts); + if (sched) + drm_sched_wakeup(sched); } } EXPORT_SYMBOL(drm_sched_entity_push_job); diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h index 599cf6e1bb7400..8e7e477bace3cc 100644 --- a/drivers/gpu/drm/scheduler/sched_internal.h +++ b/drivers/gpu/drm/scheduler/sched_internal.h @@ -12,13 +12,11 @@ extern int drm_sched_policy; void drm_sched_wakeup(struct drm_gpu_scheduler *sched); -void drm_sched_rq_add_entity(struct drm_sched_rq *rq, - struct drm_sched_entity *entity); +struct drm_gpu_scheduler * +drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts); void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity); - -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, - struct drm_sched_rq *rq, ktime_t ts); +void drm_sched_rq_pop_entity(struct drm_sched_entity *entity); void drm_sched_entity_select_rq(struct drm_sched_entity *entity); struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 0bea630d02bdd2..8645dcdce39f58 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -144,15 +144,18 @@ static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity, struct drm_sched_rq *rq) { + lockdep_assert_held(&entity->lock); + lockdep_assert_held(&rq->lock); + if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root); RB_CLEAR_NODE(&entity->rb_tree_node); } } -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, - struct drm_sched_rq *rq, - ktime_t ts) +static void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, + struct drm_sched_rq *rq, + ktime_t ts) { /* * Both locks need to be grabbed, one to protect from entity->rq change @@ -187,25 +190,58 @@ static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, rq->sched = sched; } +static ktime_t +drm_sched_rq_get_rr_deadline(struct drm_sched_rq *rq) +{ + lockdep_assert_held(&rq->lock); + + rq->rr_deadline = ktime_add_ns(rq->rr_deadline, 1); + + return rq->rr_deadline; +} + /** * drm_sched_rq_add_entity - add an entity * - * @rq: scheduler run queue * @entity: scheduler entity + * @ts: submission timestamp * * Adds a scheduler entity to the run queue. + * + * Returns a DRM scheduler pre-selected to handle this entity. */ -void drm_sched_rq_add_entity(struct drm_sched_rq *rq, - struct drm_sched_entity *entity) +struct drm_gpu_scheduler * +drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts) { - lockdep_assert_held(&entity->lock); - lockdep_assert_held(&rq->lock); + struct drm_gpu_scheduler *sched; + struct drm_sched_rq *rq; - if (!list_empty(&entity->list)) - return; + /* Add the entity to the run queue */ + spin_lock(&entity->lock); + if (entity->stopped) { + spin_unlock(&entity->lock); + + DRM_ERROR("Trying to push to a killed entity\n"); + return NULL; + } - atomic_inc(rq->sched->score); - list_add_tail(&entity->list, &rq->entities); + rq = entity->rq; + spin_lock(&rq->lock); + sched = rq->sched; + + if (list_empty(&entity->list)) { + atomic_inc(sched->score); + list_add_tail(&entity->list, &rq->entities); + } + + if (drm_sched_policy == DRM_SCHED_POLICY_RR) + ts = drm_sched_rq_get_rr_deadline(rq); + drm_sched_rq_update_fifo_locked(entity, rq, ts); + + spin_unlock(&rq->lock); + spin_unlock(&entity->lock); + + return sched; } /** @@ -234,6 +270,33 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, spin_unlock(&rq->lock); } +void drm_sched_rq_pop_entity(struct drm_sched_entity *entity) +{ + struct drm_sched_job *next_job; + struct drm_sched_rq *rq; + ktime_t ts; + + /* + * Update the entity's location in the min heap according to + * the timestamp of the next job, if any. + */ + next_job = drm_sched_entity_queue_peek(entity); + if (!next_job) + return; + + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + ts = next_job->submit_ts; + else + ts = drm_sched_rq_get_rr_deadline(rq); + + spin_lock(&entity->lock); + rq = entity->rq; + spin_lock(&rq->lock); + drm_sched_rq_update_fifo_locked(entity, rq, ts); + spin_unlock(&rq->lock); + spin_unlock(&entity->lock); +} + /** * drm_sched_rq_select_entity - Select an entity which provides a job to run * From af907d6ccef4ae066cd2103eadba8ac7bc757b72 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:39 +0100 Subject: [PATCH 2917/2924] drm/sched: Move run queue related code into a separate file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets move all the code dealing with struct drm_sched_rq into a separate compilation unit. Advantage being sched_main.c is left with a clearer set of responsibilities. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/Makefile | 2 +- drivers/gpu/drm/scheduler/sched_internal.h | 7 + drivers/gpu/drm/scheduler/sched_main.c | 210 +------------------- drivers/gpu/drm/scheduler/sched_rq.c | 214 +++++++++++++++++++++ 4 files changed, 224 insertions(+), 209 deletions(-) create mode 100644 drivers/gpu/drm/scheduler/sched_rq.c diff --git a/drivers/gpu/drm/scheduler/Makefile b/drivers/gpu/drm/scheduler/Makefile index 6e13e4c63e9d87..74e75eff6df50c 100644 --- a/drivers/gpu/drm/scheduler/Makefile +++ b/drivers/gpu/drm/scheduler/Makefile @@ -20,7 +20,7 @@ # OTHER DEALINGS IN THE SOFTWARE. # # -gpu-sched-y := sched_main.o sched_fence.o sched_entity.o +gpu-sched-y := sched_main.o sched_fence.o sched_entity.o sched_rq.o obj-$(CONFIG_DRM_SCHED) += gpu-sched.o diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h index 8e7e477bace3cc..ee13a986b920c7 100644 --- a/drivers/gpu/drm/scheduler/sched_internal.h +++ b/drivers/gpu/drm/scheduler/sched_internal.h @@ -10,8 +10,15 @@ extern int drm_sched_policy; #define DRM_SCHED_POLICY_RR 0 #define DRM_SCHED_POLICY_FIFO 1 +bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity); void drm_sched_wakeup(struct drm_gpu_scheduler *sched); +void drm_sched_rq_init(struct drm_gpu_scheduler *sched, + struct drm_sched_rq *rq); +struct drm_sched_entity * +drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, + struct drm_sched_rq *rq); struct drm_gpu_scheduler * drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts); void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 8645dcdce39f58..9f2b40cc2b4c5e 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -111,8 +111,8 @@ static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) * Return true if we can push at least one more job from @entity, false * otherwise. */ -static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity) +bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) { struct drm_sched_job *s_job; @@ -132,212 +132,6 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, return drm_sched_available_credits(sched) >= s_job->credits; } -static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, - const struct rb_node *b) -{ - struct drm_sched_entity *ent_a = rb_entry((a), struct drm_sched_entity, rb_tree_node); - struct drm_sched_entity *ent_b = rb_entry((b), struct drm_sched_entity, rb_tree_node); - - return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting); -} - -static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity, - struct drm_sched_rq *rq) -{ - lockdep_assert_held(&entity->lock); - lockdep_assert_held(&rq->lock); - - if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { - rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root); - RB_CLEAR_NODE(&entity->rb_tree_node); - } -} - -static void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, - struct drm_sched_rq *rq, - ktime_t ts) -{ - /* - * Both locks need to be grabbed, one to protect from entity->rq change - * for entity from within concurrent drm_sched_entity_select_rq and the - * other to update the rb tree structure. - */ - lockdep_assert_held(&entity->lock); - lockdep_assert_held(&rq->lock); - - drm_sched_rq_remove_fifo_locked(entity, rq); - - entity->oldest_job_waiting = ts; - - rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root, - drm_sched_entity_compare_before); -} - -/** - * drm_sched_rq_init - initialize a given run queue struct - * - * @sched: scheduler instance to associate with this run queue - * @rq: scheduler run queue - * - * Initializes a scheduler runqueue. - */ -static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq) -{ - spin_lock_init(&rq->lock); - INIT_LIST_HEAD(&rq->entities); - rq->rb_tree_root = RB_ROOT_CACHED; - rq->sched = sched; -} - -static ktime_t -drm_sched_rq_get_rr_deadline(struct drm_sched_rq *rq) -{ - lockdep_assert_held(&rq->lock); - - rq->rr_deadline = ktime_add_ns(rq->rr_deadline, 1); - - return rq->rr_deadline; -} - -/** - * drm_sched_rq_add_entity - add an entity - * - * @entity: scheduler entity - * @ts: submission timestamp - * - * Adds a scheduler entity to the run queue. - * - * Returns a DRM scheduler pre-selected to handle this entity. - */ -struct drm_gpu_scheduler * -drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts) -{ - struct drm_gpu_scheduler *sched; - struct drm_sched_rq *rq; - - /* Add the entity to the run queue */ - spin_lock(&entity->lock); - if (entity->stopped) { - spin_unlock(&entity->lock); - - DRM_ERROR("Trying to push to a killed entity\n"); - return NULL; - } - - rq = entity->rq; - spin_lock(&rq->lock); - sched = rq->sched; - - if (list_empty(&entity->list)) { - atomic_inc(sched->score); - list_add_tail(&entity->list, &rq->entities); - } - - if (drm_sched_policy == DRM_SCHED_POLICY_RR) - ts = drm_sched_rq_get_rr_deadline(rq); - drm_sched_rq_update_fifo_locked(entity, rq, ts); - - spin_unlock(&rq->lock); - spin_unlock(&entity->lock); - - return sched; -} - -/** - * drm_sched_rq_remove_entity - remove an entity - * - * @rq: scheduler run queue - * @entity: scheduler entity - * - * Removes a scheduler entity from the run queue. - */ -void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, - struct drm_sched_entity *entity) -{ - lockdep_assert_held(&entity->lock); - - if (list_empty(&entity->list)) - return; - - spin_lock(&rq->lock); - - atomic_dec(rq->sched->score); - list_del_init(&entity->list); - - drm_sched_rq_remove_fifo_locked(entity, rq); - - spin_unlock(&rq->lock); -} - -void drm_sched_rq_pop_entity(struct drm_sched_entity *entity) -{ - struct drm_sched_job *next_job; - struct drm_sched_rq *rq; - ktime_t ts; - - /* - * Update the entity's location in the min heap according to - * the timestamp of the next job, if any. - */ - next_job = drm_sched_entity_queue_peek(entity); - if (!next_job) - return; - - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - ts = next_job->submit_ts; - else - ts = drm_sched_rq_get_rr_deadline(rq); - - spin_lock(&entity->lock); - rq = entity->rq; - spin_lock(&rq->lock); - drm_sched_rq_update_fifo_locked(entity, rq, ts); - spin_unlock(&rq->lock); - spin_unlock(&entity->lock); -} - -/** - * drm_sched_rq_select_entity - Select an entity which provides a job to run - * - * @sched: the gpu scheduler - * @rq: scheduler run queue to check. - * - * Find oldest waiting ready entity. - * - * Return an entity if one is found; return an error-pointer (!NULL) if an - * entity was ready, but the scheduler had insufficient credits to accommodate - * its job; return NULL, if no ready entity was found. - */ -static struct drm_sched_entity * -drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq) -{ - struct rb_node *rb; - - spin_lock(&rq->lock); - for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { - struct drm_sched_entity *entity; - - entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); - if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current entity in - * terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - - reinit_completion(&entity->entity_idle); - break; - } - } - spin_unlock(&rq->lock); - - return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL; -} - /** * drm_sched_run_job_queue - enqueue run-job work * @sched: scheduler instance diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c new file mode 100644 index 00000000000000..d477a027feb92f --- /dev/null +++ b/drivers/gpu/drm/scheduler/sched_rq.c @@ -0,0 +1,214 @@ +#include + +#include +#include + +#include "sched_internal.h" + +static __always_inline bool +drm_sched_entity_compare_before(struct rb_node *a, const struct rb_node *b) +{ + struct drm_sched_entity *ea = + rb_entry((a), struct drm_sched_entity, rb_tree_node); + struct drm_sched_entity *eb = + rb_entry((b), struct drm_sched_entity, rb_tree_node); + + return ktime_before(ea->oldest_job_waiting, eb->oldest_job_waiting); +} + +static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity, + struct drm_sched_rq *rq) +{ + lockdep_assert_held(&entity->lock); + lockdep_assert_held(&rq->lock); + + if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { + rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root); + RB_CLEAR_NODE(&entity->rb_tree_node); + } +} + +static void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, + struct drm_sched_rq *rq, + ktime_t ts) +{ + /* + * Both locks need to be grabbed, one to protect from entity->rq change + * for entity from within concurrent drm_sched_entity_select_rq and the + * other to update the rb tree structure. + */ + lockdep_assert_held(&entity->lock); + lockdep_assert_held(&rq->lock); + + drm_sched_rq_remove_fifo_locked(entity, rq); + + entity->oldest_job_waiting = ts; + + rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root, + drm_sched_entity_compare_before); +} + +/** + * drm_sched_rq_init - initialize a given run queue struct + * + * @sched: scheduler instance to associate with this run queue + * @rq: scheduler run queue + * + * Initializes a scheduler runqueue. + */ +void drm_sched_rq_init(struct drm_gpu_scheduler *sched, + struct drm_sched_rq *rq) +{ + spin_lock_init(&rq->lock); + INIT_LIST_HEAD(&rq->entities); + rq->rb_tree_root = RB_ROOT_CACHED; + rq->sched = sched; +} + +static ktime_t +drm_sched_rq_get_rr_deadline(struct drm_sched_rq *rq) +{ + lockdep_assert_held(&rq->lock); + + rq->rr_deadline = ktime_add_ns(rq->rr_deadline, 1); + + return rq->rr_deadline; +} + +/** + * drm_sched_rq_add_entity - add an entity + * + * @entity: scheduler entity + * @ts: submission timestamp + * + * Adds a scheduler entity to the run queue. + * + * Returns a DRM scheduler pre-selected to handle this entity. + */ +struct drm_gpu_scheduler * +drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts) +{ + struct drm_gpu_scheduler *sched; + struct drm_sched_rq *rq; + + /* Add the entity to the run queue */ + spin_lock(&entity->lock); + if (entity->stopped) { + spin_unlock(&entity->lock); + + DRM_ERROR("Trying to push to a killed entity\n"); + return NULL; + } + + rq = entity->rq; + spin_lock(&rq->lock); + sched = rq->sched; + + if (list_empty(&entity->list)) { + atomic_inc(sched->score); + list_add_tail(&entity->list, &rq->entities); + } + + if (drm_sched_policy == DRM_SCHED_POLICY_RR) + ts = drm_sched_rq_get_rr_deadline(rq); + drm_sched_rq_update_fifo_locked(entity, rq, ts); + + spin_unlock(&rq->lock); + spin_unlock(&entity->lock); + + return sched; +} + +/** + * drm_sched_rq_remove_entity - remove an entity + * + * @rq: scheduler run queue + * @entity: scheduler entity + * + * Removes a scheduler entity from the run queue. + */ +void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, + struct drm_sched_entity *entity) +{ + lockdep_assert_held(&entity->lock); + + if (list_empty(&entity->list)) + return; + + spin_lock(&rq->lock); + + atomic_dec(rq->sched->score); + list_del_init(&entity->list); + + drm_sched_rq_remove_fifo_locked(entity, rq); + + spin_unlock(&rq->lock); +} + +void drm_sched_rq_pop_entity(struct drm_sched_entity *entity) +{ + struct drm_sched_job *next_job; + struct drm_sched_rq *rq; + ktime_t ts; + + /* + * Update the entity's location in the min heap according to + * the timestamp of the next job, if any. + */ + next_job = drm_sched_entity_queue_peek(entity); + if (!next_job) + return; + + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + ts = next_job->submit_ts; + else + ts = drm_sched_rq_get_rr_deadline(rq); + + spin_lock(&entity->lock); + rq = entity->rq; + spin_lock(&rq->lock); + drm_sched_rq_update_fifo_locked(entity, rq, ts); + spin_unlock(&rq->lock); + spin_unlock(&entity->lock); +} + +/** + * drm_sched_rq_select_entity - Select an entity which provides a job to run + * + * @sched: the gpu scheduler + * @rq: scheduler run queue to check. + * + * Find oldest waiting ready entity. + * + * Return an entity if one is found; return an error-pointer (!NULL) if an + * entity was ready, but the scheduler had insufficient credits to accommodate + * its job; return NULL, if no ready entity was found. + */ +struct drm_sched_entity * +drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, + struct drm_sched_rq *rq) +{ + struct rb_node *rb; + + spin_lock(&rq->lock); + for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { + struct drm_sched_entity *entity; + + entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); + if (drm_sched_entity_is_ready(entity)) { + /* If we can't queue yet, preserve the current entity in + * terms of fairness. + */ + if (!drm_sched_can_queue(sched, entity)) { + spin_unlock(&rq->lock); + return ERR_PTR(-ENOSPC); + } + + reinit_completion(&entity->entity_idle); + break; + } + } + spin_unlock(&rq->lock); + + return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL; +} From a21f20f7806029647b7f605acb70f96d404d9ab5 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:40 +0100 Subject: [PATCH 2918/2924] drm/sched: Free all finished jobs at once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To implement fair scheduling we will need as accurate as possible view into per entity GPU time utilisation. Because sched fence execution time are only adjusted for accuracy in the free worker we need to process completed jobs as soon as possible so the metric is most up to date when view from the submission side of things. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/sched_main.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 9f2b40cc2b4c5e..d370a59e8f31e2 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -863,13 +863,12 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) * drm_sched_get_finished_job - fetch the next finished job to be destroyed * * @sched: scheduler instance - * @have_more: are there more finished jobs on the list * * Returns the next finished job from the pending list (if there is one) * ready for it to be destroyed. */ static struct drm_sched_job * -drm_sched_get_finished_job(struct drm_gpu_scheduler *sched, bool *have_more) +drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) { struct drm_sched_job *job, *next; @@ -884,7 +883,6 @@ drm_sched_get_finished_job(struct drm_gpu_scheduler *sched, bool *have_more) /* cancel this job's TO timer */ cancel_delayed_work(&sched->work_tdr); - *have_more = false; next = list_first_entry_or_null(&sched->pending_list, typeof(*next), list); if (next) { @@ -894,10 +892,6 @@ drm_sched_get_finished_job(struct drm_gpu_scheduler *sched, bool *have_more) next->s_fence->scheduled.timestamp = dma_fence_timestamp(&job->s_fence->finished); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &next->s_fence->finished.flags)) - *have_more = true; - /* start TO timer for next job */ drm_sched_start_timeout(sched); } @@ -956,14 +950,9 @@ static void drm_sched_free_job_work(struct work_struct *w) struct drm_gpu_scheduler *sched = container_of(w, struct drm_gpu_scheduler, work_free_job); struct drm_sched_job *job; - bool have_more; - job = drm_sched_get_finished_job(sched, &have_more); - if (job) { + while ((job = drm_sched_get_finished_job(sched))) sched->ops->free_job(job); - if (have_more) - __drm_sched_run_free_queue(sched); - } drm_sched_run_job_queue(sched); } From 97199c120e6bbcabc8124162a54f77a5d65bad9b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:41 +0100 Subject: [PATCH 2919/2924] drm/sched: Account entity GPU time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To implement fair scheduling we need a view into the GPU time consumed by entities. Problem we have is that jobs and entities objects have decoupled lifetimes, where at the point we have a view into accurate GPU time, we cannot link back to the entity any longer. Solve this by adding a light weight entity stats object which is reference counted by both entity and the job and hence can safely be used from either side. With that, the only other thing we need is to add a helper for adding the job's GPU time into the respective entity stats object, and call it once the accurate GPU time has been calculated. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/sched_entity.c | 29 ++++++++++++++++ drivers/gpu/drm/scheduler/sched_internal.h | 40 ++++++++++++++++++++++ drivers/gpu/drm/scheduler/sched_main.c | 6 +++- include/drm/gpu_scheduler.h | 5 +++ 4 files changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 5de39cfc7887e9..996181f19e8d67 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -32,6 +32,29 @@ #include "gpu_scheduler_trace.h" + +void drm_sched_entity_stats_release(struct kref *kref) +{ + struct drm_sched_entity_stats *stats = + container_of(kref, typeof(*stats), kref); + + kfree(stats); +} + +static struct drm_sched_entity_stats *drm_sched_entity_stats_alloc(void) +{ + struct drm_sched_entity_stats *stats; + + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) + return NULL; + + kref_init(&stats->kref); + spin_lock_init(&stats->lock); + + return stats; +} + /** * drm_sched_entity_init - Init a context entity used by scheduler when * submit to HW ring. @@ -65,6 +88,11 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, return -EINVAL; memset(entity, 0, sizeof(struct drm_sched_entity)); + + entity->stats = drm_sched_entity_stats_alloc(); + if (!entity->stats) + return -ENOMEM; + INIT_LIST_HEAD(&entity->list); entity->rq = NULL; entity->guilty = guilty; @@ -338,6 +366,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity) dma_fence_put(rcu_dereference_check(entity->last_scheduled, true)); RCU_INIT_POINTER(entity->last_scheduled, NULL); + drm_sched_entity_stats_put(entity->stats); } EXPORT_SYMBOL(drm_sched_entity_fini); diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h index ee13a986b920c7..014416cadb3e77 100644 --- a/drivers/gpu/drm/scheduler/sched_internal.h +++ b/drivers/gpu/drm/scheduler/sched_internal.h @@ -3,6 +3,15 @@ #ifndef _DRM_GPU_SCHEDULER_INTERNAL_H_ #define _DRM_GPU_SCHEDULER_INTERNAL_H_ +#include +#include +#include + +struct drm_sched_entity_stats { + struct kref kref; + spinlock_t lock; + ktime_t runtime; +}; /* Used to choose between FIFO and RR job-scheduling */ extern int drm_sched_policy; @@ -93,4 +102,35 @@ drm_sched_entity_is_ready(struct drm_sched_entity *entity) return true; } +void drm_sched_entity_stats_release(struct kref *kref); + +static inline struct drm_sched_entity_stats * +drm_sched_entity_stats_get(struct drm_sched_entity_stats *stats) +{ + kref_get(&stats->kref); + + return stats; +} + +static inline void +drm_sched_entity_stats_put(struct drm_sched_entity_stats *stats) +{ + kref_put(&stats->kref, drm_sched_entity_stats_release); +} + +static inline void +drm_sched_entity_stats_job_add_gpu_time(struct drm_sched_job *job) +{ + struct drm_sched_entity_stats *stats = job->entity_stats; + struct drm_sched_fence *s_fence = job->s_fence; + ktime_t start, end; + + start = dma_fence_timestamp(&s_fence->scheduled); + end = dma_fence_timestamp(&s_fence->finished); + + spin_lock(&stats->lock); + stats->runtime = ktime_add(stats->runtime, ktime_sub(end, start)); + spin_unlock(&stats->lock); +} + #endif diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index d370a59e8f31e2..2d688fa92601fe 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -614,6 +614,7 @@ void drm_sched_job_arm(struct drm_sched_job *job) job->sched = sched; job->s_priority = entity->priority; + job->entity_stats = drm_sched_entity_stats_get(entity->stats); job->id = atomic64_inc_return(&sched->job_id_count); drm_sched_fence_init(job->s_fence, job->entity); @@ -803,6 +804,7 @@ void drm_sched_job_cleanup(struct drm_sched_job *job) * been called. */ dma_fence_put(&job->s_fence->finished); + drm_sched_entity_stats_put(job->entity_stats); } else { /* The job was aborted before it has been committed to be run; * notably, drm_sched_job_arm() has not been called. @@ -951,8 +953,10 @@ static void drm_sched_free_job_work(struct work_struct *w) container_of(w, struct drm_gpu_scheduler, work_free_job); struct drm_sched_job *job; - while ((job = drm_sched_get_finished_job(sched))) + while ((job = drm_sched_get_finished_job(sched))) { + drm_sched_entity_stats_job_add_gpu_time(job); sched->ops->free_job(job); + } drm_sched_run_job_queue(sched); } diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 2554cc17bae5ba..5eaf4c5fa67712 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -71,6 +71,8 @@ enum drm_sched_priority { DRM_SCHED_PRIORITY_COUNT }; +struct drm_sched_entity_stats; + /** * struct drm_sched_entity - A wrapper around a job queue (typically * attached to the DRM file_priv). @@ -109,6 +111,8 @@ struct drm_sched_entity { */ struct drm_sched_rq *rq; + struct drm_sched_entity_stats *stats; + /** * @sched_list: * @@ -351,6 +355,7 @@ struct drm_sched_job { struct drm_sched_fence *s_fence; struct drm_sched_entity *entity; + struct drm_sched_entity_stats *entity_stats; enum drm_sched_priority s_priority; u32 credits; From fc5fb79f1f8fdcffa5df6b1bf04fdd21309d0396 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:42 +0100 Subject: [PATCH 2920/2924] drm/sched: Remove idle entity from tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to keep entities with no jobs in the tree so lets remove it once the last job is consumed. This keeps the tree smaller which is nicer and more efficient as entities are removed and re-added on every popped job. Apart from that, the upcoming fair scheduling algorithm will rely on the tree only containing runnable entities. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/sched_rq.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c index d477a027feb92f..2cde89cf25fbae 100644 --- a/drivers/gpu/drm/scheduler/sched_rq.c +++ b/drivers/gpu/drm/scheduler/sched_rq.c @@ -149,25 +149,27 @@ void drm_sched_rq_pop_entity(struct drm_sched_entity *entity) { struct drm_sched_job *next_job; struct drm_sched_rq *rq; - ktime_t ts; /* * Update the entity's location in the min heap according to * the timestamp of the next job, if any. */ - next_job = drm_sched_entity_queue_peek(entity); - if (!next_job) - return; - - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - ts = next_job->submit_ts; - else - ts = drm_sched_rq_get_rr_deadline(rq); - spin_lock(&entity->lock); rq = entity->rq; spin_lock(&rq->lock); - drm_sched_rq_update_fifo_locked(entity, rq, ts); + next_job = drm_sched_entity_queue_peek(entity); + if (next_job) { + ktime_t ts; + + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + ts = next_job->submit_ts; + else + ts = drm_sched_rq_get_rr_deadline(rq); + + drm_sched_rq_update_fifo_locked(entity, rq, ts); + } else { + drm_sched_rq_remove_fifo_locked(entity, rq); + } spin_unlock(&rq->lock); spin_unlock(&entity->lock); } From 499743796ee34043a9e2604df84d3fda4f74d154 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:43 +0100 Subject: [PATCH 2921/2924] drm/sched: Add fair scheduling policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fair scheduling policy is built upon the same concepts as the well known CFS kernel scheduler - entity run queue is sorted by the virtual GPU time consumed by entities in a way that the entity with least vruntime runs first. It is able to avoid total priority starvation, which is one of the problems with FIFO, and it also eliminates the need for per priority run queues. As it scales the actual GPU runtime by an exponential factor as the priority decreases, therefore the virtual runtime for low priority entities grows faster than for normal priority, pushing them further down the runqueue order for the same real GPU time spent. Apart from this fundamental fairness, fair policy is especially strong in oversubscription workloads where it is able to give more GPU time to short and bursty workloads when they are running in parallel with GPU heavy clients submitting deep job queues. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner Cc: Pierre-Eric Pelloux-Prayer --- drivers/gpu/drm/scheduler/sched_entity.c | 28 +++++---- drivers/gpu/drm/scheduler/sched_internal.h | 70 +++++++++++++++++++++- drivers/gpu/drm/scheduler/sched_main.c | 14 +++-- drivers/gpu/drm/scheduler/sched_rq.c | 35 ++++++++++- include/drm/gpu_scheduler.h | 3 + 5 files changed, 131 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 996181f19e8d67..52e3802914f103 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -98,6 +98,8 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, entity->guilty = guilty; entity->num_sched_list = num_sched_list; entity->priority = priority; + entity->rq_priority = drm_sched_policy == DRM_SCHED_POLICY_FAIR ? + DRM_SCHED_PRIORITY_KERNEL : priority; /* * It's perfectly valid to initialize an entity without having a valid * scheduler attached. It's just not valid to use the scheduler before it @@ -114,17 +116,23 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, */ pr_warn("%s: called with uninitialized scheduler\n", __func__); } else if (num_sched_list) { - /* The "priority" of an entity cannot exceed the number of run-queues of a - * scheduler. Protect against num_rqs being 0, by converting to signed. Choose - * the lowest priority available. + enum drm_sched_priority p = entity->priority; + + /* + * The "priority" of an entity cannot exceed the number of + * run-queues of a scheduler. Protect against num_rqs being 0, + * by converting to signed. Choose the lowest priority + * available. */ - if (entity->priority >= sched_list[0]->num_rqs) { - dev_err(sched_list[0]->dev, "entity has out-of-bounds priority: %u. num_rqs: %u\n", - entity->priority, sched_list[0]->num_rqs); - entity->priority = max_t(s32, (s32) sched_list[0]->num_rqs - 1, - (s32) DRM_SCHED_PRIORITY_KERNEL); + if (p >= sched_list[0]->num_user_rqs) { + dev_err(sched_list[0]->dev, "entity with out-of-bounds priority:%u num_user_rqs:%u\n", + p, sched_list[0]->num_user_rqs); + p = max_t(s32, + (s32)sched_list[0]->num_user_rqs - 1, + (s32)DRM_SCHED_PRIORITY_KERNEL); + entity->priority = p; } - entity->rq = sched_list[0]->sched_rq[entity->priority]; + entity->rq = sched_list[0]->sched_rq[entity->rq_priority]; } init_completion(&entity->entity_idle); @@ -571,7 +579,7 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) spin_lock(&entity->lock); sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list); - rq = sched ? sched->sched_rq[entity->priority] : NULL; + rq = sched ? sched->sched_rq[entity->rq_priority] : NULL; if (rq != entity->rq) { drm_sched_rq_remove_entity(entity->rq, entity); entity->rq = rq; diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h index 014416cadb3e77..79dee95a8ca931 100644 --- a/drivers/gpu/drm/scheduler/sched_internal.h +++ b/drivers/gpu/drm/scheduler/sched_internal.h @@ -11,13 +11,16 @@ struct drm_sched_entity_stats { struct kref kref; spinlock_t lock; ktime_t runtime; + ktime_t prev_runtime; + u64 vruntime; }; /* Used to choose between FIFO and RR job-scheduling */ extern int drm_sched_policy; -#define DRM_SCHED_POLICY_RR 0 -#define DRM_SCHED_POLICY_FIFO 1 +#define DRM_SCHED_POLICY_RR 0 +#define DRM_SCHED_POLICY_FIFO 1 +#define DRM_SCHED_POLICY_FAIR 2 bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); @@ -133,4 +136,67 @@ drm_sched_entity_stats_job_add_gpu_time(struct drm_sched_job *job) spin_unlock(&stats->lock); } +static inline void +drm_sched_entity_save_vruntime(struct drm_sched_entity *entity, + ktime_t min_vruntime) +{ + struct drm_sched_entity_stats *stats = entity->stats; + ktime_t vruntime; + + spin_lock(&stats->lock); + vruntime = stats->vruntime; + if (min_vruntime && vruntime > min_vruntime) + vruntime = ktime_sub(vruntime, min_vruntime); + else + vruntime = 0; + stats->vruntime = vruntime; + spin_unlock(&stats->lock); +} + +static inline ktime_t +drm_sched_entity_restore_vruntime(struct drm_sched_entity *entity, + ktime_t min_vruntime) +{ + struct drm_sched_entity_stats *stats = entity->stats; + ktime_t vruntime; + + spin_lock(&stats->lock); + vruntime = ktime_add(min_vruntime, stats->vruntime); + stats->vruntime = vruntime; + spin_unlock(&stats->lock); + + return vruntime; +} + +static inline ktime_t +drm_sched_entity_update_vruntime(struct drm_sched_entity *entity) +{ + static const unsigned int shift[] = { + [DRM_SCHED_PRIORITY_KERNEL] = 1, + [DRM_SCHED_PRIORITY_HIGH] = 2, + [DRM_SCHED_PRIORITY_NORMAL] = 4, + [DRM_SCHED_PRIORITY_LOW] = 7, + }; + struct drm_sched_entity_stats *stats = entity->stats; + ktime_t runtime, prev; + + spin_lock(&stats->lock); + prev = stats->prev_runtime; + runtime = stats->runtime; + stats->prev_runtime = runtime; + runtime = ktime_add_ns(stats->vruntime, + ktime_to_ns(ktime_sub(runtime, prev)) << + shift[entity->priority]); + stats->vruntime = runtime; + spin_unlock(&stats->lock); + + return runtime; +} + +static inline ktime_t +drm_sched_entity_get_job_ts(struct drm_sched_entity *entity) +{ + return drm_sched_entity_update_vruntime(entity); +} + #endif diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 2d688fa92601fe..1741281fd1f2c0 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -83,13 +83,13 @@ #define CREATE_TRACE_POINTS #include "gpu_scheduler_trace.h" -int drm_sched_policy = DRM_SCHED_POLICY_FIFO; +int drm_sched_policy = DRM_SCHED_POLICY_FAIR; /** * DOC: sched_policy (int) * Used to override default entities scheduling policy in a run queue. */ -MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default)."); +MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO, " __stringify(DRM_SCHED_POLICY_FAIR) " = Fair (default)."); module_param_named(sched_policy, drm_sched_policy, int, 0444); static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) @@ -1086,11 +1086,15 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->own_submit_wq = true; } - sched->sched_rq = kmalloc_array(args->num_rqs, sizeof(*sched->sched_rq), + sched->num_user_rqs = args->num_rqs; + sched->num_rqs = drm_sched_policy != DRM_SCHED_POLICY_FAIR ? + args->num_rqs : 1; + sched->sched_rq = kmalloc_array(sched->num_rqs, + sizeof(*sched->sched_rq), GFP_KERNEL | __GFP_ZERO); if (!sched->sched_rq) goto Out_check_own; - sched->num_rqs = args->num_rqs; + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); if (!sched->sched_rq[i]) @@ -1205,7 +1209,7 @@ void drm_sched_increase_karma(struct drm_sched_job *bad) if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { atomic_inc(&bad->karma); - for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) { + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c index 2cde89cf25fbae..fd1cf89911e67d 100644 --- a/drivers/gpu/drm/scheduler/sched_rq.c +++ b/drivers/gpu/drm/scheduler/sched_rq.c @@ -75,6 +75,23 @@ drm_sched_rq_get_rr_deadline(struct drm_sched_rq *rq) return rq->rr_deadline; } +static ktime_t +drm_sched_rq_get_min_vruntime(struct drm_sched_rq *rq) +{ + struct drm_sched_entity *entity; + struct rb_node *rb; + + lockdep_assert_held(&rq->lock); + + for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { + entity = rb_entry(rb, typeof(*entity), rb_tree_node); + + return entity->stats->vruntime; /* Unlocked read */ + } + + return 0; +} + /** * drm_sched_rq_add_entity - add an entity * @@ -109,8 +126,13 @@ drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts) list_add_tail(&entity->list, &rq->entities); } - if (drm_sched_policy == DRM_SCHED_POLICY_RR) + if (drm_sched_policy == DRM_SCHED_POLICY_FAIR) { + ts = drm_sched_rq_get_min_vruntime(rq); + ts = drm_sched_entity_restore_vruntime(entity, ts); + } else if (drm_sched_policy == DRM_SCHED_POLICY_RR) { ts = drm_sched_rq_get_rr_deadline(rq); + } + drm_sched_rq_update_fifo_locked(entity, rq, ts); spin_unlock(&rq->lock); @@ -161,7 +183,9 @@ void drm_sched_rq_pop_entity(struct drm_sched_entity *entity) if (next_job) { ktime_t ts; - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + if (drm_sched_policy == DRM_SCHED_POLICY_FAIR) + ts = drm_sched_entity_get_job_ts(entity); + else if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) ts = next_job->submit_ts; else ts = drm_sched_rq_get_rr_deadline(rq); @@ -169,6 +193,13 @@ void drm_sched_rq_pop_entity(struct drm_sched_entity *entity) drm_sched_rq_update_fifo_locked(entity, rq, ts); } else { drm_sched_rq_remove_fifo_locked(entity, rq); + + if (drm_sched_policy == DRM_SCHED_POLICY_FAIR) { + ktime_t min_vruntime; + + min_vruntime = drm_sched_rq_get_min_vruntime(rq); + drm_sched_entity_save_vruntime(entity, min_vruntime); + } } spin_unlock(&rq->lock); spin_unlock(&entity->lock); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 5eaf4c5fa67712..01ad4055d7ee3a 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -151,6 +151,8 @@ struct drm_sched_entity { */ struct spsc_queue job_queue; + enum drm_sched_priority rq_priority; + /** * @fence_seq: * @@ -514,6 +516,7 @@ struct drm_gpu_scheduler { long timeout; const char *name; u32 num_rqs; + u32 num_user_rqs; struct drm_sched_rq **sched_rq; wait_queue_head_t job_scheduled; atomic64_t job_id_count; From 00820c265320eb1aa0119001f1e5943189b310c7 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:44 +0100 Subject: [PATCH 2922/2924] drm/sched: Remove FIFO and RR and simplify to a single run queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the new fair policy is at least as good as FIFO and we can afford to remove round-robin, we can simplify the scheduler code by making the scheduler to run queue relationship always 1:1 and remove some code. Also, now that the FIFO policy is gone the tree of entities is not a FIFO tree any more so rename it to just the tree. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 25 ++-- drivers/gpu/drm/scheduler/sched_entity.c | 29 +---- drivers/gpu/drm/scheduler/sched_internal.h | 9 +- drivers/gpu/drm/scheduler/sched_main.c | 137 +++++---------------- drivers/gpu/drm/scheduler/sched_rq.c | 53 +++----- include/drm/gpu_scheduler.h | 17 +-- 6 files changed, 66 insertions(+), 204 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index acb21fc8b3ce5d..9440af58073b78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -459,25 +459,22 @@ drm_sched_entity_queue_pop(struct drm_sched_entity *entity) void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) { + struct drm_sched_rq *rq = sched->rq; + struct drm_sched_entity *s_entity; struct drm_sched_job *s_job; - struct drm_sched_entity *s_entity = NULL; - int i; /* Signal all jobs not yet scheduled */ - for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { - struct drm_sched_rq *rq = sched->sched_rq[i]; - spin_lock(&rq->lock); - list_for_each_entry(s_entity, &rq->entities, list) { - while ((s_job = drm_sched_entity_queue_pop(s_entity))) { - struct drm_sched_fence *s_fence = s_job->s_fence; - - dma_fence_signal(&s_fence->scheduled); - dma_fence_set_error(&s_fence->finished, -EHWPOISON); - dma_fence_signal(&s_fence->finished); - } + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) { + while ((s_job = drm_sched_entity_queue_pop(s_entity))) { + struct drm_sched_fence *s_fence = s_job->s_fence; + + dma_fence_signal(&s_fence->scheduled); + dma_fence_set_error(&s_fence->finished, -EHWPOISON); + dma_fence_signal(&s_fence->finished); } - spin_unlock(&rq->lock); } + spin_unlock(&rq->lock); /* Signal all jobs already scheduled to HW */ list_for_each_entry(s_job, &sched->pending_list, list) { diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 52e3802914f103..9638a323e57a35 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -98,8 +98,6 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, entity->guilty = guilty; entity->num_sched_list = num_sched_list; entity->priority = priority; - entity->rq_priority = drm_sched_policy == DRM_SCHED_POLICY_FAIR ? - DRM_SCHED_PRIORITY_KERNEL : priority; /* * It's perfectly valid to initialize an entity without having a valid * scheduler attached. It's just not valid to use the scheduler before it @@ -109,30 +107,14 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, RCU_INIT_POINTER(entity->last_scheduled, NULL); RB_CLEAR_NODE(&entity->rb_tree_node); - if (num_sched_list && !sched_list[0]->sched_rq) { + if (num_sched_list && !sched_list[0]->rq) { /* Since every entry covered by num_sched_list * should be non-NULL and therefore we warn drivers * not to do this and to fix their DRM calling order. */ pr_warn("%s: called with uninitialized scheduler\n", __func__); } else if (num_sched_list) { - enum drm_sched_priority p = entity->priority; - - /* - * The "priority" of an entity cannot exceed the number of - * run-queues of a scheduler. Protect against num_rqs being 0, - * by converting to signed. Choose the lowest priority - * available. - */ - if (p >= sched_list[0]->num_user_rqs) { - dev_err(sched_list[0]->dev, "entity with out-of-bounds priority:%u num_user_rqs:%u\n", - p, sched_list[0]->num_user_rqs); - p = max_t(s32, - (s32)sched_list[0]->num_user_rqs - 1, - (s32)DRM_SCHED_PRIORITY_KERNEL); - entity->priority = p; - } - entity->rq = sched_list[0]->sched_rq[entity->rq_priority]; + entity->rq = sched_list[0]->rq; } init_completion(&entity->entity_idle); @@ -579,7 +561,7 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) spin_lock(&entity->lock); sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list); - rq = sched ? sched->sched_rq[entity->rq_priority] : NULL; + rq = sched ? sched->rq : NULL; if (rq != entity->rq) { drm_sched_rq_remove_entity(entity->rq, entity); entity->rq = rq; @@ -603,7 +585,6 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) { struct drm_sched_entity *entity = sched_job->entity; bool first; - ktime_t submit_ts; trace_drm_sched_job(sched_job, entity); atomic_inc(entity->rq->sched->score); @@ -612,16 +593,14 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) /* * After the sched_job is pushed into the entity queue, it may be * completed and freed up at any time. We can no longer access it. - * Make sure to set the submit_ts first, to avoid a race. */ - sched_job->submit_ts = submit_ts = ktime_get(); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); /* first job wakes up scheduler */ if (first) { struct drm_gpu_scheduler *sched; - sched = drm_sched_rq_add_entity(entity, submit_ts); + sched = drm_sched_rq_add_entity(entity); if (sched) drm_sched_wakeup(sched); } diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h index 79dee95a8ca931..108b203484fb52 100644 --- a/drivers/gpu/drm/scheduler/sched_internal.h +++ b/drivers/gpu/drm/scheduler/sched_internal.h @@ -15,13 +15,6 @@ struct drm_sched_entity_stats { u64 vruntime; }; -/* Used to choose between FIFO and RR job-scheduling */ -extern int drm_sched_policy; - -#define DRM_SCHED_POLICY_RR 0 -#define DRM_SCHED_POLICY_FIFO 1 -#define DRM_SCHED_POLICY_FAIR 2 - bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); void drm_sched_wakeup(struct drm_gpu_scheduler *sched); @@ -32,7 +25,7 @@ struct drm_sched_entity * drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, struct drm_sched_rq *rq); struct drm_gpu_scheduler * -drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts); +drm_sched_rq_add_entity(struct drm_sched_entity *entity); void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity); void drm_sched_rq_pop_entity(struct drm_sched_entity *entity); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 1741281fd1f2c0..01b6290289ecde 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -83,15 +83,6 @@ #define CREATE_TRACE_POINTS #include "gpu_scheduler_trace.h" -int drm_sched_policy = DRM_SCHED_POLICY_FAIR; - -/** - * DOC: sched_policy (int) - * Used to override default entities scheduling policy in a run queue. - */ -MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO, " __stringify(DRM_SCHED_POLICY_FAIR) " = Fair (default)."); -module_param_named(sched_policy, drm_sched_policy, int, 0444); - static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) { u32 credits; @@ -833,34 +824,6 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched) drm_sched_run_job_queue(sched); } -/** - * drm_sched_select_entity - Select next entity to process - * - * @sched: scheduler instance - * - * Return an entity to process or NULL if none are found. - * - * Note, that we break out of the for-loop when "entity" is non-null, which can - * also be an error-pointer--this assures we don't process lower priority - * run-queues. See comments in the respectively called functions. - */ -static struct drm_sched_entity * -drm_sched_select_entity(struct drm_gpu_scheduler *sched) -{ - struct drm_sched_entity *entity = NULL; - int i; - - /* Start with the highest priority. - */ - for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { - entity = drm_sched_rq_select_entity(sched, sched->sched_rq[i]); - if (entity) - break; - } - - return IS_ERR(entity) ? NULL : entity; -} - /** * drm_sched_get_finished_job - fetch the next finished job to be destroyed * @@ -977,8 +940,8 @@ static void drm_sched_run_job_work(struct work_struct *w) int r; /* Find entity with a ready job */ - entity = drm_sched_select_entity(sched); - if (!entity) + entity = drm_sched_rq_select_entity(sched, sched->rq); + if (IS_ERR_OR_NULL(entity)) return; /* No more work */ sched_job = drm_sched_entity_pop_job(entity); @@ -1049,8 +1012,6 @@ static struct workqueue_struct *drm_sched_alloc_wq(const char *name) */ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_args *args) { - int i; - sched->ops = args->ops; sched->credit_limit = args->credit_limit; sched->name = args->name; @@ -1060,13 +1021,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->score = args->score ? args->score : &sched->_score; sched->dev = args->dev; - if (args->num_rqs > DRM_SCHED_PRIORITY_COUNT) { - /* This is a gross violation--tell drivers what the problem is. - */ - dev_err(sched->dev, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n", - __func__); - return -EINVAL; - } else if (sched->sched_rq) { + if (sched->rq) { /* Not an error, but warn anyway so drivers can * fine-tune their DRM calling order, and return all * is good. @@ -1086,21 +1041,11 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->own_submit_wq = true; } - sched->num_user_rqs = args->num_rqs; - sched->num_rqs = drm_sched_policy != DRM_SCHED_POLICY_FAIR ? - args->num_rqs : 1; - sched->sched_rq = kmalloc_array(sched->num_rqs, - sizeof(*sched->sched_rq), - GFP_KERNEL | __GFP_ZERO); - if (!sched->sched_rq) + sched->rq = kmalloc(sizeof(*sched->rq), GFP_KERNEL | __GFP_ZERO); + if (!sched->rq) goto Out_check_own; - for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { - sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); - if (!sched->sched_rq[i]) - goto Out_unroll; - drm_sched_rq_init(sched, sched->sched_rq[i]); - } + drm_sched_rq_init(sched, sched->rq); init_waitqueue_head(&sched->job_scheduled); INIT_LIST_HEAD(&sched->pending_list); @@ -1115,12 +1060,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->ready = true; return 0; -Out_unroll: - for (--i ; i >= DRM_SCHED_PRIORITY_KERNEL; i--) - kfree(sched->sched_rq[i]); - kfree(sched->sched_rq); - sched->sched_rq = NULL; Out_check_own: if (sched->own_submit_wq) destroy_workqueue(sched->submit_wq); @@ -1152,25 +1092,21 @@ EXPORT_SYMBOL(drm_sched_init); */ void drm_sched_fini(struct drm_gpu_scheduler *sched) { + + struct drm_sched_rq *rq = sched->rq; struct drm_sched_entity *s_entity; - int i; drm_sched_wqueue_stop(sched); - for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { - struct drm_sched_rq *rq = sched->sched_rq[i]; - - spin_lock(&rq->lock); - list_for_each_entry(s_entity, &rq->entities, list) - /* - * Prevents reinsertion and marks job_queue as idle, - * it will be removed from the rq in drm_sched_entity_fini() - * eventually - */ - s_entity->stopped = true; - spin_unlock(&rq->lock); - kfree(sched->sched_rq[i]); - } + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) + /* + * Prevents reinsertion and marks job_queue as idle, + * it will be removed from the rq in drm_sched_entity_fini() + * eventually + */ + s_entity->stopped = true; + spin_unlock(&rq->lock); /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */ wake_up_all(&sched->job_scheduled); @@ -1181,8 +1117,8 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) if (sched->own_submit_wq) destroy_workqueue(sched->submit_wq); sched->ready = false; - kfree(sched->sched_rq); - sched->sched_rq = NULL; + kfree(sched->rq); + sched->rq = NULL; } EXPORT_SYMBOL(drm_sched_fini); @@ -1197,35 +1133,28 @@ EXPORT_SYMBOL(drm_sched_fini); */ void drm_sched_increase_karma(struct drm_sched_job *bad) { - int i; - struct drm_sched_entity *tmp; - struct drm_sched_entity *entity; struct drm_gpu_scheduler *sched = bad->sched; + struct drm_sched_entity *entity, *tmp; + struct drm_sched_rq *rq = sched->rq; /* don't change @bad's karma if it's from KERNEL RQ, * because sometimes GPU hang would cause kernel jobs (like VM updating jobs) * corrupt but keep in mind that kernel jobs always considered good. */ - if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { - atomic_inc(&bad->karma); - - for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { - struct drm_sched_rq *rq = sched->sched_rq[i]; - - spin_lock(&rq->lock); - list_for_each_entry_safe(entity, tmp, &rq->entities, list) { - if (bad->s_fence->scheduled.context == - entity->fence_context) { - if (entity->guilty) - atomic_set(entity->guilty, 1); - break; - } - } - spin_unlock(&rq->lock); - if (&entity->list != &rq->entities) - break; + if (bad->s_priority == DRM_SCHED_PRIORITY_KERNEL) + return; + + atomic_inc(&bad->karma); + + spin_lock(&rq->lock); + list_for_each_entry_safe(entity, tmp, &rq->entities, list) { + if (bad->s_fence->scheduled.context == entity->fence_context) { + if (entity->guilty) + atomic_set(entity->guilty, 1); + break; } } + spin_unlock(&rq->lock); } EXPORT_SYMBOL(drm_sched_increase_karma); diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c index fd1cf89911e67d..69ccdbd5af6c60 100644 --- a/drivers/gpu/drm/scheduler/sched_rq.c +++ b/drivers/gpu/drm/scheduler/sched_rq.c @@ -16,7 +16,7 @@ drm_sched_entity_compare_before(struct rb_node *a, const struct rb_node *b) return ktime_before(ea->oldest_job_waiting, eb->oldest_job_waiting); } -static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity, +static void drm_sched_rq_remove_tree_locked(struct drm_sched_entity *entity, struct drm_sched_rq *rq) { lockdep_assert_held(&entity->lock); @@ -28,7 +28,7 @@ static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity, } } -static void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, +static void drm_sched_rq_update_tree_locked(struct drm_sched_entity *entity, struct drm_sched_rq *rq, ktime_t ts) { @@ -40,7 +40,7 @@ static void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, lockdep_assert_held(&entity->lock); lockdep_assert_held(&rq->lock); - drm_sched_rq_remove_fifo_locked(entity, rq); + drm_sched_rq_remove_tree_locked(entity, rq); entity->oldest_job_waiting = ts; @@ -65,16 +65,6 @@ void drm_sched_rq_init(struct drm_gpu_scheduler *sched, rq->sched = sched; } -static ktime_t -drm_sched_rq_get_rr_deadline(struct drm_sched_rq *rq) -{ - lockdep_assert_held(&rq->lock); - - rq->rr_deadline = ktime_add_ns(rq->rr_deadline, 1); - - return rq->rr_deadline; -} - static ktime_t drm_sched_rq_get_min_vruntime(struct drm_sched_rq *rq) { @@ -103,10 +93,11 @@ drm_sched_rq_get_min_vruntime(struct drm_sched_rq *rq) * Returns a DRM scheduler pre-selected to handle this entity. */ struct drm_gpu_scheduler * -drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts) +drm_sched_rq_add_entity(struct drm_sched_entity *entity) { struct drm_gpu_scheduler *sched; struct drm_sched_rq *rq; + ktime_t ts; /* Add the entity to the run queue */ spin_lock(&entity->lock); @@ -126,14 +117,9 @@ drm_sched_rq_add_entity(struct drm_sched_entity *entity, ktime_t ts) list_add_tail(&entity->list, &rq->entities); } - if (drm_sched_policy == DRM_SCHED_POLICY_FAIR) { - ts = drm_sched_rq_get_min_vruntime(rq); - ts = drm_sched_entity_restore_vruntime(entity, ts); - } else if (drm_sched_policy == DRM_SCHED_POLICY_RR) { - ts = drm_sched_rq_get_rr_deadline(rq); - } - - drm_sched_rq_update_fifo_locked(entity, rq, ts); + ts = drm_sched_rq_get_min_vruntime(rq); + ts = drm_sched_entity_restore_vruntime(entity, ts); + drm_sched_rq_update_tree_locked(entity, rq, ts); spin_unlock(&rq->lock); spin_unlock(&entity->lock); @@ -162,7 +148,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, atomic_dec(rq->sched->score); list_del_init(&entity->list); - drm_sched_rq_remove_fifo_locked(entity, rq); + drm_sched_rq_remove_tree_locked(entity, rq); spin_unlock(&rq->lock); } @@ -183,23 +169,14 @@ void drm_sched_rq_pop_entity(struct drm_sched_entity *entity) if (next_job) { ktime_t ts; - if (drm_sched_policy == DRM_SCHED_POLICY_FAIR) - ts = drm_sched_entity_get_job_ts(entity); - else if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - ts = next_job->submit_ts; - else - ts = drm_sched_rq_get_rr_deadline(rq); - - drm_sched_rq_update_fifo_locked(entity, rq, ts); + ts = drm_sched_entity_get_job_ts(entity); + drm_sched_rq_update_tree_locked(entity, rq, ts); } else { - drm_sched_rq_remove_fifo_locked(entity, rq); - - if (drm_sched_policy == DRM_SCHED_POLICY_FAIR) { - ktime_t min_vruntime; + ktime_t min_vruntime; - min_vruntime = drm_sched_rq_get_min_vruntime(rq); - drm_sched_entity_save_vruntime(entity, min_vruntime); - } + drm_sched_rq_remove_tree_locked(entity, rq); + min_vruntime = drm_sched_rq_get_min_vruntime(rq); + drm_sched_entity_save_vruntime(entity, min_vruntime); } spin_unlock(&rq->lock); spin_unlock(&entity->lock); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 01ad4055d7ee3a..860834ad5371c5 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -151,8 +151,6 @@ struct drm_sched_entity { */ struct spsc_queue job_queue; - enum drm_sched_priority rq_priority; - /** * @fence_seq: * @@ -339,13 +337,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); struct drm_sched_job { u64 id; - /** - * @submit_ts: - * - * When the job was pushed into the entity queue. - */ - ktime_t submit_ts; - /** * @sched: * @@ -482,9 +473,7 @@ struct drm_sched_backend_ops { * @credit_count: the current credit count of this scheduler * @timeout: the time after which a job is removed from the scheduler. * @name: name of the ring for which this scheduler is being used. - * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT, - * as there's usually one run-queue per priority, but could be less. - * @sched_rq: An allocated array of run-queues of size @num_rqs; + * @rq: Scheduler's run queue. * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler * waits on this wait queue until all the scheduled jobs are * finished. @@ -515,9 +504,7 @@ struct drm_gpu_scheduler { atomic_t credit_count; long timeout; const char *name; - u32 num_rqs; - u32 num_user_rqs; - struct drm_sched_rq **sched_rq; + struct drm_sched_rq *rq; wait_queue_head_t job_scheduled; atomic64_t job_id_count; struct workqueue_struct *submit_wq; From db1ea3ae0ce4cac0d3a5735124cad751fe88bcb9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:45 +0100 Subject: [PATCH 2923/2924] drm/sched: Queue all free credits in one worker invocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no reason to queue just a single job if scheduler can take more and re-queue the worker to queue more. We can simply feed the hardware with as much as it can take in one go and hopefully win some latency. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/scheduler/sched_internal.h | 2 - drivers/gpu/drm/scheduler/sched_main.c | 121 ++++++++++----------- drivers/gpu/drm/scheduler/sched_rq.c | 12 +- 3 files changed, 56 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h index 108b203484fb52..257e10200c6792 100644 --- a/drivers/gpu/drm/scheduler/sched_internal.h +++ b/drivers/gpu/drm/scheduler/sched_internal.h @@ -15,8 +15,6 @@ struct drm_sched_entity_stats { u64 vruntime; }; -bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity); void drm_sched_wakeup(struct drm_gpu_scheduler *sched); void drm_sched_rq_init(struct drm_gpu_scheduler *sched, diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 01b6290289ecde..ca86a7a43ae047 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -94,35 +94,6 @@ static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) return credits; } -/** - * drm_sched_can_queue -- Can we queue more to the hardware? - * @sched: scheduler instance - * @entity: the scheduler entity - * - * Return true if we can push at least one more job from @entity, false - * otherwise. - */ -bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity) -{ - struct drm_sched_job *s_job; - - s_job = drm_sched_entity_queue_peek(entity); - if (!s_job) - return false; - - /* If a job exceeds the credit limit, truncate it to the credit limit - * itself to guarantee forward progress. - */ - if (s_job->credits > sched->credit_limit) { - dev_WARN(sched->dev, - "Jobs may not exceed the credit limit, truncate.\n"); - s_job->credits = sched->credit_limit; - } - - return drm_sched_available_credits(sched) >= s_job->credits; -} - /** * drm_sched_run_job_queue - enqueue run-job work * @sched: scheduler instance @@ -933,54 +904,72 @@ static void drm_sched_run_job_work(struct work_struct *w) { struct drm_gpu_scheduler *sched = container_of(w, struct drm_gpu_scheduler, work_run_job); + u32 job_credits, submitted_credits = 0; struct drm_sched_entity *entity; - struct dma_fence *fence; struct drm_sched_fence *s_fence; struct drm_sched_job *sched_job; - int r; + struct dma_fence *fence; - /* Find entity with a ready job */ - entity = drm_sched_rq_select_entity(sched, sched->rq); - if (IS_ERR_OR_NULL(entity)) - return; /* No more work */ + while (!READ_ONCE(sched->pause_submit)) { + /* Find entity with a ready job */ + entity = drm_sched_rq_select_entity(sched, sched->rq); + if (!entity) + break; /* No more work */ - sched_job = drm_sched_entity_pop_job(entity); - if (!sched_job) { - complete_all(&entity->entity_idle); - drm_sched_run_job_queue(sched); - return; - } + /* + * If a job exceeds the credit limit truncate it to guarantee + * forward progress. + */ + sched_job = drm_sched_entity_queue_peek(entity); + job_credits = sched_job->credits; + if (dev_WARN_ONCE(sched->dev, job_credits > sched->credit_limit, + "Jobs may not exceed the credit limit, truncating.\n")) + job_credits = sched_job->credits = sched->credit_limit; + + if (job_credits > drm_sched_available_credits(sched)) { + complete_all(&entity->entity_idle); + break; + } - s_fence = sched_job->s_fence; + sched_job = drm_sched_entity_pop_job(entity); + if (!sched_job) { + /* Top entity is not yet runnable after all */ + complete_all(&entity->entity_idle); + continue; + } - atomic_add(sched_job->credits, &sched->credit_count); - drm_sched_job_begin(sched_job); + s_fence = sched_job->s_fence; + drm_sched_job_begin(sched_job); + trace_drm_run_job(sched_job, entity); + submitted_credits += job_credits; + atomic_add(job_credits, &sched->credit_count); - trace_drm_run_job(sched_job, entity); - /* - * The run_job() callback must by definition return a fence whose - * refcount has been incremented for the scheduler already. - */ - fence = sched->ops->run_job(sched_job); - complete_all(&entity->entity_idle); - drm_sched_fence_scheduled(s_fence, fence); - - if (!IS_ERR_OR_NULL(fence)) { - r = dma_fence_add_callback(fence, &sched_job->cb, - drm_sched_job_done_cb); - if (r == -ENOENT) - drm_sched_job_done(sched_job, fence->error); - else if (r) - DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); + fence = sched->ops->run_job(sched_job); + drm_sched_fence_scheduled(s_fence, fence); - dma_fence_put(fence); - } else { - drm_sched_job_done(sched_job, IS_ERR(fence) ? - PTR_ERR(fence) : 0); + if (!IS_ERR_OR_NULL(fence)) { + int r; + + /* Drop for original kref_init of the fence */ + dma_fence_put(fence); + + r = dma_fence_add_callback(fence, &sched_job->cb, + drm_sched_job_done_cb); + if (r == -ENOENT) + drm_sched_job_done(sched_job, fence->error); + else if (r) + DRM_DEV_ERROR(sched->dev, + "fence add callback failed (%d)\n", r); + } else { + drm_sched_job_done(sched_job, IS_ERR(fence) ? + PTR_ERR(fence) : 0); + } + + complete_all(&entity->entity_idle); } - wake_up(&sched->job_scheduled); - drm_sched_run_job_queue(sched); + if (submitted_credits) + wake_up(&sched->job_scheduled); } static struct workqueue_struct *drm_sched_alloc_wq(const char *name) diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c index 69ccdbd5af6c60..b18265c7f07344 100644 --- a/drivers/gpu/drm/scheduler/sched_rq.c +++ b/drivers/gpu/drm/scheduler/sched_rq.c @@ -190,9 +190,7 @@ void drm_sched_rq_pop_entity(struct drm_sched_entity *entity) * * Find oldest waiting ready entity. * - * Return an entity if one is found; return an error-pointer (!NULL) if an - * entity was ready, but the scheduler had insufficient credits to accommodate - * its job; return NULL, if no ready entity was found. + * Return an entity if one is found or NULL if no ready entity was found. */ struct drm_sched_entity * drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, @@ -206,14 +204,6 @@ drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current entity in - * terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - reinit_completion(&entity->entity_idle); break; } From 7677fde47a026969c1d7e092a812428e5a58eef8 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 23 Jun 2025 13:27:46 +0100 Subject: [PATCH 2924/2924] drm/sched: Embed run queue singleton into the scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the run queue to scheduler relationship is always 1:1 we can embed it (the run queue) directly in the scheduler struct and save on some allocation error handling code and such. Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Danilo Krummrich Cc: Matthew Brost Cc: Philipp Stanner --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 5 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 8 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 8 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 8 +++--- drivers/gpu/drm/scheduler/sched_entity.c | 32 +++++++++------------ drivers/gpu/drm/scheduler/sched_fence.c | 2 +- drivers/gpu/drm/scheduler/sched_internal.h | 6 ++-- drivers/gpu/drm/scheduler/sched_main.c | 31 ++++---------------- drivers/gpu/drm/scheduler/sched_rq.c | 18 ++++++------ include/drm/gpu_scheduler.h | 5 +--- 12 files changed, 58 insertions(+), 77 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 82df06a72ee025..e18e180bf32c57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1108,7 +1108,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) { for (i = 0; i < p->gang_size; ++i) { struct drm_sched_entity *entity = p->entities[i]; - struct drm_gpu_scheduler *sched = entity->rq->sched; + struct drm_gpu_scheduler *sched = + container_of(entity->rq, typeof(*sched), rq); struct amdgpu_ring *ring = to_amdgpu_ring(sched); if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) @@ -1236,7 +1237,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return r; } - sched = p->gang_leader->base.entity->rq->sched; + sched = container_of(p->gang_leader->base.entity->rq, typeof(*sched), + rq); while ((fence = amdgpu_sync_get_fence(&p->sync))) { struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 9440af58073b78..e3d4f750373884 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -359,7 +359,9 @@ static struct dma_fence * amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct drm_sched_entity *s_entity) { - struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched); + struct drm_gpu_scheduler *sched = + container_of(s_entity->rq, typeof(*sched), rq); + struct amdgpu_ring *ring = to_amdgpu_ring(sched); struct amdgpu_job *job = to_amdgpu_job(sched_job); struct dma_fence *fence; int r; @@ -459,7 +461,7 @@ drm_sched_entity_queue_pop(struct drm_sched_entity *entity) void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) { - struct drm_sched_rq *rq = sched->rq; + struct drm_sched_rq *rq = &sched->rq; struct drm_sched_entity *s_entity; struct drm_sched_job *s_job; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index ce6b9ba967fff0..d6872baeba1e34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -85,7 +85,10 @@ struct amdgpu_job { static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job) { - return to_amdgpu_ring(job->base.entity->rq->sched); + struct drm_gpu_scheduler *sched = + container_of(job->base.entity->rq, typeof(*sched), rq); + + return to_amdgpu_ring(sched); } int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 11dd2e0f797964..197d20a37afb5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -145,6 +145,7 @@ TRACE_EVENT(amdgpu_cs, struct amdgpu_ib *ib), TP_ARGS(p, job, ib), TP_STRUCT__entry( + __field(struct drm_gpu_scheduler *, sched) __field(struct amdgpu_bo_list *, bo_list) __field(u32, ring) __field(u32, dw) @@ -152,11 +153,14 @@ TRACE_EVENT(amdgpu_cs, ), TP_fast_assign( + __entry->sched = container_of(job->base.entity->rq, + typeof(*__entry->sched), + rq); __entry->bo_list = p->bo_list; - __entry->ring = to_amdgpu_ring(job->base.entity->rq->sched)->idx; + __entry->ring = to_amdgpu_ring(__entry->sched)->idx; __entry->dw = ib->length_dw; __entry->fences = amdgpu_fence_count_emitted( - to_amdgpu_ring(job->base.entity->rq->sched)); + to_amdgpu_ring(__entry->sched)); ), TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", __entry->bo_list, __entry->ring, __entry->dw, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 46d9fb433ab2a3..42f2bfb30af184 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -105,13 +105,13 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { + struct drm_gpu_scheduler *sched = + container_of(p->vm->delayed.rq, typeof(*sched), rq); + struct amdgpu_ring *ring = + container_of(sched, struct amdgpu_ring, sched); struct amdgpu_ib *ib = p->job->ibs; - struct amdgpu_ring *ring; struct dma_fence *f; - ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring, - sched); - WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 23b6f7a4aa4a11..ab132dae818371 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -420,15 +420,15 @@ int amdgpu_xcp_open_device(struct amdgpu_device *adev, void amdgpu_xcp_release_sched(struct amdgpu_device *adev, struct amdgpu_ctx_entity *entity) { - struct drm_gpu_scheduler *sched; - struct amdgpu_ring *ring; + struct drm_gpu_scheduler *sched = + container_of(entity->entity.rq, typeof(*sched), rq); if (!adev->xcp_mgr) return; - sched = entity->entity.rq->sched; if (drm_sched_wqueue_ready(sched)) { - ring = to_amdgpu_ring(entity->entity.rq->sched); + struct amdgpu_ring *ring = to_amdgpu_ring(sched); + atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt); } } diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 9638a323e57a35..0b871c611456a7 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -104,19 +104,12 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, * is initialized itself. */ entity->sched_list = num_sched_list > 1 ? sched_list : NULL; + if (num_sched_list) { + entity->sched_list = num_sched_list > 1 ? sched_list : NULL; + entity->rq = &sched_list[0]->rq; + } RCU_INIT_POINTER(entity->last_scheduled, NULL); RB_CLEAR_NODE(&entity->rb_tree_node); - - if (num_sched_list && !sched_list[0]->rq) { - /* Since every entry covered by num_sched_list - * should be non-NULL and therefore we warn drivers - * not to do this and to fix their DRM calling order. - */ - pr_warn("%s: called with uninitialized scheduler\n", __func__); - } else if (num_sched_list) { - entity->rq = sched_list[0]->rq; - } - init_completion(&entity->entity_idle); /* We start in an idle state. */ @@ -302,7 +295,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) if (!entity->rq) return 0; - sched = entity->rq->sched; + sched = container_of(entity->rq, typeof(*sched), rq); /** * The client will not queue more IBs during this fini, consume existing * queued IBs or discard them on SIGKILL @@ -394,9 +387,11 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, { struct drm_sched_entity *entity = container_of(cb, struct drm_sched_entity, cb); + struct drm_gpu_scheduler *sched = + container_of(entity->rq, typeof(*sched), rq); drm_sched_entity_clear_dep(f, cb); - drm_sched_wakeup(entity->rq->sched); + drm_sched_wakeup(sched); } /** @@ -422,7 +417,8 @@ EXPORT_SYMBOL(drm_sched_entity_set_priority); */ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) { - struct drm_gpu_scheduler *sched = entity->rq->sched; + struct drm_gpu_scheduler *sched = + container_of(entity->rq, typeof(*sched), rq); struct dma_fence *fence = entity->dependency; struct drm_sched_fence *s_fence; @@ -561,7 +557,7 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) spin_lock(&entity->lock); sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list); - rq = sched ? sched->rq : NULL; + rq = sched ? &sched->rq : NULL; if (rq != entity->rq) { drm_sched_rq_remove_entity(entity->rq, entity); entity->rq = rq; @@ -584,10 +580,12 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) void drm_sched_entity_push_job(struct drm_sched_job *sched_job) { struct drm_sched_entity *entity = sched_job->entity; + struct drm_gpu_scheduler *sched = + container_of(entity->rq, typeof(*sched), rq); bool first; trace_drm_sched_job(sched_job, entity); - atomic_inc(entity->rq->sched->score); + atomic_inc(sched->score); WRITE_ONCE(entity->last_user, current->group_leader); /* @@ -598,8 +596,6 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) /* first job wakes up scheduler */ if (first) { - struct drm_gpu_scheduler *sched; - sched = drm_sched_rq_add_entity(entity); if (sched) drm_sched_wakeup(sched); diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index e971528504a53c..bb48e690862d24 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -225,7 +225,7 @@ void drm_sched_fence_init(struct drm_sched_fence *fence, { unsigned seq; - fence->sched = entity->rq->sched; + fence->sched = container_of(entity->rq, typeof(*fence->sched), rq); seq = atomic_inc_return(&entity->fence_seq); dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled, &fence->lock, entity->fence_context, seq); diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h index 257e10200c6792..8d4bfe499b8445 100644 --- a/drivers/gpu/drm/scheduler/sched_internal.h +++ b/drivers/gpu/drm/scheduler/sched_internal.h @@ -17,11 +17,9 @@ struct drm_sched_entity_stats { void drm_sched_wakeup(struct drm_gpu_scheduler *sched); -void drm_sched_rq_init(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq); +void drm_sched_rq_init(struct drm_gpu_scheduler *sched); struct drm_sched_entity * -drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq); +drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched); struct drm_gpu_scheduler * drm_sched_rq_add_entity(struct drm_sched_entity *entity); void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index ca86a7a43ae047..7e98ce17211403 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -572,7 +572,7 @@ void drm_sched_job_arm(struct drm_sched_job *job) BUG_ON(!entity); drm_sched_entity_select_rq(entity); - sched = entity->rq->sched; + sched = container_of(entity->rq, typeof(*sched), rq); job->sched = sched; job->s_priority = entity->priority; @@ -912,7 +912,7 @@ static void drm_sched_run_job_work(struct work_struct *w) while (!READ_ONCE(sched->pause_submit)) { /* Find entity with a ready job */ - entity = drm_sched_rq_select_entity(sched, sched->rq); + entity = drm_sched_rq_select_entity(sched); if (!entity) break; /* No more work */ @@ -1010,15 +1010,6 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->score = args->score ? args->score : &sched->_score; sched->dev = args->dev; - if (sched->rq) { - /* Not an error, but warn anyway so drivers can - * fine-tune their DRM calling order, and return all - * is good. - */ - dev_warn(sched->dev, "%s: scheduler already initialized!\n", __func__); - return 0; - } - if (args->submit_wq) { sched->submit_wq = args->submit_wq; sched->own_submit_wq = false; @@ -1030,11 +1021,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->own_submit_wq = true; } - sched->rq = kmalloc(sizeof(*sched->rq), GFP_KERNEL | __GFP_ZERO); - if (!sched->rq) - goto Out_check_own; - - drm_sched_rq_init(sched, sched->rq); + drm_sched_rq_init(sched); init_waitqueue_head(&sched->job_scheduled); INIT_LIST_HEAD(&sched->pending_list); @@ -1049,12 +1036,6 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->ready = true; return 0; - -Out_check_own: - if (sched->own_submit_wq) - destroy_workqueue(sched->submit_wq); - dev_err(sched->dev, "%s: Failed to setup GPU scheduler--out of memory\n", __func__); - return -ENOMEM; } EXPORT_SYMBOL(drm_sched_init); @@ -1082,7 +1063,7 @@ EXPORT_SYMBOL(drm_sched_init); void drm_sched_fini(struct drm_gpu_scheduler *sched) { - struct drm_sched_rq *rq = sched->rq; + struct drm_sched_rq *rq = &sched->rq; struct drm_sched_entity *s_entity; drm_sched_wqueue_stop(sched); @@ -1106,8 +1087,6 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) if (sched->own_submit_wq) destroy_workqueue(sched->submit_wq); sched->ready = false; - kfree(sched->rq); - sched->rq = NULL; } EXPORT_SYMBOL(drm_sched_fini); @@ -1124,7 +1103,7 @@ void drm_sched_increase_karma(struct drm_sched_job *bad) { struct drm_gpu_scheduler *sched = bad->sched; struct drm_sched_entity *entity, *tmp; - struct drm_sched_rq *rq = sched->rq; + struct drm_sched_rq *rq = &sched->rq; /* don't change @bad's karma if it's from KERNEL RQ, * because sometimes GPU hang would cause kernel jobs (like VM updating jobs) diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c index b18265c7f07344..f2f10f7d6ddf9c 100644 --- a/drivers/gpu/drm/scheduler/sched_rq.c +++ b/drivers/gpu/drm/scheduler/sched_rq.c @@ -52,17 +52,16 @@ static void drm_sched_rq_update_tree_locked(struct drm_sched_entity *entity, * drm_sched_rq_init - initialize a given run queue struct * * @sched: scheduler instance to associate with this run queue - * @rq: scheduler run queue * * Initializes a scheduler runqueue. */ -void drm_sched_rq_init(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq) +void drm_sched_rq_init(struct drm_gpu_scheduler *sched) { + struct drm_sched_rq *rq = &sched->rq; + spin_lock_init(&rq->lock); INIT_LIST_HEAD(&rq->entities); rq->rb_tree_root = RB_ROOT_CACHED; - rq->sched = sched; } static ktime_t @@ -109,8 +108,8 @@ drm_sched_rq_add_entity(struct drm_sched_entity *entity) } rq = entity->rq; + sched = container_of(rq, typeof(*sched), rq); spin_lock(&rq->lock); - sched = rq->sched; if (list_empty(&entity->list)) { atomic_inc(sched->score); @@ -138,6 +137,8 @@ drm_sched_rq_add_entity(struct drm_sched_entity *entity) void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity) { + struct drm_gpu_scheduler *sched = container_of(rq, typeof(*sched), rq); + lockdep_assert_held(&entity->lock); if (list_empty(&entity->list)) @@ -145,7 +146,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, spin_lock(&rq->lock); - atomic_dec(rq->sched->score); + atomic_dec(sched->score); list_del_init(&entity->list); drm_sched_rq_remove_tree_locked(entity, rq); @@ -186,16 +187,15 @@ void drm_sched_rq_pop_entity(struct drm_sched_entity *entity) * drm_sched_rq_select_entity - Select an entity which provides a job to run * * @sched: the gpu scheduler - * @rq: scheduler run queue to check. * * Find oldest waiting ready entity. * * Return an entity if one is found or NULL if no ready entity was found. */ struct drm_sched_entity * -drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq) +drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched) { + struct drm_sched_rq *rq = &sched->rq; struct rb_node *rb; spin_lock(&rq->lock); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 860834ad5371c5..2aa1ef4a855757 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -242,7 +242,6 @@ struct drm_sched_entity { /** * struct drm_sched_rq - queue of entities to be scheduled. * - * @sched: the scheduler to which this rq belongs to. * @lock: protects @entities, @rb_tree_root and @rr_deadline. * @entities: list of the entities to be scheduled. * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling @@ -252,8 +251,6 @@ struct drm_sched_entity { * the next entity to emit commands from. */ struct drm_sched_rq { - struct drm_gpu_scheduler *sched; - spinlock_t lock; /* Following members are protected by the @lock: */ ktime_t rr_deadline; @@ -504,7 +501,7 @@ struct drm_gpu_scheduler { atomic_t credit_count; long timeout; const char *name; - struct drm_sched_rq *rq; + struct drm_sched_rq rq; wait_queue_head_t job_scheduled; atomic64_t job_id_count; struct workqueue_struct *submit_wq;