From: Jay Fang f.fangjian@huawei.com
mainline inclusion from mainline-v5.13-rc1 commit c770d8631e1810d8f1ce21b18ad5dd67eeb39e5c category: feature bugzilla: 175249 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
This driver supports SPI Controller for HiSilicon Kunpeng SoCs. This driver supports SPI operations using FIFO mode of transfer.
DMA is not supported, and we just use IRQ mode for operation completion notification.
Only ACPI firmware is supported.
Signed-off-by: Jay Fang f.fangjian@huawei.com Link: https://lore.kernel.org/r/1616836200-45827-1-git-send-email-f.fangjian@huawe... Signed-off-by: Mark Brown broonie@kernel.org Reviewed-by: Chengwen Feng fengchengwen@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- MAINTAINERS | 7 + drivers/spi/Kconfig | 10 + drivers/spi/Makefile | 1 + drivers/spi/spi-hisi-kunpeng.c | 505 +++++++++++++++++++++++++++++++++ 4 files changed, 523 insertions(+) create mode 100644 drivers/spi/spi-hisi-kunpeng.c
diff --git a/MAINTAINERS b/MAINTAINERS index a032b00f1380..a12ecc71e1e7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8007,6 +8007,13 @@ F: drivers/crypto/hisilicon/sec2/sec_crypto.c F: drivers/crypto/hisilicon/sec2/sec_crypto.h F: drivers/crypto/hisilicon/sec2/sec_main.c
+HISILICON SPI Controller DRIVER FOR KUNPENG SOCS +M: Jay Fang f.fangjian@huawei.com +L: linux-spi@vger.kernel.org +S: Maintained +W: http://www.hisilicon.com +F: drivers/spi/spi-hisi-kunpeng.c + HISILICON STAGING DRIVERS FOR HIKEY 960/970 M: Mauro Carvalho Chehab mchehab+huawei@kernel.org S: Maintained diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index aadaea052f51..2e284cae97e3 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -339,6 +339,16 @@ config SPI_FSL_QUADSPI This controller does not support generic SPI messages. It only supports the high-level SPI memory interface.
+config SPI_HISI_KUNPENG + tristate "HiSilicon SPI Controller for Kunpeng SoCs" + depends on (ARM64 && ACPI) || COMPILE_TEST + help + This enables support for HiSilicon SPI controller found on + Kunpeng SoCs. + + This driver can also be built as a module. If so, the module + will be called hisi-kunpeng-spi. + config SPI_HISI_SFC_V3XX tristate "HiSilicon SPI NOR Flash Controller for Hi16XX chipsets" depends on (ARM64 && ACPI) || COMPILE_TEST diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index 6fea5821662e..04291ff89e16 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_SPI_FSL_LPSPI) += spi-fsl-lpspi.o obj-$(CONFIG_SPI_FSL_QUADSPI) += spi-fsl-qspi.o obj-$(CONFIG_SPI_FSL_SPI) += spi-fsl-spi.o obj-$(CONFIG_SPI_GPIO) += spi-gpio.o +obj-$(CONFIG_SPI_HISI_KUNPENG) += spi-hisi-kunpeng.o obj-$(CONFIG_SPI_HISI_SFC_V3XX) += spi-hisi-sfc-v3xx.o obj-$(CONFIG_SPI_IMG_SPFI) += spi-img-spfi.o obj-$(CONFIG_SPI_IMX) += spi-imx.o diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c new file mode 100644 index 000000000000..abc0cd54eee6 --- /dev/null +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -0,0 +1,505 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// HiSilicon SPI Controller Driver for Kunpeng SoCs +// +// Copyright (c) 2021 HiSilicon Technologies Co., Ltd. +// Author: Jay Fang f.fangjian@huawei.com +// +// This code is based on spi-dw-core.c. + +#include <linux/acpi.h> +#include <linux/bitfield.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/property.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spi/spi.h> + +/* Register offsets */ +#define HISI_SPI_CSCR 0x00 /* cs control register */ +#define HISI_SPI_CR 0x04 /* spi common control register */ +#define HISI_SPI_ENR 0x08 /* spi enable register */ +#define HISI_SPI_FIFOC 0x0c /* fifo level control register */ +#define HISI_SPI_IMR 0x10 /* interrupt mask register */ +#define HISI_SPI_DIN 0x14 /* data in register */ +#define HISI_SPI_DOUT 0x18 /* data out register */ +#define HISI_SPI_SR 0x1c /* status register */ +#define HISI_SPI_RISR 0x20 /* raw interrupt status register */ +#define HISI_SPI_ISR 0x24 /* interrupt status register */ +#define HISI_SPI_ICR 0x28 /* interrupt clear register */ +#define HISI_SPI_VERSION 0xe0 /* version register */ + +/* Bit fields in HISI_SPI_CR */ +#define CR_LOOP_MASK GENMASK(1, 1) +#define CR_CPOL_MASK GENMASK(2, 2) +#define CR_CPHA_MASK GENMASK(3, 3) +#define CR_DIV_PRE_MASK GENMASK(11, 4) +#define CR_DIV_POST_MASK GENMASK(19, 12) +#define CR_BPW_MASK GENMASK(24, 20) +#define CR_SPD_MODE_MASK GENMASK(25, 25) + +/* Bit fields in HISI_SPI_FIFOC */ +#define FIFOC_TX_MASK GENMASK(5, 3) +#define FIFOC_RX_MASK GENMASK(11, 9) + +/* Bit fields in HISI_SPI_IMR, 4 bits */ +#define IMR_RXOF BIT(0) /* Receive Overflow */ +#define IMR_RXTO BIT(1) /* Receive Timeout */ +#define IMR_RX BIT(2) /* Receive */ +#define IMR_TX BIT(3) /* Transmit */ +#define IMR_MASK (IMR_RXOF | IMR_RXTO | IMR_RX | IMR_TX) + +/* Bit fields in HISI_SPI_SR, 5 bits */ +#define SR_TXE BIT(0) /* Transmit FIFO empty */ +#define SR_TXNF BIT(1) /* Transmit FIFO not full */ +#define SR_RXNE BIT(2) /* Receive FIFO not empty */ +#define SR_RXF BIT(3) /* Receive FIFO full */ +#define SR_BUSY BIT(4) /* Busy Flag */ + +/* Bit fields in HISI_SPI_ISR, 4 bits */ +#define ISR_RXOF BIT(0) /* Receive Overflow */ +#define ISR_RXTO BIT(1) /* Receive Timeout */ +#define ISR_RX BIT(2) /* Receive */ +#define ISR_TX BIT(3) /* Transmit */ +#define ISR_MASK (ISR_RXOF | ISR_RXTO | ISR_RX | ISR_TX) + +/* Bit fields in HISI_SPI_ICR, 2 bits */ +#define ICR_RXOF BIT(0) /* Receive Overflow */ +#define ICR_RXTO BIT(1) /* Receive Timeout */ +#define ICR_MASK (ICR_RXOF | ICR_RXTO) + +#define DIV_POST_MAX 0xFF +#define DIV_POST_MIN 0x00 +#define DIV_PRE_MAX 0xFE +#define DIV_PRE_MIN 0x02 +#define CLK_DIV_MAX ((1 + DIV_POST_MAX) * DIV_PRE_MAX) +#define CLK_DIV_MIN ((1 + DIV_POST_MIN) * DIV_PRE_MIN) + +#define DEFAULT_NUM_CS 1 + +#define HISI_SPI_WAIT_TIMEOUT_MS 10UL + +enum hisi_spi_rx_level_trig { + HISI_SPI_RX_1, + HISI_SPI_RX_4, + HISI_SPI_RX_8, + HISI_SPI_RX_16, + HISI_SPI_RX_32, + HISI_SPI_RX_64, + HISI_SPI_RX_128 +}; + +enum hisi_spi_tx_level_trig { + HISI_SPI_TX_1_OR_LESS, + HISI_SPI_TX_4_OR_LESS, + HISI_SPI_TX_8_OR_LESS, + HISI_SPI_TX_16_OR_LESS, + HISI_SPI_TX_32_OR_LESS, + HISI_SPI_TX_64_OR_LESS, + HISI_SPI_TX_128_OR_LESS +}; + +enum hisi_spi_frame_n_bytes { + HISI_SPI_N_BYTES_NULL, + HISI_SPI_N_BYTES_U8, + HISI_SPI_N_BYTES_U16, + HISI_SPI_N_BYTES_U32 = 4 +}; + +/* Slave spi_dev related */ +struct hisi_chip_data { + u32 cr; + u32 speed_hz; /* baud rate */ + u16 clk_div; /* baud rate divider */ + + /* clk_div = (1 + div_post) * div_pre */ + u8 div_post; /* value from 0 to 255 */ + u8 div_pre; /* value from 2 to 254 (even only!) */ +}; + +struct hisi_spi { + struct device *dev; + + void __iomem *regs; + int irq; + u32 fifo_len; /* depth of the FIFO buffer */ + + /* Current message transfer state info */ + const void *tx; + unsigned int tx_len; + void *rx; + unsigned int rx_len; + u8 n_bytes; /* current is a 1/2/4 bytes op */ +}; + +static u32 hisi_spi_busy(struct hisi_spi *hs) +{ + return readl(hs->regs + HISI_SPI_SR) & SR_BUSY; +} + +static u32 hisi_spi_rx_not_empty(struct hisi_spi *hs) +{ + return readl(hs->regs + HISI_SPI_SR) & SR_RXNE; +} + +static u32 hisi_spi_tx_not_full(struct hisi_spi *hs) +{ + return readl(hs->regs + HISI_SPI_SR) & SR_TXNF; +} + +static void hisi_spi_flush_fifo(struct hisi_spi *hs) +{ + unsigned long limit = loops_per_jiffy << 1; + + do { + while (hisi_spi_rx_not_empty(hs)) + readl(hs->regs + HISI_SPI_DOUT); + } while (hisi_spi_busy(hs) && limit--); +} + +/* Disable the controller and all interrupts */ +static void hisi_spi_disable(struct hisi_spi *hs) +{ + writel(0, hs->regs + HISI_SPI_ENR); + writel(IMR_MASK, hs->regs + HISI_SPI_IMR); + writel(ICR_MASK, hs->regs + HISI_SPI_ICR); +} + +static u8 hisi_spi_n_bytes(struct spi_transfer *transfer) +{ + if (transfer->bits_per_word <= 8) + return HISI_SPI_N_BYTES_U8; + else if (transfer->bits_per_word <= 16) + return HISI_SPI_N_BYTES_U16; + else + return HISI_SPI_N_BYTES_U32; +} + +static void hisi_spi_reader(struct hisi_spi *hs) +{ + u32 max = min_t(u32, hs->rx_len, hs->fifo_len); + u32 rxw; + + while (hisi_spi_rx_not_empty(hs) && max--) { + rxw = readl(hs->regs + HISI_SPI_DOUT); + /* Check the transfer's original "rx" is not null */ + if (hs->rx) { + switch (hs->n_bytes) { + case HISI_SPI_N_BYTES_U8: + *(u8 *)(hs->rx) = rxw; + break; + case HISI_SPI_N_BYTES_U16: + *(u16 *)(hs->rx) = rxw; + break; + case HISI_SPI_N_BYTES_U32: + *(u32 *)(hs->rx) = rxw; + break; + } + hs->rx += hs->n_bytes; + } + --hs->rx_len; + } +} + +static void hisi_spi_writer(struct hisi_spi *hs) +{ + u32 max = min_t(u32, hs->tx_len, hs->fifo_len); + u32 txw = 0; + + while (hisi_spi_tx_not_full(hs) && max--) { + /* Check the transfer's original "tx" is not null */ + if (hs->tx) { + switch (hs->n_bytes) { + case HISI_SPI_N_BYTES_U8: + txw = *(u8 *)(hs->tx); + break; + case HISI_SPI_N_BYTES_U16: + txw = *(u16 *)(hs->tx); + break; + case HISI_SPI_N_BYTES_U32: + txw = *(u32 *)(hs->tx); + break; + } + hs->tx += hs->n_bytes; + } + writel(txw, hs->regs + HISI_SPI_DIN); + --hs->tx_len; + } +} + +static void __hisi_calc_div_reg(struct hisi_chip_data *chip) +{ + chip->div_pre = DIV_PRE_MAX; + while (chip->div_pre >= DIV_PRE_MIN) { + if (chip->clk_div % chip->div_pre == 0) + break; + + chip->div_pre -= 2; + } + + if (chip->div_pre > chip->clk_div) + chip->div_pre = chip->clk_div; + + chip->div_post = (chip->clk_div / chip->div_pre) - 1; +} + +static u32 hisi_calc_effective_speed(struct spi_controller *master, + struct hisi_chip_data *chip, u32 speed_hz) +{ + u32 effective_speed; + + /* Note clock divider doesn't support odd numbers */ + chip->clk_div = DIV_ROUND_UP(master->max_speed_hz, speed_hz) + 1; + chip->clk_div &= 0xfffe; + if (chip->clk_div > CLK_DIV_MAX) + chip->clk_div = CLK_DIV_MAX; + + effective_speed = master->max_speed_hz / chip->clk_div; + if (chip->speed_hz != effective_speed) { + __hisi_calc_div_reg(chip); + chip->speed_hz = effective_speed; + } + + return effective_speed; +} + +static u32 hisi_spi_prepare_cr(struct spi_device *spi) +{ + u32 cr = FIELD_PREP(CR_SPD_MODE_MASK, 1); + + cr |= FIELD_PREP(CR_CPHA_MASK, (spi->mode & SPI_CPHA) ? 1 : 0); + cr |= FIELD_PREP(CR_CPOL_MASK, (spi->mode & SPI_CPOL) ? 1 : 0); + cr |= FIELD_PREP(CR_LOOP_MASK, (spi->mode & SPI_LOOP) ? 1 : 0); + + return cr; +} + +static void hisi_spi_hw_init(struct hisi_spi *hs) +{ + hisi_spi_disable(hs); + + /* FIFO default config */ + writel(FIELD_PREP(FIFOC_TX_MASK, HISI_SPI_TX_64_OR_LESS) | + FIELD_PREP(FIFOC_RX_MASK, HISI_SPI_RX_16), + hs->regs + HISI_SPI_FIFOC); + + hs->fifo_len = 256; +} + +static irqreturn_t hisi_spi_irq(int irq, void *dev_id) +{ + struct spi_controller *master = dev_id; + struct hisi_spi *hs = spi_controller_get_devdata(master); + u32 irq_status = readl(hs->regs + HISI_SPI_ISR) & ISR_MASK; + + if (!irq_status) + return IRQ_NONE; + + if (!master->cur_msg) + return IRQ_HANDLED; + + /* Error handling */ + if (irq_status & ISR_RXOF) { + dev_err(hs->dev, "interrupt_transfer: fifo overflow\n"); + master->cur_msg->status = -EIO; + goto finalize_transfer; + } + + /* + * Read data from the Rx FIFO every time. If there is + * nothing left to receive, finalize the transfer. + */ + hisi_spi_reader(hs); + if (!hs->rx_len) + goto finalize_transfer; + + /* Send data out when Tx FIFO IRQ triggered */ + if (irq_status & ISR_TX) + hisi_spi_writer(hs); + + return IRQ_HANDLED; + +finalize_transfer: + hisi_spi_disable(hs); + spi_finalize_current_transfer(master); + return IRQ_HANDLED; +} + +static int hisi_spi_transfer_one(struct spi_controller *master, + struct spi_device *spi, struct spi_transfer *transfer) +{ + struct hisi_spi *hs = spi_controller_get_devdata(master); + struct hisi_chip_data *chip = spi_get_ctldata(spi); + u32 cr = chip->cr; + + /* Update per transfer options for speed and bpw */ + transfer->effective_speed_hz = + hisi_calc_effective_speed(master, chip, transfer->speed_hz); + cr |= FIELD_PREP(CR_DIV_PRE_MASK, chip->div_pre); + cr |= FIELD_PREP(CR_DIV_POST_MASK, chip->div_post); + cr |= FIELD_PREP(CR_BPW_MASK, transfer->bits_per_word - 1); + writel(cr, hs->regs + HISI_SPI_CR); + + hisi_spi_flush_fifo(hs); + + hs->n_bytes = hisi_spi_n_bytes(transfer); + hs->tx = transfer->tx_buf; + hs->tx_len = transfer->len / hs->n_bytes; + hs->rx = transfer->rx_buf; + hs->rx_len = hs->tx_len; + + /* + * Ensure that the transfer data above has been updated + * before the interrupt to start. + */ + smp_mb(); + + /* Enable all interrupts and the controller */ + writel(~IMR_MASK, hs->regs + HISI_SPI_IMR); + writel(1, hs->regs + HISI_SPI_ENR); + + return 1; +} + +static void hisi_spi_handle_err(struct spi_controller *master, + struct spi_message *msg) +{ + struct hisi_spi *hs = spi_controller_get_devdata(master); + + hisi_spi_disable(hs); + + /* + * Wait for interrupt handler that is + * already in timeout to complete. + */ + msleep(HISI_SPI_WAIT_TIMEOUT_MS); +} + +static int hisi_spi_setup(struct spi_device *spi) +{ + struct hisi_chip_data *chip; + + /* Only alloc on first setup */ + chip = spi_get_ctldata(spi); + if (!chip) { + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + spi_set_ctldata(spi, chip); + } + + chip->cr = hisi_spi_prepare_cr(spi); + + return 0; +} + +static void hisi_spi_cleanup(struct spi_device *spi) +{ + struct hisi_chip_data *chip = spi_get_ctldata(spi); + + kfree(chip); + spi_set_ctldata(spi, NULL); +} + +static int hisi_spi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct spi_controller *master; + struct hisi_spi *hs; + int ret, irq; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + master = devm_spi_alloc_master(dev, sizeof(*hs)); + if (!master) + return -ENOMEM; + + platform_set_drvdata(pdev, master); + + hs = spi_controller_get_devdata(master); + hs->dev = dev; + hs->irq = irq; + + hs->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(hs->regs)) + return PTR_ERR(hs->regs); + + /* Specify maximum SPI clocking speed (master only) by firmware */ + ret = device_property_read_u32(dev, "spi-max-frequency", + &master->max_speed_hz); + if (ret) { + dev_err(dev, "failed to get max SPI clocking speed, ret=%d\n", + ret); + return -EINVAL; + } + + ret = device_property_read_u16(dev, "num-cs", + &master->num_chipselect); + if (ret) + master->num_chipselect = DEFAULT_NUM_CS; + + master->use_gpio_descriptors = true; + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP; + master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32); + master->bus_num = pdev->id; + master->setup = hisi_spi_setup; + master->cleanup = hisi_spi_cleanup; + master->transfer_one = hisi_spi_transfer_one; + master->handle_err = hisi_spi_handle_err; + master->dev.fwnode = dev->fwnode; + + hisi_spi_hw_init(hs); + + ret = devm_request_irq(dev, hs->irq, hisi_spi_irq, 0, dev_name(dev), + master); + if (ret < 0) { + dev_err(dev, "failed to get IRQ=%d, ret=%d\n", hs->irq, ret); + return ret; + } + + ret = spi_register_controller(master); + if (ret) { + dev_err(dev, "failed to register spi master, ret=%d\n", ret); + return ret; + } + + dev_info(dev, "hw version:0x%x max-freq:%u kHz\n", + readl(hs->regs + HISI_SPI_VERSION), + master->max_speed_hz / 1000); + + return 0; +} + +static int hisi_spi_remove(struct platform_device *pdev) +{ + struct spi_controller *master = platform_get_drvdata(pdev); + + spi_unregister_controller(master); + + return 0; +} + +static const struct acpi_device_id hisi_spi_acpi_match[] = { + {"HISI03E1", 0}, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_spi_acpi_match); + +static struct platform_driver hisi_spi_driver = { + .probe = hisi_spi_probe, + .remove = hisi_spi_remove, + .driver = { + .name = "hisi-kunpeng-spi", + .acpi_match_table = hisi_spi_acpi_match, + }, +}; +module_platform_driver(hisi_spi_driver); + +MODULE_AUTHOR("Jay Fang f.fangjian@huawei.com"); +MODULE_DESCRIPTION("HiSilicon SPI Controller Driver for Kunpeng SoCs"); +MODULE_LICENSE("GPL v2");
From: Jay Fang f.fangjian@huawei.com
mainline inclusion from mainline-v5.13-rc1 commit 9a446cf97af70ee81ba177703b67ac4955a5edcc category: feature bugzilla: 175249 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Fix warning Woverflow on type conversion reported on x86_64:
drivers/spi/spi-hisi-kunpeng.c:361:9: warning: conversion from 'long unsigned int' to 'u32' {aka 'unsigned int'} changes value from '18446744073709551600' to '4294967280' [-Woverflow]
The registers are 32 bit, so fix by casting to u32.
Fixes: c770d8631e18 ("spi: Add HiSilicon SPI Controller Driver for Kunpeng SoCs") Cc: Stephen Rothwell sfr@canb.auug.org.au Reported-by: kernel test robot lkp@intel.com Signed-off-by: Jay Fang f.fangjian@huawei.com Link: https://lore.kernel.org/r/1617762660-54681-1-git-send-email-f.fangjian@huawe... Signed-off-by: Mark Brown broonie@kernel.org Reviewed-by: Chengwen Feng fengchengwen@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/spi/spi-hisi-kunpeng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index abc0cd54eee6..3f986ba1c328 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -358,7 +358,7 @@ static int hisi_spi_transfer_one(struct spi_controller *master, smp_mb();
/* Enable all interrupts and the controller */ - writel(~IMR_MASK, hs->regs + HISI_SPI_IMR); + writel(~(u32)IMR_MASK, hs->regs + HISI_SPI_IMR); writel(1, hs->regs + HISI_SPI_ENR);
return 1;
From: Xiongfeng Wang wangxiongfeng2@huawei.com
hulk inclusion category: bugfix bugzilla: 175146 CVE: NA
------------------------------------
Syzkaller caught the following BUG_ON:
------------[ cut here ]------------ kernel BUG at fs/userfaultfd.c:909! Internal error: Oops - BUG: 0 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: Process syz-executor.2 (pid: 1994, stack limit = 0x0000000048da525b) CPU: 0 PID: 1994 Comm: syz-executor.2 Not tainted 4.19.90+ #6 Hardware name: linux,dummy-virt (DT) pstate: 80000005 (Nzcv daif -PAN -UAO) pc : userfaultfd_release+0x4f0/0x6a0 fs/userfaultfd.c:908 lr : userfaultfd_release+0x4f0/0x6a0 fs/userfaultfd.c:908 sp : ffff80017d247c80 x29: ffff80017d247c90 x28: ffff80019b25f720 x27: 2000000000100077 x26: ffff80017c28fe40 x25: ffff80019b25f770 x24: ffff80019b25f7e0 x23: ffff80019b25e378 x22: 1ffff0002fa48fa6 x21: ffff80017f103200 x20: dfff200000000000 x19: ffff80017c28fe40 x18: 0000000000000000 x17: ffffffff00000001 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 1ffff0002fa48fa6 x8 : ffff10002fa48fa6 x7 : ffff20000add39f0 x6 : 00000000f2000000 x5 : 0000000000000000 x4 : ffff10002fa48f76 x3 : ffff200008000000 x2 : ffff20000a61d000 x1 : ffff800160aa9000 x0 : 0000000000000000 Call trace: userfaultfd_release+0x4f0/0x6a0 fs/userfaultfd.c:908 __fput+0x20c/0x688 fs/file_table.c:278 ____fput+0x24/0x30 fs/file_table.c:309 task_work_run+0x13c/0x2f8 kernel/task_work.c:135 tracehook_notify_resume include/linux/tracehook.h:193 [inline] do_notify_resume+0x380/0x628 arch/arm64/kernel/signal.c:728 work_pending+0x8/0x10 Code: 97ecb0e4 d4210000 17ffffc7 97ecb0e1 (d4210000) ---[ end trace de790a3f637d9e60 ]---
In userfaultfd_release(), we check if 'vm_userfaultfd_ctx' and 'vm_flags&(VM_UFFD_MISSING|VM_UFFD_WP)' are not zero at the same time. If not, it is bug. But we lack checking for VM_USWAP flag. So add it to avoid the false BUG_ON(). This patch also fix several other issues.
Fixes: c3e6287f88f5 ("userswap: support userswap via userfaultfd") Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com
Conflicts: fs/userfaultfd.c Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/userfaultfd.c | 12 +++++++++--- mm/memory.c | 1 - 2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index b5b3cd07da20..a89281fc2b68 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -843,7 +843,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) struct vm_area_struct *vma, *prev; /* len == 0 means wake all */ struct userfaultfd_wake_range range = { .len = 0, }; - unsigned long new_flags; + unsigned long new_flags, userfault_flags;
WRITE_ONCE(ctx->released, true);
@@ -861,14 +861,18 @@ static int userfaultfd_release(struct inode *inode, struct file *file) mmap_write_lock(mm); prev = NULL; for (vma = mm->mmap; vma; vma = vma->vm_next) { + userfault_flags = VM_UFFD_MISSING | VM_UFFD_WP; +#ifdef CONFIG_USERSWAP + userfault_flags |= VM_USWAP; +#endif cond_resched(); BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^ - !!(vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP))); + !!(vma->vm_flags & userfault_flags)); if (vma->vm_userfaultfd_ctx.ctx != ctx) { prev = vma; continue; } - new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP); + new_flags = vma->vm_flags & ~userfault_flags; prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, @@ -1293,6 +1297,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, */ if (uffdio_register.mode & UFFDIO_REGISTER_MODE_USWAP) { uffdio_register.mode &= ~UFFDIO_REGISTER_MODE_USWAP; + if (!uffdio_register.mode) + goto out; vm_flags |= VM_USWAP; end = uffdio_register.range.start + uffdio_register.range.len - 1; vma = find_vma(mm, uffdio_register.range.start); diff --git a/mm/memory.c b/mm/memory.c index 0be6fd3198a8..d15b9ccfc108 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3317,7 +3317,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) vmf->address, vma->vm_flags); goto skip_uswap; } - BUG_ON(!(vma->vm_flags & VM_UFFD_MISSING)); ret = handle_userfault(vmf, VM_UFFD_MISSING | VM_USWAP); return ret; }
From: Xiongfeng Wang wangxiongfeng2@huawei.com
hulk inclusion category: bugfix bugzilla: 175146 CVE: NA
------------------------------------
Disable userswap by default and add a kernel parameter to enable it.
Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com
Conflicts: include/linux/userfaultfd_k.h Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/userfaultfd.c | 17 +++++++++++++++-- include/linux/userfaultfd_k.h | 3 +++ mm/mmap.c | 6 +++--- 3 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index a89281fc2b68..bd71fbd68199 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -31,6 +31,9 @@ int sysctl_unprivileged_userfaultfd __read_mostly = 1;
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; +#ifdef CONFIG_USERSWAP +int enable_userswap; +#endif
enum userfaultfd_state { UFFD_STATE_WAIT_API, @@ -863,7 +866,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) for (vma = mm->mmap; vma; vma = vma->vm_next) { userfault_flags = VM_UFFD_MISSING | VM_UFFD_WP; #ifdef CONFIG_USERSWAP - userfault_flags |= VM_USWAP; + if (enable_userswap) + userfault_flags |= VM_USWAP; #endif cond_resched(); BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^ @@ -1295,7 +1299,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * register the whole vma overlapping with the address range to avoid * splitting the vma. */ - if (uffdio_register.mode & UFFDIO_REGISTER_MODE_USWAP) { + if (enable_userswap && (uffdio_register.mode & UFFDIO_REGISTER_MODE_USWAP)) { uffdio_register.mode &= ~UFFDIO_REGISTER_MODE_USWAP; if (!uffdio_register.mode) goto out; @@ -2024,6 +2028,15 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) return fd; }
+#ifdef CONFIG_USERSWAP +static int __init enable_userswap_setup(char *str) +{ + enable_userswap = true; + return 1; +} +__setup("enable_userswap", enable_userswap_setup); +#endif + static int __init userfaultfd_init(void) { userfaultfd_ctx_cachep = kmem_cache_create("userfaultfd_ctx_cache", diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index e91f31a4c830..e1cacab86bde 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -31,6 +31,9 @@ #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
extern int sysctl_unprivileged_userfaultfd; +#ifdef CONFIG_USERSWAP +extern int enable_userswap; +#endif
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
diff --git a/mm/mmap.c b/mm/mmap.c index 069d88aeeeba..e440f337238a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1598,7 +1598,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, prot |= PROT_EXEC;
#ifdef CONFIG_USERSWAP - if (flags & MAP_REPLACE) { + if (enable_userswap && (flags & MAP_REPLACE)) { if (offset_in_page(addr) || (len % PAGE_SIZE)) return -EINVAL; page_num = len / PAGE_SIZE; @@ -1765,7 +1765,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
#ifdef CONFIG_USERSWAP /* mark the vma as special to avoid merging with other vmas */ - if (flags & MAP_REPLACE) + if (enable_userswap && (flags & MAP_REPLACE)) vm_flags |= VM_SPECIAL; #endif
@@ -1777,7 +1777,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, #ifndef CONFIG_USERSWAP return addr; #else - if (!(flags & MAP_REPLACE)) + if (!enable_userswap || !(flags & MAP_REPLACE)) return addr;
if (IS_ERR_VALUE(addr)) {