[PATCH] nvdimm: fix some compilation warnings
by Qian Cai
Several places (dimm_devs.c, core.c etc) include label.h but only
label.c uses NSINDEX_SIGNATURE, so move its definition to label.c
instead.
In file included from drivers/nvdimm/dimm_devs.c:23:
drivers/nvdimm/label.h:41:19: warning: 'NSINDEX_SIGNATURE' defined but
not used [-Wunused-const-variable=]
The commit d9b83c756953 ("libnvdimm, btt: rework error clearing") left
an unused variable.
drivers/nvdimm/btt.c: In function 'btt_read_pg':
drivers/nvdimm/btt.c:1272:8: warning: variable 'rc' set but not used
[-Wunused-but-set-variable]
Last, some places abuse "/**" which is only reserved for the kernel-doc.
drivers/nvdimm/bus.c:648: warning: cannot understand function prototype:
'struct attribute_group nd_device_attribute_group = '
drivers/nvdimm/bus.c:677: warning: cannot understand function prototype:
'struct attribute_group nd_numa_attribute_group = '
Signed-off-by: Qian Cai <cai(a)lca.pw>
---
drivers/nvdimm/btt.c | 6 ++----
drivers/nvdimm/bus.c | 4 ++--
drivers/nvdimm/label.c | 2 ++
drivers/nvdimm/label.h | 2 --
4 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 4671776f5623..9f02a99cfac0 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1269,11 +1269,9 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
ret = btt_data_read(arena, page, off, postmap, cur_len);
if (ret) {
- int rc;
-
/* Media error - set the e_flag */
- rc = btt_map_write(arena, premap, postmap, 0, 1,
- NVDIMM_IO_ATOMIC);
+ btt_map_write(arena, premap, postmap, 0, 1,
+ NVDIMM_IO_ATOMIC);
goto out_rtt;
}
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 7ff684159f29..2eb6a6cfe9e4 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -642,7 +642,7 @@ static struct attribute *nd_device_attributes[] = {
NULL,
};
-/**
+/*
* nd_device_attribute_group - generic attributes for all devices on an nd bus
*/
struct attribute_group nd_device_attribute_group = {
@@ -671,7 +671,7 @@ static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a,
return a->mode;
}
-/**
+/*
* nd_numa_attribute_group - NUMA attributes for all devices on an nd bus
*/
struct attribute_group nd_numa_attribute_group = {
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index f3d753d3169c..02a51b7775e1 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -25,6 +25,8 @@ static guid_t nvdimm_btt2_guid;
static guid_t nvdimm_pfn_guid;
static guid_t nvdimm_dax_guid;
+static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
+
static u32 best_seq(u32 a, u32 b)
{
a &= NSINDEX_SEQ_MASK;
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index e9a2ad3c2150..4bb7add39580 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -38,8 +38,6 @@ enum {
ND_NSINDEX_INIT = 0x1,
};
-static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
-
/**
* struct nd_namespace_index - label set superblock
* @sig: NAMESPACE_INDEX\0
--
2.20.1 (Apple Git-117)
3 years, 2 months
[PATCH v2] mm: Fix modifying of page protection by insert_pfn_pmd()
by Aneesh Kumar K.V
With some architectures like ppc64, set_pmd_at() cannot cope with
a situation where there is already some (different) valid entry present.
Use pmdp_set_access_flags() instead to modify the pfn which is built to
deal with modifying existing PMD entries.
This is similar to
commit cae85cb8add3 ("mm/memory.c: fix modifying of page protection by insert_pfn()")
We also do similar update w.r.t insert_pfn_pud eventhough ppc64 don't support
pud pfn entries now.
Without this patch we also see the below message in kernel log
"BUG: non-zero pgtables_bytes on freeing mm:"
CC: stable(a)vger.kernel.org
Reported-by: Chandan Rajendra <chandan(a)linux.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
---
Changes from v1:
* Fix the pgtable leak
mm/huge_memory.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 404acdcd0455..165ea46bf149 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -755,6 +755,21 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
spinlock_t *ptl;
ptl = pmd_lock(mm, pmd);
+ if (!pmd_none(*pmd)) {
+ if (write) {
+ if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
+ WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
+ goto out_unlock;
+ }
+ entry = pmd_mkyoung(*pmd);
+ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+ if (pmdp_set_access_flags(vma, addr, pmd, entry, 1))
+ update_mmu_cache_pmd(vma, addr, pmd);
+ }
+
+ goto out_unlock;
+ }
+
entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
if (pfn_t_devmap(pfn))
entry = pmd_mkdevmap(entry);
@@ -766,11 +781,16 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
if (pgtable) {
pgtable_trans_huge_deposit(mm, pmd, pgtable);
mm_inc_nr_ptes(mm);
+ pgtable = NULL;
}
set_pmd_at(mm, addr, pmd, entry);
update_mmu_cache_pmd(vma, addr, pmd);
+
+out_unlock:
spin_unlock(ptl);
+ if (pgtable)
+ pte_free(mm, pgtable);
}
vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -821,6 +841,20 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
spinlock_t *ptl;
ptl = pud_lock(mm, pud);
+ if (!pud_none(*pud)) {
+ if (write) {
+ if (pud_pfn(*pud) != pfn_t_to_pfn(pfn)) {
+ WARN_ON_ONCE(!is_huge_zero_pud(*pud));
+ goto out_unlock;
+ }
+ entry = pud_mkyoung(*pud);
+ entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma);
+ if (pudp_set_access_flags(vma, addr, pud, entry, 1))
+ update_mmu_cache_pud(vma, addr, pud);
+ }
+ goto out_unlock;
+ }
+
entry = pud_mkhuge(pfn_t_pud(pfn, prot));
if (pfn_t_devmap(pfn))
entry = pud_mkdevmap(entry);
@@ -830,6 +864,8 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
}
set_pud_at(mm, addr, pud, entry);
update_mmu_cache_pud(vma, addr, pud);
+
+out_unlock:
spin_unlock(ptl);
}
--
2.20.1
3 years, 2 months
[PATCH v3 12/26] compat_ioctl: move more drivers to compat_ptr_ioctl
by Arnd Bergmann
The .ioctl and .compat_ioctl file operations have the same prototype so
they can both point to the same function, which works great almost all
the time when all the commands are compatible.
One exception is the s390 architecture, where a compat pointer is only
31 bit wide, and converting it into a 64-bit pointer requires calling
compat_ptr(). Most drivers here will ever run in s390, but since we now
have a generic helper for it, it's easy enough to use it consistently.
I double-checked all these drivers to ensure that all ioctl arguments
are used as pointers or are ignored, but are not interpreted as integer
values.
Acked-by: Jason Gunthorpe <jgg(a)mellanox.com>
Acked-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Acked-by: Mauro Carvalho Chehab <mchehab+samsung(a)kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Acked-by: David Sterba <dsterba(a)suse.com>
Acked-by: Darren Hart (VMware) <dvhart(a)infradead.org>
Acked-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Acked-by: Bjorn Andersson <bjorn.andersson(a)linaro.org>
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
drivers/android/binder.c | 2 +-
drivers/crypto/qat/qat_common/adf_ctl_drv.c | 2 +-
drivers/dma-buf/dma-buf.c | 4 +---
drivers/dma-buf/sw_sync.c | 2 +-
drivers/dma-buf/sync_file.c | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +-
drivers/hid/hidraw.c | 4 +---
drivers/iio/industrialio-core.c | 2 +-
drivers/infiniband/core/uverbs_main.c | 4 ++--
drivers/media/rc/lirc_dev.c | 4 +---
drivers/mfd/cros_ec_dev.c | 4 +---
drivers/misc/vmw_vmci/vmci_host.c | 2 +-
drivers/nvdimm/bus.c | 4 ++--
drivers/nvme/host/core.c | 2 +-
drivers/pci/switch/switchtec.c | 2 +-
drivers/platform/x86/wmi.c | 2 +-
drivers/rpmsg/rpmsg_char.c | 4 ++--
drivers/sbus/char/display7seg.c | 2 +-
drivers/sbus/char/envctrl.c | 4 +---
drivers/scsi/3w-xxxx.c | 4 +---
drivers/scsi/cxlflash/main.c | 2 +-
drivers/scsi/esas2r/esas2r_main.c | 2 +-
drivers/scsi/pmcraid.c | 4 +---
drivers/staging/android/ion/ion.c | 4 +---
drivers/staging/vme/devices/vme_user.c | 2 +-
drivers/tee/tee_core.c | 2 +-
drivers/usb/class/cdc-wdm.c | 2 +-
drivers/usb/class/usbtmc.c | 4 +---
drivers/virt/fsl_hypervisor.c | 2 +-
fs/btrfs/super.c | 2 +-
fs/ceph/dir.c | 2 +-
fs/ceph/file.c | 2 +-
fs/fuse/dev.c | 2 +-
fs/notify/fanotify/fanotify_user.c | 2 +-
fs/userfaultfd.c | 2 +-
net/rfkill/core.c | 2 +-
36 files changed, 39 insertions(+), 57 deletions(-)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 4b9c7ca492e6..48109ade7234 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5998,7 +5998,7 @@ const struct file_operations binder_fops = {
.owner = THIS_MODULE,
.poll = binder_poll,
.unlocked_ioctl = binder_ioctl,
- .compat_ioctl = binder_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.mmap = binder_mmap,
.open = binder_open,
.flush = binder_flush,
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index abc7a7f64d64..ef0e482ee04f 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -68,7 +68,7 @@ static long adf_ctl_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
static const struct file_operations adf_ctl_ops = {
.owner = THIS_MODULE,
.unlocked_ioctl = adf_ctl_ioctl,
- .compat_ioctl = adf_ctl_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
struct adf_ctl_drv_info {
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 7c858020d14b..0cb336fe6324 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -325,9 +325,7 @@ static const struct file_operations dma_buf_fops = {
.llseek = dma_buf_llseek,
.poll = dma_buf_poll,
.unlocked_ioctl = dma_buf_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = dma_buf_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
};
/*
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 32dcf7b4c935..411de6a8a0ad 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -419,5 +419,5 @@ const struct file_operations sw_sync_debugfs_fops = {
.open = sw_sync_debugfs_open,
.release = sw_sync_debugfs_release,
.unlocked_ioctl = sw_sync_ioctl,
- .compat_ioctl = sw_sync_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 4f6305ca52c8..0949f91eb85f 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -488,5 +488,5 @@ static const struct file_operations sync_file_fops = {
.release = sync_file_release,
.poll = sync_file_poll,
.unlocked_ioctl = sync_file_ioctl,
- .compat_ioctl = sync_file_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 083bd8114db1..5d6ac7885aa7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -49,7 +49,7 @@ static const char kfd_dev_name[] = "kfd";
static const struct file_operations kfd_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = kfd_ioctl,
- .compat_ioctl = kfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = kfd_open,
.mmap = kfd_mmap,
};
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 9fc51eff1079..e7284d38b66d 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -476,9 +476,7 @@ static const struct file_operations hidraw_ops = {
.release = hidraw_release,
.unlocked_ioctl = hidraw_ioctl,
.fasync = hidraw_fasync,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = hidraw_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 4700fd5d8c90..eed1bea257b4 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1635,7 +1635,7 @@ static const struct file_operations iio_buffer_fileops = {
.owner = THIS_MODULE,
.llseek = noop_llseek,
.unlocked_ioctl = iio_ioctl,
- .compat_ioctl = iio_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static int iio_check_unique_scan_index(struct iio_dev *indio_dev)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 70b7d80431a9..ac4321d7c800 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1120,7 +1120,7 @@ static const struct file_operations uverbs_fops = {
.release = ib_uverbs_close,
.llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
- .compat_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static const struct file_operations uverbs_mmap_fops = {
@@ -1131,7 +1131,7 @@ static const struct file_operations uverbs_mmap_fops = {
.release = ib_uverbs_close,
.llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
- .compat_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static struct ib_client uverbs_client = {
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index f862f1b7f996..9ccc7e9cbc8e 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -730,9 +730,7 @@ static const struct file_operations lirc_fops = {
.owner = THIS_MODULE,
.write = ir_lirc_transmit_ir,
.unlocked_ioctl = ir_lirc_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ir_lirc_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.read = ir_lirc_read,
.poll = ir_lirc_poll,
.open = ir_lirc_open,
diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index d275deaecb12..4a602a40d75c 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -251,9 +251,7 @@ static const struct file_operations fops = {
.release = ec_device_release,
.read = ec_device_read,
.unlocked_ioctl = ec_device_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ec_device_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
};
static void cros_ec_class_release(struct device *dev)
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index 997f92543dd4..5bb406dabe85 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -969,7 +969,7 @@ static const struct file_operations vmuser_fops = {
.release = vmci_host_close,
.poll = vmci_host_poll,
.unlocked_ioctl = vmci_host_unlocked_ioctl,
- .compat_ioctl = vmci_host_unlocked_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static struct miscdevice vmci_host_miscdev = {
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 7bbff0af29b2..065ebd584482 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -1167,7 +1167,7 @@ static const struct file_operations nvdimm_bus_fops = {
.owner = THIS_MODULE,
.open = nd_open,
.unlocked_ioctl = nd_ioctl,
- .compat_ioctl = nd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
@@ -1175,7 +1175,7 @@ static const struct file_operations nvdimm_fops = {
.owner = THIS_MODULE,
.open = nd_open,
.unlocked_ioctl = nvdimm_ioctl,
- .compat_ioctl = nvdimm_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2c43e12b70af..560929bee5ce 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2739,7 +2739,7 @@ static const struct file_operations nvme_dev_fops = {
.owner = THIS_MODULE,
.open = nvme_dev_open,
.unlocked_ioctl = nvme_dev_ioctl,
- .compat_ioctl = nvme_dev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static ssize_t nvme_sysfs_reset(struct device *dev,
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index e22766c79fe9..3a54b4b616e2 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1006,7 +1006,7 @@ static const struct file_operations switchtec_fops = {
.read = switchtec_dev_read,
.poll = switchtec_dev_poll,
.unlocked_ioctl = switchtec_dev_ioctl,
- .compat_ioctl = switchtec_dev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static void link_event_work(struct work_struct *work)
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 7b26b6ccf1a0..dded9cef42f4 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -889,7 +889,7 @@ static const struct file_operations wmi_fops = {
.read = wmi_char_read,
.open = wmi_char_open,
.unlocked_ioctl = wmi_ioctl,
- .compat_ioctl = wmi_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static int wmi_dev_probe(struct device *dev)
diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index eea5ebbb5119..507bfe163883 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -290,7 +290,7 @@ static const struct file_operations rpmsg_eptdev_fops = {
.write_iter = rpmsg_eptdev_write_iter,
.poll = rpmsg_eptdev_poll,
.unlocked_ioctl = rpmsg_eptdev_ioctl,
- .compat_ioctl = rpmsg_eptdev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static ssize_t name_show(struct device *dev, struct device_attribute *attr,
@@ -451,7 +451,7 @@ static const struct file_operations rpmsg_ctrldev_fops = {
.open = rpmsg_ctrldev_open,
.release = rpmsg_ctrldev_release,
.unlocked_ioctl = rpmsg_ctrldev_ioctl,
- .compat_ioctl = rpmsg_ctrldev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static void rpmsg_ctrldev_release_device(struct device *dev)
diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c
index a36e4cf1841d..c9f60656f54d 100644
--- a/drivers/sbus/char/display7seg.c
+++ b/drivers/sbus/char/display7seg.c
@@ -155,7 +155,7 @@ static long d7s_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static const struct file_operations d7s_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = d7s_ioctl,
- .compat_ioctl = d7s_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = d7s_open,
.release = d7s_release,
.llseek = noop_llseek,
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index 1a6e7224017c..dd2dfa85fc68 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -714,9 +714,7 @@ static const struct file_operations envctrl_fops = {
.owner = THIS_MODULE,
.read = envctrl_read,
.unlocked_ioctl = envctrl_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = envctrl_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.open = envctrl_open,
.release = envctrl_release,
.llseek = noop_llseek,
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index 2b1e0d503020..fb6444d0409c 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -1049,9 +1049,7 @@ static int tw_chrdev_open(struct inode *inode, struct file *file)
static const struct file_operations tw_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = tw_chrdev_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = tw_chrdev_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.open = tw_chrdev_open,
.release = NULL,
.llseek = noop_llseek,
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index 7096810fd222..e13d5de1d76e 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -3589,7 +3589,7 @@ static const struct file_operations cxlflash_chr_fops = {
.owner = THIS_MODULE,
.open = cxlflash_chr_open,
.unlocked_ioctl = cxlflash_chr_ioctl,
- .compat_ioctl = cxlflash_chr_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
/**
diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c
index fdbda5c05aa0..80c5a235d193 100644
--- a/drivers/scsi/esas2r/esas2r_main.c
+++ b/drivers/scsi/esas2r/esas2r_main.c
@@ -613,7 +613,7 @@ static int __init esas2r_init(void)
/* Handle ioctl calls to "/proc/scsi/esas2r/ATTOnode" */
static const struct file_operations esas2r_proc_fops = {
- .compat_ioctl = esas2r_proc_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.unlocked_ioctl = esas2r_proc_ioctl,
};
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index e338d7a4f571..c0a1a1218c56 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -3988,9 +3988,7 @@ static const struct file_operations pmcraid_fops = {
.open = pmcraid_chr_open,
.fasync = pmcraid_chr_fasync,
.unlocked_ioctl = pmcraid_chr_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = pmcraid_chr_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 92c2914239e3..1663c163edca 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -567,9 +567,7 @@ static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static const struct file_operations ion_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = ion_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ion_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
};
static int debug_shrink_set(void *data, u64 val)
diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c
index 6a33aaa1a49f..fd0ea4dbcb91 100644
--- a/drivers/staging/vme/devices/vme_user.c
+++ b/drivers/staging/vme/devices/vme_user.c
@@ -494,7 +494,7 @@ static const struct file_operations vme_user_fops = {
.write = vme_user_write,
.llseek = vme_user_llseek,
.unlocked_ioctl = vme_user_unlocked_ioctl,
- .compat_ioctl = vme_user_unlocked_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.mmap = vme_user_mmap,
};
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index 17c64fccbb10..eb97acf09868 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -684,7 +684,7 @@ static const struct file_operations tee_fops = {
.open = tee_open,
.release = tee_release,
.unlocked_ioctl = tee_ioctl,
- .compat_ioctl = tee_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static void tee_release_device(struct device *dev)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 9e9caff905d5..d48c032580d0 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -724,7 +724,7 @@ static const struct file_operations wdm_fops = {
.release = wdm_release,
.poll = wdm_poll,
.unlocked_ioctl = wdm_ioctl,
- .compat_ioctl = wdm_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 4942122b2346..bbd0308b13f5 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -2220,9 +2220,7 @@ static const struct file_operations fops = {
.release = usbtmc_release,
.flush = usbtmc_flush,
.unlocked_ioctl = usbtmc_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = usbtmc_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.fasync = usbtmc_fasync,
.poll = usbtmc_poll,
.llseek = default_llseek,
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index 8ba726e600e9..fbf02bf60f62 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -703,7 +703,7 @@ static const struct file_operations fsl_hv_fops = {
.poll = fsl_hv_poll,
.read = fsl_hv_read,
.unlocked_ioctl = fsl_hv_ioctl,
- .compat_ioctl = fsl_hv_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static struct miscdevice fsl_hv_misc_dev = {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 120e4340792a..162ea4b6b417 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2307,7 +2307,7 @@ static const struct super_operations btrfs_super_ops = {
static const struct file_operations btrfs_ctl_fops = {
.open = btrfs_control_open,
.unlocked_ioctl = btrfs_control_ioctl,
- .compat_ioctl = btrfs_control_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.owner = THIS_MODULE,
.llseek = noop_llseek,
};
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 7c060cb22aa3..a493b957713f 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1785,7 +1785,7 @@ const struct file_operations ceph_dir_fops = {
.open = ceph_open,
.release = ceph_release,
.unlocked_ioctl = ceph_ioctl,
- .compat_ioctl = ceph_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.fsync = ceph_fsync,
.lock = ceph_lock,
.flock = ceph_flock,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 9f53c3d99304..9b5fe7eee3c1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2112,7 +2112,7 @@ const struct file_operations ceph_file_fops = {
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.unlocked_ioctl = ceph_ioctl,
- .compat_ioctl = ceph_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.fallocate = ceph_fallocate,
.copy_file_range = ceph_copy_file_range,
};
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 9971a35cf1ef..dcdb26068b71 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2354,7 +2354,7 @@ const struct file_operations fuse_dev_operations = {
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
.unlocked_ioctl = fuse_dev_ioctl,
- .compat_ioctl = fuse_dev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a90bb19dcfa2..a55aa029a308 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -523,7 +523,7 @@ static const struct file_operations fanotify_fops = {
.fasync = NULL,
.release = fanotify_release,
.unlocked_ioctl = fanotify_ioctl,
- .compat_ioctl = fanotify_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 89800fc7dc9d..f93dcf8c996f 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1901,7 +1901,7 @@ static const struct file_operations userfaultfd_fops = {
.poll = userfaultfd_poll,
.read = userfaultfd_read,
.unlocked_ioctl = userfaultfd_ioctl,
- .compat_ioctl = userfaultfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index abca57040f37..3b2f6ea44397 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1323,7 +1323,7 @@ static const struct file_operations rfkill_fops = {
.release = rfkill_fop_release,
#ifdef CONFIG_RFKILL_INPUT
.unlocked_ioctl = rfkill_fop_ioctl,
- .compat_ioctl = rfkill_fop_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
#endif
.llseek = no_llseek,
};
--
2.20.0
3 years, 2 months
[PATCH] arm64: configurable sparsemem section size
by Pavel Tatashin
sparsemem section size determines the maximum size and alignment that
is allowed to offline/online memory block. The bigger the size the less
the clutter in /sys/devices/system/memory/*. On the other hand, however,
there is less flexability in what granules of memory can be added and
removed.
Recently, it was enabled in Linux to hotadd persistent memory that
can be either real NV device, or reserved from regular System RAM
and has identity of devdax.
The problem is that because ARM64's section size is 1G, and devdax must
have 2M label section, the first 1G is always missed when device is
attached, because it is not 1G aligned.
Allow, better flexibility by making section size configurable.
Signed-off-by: Pavel Tatashin <pasha.tatashin(a)soleen.com>
---
arch/arm64/Kconfig | 10 ++++++++++
arch/arm64/include/asm/sparsemem.h | 2 +-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b5d8cf57e220..a0c5b9d13a7f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -801,6 +801,16 @@ config ARM64_PA_BITS
default 48 if ARM64_PA_BITS_48
default 52 if ARM64_PA_BITS_52
+config ARM64_SECTION_SIZE_BITS
+ int "sparsemem section size shift"
+ range 27 30
+ default "30"
+ depends on SPARSEMEM
+ help
+ Specify section size in bits. Section size determines the hotplug
+ hotremove granularity. The current size can be determined from
+ /sys/devices/system/memory/block_size_bytes
+
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
help
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index b299929fe56c..810db34d7038 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -18,7 +18,7 @@
#ifdef CONFIG_SPARSEMEM
#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
-#define SECTION_SIZE_BITS 30
+#define SECTION_SIZE_BITS CONFIG_ARM64_SECTION_SIZE_BITS
#endif
#endif
--
2.21.0
3 years, 2 months
[PATCH v1 00/17] kunit: introduce KUnit, the Linux kernel unit testing framework
by Brendan Higgins
This patch set proposes KUnit, a lightweight unit testing and mocking
framework for the Linux kernel.
Unlike Autotest and kselftest, KUnit is a true unit testing framework;
it does not require installing the kernel on a test machine or in a VM
and does not require tests to be written in userspace running on a host
kernel. Additionally, KUnit is fast: From invocation to completion KUnit
can run several dozen tests in under a second. Currently, the entire
KUnit test suite for KUnit runs in under a second from the initial
invocation (build time excluded).
KUnit is heavily inspired by JUnit, Python's unittest.mock, and
Googletest/Googlemock for C++. KUnit provides facilities for defining
unit test cases, grouping related test cases into test suites, providing
common infrastructure for running tests, mocking, spying, and much more.
## What's so special about unit testing?
A unit test is supposed to test a single unit of code in isolation,
hence the name. There should be no dependencies outside the control of
the test; this means no external dependencies, which makes tests orders
of magnitudes faster. Likewise, since there are no external dependencies,
there are no hoops to jump through to run the tests. Additionally, this
makes unit tests deterministic: a failing unit test always indicates a
problem. Finally, because unit tests necessarily have finer granularity,
they are able to test all code paths easily solving the classic problem
of difficulty in exercising error handling code.
## Is KUnit trying to replace other testing frameworks for the kernel?
No. Most existing tests for the Linux kernel are end-to-end tests, which
have their place. A well tested system has lots of unit tests, a
reasonable number of integration tests, and some end-to-end tests. KUnit
is just trying to address the unit test space which is currently not
being addressed.
## More information on KUnit
There is a bunch of documentation near the end of this patch set that
describes how to use KUnit and best practices for writing unit tests.
For convenience I am hosting the compiled docs here:
https://google.github.io/kunit-docs/third_party/kernel/docs/
Additionally for convenience, I have applied these patches to a branch:
https://kunit.googlesource.com/linux/+/kunit/rfc/v5.1-rc2/v1
The repo may be cloned with:
git clone https://kunit.googlesource.com/linux
This patchset is on the kunit/rfc/v5.1-rc2/v1 branch.
## Changes Since Last Version
Last version was RFC v4. It seemed we were pretty much done with the RFC
phase, so I started the numbering over again. Sorry if anyone finds that
confusing.
- Reduced usage of object oriented style of member functions as
suggested by Frank.
- Did a bunch of heavy clean up of the kunit_abort stuff as suggested
by Frank and Stephen:
- Biggest change was to reduce the usage of direct calls of member
functions.
- Added a better explanation of what abort is for and further
explained the rationale for KUNIT_ASSERT_* vs. KUNIT_EXPECT_*
- Dropped BUG() usage
- Also moved try_catch interface to a new file since it seemed
obscured by being mixed in with the code that used it.
- Fixed some other minor issues pointed out by Stephen.
- Updated email address of one of the contributors.
- Dropped DT unittest port since it seemed like there was a lot more
discussion to be had: it wasn't ready to leave the RFC phase.
Instead, I added a KUnit test written by Iurii for PROC SYSCTL that
was requested by Luis some time ago.
For reference, RFC v4 can be found here:
https://lkml.org/lkml/2019/2/14/1144
--
2.21.0.392.gf8f6787159e-goog
3 years, 2 months
[v3 0/2] "Hotremove" persistent memory
by Pavel Tatashin
Changelog:
v3
- Addressed comments from David Hildenbrand. Don't release
lock_device_hotplug after checking memory status, and rename
memblock_offlined_cb() to check_memblock_offlined_cb()
v2
- Dan Williams mentioned that drv->remove() return is ignored
by unbind. Unbind always succeeds. Because we cannot guarantee
that memory can be offlined from the driver, don't even
attempt to do so. Simply check that every section is offlined
beforehand and only then proceed with removing dax memory.
---
Recently, adding a persistent memory to be used like a regular RAM was
added to Linux. This work extends this functionality to also allow hot
removing persistent memory.
We (Microsoft) have an important use case for this functionality.
The requirement is for physical machines with small amount of RAM (~8G)
to be able to reboot in a very short period of time (<1s). Yet, there is
a userland state that is expensive to recreate (~2G).
The solution is to boot machines with 2G preserved for persistent
memory.
Copy the state, and hotadd the persistent memory so machine still has
all 8G available for runtime. Before reboot, offline and hotremove
device-dax 2G, copy the memory that is needed to be preserved to pmem0
device, and reboot.
The series of operations look like this:
1. After boot restore /dev/pmem0 to ramdisk to be consumed by apps.
and free ramdisk.
2. Convert raw pmem0 to devdax
ndctl create-namespace --mode devdax --map mem -e namespace0.0 -f
3. Hotadd to System RAM
echo dax0.0 > /sys/bus/dax/drivers/device_dax/unbind
echo dax0.0 > /sys/bus/dax/drivers/kmem/new_id
echo online_movable > /sys/devices/system/memoryXXX/state
4. Before reboot hotremove device-dax memory from System RAM
echo offline > /sys/devices/system/memoryXXX/state
echo dax0.0 > /sys/bus/dax/drivers/kmem/unbind
5. Create raw pmem0 device
ndctl create-namespace --mode raw -e namespace0.0 -f
6. Copy the state that was stored by apps to ramdisk to pmem device
7. Do kexec reboot or reboot through firmware if firmware does not
zero memory in pmem0 region (These machines have only regular
volatile memory). So to have pmem0 device either memmap kernel
parameter is used, or devices nodes in dtb are specified.
Pavel Tatashin (2):
device-dax: fix memory and resource leak if hotplug fails
device-dax: "Hotremove" persistent memory that is used like normal RAM
drivers/dax/dax-private.h | 2 +
drivers/dax/kmem.c | 99 +++++++++++++++++++++++++++++++++++++--
2 files changed, 96 insertions(+), 5 deletions(-)
--
2.21.0
3 years, 2 months
[v2 0/2] "Hotremove" persistent memory
by Pavel Tatashin
Changelog:
v2
- Dan Williams mentioned that drv->remove() return is ignored
by unbind. Unbind always succeeds. Because we cannot guarantee
that memory can be offlined from the driver, don't even
attempt to do so. Simply check that every section is offlined
beforehand and only then proceed with removing dax memory.
---
Recently, adding a persistent memory to be used like a regular RAM was
added to Linux. This work extends this functionality to also allow hot
removing persistent memory.
We (Microsoft) have an important use case for this functionality.
The requirement is for physical machines with small amount of RAM (~8G)
to be able to reboot in a very short period of time (<1s). Yet, there is
a userland state that is expensive to recreate (~2G).
The solution is to boot machines with 2G preserved for persistent
memory.
Copy the state, and hotadd the persistent memory so machine still has
all 8G available for runtime. Before reboot, offline and hotremove
device-dax 2G, copy the memory that is needed to be preserved to pmem0
device, and reboot.
The series of operations look like this:
1. After boot restore /dev/pmem0 to ramdisk to be consumed by apps.
and free ramdisk.
2. Convert raw pmem0 to devdax
ndctl create-namespace --mode devdax --map mem -e namespace0.0 -f
3. Hotadd to System RAM
echo dax0.0 > /sys/bus/dax/drivers/device_dax/unbind
echo dax0.0 > /sys/bus/dax/drivers/kmem/new_id
echo online_movable > /sys/devices/system/memoryXXX/state
4. Before reboot hotremove device-dax memory from System RAM
echo offline > /sys/devices/system/memoryXXX/state
echo dax0.0 > /sys/bus/dax/drivers/kmem/unbind
5. Create raw pmem0 device
ndctl create-namespace --mode raw -e namespace0.0 -f
6. Copy the state that was stored by apps to ramdisk to pmem device
7. Do kexec reboot or reboot through firmware if firmware does not
zero memory in pmem0 region (These machines have only regular
volatile memory). So to have pmem0 device either memmap kernel
parameter is used, or devices nodes in dtb are specified.
Pavel Tatashin (2):
device-dax: fix memory and resource leak if hotplug fails
device-dax: "Hotremove" persistent memory that is used like normal RAM
drivers/dax/dax-private.h | 2 +
drivers/dax/kmem.c | 96 +++++++++++++++++++++++++++++++++++++--
2 files changed, 93 insertions(+), 5 deletions(-)
--
2.21.0
3 years, 2 months
[PATCH v6 0/6] virtio pmem driver
by Pankaj Gupta
This patch series has implementation for "virtio pmem".
"virtio pmem" is fake persistent memory(nvdimm) in guest
which allows to bypass the guest page cache. This also
implements a VIRTIO based asynchronous flush mechanism.
Sharing guest kernel driver in this patchset with the
changes suggested in v4. Tested with Qemu side device
emulation [6] for virtio-pmem. Documented the impact of
possible page cache side channel attacks with suggested
countermeasures.
Incorporated all the review suggestions in V5.
Details of project idea for 'virtio pmem' flushing interface
is shared [3] & [4].
Implementation is divided into two parts:
New virtio pmem guest driver and qemu code changes for new
virtio pmem paravirtualized device.
1. Guest virtio-pmem kernel driver
---------------------------------
- Reads persistent memory range from paravirt device and
registers with 'nvdimm_bus'.
- 'nvdimm/pmem' driver uses this information to allocate
persistent memory region and setup filesystem operations
to the allocated memory.
- virtio pmem driver implements asynchronous flushing
interface to flush from guest to host.
2. Qemu virtio-pmem device
---------------------------------
- Creates virtio pmem device and exposes a memory range to
KVM guest.
- At host side this is file backed memory which acts as
persistent memory.
- Qemu side flush uses aio thread pool API's and virtio
for asynchronous guest multi request handling.
David Hildenbrand CCed also posted a modified version[7] of
qemu virtio-pmem code based on updated Qemu memory device API.
Virtio-pmem security implications and countermeasures:
-----------------------------------------------------
In previous posting of kernel driver, there was discussion [9]
on possible implications of page cache side channel attacks with
virtio pmem. After thorough analysis of details of known side
channel attacks, below are the suggestions:
- Depends entirely on how host backing image file is mapped
into guest address space.
- virtio-pmem device emulation, by default shared mapping is used
to map host backing file. It is recommended to use separate
backing file at host side for every guest. This will prevent
any possibility of executing common code from multiple guests
and any chance of inferring guest local data based based on
execution time.
- If backing file is required to be shared among multiple guests
it is recommended to don't support host page cache eviction
commands from the guest driver. This will avoid any possibility
of inferring guest local data or host data from another guest.
- Proposed device specification [8] for virtio-pmem device with
details of possible security implications and suggested
countermeasures for device emulation.
Virtio-pmem errors handling:
----------------------------------------
Checked behaviour of virtio-pmem for below types of errors
Need suggestions on expected behaviour for handling these errors?
- Hardware Errors: Uncorrectable recoverable Errors:
a] virtio-pmem:
- As per current logic if error page belongs to Qemu process,
host MCE handler isolates(hwpoison) that page and send SIGBUS.
Qemu SIGBUS handler injects exception to KVM guest.
- KVM guest then isolates the page and send SIGBUS to guest
userspace process which has mapped the page.
b] Existing implementation for ACPI pmem driver:
- Handles such errors with MCE notifier and creates a list
of bad blocks. Read/direct access DAX operation return EIO
if accessed memory page fall in bad block list.
- It also starts backgound scrubbing.
- Similar functionality can be reused in virtio-pmem with MCE
notifier but without scrubbing(no ACPI/ARS)? Need inputs to
confirm if this behaviour is ok or needs any change?
Changes from PATCH v5: [1]
Changes suggested in by - [Cornelia, Yuval]
- Remove assignment chaining in virtio driver
- Better error message and remove not required free
- Check nd_region before use
Changes suggested by - [Jan Kara]
- dax_synchronous() for !CONFIG_DAX
- Correct 'daxdev_mapping_supported' comment and non-dax implementation
Changes suggested by - [Dan Williams]
- Pass meaningful flag 'DAXDEV_F_SYNC' to alloc_dax
- Gate nvdimm_flush instead of additional async parameter
- Move block chaining logic to flush callback than common nvdimm_flush
- Use NULL flush callback for generic flush for better readability [Dan, Jan]
- Use virtio device id 27 from 25(already used) - [MST]
Changes from PATCH v4: [2]
- Factor out MAP_SYNC supported functionality to a common helper
[Dave, Darrick, Jan]
- Comment, indentation and virtqueue_kick failure handle - Yuval Shaia
Changes from PATCH v3:
- Use generic dax_synchronous() helper to check for DAXDEV_SYNC
flag - [Dan, Darrick, Jan]
- Add 'is_nvdimm_async' function
- Document page cache side channel attacks implications &
countermeasures - [Dave Chinner, Michael]
Changes from PATCH v2:
- Disable MAP_SYNC for ext4 & XFS filesystems - [Dan]
- Use name 'virtio pmem' in place of 'fake dax'
Changes from PATCH v1:
- 0-day build test for build dependency on libnvdimm
Changes suggested by - [Dan Williams]
- Split the driver into two parts virtio & pmem
- Move queuing of async block request to block layer
- Add "sync" parameter in nvdimm_flush function
- Use indirect call for nvdimm_flush
- Don’t move declarations to common global header e.g nd.h
- nvdimm_flush() return 0 or -EIO if it fails
- Teach nsio_rw_bytes() that the flush can fail
- Rename nvdimm_flush() to generic_nvdimm_flush()
- Use 'nd_region->provider_data' for long dereferencing
- Remove virtio_pmem_freeze/restore functions
- Remove BSD license text with SPDX license text
- Add might_sleep() in virtio_pmem_flush - [Luiz]
- Make spin_lock_irqsave() narrow
Changes from RFC v3
- Rebase to latest upstream - Luiz
- Call ndregion->flush in place of nvdimm_flush- Luiz
- kmalloc return check - Luiz
- virtqueue full handling - Stefan
- Don't map entire virtio_pmem_req to device - Stefan
- request leak, correct sizeof req - Stefan
- Move declaration to virtio_pmem.c
Changes from RFC v2:
- Add flush function in the nd_region in place of switching
on a flag - Dan & Stefan
- Add flush completion function with proper locking and wait
for host side flush completion - Stefan & Dan
- Keep userspace API in uapi header file - Stefan, MST
- Use LE fields & New device id - MST
- Indentation & spacing suggestions - MST & Eric
- Remove extra header files & add licensing - Stefan
Changes from RFC v1:
- Reuse existing 'pmem' code for registering persistent
memory and other operations instead of creating an entirely
new block driver.
- Use VIRTIO driver to register memory information with
nvdimm_bus and create region_type accordingly.
- Call VIRTIO flush from existing pmem driver.
Pankaj Gupta (6):
libnvdimm: nd_region flush callback support
virtio-pmem: Add virtio-pmem guest driver
libnvdimm: add nd_region buffered dax_dev flag
dax: check synchronous mapping is supported
ext4: disable map_sync for virtio pmem
xfs: disable map_sync for virtio pmem
[1] https://lkml.org/lkml/2019/4/10/3
[2] https://lkml.org/lkml/2019/4/3/394
[3] https://www.spinics.net/lists/kvm/msg149761.html
[4] https://www.spinics.net/lists/kvm/msg153095.html
[5] https://lkml.org/lkml/2018/8/31/413
[6] https://marc.info/?l=linux-kernel&m=153572228719237&w=2
[7] https://marc.info/?l=qemu-devel&m=153555721901824&w=2
[8] https://lists.oasis-open.org/archives/virtio-dev/201903/msg00083.html
[9] https://lkml.org/lkml/2019/1/9/1191
drivers/acpi/nfit/core.c | 4 -
drivers/dax/bus.c | 2
drivers/dax/super.c | 13 +++-
drivers/md/dm.c | 3
drivers/nvdimm/claim.c | 6 +
drivers/nvdimm/nd.h | 1
drivers/nvdimm/pmem.c | 16 +++--
drivers/nvdimm/region_devs.c | 33 ++++++++++
drivers/nvdimm/virtio_pmem.c | 114 +++++++++++++++++++++++++++++++++++++
drivers/virtio/Kconfig | 10 +++
drivers/virtio/Makefile | 1
drivers/virtio/pmem.c | 118 +++++++++++++++++++++++++++++++++++++++
fs/ext4/file.c | 11 +--
fs/xfs/xfs_file.c | 10 +--
include/linux/dax.h | 25 +++++++-
include/linux/libnvdimm.h | 9 ++
include/linux/virtio_pmem.h | 60 +++++++++++++++++++
include/uapi/linux/virtio_ids.h | 1
include/uapi/linux/virtio_pmem.h | 10 +++
19 files changed, 420 insertions(+), 27 deletions(-)
3 years, 2 months
[4.19, 5.0 stable PATCH 1/4] nfit/ars: Remove ars_start_flags
by Dan Williams
commit 317a992ab9266b86b774b9f6b0f87eb4f59879a1 upstream.
The ars_start_flags property of 'struct acpi_nfit_desc' is no longer
used since ARS_REQ_SHORT and ARS_REQ_LONG were added.
Reviewed-by: Toshi Kani <toshi.kani(a)hpe.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
drivers/acpi/nfit/core.c | 10 +++++-----
drivers/acpi/nfit/nfit.h | 1 -
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index f75f8f870ce3..1a48c92eaed5 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2660,11 +2660,11 @@ static int ars_continue(struct acpi_nfit_desc *acpi_desc)
struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
- memset(&ars_start, 0, sizeof(ars_start));
- ars_start.address = ars_status->restart_address;
- ars_start.length = ars_status->restart_length;
- ars_start.type = ars_status->type;
- ars_start.flags = acpi_desc->ars_start_flags;
+ ars_start = (struct nd_cmd_ars_start) {
+ .address = ars_status->restart_address,
+ .length = ars_status->restart_length,
+ .type = ars_status->type,
+ };
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
sizeof(ars_start), &cmd_rc);
if (rc < 0)
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 33691aecfcee..871fb3de3b30 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -223,7 +223,6 @@ struct acpi_nfit_desc {
struct list_head idts;
struct nvdimm_bus *nvdimm_bus;
struct device *dev;
- u8 ars_start_flags;
struct nd_cmd_ars_status *ars_status;
struct nfit_spa *scrub_spa;
struct delayed_work dwork;
3 years, 2 months
[PATCH v5 0/6] virtio pmem driver
by Pankaj Gupta
This patch series has implementation for "virtio pmem".
"virtio pmem" is fake persistent memory(nvdimm) in guest
which allows to bypass the guest page cache. This also
implements a VIRTIO based asynchronous flush mechanism.
Sharing guest kernel driver in this patchset with the
changes suggested in v4. Tested with Qemu side device
emulation [6] for virtio-pmem.
We have incorporated all the suggestions in V4. Documented
the impact of possible page cache side channel attacks with
suggested countermeasures.
Details of project idea for 'virtio pmem' flushing interface
is shared [3] & [4].
Implementation is divided into two parts:
New virtio pmem guest driver and qemu code changes for new
virtio pmem paravirtualized device.
1. Guest virtio-pmem kernel driver
---------------------------------
- Reads persistent memory range from paravirt device and
registers with 'nvdimm_bus'.
- 'nvdimm/pmem' driver uses this information to allocate
persistent memory region and setup filesystem operations
to the allocated memory.
- virtio pmem driver implements asynchronous flushing
interface to flush from guest to host.
2. Qemu virtio-pmem device
---------------------------------
- Creates virtio pmem device and exposes a memory range to
KVM guest.
- At host side this is file backed memory which acts as
persistent memory.
- Qemu side flush uses aio thread pool API's and virtio
for asynchronous guest multi request handling.
David Hildenbrand CCed also posted a modified version[7] of
qemu virtio-pmem code based on updated Qemu memory device API.
Virtio-pmem security implications and countermeasures:
-----------------------------------------------------
In previous posting of kernel driver, there was discussion [9]
on possible implications of page cache side channel attacks with
virtio pmem. After thorough analysis of details of known side
channel attacks, below are the suggestions:
- Depends entirely on how host backing image file is mapped
into guest address space.
- virtio-pmem device emulation, by default shared mapping is used
to map host backing file. It is recommended to use separate
backing file at host side for every guest. This will prevent
any possibility of executing common code from multiple guests
and any chance of inferring guest local data based based on
execution time.
- If backing file is required to be shared among multiple guests
it is recommended to don't support host page cache eviction
commands from the guest driver. This will avoid any possibility
of inferring guest local data or host data from another guest.
- Proposed device specification [8] for virtio-pmem device with
details of possible security implications and suggested
countermeasures for device emulation.
Virtio-pmem errors handling:
----------------------------------------
Checked behaviour of virtio-pmem for below types of errors
Need suggestions on expected behaviour for handling these errors?
- Hardware Errors: Uncorrectable recoverable Errors:
a] virtio-pmem:
- As per current logic if error page belongs to Qemu process,
host MCE handler isolates(hwpoison) that page and send SIGBUS.
Qemu SIGBUS handler injects exception to KVM guest.
- KVM guest then isolates the page and send SIGBUS to guest
userspace process which has mapped the page.
b] Existing implementation for ACPI pmem driver:
- Handles such errors with MCE notifier and creates a list
of bad blocks. Read/direct access DAX operation return EIO
if accessed memory page fall in bad block list.
- It also starts backgound scrubbing.
- Similar functionality can be reused in virtio-pmem with MCE
notifier but without scrubbing(no ACPI/ARS)? Need inputs to
confirm if this behaviour is ok or needs any change?
Changes from PATCH v4: [1]
- Factor out MAP_SYNC supported functionality to a common helper
[Dave, Darrick, Jan]
- Comment, indentation and virtqueue_kick failure handle - Yuval Shaia
Changes from PATCH v3: [2]
- Use generic dax_synchronous() helper to check for DAXDEV_SYNC
flag - [Dan, Darrick, Jan]
- Add 'is_nvdimm_async' function
- Document page cache side channel attacks implications &
countermeasures - [Dave Chinner, Michael]
Changes from PATCH v2:
- Disable MAP_SYNC for ext4 & XFS filesystems - [Dan]
- Use name 'virtio pmem' in place of 'fake dax'
Changes from PATCH v1:
- 0-day build test for build dependency on libnvdimm
Changes suggested by - [Dan Williams]
- Split the driver into two parts virtio & pmem
- Move queuing of async block request to block layer
- Add "sync" parameter in nvdimm_flush function
- Use indirect call for nvdimm_flush
- Don’t move declarations to common global header e.g nd.h
- nvdimm_flush() return 0 or -EIO if it fails
- Teach nsio_rw_bytes() that the flush can fail
- Rename nvdimm_flush() to generic_nvdimm_flush()
- Use 'nd_region->provider_data' for long dereferencing
- Remove virtio_pmem_freeze/restore functions
- Remove BSD license text with SPDX license text
- Add might_sleep() in virtio_pmem_flush - [Luiz]
- Make spin_lock_irqsave() narrow
Changes from RFC v3
- Rebase to latest upstream - Luiz
- Call ndregion->flush in place of nvdimm_flush- Luiz
- kmalloc return check - Luiz
- virtqueue full handling - Stefan
- Don't map entire virtio_pmem_req to device - Stefan
- request leak, correct sizeof req- Stefan
- Move declaration to virtio_pmem.c
Changes from RFC v2:
- Add flush function in the nd_region in place of switching
on a flag - Dan & Stefan
- Add flush completion function with proper locking and wait
for host side flush completion - Stefan & Dan
- Keep userspace API in uapi header file - Stefan, MST
- Use LE fields & New device id - MST
- Indentation & spacing suggestions - MST & Eric
- Remove extra header files & add licensing - Stefan
Changes from RFC v1:
- Reuse existing 'pmem' code for registering persistent
memory and other operations instead of creating an entirely
new block driver.
- Use VIRTIO driver to register memory information with
nvdimm_bus and create region_type accordingly.
- Call VIRTIO flush from existing pmem driver.
Pankaj Gupta (6):
libnvdimm: nd_region flush callback support
virtio-pmem: Add virtio-pmem guest driver
libnvdimm: add nd_region buffered dax_dev flag
dax: check synchronous mapping is supported
ext4: disable map_sync for virtio pmem
xfs: disable map_sync for virtio pmem
[1] https://lkml.org/lkml/2019/4/3/394
[2] https://lkml.org/lkml/2019/1/9/471
[3] https://www.spinics.net/lists/kvm/msg149761.html
[4] https://www.spinics.net/lists/kvm/msg153095.html
[5] https://lkml.org/lkml/2018/8/31/413
[6] https://marc.info/?l=linux-kernel&m=153572228719237&w=2
[7] https://marc.info/?l=qemu-devel&m=153555721901824&w=2
[8] https://lists.oasis-open.org/archives/virtio-dev/201903/msg00083.html
[9] https://lkml.org/lkml/2019/1/9/1191
drivers/acpi/nfit/core.c | 4 -
drivers/dax/bus.c | 2
drivers/dax/super.c | 13 +++-
drivers/md/dm.c | 2
drivers/nvdimm/claim.c | 6 +
drivers/nvdimm/nd.h | 1
drivers/nvdimm/pmem.c | 17 +++--
drivers/nvdimm/region_devs.c | 45 +++++++++++++-
drivers/nvdimm/virtio_pmem.c | 88 +++++++++++++++++++++++++++
drivers/virtio/Kconfig | 10 +++
drivers/virtio/Makefile | 1
drivers/virtio/pmem.c | 124 +++++++++++++++++++++++++++++++++++++++
fs/ext4/file.c | 11 +--
fs/xfs/xfs_file.c | 10 +--
include/linux/dax.h | 32 +++++++++-
include/linux/libnvdimm.h | 9 ++
include/linux/virtio_pmem.h | 60 ++++++++++++++++++
include/uapi/linux/virtio_ids.h | 1
include/uapi/linux/virtio_pmem.h | 10 +++
19 files changed, 419 insertions(+), 27 deletions(-)
3 years, 2 months