[PATCH] ndctl, check: Ensure mmap of BTT sections work with 64K page-size
by Vaibhav Jain
Presently on PPC64, which uses a 64K page-size, the ndctl check-namespace
command fails on a BTT device with the following error:
$sudo ndctl check-namespace namespace0.0 -vv
namespace0.0: namespace_check: checking namespace0.0
namespace0.0: btt_discover_arenas: found 1 BTT arena
namespace0.0: btt_create_mappings: mmap arena[0].info [sz = 0x1000, off = 0x1000] failed: Invalid argument
error checking namespaces: Invalid argument
checked 0 namespaces
The error happens when btt_create_mappings() tries to mmap the sections of
the BTT device, which are usually aligned to 4K offsets. However, the mmap()
syscall expects the 'offset' argument to be a multiple of the page-size,
hence it returns EINVAL, causing btt_create_mappings() to error
out.
As a fix for the issue, this patch proposes the addition of two new
functions to 'check.c', namely btt_mmap/btt_unmap, that can be used to
map/unmap parts of the BTT device into the ndctl process address-space. The
functions tweak the requested 'offset' argument to mmap(), ensuring
that it is page-size aligned, and then fix up the returned pointer such
that it points to the requested offset within the mmapped region.
Reported-by: harish(a)linux.ibm.com
Signed-off-by: Vaibhav Jain <vaibhav(a)linux.ibm.com>
---
ndctl/check.c | 71 ++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 56 insertions(+), 15 deletions(-)
diff --git a/ndctl/check.c b/ndctl/check.c
index 8a7125053865..18d259048616 100644
--- a/ndctl/check.c
+++ b/ndctl/check.c
@@ -907,6 +907,47 @@ static int btt_discover_arenas(struct btt_chk *bttc)
return ret;
}
+/* Mmap requested btt region so it works with non 4-K page sizes */
+static void *btt_mmap(struct btt_chk *bttc, void *addr, size_t length,
+ int prot, int flags, off_t offset)
+{
+ off_t shift;
+
+ /* Calculate the shift back needed to make offset page aligned */
+ shift = offset - rounddown(offset, bttc->sys_page_size);
+
+ /* Update the offset and length with the shift calculated above */
+ offset -= shift;
+ length += shift;
+
+ addr = mmap(addr, length, prot, flags, bttc->fd, offset);
+
+ /* If needed fixup the return pointer to correct offset request */
+ if (addr != MAP_FAILED)
+ addr = (void *) ((uintptr_t)addr + shift);
+
+ dbg(bttc, "mmap: addr=%p length=0x%lx offset=0x%lx shift=0x%lx\n",
+ addr, length, offset, shift);
+
+ return addr;
+}
+
+static void btt_unmap(struct btt_chk *bttc, void *ptr, size_t length)
+{
+ uintptr_t addr = ptr;
+ off_t shift;
+
+ /* Calculate the shift back needed to make offset page aligned */
+ shift = addr - rounddown(addr, bttc->sys_page_size);
+
+ addr -= shift;
+ length += shift;
+
+ munmap((void *)addr, length);
+ dbg(bttc, "unmap: addr=%p length=0x%lx shift=0x%lx\n",
+ addr, length, shift);
+}
+
static int btt_create_mappings(struct btt_chk *bttc)
{
struct arena_info *a;
@@ -921,8 +962,8 @@ static int btt_create_mappings(struct btt_chk *bttc)
for (i = 0; i < bttc->num_arenas; i++) {
a = &bttc->arena[i];
a->map.info_len = BTT_INFO_SIZE;
- a->map.info = mmap(NULL, a->map.info_len, mmap_flags,
- MAP_SHARED, bttc->fd, a->infooff);
+ a->map.info = btt_mmap(bttc, NULL, a->map.info_len, mmap_flags,
+ MAP_SHARED, a->infooff);
if (a->map.info == MAP_FAILED) {
err(bttc, "mmap arena[%d].info [sz = %#lx, off = %#lx] failed: %s\n",
i, a->map.info_len, a->infooff, strerror(errno));
@@ -930,8 +971,8 @@ static int btt_create_mappings(struct btt_chk *bttc)
}
a->map.data_len = a->mapoff - a->dataoff;
- a->map.data = mmap(NULL, a->map.data_len, mmap_flags,
- MAP_SHARED, bttc->fd, a->dataoff);
+ a->map.data = btt_mmap(bttc, NULL, a->map.data_len, mmap_flags,
+ MAP_SHARED, a->dataoff);
if (a->map.data == MAP_FAILED) {
err(bttc, "mmap arena[%d].data [sz = %#lx, off = %#lx] failed: %s\n",
i, a->map.data_len, a->dataoff, strerror(errno));
@@ -939,8 +980,8 @@ static int btt_create_mappings(struct btt_chk *bttc)
}
a->map.map_len = a->logoff - a->mapoff;
- a->map.map = mmap(NULL, a->map.map_len, mmap_flags,
- MAP_SHARED, bttc->fd, a->mapoff);
+ a->map.map = btt_mmap(bttc, NULL, a->map.map_len, mmap_flags,
+ MAP_SHARED, a->mapoff);
if (a->map.map == MAP_FAILED) {
err(bttc, "mmap arena[%d].map [sz = %#lx, off = %#lx] failed: %s\n",
i, a->map.map_len, a->mapoff, strerror(errno));
@@ -948,8 +989,8 @@ static int btt_create_mappings(struct btt_chk *bttc)
}
a->map.log_len = a->info2off - a->logoff;
- a->map.log = mmap(NULL, a->map.log_len, mmap_flags,
- MAP_SHARED, bttc->fd, a->logoff);
+ a->map.log = btt_mmap(bttc, NULL, a->map.log_len, mmap_flags,
+ MAP_SHARED, a->logoff);
if (a->map.log == MAP_FAILED) {
err(bttc, "mmap arena[%d].log [sz = %#lx, off = %#lx] failed: %s\n",
i, a->map.log_len, a->logoff, strerror(errno));
@@ -957,8 +998,8 @@ static int btt_create_mappings(struct btt_chk *bttc)
}
a->map.info2_len = BTT_INFO_SIZE;
- a->map.info2 = mmap(NULL, a->map.info2_len, mmap_flags,
- MAP_SHARED, bttc->fd, a->info2off);
+ a->map.info2 = btt_mmap(bttc, NULL, a->map.info2_len,
+ mmap_flags, MAP_SHARED, a->info2off);
if (a->map.info2 == MAP_FAILED) {
err(bttc, "mmap arena[%d].info2 [sz = %#lx, off = %#lx] failed: %s\n",
i, a->map.info2_len, a->info2off, strerror(errno));
@@ -977,15 +1018,15 @@ static void btt_remove_mappings(struct btt_chk *bttc)
for (i = 0; i < bttc->num_arenas; i++) {
a = &bttc->arena[i];
if (a->map.info)
- munmap(a->map.info, a->map.info_len);
+ btt_unmap(bttc, a->map.info, a->map.info_len);
if (a->map.data)
- munmap(a->map.data, a->map.data_len);
+ btt_unmap(bttc, a->map.data, a->map.data_len);
if (a->map.map)
- munmap(a->map.map, a->map.map_len);
+ btt_unmap(bttc, a->map.map, a->map.map_len);
if (a->map.log)
- munmap(a->map.log, a->map.log_len);
+ btt_unmap(bttc, a->map.log, a->map.log_len);
if (a->map.info2)
- munmap(a->map.info2, a->map.info2_len);
+ btt_unmap(bttc, a->map.info2, a->map.info2_len);
}
}
--
2.21.0
2 years, 10 months
[PATCH] memremap: move from kernel/ to mm/
by Christoph Hellwig
memremap.c implements MM functionality for ZONE_DEVICE, so it really
should be in the mm/ directory, not the kernel/ one.
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
---
Sending for applying just after -rc1 preferably to avoid conflicts
later in the merge window
kernel/Makefile | 1 -
mm/Makefile | 1 +
{kernel => mm}/memremap.c | 0
3 files changed, 1 insertion(+), 1 deletion(-)
rename {kernel => mm}/memremap.c (100%)
diff --git a/kernel/Makefile b/kernel/Makefile
index a8d923b5481b..ef0d95a190b4 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -111,7 +111,6 @@ obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
obj-$(CONFIG_TORTURE_TEST) += torture.o
obj-$(CONFIG_HAS_IOMEM) += iomem.o
-obj-$(CONFIG_ZONE_DEVICE) += memremap.o
obj-$(CONFIG_RSEQ) += rseq.o
obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
diff --git a/mm/Makefile b/mm/Makefile
index 338e528ad436..d0b295c3b764 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -102,5 +102,6 @@ obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
+obj-$(CONFIG_ZONE_DEVICE) += memremap.o
obj-$(CONFIG_HMM_MIRROR) += hmm.o
obj-$(CONFIG_MEMFD_CREATE) += memfd.o
diff --git a/kernel/memremap.c b/mm/memremap.c
similarity index 100%
rename from kernel/memremap.c
rename to mm/memremap.c
--
2.20.1
2 years, 10 months
Re: [PATCH v11 4/7] mm, fs, dax: handle layout changes to pinned dax mappings
by Liu Bo
On Tue, Jul 30, 2019 at 8:58 PM Dan Williams <dan.j.williams(a)intel.com> wrote:
>
> On Tue, Jul 30, 2019 at 7:27 PM Liu Bo <obuil.liubo(a)gmail.com> wrote:
> >
> > Hi Dan,
> >
> >
> > (Sorry for replying in a very old thread.)
> >
> >
> > On Fri, May 18, 2018 at 6:45 PM Dan Williams <dan.j.williams(a)intel.com> wrote:
> > >
> > > Background:
> > >
> > > get_user_pages() in the filesystem pins file backed memory pages for
> > > access by devices performing dma. However, it only pins the memory pages
> > > not the page-to-file offset association. If a file is truncated the
> > > pages are mapped out of the file and dma may continue indefinitely into
> > > a page that is owned by a device driver. This breaks coherency of the
> > > file vs dma, but the assumption is that if userspace wants the
> > > file-space truncated it does not matter what data is inbound from the
> > > device, it is not relevant anymore. The only expectation is that dma can
> > > safely continue while the filesystem reallocates the block(s).
> > >
> > > Problem:
> > >
> > > This expectation that dma can safely continue while the filesystem
> > > changes the block map is broken by dax. With dax the target dma page
> > > *is* the filesystem block. The model of leaving the page pinned for dma,
> > > > but truncating the file block out of the file, means that the filesystem
> > > is free to reallocate a block under active dma to another file and now
> > > the expected data-incoherency situation has turned into active
> > > data-corruption.
> > >
> > > Solution:
> > >
> > > Defer all filesystem operations (fallocate(), truncate()) on a dax mode
> > > file while any page/block in the file is under active dma. This solution
> > > assumes that dma is transient. Cases where dma operations are known to
> > > not be transient, like RDMA, have been explicitly disabled via
> > > commits like 5f1d43de5416 "IB/core: disable memory registration of
> > > filesystem-dax vmas".
> > >
> > > The dax_layout_busy_page() routine is called by filesystems with a lock
> > > held against mm faults (i_mmap_lock) to find pinned / busy dax pages.
> > > The process of looking up a busy page invalidates all mappings
> > > to trigger any subsequent get_user_pages() to block on i_mmap_lock.
> > > The filesystem continues to call dax_layout_busy_page() until it finally
> > > returns no more active pages. This approach assumes that the page
> > > pinning is transient, if that assumption is violated the system would
> > > have likely hung from the uncompleted I/O.
> > >
> > > Cc: Jeff Moyer <jmoyer(a)redhat.com>
> > > Cc: Dave Chinner <david(a)fromorbit.com>
> > > Cc: Matthew Wilcox <mawilcox(a)microsoft.com>
> > > Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
> > > Cc: "Darrick J. Wong" <darrick.wong(a)oracle.com>
> > > Cc: Ross Zwisler <ross.zwisler(a)linux.intel.com>
> > > Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
> > > Cc: Andrew Morton <akpm(a)linux-foundation.org>
> > > Reported-by: Christoph Hellwig <hch(a)lst.de>
> > > Reviewed-by: Christoph Hellwig <hch(a)lst.de>
> > > Reviewed-by: Jan Kara <jack(a)suse.cz>
> > > Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
> > > ---
> > > fs/dax.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++
> > > include/linux/dax.h | 7 ++++
> > > 2 files changed, 104 insertions(+)
> > >
> > > diff --git a/fs/dax.c b/fs/dax.c
> > > index aaec72ded1b6..e8f61ea690f7 100644
> > > --- a/fs/dax.c
> > > +++ b/fs/dax.c
> > > @@ -351,6 +351,19 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
> > > }
> > > }
> > >
> > > +static struct page *dax_busy_page(void *entry)
> > > +{
> > > + unsigned long pfn;
> > > +
> > > + for_each_mapped_pfn(entry, pfn) {
> > > + struct page *page = pfn_to_page(pfn);
> > > +
> > > + if (page_ref_count(page) > 1)
> > > + return page;
> > > + }
> > > + return NULL;
> > > +}
> > > +
> > > /*
> > > * Find radix tree entry at given index. If it points to an exceptional entry,
> > > * return it with the radix tree entry locked. If the radix tree doesn't
> > > @@ -492,6 +505,90 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
> > > return entry;
> > > }
> > >
> > > +/**
> > > + * dax_layout_busy_page - find first pinned page in @mapping
> > > + * @mapping: address space to scan for a page with ref count > 1
> > > + *
> > > + * DAX requires ZONE_DEVICE mapped pages. These pages are never
> > > + * 'onlined' to the page allocator so they are considered idle when
> > > + * page->count == 1. A filesystem uses this interface to determine if
> > > + * any page in the mapping is busy, i.e. for DMA, or other
> > > + * get_user_pages() usages.
> > > + *
> > > + * It is expected that the filesystem is holding locks to block the
> > > + * establishment of new mappings in this address_space. I.e. it expects
> > > + * to be able to run unmap_mapping_range() and subsequently not race
> > > + * mapping_mapped() becoming true.
> > > + */
> > > +struct page *dax_layout_busy_page(struct address_space *mapping)
> > > +{
> > > + pgoff_t indices[PAGEVEC_SIZE];
> > > + struct page *page = NULL;
> > > + struct pagevec pvec;
> > > + pgoff_t index, end;
> > > + unsigned i;
> > > +
> > > + /*
> > > + * In the 'limited' case get_user_pages() for dax is disabled.
> > > + */
> > > + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
> > > + return NULL;
> > > +
> > > + if (!dax_mapping(mapping) || !mapping_mapped(mapping))
> > > + return NULL;
> > > +
> > > + pagevec_init(&pvec);
> > > + index = 0;
> > > + end = -1;
> > > +
> > > + /*
> > > + * If we race get_user_pages_fast() here either we'll see the
> > > + * elevated page count in the pagevec_lookup and wait, or
> > > + * get_user_pages_fast() will see that the page it took a reference
> > > + * against is no longer mapped in the page tables and bail to the
> > > + * get_user_pages() slow path. The slow path is protected by
> > > + * pte_lock() and pmd_lock(). New references are not taken without
> > > + * holding those locks, and unmap_mapping_range() will not zero the
> > > + * pte or pmd without holding the respective lock, so we are
> > > + * guaranteed to either see new references or prevent new
> > > + * references from being established.
> > > + */
> > > + unmap_mapping_range(mapping, 0, 0, 1);
> >
> > Why do we have to unmap the whole address space prior to check busy pages?
> > > Can we have a variant of dax_layout_busy_page() that only unmaps a
> > > subset of the whole address space?
> >
>
> This is due to the location in xfs where layouts are broken vs where
> the file range is mapped to physical blocks for the truncate
> operation. I ultimately decided the reworks needed for that
> optimization were large and that the relative performance gain was
> small. Do you have performance numbers to the contrary? Feel free to
> copy the linux-nvdimm list on future mails, no need for this to be a
> private discussion.
Thanks a lot for the prompt reply.
For virtiofs[1]'s dax mode, it also suffers the same race problem
between dax-DMA(mmap+directIO) and fs truncate/punch_hole, besides, it
maintains a kind of resource named dax mapping range for IO
operations, which is similar to the block concept in filesystem and
sometimes we need to reclaim some dax mapping ranges in background.
So it might end up with the same race problem when this reclaim process and
dax-dma(mmap+directIO) run concurrently. However, since reclaim is not
a user-triggered operation like truncate, it might be triggered
frequently on the fly by virtiofs itself. If that happened, mmap
workloads would be impacted significantly by the reclaim, because
reclaim unmaps the whole address space of the inode.
As every dax mapping range is 2M for now, an ideal solution is to have
layout_checking unmap only that specific 2M range so that other areas
in mmap ranges are good to go.
[1]: https://virtio-fs.gitlab.io/
thanks,
liubo
2 years, 10 months
[ndctl PATCH v8 00/13] daxctl: add a new reconfigure-device command
by Vishal Verma
Changes in v8:
- rename the --attempt-offline option to --force (Dan)
- clarify the messages when device is already in the requested state (Dan)
- s/unable/failed/ in device.c error messages (Dan)
- daxctl_memory_{on,off}line() instead of daxctl_memory_set_{on,off}line (Dan)
- Add an interface to get a count of the memory sections associated with a
device (Dan)
- As a result, refactor the readdir loop into a common memory_op function that
can set the state, get the online state, and get a count of all blocks.
- Update the onlining/offlining routines used in both the reconfigure-device
and {on,off}line-memory commands to use the new daxctl_memory_num_sections()
interface to validate the number of sections for which we changed the state.
- Add some small clarifications in the daxctl-reconfigure-device man page (Dan)
- In device.c add a verify_dax_bus_model() helper to check for the dax-bus
subsystem (Dan).
Changes in v7:
- Fix a couple of checkpatch type errors in the new lines added in v6 (Dan).
- Get rid of daxctl_dev_get_mode. daxctl_dev_get_memory is sufficient to
both check the mode and allocate the memory related structures on its
first call. (Dan)
- Due to the above, daxctl_dev_mode is now private to libdaxctl, and not
part of the API exported through libdaxctl.h
- Add a large enough buffer at init time to construct dynamic paths, and avoid
asprintf() type allocations for memory blocks at runtime (Dan).
Changes in v6:
- For memory block online/offline operations, the kernel responds with
an EINVAL for both 'real' errors, and if the memory was already in the
requested state. Since there is a TOCTOU hole between checking the
state and storing it, just perform a second check if the store results
in an error. If the check shows the state to be the same as the one
we're attempting, it means that another agent (usually udev) won the
race, but we don't care so long as the state change happened, so don't
report an error. (Fan Du)
Changes in v5:
- device.c: correctly set loglevel for daxctl_ctx for --verbose
- drop the subsys caching, its complexity started to exceed its
benefit. dax-class device models will simply error out during
reconfigure. (Dan)
- Add a note to the man page for the above.
- Clarify the onlining policy (online_movable) in the man page
- rename "numa_node" to "target_node" in device listings (Dan)
- When printing a device 'mode', assume devdax if !system-ram,
avoiding a "mode: unknown" situation which can be confusing. (Dan)
- Add a "state: disabled" attribute to the device listing if a driver
is not bound. This is more apt than the previous "mode: unknown"
listing.
- add an api to get 'dev->resource' parsing /proc/iomem as a
fallback for when the kernel doesn't provide the attribute (Dan)
- convert 'node_*' apis to 'memory_*' apis that act on a new daxctl_memory
object (Dan)
- online only memory sections belonging to the device in question by
cross referencing block indices with the dax device resource (Dan)
- Refuse to reconfigure a device that is already in the target mode.
Until now, reconfiguring a system-ram device back to system-ram would
result in a 'online memory may not be hot-removed' kernel warning.
- If the device was already in the system-ram mode, skip
disabling/enabling, but still try to online the memory unless the
--no-online option is in effect.
- In daxctl_unbind, also 'remove_id' to prevent devices automatically
binding to the kmem driver on a disable + re-enable, which can be
surprising (Dan).
- Rewrite the top half of daxctl/device.c to borrow elements from
ndctl/namespace.c so that it can support growing additional commands
that operate on devices (online-memory and offline-memory)
- Refactor the bottom half of daxctl/device.c so we only do the
disabling/offlining steps if the device was enabled.
- Add new commands to online and offline memory sections
associated with a given dax device (Dan)
- Add a new test - daxctl-device.sh - to test daxctl reconfigure-device,
online-memory, and offline-memory commands.
- Add an example in documentation demonstrating how to use numactl
to bind a process to a node surfaced from a dax device (Andy Rudoff)
Changes in v4:
- Don't fail add_dax_dev for kmod failures. Instead fail only when the kmod
list is actually used, i.e. during daxctl-reconfigure-device
Changes in v3:
- In daxctl_dev_get_mode(), remove the subsystem warning, detect dax-class
and simply make it return devdax
Changes in v2:
- Add examples to the documentation page (Dave Hansen)
- Clarify documentation regarding the conversion from system-ram to devdax
- Remove any references to a persistent config from the documentation -
those can be added when the feature is added.
- device.c: validate option compatibility
- daxctl-list: display numa_node for device listings
- daxctl-list: display mode for device listings
- make the options more consistent by adding a '-O' short option
for --attempt-offline
Add a new daxctl-reconfigure-device command that lets us reconfigure DAX
devices back and forth between 'system-ram' and 'device-dax' modes. It
also includes facilities to online any newly hot-plugged memory
(default), and attempt to offline memory before converting away from the
system-ram mode (not default, requires a --attempt-offline option).
Currently missing from this series is a way to persistently store which
devices have been 'marked' for use as system-ram. This depends on a
config system overhaul in ndctl, and patches for those will follow
separately and are independent of this work.
Example invocations:
1. Reconfigure dax0.0 to system-ram mode, don’t online the memory
# daxctl reconfigure-device --mode=system-ram --no-online dax0.0
[
{
"chardev":"dax0.0",
"size":16777216000,
"target_node":2,
"mode":"system-ram"
}
]
2. Reconfigure dax0.0 to devdax mode, attempt to offline the memory
# daxctl reconfigure-device --human --mode=devdax --attempt-offline dax0.0
{
"chardev":"dax0.0",
"size":"15.63 GiB (16.78 GB)",
"target_node":2,
"mode":"devdax"
}
3. Reconfigure all dax devices on region0 to system-ram mode
# daxctl reconfigure-device --mode=system-ram --region=0 all
[
{
"chardev":"dax0.0",
"size":16777216000,
"target_node":2,
"mode":"system-ram"
},
{
"chardev":"dax0.1",
"size":16777216000,
"target_node":3,
"mode":"system-ram"
}
]
These patches can also be found in the 'kmem-pending' branch on github:
https://github.com/pmem/ndctl/tree/kmem-pending
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Vishal Verma (13):
libdaxctl: add interfaces to get ctx and check device state
libdaxctl: add interfaces to enable/disable devices
libdaxctl: add an interface to retrieve the device resource
libdaxctl: add a 'daxctl_memory' object for memory based operations
daxctl/list: add target_node for device listings
daxctl/list: display the mode for a dax device
daxctl: add a new reconfigure-device command
Documentation/daxctl: add a man page for daxctl-reconfigure-device
daxctl: add commands to online and offline memory
Documentation: Add man pages for daxctl-{on,off}line-memory
contrib/ndctl: fix region-id completions for daxctl
contrib/ndctl: add bash-completion for the new daxctl commands
test: Add a unit test for daxctl-reconfigure-device and friends
Documentation/daxctl/Makefile.am | 5 +-
.../daxctl/daxctl-offline-memory.txt | 72 ++
Documentation/daxctl/daxctl-online-memory.txt | 80 +++
.../daxctl/daxctl-reconfigure-device.txt | 156 ++++
Makefile.am | 3 +-
contrib/ndctl | 38 +-
daxctl/Makefile.am | 2 +
daxctl/builtin.h | 3 +
daxctl/daxctl.c | 3 +
daxctl/device.c | 591 ++++++++++++++++
daxctl/lib/Makefile.am | 5 +-
daxctl/lib/libdaxctl-private.h | 40 ++
daxctl/lib/libdaxctl.c | 664 ++++++++++++++++++
daxctl/lib/libdaxctl.sym | 19 +
daxctl/libdaxctl.h | 17 +
test/Makefile.am | 3 +-
test/common | 19 +-
test/daxctl-devices.sh | 81 +++
util/iomem.c | 37 +
util/iomem.h | 12 +
util/json.c | 22 +
21 files changed, 1858 insertions(+), 14 deletions(-)
create mode 100644 Documentation/daxctl/daxctl-offline-memory.txt
create mode 100644 Documentation/daxctl/daxctl-online-memory.txt
create mode 100644 Documentation/daxctl/daxctl-reconfigure-device.txt
create mode 100644 daxctl/device.c
create mode 100755 test/daxctl-devices.sh
create mode 100644 util/iomem.c
create mode 100644 util/iomem.h
--
2.20.1
2 years, 10 months
I am looking for a soul mate
by Beatrice
Russian bride agency
=> More> http://newukrdat.in.ua/
--
Best Regards,
Beatrice Arturovna
**********************************************************************
If you do not wish to receive further emails,
then please click here to List-Unsubscribe
**********************************************************************
| 52992 Santa Monica Blvd, Suite 52992 Los Angeles, CA 52992-52992, USA |
2 years, 10 months
[PATCH] dm: fix dax_dev NULL dereference
by Pankaj Gupta
'Murphy Zhou' reports[1] hitting the panic when running xfstests
generic/108 on pmem ramdisk. In his words:
This test is simulating partial disk error when calling fsync():
create a lvm vg which consists of 2 disks:
one scsi_debug disk; one other disk I specified, pmem ramdisk in this case.
create lv in this vg and write to it, make sure writing across 2 disks;
offline scsi_debug disk;
write again to allocated area;
expect fsync: IO error.
If one of the disks is pmem ramdisk, it reproduces every time on my setup,
on v5.3-rc2+.
The mount -o dax option is not required to reproduce this panic.
...
Fix this by returning false from 'device_synchronous' function when dax_dev
is NULL.
[ 1984.878208] BUG: kernel NULL pointer dereference, address: 00000000000002d0
[ 1984.882546] #PF: supervisor read access in kernel mode
[ 1984.885664] #PF: error_code(0x0000) - not-present page
[ 1984.888626] PGD 0 P4D 0
[ 1984.890140] Oops: 0000 [#1] SMP PTI
...
...
[ 1984.943682] Call Trace:
[ 1984.945007] device_synchronous+0xe/0x20 [dm_mod]
[ 1984.947328] stripe_iterate_devices+0x48/0x60 [dm_mod]
[ 1984.949947] ? dm_set_device_limits+0x130/0x130 [dm_mod]
[ 1984.952516] dm_table_supports_dax+0x39/0x90 [dm_mod]
[ 1984.954989] dm_table_set_restrictions+0x248/0x5d0 [dm_mod]
[ 1984.957685] dm_setup_md_queue+0x66/0x110 [dm_mod]
[ 1984.960280] table_load+0x1e3/0x390 [dm_mod]
[ 1984.962491] ? retrieve_status+0x1c0/0x1c0 [dm_mod]
[ 1984.964910] ctl_ioctl+0x1d3/0x550 [dm_mod]
[ 1984.967006] ? path_lookupat+0xf4/0x200
[ 1984.968890] dm_ctl_ioctl+0xa/0x10 [dm_mod]
[ 1984.970920] do_vfs_ioctl+0xa9/0x630
[ 1984.972701] ksys_ioctl+0x60/0x90
[ 1984.974335] __x64_sys_ioctl+0x16/0x20
[ 1984.976221] do_syscall_64+0x5b/0x1d0
[ 1984.978091] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[1] https://lore.kernel.org/linux-fsdevel/2011806368.5335560.1564469373050.Ja...
Fixes: 2e9ee0955d3c ("dm: enable synchronous dax")
Reported-by: jencce.kernel(a)gmail.com
Tested-by: jencce.kernel(a)gmail.com
Signed-off-by: Pankaj Gupta <pagupta(a)redhat.com>
---
drivers/md/dm-table.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index caaee8032afe..b065845c1bdd 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -894,6 +894,9 @@ int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
static int device_synchronous(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
+ if (!dev->dax_dev)
+ return false;
+
return dax_synchronous(dev->dax_dev);
}
--
2.20.1
2 years, 10 months
[GIT PULL] dax fix for v5.3-rc3
by Dan Williams
Hi Linus, please pull from:
git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm dax-fix-5.3-rc3
...to receive a manual fixup I happened to drop. I re-fetched the
patch from the mailing list after integrating the git message-id
support to generate a "Link:" tag [1], but then did not re-apply the
fixup. This now matches what I tested and went into yesterday's -next.
[1]: https://lists.linuxfoundation.org/pipermail/ksummit-discuss/2019-July/006...
---
The following changes since commit 609488bc979f99f805f34e9a32c1e3b71179d10b:
Linux 5.3-rc2 (2019-07-28 12:47:02 -0700)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm dax-fix-5.3-rc3
for you to fetch changes up to 61c30c98ef17e5a330d7bb8494b78b3d6dffe9b8:
dax: Fix missed wakeup in put_unlocked_entry() (2019-07-29 09:24:22 -0700)
----------------------------------------------------------------
dax fix 5.3-rc3
- Fix a botched manual patch update that got dropped between testing and
application.
----------------------------------------------------------------
Jan Kara (1):
dax: Fix missed wakeup in put_unlocked_entry()
fs/dax.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
---
diff --git a/fs/dax.c b/fs/dax.c
index a237141d8787..b64964ef44f6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -266,7 +266,7 @@ static void wait_entry_unlocked(struct xa_state
*xas, void *entry)
static void put_unlocked_entry(struct xa_state *xas, void *entry)
{
/* If we were the only waiter woken, wake the next one */
- if (entry && dax_is_conflict(entry))
+ if (entry && !dax_is_conflict(entry))
dax_wake_entry(xas, entry, false);
}
2 years, 10 months
[PATCH v5 13/29] compat_ioctl: move more drivers to compat_ptr_ioctl
by Arnd Bergmann
The .ioctl and .compat_ioctl file operations have the same prototype so
they can both point to the same function, which works great almost all
the time when all the commands are compatible.
One exception is the s390 architecture, where a compat pointer is only
31 bit wide, and converting it into a 64-bit pointer requires calling
compat_ptr(). Most drivers here will never run in s390, but since we now
have a generic helper for it, it's easy enough to use it consistently.
I double-checked all these drivers to ensure that all ioctl arguments
are used as pointers or are ignored, but are not interpreted as integer
values.
Acked-by: Jason Gunthorpe <jgg(a)mellanox.com>
Acked-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Acked-by: Mauro Carvalho Chehab <mchehab+samsung(a)kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Acked-by: David Sterba <dsterba(a)suse.com>
Acked-by: Darren Hart (VMware) <dvhart(a)infradead.org>
Acked-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Acked-by: Bjorn Andersson <bjorn.andersson(a)linaro.org>
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
drivers/android/binder.c | 2 +-
drivers/crypto/qat/qat_common/adf_ctl_drv.c | 2 +-
drivers/dma-buf/dma-buf.c | 4 +---
drivers/dma-buf/sw_sync.c | 2 +-
drivers/dma-buf/sync_file.c | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +-
drivers/hid/hidraw.c | 4 +---
drivers/iio/industrialio-core.c | 2 +-
drivers/infiniband/core/uverbs_main.c | 4 ++--
drivers/media/rc/lirc_dev.c | 4 +---
drivers/mfd/cros_ec_dev.c | 4 +---
drivers/misc/vmw_vmci/vmci_host.c | 2 +-
drivers/nvdimm/bus.c | 4 ++--
drivers/nvme/host/core.c | 2 +-
drivers/pci/switch/switchtec.c | 2 +-
drivers/platform/x86/wmi.c | 2 +-
drivers/rpmsg/rpmsg_char.c | 4 ++--
drivers/sbus/char/display7seg.c | 2 +-
drivers/sbus/char/envctrl.c | 4 +---
drivers/scsi/3w-xxxx.c | 4 +---
drivers/scsi/cxlflash/main.c | 2 +-
drivers/scsi/esas2r/esas2r_main.c | 2 +-
drivers/scsi/pmcraid.c | 4 +---
drivers/staging/android/ion/ion.c | 4 +---
drivers/staging/vme/devices/vme_user.c | 2 +-
drivers/tee/tee_core.c | 2 +-
drivers/usb/class/cdc-wdm.c | 2 +-
drivers/usb/class/usbtmc.c | 4 +---
drivers/virt/fsl_hypervisor.c | 2 +-
fs/btrfs/super.c | 2 +-
fs/fuse/dev.c | 2 +-
fs/notify/fanotify/fanotify_user.c | 2 +-
fs/userfaultfd.c | 2 +-
net/rfkill/core.c | 2 +-
34 files changed, 37 insertions(+), 55 deletions(-)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index dc1c83eafc22..79955e82544a 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -6043,7 +6043,7 @@ const struct file_operations binder_fops = {
.owner = THIS_MODULE,
.poll = binder_poll,
.unlocked_ioctl = binder_ioctl,
- .compat_ioctl = binder_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.mmap = binder_mmap,
.open = binder_open,
.flush = binder_flush,
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index abc7a7f64d64..ef0e482ee04f 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -68,7 +68,7 @@ static long adf_ctl_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
static const struct file_operations adf_ctl_ops = {
.owner = THIS_MODULE,
.unlocked_ioctl = adf_ctl_ioctl,
- .compat_ioctl = adf_ctl_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
struct adf_ctl_drv_info {
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f45bfb29ef96..f6d9047b7a69 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -415,9 +415,7 @@ static const struct file_operations dma_buf_fops = {
.llseek = dma_buf_llseek,
.poll = dma_buf_poll,
.unlocked_ioctl = dma_buf_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = dma_buf_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.show_fdinfo = dma_buf_show_fdinfo,
};
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 051f6c2873c7..51026cb08801 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -410,5 +410,5 @@ const struct file_operations sw_sync_debugfs_fops = {
.open = sw_sync_debugfs_open,
.release = sw_sync_debugfs_release,
.unlocked_ioctl = sw_sync_ioctl,
- .compat_ioctl = sw_sync_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index ee4d1a96d779..85b96757fc76 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -480,5 +480,5 @@ static const struct file_operations sync_file_fops = {
.release = sync_file_release,
.poll = sync_file_poll,
.unlocked_ioctl = sync_file_ioctl,
- .compat_ioctl = sync_file_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 26b15cc56c31..ea933d2444bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -49,7 +49,7 @@ static const char kfd_dev_name[] = "kfd";
static const struct file_operations kfd_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = kfd_ioctl,
- .compat_ioctl = kfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = kfd_open,
.mmap = kfd_mmap,
};
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 006bd6f4f653..923edc650f46 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -468,9 +468,7 @@ static const struct file_operations hidraw_ops = {
.release = hidraw_release,
.unlocked_ioctl = hidraw_ioctl,
.fasync = hidraw_fasync,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = hidraw_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 524a686077ca..9dd687534035 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1610,7 +1610,7 @@ static const struct file_operations iio_buffer_fileops = {
.owner = THIS_MODULE,
.llseek = noop_llseek,
.unlocked_ioctl = iio_ioctl,
- .compat_ioctl = iio_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static int iio_check_unique_scan_index(struct iio_dev *indio_dev)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 11c13c1381cf..d6d2f6c0cd01 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1135,7 +1135,7 @@ static const struct file_operations uverbs_fops = {
.release = ib_uverbs_close,
.llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
- .compat_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static const struct file_operations uverbs_mmap_fops = {
@@ -1146,7 +1146,7 @@ static const struct file_operations uverbs_mmap_fops = {
.release = ib_uverbs_close,
.llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
- .compat_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data,
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index f078f8a3aec8..9a8c1cf54ac4 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -720,9 +720,7 @@ static const struct file_operations lirc_fops = {
.owner = THIS_MODULE,
.write = ir_lirc_transmit_ir,
.unlocked_ioctl = ir_lirc_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ir_lirc_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.read = ir_lirc_read,
.poll = ir_lirc_poll,
.open = ir_lirc_open,
diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 41dccced5026..db1eefcd770b 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -239,9 +239,7 @@ static const struct file_operations fops = {
.release = ec_device_release,
.read = ec_device_read,
.unlocked_ioctl = ec_device_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ec_device_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
};
static void cros_ec_class_release(struct device *dev)
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index 833e2bd248a5..903e321e8e87 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -961,7 +961,7 @@ static const struct file_operations vmuser_fops = {
.release = vmci_host_close,
.poll = vmci_host_poll,
.unlocked_ioctl = vmci_host_unlocked_ioctl,
- .compat_ioctl = vmci_host_unlocked_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static struct miscdevice vmci_host_miscdev = {
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 798c5c4aea9c..6ca142d833ab 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -1229,7 +1229,7 @@ static const struct file_operations nvdimm_bus_fops = {
.owner = THIS_MODULE,
.open = nd_open,
.unlocked_ioctl = bus_ioctl,
- .compat_ioctl = bus_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
@@ -1237,7 +1237,7 @@ static const struct file_operations nvdimm_fops = {
.owner = THIS_MODULE,
.open = nd_open,
.unlocked_ioctl = dimm_ioctl,
- .compat_ioctl = dimm_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8f3fbe5ca937..be07bd1f6654 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2813,7 +2813,7 @@ static const struct file_operations nvme_dev_fops = {
.owner = THIS_MODULE,
.open = nvme_dev_open,
.unlocked_ioctl = nvme_dev_ioctl,
- .compat_ioctl = nvme_dev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static ssize_t nvme_sysfs_reset(struct device *dev,
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 8c94cd3fd1f2..66610f04d76d 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1025,7 +1025,7 @@ static const struct file_operations switchtec_fops = {
.read = switchtec_dev_read,
.poll = switchtec_dev_poll,
.unlocked_ioctl = switchtec_dev_ioctl,
- .compat_ioctl = switchtec_dev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static void link_event_work(struct work_struct *work)
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 784cea8572c2..d9a0dd94ee62 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -913,7 +913,7 @@ static const struct file_operations wmi_fops = {
.read = wmi_char_read,
.open = wmi_char_open,
.unlocked_ioctl = wmi_ioctl,
- .compat_ioctl = wmi_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static int wmi_dev_probe(struct device *dev)
diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c
index eea5ebbb5119..507bfe163883 100644
--- a/drivers/rpmsg/rpmsg_char.c
+++ b/drivers/rpmsg/rpmsg_char.c
@@ -290,7 +290,7 @@ static const struct file_operations rpmsg_eptdev_fops = {
.write_iter = rpmsg_eptdev_write_iter,
.poll = rpmsg_eptdev_poll,
.unlocked_ioctl = rpmsg_eptdev_ioctl,
- .compat_ioctl = rpmsg_eptdev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static ssize_t name_show(struct device *dev, struct device_attribute *attr,
@@ -451,7 +451,7 @@ static const struct file_operations rpmsg_ctrldev_fops = {
.open = rpmsg_ctrldev_open,
.release = rpmsg_ctrldev_release,
.unlocked_ioctl = rpmsg_ctrldev_ioctl,
- .compat_ioctl = rpmsg_ctrldev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static void rpmsg_ctrldev_release_device(struct device *dev)
diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c
index 971fe074d7c9..fad936eb845f 100644
--- a/drivers/sbus/char/display7seg.c
+++ b/drivers/sbus/char/display7seg.c
@@ -156,7 +156,7 @@ static long d7s_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
static const struct file_operations d7s_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = d7s_ioctl,
- .compat_ioctl = d7s_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = d7s_open,
.release = d7s_release,
.llseek = noop_llseek,
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index a63d5e402ff2..12d66aa61ede 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -715,9 +715,7 @@ static const struct file_operations envctrl_fops = {
.owner = THIS_MODULE,
.read = envctrl_read,
.unlocked_ioctl = envctrl_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = envctrl_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.open = envctrl_open,
.release = envctrl_release,
.llseek = noop_llseek,
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index 2b1e0d503020..fb6444d0409c 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -1049,9 +1049,7 @@ static int tw_chrdev_open(struct inode *inode, struct file *file)
static const struct file_operations tw_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = tw_chrdev_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = tw_chrdev_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.open = tw_chrdev_open,
.release = NULL,
.llseek = noop_llseek,
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index b1f4724efde2..6927654792b0 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -3585,7 +3585,7 @@ static const struct file_operations cxlflash_chr_fops = {
.owner = THIS_MODULE,
.open = cxlflash_chr_open,
.unlocked_ioctl = cxlflash_chr_ioctl,
- .compat_ioctl = cxlflash_chr_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
/**
diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c
index fdbda5c05aa0..80c5a235d193 100644
--- a/drivers/scsi/esas2r/esas2r_main.c
+++ b/drivers/scsi/esas2r/esas2r_main.c
@@ -613,7 +613,7 @@ static int __init esas2r_init(void)
/* Handle ioctl calls to "/proc/scsi/esas2r/ATTOnode" */
static const struct file_operations esas2r_proc_fops = {
- .compat_ioctl = esas2r_proc_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.unlocked_ioctl = esas2r_proc_ioctl,
};
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index 71ff3936da4f..12c4487cb9f6 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -3973,9 +3973,7 @@ static const struct file_operations pmcraid_fops = {
.open = pmcraid_chr_open,
.fasync = pmcraid_chr_fasync,
.unlocked_ioctl = pmcraid_chr_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = pmcraid_chr_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 92c2914239e3..1663c163edca 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -567,9 +567,7 @@ static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static const struct file_operations ion_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = ion_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ion_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
};
static int debug_shrink_set(void *data, u64 val)
diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c
index 6a33aaa1a49f..fd0ea4dbcb91 100644
--- a/drivers/staging/vme/devices/vme_user.c
+++ b/drivers/staging/vme/devices/vme_user.c
@@ -494,7 +494,7 @@ static const struct file_operations vme_user_fops = {
.write = vme_user_write,
.llseek = vme_user_llseek,
.unlocked_ioctl = vme_user_unlocked_ioctl,
- .compat_ioctl = vme_user_unlocked_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.mmap = vme_user_mmap,
};
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index 0f16d9ffd8d1..37d22e39fd8d 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -675,7 +675,7 @@ static const struct file_operations tee_fops = {
.open = tee_open,
.release = tee_release,
.unlocked_ioctl = tee_ioctl,
- .compat_ioctl = tee_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static void tee_release_device(struct device *dev)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index a7824a51f86d..3234dc539873 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -724,7 +724,7 @@ static const struct file_operations wdm_fops = {
.release = wdm_release,
.poll = wdm_poll,
.unlocked_ioctl = wdm_ioctl,
- .compat_ioctl = wdm_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 4942122b2346..bbd0308b13f5 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -2220,9 +2220,7 @@ static const struct file_operations fops = {
.release = usbtmc_release,
.flush = usbtmc_flush,
.unlocked_ioctl = usbtmc_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = usbtmc_ioctl,
-#endif
+ .compat_ioctl = compat_ptr_ioctl,
.fasync = usbtmc_fasync,
.poll = usbtmc_poll,
.llseek = default_llseek,
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index 93d5bebf9572..1b0b11b55d2a 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -706,7 +706,7 @@ static const struct file_operations fsl_hv_fops = {
.poll = fsl_hv_poll,
.read = fsl_hv_read,
.unlocked_ioctl = fsl_hv_ioctl,
- .compat_ioctl = fsl_hv_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static struct miscdevice fsl_hv_misc_dev = {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 78de9d5d80c6..f4f792b7379d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2305,7 +2305,7 @@ static const struct super_operations btrfs_super_ops = {
static const struct file_operations btrfs_ctl_fops = {
.open = btrfs_control_open,
.unlocked_ioctl = btrfs_control_ioctl,
- .compat_ioctl = btrfs_control_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.owner = THIS_MODULE,
.llseek = noop_llseek,
};
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ea8237513dfa..5bb93a3c397e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2354,7 +2354,7 @@ const struct file_operations fuse_dev_operations = {
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
.unlocked_ioctl = fuse_dev_ioctl,
- .compat_ioctl = fuse_dev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 91006f47e420..3f494c8eaf2b 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -523,7 +523,7 @@ static const struct file_operations fanotify_fops = {
.fasync = NULL,
.release = fanotify_release,
.unlocked_ioctl = fanotify_ioctl,
- .compat_ioctl = fanotify_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ccbdbd62f0d8..6ec18e0492e6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1920,7 +1920,7 @@ static const struct file_operations userfaultfd_fops = {
.poll = userfaultfd_poll,
.read = userfaultfd_read,
.unlocked_ioctl = userfaultfd_ioctl,
- .compat_ioctl = userfaultfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index f9b08a6d8dbe..c4be6a94ba97 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1311,7 +1311,7 @@ static const struct file_operations rfkill_fops = {
.release = rfkill_fop_release,
#ifdef CONFIG_RFKILL_INPUT
.unlocked_ioctl = rfkill_fop_ioctl,
- .compat_ioctl = rfkill_fop_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
#endif
.llseek = no_llseek,
};
--
2.20.0
2 years, 11 months
[regression] panic at __dax_synchronous after synchronous dax enabled
by Murphy Zhou
Hi,
I hit this panic when running xfstests generic/108 on a pmem ramdisk.
This test simulates a partial disk failure when calling fsync():
create an LVM VG which consists of 2 disks:
one scsi_debug disk and one other disk I specified (a pmem ramdisk in this case);
create an LV in this VG and write to it, making sure the write spans both disks;
offline the scsi_debug disk;
write again to the already allocated area;
expect fsync to fail with an I/O error.
If one of the disks is a pmem ramdisk, it reproduces every time on my setup,
on v5.3-rc2+.
The mount -o dax option is not required to reproduce this panic.
Bisect points to this:
commit 2e9ee0955d3c2d3db56aa02ba6f948ba35d5e9c1
Author: Pankaj Gupta <pagupta(a)redhat.com>
Date: Fri Jul 5 19:33:25 2019 +0530
dm: enable synchronous dax
Reverting this commit "fixes" this panic. I can send a revert patch if needed.
Thanks,
M
FSTYP -- xfs (debug)
PLATFORM -- Linux/x86_64 7u 5.3.0-rc2-master-2a11c76+ #155 SMP Tue Jul 30 11:29:05 CST 2019
MKFS_OPTIONS -- -f -f -b size=4096 /dev/pmem1
MOUNT_OPTIONS -- -o dax -o context=system_u:object_r:root_t:s0 /dev/pmem1 /test1
generic/108 5s ... [00:17:34]
[ 1984.878208] BUG: kernel NULL pointer dereference, address: 00000000000002d0
[ 1984.882546] #PF: supervisor read access in kernel mode
[ 1984.885664] #PF: error_code(0x0000) - not-present page
[ 1984.888626] PGD 0 P4D 0
[ 1984.890140] Oops: 0000 [#1] SMP PTI
[ 1984.892345] CPU: 17 PID: 3321 Comm: lvm Not tainted 5.3.0-rc2-master-2a11c76+ #155
[ 1984.896864] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 1984.900460] RIP: 0010:__dax_synchronous+0x5/0x20
[ 1984.903161] Code: ff ff ff c3 90 66 66 66 66 90 48 8b 87 d0 02 00 00 48 d1 e8 83 e0 01 c3 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 <48> 8b 87 d0 02 00 00 48 c1 e8 02 83 e0 01 c3 66 90 66 2e 0f 1f 84
[ 1984.912987] RSP: 0018:ffffad06503a7b38 EFLAGS: 00010246
[ 1984.915722] RAX: ffff9a248c7c2200 RBX: 0000000000000000 RCX: 0000000000046000
[ 1984.919417] RDX: 0000000000000800 RSI: ffff9a2493486d18 RDI: 0000000000000000
[ 1984.923182] RBP: ffff9a248c7c2200 R08: 0000000000000000 R09: 0000000000000000
[ 1984.926644] R10: 0000000000000003 R11: ffffad06503a7a28 R12: ffffad0640109040
[ 1984.930214] R13: 0000000000000000 R14: ffffffffc03d3ed0 R15: 0000000000000000
[ 1984.933648] FS: 00007f4dbf87d880(0000) GS:ffff9a2498640000(0000) knlGS:0000000000000000
[ 1984.937494] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1984.940273] CR2: 00000000000002d0 CR3: 000000046be80000 CR4: 00000000000006e0
[ 1984.943682] Call Trace:
[ 1984.945007] device_synchronous+0xe/0x20 [dm_mod]
[ 1984.947328] stripe_iterate_devices+0x48/0x60 [dm_mod]
[ 1984.949947] ? dm_set_device_limits+0x130/0x130 [dm_mod]
[ 1984.952516] dm_table_supports_dax+0x39/0x90 [dm_mod]
[ 1984.954989] dm_table_set_restrictions+0x248/0x5d0 [dm_mod]
[ 1984.957685] dm_setup_md_queue+0x66/0x110 [dm_mod]
[ 1984.960280] table_load+0x1e3/0x390 [dm_mod]
[ 1984.962491] ? retrieve_status+0x1c0/0x1c0 [dm_mod]
[ 1984.964910] ctl_ioctl+0x1d3/0x550 [dm_mod]
[ 1984.967006] ? path_lookupat+0xf4/0x200
[ 1984.968890] dm_ctl_ioctl+0xa/0x10 [dm_mod]
[ 1984.970920] do_vfs_ioctl+0xa9/0x630
[ 1984.972701] ksys_ioctl+0x60/0x90
[ 1984.974335] __x64_sys_ioctl+0x16/0x20
[ 1984.976221] do_syscall_64+0x5b/0x1d0
[ 1984.978091] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1984.980552] RIP: 0033:0x7f4dbe49f2f7
[ 1984.982304] Code: 44 00 00 48 8b 05 79 1b 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 49 1b 2d 00 f7 d8 64 89 01 48
[ 1984.991519] RSP: 002b:00007ffd2b70d578 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[ 1984.995203] RAX: ffffffffffffffda RBX: 00005612727d5680 RCX: 00007f4dbe49f2f7
[ 1984.998685] RDX: 000056127483c860 RSI: 00000000c138fd09 RDI: 0000000000000004
[ 1985.002145] RBP: 00007f4dbec07503 R08: 00007f4dbec08040 R09: 00007ffd2b70d4a0
[ 1985.005667] R10: 0000000000000003 R11: 0000000000000246 R12: 000056127483c860
[ 1985.009147] R13: 00007f4dbec07503 R14: 000056127481a700 R15: 00007f4dbec07503
[ 1985.012670] Modules linked in: scsi_debug sunrpc snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep crct10dif_pclmul crc32_pclmul snd_seq ghash_clmulni_intel snd_seq_device snd_pcm snd_timer aesni_intel snd dax_pmem_compat crypto_simd device_dax cryptd soundcore sg glue_helper dax_pmem_core pcspkr virtio_balloon joydev i2c_piix4 ip_tables xfs libcrc32c qxl drm_kms_helper syscopyarea sysfillrect sysimgblt sd_mod fb_sys_fops ttm ata_generic pata_acpi drm virtio_console ata_piix 8139too libata virtio_pci crc32c_intel 8139cp nd_pmem serio_raw virtio_ring virtio floppy mii dm_mirror dm_region_hash dm_log dm_mod
[ 1985.040136] CR2: 00000000000002d0
[ 1985.042038] ---[ end trace db9a39c3773bb6fd ]---
[ 1985.044378] RIP: 0010:__dax_synchronous+0x5/0x20
[ 1985.046697] Code: ff ff ff c3 90 66 66 66 66 90 48 8b 87 d0 02 00 00 48 d1 e8 83 e0 01 c3 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 <48> 8b 87 d0 02 00 00 48 c1 e8 02 83 e0 01 c3 66 90 66 2e 0f 1f 84
[ 1985.055931] RSP: 0018:ffffad06503a7b38 EFLAGS: 00010246
[ 1985.058525] RAX: ffff9a248c7c2200 RBX: 0000000000000000 RCX: 0000000000046000
[ 1985.062065] RDX: 0000000000000800 RSI: ffff9a2493486d18 RDI: 0000000000000000
[ 1985.065441] RBP: ffff9a248c7c2200 R08: 0000000000000000 R09: 0000000000000000
[ 1985.068699] R10: 0000000000000003 R11: ffffad06503a7a28 R12: ffffad0640109040
[ 1985.071930] R13: 0000000000000000 R14: ffffffffc03d3ed0 R15: 0000000000000000
[ 1985.075169] FS: 00007f4dbf87d880(0000) GS:ffff9a2498640000(0000) knlGS:0000000000000000
[ 1985.078966] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1985.081619] CR2: 00000000000002d0 CR3: 000000046be80000 CR4: 00000000000006e0
[ 1985.084802] Kernel panic - not syncing: Fatal exception
[ 1985.156962] Kernel Offset: 0x3c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 1985.161249] ---[ end Kernel panic - not syncing: Fatal exception ]---
bisect log:
git bisect start
# bad: [f8c3500cd137867927bc080f4a6e02e0222dd1b8] Merge tag 'libnvdimm-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
git bisect bad f8c3500cd137867927bc080f4a6e02e0222dd1b8
# good: [2ae048e16636afd7521270acacb08d9c42fd23f0] Merge tag 'sound-fix-5.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
git bisect good 2ae048e16636afd7521270acacb08d9c42fd23f0
# good: [913b187d12962fe8d9fa93c959f2f71ac16597ec] watchdog: stmp3xxx_rtc_wdt: drop warning after registering device
git bisect good 913b187d12962fe8d9fa93c959f2f71ac16597ec
# good: [4d1c6a0ec2d98e51f950127bf9299531caac53e1] watchdog: introduce watchdog.open_timeout commandline parameter
git bisect good 4d1c6a0ec2d98e51f950127bf9299531caac53e1
# good: [7fb832ae72949c883da52d6316ff08f03c75d300] watchdog: digicolor_wdt: Remove unused variable in dc_wdt_probe
git bisect good 7fb832ae72949c883da52d6316ff08f03c75d300
# bad: [2e9ee0955d3c2d3db56aa02ba6f948ba35d5e9c1] dm: enable synchronous dax
git bisect bad 2e9ee0955d3c2d3db56aa02ba6f948ba35d5e9c1
# good: [c5d4355d10d414a96ca870b731756b89d068d57a] libnvdimm: nd_region flush callback support
git bisect good c5d4355d10d414a96ca870b731756b89d068d57a
# good: [fefc1d97fa4b5e016bbe15447dc3edcd9e1bcb9f] libnvdimm: add dax_dev sync flag
git bisect good fefc1d97fa4b5e016bbe15447dc3edcd9e1bcb9f
# first bad commit: [2e9ee0955d3c2d3db56aa02ba6f948ba35d5e9c1] dm: enable synchronous dax
2 years, 11 months
专业办理香港手机卡--月租低至6元/月
by 专业办理香港手机卡--月租低至6元/月
专业办理香港手机卡--在大陆享用香港信号
专业办理香港手机卡 在大陆享用香港信号 月租低至6元/月 大陆建立香港虚拟办公点
Q Q 咨询:483456665 E-mail 咨询:hktel852(a)hotmail.com
2 years, 11 months