[ndctl PATCH] ndctl: fix unit test build instructions
by Dan Williams
Use "M=tools/testing/nvdimm" from the kernel build directory rather than
"-C tools/testing/nvdimm". This arranges for the unit test modules to
honor a "localversion" file like the one distributed by the -next tree.
Also switch from 'install' to 'modules_install' to trigger depmod to run
after the modules are installed.
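For example (hypothetical version string): with a "localversion" file
containing "-next-20160314", building with "M=" stamps the unit test
modules with the matching "-next-20160314" suffix and installs them
under the running -next kernel's /lib/modules directory, which the old
"-C" invocation did not pick up.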
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
README.md | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index 3f62adce2f92..af2d7881ec51 100644
--- a/README.md
+++ b/README.md
@@ -40,12 +40,9 @@ This will be used to emulate DAX.
`CONFIG_ND_BLK=m`
`CONFIG_ND_BTT=m`
-4. Build and install the unit test enabled libnvdimm modules in the
- following order. The unit test modules need to be in place prior to
- the `depmod` that runs during the final `modules_install`
-`make -C tools/testing/nvdimm/`
-`sudo make -C tools/testing/nvdimm/ install`
-`sudo make modules_install`
+4. Build and install the unit test enabled libnvdimm modules.
+`make M=tools/testing/nvdimm/`
+`sudo make M=tools/testing/nvdimm/ modules_install`
5. Now run `make check` in the ndctl source directory, or `ndctl test`,
if ndctl was built with `--enable-test`.
[PATCH v2-UPDATE2 3/4] resource: Add device-managed insert/remove_resource()
by Toshi Kani
insert_resource() and remove_resource() are called by producers
of resources, such as FW modules and bus drivers. These modules
may be implemented as loadable modules.
Add device-managed implementations of the insert_resource() and
remove_resource() functions.
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Cc: Ingo Molnar <mingo(a)kernel.org>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Dan Williams <dan.j.williams(a)intel.com>
---
v2-UPDATE2:
- Update per Ingo's review comments; use ret, use WARN_ON_ONCE().
v2-UPDATE:
- Rename a helper remove func to __devm_remove_resource(). (Dan Williams)
---
include/linux/ioport.h | 5 +++
kernel/resource.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 77 insertions(+)
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 8017b8b..3580038 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -259,6 +259,11 @@ extern struct resource * __devm_request_region(struct device *dev,
extern void __devm_release_region(struct device *dev, struct resource *parent,
resource_size_t start, resource_size_t n);
+
+extern int devm_insert_resource(struct device *dev, struct resource *root,
+ struct resource *new);
+extern void devm_remove_resource(struct device *dev, struct resource *old);
+
extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size);
extern int iomem_is_exclusive(u64 addr);
diff --git a/kernel/resource.c b/kernel/resource.c
index effb6ee..86fdd28 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1449,6 +1449,78 @@ void __devm_release_region(struct device *dev, struct resource *parent,
EXPORT_SYMBOL(__devm_release_region);
/*
+ * Helper remove function for devm_insert_resource() and devm_remove_resource()
+ */
+static void __devm_remove_resource(struct device *dev, void *ptr)
+{
+ struct resource **r = ptr;
+
+ remove_resource(*r);
+}
+
+/**
+ * devm_insert_resource() - insert an I/O or memory resource
+ * @dev: device for which to produce the resource
+ * @root: root of the resource tree
+ * @new: descriptor of the new resource
+ *
+ * This is a device-managed version of insert_resource(). There is usually
+ * no need to release resources requested by this function explicitly, since
+ * that will be taken care of when the device is unbound from its bus driver.
+ * If for some reason the resource needs to be released explicitly, because
+ * of ordering issues for example, bus drivers must call devm_remove_resource()
+ * rather than the regular remove_resource().
+ *
+ * devm_insert_resource() is intended for producers of resources, such as
+ * FW modules and bus drivers.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int devm_insert_resource(struct device *dev, struct resource *root,
+ struct resource *new)
+{
+ struct resource **ptr;
+ int ret;
+
+ ptr = devres_alloc(__devm_remove_resource, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
+
+ *ptr = new;
+
+ ret = insert_resource(root, new);
+ if (ret) {
+ dev_err(dev, "unable to insert resource: %pR (%d)\n", new, ret);
+ devres_free(ptr);
+ return ret;
+ }
+
+ devres_add(dev, ptr);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devm_insert_resource);
+
+/**
+ * devm_remove_resource() - remove a previously inserted resource
+ * @dev: device for which to remove the resource
+ * @old: descriptor of the resource
+ *
+ * Remove a resource previously inserted using devm_insert_resource().
+ *
+ * devm_remove_resource() is intended for producers of resources, such as
+ * FW modules and bus drivers.
+ */
+void devm_remove_resource(struct device *dev, struct resource *old)
+{
+ int ret;
+
+ ret = devres_release(dev, __devm_remove_resource,
+ devm_resource_match, old);
+ WARN_ON_ONCE(ret);
+}
+EXPORT_SYMBOL_GPL(devm_remove_resource);
+
+/*
* Called from init/main.c to reserve IO ports.
*/
#define MAXRESERVE 4
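For reference, a minimal usage sketch of the new API (the driver name,
device, and resource range below are illustrative, not part of this
patch): a resource producer inserts its range at probe time and lets
devres remove it automatically when the device is unbound.

#include <linux/ioport.h>
#include <linux/platform_device.h>

/* hypothetical driver-owned descriptor; the range is made up */
static struct resource foo_res = {
	.name	= "foo-reserved",
	.start	= 0x100000000ULL,
	.end	= 0x1ffffffffULL,
	.flags	= IORESOURCE_MEM,
};

static int foo_probe(struct platform_device *pdev)
{
	/* removed automatically via devres when the device unbinds */
	return devm_insert_resource(&pdev->dev, &iomem_resource, &foo_res);
}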
[ndctl PATCH 0/8] updates for v52
by Dan Williams
1/ Add new apis for retrieving the resource base address and the
effective capacity for pmem namespaces in 'raw' or 'memory' mode.
2/ Add a -t option to 'ndctl list' to filter based on 'pmem' or 'blk'
region types.
3/ Add a unit test for the new 'clear poison' functionality targeting
the 4.6 kernel.
4/ Miscellaneous updates and fixlets
---
Dan Williams (8):
ndctl: make --enable-local the default
ndctl: ndctl_{namespace|pfn}_get_resource
ndctl: refresh dynamic pfn attributes from a duplicate
ndctl: use the pfn trimmed size for memory mode namespaces
ndctl create-namespace: fall back to blk if no pmem
ndctl: make test/dax-dev more robust
ndctl: add filter-by-type to the 'list' command
ndctl: test clear poison
Documentation/ndctl-list.txt | 4 ++
Makefile.am | 3 +-
builtin-list.c | 21 +++++++++++++
builtin-xaction-namespace.c | 68 ++++++++++++++++++++++++++++++++----------
configure.ac | 4 +-
lib/libndctl.c | 38 +++++++++++++++++++++++
lib/libndctl.sym | 3 ++
lib/ndctl/libndctl.h | 3 ++
test/clear.sh | 67 +++++++++++++++++++++++++++++++++++++++++
test/dax-dev.c | 7 ++++
util/json.c | 24 +++++++++++----
11 files changed, 213 insertions(+), 29 deletions(-)
create mode 100755 test/clear.sh
[PATCH] x86, pmem: use memcpy_mcsafe() for memcpy_from_pmem()
by Dan Williams
Update the definition of memcpy_from_pmem() to return 0 on success or
-EIO on error. Implement x86::arch_memcpy_from_pmem() with memcpy_mcsafe().
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Ingo Molnar <mingo(a)kernel.org>
Cc: Tony Luck <tony.luck(a)intel.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
Note that this patch is built on a merge of tip/ras/core, tip/x86/asm, and
nvdimm/for-4.6/acpi6.1 for the following reasons (respectively):
1/ memcpy_mcsafe() itself
2/ X86_FEATURE_MCE_RECOVERY cpu capability
3/ merge with the new 'clear poison' implementation:
https://lists.01.org/pipermail/linux-nvdimm/2016-March/004826.html
arch/x86/include/asm/pmem.h | 9 +++++++++
drivers/nvdimm/pmem.c | 4 ++--
include/linux/pmem.h | 14 ++++++++------
3 files changed, 19 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index bf8b35d2035a..4df3820535c6 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -47,6 +47,15 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
BUG();
}
+static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
+ size_t n)
+{
+ if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
+ return memcpy_mcsafe(dst, (void __force *) src, n) ? 0 : -EIO;
+ memcpy(dst, (void __force *) src, n);
+ return 0;
+}
+
/**
* arch_wmb_pmem - synchronize writes to persistent memory
*
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index adc387236fe7..2022d08c60ce 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -98,7 +98,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
if (unlikely(bad_pmem))
rc = -EIO;
else {
- memcpy_from_pmem(mem + off, pmem_addr, len);
+ rc = memcpy_from_pmem(mem + off, pmem_addr, len);
flush_dcache_page(page);
}
} else {
@@ -295,7 +295,7 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns,
if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align)))
return -EIO;
- memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
+ return memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
} else {
memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
wmb_pmem();
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 3ec5309e29f3..c46c5cf6538e 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -66,14 +66,16 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
#endif
/*
- * Architectures that define ARCH_HAS_PMEM_API must provide
- * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
- * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem()
- * and arch_has_wmb_pmem().
+ * memcpy_from_pmem - read from persistent memory with error handling
+ * @dst: destination buffer
+ * @src: source buffer
+ *
+ * Returns 0 on success, -EIO on failure.
*/
-static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
+static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
+ size_t size)
{
- memcpy(dst, (void __force const *) src, size);
+ return arch_memcpy_from_pmem(dst, src, size);
}
static inline bool arch_has_pmem_api(void)
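A hedged sketch of the new calling convention (the wrapper and buffer
names are hypothetical): callers must now check the return code rather
than assume the copy succeeded.

static int foo_read_pmem(void *dst, void __pmem *src, size_t len)
{
	int rc = memcpy_from_pmem(dst, src, len);

	/* -EIO means poisoned pmem was consumed; dst is only valid on 0 */
	if (rc)
		return rc;
	return 0;
}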
[PATCH v2-UPDATE 3/4] resource: Add device-managed insert/remove_resource()
by Toshi Kani
insert_resource() and remove_resource() are called by producers
of resources, such as FW modules and bus drivers. These modules
may be implemented as loadable modules.
Add device-managed implementations of the insert_resource() and
remove_resource() functions.
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Cc: Ingo Molnar <mingo(a)kernel.org>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Dan Williams <dan.j.williams(a)intel.com>
---
v2-UPDATE:
- Rename a helper remove func to __devm_remove_resource(). (Dan Williams)
---
include/linux/ioport.h | 5 +++
kernel/resource.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 74 insertions(+)
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 8017b8b..3580038 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -259,6 +259,11 @@ extern struct resource * __devm_request_region(struct device *dev,
extern void __devm_release_region(struct device *dev, struct resource *parent,
resource_size_t start, resource_size_t n);
+
+extern int devm_insert_resource(struct device *dev, struct resource *root,
+ struct resource *new);
+extern void devm_remove_resource(struct device *dev, struct resource *old);
+
extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size);
extern int iomem_is_exclusive(u64 addr);
diff --git a/kernel/resource.c b/kernel/resource.c
index effb6ee..12a9d57 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1449,6 +1449,75 @@ void __devm_release_region(struct device *dev, struct resource *parent,
EXPORT_SYMBOL(__devm_release_region);
/*
+ * Helper remove function for devm_insert_resource() and devm_remove_resource()
+ */
+static void __devm_remove_resource(struct device *dev, void *ptr)
+{
+ struct resource **r = ptr;
+
+ remove_resource(*r);
+}
+
+/**
+ * devm_insert_resource() - insert an I/O or memory resource
+ * @dev: device for which to produce the resource
+ * @root: root of the resource tree
+ * @new: descriptor of the new resource
+ *
+ * This is a device-managed version of insert_resource(). There is usually
+ * no need to release resources requested by this function explicitly since
+ * that will be taken care of when the device is unbound from its bus driver.
+ * If for some reason the resource needs to be released explicitly, because
+ * of ordering issues for example, bus drivers must call devm_remove_resource()
+ * rather than the regular remove_resource().
+ *
+ * devm_insert_resource() is intended for producers of resources, such as
+ * FW modules and bus drivers.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int devm_insert_resource(struct device *dev, struct resource *root,
+ struct resource *new)
+{
+ struct resource **ptr;
+ int ret;
+
+ ptr = devres_alloc(__devm_remove_resource, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
+
+ *ptr = new;
+
+ ret = insert_resource(root, new);
+ if (ret) {
+ dev_err(dev, "unable to insert resource: %pR (%d)\n", new, ret);
+ devres_free(ptr);
+ return -EBUSY;
+ }
+
+ devres_add(dev, ptr);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devm_insert_resource);
+
+/**
+ * devm_remove_resource() - remove a previously inserted resource
+ * @dev: device for which to remove the resource
+ * @old: descriptor of the resource
+ *
+ * Remove a resource previously inserted using devm_insert_resource().
+ *
+ * devm_remove_resource() is intended for producers of resources, such as
+ * FW modules and bus drivers.
+ */
+void devm_remove_resource(struct device *dev, struct resource *old)
+{
+ WARN_ON(devres_release(dev, __devm_remove_resource, devm_resource_match,
+ old));
+}
+EXPORT_SYMBOL_GPL(devm_remove_resource);
+
+/*
* Called from init/main.c to reserve IO ports.
*/
#define MAXRESERVE 4
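For the explicit-release case called out in the kernel-doc above, a
minimal sketch (the surrounding driver context is assumed, not shown in
the patch):

static void foo_release_early(struct device *dev, struct resource *res)
{
	/*
	 * A resource inserted with devm_insert_resource() must be
	 * removed with the devm variant so its devres entry is
	 * released along with it; plain remove_resource() would
	 * leave a stale devres callback that fires again at unbind.
	 */
	devm_remove_resource(dev, res);
}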
[PATCH 4.2.y-ckt 176/273] x86/mm: Fix vmalloc_fault() to handle large pages properly
by Kamal Mostafa
4.2.8-ckt5 -stable review patch. If anyone has any objections, please let me know.
---8<------------------------------------------------------------
From: Toshi Kani <toshi.kani(a)hpe.com>
commit f4eafd8bcd5229e998aa252627703b8462c3b90f upstream.
A kernel page fault oops with the callstack below was observed
when a read syscall was made to a pmem device after a huge amount
(>512GB) of vmalloc ranges was allocated by ioremap() on an x86_64
system:
BUG: unable to handle kernel paging request at ffff880840000ff8
IP: vmalloc_fault+0x1be/0x300
PGD c7f03a067 PUD 0
Oops: 0000 [#1] SM
Call Trace:
__do_page_fault+0x285/0x3e0
do_page_fault+0x2f/0x80
? put_prev_entity+0x35/0x7a0
page_fault+0x28/0x30
? memcpy_erms+0x6/0x10
? schedule+0x35/0x80
? pmem_rw_bytes+0x6a/0x190 [nd_pmem]
? schedule_timeout+0x183/0x240
btt_log_read+0x63/0x140 [nd_btt]
:
? __symbol_put+0x60/0x60
? kernel_read+0x50/0x80
SyS_finit_module+0xb9/0xf0
entry_SYSCALL_64_fastpath+0x1a/0xa4
Since v4.1, ioremap() supports large page (pud/pmd) mappings in
x86_64 and PAE. vmalloc_fault(), however, assumes that the vmalloc
range is limited to pte mappings.
vmalloc faults do not normally happen in ioremap'd ranges since
ioremap() sets up the kernel page tables, which are shared by
user processes. pgd_ctor() copies the kernel's PGD entries to the
user's during fork(). When allocation of the vmalloc ranges
crosses a 512GB boundary, ioremap() allocates a new pud table
and updates the kernel PGD entry to point to it. If a user process's
PGD entry does not have this update yet, a read/write syscall
to the range will cause a vmalloc fault, which hits the Oops
above as it does not handle a large page properly.
The following changes are made to vmalloc_fault():
64-bit:
- No change for the PGD sync operation as it handles large
pages already.
- Add pud_huge() and pmd_huge() to the validation code to
handle large pages.
- Change pud_page_vaddr() to pud_pfn() since an ioremap range
is not directly mapped (while the if-statement still works
with a bogus addr).
- Change pmd_page() to pmd_pfn() since an ioremap range is not
backed by struct page (while the if-statement still works
with a bogus addr).
32-bit:
- No change for the sync operation since the index3 PGD entry
covers the entire vmalloc range, which is always valid.
(A separate change to sync PGD entry is necessary if this
memory layout is changed regardless of the page size.)
- Add pmd_huge() to the validation code to handle large pages.
This is for completeness since vmalloc_fault() won't happen
in ioremap'd ranges as its PGD entry is always valid.
Reported-by: Henning Schild <henning.schild(a)siemens.com>
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Acked-by: Borislav Petkov <bp(a)alien8.de>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof(a)suse.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Toshi Kani <toshi.kani(a)hp.com>
Cc: linux-mm(a)kvack.org
Cc: linux-nvdimm(a)lists.01.org
Link: http://lkml.kernel.org/r/1455758214-24623-1-git-send-email-toshi.kani@hpe...
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Kamal Mostafa <kamal(a)canonical.com>
---
arch/x86/mm/fault.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9dc9098..1d3beaf 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -286,6 +286,9 @@ static noinline int vmalloc_fault(unsigned long address)
if (!pmd_k)
return -1;
+ if (pmd_huge(*pmd_k))
+ return 0;
+
pte_k = pte_offset_kernel(pmd_k, address);
if (!pte_present(*pte_k))
return -1;
@@ -357,8 +360,6 @@ void vmalloc_sync_all(void)
* 64-bit:
*
* Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
*/
static noinline int vmalloc_fault(unsigned long address)
{
@@ -400,17 +401,23 @@ static noinline int vmalloc_fault(unsigned long address)
if (pud_none(*pud_ref))
return -1;
- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+ if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
BUG();
+ if (pud_huge(*pud))
+ return 0;
+
pmd = pmd_offset(pud, address);
pmd_ref = pmd_offset(pud_ref, address);
if (pmd_none(*pmd_ref))
return -1;
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+ if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
BUG();
+ if (pmd_huge(*pmd))
+ return 0;
+
pte_ref = pte_offset_kernel(pmd_ref, address);
if (!pte_present(*pte_ref))
return -1;
--
2.7.0
[PATCH 4.2.y-ckt 172/273] x86/uaccess/64: Handle the caching of 4-byte nocache copies properly in __copy_user_nocache()
by Kamal Mostafa
4.2.8-ckt5 -stable review patch. If anyone has any objections, please let me know.
---8<------------------------------------------------------------
From: Toshi Kani <toshi.kani(a)hpe.com>
commit a82eee7424525e34e98d821dd059ce14560a1e35 upstream.
Data corruption issues were observed in tests which initiated
a system crash/reset while accessing BTT devices. This problem
is reproducible.
The BTT driver calls pmem_rw_bytes() to update data in pmem
devices. This interface calls __copy_user_nocache(), which
uses non-temporal stores so that the stores to pmem are
persistent.
__copy_user_nocache() uses non-temporal stores when a request
size is 8 bytes or larger (and is aligned by 8 bytes). The
BTT driver updates the BTT map table, whose entries are
4 bytes in size. Therefore, updates to the map table entries remain
cached, and are not written to pmem after a crash.
Change __copy_user_nocache() to use a non-temporal store when
the request size is 4 bytes. The change extends the current
byte-copy path for a less-than-8-byte request, and does not
add any overhead to the regular path.
Reported-and-tested-by: Micah Parrish <micah.parrish(a)hpe.com>
Reported-and-tested-by: Brian Boylston <brian.boylston(a)hpe.com>
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof(a)suse.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Toshi Kani <toshi.kani(a)hp.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Cc: linux-nvdimm(a)lists.01.org
Link: http://lkml.kernel.org/r/1455225857-12039-3-git-send-email-toshi.kani@hpe...
[ Small readability edits. ]
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Kamal Mostafa <kamal(a)canonical.com>
---
arch/x86/lib/copy_user_64.S | 36 ++++++++++++++++++++++++++++++++----
1 file changed, 32 insertions(+), 4 deletions(-)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index a644aad..27f89c7 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -237,13 +237,14 @@ ENDPROC(copy_user_enhanced_fast_string)
* Note: Cached memory copy is used when destination or size is not
* naturally aligned. That is:
* - Require 8-byte alignment when size is 8 bytes or larger.
+ * - Require 4-byte alignment when size is 4 bytes.
*/
ENTRY(__copy_user_nocache)
ASM_STAC
- /* If size is less than 8 bytes, go to byte copy */
+ /* If size is less than 8 bytes, go to 4-byte copy */
cmpl $8,%edx
- jb .L_1b_cache_copy_entry
+ jb .L_4b_nocache_copy_entry
/* If destination is not 8-byte aligned, "cache" copy to align it */
ALIGN_DESTINATION
@@ -282,7 +283,7 @@ ENTRY(__copy_user_nocache)
movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
- jz .L_1b_cache_copy_entry /* jump if count is 0 */
+ jz .L_4b_nocache_copy_entry /* jump if count is 0 */
/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
@@ -294,11 +295,33 @@ ENTRY(__copy_user_nocache)
jnz .L_8b_nocache_copy_loop
/* If no byte left, we're done */
-.L_1b_cache_copy_entry:
+.L_4b_nocache_copy_entry:
+ andl %edx,%edx
+ jz .L_finish_copy
+
+ /* If destination is not 4-byte aligned, go to byte copy: */
+ movl %edi,%ecx
+ andl $3,%ecx
+ jnz .L_1b_cache_copy_entry
+
+ /* Set 4-byte copy count (1 or 0) and remainder */
+ movl %edx,%ecx
+ andl $3,%edx
+ shrl $2,%ecx
+ jz .L_1b_cache_copy_entry /* jump if count is 0 */
+
+ /* Perform 4-byte nocache copy: */
+30: movl (%rsi),%r8d
+31: movnti %r8d,(%rdi)
+ leaq 4(%rsi),%rsi
+ leaq 4(%rdi),%rdi
+
+ /* If no bytes left, we're done: */
andl %edx,%edx
jz .L_finish_copy
/* Perform byte "cache" loop-copy for the remainder */
+.L_1b_cache_copy_entry:
movl %edx,%ecx
.L_1b_cache_copy_loop:
40: movb (%rsi),%al
@@ -323,6 +346,9 @@ ENTRY(__copy_user_nocache)
.L_fixup_8b_copy:
lea (%rdx,%rcx,8),%rdx
jmp .L_fixup_handle_tail
+.L_fixup_4b_copy:
+ lea (%rdx,%rcx,4),%rdx
+ jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
movl %ecx,%edx
.L_fixup_handle_tail:
@@ -348,6 +374,8 @@ ENTRY(__copy_user_nocache)
_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
_ASM_EXTABLE(20b,.L_fixup_8b_copy)
_ASM_EXTABLE(21b,.L_fixup_8b_copy)
+ _ASM_EXTABLE(30b,.L_fixup_4b_copy)
+ _ASM_EXTABLE(31b,.L_fixup_4b_copy)
_ASM_EXTABLE(40b,.L_fixup_1b_copy)
_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
--
2.7.0
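To make the failure mode concrete, a hedged illustration (the helper
and its arguments are hypothetical; the real path runs through
pmem_rw_bytes()) of the kind of 4-byte map update the BTT driver
performs:

/*
 * The store reaches pmem via memcpy_to_pmem() -> __copy_user_nocache().
 * Before this patch a 4-byte request took the cached byte-copy loop, so
 * the map entry could be lost across a crash despite the wmb_pmem()
 * fence; after it, the entry goes out with a non-temporal movnti.
 */
static void foo_btt_map_write(void __pmem *map_addr, u32 post_map_lba)
{
	__le32 mapent = cpu_to_le32(post_map_lba);

	memcpy_to_pmem(map_addr, &mapent, sizeof(mapent));
	wmb_pmem();
}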
[PATCH 4.2.y-ckt 171/273] x86/uaccess/64: Make the __copy_user_nocache() assembly code more readable
by Kamal Mostafa
4.2.8-ckt5 -stable review patch. If anyone has any objections, please let me know.
---8<------------------------------------------------------------
From: Toshi Kani <toshi.kani(a)hpe.com>
commit ee9737c924706aaa72c2ead93e3ad5644681dc1c upstream.
Add comments to __copy_user_nocache() to clarify its procedures
and alignment requirements.
Also change numeric branch target labels to named local labels.
No code changed:
arch/x86/lib/copy_user_64.o:
text data bss dec hex filename
1239 0 0 1239 4d7 copy_user_64.o.before
1239 0 0 1239 4d7 copy_user_64.o.after
md5:
58bed94c2db98c1ca9a2d46d0680aaae copy_user_64.o.before.asm
58bed94c2db98c1ca9a2d46d0680aaae copy_user_64.o.after.asm
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof(a)suse.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Toshi Kani <toshi.kani(a)hp.com>
Cc: brian.boylston(a)hpe.com
Cc: dan.j.williams(a)intel.com
Cc: linux-nvdimm(a)lists.01.org
Cc: micah.parrish(a)hpe.com
Cc: ross.zwisler(a)linux.intel.com
Cc: vishal.l.verma(a)intel.com
Link: http://lkml.kernel.org/r/1455225857-12039-2-git-send-email-toshi.kani@hpe...
[ Small readability edits and added object file comparison. ]
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Kamal Mostafa <kamal(a)canonical.com>
---
arch/x86/lib/copy_user_64.S | 114 ++++++++++++++++++++++++++++----------------
1 file changed, 73 insertions(+), 41 deletions(-)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 982ce34..a644aad 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -232,17 +232,30 @@ ENDPROC(copy_user_enhanced_fast_string)
/*
* copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
+ * This will force destination out of cache for more performance.
+ *
+ * Note: Cached memory copy is used when destination or size is not
+ * naturally aligned. That is:
+ * - Require 8-byte alignment when size is 8 bytes or larger.
*/
ENTRY(__copy_user_nocache)
ASM_STAC
+
+ /* If size is less than 8 bytes, go to byte copy */
cmpl $8,%edx
- jb 20f /* less then 8 bytes, go to byte copy loop */
+ jb .L_1b_cache_copy_entry
+
+ /* If destination is not 8-byte aligned, "cache" copy to align it */
ALIGN_DESTINATION
+
+ /* Set 4x8-byte copy count and remainder */
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
- jz 17f
+ jz .L_8b_nocache_copy_entry /* jump if count is 0 */
+
+ /* Perform 4x8-byte nocache loop-copy */
+.L_4x8b_nocache_copy_loop:
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
@@ -262,60 +275,79 @@ ENTRY(__copy_user_nocache)
leaq 64(%rsi),%rsi
leaq 64(%rdi),%rdi
decl %ecx
- jnz 1b
-17: movl %edx,%ecx
+ jnz .L_4x8b_nocache_copy_loop
+
+ /* Set 8-byte copy count and remainder */
+.L_8b_nocache_copy_entry:
+ movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
- jz 20f
-18: movq (%rsi),%r8
-19: movnti %r8,(%rdi)
+ jz .L_1b_cache_copy_entry /* jump if count is 0 */
+
+ /* Perform 8-byte nocache loop-copy */
+.L_8b_nocache_copy_loop:
+20: movq (%rsi),%r8
+21: movnti %r8,(%rdi)
leaq 8(%rsi),%rsi
leaq 8(%rdi),%rdi
decl %ecx
- jnz 18b
-20: andl %edx,%edx
- jz 23f
+ jnz .L_8b_nocache_copy_loop
+
+ /* If no byte left, we're done */
+.L_1b_cache_copy_entry:
+ andl %edx,%edx
+ jz .L_finish_copy
+
+ /* Perform byte "cache" loop-copy for the remainder */
movl %edx,%ecx
-21: movb (%rsi),%al
-22: movb %al,(%rdi)
+.L_1b_cache_copy_loop:
+40: movb (%rsi),%al
+41: movb %al,(%rdi)
incq %rsi
incq %rdi
decl %ecx
- jnz 21b
-23: xorl %eax,%eax
+ jnz .L_1b_cache_copy_loop
+
+ /* Finished copying; fence the prior stores */
+.L_finish_copy:
+ xorl %eax,%eax
ASM_CLAC
sfence
ret
.section .fixup,"ax"
-30: shll $6,%ecx
+.L_fixup_4x8b_copy:
+ shll $6,%ecx
addl %ecx,%edx
- jmp 60f
-40: lea (%rdx,%rcx,8),%rdx
- jmp 60f
-50: movl %ecx,%edx
-60: sfence
+ jmp .L_fixup_handle_tail
+.L_fixup_8b_copy:
+ lea (%rdx,%rcx,8),%rdx
+ jmp .L_fixup_handle_tail
+.L_fixup_1b_copy:
+ movl %ecx,%edx
+.L_fixup_handle_tail:
+ sfence
jmp copy_user_handle_tail
.previous
- _ASM_EXTABLE(1b,30b)
- _ASM_EXTABLE(2b,30b)
- _ASM_EXTABLE(3b,30b)
- _ASM_EXTABLE(4b,30b)
- _ASM_EXTABLE(5b,30b)
- _ASM_EXTABLE(6b,30b)
- _ASM_EXTABLE(7b,30b)
- _ASM_EXTABLE(8b,30b)
- _ASM_EXTABLE(9b,30b)
- _ASM_EXTABLE(10b,30b)
- _ASM_EXTABLE(11b,30b)
- _ASM_EXTABLE(12b,30b)
- _ASM_EXTABLE(13b,30b)
- _ASM_EXTABLE(14b,30b)
- _ASM_EXTABLE(15b,30b)
- _ASM_EXTABLE(16b,30b)
- _ASM_EXTABLE(18b,40b)
- _ASM_EXTABLE(19b,40b)
- _ASM_EXTABLE(21b,50b)
- _ASM_EXTABLE(22b,50b)
+ _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+ _ASM_EXTABLE(20b,.L_fixup_8b_copy)
+ _ASM_EXTABLE(21b,.L_fixup_8b_copy)
+ _ASM_EXTABLE(40b,.L_fixup_1b_copy)
+ _ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
--
2.7.0
[4.2.y-ckt stable] Patch "x86/mm: Fix vmalloc_fault() to handle large pages properly" has been added to the 4.2.y-ckt tree
by Kamal Mostafa
This is a note to let you know that I have just added a patch titled
x86/mm: Fix vmalloc_fault() to handle large pages properly
to the linux-4.2.y-queue branch of the 4.2.y-ckt extended stable tree
which can be found at:
http://kernel.ubuntu.com/git/ubuntu/linux.git/log/?h=linux-4.2.y-queue
This patch is scheduled to be released in version 4.2.8-ckt5.
If you, or anyone else, feels it should not be added to this tree, please
reply to this email.
For more information about the 4.2.y-ckt tree, see
https://wiki.ubuntu.com/Kernel/Dev/ExtendedStable
Thanks.
-Kamal
---8<------------------------------------------------------------
From f2282ee1b90bbfbbaccbad1945c38611aa2d8810 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani(a)hpe.com>
Date: Wed, 17 Feb 2016 18:16:54 -0700
Subject: x86/mm: Fix vmalloc_fault() to handle large pages properly
commit f4eafd8bcd5229e998aa252627703b8462c3b90f upstream.
A kernel page fault oops with the callstack below was observed
when a read syscall was made to a pmem device after a huge amount
(>512GB) of vmalloc ranges was allocated by ioremap() on an x86_64
system:
BUG: unable to handle kernel paging request at ffff880840000ff8
IP: vmalloc_fault+0x1be/0x300
PGD c7f03a067 PUD 0
Oops: 0000 [#1] SM
Call Trace:
__do_page_fault+0x285/0x3e0
do_page_fault+0x2f/0x80
? put_prev_entity+0x35/0x7a0
page_fault+0x28/0x30
? memcpy_erms+0x6/0x10
? schedule+0x35/0x80
? pmem_rw_bytes+0x6a/0x190 [nd_pmem]
? schedule_timeout+0x183/0x240
btt_log_read+0x63/0x140 [nd_btt]
:
? __symbol_put+0x60/0x60
? kernel_read+0x50/0x80
SyS_finit_module+0xb9/0xf0
entry_SYSCALL_64_fastpath+0x1a/0xa4
Since v4.1, ioremap() supports large page (pud/pmd) mappings in
x86_64 and PAE. vmalloc_fault(), however, assumes that the vmalloc
range is limited to pte mappings.
vmalloc faults do not normally happen in ioremap'd ranges since
ioremap() sets up the kernel page tables, which are shared by
user processes. pgd_ctor() copies the kernel's PGD entries to the
user's during fork(). When allocation of the vmalloc ranges
crosses a 512GB boundary, ioremap() allocates a new pud table
and updates the kernel PGD entry to point to it. If a user process's
PGD entry does not have this update yet, a read/write syscall
to the range will cause a vmalloc fault, which hits the Oops
above as it does not handle a large page properly.
The following changes are made to vmalloc_fault():
64-bit:
- No change for the PGD sync operation as it handles large
pages already.
- Add pud_huge() and pmd_huge() to the validation code to
handle large pages.
- Change pud_page_vaddr() to pud_pfn() since an ioremap range
is not directly mapped (while the if-statement still works
with a bogus addr).
- Change pmd_page() to pmd_pfn() since an ioremap range is not
backed by struct page (while the if-statement still works
with a bogus addr).
32-bit:
- No change for the sync operation since the index3 PGD entry
covers the entire vmalloc range, which is always valid.
(A separate change to sync PGD entry is necessary if this
memory layout is changed regardless of the page size.)
- Add pmd_huge() to the validation code to handle large pages.
This is for completeness since vmalloc_fault() won't happen
in ioremap'd ranges as its PGD entry is always valid.
Reported-by: Henning Schild <henning.schild(a)siemens.com>
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Acked-by: Borislav Petkov <bp(a)alien8.de>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof(a)suse.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Toshi Kani <toshi.kani(a)hp.com>
Cc: linux-mm(a)kvack.org
Cc: linux-nvdimm(a)lists.01.org
Link: http://lkml.kernel.org/r/1455758214-24623-1-git-send-email-toshi.kani@hpe...
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Kamal Mostafa <kamal(a)canonical.com>
---
arch/x86/mm/fault.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9dc9098..1d3beaf 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -286,6 +286,9 @@ static noinline int vmalloc_fault(unsigned long address)
if (!pmd_k)
return -1;
+ if (pmd_huge(*pmd_k))
+ return 0;
+
pte_k = pte_offset_kernel(pmd_k, address);
if (!pte_present(*pte_k))
return -1;
@@ -357,8 +360,6 @@ void vmalloc_sync_all(void)
* 64-bit:
*
* Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
*/
static noinline int vmalloc_fault(unsigned long address)
{
@@ -400,17 +401,23 @@ static noinline int vmalloc_fault(unsigned long address)
if (pud_none(*pud_ref))
return -1;
- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+ if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
BUG();
+ if (pud_huge(*pud))
+ return 0;
+
pmd = pmd_offset(pud, address);
pmd_ref = pmd_offset(pud_ref, address);
if (pmd_none(*pmd_ref))
return -1;
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+ if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
BUG();
+ if (pmd_huge(*pmd))
+ return 0;
+
pte_ref = pte_offset_kernel(pmd_ref, address);
if (!pte_present(*pte_ref))
return -1;
--
2.7.0
[4.2.y-ckt stable] Patch "x86/uaccess/64: Handle the caching of 4-byte nocache copies properly in __copy_user_nocache()" has been added to the 4.2.y-ckt tree
by Kamal Mostafa
This is a note to let you know that I have just added a patch titled
x86/uaccess/64: Handle the caching of 4-byte nocache copies properly in __copy_user_nocache()
to the linux-4.2.y-queue branch of the 4.2.y-ckt extended stable tree
which can be found at:
http://kernel.ubuntu.com/git/ubuntu/linux.git/log/?h=linux-4.2.y-queue
This patch is scheduled to be released in version 4.2.8-ckt5.
If you, or anyone else, feels it should not be added to this tree, please
reply to this email.
For more information about the 4.2.y-ckt tree, see
https://wiki.ubuntu.com/Kernel/Dev/ExtendedStable
Thanks.
-Kamal
---8<------------------------------------------------------------
From e1aa4f7fd1f243f280c9c3fde9d4cbe31e4cc492 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani(a)hpe.com>
Date: Thu, 11 Feb 2016 14:24:17 -0700
Subject: x86/uaccess/64: Handle the caching of 4-byte nocache copies properly
in __copy_user_nocache()
commit a82eee7424525e34e98d821dd059ce14560a1e35 upstream.
Data corruption issues were observed in tests which initiated
a system crash/reset while accessing BTT devices. This problem
is reproducible.
The BTT driver calls pmem_rw_bytes() to update data in pmem
devices. This interface calls __copy_user_nocache(), which
uses non-temporal stores so that the stores to pmem are
persistent.
__copy_user_nocache() uses non-temporal stores when a request
size is 8 bytes or larger (and is aligned by 8 bytes). The
BTT driver updates the BTT map table, whose entries are
4 bytes in size. Therefore, updates to the map table entries remain
cached, and are not written to pmem after a crash.
Change __copy_user_nocache() to use a non-temporal store when
the request size is 4 bytes. The change extends the current
byte-copy path for a less-than-8-byte request, and does not
add any overhead to the regular path.
Reported-and-tested-by: Micah Parrish <micah.parrish(a)hpe.com>
Reported-and-tested-by: Brian Boylston <brian.boylston(a)hpe.com>
Signed-off-by: Toshi Kani <toshi.kani(a)hpe.com>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Andy Lutomirski <luto(a)amacapital.net>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: Borislav Petkov <bp(a)suse.de>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Denys Vlasenko <dvlasenk(a)redhat.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof(a)suse.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Ross Zwisler <ross.zwisler(a)linux.intel.com>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Toshi Kani <toshi.kani(a)hp.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Cc: linux-nvdimm(a)lists.01.org
Link: http://lkml.kernel.org/r/1455225857-12039-3-git-send-email-toshi.kani@hpe...
[ Small readability edits. ]
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Signed-off-by: Kamal Mostafa <kamal(a)canonical.com>
---
arch/x86/lib/copy_user_64.S | 36 ++++++++++++++++++++++++++++++++----
1 file changed, 32 insertions(+), 4 deletions(-)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index a644aad..27f89c7 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -237,13 +237,14 @@ ENDPROC(copy_user_enhanced_fast_string)
* Note: Cached memory copy is used when destination or size is not
* naturally aligned. That is:
* - Require 8-byte alignment when size is 8 bytes or larger.
+ * - Require 4-byte alignment when size is 4 bytes.
*/
ENTRY(__copy_user_nocache)
ASM_STAC
- /* If size is less than 8 bytes, go to byte copy */
+ /* If size is less than 8 bytes, go to 4-byte copy */
cmpl $8,%edx
- jb .L_1b_cache_copy_entry
+ jb .L_4b_nocache_copy_entry
/* If destination is not 8-byte aligned, "cache" copy to align it */
ALIGN_DESTINATION
@@ -282,7 +283,7 @@ ENTRY(__copy_user_nocache)
movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
- jz .L_1b_cache_copy_entry /* jump if count is 0 */
+ jz .L_4b_nocache_copy_entry /* jump if count is 0 */
/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
@@ -294,11 +295,33 @@ ENTRY(__copy_user_nocache)
jnz .L_8b_nocache_copy_loop
/* If no byte left, we're done */
-.L_1b_cache_copy_entry:
+.L_4b_nocache_copy_entry:
+ andl %edx,%edx
+ jz .L_finish_copy
+
+ /* If destination is not 4-byte aligned, go to byte copy: */
+ movl %edi,%ecx
+ andl $3,%ecx
+ jnz .L_1b_cache_copy_entry
+
+ /* Set 4-byte copy count (1 or 0) and remainder */
+ movl %edx,%ecx
+ andl $3,%edx
+ shrl $2,%ecx
+ jz .L_1b_cache_copy_entry /* jump if count is 0 */
+
+ /* Perform 4-byte nocache copy: */
+30: movl (%rsi),%r8d
+31: movnti %r8d,(%rdi)
+ leaq 4(%rsi),%rsi
+ leaq 4(%rdi),%rdi
+
+ /* If no bytes left, we're done: */
andl %edx,%edx
jz .L_finish_copy
/* Perform byte "cache" loop-copy for the remainder */
+.L_1b_cache_copy_entry:
movl %edx,%ecx
.L_1b_cache_copy_loop:
40: movb (%rsi),%al
@@ -323,6 +346,9 @@ ENTRY(__copy_user_nocache)
.L_fixup_8b_copy:
lea (%rdx,%rcx,8),%rdx
jmp .L_fixup_handle_tail
+.L_fixup_4b_copy:
+ lea (%rdx,%rcx,4),%rdx
+ jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
movl %ecx,%edx
.L_fixup_handle_tail:
@@ -348,6 +374,8 @@ ENTRY(__copy_user_nocache)
_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
_ASM_EXTABLE(20b,.L_fixup_8b_copy)
_ASM_EXTABLE(21b,.L_fixup_8b_copy)
+ _ASM_EXTABLE(30b,.L_fixup_4b_copy)
+ _ASM_EXTABLE(31b,.L_fixup_4b_copy)
_ASM_EXTABLE(40b,.L_fixup_1b_copy)
_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
--
2.7.0