On Tue, Sep 22, 2015 at 10:47 AM, Ross Zwisler
<ross.zwisler(a)linux.intel.com> wrote:
The purpose of this test is to validate that the DAX hugepage fault
handler is working correctly. The DAX PMD fault handler in v4.3-rc1 and
-rc2 has an issue where it tries to zero at an undefined address,
causing a BUG(). Without the zeroing code in place at all this test
will find data corruption as the newly allocated huge page will be
filled with random garbage.
This test is being added to the "destructive" group, and is currently
only run as part of 'make check-TESTS'. We also specifically call out a
device named "/dev/pmem0" to avoid running into a known bug with PMD
page faults on struct page backed devices (/dev/pmem0m). This will be
broadened when that bug is addressed.
Signed-off-by: Ross Zwisler <ross.zwisler(a)linux.intel.com>
---
Makefile.am | 11 +++--
lib/test-dax-pmd.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 134 insertions(+), 3 deletions(-)
create mode 100644 lib/test-dax-pmd.c
diff --git a/Makefile.am b/Makefile.am
index e5b4b49..329cce6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -78,7 +78,8 @@ endif
if ENABLE_DESTRUCTIVE
ndctl_SOURCES += lib/blk_namespaces.c \
lib/pmem_namespaces.c \
- lib/test-pcommit.c
+ lib/test-pcommit.c \
+ lib/test-dax-pmd.c
ndctl_SOURCES += builtin-bat.c
endif
@@ -116,13 +117,17 @@ TESTS = lib/test-libndctl lib/test-dpa-alloc lib/test-parent-uuid
check_PROGRAMS = lib/test-libndctl lib/test-dpa-alloc lib/test-parent-uuid
if ENABLE_DESTRUCTIVE
-TESTS += lib/test-blk-ns lib/test-pmem-ns lib/test-pcommit
-check_PROGRAMS += lib/test-blk-ns lib/test-pmem-ns lib/test-pcommit
+TESTS += lib/test-blk-ns lib/test-pmem-ns lib/test-pcommit lib/test-dax-pmd
+check_PROGRAMS += lib/test-blk-ns lib/test-pmem-ns lib/test-pcommit \
+ lib/test-dax-pmd
endif
lib_test_libndctl_SOURCES = lib/test-libndctl.c lib/test-core.c
lib_test_libndctl_LDADD = lib/libndctl.la $(UUID_LIBS) $(KMOD_LIBS)
+lib_test_dax_pmd_SOURCES = lib/test-dax-pmd.c
+lib_test_dax_pmd_LDADD = lib/libndctl.la $(KMOD_LIBS)
+
lib_test_pcommit_SOURCES = lib/test-pcommit.c
lib_test_pcommit_LDADD = lib/libndctl.la $(KMOD_LIBS)
diff --git a/lib/test-dax-pmd.c b/lib/test-dax-pmd.c
new file mode 100644
index 0000000..ec35312
--- /dev/null
+++ b/lib/test-dax-pmd.c
@@ -0,0 +1,126 @@
+/*
+ * test-dax-pmd: Exercise the DAX PMD page fault path
+ *
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ */
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <test.h>
+
+/*
+ * This will give us a 50 MiB partition. This does take a while to fill in
+ * with random data, but we really need a partition this large so that the ext4
+ * block allocator will give us 2MiB aligned blocks.
+ */
+#define PART_OFFSET_MB 2 /* partition start in MiB, passed to parted mkpart */
+#define PART_END_MB 52 /* partition end in MiB, passed to parted mkpart */
+#define MB(a) ((a) * 1024UL * 1024UL) /* a MiB expressed in bytes */
+#define PAGE(a) ((a)*0x1000) /* a units of 0x1000 (4096) bytes, in bytes */
+#define PART_SIZE MB(PART_END_MB - PART_OFFSET_MB) /* partition size in bytes */
+#define DEV "/dev/pmem0" /* whole device that the test repartitions */
+#define PART "/dev/pmem0p1" /* partition that receives the filesystem */
Which pmem device is this expecting to find? If this is the "e820"
one then use libndctl to find the namespace name by bus, region,
etc...
...bonus points to find the device and auto-convert it from the
default pmem0m to pmem0.
+#define MNT "/mnt/dax" /* mount point used for the dax mount */
+#define MMAP_SIZE MB(4) /* length in bytes of the test mapping */
+
+/*
+ * Run a shell command through system() and terminate the test program if
+ * the command does not complete successfully.
+ *
+ * Returns only when system() reports a zero status. Otherwise the process
+ * exits with the command's own exit code when it exited normally, or with
+ * EXIT_FAILURE when system() itself failed (-1) or the command was killed
+ * by a signal. WEXITSTATUS() is not meaningful in those two cases and can
+ * evaluate to 0, which would make a failed setup step look like a pass.
+ */
+static void sys(char *command)
+{
+	int status = system(command);
+
+	if (status == 0)
+		return;
+
+	if (status == -1 || !WIFEXITED(status))
+		exit(EXIT_FAILURE);
+
+	exit(WEXITSTATUS(status));
+}
+
+/*
+ * The purpose of this test is to validate that the DAX hugepage fault handler
+ * is working correctly. The DAX PMD fault handler in v4.3-rc1 and -rc2 has
+ * an issue where it tries to zero at an undefined address, causing a BUG().
+ * Without the zeroing code in place at all this test will find data
+ * corruption as the newly allocated huge page will be filled with random
+ * garbage.
+ *
+ * Returns TEST_SKIP when DEV is not accessible, 0 on success, 1 when the
+ * data file cannot be opened, and otherwise an errno value: EINVAL when the
+ * mapping is not 2 MiB aligned, EIO when the faulted page was not zeroed or
+ * the file could not be extended, or the errno reported by pwrite()/mmap().
+ * Setup and teardown commands go through sys(), which exits the process if
+ * a command fails.
+ */
+static int test_dax_pmd(void)
+{
+	/* request a 2MiB aligned address with mmap() */
+	char *hint = (char *) 0x10200000;
+	char *data_array;
+	char command[128];
+	ssize_t written;
+	int rc = 0;
+	int fd;
+
+	if (access(DEV, F_OK) < 0)
+		return TEST_SKIP;
+
+	/*
+	 * Set up a configuration that will give us a huge page fault.
+	 * Getting PMD faults is actually pretty tricky - I ended up being
+	 * able to get them by having a 2 MiB aligned partition, making ext4
+	 * with a 4096 block size and a 2 MiB stride, and by explicitly asking
+	 * mmap() to give me a 2 MiB aligned address.
+	 */
+	sys("parted -s " DEV " mktable msdos");
+	snprintf(command, sizeof(command),
+		"parted -s -a optimal " DEV " mkpart Primary %uMiB %uMiB",
+		PART_OFFSET_MB, PART_END_MB);
+	sys(command);
+	snprintf(command, sizeof(command),
+		"dd if=/dev/urandom of=" PART " bs=%d count=%lu",
+		PAGE(1), PART_SIZE/PAGE(1));
Why does the test need to partition the pmem device vs just placing
the filesystem on the raw device directly, and why does it need to
fill it with random data?
+	sys(command);
+	sys("mkfs.ext4 -E stride=512 -b 4096 " PART);
+	sys("mkdir -p " MNT);
+	sys("mount -o dax " PART " " MNT);
+
+	fd = open(MNT "/data", O_RDWR|O_CREAT, S_IRUSR|S_IWUSR);
+	if (fd < 0) {
+		perror("fd");
+		rc = 1;
+		/* the filesystem is mounted by now; do not leave it behind */
+		goto out_umount;
+	}
+
+	/*
+	 * Write to a 10 MiB offset to increase the file size. The entire
+	 * mmap() we set up next will be over a hole.
+	 */
+	written = pwrite(fd, "a", 1, MB(10));
+	if (written != 1) {
+		/* without the size extension the mapping would not be valid */
+		rc = written < 0 ? errno : EIO;
+		goto out_close;
+	}
fallocate?
+
+	data_array = mmap(hint, MMAP_SIZE, PROT_READ|PROT_WRITE,
+			MAP_SHARED, fd, 0);
+	if (data_array == MAP_FAILED) {
+		/* report the real error and skip munmap() of a bad address */
+		rc = errno;
+		goto out_close;
+	}
+
+	if ((long unsigned)data_array & (MB(2)-1)) {
+		rc = EINVAL;
+		goto out_unmap;
+	}
+
+	/*
+	 * Write to the first byte of the hole, causing a DAX PMD page fault.
+	 * If everything works correctly the second byte should be cleared by
+	 * the fault handler, and should read as zero.
+	 */
+	data_array[0] = 0xff;
+	if (data_array[1] != 0)
+		rc = EIO;
I wonder if there is a way to verify we actually got a 2MB mapping
from userspace vs falling back to 4K?
+out_unmap:
+	munmap(data_array, MMAP_SIZE);
+out_close:
+	close(fd);
+out_umount:
+	sys("umount " MNT);
+	return rc;
+}
+
+int __attribute__((weak)) main(int argc, char *argv[])
+{
This needs a "ndctl_test_attempt(test, KERNEL_VERSION(4, 3, 0))"
somewhere since the pmd faulting for DAX is new in 4.3. This is
useful for documenting when features arrived for backports that claim
to be "4.3" feature compatible.