Historical speck list archives
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: speck@linutronix.de
Subject: [MODERATED] Re: Some micro-perf tests
Date: Wed, 27 Feb 2019 19:44:39 +0000	[thread overview]
Message-ID: <404d365b-94c6-0846-22c0-17576238c51a@citrix.com> (raw)
In-Reply-To: <9E22D332-B9DA-4CE3-9C8D-EF45E696386D@intel.com>


[-- Attachment #1.1: Type: text/plain, Size: 2418 bytes --]

On 27/02/2019 19:09, speck for Stewart, David C wrote:
> On Sat, Feb 23, 2019 at 10:27 AM speck for Andrew Cooper
> <speck@linutronix.de> wrote:
>>
>>
>> Pre microcode:
>> * VERW of NUL   => 65-69 cycles
>> * VERW of %ds   => 33-37 cycles
>>
>> Post microcode:
>> * VERW of NUL   => 512-520 cycles
>> * VERW of %ds   => 520-540 cycles
> 
> Andrew – can you please send me your code? We need to root-cause why the behavior you are seeing is different than our guidance.

Hello,

The exact code is unlikely to be of direct interest, as it is specific
to my Xen Test Framework.

The interesting subset, converted to work in regular userspace, is:

andrewcoop@andrewcoop:/tmp/verw$ cat verw.c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Compiler-only barrier.  The asm template is empty, so no instruction is
 * emitted; the "memory" clobber tells the compiler that memory may have
 * changed, so it must not cache memory values in registers across this point
 * or move memory accesses from one side of it to the other.
 */
#define barrier() asm volatile ("" ::: "memory")

/*
 * Return the selector currently loaded in %ds.
 *
 * The output operand is restricted to a register ("=r").  The memory form of
 * "mov %sreg, mem" stores only 16 bits, so allowing a memory operand for the
 * 32-bit 'sel' would leave its upper half uninitialised whenever the compiler
 * picked that alternative.
 *
 * __asm__ is used instead of asm so the function also builds under ISO
 * -std= modes, where the plain asm keyword is not available.
 */
static unsigned int read_ds(void)
{
    unsigned int sel;

    __asm__ volatile ("mov %%ds, %0" : "=r" (sel));

    return sel;
}

/*
 * Read the timestamp counter with RDTSCP and return it as a 64-bit value.
 *
 * RDTSCP delivers the counter in EDX:EAX, so both halves are held in explicit
 * 32-bit variables rather than in 'unsigned long', whose width differs between
 * -m32 and -m64 builds.  The instruction also writes ECX, which is not needed
 * here and is therefore only listed as a clobber.
 *
 * __asm__ is used instead of asm so the function also builds under ISO
 * -std= modes, where the plain asm keyword is not available.
 */
static uint64_t read_time(void)
{
    uint32_t low, high;

    __asm__ volatile ("rdtscp"
                      : "=a" (low), "=d" (high) :: "ecx");

    return ((uint64_t)high << 32) | low;
}

/*
 * Measure one VERW of the given selector.
 *
 * Takes a timestamp, executes VERW with 'sel' as a memory operand, takes a
 * second timestamp, and returns the difference between the two readings.
 * The compiler barriers bracket the whole timed sequence.
 */
static uint64_t time_sel(unsigned int sel)
{
    uint64_t start, end;

    barrier();
    start = read_time();

    asm volatile ("verw %0" :: "m" (sel));

    end = read_time();
    barrier();

    return end - start;
}

/*
 * Time VERW of the NUL selector and of the current %ds selector, then print
 * the two series side by side, one sample per line.
 *
 * Every sample is collected before anything is printed.
 */
int main(void)
{
    /* Single definition of the sample count, shared by the array and loops. */
    enum { NR_SAMPLES = 50 };

    static uint64_t times[2][NR_SAMPLES];
    unsigned int i;

    /* Row 0: VERW of the NUL selector. */
    for ( i = 0; i < NR_SAMPLES; ++i )
    {
        unsigned int sel = 0;

        times[0][i] = time_sel(sel);
    }

    /* Row 1: VERW of the selector currently loaded in %ds. */
    for ( i = 0; i < NR_SAMPLES; ++i )
    {
        unsigned int sel = read_ds();

        times[1][i] = time_sel(sel);
    }

    /* Header line naming the two selectors, followed by the samples. */
    printf("     0 \t%#4x\n", read_ds());
    for ( i = 0; i < NR_SAMPLES; ++i )
        printf("[%02u] %"PRIu64"\t%"PRIu64"\n",
               i, times[0][i], times[1][i]);

    return 0;
}

To compile,
$ gcc -m32 -O3 verw.c -o verw

For 64bit, you need to hardcode a %ds other than 0.  0x2b looks to be
the going candidate.

The exact CPU in question is:

[root@idol ~]# head /proc/cpuinfo
processor	: 0
vendor_id	: GenuineIntel
cpu family	: 6
model		: 158
model name	: Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
stepping	: 10
microcode	: 0x109a
cpu MHz		: 3200.000
cache size	: 12288 KB
physical id	: 0

which is a CoffeeLake part using the alpha ucode drop.

Thanks,

~Andrew

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.2: 0001-time-verw.patch --]
[-- Type: text/x-patch; name="0001-time-verw.patch", Size: 3306 bytes --]

From 15872ecbac1df9fba04b8a1e052b888f4b65eaf2 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 1 Nov 2018 20:21:36 +0000
Subject: [PATCH 1/1] time verw

---
 docs/all-tests.dox  |   2 +
 tests/verw/Makefile |   9 +++++
 tests/verw/main.c   | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 116 insertions(+)
 create mode 100644 tests/verw/Makefile
 create mode 100644 tests/verw/main.c

diff --git a/docs/all-tests.dox b/docs/all-tests.dox
index 732d44c..8aa54ab 100644
--- a/docs/all-tests.dox
+++ b/docs/all-tests.dox
@@ -146,3 +146,5 @@ enable BTS.
 
 @subpage test-nested-vmx - Nested VT-x tests.
 */
+# Placeholder: Merge into the appropriate location above
+@subpage test-verw - @todo title
diff --git a/tests/verw/Makefile b/tests/verw/Makefile
new file mode 100644
index 0000000..b6260b2
--- /dev/null
+++ b/tests/verw/Makefile
@@ -0,0 +1,9 @@
+include $(ROOT)/build/common.mk
+
+NAME      := verw
+CATEGORY  := utility
+TEST-ENVS := pv64 hvm64
+
+obj-perenv += main.o
+
+include $(ROOT)/build/gen.mk
diff --git a/tests/verw/main.c b/tests/verw/main.c
new file mode 100644
index 0000000..630f881
--- /dev/null
+++ b/tests/verw/main.c
@@ -0,0 +1,105 @@
+/**
+ * @file tests/verw/main.c
+ * @ref test-verw
+ *
+ * @page test-verw verw
+ *
+ * @todo Docs for test-verw
+ *
+ * @see tests/verw/main.c
+ */
+#include <xtf.h>
+
+const char test_title[] = "Test verw";
+
+static uint64_t read_time(void)
+{
+    unsigned long low, high;
+
+    asm volatile ("rdtscp"
+                  : "=a" (low), "=d" (high) :: "ecx");
+
+    return ((uint64_t)high << 32) | low;
+}
+
+static uint64_t time_sel(unsigned int sel)
+{
+    uint64_t t1, t2;
+
+    barrier();
+    t1 = read_time();
+
+    asm volatile ("verw %0" :: "m" (sel));
+
+    t2 = read_time();
+    barrier();
+
+    return t2 - t1;
+}
+
+static uint64_t time_flushcmd(void)
+{
+    uint64_t t1, t2;
+
+    barrier();
+    t1 = read_time();
+
+    wrmsr(0x10b, 1);
+
+    t2 = read_time();
+    barrier();
+
+    return t2 - t1;
+}
+
+void test_main(void)
+{
+    static uint64_t times[2][20];
+    unsigned int i;
+
+    for ( i = 0; i < 20; ++i )
+    {
+        unsigned int sel = 0;
+
+        //asm volatile ("clflush %1" : "+r" (sel) : "m"(*gdt));
+
+        times[0][i] = time_sel(sel);
+    }
+
+    for ( i = 0; i < 20; ++i )
+    {
+        unsigned int sel = read_ds();
+
+        //asm volatile ("clflush %1" : "+r" (sel) : "m"(*gdt));
+
+        times[1][i] = time_sel(sel);
+    }
+
+    printk("     0 \t%#x\n", read_ds());
+    for ( i = 0; i < 20; ++i )
+        printk("[%02u] %"PRIu64"\t%"PRIu64"\n",
+               i, times[0][i], times[1][i]);
+
+    if ( IS_DEFINED(CONFIG_HVM) )
+    {
+        for ( i = 0; i < 20; ++i )
+            times[0][i] = time_flushcmd();
+
+        printk("MSR_FLUSH_CMD\n");
+        for ( i = 0; i < 20; ++i )
+            printk("[%02u] %"PRIu64"\n",
+                   i, times[0][i]);
+    }
+
+    xtf_success(NULL);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
2.1.4


  reply	other threads:[~2019-02-27 19:44 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-27 19:09 [MODERATED] Re: Some micro-perf tests Stewart, David C
2019-02-27 19:44 ` Andrew Cooper [this message]
  -- strict thread matches above, loose matches on Subject: below --
2019-02-27 17:53 Stewart, David C

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=404d365b-94c6-0846-22c0-17576238c51a@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=speck@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).