KVM Archive mirror
 help / color / mirror / Atom feed
From: David Woodhouse <dwmw2@infradead.org>
To: Jack Allister <jalliste@amazon.com>
Cc: bp@alien8.de, corbet@lwn.net, dave.hansen@linux.intel.com,
	hpa@zytor.com,  kvm@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-kernel@vger.kernel.org,  mingo@redhat.com, paul@xen.org,
	pbonzini@redhat.com, seanjc@google.com,  tglx@linutronix.de,
	x86@kernel.org, Dongli Zhang <dongli.zhang@oracle.com>
Subject: Re: [PATCH v2 1/2] KVM: x86: Add KVM_[GS]ET_CLOCK_GUEST for accurate KVM clock migration
Date: Mon, 15 Apr 2024 08:16:51 +0100	[thread overview]
Message-ID: <2d3e0d4146a30621324f9b1019d0950e8896ca6d.camel@infradead.org> (raw)
In-Reply-To: <20240410095244.77109-2-jalliste@amazon.com>

[-- Attachment #1: Type: text/plain, Size: 7243 bytes --]

On Wed, 2024-04-10 at 09:52 +0000, Jack Allister wrote:
> 
> +static int kvm_vcpu_ioctl_get_clock_guest(struct kvm_vcpu *v, void __user *argp)
> +{
> +       struct pvclock_vcpu_time_info *vcpu_pvti = &v->arch.hv_clock;
> +       struct pvclock_vcpu_time_info local_pvti = { 0 };
> +       struct kvm_arch *ka = &v->kvm->arch;
> +       uint64_t host_tsc, guest_tsc;
> +       bool use_master_clock;
> +       uint64_t kernel_ns;
> +       unsigned int seq;
> +
> +       /*
> +        * CLOCK_MONOTONIC_RAW is not suitable for GET/SET API,
> +        * see kvm_vcpu_ioctl_set_clock_guest equivalent comment.
> +        */
> +       if (!static_cpu_has(X86_FEATURE_CONSTANT_TSC))
> +               return -EINVAL;
> +
> +       /*
> +        * Querying needs to be performed in a seqcount loop as it's possible
> +        * another vCPU has triggered an update of the master clock. If so we
> +        * should store the host TSC & time at this point.
> +        */
> +       do {
> +               seq = read_seqcount_begin(&ka->pvclock_sc);
> +               use_master_clock = ka->use_master_clock;
> +               if (use_master_clock) {
> +                       host_tsc = ka->master_cycle_now;
> +                       kernel_ns = ka->master_kernel_ns;
> +               }
> +       } while (read_seqcount_retry(&ka->pvclock_sc, seq));
> +
> +       if (!use_master_clock)
> +               return -EINVAL;
> +
> +       /*
> +        * It's possible that this vCPU doesn't have a HVCLOCK configured
> +        * but the other vCPUs may. If this is the case calculate based
> +        * upon the time gathered in the seqcount but do not update the
> +        * vCPU specific PVTI. If we have one, then use that.
> +        */
> +       if (!vcpu_pvti->tsc_timestamp && !vcpu_pvti->system_time) {

 || !kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)

Otherwise you may be using out-of-date information.
 
> +               guest_tsc = kvm_read_l1_tsc(v, host_tsc);
> +
> +               local_pvti.tsc_timestamp = guest_tsc;
> +               local_pvti.system_time = kernel_ns + ka->kvmclock_offset;

This is missing the scale information in tsc_to_system_mul and
tsc_shift. Is there a reason we can't just call kvm_guest_time_update()
from here? (I think we looked at using it for the *SET* function, but
did we look at doing so for GET?)


> +       } else {
> +               local_pvti = *vcpu_pvti;
> +       }
> +
> +       if (copy_to_user(argp, &local_pvti, sizeof(local_pvti)))
> +               return -EFAULT;
> +
> +       return 0;
> +}
> +
> +static int kvm_vcpu_ioctl_set_clock_guest(struct kvm_vcpu *v, void __user *argp)
> +{
> +       struct pvclock_vcpu_time_info dummy_pvti;
> +       struct pvclock_vcpu_time_info orig_pvti;
> +       struct kvm *kvm = v->kvm;
> +       struct kvm_arch *ka = &kvm->arch;
> +       uint64_t clock_orig, clock_dummy;
> +       uint64_t host_tsc, guest_tsc;
> +       int64_t kernel_ns;
> +       int64_t correction;
> +       int rc = 0;
> +
> +       /*
> +        * If a constant TSC is not provided by the host then KVM will
> +        * be using CLOCK_MONOTONIC_RAW, correction using this is not
> +        * precise and as such we can never sync to the precision we
> +        * are requiring.
> +        */
> +       if (!static_cpu_has(X86_FEATURE_CONSTANT_TSC))
> +               return -EINVAL;
> +
> +       if (copy_from_user(&orig_pvti, argp, sizeof(orig_pvti)))
> +               return -EFAULT;
> +
> +       kvm_hv_request_tsc_page_update(kvm);
> +       kvm_start_pvclock_update(kvm);
> +       pvclock_update_vm_gtod_copy(kvm);
> +
> +       if (!ka->use_master_clock) {
> +               rc = -EINVAL;
> +               goto out;
> +       }
> +
> +       /*
> +        * Sample the kernel time and host TSC at a singular point.
> +        * We then calculate the guest TSC using this exact point in time.
> +        * From here we can then determine the delta using the
> +        * PV time info requested from the user and what we currently have
> +        * using the fixed point in time. This delta is then used as a
> +        * correction factor to subtract from the clock offset.
> +        */
> +       if (!kvm_get_time_and_clockread(&kernel_ns, &host_tsc)) {
> +               rc = -EFAULT;
> +               goto out;
> +       }
> +
> +       guest_tsc = kvm_read_l1_tsc(v, host_tsc);
> +
> +       dummy_pvti = orig_pvti;
> +       dummy_pvti.tsc_timestamp = guest_tsc;
> +       dummy_pvti.system_time = kernel_ns + ka->kvmclock_offset;
> +
> +       clock_orig = __pvclock_read_cycles(&orig_pvti, guest_tsc);
> +       clock_dummy = __pvclock_read_cycles(&dummy_pvti, guest_tsc);
> 

In both cases here you're using the scale information directly from
userspace... that you forgot to fill in for them earlier. I think we
should have a sanity check on it, to ensure that it matches the TSC
frequency of the vCPU?

> +       correction = clock_orig - clock_dummy;
> +       ka->kvmclock_offset += correction;
> +
> +out:
> +       kvm_end_pvclock_update(kvm);
> +       return rc;
> +}
> +
>  long kvm_arch_vcpu_ioctl(struct file *filp,
>                          unsigned int ioctl, unsigned long arg)
>  {
> @@ -6239,6 +6357,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
>                 break;
>         }
> +       case KVM_SET_CLOCK_GUEST:
> +               r = kvm_vcpu_ioctl_set_clock_guest(vcpu, argp);
> +               break;
> +       case KVM_GET_CLOCK_GUEST:
> +               r = kvm_vcpu_ioctl_get_clock_guest(vcpu, argp);
> +               break;
>  #ifdef CONFIG_KVM_HYPERV
>         case KVM_GET_SUPPORTED_HV_CPUID:
>                 r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 2190adbe3002..0d306311e4d6 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1548,4 +1548,7 @@ struct kvm_create_guest_memfd {
>         __u64 reserved[6];
>  };
>  
> +#define KVM_SET_CLOCK_GUEST       _IOW(KVMIO,  0xd5, struct pvclock_vcpu_time_info)
> +#define KVM_GET_CLOCK_GUEST       _IOR(KVMIO,  0xd6, struct pvclock_vcpu_time_info)
> +
>  #endif /* __LINUX_KVM_H */


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

  parent reply	other threads:[~2024-04-15  7:17 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-08 22:07 [PATCH 0/2] Add API to correct KVM/PV clock drift Jack Allister
2024-04-08 22:07 ` [PATCH 1/2] KVM: x86: Add KVM_[GS]ET_CLOCK_GUEST for KVM clock drift fixup Jack Allister
2024-04-09  0:34   ` Dongli Zhang
2024-04-09  3:50     ` David Woodhouse
2024-04-10 10:08     ` Allister, Jack
2024-04-08 22:07 ` [PATCH 2/2] KVM: selftests: Add KVM/PV clock selftest to prove timer drift correction Jack Allister
2024-04-09  0:43   ` Dongli Zhang
2024-04-09  4:23     ` David Woodhouse
2024-04-10 10:15     ` Allister, Jack
2024-04-11 13:28       ` David Woodhouse
2024-04-19 17:13   ` Chen, Zide
     [not found]     ` <17F1A2E9-6BAD-40E7-ACDD-B110CFC124B3@infradead.org>
2024-04-19 18:43       ` David Woodhouse
2024-04-19 23:54         ` Chen, Zide
2024-04-20 10:32           ` David Woodhouse
2024-04-20 16:03           ` David Woodhouse
2024-04-22 22:02             ` Chen, Zide
2024-04-23  7:49               ` David Woodhouse
2024-04-23 17:59                 ` Chen, Zide
2024-04-23 21:02                   ` David Woodhouse
2024-04-24 12:58               ` David Woodhouse
2024-04-19 19:34     ` David Woodhouse
2024-04-19 23:53       ` Chen, Zide
2024-04-10  9:52 ` [PATCH v2 0/2] Add API for accurate KVM/PV clock migration Jack Allister
2024-04-10  9:52   ` [PATCH v2 1/2] KVM: x86: Add KVM_[GS]ET_CLOCK_GUEST for accurate KVM " Jack Allister
2024-04-10 10:29     ` Paul Durrant
2024-04-10 12:09       ` David Woodhouse
2024-04-10 12:43         ` Paul Durrant
2024-04-17 19:50           ` David Woodhouse
2024-04-15  7:16     ` David Woodhouse [this message]
2024-04-10  9:52   ` [PATCH v2 2/2] KVM: selftests: Add KVM/PV clock selftest to prove timer correction Jack Allister
2024-04-10 10:36     ` Paul Durrant
2024-04-12  8:19     ` Dongli Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2d3e0d4146a30621324f9b1019d0950e8896ca6d.camel@infradead.org \
    --to=dwmw2@infradead.org \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=dongli.zhang@oracle.com \
    --cc=hpa@zytor.com \
    --cc=jalliste@amazon.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=paul@xen.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).