From d5e7229bec41406a4040a1ac9131e24cb1f8768d Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Wed, 30 Sep 2015 23:00:00 +0000
Subject: [PATCH 01/12] Measure correct boot time.

---
 arch/x86/Kconfig          |  8 ++++++++
 arch/x86/kernel/head_64.S | 16 ++++++++++++++++
 init/main.c               | 11 +++++++++++
 3 files changed, 35 insertions(+)
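
Note: RDTSC returns the 64-bit time-stamp counter split across EDX:EAX,
which is why startup_64 below shifts RDX left by 32 and ORs in RAX
before storing tsc_at_head; dividing cycles by tsc_khz (cycles per
millisecond) then yields milliseconds. A minimal user-space sketch of
the same read-and-scale logic (illustrative only: rdtsc64 and the
2.5 GHz figure are invented for the example):

    #include <stdint.h>
    #include <stdio.h>

    /* Read the 64-bit TSC; RDTSC returns it split across EDX:EAX. */
    static inline uint64_t rdtsc64(void)
    {
            uint32_t lo, hi;

            __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
            return ((uint64_t)hi << 32) | lo;  /* same shl/or as startup_64 */
    }

    int main(void)
    {
            uint64_t tsc_khz = 2500000;        /* illustrative 2.5 GHz TSC */
            uint64_t t0 = rdtsc64();
            /* ... the interval being measured ... */
            uint64_t t1 = rdtsc64();

            /* kHz is cycles per millisecond, so cycles / kHz = ms. */
            printf("elapsed: %ums\n", (unsigned int)((t1 - t0) / tsc_khz));
            return 0;
    }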

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b3a1a5d..24141ac 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -708,6 +708,14 @@ config KVM_DEBUG_FS
 	  Statistics are displayed in debugfs filesystem. Enabling this option
 	  may incur significant overhead.
 
+config VMWARE
+	bool "VMware Guest support"
+	depends on PARAVIRT
+	default y
+	---help---
+	  This option enables various optimizations for running under the
+	  VMware hypervisor. It includes a correct boot time measurement.
+
 source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT_TIME_ACCOUNTING
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 1d40ca8..eccf2d7 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -65,6 +65,16 @@ startup_64:
 	 * tables and then reload them.
 	 */
 
+#ifdef CONFIG_VMWARE
+	/*
+	 * Record the initial TSC value as early as possible.
+	 */
+	rdtsc
+	shl	$0x20, %rdx
+	or	%rax, %rdx
+	mov	%rdx, tsc_at_head(%rip)
+#endif
+
 	/*
 	 * Compute the delta between the address I am compiled to run at and the
 	 * address I am actually running at.
@@ -512,6 +522,12 @@ early_gdt_descr:
 early_gdt_descr_base:
 	.quad	INIT_PER_CPU_VAR(gdt_page)
 
+#ifdef CONFIG_VMWARE
+	.globl tsc_at_head
+tsc_at_head:
+	.quad 0
+#endif
+
 ENTRY(phys_base)
 	/* This must match the first entry in level2_kernel_pgt */
 	.quad   0x0000000000000000
diff --git a/init/main.c b/init/main.c
index 5650655..c386186 100644
--- a/init/main.c
+++ b/init/main.c
@@ -929,6 +929,9 @@ static int try_to_run_init_process(const char *init_filename)
 }
 
 static noinline void __init kernel_init_freeable(void);
+#ifdef CONFIG_VMWARE
+extern unsigned long long tsc_at_head;
+#endif
 
 static int __ref kernel_init(void *unused)
 {
@@ -944,6 +947,14 @@ static int __ref kernel_init(void *unused)
 
 	flush_delayed_fput();
 
+#ifdef CONFIG_VMWARE
+	printk(KERN_INFO "Pre-kernel time: %5ums\n",
+		(unsigned int) (tsc_at_head / tsc_khz));
+	printk(KERN_INFO "Kernel boot time: %5ums\n",
+		(unsigned int) ((__native_read_tsc() - tsc_at_head) /
+			tsc_khz));
+#endif
+
 	if (ramdisk_execute_command) {
 		ret = run_init_process(ramdisk_execute_command);
 		if (!ret)
-- 
1.9.1


From 500436e32d4dffae5d78f12be72c2e6784ab8cfb Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Fri, 2 Oct 2015 20:00:06 +0000
Subject: [PATCH 02/12] PV io_delay for VMware guest.

---
 arch/x86/kernel/cpu/vmware.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)
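
Note: the native io_delay is a dummy write to I/O port 0x80; on real
hardware it is harmless, but in a VM every such write is a pointless
trap into the hypervisor, so the pv-op is pointed at paravirt_nop. A
stand-alone sketch of the indirection (struct and function names here
are invented; only the io_delay slot and the nop idea correspond to
the patch):

    #include <stdio.h>

    /* Simplified model of a pv-ops slot: a function pointer that
     * hypervisor-specific setup code can retarget. */
    struct cpu_ops {
            void (*io_delay)(void);
    };

    static void native_io_delay(void)
    {
            /* Stands in for outb %al, $0x80: cheap on bare metal,
             * a full VM exit when virtualized. */
            puts("port 0x80 write");
    }

    static void nop_io_delay(void)
    {
            /* Nothing to do: port I/O pacing is meaningless in a VM. */
    }

    static struct cpu_ops cpu_ops = { .io_delay = native_io_delay };

    int main(void)
    {
            cpu_ops.io_delay();                  /* native default */
            cpu_ops.io_delay = nop_io_delay;     /* what this patch does */
            cpu_ops.io_delay();
            return 0;
    }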

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 628a059..8fdd031 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -26,6 +26,7 @@
 #include <asm/div64.h>
 #include <asm/x86_init.h>
 #include <asm/hypervisor.h>
+#include <asm/timer.h>
 
 #define CPUID_VMWARE_INFO_LEAF	0x40000000
 #define VMWARE_HYPERVISOR_MAGIC	0x564D5868
@@ -75,6 +76,16 @@ static unsigned long vmware_get_tsc_khz(void)
 	return tsc_hz;
 }
 
+static void __init paravirt_ops_setup(void)
+{
+	pv_info.name = "VMware";
+	pv_cpu_ops.io_delay = paravirt_nop;
+
+#ifdef CONFIG_X86_IO_APIC
+	no_timer_check = 1;
+#endif
+}
+
 static void __init vmware_platform_setup(void)
 {
 	uint32_t eax, ebx, ecx, edx;
@@ -86,6 +97,8 @@ static void __init vmware_platform_setup(void)
 	else
 		printk(KERN_WARNING
 		       "Failed to get TSC freq from the hypervisor\n");
+
+	paravirt_ops_setup();
 }
 
 /*
-- 
1.9.1


From adff5db39b45d8adef2b4579ec46ab1bb721a81f Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Wed, 7 Oct 2015 22:53:18 +0000
Subject: [PATCH 03/12] Improved TSC-based sched_clock & clocksource.

---
 arch/x86/Kconfig             |  1 +
 arch/x86/kernel/cpu/vmware.c | 66 ++++++++++++++++++++++++++++++++++++++++++++
 init/main.c                  | 11 --------
 kernel/sched/clock.c         |  2 ++
 4 files changed, 69 insertions(+), 11 deletions(-)
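
Note: the sched_clock implementation is a fixed-point conversion,
ns = (tsc - tsc_at_head) * cyc2ns_mul >> cyc2ns_shift, where
cyc2ns_mul = DIV_ROUND_CLOSEST(NSEC_PER_MSEC << 8, tsc_khz);
subtracting tsc_at_head makes timestamps start near zero in early
boot. A self-contained sketch of the arithmetic (the frequency is
illustrative; with an 8-bit shift the rounding error at this rate is
about 0.4%):

    #include <stdint.h>
    #include <stdio.h>

    #define CYC2NS_SCALE_FACTOR 8
    #define NSEC_PER_MSEC 1000000ULL

    /* Same contract as the kernel's mul_u64_u32_shr(): 64x32-bit
     * multiply, then a right shift; __int128 keeps the sketch short. */
    static uint64_t mul_u64_u32_shr(uint64_t a, uint32_t mul,
                                    unsigned int shift)
    {
            return (uint64_t)(((unsigned __int128)a * mul) >> shift);
    }

    int main(void)
    {
            uint64_t tsc_khz = 2496000;     /* illustrative 2.496 GHz TSC */
            /* DIV_ROUND_CLOSEST(NSEC_PER_MSEC << 8, tsc_khz) -> 103 */
            uint32_t mul = (uint32_t)(((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)
                                       + tsc_khz / 2) / tsc_khz);
            uint64_t one_second = tsc_khz * 1000;   /* cycles in 1 s */

            /* Prints ~1e9 ns; slightly off because mul was rounded. */
            printf("%llu ns\n", (unsigned long long)
                   mul_u64_u32_shr(one_second, mul, CYC2NS_SCALE_FACTOR));
            return 0;
    }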

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 24141ac..ca0be27 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -711,6 +711,7 @@ config KVM_DEBUG_FS
 config VMWARE
 	bool "VMware Guest support"
 	depends on PARAVIRT
+	select PARAVIRT_CLOCK
 	default y
 	---help---
 	  This option enables various optimizations for running under the
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 8fdd031..004825e 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -27,6 +27,7 @@
 #include <asm/x86_init.h>
 #include <asm/hypervisor.h>
 #include <asm/timer.h>
+#include <linux/sched.h>
 
 #define CPUID_VMWARE_INFO_LEAF	0x40000000
 #define VMWARE_HYPERVISOR_MAGIC	0x564D5868
@@ -76,10 +77,43 @@ static unsigned long vmware_get_tsc_khz(void)
 	return tsc_hz;
 }
 
+static struct cyc2ns_data vmware_cyc2ns;
+extern unsigned long long tsc_at_head;
+static cycle_t vmware_clock_get_cycles(struct clocksource *cs)
+{
+	return __native_read_tsc();
+}
+
+static struct clocksource clocksource_vmware = {
+	.name = "vmware-clock",
+	.read = vmware_clock_get_cycles,
+	.rating = 400,
+	.mask = CLOCKSOURCE_MASK(64),
+	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+struct clocksource * __init clocksource_default_clock(void)
+{
+	return &clocksource_vmware;
+}
+
+#define CYC2NS_SCALE_FACTOR 8
+
+static u64 vmware_sched_clock(void)
+{
+	u64 ret;
+
+	ret = __native_read_tsc() - vmware_cyc2ns.cyc2ns_offset;
+	ret = mul_u64_u32_shr(ret, vmware_cyc2ns.cyc2ns_mul, CYC2NS_SCALE_FACTOR);
+	return ret;
+}
+
+extern __read_mostly int sched_clock_running;
 static void __init paravirt_ops_setup(void)
 {
 	pv_info.name = "VMware";
 	pv_cpu_ops.io_delay = paravirt_nop;
+	pv_time_ops.sched_clock = vmware_sched_clock;
 
 #ifdef CONFIG_X86_IO_APIC
 	no_timer_check = 1;
@@ -88,6 +122,7 @@ static void __init paravirt_ops_setup(void)
 
 static void __init vmware_platform_setup(void)
 {
+	uint64_t cpu_khz;
 	uint32_t eax, ebx, ecx, edx;
 
 	VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
@@ -98,6 +133,19 @@ static void __init vmware_platform_setup(void)
 		printk(KERN_WARNING
 		       "Failed to get TSC freq from the hypervisor\n");
 
+	cpu_khz = eax | (((uint64_t)ebx) << 32);
+	do_div(cpu_khz, 1000);
+	printk(KERN_INFO "Pre-kernel boot time: %ums\n",
+		(unsigned int) (tsc_at_head / cpu_khz));
+
+	vmware_cyc2ns.cyc2ns_mul =
+		DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR,
+				  cpu_khz);
+	vmware_cyc2ns.cyc2ns_shift = CYC2NS_SCALE_FACTOR;
+	vmware_cyc2ns.cyc2ns_offset = tsc_at_head;
+
+	clocksource_register_khz(&clocksource_vmware, cpu_khz);
+
 	paravirt_ops_setup();
 }
 
@@ -158,3 +206,21 @@ const __refconst struct hypervisor_x86 x86_hyper_vmware = {
 	.x2apic_available	= vmware_legacy_x2apic_available,
 };
 EXPORT_SYMBOL(x86_hyper_vmware);
+
+void read_boot_clock64(struct timespec64 *ts)
+{
+	struct timespec64 now;
+	u64 delta, delta_nsec;
+	u32 rem;
+
+	read_persistent_clock64(&now);
+	delta = __native_read_tsc() - vmware_cyc2ns.cyc2ns_offset;
+	delta_nsec = mul_u64_u32_shr(delta, vmware_cyc2ns.cyc2ns_mul,
+					CYC2NS_SCALE_FACTOR);
+	ts->tv_sec = now.tv_sec - div_s64_rem(delta_nsec, NSEC_PER_SEC, &rem);
+	ts->tv_nsec = now.tv_nsec - rem;
+	while (unlikely(ts->tv_nsec < 0)) {
+		ts->tv_sec--;
+		ts->tv_nsec += NSEC_PER_SEC;
+	}
+}
diff --git a/init/main.c b/init/main.c
index c386186..5650655 100644
--- a/init/main.c
+++ b/init/main.c
@@ -929,9 +929,6 @@ static int try_to_run_init_process(const char *init_filename)
 }
 
 static noinline void __init kernel_init_freeable(void);
-#ifdef CONFIG_VMWARE
-extern unsigned long long tsc_at_head;
-#endif
 
 static int __ref kernel_init(void *unused)
 {
@@ -947,14 +944,6 @@ static int __ref kernel_init(void *unused)
 
 	flush_delayed_fput();
 
-#ifdef CONFIG_VMWARE
-	printk(KERN_INFO "Pre-kernel time: %5ums\n",
-		(unsigned int) (tsc_at_head / tsc_khz));
-	printk(KERN_INFO "Kernel boot time: %5ums\n",
-		(unsigned int) ((__native_read_tsc() - tsc_at_head) /
-			tsc_khz));
-#endif
-
 	if (ramdisk_execute_command) {
 		ret = run_init_process(ramdisk_execute_command);
 		if (!ret)
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c0a2051..284a7ba 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -385,8 +385,10 @@ u64 cpu_clock(int cpu)
  */
 u64 local_clock(void)
 {
+#ifndef CONFIG_VMWARE
 	if (!sched_clock_stable())
 		return sched_clock_cpu(raw_smp_processor_id());
+#endif
 
 	return sched_clock();
 }
-- 
1.9.1


From 3bd5760c3b1f6cb39568361561d7d1e5440f1109 Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Mon, 12 Oct 2015 22:43:38 +0000
Subject: [PATCH 04/12] Move read_boot_clock64 into pv_time_ops.

---
 arch/x86/Kconfig                      | 14 ++++++--
 arch/x86/include/asm/paravirt.h       |  5 +++
 arch/x86/include/asm/paravirt_types.h |  5 +++
 arch/x86/kernel/cpu/vmware.c          | 66 ++++++++++++++++++++---------------
 arch/x86/kernel/head_64.S             |  8 +----
 arch/x86/kernel/paravirt.c            |  7 ++++
 arch/x86/kernel/setup.c               |  9 +++++
 kernel/sched/clock.c                  |  7 +++-
 8 files changed, 83 insertions(+), 38 deletions(-)
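
Note: read_boot_clock64() reconstructs the wall-clock time of power-on
as "persistent clock now" minus "nanoseconds since tsc_at_head", then
normalizes tv_nsec back into [0, NSEC_PER_SEC) by borrowing from
tv_sec. A stand-alone sketch of that arithmetic (boot_clock_from_now
is an invented name; the kernel code does the same split with
div_s64_rem):

    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000LL

    struct timespec64 { int64_t tv_sec; long tv_nsec; };

    /* boot time = now - time since boot, normalizing tv_nsec after. */
    static void boot_clock_from_now(struct timespec64 now,
                                    uint64_t ns_since_boot,
                                    struct timespec64 *ts)
    {
            ts->tv_sec = now.tv_sec - (int64_t)(ns_since_boot / NSEC_PER_SEC);
            ts->tv_nsec = now.tv_nsec - (long)(ns_since_boot % NSEC_PER_SEC);
            while (ts->tv_nsec < 0) {  /* borrow a second, as the patch does */
                    ts->tv_sec--;
                    ts->tv_nsec += NSEC_PER_SEC;
            }
    }

    int main(void)
    {
            struct timespec64 now = { 1700000000, 100000000 }, boot;

            boot_clock_from_now(now, 2500000000ULL, &boot); /* 2.5 s uptime */
            printf("booted at %lld.%09ld\n",
                   (long long)boot.tv_sec, boot.tv_nsec);
            return 0;
    }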

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ca0be27..d3ef8ef 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -709,13 +709,23 @@ config KVM_DEBUG_FS
 	  may incur significant overhead.
 
 config VMWARE
-	bool "VMware Guest support"
+	bool "VMware guest support"
 	depends on PARAVIRT
 	select PARAVIRT_CLOCK
 	default y
 	---help---
 	  This option enables various optimizations for running under the
-	  VMware hypervisor. It includes a correct boot time measurement.
+	  VMware hypervisor. It includes the vmware-clock clocksource and
+	  several pv-ops implementations.
+
+config VMWARE_ONLY
+	bool "Build for VMware only"
+	depends on VMWARE
+	default n
+	---help---
+	  This option enables VMware guest-specific optimizations. If you
+	  say Y here, the kernel might run only under the VMware hypervisor.
+
 
 source "arch/x86/lguest/Kconfig"
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index d143bfa..ffcbd18 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -201,6 +201,11 @@ static inline u64 paravirt_steal_clock(int cpu)
 	return PVOP_CALL1(u64, pv_time_ops.steal_clock, cpu);
 }
 
+static inline void paravirt_read_boot_clock64(struct timespec64 *ts)
+{
+	PVOP_VCALL1(pv_time_ops.read_boot_clock64, ts);
+}
+
 static inline unsigned long long paravirt_read_pmc(int counter)
 {
 	return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index a6b8f9f..7adcd55 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -51,6 +51,10 @@ struct mm_struct;
 struct desc_struct;
 struct task_struct;
 struct cpumask;
+#if __BITS_PER_LONG == 64
+# define timespec64 timespec
+#endif
+struct timespec64;
 
 /*
  * Wrapper type for pointers to code which uses the non-standard
@@ -98,6 +102,7 @@ struct pv_time_ops {
 	unsigned long long (*sched_clock)(void);
 	unsigned long long (*steal_clock)(int cpu);
 	unsigned long (*get_tsc_khz)(void);
+	void (*read_boot_clock64)(struct timespec64 *ts);
 };
 
 struct pv_cpu_ops {
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 004825e..1bf1fe3 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -77,8 +77,10 @@ static unsigned long vmware_get_tsc_khz(void)
 	return tsc_hz;
 }
 
+#define CYC2NS_SCALE_FACTOR 8
 static struct cyc2ns_data vmware_cyc2ns;
-extern unsigned long long tsc_at_head;
+u64 __initdata tsc_at_head;
+
 static cycle_t vmware_clock_get_cycles(struct clocksource *cs)
 {
 	return __native_read_tsc();
@@ -92,12 +94,14 @@ static struct clocksource clocksource_vmware = {
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+#ifdef CONFIG_VMWARE_ONLY
+/* We want to use clocksource_vmware from the beginning to avoid drift
+   in the monotonic clock. */
 struct clocksource * __init clocksource_default_clock(void)
 {
 	return &clocksource_vmware;
 }
-
-#define CYC2NS_SCALE_FACTOR 8
+#endif
 
 static u64 vmware_sched_clock(void)
 {
@@ -108,12 +112,33 @@ static u64 vmware_sched_clock(void)
 	return ret;
 }
 
-extern __read_mostly int sched_clock_running;
+
+/* Read the exact time at which the system was started. It is used as
+   the zero point for the monotonic clock. */
+static void vmware_read_boot_clock64(struct timespec64 *ts)
+{
+	struct timespec64 now;
+	u64 delta, delta_nsec;
+	u32 rem;
+
+	read_persistent_clock64(&now);
+	delta = __native_read_tsc() - vmware_cyc2ns.cyc2ns_offset;
+	delta_nsec = mul_u64_u32_shr(delta, vmware_cyc2ns.cyc2ns_mul,
+					CYC2NS_SCALE_FACTOR);
+	ts->tv_sec = now.tv_sec - div_s64_rem(delta_nsec, NSEC_PER_SEC, &rem);
+	ts->tv_nsec = now.tv_nsec - rem;
+	while (unlikely(ts->tv_nsec < 0)) {
+		ts->tv_sec--;
+		ts->tv_nsec += NSEC_PER_SEC;
+	}
+}
+
 static void __init paravirt_ops_setup(void)
 {
 	pv_info.name = "VMware";
 	pv_cpu_ops.io_delay = paravirt_nop;
 	pv_time_ops.sched_clock = vmware_sched_clock;
+	pv_time_ops.read_boot_clock64 = vmware_read_boot_clock64;
 
 #ifdef CONFIG_X86_IO_APIC
 	no_timer_check = 1;
@@ -122,7 +147,7 @@ static void __init paravirt_ops_setup(void)
 
 static void __init vmware_platform_setup(void)
 {
-	uint64_t cpu_khz;
+	uint64_t vtsc_khz;
 	uint32_t eax, ebx, ecx, edx;
 
 	VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
@@ -133,18 +158,18 @@ static void __init vmware_platform_setup(void)
 		printk(KERN_WARNING
 		       "Failed to get TSC freq from the hypervisor\n");
 
-	cpu_khz = eax | (((uint64_t)ebx) << 32);
-	do_div(cpu_khz, 1000);
+	vtsc_khz = eax | (((uint64_t)ebx) << 32);
+	do_div(vtsc_khz, 1000);
 	printk(KERN_INFO "Pre-kernel boot time: %ums\n",
-		(unsigned int) (tsc_at_head / cpu_khz));
+		(unsigned int) (tsc_at_head / vtsc_khz));
 
 	vmware_cyc2ns.cyc2ns_mul =
 		DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR,
-				  cpu_khz);
+				  vtsc_khz);
 	vmware_cyc2ns.cyc2ns_shift = CYC2NS_SCALE_FACTOR;
 	vmware_cyc2ns.cyc2ns_offset = tsc_at_head;
 
-	clocksource_register_khz(&clocksource_vmware, cpu_khz);
+	clocksource_register_khz(&clocksource_vmware, vtsc_khz);
 
 	paravirt_ops_setup();
 }
@@ -156,6 +181,9 @@ static void __init vmware_platform_setup(void)
  */
 static uint32_t __init vmware_platform(void)
 {
+#ifndef CONFIG_VMWARE_ONLY
+	tsc_at_head = __native_read_tsc();
+#endif
 	if (cpu_has_hypervisor) {
 		unsigned int eax;
 		unsigned int hyper_vendor_id[3];
@@ -206,21 +234,3 @@ const __refconst struct hypervisor_x86 x86_hyper_vmware = {
 	.x2apic_available	= vmware_legacy_x2apic_available,
 };
 EXPORT_SYMBOL(x86_hyper_vmware);
-
-void read_boot_clock64(struct timespec64 *ts)
-{
-	struct timespec64 now;
-	u64 delta, delta_nsec;
-	u32 rem;
-
-	read_persistent_clock64(&now);
-	delta = __native_read_tsc() - vmware_cyc2ns.cyc2ns_offset;
-	delta_nsec = mul_u64_u32_shr(delta, vmware_cyc2ns.cyc2ns_mul,
-					CYC2NS_SCALE_FACTOR);
-	ts->tv_sec = now.tv_sec - div_s64_rem(delta_nsec, NSEC_PER_SEC, &rem);
-	ts->tv_nsec = now.tv_nsec - rem;
-	while (unlikely(ts->tv_nsec < 0)) {
-		ts->tv_sec--;
-		ts->tv_nsec += NSEC_PER_SEC;
-	}
-}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index eccf2d7..1dfd805 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -65,7 +65,7 @@ startup_64:
 	 * tables and then reload them.
 	 */
 
-#ifdef CONFIG_VMWARE
+#ifdef CONFIG_VMWARE_ONLY
 	/*
 	 * Record the initial TSC value as early as possible.
 	 */
@@ -522,12 +522,6 @@ early_gdt_descr:
 early_gdt_descr_base:
 	.quad	INIT_PER_CPU_VAR(gdt_page)
 
-#ifdef CONFIG_VMWARE
-	.globl tsc_at_head
-tsc_at_head:
-	.quad 0
-#endif
-
 ENTRY(phys_base)
 	/* This must match the first entry in level2_kernel_pgt */
 	.quad   0x0000000000000000
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 58bcfb6..abf40ec 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -210,6 +210,12 @@ static u64 native_steal_clock(int cpu)
 	return 0;
 }
 
+static void native_read_boot_clock64(struct timespec64 *ts)
+{
+	ts->tv_sec = 0;
+	ts->tv_nsec = 0;
+}
+
 /* These are in entry.S */
 extern void native_iret(void);
 extern void native_irq_enable_sysexit(void);
@@ -320,6 +326,7 @@ struct pv_init_ops pv_init_ops = {
 struct pv_time_ops pv_time_ops = {
 	.sched_clock = native_sched_clock,
 	.steal_clock = native_steal_clock,
+	.read_boot_clock64 = native_read_boot_clock64,
 };
 
 __visible struct pv_irq_ops pv_irq_ops = {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 80f874b..0d7022e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1289,3 +1289,12 @@ static int __init register_kernel_offset_dumper(void)
 	return 0;
 }
 __initcall(register_kernel_offset_dumper);
+
+/* We need to define a real function for read_boot_clock64, to override the
+   weak default version */
+#ifdef CONFIG_PARAVIRT
+void read_boot_clock64(struct timespec64 *ts)
+{
+	paravirt_read_boot_clock64(ts);
+}
+#endif
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index 284a7ba..615aeb4 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -385,7 +385,12 @@ u64 cpu_clock(int cpu)
  */
 u64 local_clock(void)
 {
-#ifndef CONFIG_VMWARE
+	/*
+	 * In a VMware guest, sched_clock is stable and runs from early
+	 * boot, so this check can be skipped. That keeps printk
+	 * timestamps accurate from the very beginning.
+	 */
+#if !defined(CONFIG_VMWARE_ONLY) || !defined(CONFIG_PRINTK_TIME)
 	if (!sched_clock_stable())
 		return sched_clock_cpu(raw_smp_processor_id());
 #endif
-- 
1.9.1


From aa93eaec3f709633007ab6ce3ddbb8aaa455b557 Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Thu, 5 Nov 2015 21:02:52 +0000
Subject: [PATCH 05/12] Fix clocksource_vmware issue in VM version <= 10

---
 arch/x86/kernel/cpu/vmware.c | 48 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 45 insertions(+), 3 deletions(-)
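
Note: calibrate_APIC_clock() needs a periodic tick to make progress,
and the kernel will only switch to hres/oneshot mode over a
clocksource flagged CLOCK_SOURCE_IS_CONTINUOUS, so a low-rated,
non-continuous temporary source keeps the system in periodic mode
until the real source is registered and wins on rating. A toy model of
that selection (pick() and the flag handling are simplifications, not
kernel code):

    #include <stdio.h>

    #define CLOCK_SOURCE_IS_CONTINUOUS 0x01

    struct clocksource { const char *name; int rating; int flags; };

    /* The core always prefers the highest-rated registered source. */
    static struct clocksource *pick(struct clocksource **cs, int n)
    {
            struct clocksource *best = cs[0];
            int i;

            for (i = 1; i < n; i++)
                    if (cs[i]->rating > best->rating)
                            best = cs[i];
            return best;
    }

    int main(void)
    {
            struct clocksource periodic = { "vmware-clock-periodic", 100, 0 };
            struct clocksource real = { "vmware-clock", 400,
                                        CLOCK_SOURCE_IS_CONTINUOUS };
            struct clocksource *cs[2] = { &periodic, &real };
            struct clocksource *cur;

            cur = pick(cs, 1);  /* early boot: only the temporary source */
            printf("%s, hres possible: %d\n", cur->name,
                   !!(cur->flags & CLOCK_SOURCE_IS_CONTINUOUS));
            cur = pick(cs, 2);  /* after subsys_initcall: rating 400 wins */
            printf("%s, hres possible: %d\n", cur->name,
                   !!(cur->flags & CLOCK_SOURCE_IS_CONTINUOUS));
            return 0;
    }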

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 1bf1fe3..0b89bb9 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -79,7 +79,8 @@ static unsigned long vmware_get_tsc_khz(void)
 
 #define CYC2NS_SCALE_FACTOR 8
 static struct cyc2ns_data vmware_cyc2ns;
-u64 __initdata tsc_at_head;
+uint64_t __initdata tsc_at_head;
+uint64_t __initdata vtsc_khz;
 
 static cycle_t vmware_clock_get_cycles(struct clocksource *cs)
 {
@@ -95,11 +96,45 @@ static struct clocksource clocksource_vmware = {
 };
 
 #ifdef CONFIG_VMWARE_ONLY
+/*
+ * clocksource_vmware_periodic is a temporary clocksource used only
+ * during early boot initialization.
+ * It works around infinite looping in calibrate_APIC_clock() when the
+ * TSC deadline timer is not exposed by the hypervisor (VM hardware
+ * version <= 10): calibrate_APIC_clock() relies on a _periodic_ timer,
+ * so during calibration we must not use a clocksource that is valid
+ * for hres/oneshot mode.
+ */
+static struct clocksource __initdata clocksource_vmware_periodic = {
+	.name = "vmware-clock-periodic",
+	.read = vmware_clock_get_cycles,
+	.rating = 100,
+	.mask = CLOCKSOURCE_MASK(64),
+};
+
+static struct clocksource __initdata * initial_clocksource;
+
+/*
+ * clocksource_vmware_register
+ *
+ * Time to register the real clocksource. It will be activated in
+ * clocksource_done_booting().
+ */
+static int __init clocksource_vmware_register(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
+		clocksource_register_khz(&clocksource_vmware, vtsc_khz);
+		clocksource_unregister(&clocksource_vmware_periodic);
+	}
+	return 0;
+}
+subsys_initcall(clocksource_vmware_register);
+
 /* We want to use clocksource_vmware from the beginning to avoid drift
    in the monotonic clock. */
 struct clocksource * __init clocksource_default_clock(void)
 {
-	return &clocksource_vmware;
+	return initial_clocksource;
 }
 #endif
 
@@ -147,7 +182,6 @@ static void __init paravirt_ops_setup(void)
 
 static void __init vmware_platform_setup(void)
 {
-	uint64_t vtsc_khz;
 	uint32_t eax, ebx, ecx, edx;
 
 	VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
@@ -169,7 +203,15 @@ static void __init vmware_platform_setup(void)
 	vmware_cyc2ns.cyc2ns_shift = CYC2NS_SCALE_FACTOR;
 	vmware_cyc2ns.cyc2ns_offset = tsc_at_head;
 
+#ifdef CONFIG_VMWARE_ONLY
+	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+		initial_clocksource = &clocksource_vmware_periodic;
+	else
+		initial_clocksource = &clocksource_vmware;
+	clocksource_register_khz(initial_clocksource, vtsc_khz);
+#else
 	clocksource_register_khz(&clocksource_vmware, vtsc_khz);
+#endif
 
 	paravirt_ops_setup();
 }
-- 
1.9.1


From 245c6ff168fabde177b5b6023356b6005b0efbef Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Tue, 10 Nov 2015 11:46:57 +0000
Subject: [PATCH 06/12] Get lapic timer frequency from HV, skip calibration

---
 arch/x86/kernel/cpu/vmware.c | 48 +++++---------------------------------------
 1 file changed, 5 insertions(+), 43 deletions(-)
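
Note: in the GETHZ reply, ebx:eax carry the virtual TSC rate in Hz and
ecx the APIC bus rate in Hz, while lapic_timer_frequency is consumed
as bus ticks per jiffy, so the stored value is ecx / HZ (the same
division mainline's vmware.c performs). A worked example with
illustrative numbers:

    #include <stdio.h>

    #define HZ 250          /* illustrative CONFIG_HZ */

    int main(void)
    {
            unsigned int ecx = 66000000;    /* e.g. 66 MHz APIC bus clock */
            /* Ticks the LAPIC timer advances between two tick interrupts. */
            unsigned int lapic_timer_frequency = ecx / HZ;

            printf("%u bus ticks per %dms jiffy\n",
                   lapic_timer_frequency, 1000 / HZ);
            return 0;
    }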

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 0b89bb9..b16618b 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -96,45 +96,11 @@ static struct clocksource clocksource_vmware = {
 };
 
 #ifdef CONFIG_VMWARE_ONLY
-/*
- * clocksource_vmware_periodic is a temporary clocksource used only
- * during early boot initialization.
- * It works around infinite looping in calibrate_APIC_clock() when the
- * TSC deadline timer is not exposed by the hypervisor (VM hardware
- * version <= 10): calibrate_APIC_clock() relies on a _periodic_ timer,
- * so during calibration we must not use a clocksource that is valid
- * for hres/oneshot mode.
- */
-static struct clocksource __initdata clocksource_vmware_periodic = {
-	.name = "vmware-clock-periodic",
-	.read = vmware_clock_get_cycles,
-	.rating = 100,
-	.mask = CLOCKSOURCE_MASK(64),
-};
-
-static struct clocksource __initdata * initial_clocksource;
-
-/*
- * clocksource_vmware_register
- *
- * Time to register the real clocksource. It will be activated in
- * clocksource_done_booting().
- */
-static int __init clocksource_vmware_register(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
-		clocksource_register_khz(&clocksource_vmware, vtsc_khz);
-		clocksource_unregister(&clocksource_vmware_periodic);
-	}
-	return 0;
-}
-subsys_initcall(clocksource_vmware_register);
-
 /* We want to use clocksource_vmware from the beginning to avoid drift
    in the monotonic clock. */
 struct clocksource * __init clocksource_default_clock(void)
 {
-	return initial_clocksource;
+	return &clocksource_vmware;
 }
 #endif
 
@@ -197,21 +163,17 @@ static void __init vmware_platform_setup(void)
 	printk(KERN_INFO "Pre-kernel boot time: %ums\n",
 		(unsigned int) (tsc_at_head / vtsc_khz));
 
+#ifdef CONFIG_X86_LOCAL_APIC
+	/* Skip lapic calibration since we know the bus frequency. */
+	lapic_timer_frequency = ecx / HZ;
+#endif
 	vmware_cyc2ns.cyc2ns_mul =
 		DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR,
 				  vtsc_khz);
 	vmware_cyc2ns.cyc2ns_shift = CYC2NS_SCALE_FACTOR;
 	vmware_cyc2ns.cyc2ns_offset = tsc_at_head;
 
-#ifdef CONFIG_VMWARE_ONLY
-	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
-		initial_clocksource = &clocksource_vmware_periodic;
-	else
-		initial_clocksource = &clocksource_vmware;
-	clocksource_register_khz(initial_clocksource, vtsc_khz);
-#else
 	clocksource_register_khz(&clocksource_vmware, vtsc_khz);
-#endif
 
 	paravirt_ops_setup();
 }
-- 
1.9.1


From 23055114ca27a04044ebbe38853834e0aa869da0 Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Tue, 15 Dec 2015 21:31:18 +0000
Subject: [PATCH 07/12] Skip rdrand reseed

---
 arch/x86/kernel/cpu/common.c | 2 ++
 arch/x86/kernel/cpu/rdrand.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb9e5df..5327c74 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -943,7 +943,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #endif
 
 	init_hypervisor(c);
+#ifndef CONFIG_VMWARE_ONLY
 	x86_init_rdrand(c);
+#endif
 	x86_init_cache_qos(c);
 
 	/*
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 136ac74..0685891 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -32,6 +32,7 @@ static int __init x86_rdrand_setup(char *s)
 }
 __setup("nordrand", x86_rdrand_setup);
 
+#ifndef CONFIG_VMWARE_ONLY
 /*
  * Force a reseed cycle; we are architecturally guaranteed a reseed
  * after no more than 512 128-bit chunks of random data.  This also
@@ -58,3 +59,4 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
 		clear_cpu_cap(c, X86_FEATURE_RDRAND);
 #endif
 }
+#endif
-- 
1.9.1


From bd806a16d202bf9dc41fbe3f8e39545e704adf9e Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Thu, 3 Dec 2015 00:46:46 +0000
Subject: [PATCH 08/12] STA (steal time accounting) implementation, first version.

---
 arch/x86/kernel/cpu/vmware.c | 163 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 163 insertions(+)
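
Note: the backdoor is reached with an inl from port 0x5658 while EAX
holds the magic 0x564D5868 and ECX the command number; command-specific
arguments travel in EBX/ESI/EDI, and results come back in the same
registers. The steal-time buffer registered with the hypervisor is
per-CPU and cache-line aligned because the host updates it
asynchronously. A sketch of how the accumulated cycle counter becomes
nanoseconds, mirroring vmware_steal_clock() (steal_ns and the numbers
are illustrative, reusing the cyc2ns constants from patch 03):

    #include <stdint.h>
    #include <stdio.h>

    #define CYC2NS_SCALE_FACTOR 8

    struct vmware_steal_time {
            uint64_t clock;     /* stolen time, accumulated in vTSC cycles */
            uint64_t reserved;
    };

    /* Convert the hypervisor-maintained cycle count to nanoseconds with
     * the same fixed-point multiplier sched_clock uses. */
    static uint64_t steal_ns(const struct vmware_steal_time *st,
                             uint32_t cyc2ns_mul)
    {
            return (uint64_t)(((unsigned __int128)st->clock * cyc2ns_mul)
                              >> CYC2NS_SCALE_FACTOR);
    }

    int main(void)
    {
            /* 2,496,000 stolen cycles (~1 ms at 2.496 GHz), mul = 103. */
            struct vmware_steal_time st = { 2496000, 0 };

            printf("stolen: %llu ns\n",
                   (unsigned long long)steal_ns(&st, 103));
            return 0;
    }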

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index b16618b..cf1fb64 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -28,6 +28,8 @@
 #include <asm/hypervisor.h>
 #include <asm/timer.h>
 #include <linux/sched.h>
+#include <linux/cpu.h>
+#include <asm/pci_x86.h>
 
 #define CPUID_VMWARE_INFO_LEAF	0x40000000
 #define VMWARE_HYPERVISOR_MAGIC	0x564D5868
@@ -38,6 +40,10 @@
 #define VMWARE_PORT_CMD_GETVCPU_INFO	68
 #define VMWARE_PORT_CMD_LEGACY_X2APIC	3
 #define VMWARE_PORT_CMD_VCPU_RESERVED	31
+#define VMWARE_PORT_CMD_STEALCLOCK	91
+# define CMD_STEALCLOCK_ENABLE		0
+# define CMD_STEALCLOCK_DISABLE		1
+
 
 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
 	__asm__("inl (%%dx)" :						\
@@ -47,6 +53,34 @@
 			"2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) :	\
 			"memory");
 
+struct vmware_steal_time {
+	uint64_t clock;	/* stolen time counter in units of vtsc */
+	uint64_t reserved;
+};
+static DEFINE_PER_CPU(struct vmware_steal_time, steal_time) __aligned(64);
+static int has_steal_clock = 0;
+
+static int vmware_cmd_stealclock(int subcmd, uint32_t arg1, uint32_t arg2)
+{
+	uint32_t result, info;
+	__asm__ __volatile__ ("inl (%%dx)"
+		:	"=a" (result),
+			"=c" (info)
+		:       "a"  (VMWARE_HYPERVISOR_MAGIC),
+			"c"  (VMWARE_PORT_CMD_STEALCLOCK),
+			"d"  (VMWARE_HYPERVISOR_PORT),
+			"b"  (subcmd),
+			"S"  (arg1),
+			"D"  (arg2));
+	return result;
+}
+#define STEALCLOCK_ENABLE(pa)					\
+	vmware_cmd_stealclock(CMD_STEALCLOCK_ENABLE,		\
+			      (pa) >> 32, (pa) & 0xffffffff)
+
+#define STEALCLOCK_DISABLE()					\
+	vmware_cmd_stealclock(CMD_STEALCLOCK_DISABLE, 0, 0)
+
 static inline int __vmware_platform(void)
 {
 	uint32_t eax, ebx, ecx, edx;
@@ -134,6 +168,114 @@ static void vmware_read_boot_clock64(struct timespec64 *ts)
 	}
 }
 
+static uint64_t vmware_steal_clock(int cpu)
+{
+	struct vmware_steal_time *steal;
+
+	steal = &per_cpu(steal_time, cpu);
+	return mul_u64_u32_shr(steal->clock, vmware_cyc2ns.cyc2ns_mul,
+			       CYC2NS_SCALE_FACTOR);
+}
+
+static void vmware_register_steal_time(void)
+{
+	int cpu = smp_processor_id();
+	struct vmware_steal_time *st = &per_cpu(steal_time, cpu);
+
+	if (!has_steal_clock)
+		return;
+
+	memset(st, 0, sizeof(*st));
+
+	if (STEALCLOCK_ENABLE(slow_virt_to_phys(st)) != 0) {
+		has_steal_clock = 0;
+		return;
+	}
+
+	pr_info("vmware-stealtime: cpu %d, pa %llx\n",
+		cpu, (unsigned long long) slow_virt_to_phys(st));
+}
+
+void vmware_disable_steal_time(void)
+{
+	if (!has_steal_clock)
+		return;
+
+	STEALCLOCK_DISABLE();
+}
+
+static void vmware_guest_cpu_init(void)
+{
+	if (has_steal_clock)
+		vmware_register_steal_time();
+}
+
+#ifdef CONFIG_SMP
+static void __init vmware_smp_prepare_boot_cpu(void)
+{
+	vmware_guest_cpu_init();
+	native_smp_prepare_boot_cpu();
+}
+
+static void vmware_guest_cpu_online(void *dummy)
+{
+	vmware_guest_cpu_init();
+}
+
+static void vmware_guest_cpu_offline(void *dummy)
+{
+	vmware_disable_steal_time();
+}
+
+static int vmware_cpu_notify(struct notifier_block *self, unsigned long action,
+			  void *hcpu)
+{
+	int cpu = (unsigned long)hcpu;
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
+	case CPU_ONLINE_FROZEN:
+		smp_call_function_single(cpu, vmware_guest_cpu_online,
+			NULL, 0);
+		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		smp_call_function_single(cpu, vmware_guest_cpu_offline,
+			NULL, 1);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block vmware_cpu_notifier = {
+	.notifier_call	= vmware_cpu_notify,
+};
+#endif
+
+static int sta_enabled = 1; /* steal time accounting */
+static int parse_vmw_no_sta(char *arg)
+{
+	sta_enabled = 0;
+	return 0;
+}
+
+early_param("vmw-no-sta", parse_vmw_no_sta);
+
+static __init int activate_jump_labels(void)
+{
+	if (has_steal_clock) {
+		static_key_slow_inc(&paravirt_steal_enabled);
+		if (sta_enabled)
+			static_key_slow_inc(&paravirt_steal_rq_enabled);
+	}
+
+	return 0;
+}
+arch_initcall(activate_jump_labels);
+
+
 static void __init paravirt_ops_setup(void)
 {
 	pv_info.name = "VMware";
@@ -141,9 +283,18 @@ static void __init paravirt_ops_setup(void)
 	pv_time_ops.sched_clock = vmware_sched_clock;
 	pv_time_ops.read_boot_clock64 = vmware_read_boot_clock64;
 
+	/*
+	 * TODO: check for STEAL_TIME support
+	 */
+	if (1) {
+		has_steal_clock = 1;
+		pv_time_ops.steal_clock = vmware_steal_clock;
+	}
+
 #ifdef CONFIG_X86_IO_APIC
 	no_timer_check = 1;
 #endif
+
 }
 
 static void __init vmware_platform_setup(void)
@@ -176,6 +327,18 @@ static void __init vmware_platform_setup(void)
 	clocksource_register_khz(&clocksource_vmware, vtsc_khz);
 
 	paravirt_ops_setup();
+
+#ifdef CONFIG_SMP
+	smp_ops.smp_prepare_boot_cpu = vmware_smp_prepare_boot_cpu;
+	register_cpu_notifier(&vmware_cpu_notifier);
+#else
+	vmware_guest_cpu_init();
+#endif
+
+#ifdef CONFIG_PCI
+	/* PCI BIOS service won't work from a PV guest. */
+	pci_probe &= ~PCI_PROBE_BIOS;
+#endif
 }
 
 /*
-- 
1.9.1


From 8496145f4f5fcd430e5d8f493066a8e54aaaf96b Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Wed, 13 Jan 2016 22:54:04 +0000
Subject: [PATCH 09/12] STA: updated version

---
 arch/x86/kernel/cpu/vmware.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index cf1fb64..196703c 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -41,16 +41,23 @@
 #define VMWARE_PORT_CMD_LEGACY_X2APIC	3
 #define VMWARE_PORT_CMD_VCPU_RESERVED	31
 #define VMWARE_PORT_CMD_STEALCLOCK	91
-# define CMD_STEALCLOCK_ENABLE		0
-# define CMD_STEALCLOCK_DISABLE		1
+# define CMD_STEALCLOCK_STATUS		0
+#  define STEALCLOCK_IS_NOT_AVAILABLE	0
+#  define STEALCLOCK_IS_ENABLED		1
+#  define STEALCLOCK_IS_DISABLED	2
+# define CMD_STEALCLOCK_ENABLE		1
+# define CMD_STEALCLOCK_DISABLE		2
 
 
 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
+	VMWARE_PORT2(cmd, eax, ebx, ecx, edx, UINT_MAX)
+
+#define VMWARE_PORT2(cmd, eax, ebx, ecx, edx, arg)			\
 	__asm__("inl (%%dx)" :						\
 			"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :	\
 			"0"(VMWARE_HYPERVISOR_MAGIC),			\
 			"1"(VMWARE_PORT_CMD_##cmd),			\
-			"2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) :	\
+			"2"(VMWARE_HYPERVISOR_PORT), "3"(arg) :		\
 			"memory");
 
 struct vmware_steal_time {
@@ -60,6 +67,13 @@ struct vmware_steal_time {
 static DEFINE_PER_CPU(struct vmware_steal_time, steal_time) __aligned(64);
 static int has_steal_clock = 0;
 
+static int vmware_is_stealclock_available(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+	VMWARE_PORT2(STEALCLOCK, eax, ebx, ecx, edx, CMD_STEALCLOCK_STATUS);
+	printk("%s:%d %d %d\n", __FUNCTION__, __LINE__, eax, ebx);
+	return eax == 0 && ebx != STEALCLOCK_IS_NOT_AVAILABLE;
+}
 static int vmware_cmd_stealclock(int subcmd, uint32_t arg1, uint32_t arg2)
 {
 	uint32_t result, info;
@@ -283,10 +297,7 @@ static void __init paravirt_ops_setup(void)
 	pv_time_ops.sched_clock = vmware_sched_clock;
 	pv_time_ops.read_boot_clock64 = vmware_read_boot_clock64;
 
-	/*
-	 * TODO: check for STEAL_TIME support
-	 */
-	if (1) {
+	if (vmware_is_stealclock_available()) {
 		has_steal_clock = 1;
 		pv_time_ops.steal_clock = vmware_steal_clock;
 	}
@@ -328,12 +339,15 @@ static void __init vmware_platform_setup(void)
 
 	paravirt_ops_setup();
 
+	/* vmware_cpu_notifier is used only by STA */
+	if (has_steal_clock) {
 #ifdef CONFIG_SMP
-	smp_ops.smp_prepare_boot_cpu = vmware_smp_prepare_boot_cpu;
-	register_cpu_notifier(&vmware_cpu_notifier);
+		smp_ops.smp_prepare_boot_cpu = vmware_smp_prepare_boot_cpu;
+		register_cpu_notifier(&vmware_cpu_notifier);
 #else
-	vmware_guest_cpu_init();
+		vmware_guest_cpu_init();
 #endif
+	}
 
 #ifdef CONFIG_PCI
 	/* PCI BIOS service won't work from a PV guest. */
-- 
1.9.1


From a8f165433de453994aef84a9d6bf1704a18b3a95 Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Tue, 15 Mar 2016 22:29:23 +0000
Subject: [PATCH 10/12] STA: version with a single backdoor command.

---
 arch/x86/kernel/cpu/vmware.c | 35 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 196703c..743b8ad 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -41,12 +41,9 @@
 #define VMWARE_PORT_CMD_LEGACY_X2APIC	3
 #define VMWARE_PORT_CMD_VCPU_RESERVED	31
 #define VMWARE_PORT_CMD_STEALCLOCK	91
-# define CMD_STEALCLOCK_STATUS		0
-#  define STEALCLOCK_IS_NOT_AVAILABLE	0
-#  define STEALCLOCK_IS_ENABLED		1
-#  define STEALCLOCK_IS_DISABLED	2
-# define CMD_STEALCLOCK_ENABLE		1
-# define CMD_STEALCLOCK_DISABLE		2
+# define STEALCLOCK_IS_NOT_AVAILABLE	-1
+# define STEALCLOCK_IS_DISABLED		0
+# define STEALCLOCK_IS_ENABLED		1
 
 
 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
@@ -62,19 +59,12 @@
 
 struct vmware_steal_time {
 	uint64_t clock;	/* stolen time counter in units of vtsc */
-	uint64_t reserved;
+	uint64_t reserved[7];
 };
 static DEFINE_PER_CPU(struct vmware_steal_time, steal_time) __aligned(64);
 static int has_steal_clock = 0;
 
-static int vmware_is_stealclock_available(void)
-{
-	uint32_t eax, ebx, ecx, edx;
-	VMWARE_PORT2(STEALCLOCK, eax, ebx, ecx, edx, CMD_STEALCLOCK_STATUS);
-	printk("%s:%d %d %d\n", __FUNCTION__, __LINE__, eax, ebx);
-	return eax == 0 && ebx != STEALCLOCK_IS_NOT_AVAILABLE;
-}
-static int vmware_cmd_stealclock(int subcmd, uint32_t arg1, uint32_t arg2)
+static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
 {
 	uint32_t result, info;
 	__asm__ __volatile__ ("inl (%%dx)"
@@ -83,17 +73,22 @@ static int vmware_cmd_stealclock(int subcmd, uint32_t arg1, uint32_t arg2)
 		:       "a"  (VMWARE_HYPERVISOR_MAGIC),
 			"c"  (VMWARE_PORT_CMD_STEALCLOCK),
 			"d"  (VMWARE_HYPERVISOR_PORT),
-			"b"  (subcmd),
+			"b"  (0),
 			"S"  (arg1),
 			"D"  (arg2));
 	return result;
 }
 #define STEALCLOCK_ENABLE(pa)					\
-	vmware_cmd_stealclock(CMD_STEALCLOCK_ENABLE,		\
-			      (pa) >> 32, (pa) & 0xffffffff)
+	(vmware_cmd_stealclock((pa) >> 32, (pa) & 0xffffffff)   \
+			== STEALCLOCK_IS_ENABLED)
 
 #define STEALCLOCK_DISABLE()					\
-	vmware_cmd_stealclock(CMD_STEALCLOCK_DISABLE, 0, 0)
+	vmware_cmd_stealclock(0, 1)
+
+static int vmware_is_stealclock_available(void)
+{
+	return STEALCLOCK_DISABLE() != STEALCLOCK_IS_NOT_AVAILABLE;
+}
 
 static inline int __vmware_platform(void)
 {
@@ -201,7 +196,7 @@ static void vmware_register_steal_time(void)
 
 	memset(st, 0, sizeof(*st));
 
-	if (STEALCLOCK_ENABLE(slow_virt_to_phys(st)) != 0) {
+	if (!STEALCLOCK_ENABLE(slow_virt_to_phys(st))) {
 		has_steal_clock = 0;
 		return;
 	}
-- 
1.9.1


From 27b9b08cf68f55fbfa297eb047f7d1309e0a60cf Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Fri, 25 Mar 2016 01:14:17 +0000
Subject: [PATCH 11/12] Remove delays for smpboot

---
 arch/x86/kernel/smpboot.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b1f3ed9..8f0be52 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -560,7 +560,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
 	/*
 	 * Give the other CPU some time to accept the IPI.
 	 */
-	udelay(200);
+//	udelay(200);
 	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
 		maxlvt = lapic_get_maxlvt();
 		if (maxlvt > 3)			/* Due to the Pentium erratum 3AP.  */
@@ -665,7 +665,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 		/*
 		 * Give the other CPU some time to accept the IPI.
 		 */
-		udelay(300);
+//		udelay(300);
 
 		pr_debug("Startup point 1\n");
 
@@ -675,7 +675,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 		/*
 		 * Give the other CPU some time to accept the IPI.
 		 */
-		udelay(200);
+//		udelay(200);
 
 		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
 			apic_write(APIC_ESR, 0);
-- 
1.9.1


From 3fe2ad9c5031e059849ba0970ccee95ce07f8239 Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Tue, 29 Mar 2016 21:14:46 +0000
Subject: [PATCH 12/12] kmsg_dumper to vmware.log

---
 arch/x86/kernel/cpu/vmware.c | 143 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 139 insertions(+), 4 deletions(-)
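
Note: each logged line is one complete RPCI transaction against the
VMX: open a channel, announce the payload size, stream the bytes
through the high-bandwidth port (retrying if a checkpoint interrupts
the transfer), then close. A usage sketch built on the helpers this
patch adds (vmware_log_string is an invented wrapper, not part of the
patch):

    /* One vmware.log line per call; the payload must start with the
     * "log " command understood by the VMX. */
    static void vmware_log_string(const char *s)
    {
            vmw_msg msg;
            char buf[128];

            snprintf(buf, sizeof(buf), "log %s", s);
            if (vmware_log_open(&msg))
                    return;                 /* no RPCI channel available */
            vmware_log_send(&msg, buf);     /* best effort, like the dumper */
            vmware_log_close(&msg);
    }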

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 743b8ad..e9f7d52 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -30,10 +30,12 @@
 #include <linux/sched.h>
 #include <linux/cpu.h>
 #include <asm/pci_x86.h>
+#include <linux/kmsg_dump.h>
 
-#define CPUID_VMWARE_INFO_LEAF	0x40000000
-#define VMWARE_HYPERVISOR_MAGIC	0x564D5868
-#define VMWARE_HYPERVISOR_PORT	0x5658
+#define CPUID_VMWARE_INFO_LEAF		0x40000000
+#define VMWARE_HYPERVISOR_MAGIC		0x564D5868
+#define VMWARE_HYPERVISOR_PORT		0x5658
+#define VMWARE_HYPERVISOR_HB_PORT	0x5659
 
 #define VMWARE_PORT_CMD_GETVERSION	10
 #define VMWARE_PORT_CMD_GETHZ		45
@@ -44,7 +46,8 @@
 # define STEALCLOCK_IS_NOT_AVAILABLE	-1
 # define STEALCLOCK_IS_DISABLED		0
 # define STEALCLOCK_IS_ENABLED		1
-
+#define VMWARE_PORT_CMD_MESSAGE		30
+#define VMWARE_HB_PORT_CMD_MESSAGE	0
 
 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
 	VMWARE_PORT2(cmd, eax, ebx, ecx, edx, UINT_MAX)
@@ -303,6 +306,13 @@ static void __init paravirt_ops_setup(void)
 
 }
 
+static void kmsg_dumper_vmware_log(struct kmsg_dumper *dumper,
+				enum kmsg_dump_reason reason);
+
+static struct kmsg_dumper kmsg_dumper = {
+	.dump = kmsg_dumper_vmware_log
+};
+
 static void __init vmware_platform_setup(void)
 {
 	uint32_t eax, ebx, ecx, edx;
@@ -348,6 +358,7 @@ static void __init vmware_platform_setup(void)
 	/* PCI BIOS service won't work from a PV guest. */
 	pci_probe &= ~PCI_PROBE_BIOS;
 #endif
+	kmsg_dump_register(&kmsg_dumper);
 }
 
 /*
@@ -410,3 +421,127 @@ const __refconst struct hypervisor_x86 x86_hyper_vmware = {
 	.x2apic_available	= vmware_legacy_x2apic_available,
 };
 EXPORT_SYMBOL(x86_hyper_vmware);
+
+#define MESSAGE_STATUS_SUCCESS  (0x01 << 16)
+#define MESSAGE_STATUS_CPT      (0x10 << 16)
+#define MESSAGE_STATUS_HB       (0x80 << 16)
+
+#define RPCI_PROTOCOL_NUM       0x49435052 /* 'RPCI' */
+#define GUESTMSG_FLAG_COOKIE    0x80000000
+
+#define MESSAGE_TYPE_OPEN	(0 << 16)
+#define MESSAGE_TYPE_SENDSIZE	(1 << 16)
+#define MESSAGE_TYPE_CLOSE	(6 << 16)
+
+typedef struct {
+	uint32_t id;
+	uint32_t cookieHigh;
+	uint32_t cookieLow;
+} vmw_msg;
+
+static int
+vmware_log_open(vmw_msg *msg) {
+	uint32_t result, info, dx, si, di;
+	__asm__ __volatile__ ("inl (%%dx)"
+		:	"=a" (result),
+			"=c" (info),
+			"=d" (dx),
+			"=S" (si),
+			"=D" (di)
+		:       "a"  (VMWARE_HYPERVISOR_MAGIC),
+			"c"  (VMWARE_PORT_CMD_MESSAGE | MESSAGE_TYPE_OPEN),
+			"d"  (VMWARE_HYPERVISOR_PORT),
+			"b"  (RPCI_PROTOCOL_NUM | GUESTMSG_FLAG_COOKIE));
+
+	if ((info & MESSAGE_STATUS_SUCCESS) == 0)
+		return 1;
+
+	msg->id = dx & 0xffff0000;
+	msg->cookieHigh = si;
+	msg->cookieLow = di;
+	return 0;
+}
+
+static int
+vmware_log_close(vmw_msg *msg) {
+	uint32_t result, info;
+	__asm__ __volatile__ ("inl (%%dx)"
+		:	"=a" (result),
+			"=c" (info)
+		:       "a"  (VMWARE_HYPERVISOR_MAGIC),
+			"c"  (VMWARE_PORT_CMD_MESSAGE | MESSAGE_TYPE_CLOSE),
+			"d"  (VMWARE_HYPERVISOR_PORT | msg->id),
+			"b"  (0),
+			"S"  (msg->cookieHigh),
+			"D"  (msg->cookieLow));
+
+	if ((info & MESSAGE_STATUS_SUCCESS) == 0)
+		return 1;
+	return 0;
+}
+
+static int
+vmware_log_send(vmw_msg *msg, const char *string) {
+	uint32_t result, info;
+	uint32_t len = strlen(string);
+
+retry:
+	__asm__ __volatile__ ("inl (%%dx)"
+		:	"=a" (result),
+			"=c" (info)
+		:       "a"  (VMWARE_HYPERVISOR_MAGIC),
+			"c"  (VMWARE_PORT_CMD_MESSAGE | MESSAGE_TYPE_SENDSIZE),
+			"d"  (VMWARE_HYPERVISOR_PORT | msg->id),
+			"b"  (len),
+			"S"  (msg->cookieHigh),
+			"D"  (msg->cookieLow));
+
+	if ((info & MESSAGE_STATUS_SUCCESS) == 0 ||
+	    (info & MESSAGE_STATUS_HB) == 0)
+		/* Expected success + high-bandwidth. Give up. */
+		return 1;
+
+	__asm__ __volatile__ ("pushq %%rbp\n\t"
+			      "movl %[rbp], %%ebp\n\t"
+			      "cld\n\t"
+			      "rep; outsb\n\t"
+			      "popq %%rbp\n\t"
+		:	"=a" (result),
+			"=b" (info)
+		:       "a"  (VMWARE_HYPERVISOR_MAGIC),
+			"c"  (len),
+			"d"  (VMWARE_HYPERVISOR_HB_PORT | msg->id),
+			"b"  (VMWARE_HB_PORT_CMD_MESSAGE | MESSAGE_STATUS_SUCCESS),
+			"S"  (string),
+		  [rbp] "r"  (msg->cookieHigh),
+			"D"  (msg->cookieLow));
+
+	if ((info & MESSAGE_STATUS_SUCCESS) == 0) {
+		if (info & MESSAGE_STATUS_CPT)
+			/* A checkpoint occurred. Retry. */
+			goto retry;
+		return 1;
+	}
+	return 0;
+}
+
+static void kmsg_dumper_vmware_log(struct kmsg_dumper *dumper,
+				enum kmsg_dump_reason reason)
+{
+	vmw_msg msg;
+	static char line[1024];
+	size_t len = 0;
+
+	line[0] = 'l';
+	line[1] = 'o';
+	line[2] = 'g';
+	line[3] = ' ';
+
+	while (kmsg_dump_get_line(dumper, true, line + 4, sizeof(line) - 5, &len)) {
+		line[len + 4] = '\0';
+		if (vmware_log_open(&msg) ||
+		    vmware_log_send(&msg, line) ||
+		    vmware_log_close(&msg))
+			break;
+	}
+}
-- 
1.9.1