vmware / pyvmomi-community-samples

A place for community contributed samples for the pyVmomi library.
Apache License 2.0
1.01k stars 922 forks source link

Vcenter inaccurate measurements of VMs #708

Open DaemonCypher opened 2 years ago

DaemonCypher commented 2 years ago

Describe the bug

I am merging two examples from the community example vm_perf_example.py and vminfo_quick.py to create a measurement on the status and health of the VM, but when I run the code I getting numbers that are not possible for a VM. I did notice that sometimes when the cpu.usage.average is well above 100 it is because of the number of virtual cpus tied to each machine so average is 400 and virtual cpu is 8 than the actual average is 50 or 50%. Even with this calculation the numbers on VMs are still too far off from actual numbers.

http://vijava.sourceforge.net/vSphereAPIDoc/ver5/ReferenceGuide/vim.VirtualMachine.html https://communities.vmware.com/t5/Storage-Performance/vCenter-Performance-Counters/ta-p/2790328

code -->

!/usr/bin/env python3

from future import annotations

from typing import Optional import sys from webbrowser import get from pyVmomi import vim, vmodl # pylint: disable=no-name-in-module from pyVim.connect import SmartConnect from pyVim.task import WaitForTask from tools import cli, service_instance, pchelper import time

def connect(): try: si = SmartConnect(host="blank", user="blank", pwd="blank", disableSslCertValidation=True) return si

except IOError as io_error:
    print(io_error)

return None

def createPropDict(): si = connect()

List of properties from the VM you want to retrieve

# See: http://goo.gl/fjTEpW
# for all properties.
vm_properties = ["name", "config.uuid","guest.guestState",
                "guestHeartbeatStatus","config.hardware.numCPU",
                "config.hardware.memoryMB","parent"]
view = pchelper.get_container_view(si, obj_type=[vim.VirtualMachine])
vm_data = pchelper.collect_properties(si,
                                      view_ref=view,
                                      obj_type=vim.VirtualMachine,
                                      path_set=vm_properties,
                                      include_mors=True)
vm_dict={}
# removing indexes from the dictionary
for vm in vm_data:
    vm_dict[vm['name']]=vm
return vm_dict

TODO there is a bug where the information being pulled from VM's

is inaccurate i.e. VM#1 200% cpu usage but only 1 core and the same

can be said with the amount of memory the VM has

def infoDump():

si=connect()
content = si.RetrieveContent()
perf_manager = content.perfManager
# create a mapping from performance stats to their counterIDs
# counterInfo: [performance stat => counterId]
# performance stat example: cpu.usagemhz.LATEST
# counterId example: 6
counter_info = {}
for counter in perf_manager.perfCounter:
    full_name = counter.groupInfo.key + "." + \
                counter.nameInfo.key + "." + counter.rollupType
    counter_info[full_name] = counter.key
# create a list of vim.VirtualMachine objects so
# that we can query them for statistics
container = content.rootFolder
view_type = [vim.VirtualMachine]
recursive = True

container_view = content.viewManager.CreateContainerView(container, view_type, recursive)
children = container_view.view
# Loop through all the VMs
for child in children:
    # Get all available metric IDs for this VM
    # refer to more indepth info 
    # https://communities.vmware.com/t5/Storage-Performance/vCenter-Performance-Counters/ta-p/2790328
    counter_ids = [2,24,98]
    # Using the IDs form a list of MetricId
    # objects for building the Query Spec
    metric_ids = [vim.PerformanceManager.MetricId(
        counterId=counter, instance="*") for counter in counter_ids]
    # Build the specification to be used
    # for querying the performance manager
    spec = vim.PerformanceManager.QuerySpec(maxSample=1,
                                            entity=child,
                                            metricId=metric_ids)
    # Query the performance manager
    # based on the metrics created above
    result_stats = perf_manager.QueryStats(querySpec=[spec])
    # Loop through the results and print the output
    output = ""
    vm_prop_dict=createPropDict()
    for _ in result_stats:
        vm_prop=vm_prop_dict[child.summary.config.name]
        output += "-" * 70 + "\n"
        output += "Name:                      {0}".format(child.summary.config.name) + "\n"
        output += "BIOS UUID:                 {0}".format(vm_prop["config.uuid"]) + "\n"
        output += "Guest PowerState:          {0}".format(vm_prop["guest.guestState"]) + "\n"
        output += "CPUs:                      {0}".format(vm_prop["config.hardware.numCPU"]) + "\n"
        output += "Health:                    {0}".format(vm_prop["guestHeartbeatStatus"]) + "\n"
        output += "Memory(MB):                {0}".format(vm_prop["config.hardware.memoryMB"]) + "\n"
        if vm_prop["guestHeartbeatStatus"] == "gray":
            output += "gray - VMware Tools are not installed or not running." + "\n"
        elif vm_prop["guestHeartbeatStatus"] == "red":
            output += "red - No heartbeat. Guest operating system may have stopped responding." + "\n"
        elif vm_prop["guestHeartbeatStatus"] == "yellow":
            output += "yellow - Intermittent heartbeat. May be due to guest load." + "\n"
        elif vm_prop["guestHeartbeatStatus"] == "green":
            output += "green - Guest operating system is responding normally." + "\n"
        else:
            output += "ERROR: No heart beat status found" + "\n"
        for val in result_stats[0].value:
            if sys.version_info[0] > 2:
                counterinfo_k_to_v = list(counter_info.keys())[
                    list(counter_info.values()).index(val.id.counterId)]
            # python2
            else:
                counterinfo_k_to_v = counter_info.keys()[
                    counter_info.values().index(val.id.counterId)]
            if val.id.instance == '':
                if counterinfo_k_to_v == "cpu.usage.average":
                    true_cpu_usage = val.value[0] / vm_prop["config.hardware.numCPU"]
                    true_cpu_usage = round(true_cpu_usage,6)
                    output += "CPU Usage:                 {0}%".format(true_cpu_usage) + "\n"                     
                elif counterinfo_k_to_v =="mem.usage.average":
                    true_mem_usage = vm_prop["config.hardware.memoryMB"] / val.value[0]
                    output += "Memory Usage:              {0}%".format(true_mem_usage) + "\n"         
                else:
                    output += "ERROR: Nothing Found"
                output += "%s: %s\n" % (
                    counterinfo_k_to_v, str(val.value[0]))
            else:
                if counterinfo_k_to_v == "cpu.usage.average":
                    true_cpu_usage = val.value[0] / vm_prop["config.hardware.numCPU"]
                    true_cpu_usage = round(true_cpu_usage,6)
                    output += "CPU Usage:                 {0}%".format(true_cpu_usage) + "\n"                     
                elif counterinfo_k_to_v =="mem.usage.average":
                    true_mem_usage = vm_prop["config.hardware.memoryMB"] / val.value[0]
                    output += "Memory Usage:              {0}%".format(true_mem_usage) + "\n"         
                else:
                    output += "ERROR: Nothing Found"
                output += "%s (%s): %s\n" % (
                    counterinfo_k_to_v, val.id.instance, str(val.value[0]))

    print(output)

Name: Blank BIOS UUID: Blank Guest PowerState: running CPUs: 2 Health: green Memory(MB): 2048 green - Guest operating system is responding normally. CPU Usage: 59.0% cpu.usage.average: 118 Memory Usage: 0.6694998365478915% mem.usage.average: 3059


Name: Blank BIOS UUID: Blank Guest PowerState: running CPUs: 2 Health: green Memory(MB): 32768 green - Guest operating system is responding normally. CPU Usage: 306.0% cpu.usage.average: 612 Memory Usage: 12.972288202692003% mem.usage.average: 2526

Reproduction steps

blank

Expected behavior

Blank

Additional context

No response