Open ArneTR opened 2 months ago
This is weird as this should have been fixed. Disk is not needed and should be ignored.
I can see that you tried to introduce some optional validation here: https://github.com/green-coding-solutions/green-metrics-tool/commit/edf3c6e8efab068c36c28cd3d8c1dc688cc3ded6
But still you are setting the Pydantic object as the main validator for the route: async def hog_add(measurements: List[HogMeasurement]):
So to my understanding instead of making some checks optional you add some checks on top.
I think the validate_measurement_data() can be simplified as many checks can be moved to the Pydantic object. Then disk must be made optional in https://github.com/green-coding-solutions/green-metrics-tool/blob/3caa354a9b521f953ea8702235e7472ce18701f0/api/object_specifications.py#L46C3-L56C30
I implemented a second validator as it seemed cleaner than describing the whole object in pydantic which I find quite confusing for this case. But I get the argument that we should use one system.
from pydantic import BaseModel, Field, validator, ValidationError
from typing import List, Dict, Optional, Union
# Define Coalition model
class Coalition(BaseModel):
name: str
tasks: List[Dict]
energy_impact_per_s: float
cputime_ms_per_s: float
diskio_bytesread: int
diskio_byteswritten: int
intr_wakeups: int
idle_wakeups: int
# Custom validator to ensure 'tasks' is a list
@validator('tasks')
def validate_tasks(cls, v):
if not isinstance(v, list):
raise ValueError("Expected 'tasks' to be a list")
return v
# Define Processor model
class Processor(BaseModel):
combined_power: Optional[float] = None
cpu_energy: Optional[float] = None
gpu_energy: Optional[float] = None
ane_energy: Optional[float] = None
package_joules: Optional[float] = None
cpu_joules: Optional[float] = None
igpu_watts: Optional[float] = None
# Custom validator to check for required fields based on processor type
@validator('combined_power', 'cpu_energy', 'gpu_energy', pre=True, always=True)
def validate_processor_fields(cls, v, values, field):
if 'ane_energy' in values:
required_fields = ['combined_power', 'cpu_energy', 'gpu_energy', 'ane_energy']
elif 'package_joules' in values:
required_fields = ['package_joules', 'cpu_joules', 'igpu_watts']
else:
raise ValueError("Unknown processor type")
for req_field in required_fields:
if req_field not in values:
raise ValueError(f"Missing required processor field: {req_field}")
return v
# Define GPU model
class GPU(BaseModel):
gpu_usage: float
memory_usage: float
# Define the main Measurement model
class Measurement(BaseModel):
is_delta: bool
elapsed_ns: int
timestamp: int
coalitions: List[Coalition]
all_tasks: Dict
network: Optional[Dict] = None # Network is optional
disk: Optional[Dict] = None # Disk is optional
interrupts: List[Dict]
processor: Processor
thermal_pressure: str
sfi: Dict
gpu: Optional[GPU] = None # GPU is optional
# Custom validation for 'all_tasks'
@validator('all_tasks')
def validate_all_tasks(cls, v):
if 'energy_impact_per_s' not in v:
raise ValueError("Missing 'energy_impact_per_s' in 'all_tasks'")
return v
# Validate coalitions to ensure correct structure
@validator('coalitions')
def validate_coalitions(cls, v):
if not isinstance(v, list):
raise ValueError("Expected 'coalitions' to be a list")
return v
# Custom validation for the top-level required fields
@validator('thermal_pressure')
def validate_thermal_pressure(cls, v):
if not v:
raise ValueError("Missing 'thermal_pressure'")
return v
# Example usage
try:
data = {
"is_delta": True,
"elapsed_ns": 123456789,
"timestamp": 1627543200,
"coalitions": [
{
"name": "coalition1",
"tasks": [{}],
"energy_impact_per_s": 0.5,
"cputime_ms_per_s": 100,
"diskio_bytesread": 1024,
"diskio_byteswritten": 2048,
"intr_wakeups": 10,
"idle_wakeups": 5
}
],
"all_tasks": {
"energy_impact_per_s": 0.2
},
"processor": {
"combined_power": 10.0,
"cpu_energy": 5.0,
"gpu_energy": 3.0,
"ane_energy": 2.0
},
"thermal_pressure": "medium",
"sfi": {},
"interrupts": [{}]
}
measurement = Measurement(**data)
print(measurement)
except ValidationError as e:
print(e)
The JSON sent from PowerHog is expected to have a
disk
key. However sometimes it is missing.Example error:
@ribalba I sent you the JSON separately via email because it contains sensitive content.
For now I excluded this error from triggering. Unsure if that is a viable solution. See commit here: https://github.com/green-coding-solutions/green-metrics-tool/pull/897/commits/a4c29aaa9e94257de72ba7289249543cb24a2ba0