mgifford opened 1 year ago
Hi @mgifford, yes I think that is correct. We used to have CSV support, but it seems it was removed a long time ago; sorry, that seems to be missing from the changelog.
I think the problem is how to make it configurable which metrics to save in the CSV file. We have many, many metrics, you probably do not want them all in the file, and we do not have a good pattern for choosing metrics yet. Building a custom plugin would be easy, and I can help guide you; there you could cherry-pick the metrics you are interested in.
For keeping track of metrics over time I use Graphite and Grafana.
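If you want to try that route, it is a single flag on the command line. A minimal example, assuming you have a reachable Graphite host (my.graphite.example is a placeholder):

sitespeed.io --graphite.host my.graphite.example https://www.whitehouse.gov/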
Seems a bit backwards, but here's a Python script that will generate a CSV from the detailed.html pages. It works for now, but it isn't a very direct path.
import os
import csv
from datetime import datetime

from bs4 import BeautifulSoup  # HTML parsing

# Define the output CSV file
output_csv = "output.csv"

# Column headers: the first six come from the directory layout,
# the rest are metric names as they appear in detailed.html
header_array = ["Scan", "Count", "Date", "Date/Time", "Site", "Pages", "Coach score", "Coach performance score",
                "Privacy score", "Best Practice score", "Image requests", "CSS requests", "Javascript requests",
                "Font requests", "Total requests", "Image size", "HTML size", "CSS size", "Javascript size",
                "Font size", "Total size", "First Paint", "Fully Loaded", "Largest Contentful Paint", "First Contentful Paint",
                "backEndTime", "domContentLoadedTime", "domInteractiveTime", "domainLookupTime", "frontEndTime",
                "pageDownloadTime", "pageLoadTime", "redirectionTime", "serverConnectionTime", "serverResponseTime",
                "Cumulative Layout Shift", "First Visual Change", "Speed Index", "Visual Complete 85%", "Visual Complete 95%",
                "Visual Complete 99%", "Last Visual Change", "CPU Long Tasks", "CPU Long Tasks total duration",
                "Total Blocking Time", "Max Potential First Input Delay", "Axe Critical Violations", "Axe Serious Violations",
                "Axe Minor Violations", "Axe Moderate Violations", "Total CO2", "CO2 per page view", "CO2 First Party", "CO2 Third Party"]

# Unit normalization: times are converted to seconds, sizes to KB.
# Longer suffixes are listed first so " ms" matches before " s"
# and " KB"/" MB" match before " B".
UNIT_FACTORS = [
    (" ms", 1 / 1000),  # milliseconds -> seconds
    (" s", 1.0),        # already seconds
    (" MB", 1024),      # megabytes -> KB
    (" KB", 1.0),       # already KB
    (" B", 1 / 1024),   # bytes -> KB
    (" b", 1 / 1024),   # bytes -> KB
]

def normalize_value(value_text):
    """Strip a unit suffix and convert the number to seconds or KB."""
    for suffix, factor in UNIT_FACTORS:
        if value_text.endswith(suffix):
            try:
                return str(float(value_text[:-len(suffix)].strip()) * factor)
            except ValueError:
                return value_text
    return value_text

# Create or overwrite the CSV file with the header row
with open(output_csv, 'w', newline='') as csvfile:
    csv.writer(csvfile).writerow(header_array)

# Scan counter and per-site run counter
counter = 0
subdir_count = {}

# Search for detailed.html files and process them
for root, dirs, files in os.walk(".", topdown=True):
    for filename in files:
        if filename != "detailed.html":
            continue
        html_file = os.path.join(root, filename)
        try:
            # Read and parse the HTML file
            with open(html_file, 'r') as hfile:
                soup = BeautifulSoup(hfile.read(), "html.parser")

            # dir1 is the timestamped run directory, dir2 the site (domain) directory
            dir1a = os.path.dirname(html_file)
            dir1 = os.path.basename(dir1a)
            dir2 = os.path.basename(os.path.dirname(dir1a))

            # Count run sub-directories for each site
            subdir_count[dir2] = subdir_count.get(dir2, 0) + 1
            print("Directories:", dir1, dir2, subdir_count[dir2])

            # The first <h2> starts with the number of pages tested
            h2 = soup.find('h2')
            page_count = h2.text.split()[0] if h2 else ''
            print("Page Count:", page_count)

            # Collect metric name -> normalized value from the "detailed" table
            values = {}
            table = soup.find('table', id='detailed')
            if table:
                for row in table.find_all('tr'):
                    columns = row.find_all('td')
                    if len(columns) >= 4:
                        header_text = columns[0].text.strip()
                        values[header_text] = normalize_value(columns[3].text.strip())

            # The run directory name starts with a timestamp, e.g. 2024-01-31-08-15-00
            date_obj = datetime.strptime(dir1[:19], "%Y-%m-%d-%H-%M-%S")

            # First six columns come from the directory layout, the rest from the table
            data = [counter, subdir_count[dir2], dir1[:10], date_obj.isoformat(), dir2, page_count]
            data.extend(values.get(header, "") for header in header_array[6:])
            counter += 1

            # Append the row to the CSV file
            with open(output_csv, 'a', newline='') as csvfile:
                csv.writer(csvfile).writerow(data)
        except Exception as e:
            print(f"Error processing file {html_file}: {e}")

print("CSV file generated:", output_csv)
Have you read the documentation?
URL
https://www.whitehouse.gov/
What are you trying to accomplish
I want to have a CSV of the errors so that it is easier to compare over time. I would also like to be able to share the results without sending a 200 MB file. I just want a nice, light way to share a .csv with teams so that they can see the metrics that matter to them.
There might be other metrics that are useful, but sharing or storing the whole report eats up gigabytes of disk space over time.
It looks like it was added here https://www.sitespeed.io/release-notes/1.8.2/
What browser did you use?
Firefox
How to reproduce
Log output