HTTPArchive / custom-metrics

Custom metrics to use with WebPageTest agents
Apache License 2.0
19 stars 22 forks source link

Robots meta 2022 #24

Closed jroakes closed 2 years ago

jroakes commented 2 years ago

Progress on https://github.com/HTTPArchive/almanac.httparchive.org/issues/2888

Updated the robots_meta custom metric. This year it adds two features: 1) it includes the indexifembedded directive, and 2) it parses iframe robots meta information.

The following tests have been done:

// https://www.boostability.com/content/the-affect-of-iframes-on-seo

"robots_meta": {
        "main_frame_robots_rendered": {
            "robots": {
                "noindex": false,
                "index": true,
                "follow": true,
                "none": false,
                "nofollow": false,
                "noarchive": false,
                "nosnippet": false,
                "unavailable_after": false,
                "max-snippet": true,
                "max-image-preview": true,
                "max-video-preview": true,
                "notranslate": false,
                "noimageindex": false,
                "nocache": false,
                "indexifembedded": false
            }
        },
        "main_frame_robots_raw": {
            "robots": {
                "noindex": false,
                "index": true,
                "follow": true,
                "none": false,
                "nofollow": false,
                "noarchive": false,
                "nosnippet": false,
                "unavailable_after": false,
                "max-snippet": true,
                "max-image-preview": true,
                "max-video-preview": true,
                "notranslate": false,
                "noimageindex": false,
                "nocache": false,
                "indexifembedded": false
            }
        },
        "main_frame_robots_headers": [],
        "iframe_robots_raw": {
            "google": {
                "noindex": 0,
                "index": 0,
                "follow": 0,
                "none": 0,
                "nofollow": 0,
                "noarchive": 0,
                "nosnippet": 0,
                "unavailable_after": 0,
                "max-snippet": 0,
                "max-image-preview": 0,
                "max-video-preview": 0,
                "notranslate": 1,
                "noimageindex": 0,
                "nocache": 0,
                "indexifembedded": 0
            }
        },
        "iframe_robots_headers": [],
        "main_frame_crawl_data": {
            "rendered": {
                "robots": {
                    "indexable": true,
                    "followable": true
                }
            },
            "raw": {
                "robots": {
                    "indexable": true,
                    "followable": true
                }
            },
            "headers": [],
            "all": {
                "robots": {
                    "indexable": true,
                    "followable": true
                }
            }
        }
    }
// https://patrickstox.com/
"robots_meta": {
        "main_frame_robots_rendered": {
            "googlebot": {
                "noindex": false,
                "index": false,
                "follow": false,
                "none": false,
                "nofollow": false,
                "noarchive": false,
                "nosnippet": true,
                "unavailable_after": false,
                "max-snippet": false,
                "max-image-preview": false,
                "max-video-preview": false,
                "notranslate": false,
                "noimageindex": false,
                "nocache": false,
                "indexifembedded": false
            }
        },
        "main_frame_robots_raw": {
            "googlebot": {
                "noindex": false,
                "index": false,
                "follow": false,
                "none": false,
                "nofollow": false,
                "noarchive": false,
                "nosnippet": true,
                "unavailable_after": false,
                "max-snippet": false,
                "max-image-preview": false,
                "max-video-preview": false,
                "notranslate": false,
                "noimageindex": false,
                "nocache": false,
                "indexifembedded": false
            }
        },
        "main_frame_robots_headers": [],
        "iframe_robots_raw": {
            "robots": {
                "noindex": 3,
                "index": 0,
                "follow": 0,
                "none": 0,
                "nofollow": 0,
                "noarchive": 0,
                "nosnippet": 0,
                "unavailable_after": 0,
                "max-snippet": 0,
                "max-image-preview": 0,
                "max-video-preview": 0,
                "notranslate": 0,
                "noimageindex": 0,
                "nocache": 0,
                "indexifembedded": 0
            }
        },
        "iframe_robots_headers": [],
        "main_frame_crawl_data": {
            "rendered": {
                "googlebot": {
                    "indexable": true,
                    "followable": true
                }
            },
            "raw": {
                "googlebot": {
                    "indexable": true,
                    "followable": true
                }
            },
            "headers": [],
            "all": {
                "googlebot": {
                    "indexable": true,
                    "followable": true
                }
            }
        }
    }
// https://soundcloud.com/
"robots_meta": {
                    "main_frame_robots_rendered": [],
                    "main_frame_robots_raw": [],
                    "main_frame_robots_headers": [],
                    "iframe_robots_raw": [],
                    "iframe_robots_headers": [],
                    "main_frame_crawl_data": {
                        "rendered": [],
                        "raw": [],
                        "headers": [],
                        "all": []
                    }
                }
// https://www.ibm.com/watson
"robots_meta": {
        "main_frame_robots_rendered": {
            "robots": {
                "noindex": false,
                "index": true,
                "follow": true,
                "none": false,
                "nofollow": false,
                "noarchive": false,
                "nosnippet": false,
                "unavailable_after": false,
                "max-snippet": false,
                "max-image-preview": false,
                "max-video-preview": false,
                "notranslate": false,
                "noimageindex": false,
                "nocache": false,
                "indexifembedded": false
            }
        },
        "main_frame_robots_raw": {
            "robots": {
                "noindex": false,
                "index": true,
                "follow": true,
                "none": false,
                "nofollow": false,
                "noarchive": false,
                "nosnippet": false,
                "unavailable_after": false,
                "max-snippet": false,
                "max-image-preview": false,
                "max-video-preview": false,
                "notranslate": false,
                "noimageindex": false,
                "nocache": false,
                "indexifembedded": false
            }
        },
        "main_frame_robots_headers": [],
        "iframe_robots_raw": [],
        "iframe_robots_headers": [],
        "main_frame_crawl_data": {
            "rendered": {
                "robots": {
                    "indexable": true,
                    "followable": true
                }
            },
            "raw": {
                "robots": {
                    "indexable": true,
                    "followable": true
                }
            },
            "headers": [],
            "all": {
                "robots": {
                    "indexable": true,
                    "followable": true
                }
            }
        }
    }