RickStrahl / Westwind.HtmlPackager

A small utility class used to package HTML content into a self-contained HTML document, either as a single file or as a folder with all dependencies copied locally.
35 stars 9 forks source link

FavIcon(s) are not supported #7

Closed StefH closed 1 year ago

StefH commented 2 years ago

A link like this is not inlined.

<link rel="shortcut icon" href="/images/favicon.png">

Possible code fix could be:

/// <summary>
/// Replaces image references in the document with inline base64 <c>data:</c> URIs.
/// Handles the <c>src</c> attribute of every &lt;img&gt; element and the <c>href</c>
/// attribute of every &lt;link&gt; element whose <c>rel</c> contains "icon"
/// (for example <c>rel="shortcut icon"</c>).
/// </summary>
/// <param name="doc">The parsed HTML document; matching nodes are modified in place.</param>
/// <param name="baseUri">Base location handed to <c>Append</c> to resolve relative URLs.</param>
/// <remarks>
/// Resources that cannot be read from disk or resolved relative to <paramref name="baseUri"/>
/// are skipped silently. A failed download of an absolute <c>http</c> URL is not caught here
/// and propagates to the caller.
/// </remarks>
private static void ProcessImages(HtmlDocument doc, Uri baseUri)
{
    // https://www.w3schools.com/tags/tag_img.asp
    // SelectNodes yields null when nothing matches; treat that as "no images" instead of
    // returning early, otherwise a page without <img> tags would never get its favicon inlined.
    var imageNodes = doc.DocumentNode.SelectNodes("//img");
    var images = (imageNodes ?? Enumerable.Empty<HtmlNode>())
        .Select(node => new { attr = "src", node })
        .ToList();

    // https://www.w3schools.com/tags/att_link_rel.asp
    var linkNodes = doc.DocumentNode.SelectNodes("//link");
    if (linkNodes != null)
    {
        foreach (var linkNode in linkNodes)
        {
            var href = linkNode.Attributes["href"]?.Value;
            var rel = linkNode.Attributes["rel"]?.Value;

            // Both attributes are optional in HTML, so guard each one before use.
            if (href != null && rel != null && rel.Contains("icon", StringComparison.OrdinalIgnoreCase))
            {
                images.Add(new { attr = "href", node = linkNode });
            }
        }
    }

    foreach (var image in images)
    {
        var url = image.node.Attributes[image.attr]?.Value;
        if (url == null)
        {
            continue;
        }

        // Already inlined: nothing to fetch, and resolving it as a relative path is pointless.
        if (url.StartsWith("data:", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        byte[] imageData;
        string contentType;

        if (url.StartsWith("http", StringComparison.Ordinal))
        {
            // Absolute web URL: trust the server-reported content type.
            // Download failures are intentionally not caught in this branch.
            using var http = new WebClient();
            imageData = http.DownloadData(url);
            contentType = http.ResponseHeaders[HttpResponseHeader.ContentType];
        }
        else if (url.StartsWith("file:///", StringComparison.Ordinal))
        {
            // Strip the "file:///" scheme prefix (8 characters) to get a local path.
            url = url.Substring(8);

            try
            {
                imageData = File.ReadAllBytes(url);
                contentType = GetMimeTypeFromUrl(url);
            }
            catch
            {
                // Best effort: leave the reference untouched if the file cannot be read.
                continue;
            }
        }
        else // Relative Path
        {
            try
            {
                var origUri = Append(baseUri, url);
                url = origUri.AbsoluteUri;

                if (url.StartsWith("http", StringComparison.Ordinal) && url.Contains("://"))
                {
                    using var http = new WebClient();
                    imageData = http.DownloadData(url);
                }
                else
                {
                    imageData = File.ReadAllBytes(WebUtility.UrlDecode(url.Replace("file:///", "")));
                }

                contentType = GetMimeTypeFromUrl(url);
            }
            catch
            {
                // Best effort: leave the reference untouched if it cannot be resolved or read.
                continue;
            }
        }

        if (imageData == null)
        {
            continue;
        }

        // Only replace the node.Name for a real image, not for an icon.
        if (image.attr == "src")
        {
            image.node.Name = "img";
        }

        var data = $"data:{contentType};base64,{Convert.ToBase64String(imageData)}";
        image.node.Attributes[image.attr].Value = data;
    }
}
RickStrahl commented 1 year ago

Late but added. It needs to be src for the added node, but yes this works...