posit-dev / great-tables

Make awesome display tables using Python.
https://posit-dev.github.io/great-tables/
MIT License
1.86k stars 67 forks source link

Include SVG as available output format for saving table to file #494

Open claysmyth opened 2 weeks ago

claysmyth commented 2 weeks ago

Prework

Proposal

The ability to save tables in SVG format would be very useful for including Great Tables output in composite scientific figures! That said, I don't know how large an undertaking this would be.

MarekOzana commented 4 days ago

SVG output would be great! I am using Typst, and it cannot import PDFs, only SVGs.

claysmyth commented 3 days ago

I added the following to `_save_screenshot` in `_export.py`, and it does the trick. It is a kludge, so I am not creating a pull request.

if str(path).lower().endswith(".svg"):
    # Export the rendered table as a standalone SVG file.
    #
    # The table markup is embedded in an SVG <foreignObject> as XHTML, with
    # the page's CSS inlined in a <style> element. Everything placed inside
    # the SVG must therefore be well-formed XML, not merely valid HTML.
    #
    # Relies on names from the enclosing scope: `driver` (Selenium WebDriver),
    # `By`, `path`, `required_width` and `required_height`.
    # `str(path)` lets a `pathlib.Path` through and `.lower()` accepts ".SVG".
    from xml.sax.saxutils import escape as _xml_escape

    # Raises the driver's usual "no such element" error if no table exists.
    table_element = driver.find_element(by=By.TAG_NAME, value="table")

    # Serialize with XMLSerializer rather than reading `outerHTML`.
    # HTML serialization leaves void elements (<col>, <br>, <img>) unclosed
    # and emits HTML-only entities such as `&nbsp;`; both are XML errors.
    # XMLSerializer yields well-formed, correctly escaped XHTML, so no
    # regex-based re-escaping (and no "un-double-escape" pass) is needed.
    table_html = driver.execute_script(
        "return new XMLSerializer().serializeToString(arguments[0]);",
        table_element,
    )

    # Kept from the original snippet: render <em> as an explicitly italic span.
    table_html = (
        table_html
        .replace("<em>", "<span style='font-style: italic'>")
        .replace("</em>", "</span>")
    )

    # Collect CSS: every readable stylesheet rule, plus a snapshot of the
    # computed style of each classed element inside the table.
    # Raw string: the backslashes (`\n`, `\s`) are meant for JavaScript.
    styles = driver.execute_script(
        r"""
        var table = arguments[0];
        var styles = '';

        // Copy stylesheet rules. Reading cssRules throws for cross-origin
        // sheets, so failures are logged and skipped (best effort).
        var styleSheets = document.styleSheets;
        for (var i = 0; i < styleSheets.length; i++) {
            try {
                var rules = styleSheets[i].cssRules;
                for (var j = 0; j < rules.length; j++) {
                    styles += rules[j].cssText + '\n';
                }
            } catch (e) {
                console.log('Error reading stylesheet:', e);
            }
        }

        // Important style properties to capture
        var styleProps = [
            'font-family', 'font-size', 'font-weight', 'color',
            'background-color', 'border', 'border-color', 'border-width',
            'border-style', 'padding', 'margin', 'text-align', 'vertical-align',
            'width', 'height', 'display', 'position', 'top', 'left',
            'border-collapse', 'border-spacing', 'line-height',
            'border-top', 'border-bottom', 'border-left', 'border-right',
            'padding-top', 'padding-bottom', 'padding-left', 'padding-right',
            'background', 'white-space', 'text-decoration', 'font-style'
        ];

        // Capture computed styles for each descendant that has a class.
        var elements = table.getElementsByTagName('*');
        for (var k = 0; k < elements.length; k++) {
            var el = elements[k];

            // Read the attribute, not `el.className`: on inline SVG elements
            // className is an SVGAnimatedString with no string methods.
            var classAttr = (el.getAttribute('class') || '').trim();
            if (!classAttr) {
                continue;
            }

            var computed = window.getComputedStyle(el);
            var classStyles = '';
            styleProps.forEach(function (prop) {
                var value = computed.getPropertyValue(prop);
                if (value) {
                    classStyles += prop + ':' + value + ';';
                }
            });
            if (!classStyles) {
                continue;
            }

            // Build a compound selector (.a.b.c). Splitting on runs of
            // whitespace avoids empty tokens ("..") and CSS.escape keeps
            // unusual class names valid.
            var selector = classAttr.split(/\s+/).map(function (name) {
                return '.' + CSS.escape(name);
            }).join('');
            styles += selector + '{' + classStyles + '}\n';
        }
        return styles;
        """,
        table_element,
    )

    # The <style> element sits in an XML document, so `&`, `<` and `>` in the
    # CSS must be escaped; the XML parser restores them before CSS parsing.
    escaped_styles = _xml_escape(styles or "")

    # Create SVG wrapper with embedded styles. The XML declaration must be
    # the very first thing in the file, hence no leading newline or indent.
    svg_content = f'''<?xml version="1.0" encoding="UTF-8" standalone="no"?>
        <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
             width="{required_width}" height="{required_height}">
            <foreignObject width="100%" height="100%">
                <div xmlns="http://www.w3.org/1999/xhtml">
                    <style>
                        /* Base table styles */
                        table {{
                            border-collapse: collapse;
                            border-spacing: 0;
                            font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, "Helvetica Neue", sans-serif;
                            width: 100%;
                        }}
                        th, td {{
                            padding: 8px;
                            border: 1px solid #ddd;
                        }}
                        th {{
                            background-color: #f8f9fa;
                            font-weight: bold;
                        }}
                        {escaped_styles}
                    </style>
                    {table_html}
                </div>
            </foreignObject>
        </svg>
        '''

    # Save the SVG file (UTF-8, matching the XML declaration).
    with open(path, "w", encoding="utf-8") as f:
        f.write(svg_content)