luckyframework / lucky

A full-featured Crystal web framework that catches bugs for you, runs incredibly fast, and helps you write code that lasts.
https://luckyframework.org
MIT License
2.57k stars 156 forks source link

Adding a strip_tags helper with allow list capabilities #1853

Open jwoertink opened 3 months ago

jwoertink commented 3 months ago

@robacarp came up with this nice little snippet. Maybe we can just plop it in as a helper method:

require "xml"

# Reduces an HTML fragment to an allow list of tags and attributes.
#
# The input is parsed with `XML.parse_html` and the resulting tree is walked
# node by node. Text is always kept, allowed elements are re-emitted (with only
# their allowed attributes) through an `XML::Builder`, and every other node is
# dropped while its children are still visited, so the text nested inside a
# disallowed tag survives.
#
# NOTE(review): attribute *values* are copied through unchanged, so allowing
# `href` also allows e.g. `javascript:` URLs. Filter values at the call site if
# the input is untrusted.
class Sanitize
  # Sanitizes *html* in one call and returns the rendered string.
  #
  # *allowed_tags* and *allowed_attributes* are matched exactly against the
  # parsed node and attribute names. Both default to empty lists, in which case
  # no element is re-emitted and only text is written.
  def self.strip_tags(html, allowed_tags : Array(String) = [] of String, allowed_attributes : Array(String) = [] of String) : String
    instance = new html, allowed_tags, allowed_attributes
    instance.strip
    instance.render
  end

  # Stores the unsanitized input and the allow lists, and prepares the
  # in-memory buffer that the builder writes the sanitized output into.
  def initialize(@dirty : String, @allowed_tags : Array(String), @allowed_attributes : Array(String))
    @io = IO::Memory.new
    @builder = XML::Builder.new @io
  end

  # Parses the stored input and writes the sanitized result into the builder.
  # Call `render` afterwards to obtain the output.
  def strip : Nil
    # Nothing to sanitize; skip the parser entirely instead of handing it an
    # empty document.
    return if @dirty.blank?

    # NOIMPLIED / NODEFDTD are passed so the parser is asked not to add implied
    # wrapper elements or a default doctype around the fragment.
    parsed_document = XML.parse_html @dirty, XML::HTMLParserOptions::NOIMPLIED | XML::HTMLParserOptions::NODEFDTD
    traverse parsed_document
  end

  # Finishes the builder's output and returns everything written so far.
  def render : String
    @builder.end_document
    @builder.flush
    @io.to_s
  end

  # No-op overload so a missing node can be passed without a nil check.
  def traverse(node : Nil)
  end

  # Writes *node* to the builder according to the allow lists, recursing into
  # its children.
  def traverse(node : XML::Node) : Nil
    if node.text?
      @builder.text node.content
      return
    end

    # Only genuine elements may be re-emitted as tags. Checking the name alone
    # would also match other node kinds that happen to share a name with an
    # allowed tag. Everything else (the document itself, disallowed elements,
    # other node kinds) is skipped, but its children are still visited.
    unless node.element? && @allowed_tags.includes?(node.name)
      return traverse_children node
    end

    kept_attributes = {} of String => String
    node.attributes.each do |attribute|
      if @allowed_attributes.includes? attribute.name
        kept_attributes[attribute.name] = attribute.content
      end
    end

    @builder.element node.name, attributes: kept_attributes do
      traverse_children node
    end
  end

  # Visits every direct child of *node* in document order.
  def traverse_children(node : XML::Node) : Nil
    node.children.each do |child|
      traverse child
    end
  end
end

# Demo driver: runs a few malformed HTML samples through Sanitize.strip_tags
# and prints each input next to its sanitized output.
permitted_tags = ["a", "p", "b"]
permitted_attributes = ["href"]

[
  "<p>hell</div>o</p>",
  "<p>he<b>l<l</b>o</p><p>world</p>",
  "<p>he<b>l<a href='http://www.google.com'>l</b>o</p><p>world</p>",
].each do |markup|
  # Print the input first so it is visible even if sanitizing fails.
  puts "starting: #{markup}"
  sanitized = Sanitize.strip_tags(markup, permitted_tags, permitted_attributes)
  puts "finished: #{sanitized}"
  puts
end