const cleaner = require('clean-html');
const fs = require('fs');
fs.readFile('foo.html', 'utf8', (err, input) => {
cleaner.clean(input, output => console.log(output));
});
Options can be provided like so:
const options = {
'break-around-comments': false,
'decode-entities': true,
'remove-tags': ['b', 'i', 'center', 'font'],
'wrap': 80
};
cleaner.clean(input, options, output => {...});
If installed globally, just run `clean-html`. Otherwise, run `npx clean-html`.
Input can be piped from stdin:
$ echo '<h1>Hello, World!</h1>' | clean-html
$ cat foo.html | clean-html
Or you can provide a filename as the first argument:
$ clean-html foo.html
Output can be redirected to another file:
$ clean-html foo.html > bar.html
Or you can edit the file in place:
$ clean-html foo.html --in-place
Other options can be provided like so:
$ clean-html foo.html \
--break-around-comments \
--decode-entities false \
--remove-tags b,i,center,font \
--wrap 80
Array type option values should be separated by commas. Boolean type options are disabled if followed by `false` and enabled if followed by `true` or nothing.
Allows attributes to be output without values. For example, `checked` instead of `checked=""`.
Please set to `true` for Angular components or for `<input>` elements.
Type: Boolean
Default: false
Adds line breaks before and after comments.
Type: Boolean
Default: true
Tags that should have line breaks added before and after.
Type: Array of strings
Default: ['body', 'blockquote', 'br', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'link', 'meta', 'p', 'table', 'title', 'td', 'tr']
Replaces HTML entities with their decoded equivalents. e.g., if `true` then `&nbsp;` will be replaced by a space character.
Type: Boolean
Default: false
The string to use for indentation. e.g., a tab character or one or more spaces.
Type: String
Default: `'  '` (two spaces)
Converts all tag names to lower case.
Please set to `false` for Angular components.
Type: Boolean
Default: true
Converts all attribute names to lower case.
Please set to `false` for Angular components.
Type: Boolean
Default: true
Tags that should be left alone. i.e., content inside these tags will not be formatted or indented.
Type: Array of strings
Default: ['script', 'style']
Attributes to remove from markup.
Type: Array of strings or regular expressions
Default: ['align', 'bgcolor', 'border', 'cellpadding', 'cellspacing', 'color', 'height', 'target', 'valign', 'width']
Removes comments.
Type: Boolean
Default: false
Tags to remove from markup if empty.
Type: Array of strings or regular expressions
Default: []
Tags to always remove from markup. Nested content is preserved.
Type: Array of strings or regular expressions
Default: ['center', 'font']
The column number where lines should wrap. Set to 0 to disable line wrapping.
Type: Integer
Default: 120
These options exist for your convenience.
Additional tags to include in `break-around-tags`.
Type: Array of strings
Default: null
Additional attributes to include in `remove-attributes`.
Type: Array of strings
Default: null
Additional tags to include in `remove-tags`.
Type: Array of strings
Default: null