@@ -0,0 +1,874 @@
/*
sanitize-html (Apostrophe Technologies)
SOURCE: https://github.com/apostrophecms/sanitize-html
LICENSE: https://github.com/apostrophecms/sanitize-html/blob/main/LICENSE
Modified for audiobookshelf
*/
const htmlparser = require ( 'htmlparser2' ) ;
// const escapeStringRegexp = require('escape-string-regexp');
// const { isPlainObject } = require('is-plain-object');
// const deepmerge = require('deepmerge');
// const parseSrcset = require('parse-srcset');
// const { parse: postcssParse } = require('postcss');
// Tags that can conceivably represent stand-alone media.
// ABS UPDATE: Packages not necessary
// SOURCE: https://github.com/sindresorhus/escape-string-regexp/blob/main/index.js
/**
 * Escape a string so that it can be embedded verbatim inside a RegExp pattern.
 *
 * @param {string} string - Text whose regex metacharacters should be escaped.
 * @returns {string} The escaped text.
 * @throws {TypeError} When `string` is not a string.
 */
function escapeStringRegexp(string) {
  const isText = typeof string === 'string';
  if (!isText) {
    throw new TypeError('Expected a string');
  }

  // Escape characters with special meaning either inside or outside character
  // sets. Use a simple backslash escape when it's always valid, and a `\xnn`
  // escape when the simpler form would be disallowed by Unicode patterns'
  // stricter grammar.
  const metaEscaped = string.replace(/[|\\{}()[\]^$+*?.]/g, '\\$&');
  const hyphenEscaped = metaEscaped.replace(/-/g, '\\x2d');
  return hyphenEscaped;
}
// SOURCE: https://github.com/jonschlinkert/is-plain-object/blob/master/is-plain-object.js
/**
 * Report whether a value's `Object.prototype.toString` tag is `[object Object]`.
 *
 * @param {*} o - Value to inspect.
 * @returns {boolean} True when the tag is exactly `[object Object]`.
 */
function isObject(o) {
  return Object.prototype.toString.call(o) === '[object Object]';
}

/**
 * Report whether a value is a plain object: one created by an object literal,
 * `new Object()`, or `Object.create(null)`.
 *
 * @param {*} o - Value to inspect.
 * @returns {boolean} True for plain objects, false for everything else
 *   (arrays, class instances, primitives, objects with a tampered constructor).
 */
function isPlainObject(o) {
  if (isObject(o) === false) {
    return false;
  }

  // No constructor at all (e.g. Object.create(null)): treat as plain.
  const ctor = o.constructor;
  if (ctor === undefined) {
    return true;
  }

  // A constructor overwritten with `null` is a modified constructor. Reading
  // `.prototype` from it would throw, so reject it explicitly.
  if (ctor === null) {
    return false;
  }

  // If has modified prototype
  const prot = ctor.prototype;
  if (isObject(prot) === false) {
    return false;
  }

  // If constructor does not have an Object-specific method. Called through
  // Object.prototype so that a prototype lacking its own `hasOwnProperty`
  // (null-prototype objects) cannot make this throw.
  return Object.prototype.hasOwnProperty.call(prot, 'isPrototypeOf');
}
// Tags that can conceivably represent stand-alone media.
// Frame#updateParentNodeMediaChildren consults this list to decide whether a
// closing element should be recorded in its parent frame's `mediaChildren`.
const mediaTags = [
'img' , 'audio' , 'video' , 'picture' , 'svg' ,
'object' , 'map' , 'iframe' , 'embed'
] ;
// Tags that are inherently vulnerable to being used in XSS attacks.
// sanitizeHtml logs a console warning when one of these appears in
// `options.allowedTags` and `options.allowVulnerableTags` is not set.
const vulnerableTags = [ 'script' , 'style' ] ;
/**
 * Invoke `cb(value, key)` for every own enumerable key of `obj`.
 * Falsy `obj` values are ignored. The key list is captured up front, while
 * each value is read at the moment its callback runs.
 *
 * @param {object|Array|null|undefined} obj - Collection to walk.
 * @param {function(*, string): void} cb - Receives the value, then the key.
 */
function each(obj, cb) {
  if (!obj) {
    return;
  }
  for (const key of Object.keys(obj)) {
    cb(obj[key], key);
  }
}
/**
 * Own-property check that avoids false positives with .__proto__,
 * .hasOwnProperty, etc., and works for objects without a prototype.
 *
 * @param {object} obj - Object to inspect.
 * @param {string} key - Property name to look for.
 * @returns {boolean} True only when `key` is an own property of `obj`.
 */
function has(obj, key) {
  const ownPropertyCheck = Object.prototype.hasOwnProperty;
  return ownPropertyCheck.call(obj, key);
}
/**
 * Collect the values of `a` for which `cb(value)` returns truthy.
 * Own enumerable keys are visited; a falsy `a` yields an empty array.
 *
 * @param {object|Array|null|undefined} a - Collection to filter.
 * @param {function(*): *} cb - Predicate called with each value.
 * @returns {Array} The accepted values, in key order.
 */
function filter(a, cb) {
  const kept = [];
  if (a) {
    for (const key of Object.keys(a)) {
      const item = a[key];
      if (cb(item)) {
        kept.push(item);
      }
    }
  }
  return kept;
}
/**
 * Report whether `obj` has no own enumerable properties.
 * `null` and `undefined` count as empty.
 *
 * @param {*} obj - Value to inspect.
 * @returns {boolean} True when there is no own enumerable key.
 */
function isEmptyObject(obj) {
  if (obj == null) {
    return true;
  }
  const ownKeys = Object.keys(obj);
  return ownKeys.length === 0;
}
/**
 * Serialize parsed srcset candidates back into a `srcset` attribute value.
 *
 * Each candidate is written as its URL followed by whichever descriptors are
 * set. A descriptor is the number immediately followed by its unit, with no
 * inner whitespace (for example `100w` or `2x`), so the output stays a valid
 * srcset string.
 *
 * @param {Array<{url: string, w?: number, h?: number, d?: number}>} parsedSrcset
 *   Candidates to serialize.
 * @returns {string} Candidates joined with `, `.
 * @throws {Error} When a candidate has no `url`.
 */
function stringifySrcset(parsedSrcset) {
  return parsedSrcset.map(function (part) {
    if (!part.url) {
      throw new Error('URL missing');
    }

    return (
      part.url +
      (part.w ? ` ${part.w}w` : '') +
      (part.h ? ` ${part.h}h` : '') +
      (part.d ? ` ${part.d}x` : '')
    );
  }).join(', ');
}
// Export the sanitizer function as this module's sole export. `sanitizeHtml`
// is a function declaration, so it is hoisted and already defined here even
// though its body appears further down the file.
module . exports = sanitizeHtml ;
// A valid attribute name.
// We use a tolerant definition based on the set of strings defined by
// html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
// and html.spec.whatwg.org/multipage/parsing.html#attribute-name-state .
// The characters accepted are ones which can be appended to the attribute
// name buffer without triggering a parse error:
//   * unexpected-equals-sign-before-attribute-name
//   * unexpected-null-character
//   * unexpected-character-in-attribute-name
// We exclude the empty string because it's impossible to get to the after
// attribute name state with an empty attribute name buffer.
// The identifier must be one unbroken token; with whitespace inside it the
// declaration does not parse.
const VALID_HTML_ATTRIBUTE_NAME = /^[^\0\t\n\f\r /<=>]+$/;
/**
 * Sanitize an HTML string against an allowlist policy.
 *
 * The markup is fed through an htmlparser2 `Parser`. The open-tag, text and
 * close-tag callbacks rebuild the output string, dropping (or escaping, when
 * `disallowedTagsMode` is not 'discard') tags that are not allowed, removing
 * attributes that are not allowed, and HTML-escaping text and attribute
 * values.
 *
 * @param {string} html - Markup to sanitize. `null`/`undefined` produce ''.
 * @param {object} [options] - Overrides, shallow-merged on top of
 *   `sanitizeHtml.defaults`. `options.parser` is merged on top of the parser
 *   defaults and passed to htmlparser2.
 * @param {*} [_recursing] - Ignore the _recursing flag; it's there for
 *   recursive invocation as a guard against this exploit:
 *   https://github.com/fb55/htmlparser2/issues/105
 *   It is not read anywhere in this function body.
 * @returns {string} The sanitized markup.
 */
function sanitizeHtml(html, options, _recursing) {
  if (html == null) {
    return '';
  }

  let result = '';
  // Used for hot swapping the result variable with an empty string in order
  // to "capture" the text written to it.
  let tempResult = '';

  /**
   * Bookkeeping record for one open element on `stack`.
   *
   * @param {string} tag - Tag name as reported by the parser.
   * @param {object} [attribs] - Attribute map for the tag.
   */
  function Frame(tag, attribs) {
    const that = this;
    this.tag = tag;
    this.attribs = attribs || {};
    // Offset in `result` where this element's output starts; used by
    // `exclusiveFilter` to cut the element back out.
    this.tagPosition = result.length;
    this.text = ''; // Node inner text
    this.mediaChildren = [];

    // Append this frame's text to the frame that is now on top of the stack.
    this.updateParentNodeText = function () {
      if (stack.length) {
        const parentFrame = stack[stack.length - 1];
        parentFrame.text += that.text;
      }
    };

    // Record this tag on the parent frame when it is a media tag.
    this.updateParentNodeMediaChildren = function () {
      if (stack.length && mediaTags.includes(this.tag)) {
        const parentFrame = stack[stack.length - 1];
        parentFrame.mediaChildren.push(this.tag);
      }
    };
  }

  options = Object.assign({}, sanitizeHtml.defaults, options);
  options.parser = Object.assign({}, htmlParserDefaults, options.parser);

  // vulnerableTags
  vulnerableTags.forEach(function (tag) {
    if (
      options.allowedTags && options.allowedTags.indexOf(tag) > -1 &&
      !options.allowVulnerableTags
    ) {
      console.warn(` \n \n ⚠️ Your \` allowedTags \` option includes, \` ${tag} \` , which is inherently \n vulnerable to XSS attacks. Please remove it from \` allowedTags \` . \n Or, to disable this warning, add the \` allowVulnerableTags \` option \n and ensure you are accounting for this risk. \n \n `);
    }
  });

  // Tags that contain something other than HTML, or where discarding
  // the text when the tag is disallowed makes sense for other reasons.
  // If we are not allowing these tags, we should drop their content too.
  // For other tags you would drop the tag but keep its content.
  const nonTextTagsArray = options.nonTextTags || [
    'script',
    'style',
    'textarea',
    'option'
  ];

  // Per-tag lists of allowed attribute names (or descriptor objects), plus a
  // compiled RegExp per tag for names that contain a `*` glob.
  let allowedAttributesMap;
  let allowedAttributesGlobMap;
  if (options.allowedAttributes) {
    allowedAttributesMap = {};
    allowedAttributesGlobMap = {};
    each(options.allowedAttributes, function (attributes, tag) {
      allowedAttributesMap[tag] = [];
      const globRegex = [];
      attributes.forEach(function (obj) {
        if (typeof obj === 'string' && obj.indexOf('*') >= 0) {
          globRegex.push(escapeStringRegexp(obj).replace(/\\\*/g, '.*'));
        } else {
          allowedAttributesMap[tag].push(obj);
        }
      });
      if (globRegex.length) {
        allowedAttributesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
      }
    });
  }

  // Per-tag allowed class names, split into literal names, `*` globs and
  // caller-supplied RegExp objects.
  const allowedClassesMap = {};
  const allowedClassesGlobMap = {};
  const allowedClassesRegexMap = {};
  each(options.allowedClasses, function (classes, tag) {
    // Implicitly allows the class attribute
    if (allowedAttributesMap) {
      if (!has(allowedAttributesMap, tag)) {
        allowedAttributesMap[tag] = [];
      }
      allowedAttributesMap[tag].push('class');
    }

    allowedClassesMap[tag] = [];
    allowedClassesRegexMap[tag] = [];
    const globRegex = [];
    classes.forEach(function (obj) {
      if (typeof obj === 'string' && obj.indexOf('*') >= 0) {
        globRegex.push(escapeStringRegexp(obj).replace(/\\\*/g, '.*'));
      } else if (obj instanceof RegExp) {
        allowedClassesRegexMap[tag].push(obj);
      } else {
        allowedClassesMap[tag].push(obj);
      }
    });
    if (globRegex.length) {
      allowedClassesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
    }
  });

  // Tag transforms: a function is used as-is, a string becomes a simple
  // rename. The `*` key applies to every tag.
  const transformTagsMap = {};
  let transformTagsAll;
  each(options.transformTags, function (transform, tag) {
    let transFun;
    if (typeof transform === 'function') {
      transFun = transform;
    } else if (typeof transform === 'string') {
      transFun = sanitizeHtml.simpleTransform(transform);
    }
    if (tag === '*') {
      transformTagsAll = transFun;
    } else {
      transformTagsMap[tag] = transFun;
    }
  });

  let depth;
  let stack;
  let skipMap;
  let transformMap;
  let skipText;
  let skipTextDepth;
  let addedText = false;

  initializeState();

  const parser = new htmlparser.Parser({
    onopentag: function (name, attribs) {
      // If `enforceHtmlBoundary` is `true` and this has found the opening
      // `html` tag, reset the state.
      if (options.enforceHtmlBoundary && name === 'html') {
        initializeState();
      }

      if (skipText) {
        skipTextDepth++;
        return;
      }
      const frame = new Frame(name, attribs);
      stack.push(frame);

      let skip = false;
      const hasText = !!frame.text;
      let transformedTag;
      if (has(transformTagsMap, name)) {
        transformedTag = transformTagsMap[name](name, attribs);

        frame.attribs = attribs = transformedTag.attribs;

        if (transformedTag.text !== undefined) {
          frame.innerText = transformedTag.text;
        }

        if (name !== transformedTag.tagName) {
          frame.name = name = transformedTag.tagName;
          transformMap[depth] = transformedTag.tagName;
        }
      }
      if (transformTagsAll) {
        transformedTag = transformTagsAll(name, attribs);

        frame.attribs = attribs = transformedTag.attribs;
        if (name !== transformedTag.tagName) {
          frame.name = name = transformedTag.tagName;
          transformMap[depth] = transformedTag.tagName;
        }
      }

      if (
        (options.allowedTags && options.allowedTags.indexOf(name) === -1) ||
        (options.disallowedTagsMode === 'recursiveEscape' && !isEmptyObject(skipMap)) ||
        (options.nestingLimit != null && depth >= options.nestingLimit)
      ) {
        skip = true;
        skipMap[depth] = true;
        if (options.disallowedTagsMode === 'discard') {
          if (nonTextTagsArray.indexOf(name) !== -1) {
            skipText = true;
            skipTextDepth = 1;
          }
        }
      }
      depth++;
      if (skip) {
        if (options.disallowedTagsMode === 'discard') {
          // We want the contents but not this tag
          return;
        }
        // Capture what is written for this tag so it can be escaped as text.
        tempResult = result;
        result = '';
      }
      result += '<' + name;

      if (name === 'script') {
        if (options.allowedScriptHostnames || options.allowedScriptDomains) {
          frame.innerText = '';
        }
      }

      if (!allowedAttributesMap || has(allowedAttributesMap, name) || allowedAttributesMap['*']) {
        each(attribs, function (value, a) {
          if (!VALID_HTML_ATTRIBUTE_NAME.test(a)) {
            // This prevents part of an attribute name in the output from being
            // interpreted as the end of an attribute, or end of a tag.
            delete frame.attribs[a];
            return;
          }
          // check allowedAttributesMap for the element and attribute and modify the value
          // as necessary if there are specific values defined.
          let passedAllowedAttributesMapCheck = false;
          if (!allowedAttributesMap ||
            (has(allowedAttributesMap, name) && allowedAttributesMap[name].indexOf(a) !== -1) ||
            (allowedAttributesMap['*'] && allowedAttributesMap['*'].indexOf(a) !== -1) ||
            (has(allowedAttributesGlobMap, name) && allowedAttributesGlobMap[name].test(a)) ||
            (allowedAttributesGlobMap['*'] && allowedAttributesGlobMap['*'].test(a))) {
            passedAllowedAttributesMapCheck = true;
          } else if (allowedAttributesMap && allowedAttributesMap[name]) {
            for (const o of allowedAttributesMap[name]) {
              if (isPlainObject(o) && o.name && (o.name === a)) {
                passedAllowedAttributesMapCheck = true;
                let newValue = '';
                if (o.multiple === true) {
                  // verify the values that are allowed
                  const splitStrArray = value.split(' ');
                  for (const s of splitStrArray) {
                    if (o.values.indexOf(s) !== -1) {
                      if (newValue === '') {
                        newValue = s;
                      } else {
                        newValue += ' ' + s;
                      }
                    }
                  }
                } else if (o.values.indexOf(value) >= 0) {
                  // verified an allowed value matches the entire attribute value
                  newValue = value;
                }
                value = newValue;
              }
            }
          }
          if (passedAllowedAttributesMapCheck) {
            if (options.allowedSchemesAppliedToAttributes.indexOf(a) !== -1) {
              if (naughtyHref(name, value)) {
                delete frame.attribs[a];
                return;
              }
            }

            if (name === 'script' && a === 'src') {
              let allowed = true;

              try {
                const parsed = new URL(value);

                if (options.allowedScriptHostnames || options.allowedScriptDomains) {
                  const allowedHostname = (options.allowedScriptHostnames || []).find(function (hostname) {
                    return hostname === parsed.hostname;
                  });
                  // The suffix must be exactly ".<domain>"; any padding
                  // inside the template would stop subdomains from matching.
                  const allowedDomain = (options.allowedScriptDomains || []).find(function (domain) {
                    return parsed.hostname === domain || parsed.hostname.endsWith(`.${domain}`);
                  });
                  allowed = allowedHostname || allowedDomain;
                }
              } catch (e) {
                allowed = false;
              }

              if (!allowed) {
                delete frame.attribs[a];
                return;
              }
            }

            if (name === 'iframe' && a === 'src') {
              let allowed = true;
              try {
                // Chrome accepts \ as a substitute for / in the // at the
                // start of a URL, so rewrite accordingly to prevent exploit.
                // Also drop any whitespace at that point in the URL
                value = value.replace(/^(\w+:)?\s*[\\/]\s*[\\/]/, '$1//');
                if (value.startsWith('relative:')) {
                  // An attempt to exploit our workaround for base URLs being
                  // mandatory for relative URL validation in the WHATWG
                  // URL parser, reject it
                  throw new Error('relative: exploit attempt');
                }
                // naughtyHref is in charge of whether protocol relative URLs
                // are cool. Here we are concerned just with allowed hostnames and
                // whether to allow relative URLs.
                //
                // Build a placeholder "base URL" against which any reasonable
                // relative URL may be parsed successfully. The segments must be
                // appended without whitespace, otherwise the host part is
                // invalid and every iframe src would be rejected.
                let base = 'relative://relative-site';
                for (let i = 0; (i < 100); i++) {
                  base += `/${i}`;
                }
                const parsed = new URL(value, base);
                const isRelativeUrl = parsed && parsed.hostname === 'relative-site' && parsed.protocol === 'relative:';
                if (isRelativeUrl) {
                  // default value of allowIframeRelativeUrls is true
                  // unless allowedIframeHostnames or allowedIframeDomains specified
                  allowed = has(options, 'allowIframeRelativeUrls')
                    ? options.allowIframeRelativeUrls
                    : (!options.allowedIframeHostnames && !options.allowedIframeDomains);
                } else if (options.allowedIframeHostnames || options.allowedIframeDomains) {
                  const allowedHostname = (options.allowedIframeHostnames || []).find(function (hostname) {
                    return hostname === parsed.hostname;
                  });
                  const allowedDomain = (options.allowedIframeDomains || []).find(function (domain) {
                    return parsed.hostname === domain || parsed.hostname.endsWith(`.${domain}`);
                  });
                  allowed = allowedHostname || allowedDomain;
                }
              } catch (e) {
                // Unparseable iframe src
                allowed = false;
              }
              if (!allowed) {
                delete frame.attribs[a];
                return;
              }
            }
            if (a === 'srcset') {
              // ABS UPDATE: srcset not necessary. The upstream parse-srcset
              // based validation was removed, so the attribute is dropped.
              // Return here so the unvalidated value is not written out below.
              delete frame.attribs[a];
              return;
            }
            if (a === 'class') {
              const allowedSpecificClasses = allowedClassesMap[name];
              const allowedWildcardClasses = allowedClassesMap['*'];
              const allowedSpecificClassesGlob = allowedClassesGlobMap[name];
              const allowedSpecificClassesRegex = allowedClassesRegexMap[name];
              const allowedWildcardClassesGlob = allowedClassesGlobMap['*'];
              const allowedClassesGlobs = [
                allowedSpecificClassesGlob,
                allowedWildcardClassesGlob
              ]
                .concat(allowedSpecificClassesRegex)
                .filter(function (t) {
                  return t;
                });
              if (allowedSpecificClasses && allowedWildcardClasses) {
                // ABS UPDATE: deepmerge is not bundled. Upstream merged the two
                // arrays with deepmerge, which concatenates arrays by default,
                // so a plain concat keeps the filter active when both a
                // tag-specific and a wildcard class list are configured.
                value = filterClasses(value, allowedSpecificClasses.concat(allowedWildcardClasses), allowedClassesGlobs);
              } else {
                value = filterClasses(value, allowedSpecificClasses || allowedWildcardClasses, allowedClassesGlobs);
              }
              if (!value.length) {
                delete frame.attribs[a];
                return;
              }
            }
            if (a === 'style') {
              // ABS UPDATE: Styles not necessary. The upstream postcss based
              // filtering was removed, so the attribute is dropped. Return
              // here so the unfiltered value is not written out below.
              delete frame.attribs[a];
              return;
            }
            result += ' ' + a;
            if (value && value.length) {
              result += '="' + escapeHtml(value, true) + '"';
            }
          } else {
            delete frame.attribs[a];
          }
        });
      }
      if (options.selfClosing.indexOf(name) !== -1) {
        result += ' />';
      } else {
        result += '>';
        if (frame.innerText && !hasText && !options.textFilter) {
          result += escapeHtml(frame.innerText);
          addedText = true;
        }
      }
      if (skip) {
        // Emit the captured tag as escaped text after the earlier output.
        result = tempResult + escapeHtml(result);
        tempResult = '';
      }
    },
    ontext: function (text) {
      if (skipText) {
        return;
      }
      const lastFrame = stack[stack.length - 1];
      let tag;

      if (lastFrame) {
        tag = lastFrame.tag;
        // If inner text was set by transform function then let's use it
        text = lastFrame.innerText !== undefined ? lastFrame.innerText : text;
      }

      if (options.disallowedTagsMode === 'discard' && ((tag === 'script') || (tag === 'style'))) {
        // htmlparser2 gives us these as-is. Escaping them ruins the content. Allowing
        // script tags is, by definition, game over for XSS protection, so if that's
        // your concern, don't allow them. The same is essentially true for style tags
        // which have their own collection of XSS vectors.
        result += text;
      } else {
        const escaped = escapeHtml(text, false);
        if (options.textFilter && !addedText) {
          result += options.textFilter(escaped, tag);
        } else if (!addedText) {
          result += escaped;
        }
      }
      if (stack.length) {
        const frame = stack[stack.length - 1];
        frame.text += text;
      }
    },
    onclosetag: function (name) {
      if (skipText) {
        skipTextDepth--;
        if (!skipTextDepth) {
          skipText = false;
        } else {
          return;
        }
      }

      const frame = stack.pop();
      if (!frame) {
        // Do not crash on bad markup
        return;
      }
      skipText = options.enforceHtmlBoundary ? name === 'html' : false;
      depth--;
      const skip = skipMap[depth];
      if (skip) {
        delete skipMap[depth];
        if (options.disallowedTagsMode === 'discard') {
          frame.updateParentNodeText();
          return;
        }
        tempResult = result;
        result = '';
      }

      if (transformMap[depth]) {
        name = transformMap[depth];
        delete transformMap[depth];
      }

      if (options.exclusiveFilter && options.exclusiveFilter(frame)) {
        // Cut everything written since this element was opened.
        result = result.slice(0, frame.tagPosition);
        return;
      }

      frame.updateParentNodeMediaChildren();
      frame.updateParentNodeText();

      if (options.selfClosing.indexOf(name) !== -1) {
        // Already output />
        if (skip) {
          result = tempResult;
          tempResult = '';
        }
        return;
      }

      result += '</' + name + '>';
      if (skip) {
        result = tempResult + escapeHtml(result);
        tempResult = '';
      }
      addedText = false;
    }
  }, options.parser);
  parser.write(html);
  parser.end();

  return result;

  // Reset all per-document parser state, including the output accumulated so far.
  function initializeState() {
    result = '';
    depth = 0;
    stack = [];
    skipMap = {};
    transformMap = {};
    skipText = false;
    skipTextDepth = 0;
  }

  /**
   * HTML-escape text for output.
   *
   * @param {*} s - Value to escape; non-strings are converted to a string.
   * @param {boolean} [quote] - Also escape double quotes (attribute values).
   * @returns {string} The escaped text.
   */
  function escapeHtml(s, quote) {
    if (typeof (s) !== 'string') {
      s = s + '';
    }
    if (options.parser.decodeEntities) {
      // The parser has already decoded entities, so every special character
      // here is literal text and must be turned back into an entity.
      s = s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
      if (quote) {
        s = s.replace(/"/g, '&quot;');
      }
    }
    // TODO: this is inadequate because it will pass `&0;`. This approach
    // will not work, each & must be considered with regard to whether it
    // is followed by a 100% syntactically valid entity or not, and escaped
    // if it is not. If this bothers you, don't set parser.decodeEntities
    // to false. (The default is true.)
    s = s.replace(/&(?![a-zA-Z0-9#]{1,20};)/g, '&amp;') // Match ampersands not part of existing HTML entity
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
    if (quote) {
      s = s.replace(/"/g, '&quot;');
    }
    return s;
  }

  /**
   * Decide whether a URL-valued attribute must be rejected.
   *
   * @param {string} name - Tag name, used to look up `allowedSchemesByTag`.
   * @param {string} href - Attribute value to inspect.
   * @returns {boolean} True when the value should be dropped.
   */
  function naughtyHref(name, href) {
    // Browsers ignore character codes of 32 (space) and below in a surprising
    // number of situations. Start reading here:
    // https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Embedded_tab
    // eslint-disable-next-line no-control-regex
    href = href.replace(/[\x00-\x20]+/g, '');
    // Clobber any comments in URLs, which the browser might
    // interpret inside an XML data island, allowing
    // a javascript: URL to be snuck through
    href = href.replace(/<!--.*?-->/g, '');
    // Case insensitive so we don't get faked out by JAVASCRIPT #1
    // Allow more characters after the first so we don't get faked
    // out by certain schemes browsers accept
    const matches = href.match(/^([a-zA-Z][a-zA-Z0-9.\-+]*):/);
    if (!matches) {
      // Protocol-relative URL starting with any combination of '/' and '\'
      if (href.match(/^[/\\]{2}/)) {
        return !options.allowProtocolRelative;
      }

      // No scheme
      return false;
    }
    const scheme = matches[1].toLowerCase();

    if (has(options.allowedSchemesByTag, name)) {
      return options.allowedSchemesByTag[name].indexOf(scheme) === -1;
    }

    return !options.allowedSchemes || options.allowedSchemes.indexOf(scheme) === -1;
  }

  /**
   * Filters user input css properties by allowlisted regex attributes.
   * Modifies the abstractSyntaxTree object.
   *
   * @param {object} abstractSyntaxTree - Object representation of CSS attributes.
   * @property {array[Declaration]} abstractSyntaxTree.nodes[0] - Each object contains prop and value key, i.e { prop: 'color', value: 'red' }.
   * @param {object} allowedStyles - Keys are properties (i.e color), value is list of permitted regex rules (i.e /green/i).
   * @return {object} - The modified tree.
   */
  // ABS UPDATE: disabled together with the style attribute handling above.
  // function filterCss(abstractSyntaxTree, allowedStyles) {
  //   if (!allowedStyles) {
  //     return abstractSyntaxTree;
  //   }
  //   const astRules = abstractSyntaxTree.nodes[0];
  //   let selectedRule;
  //   // Merge global and tag-specific styles into new AST.
  //   if (allowedStyles[astRules.selector] && allowedStyles['*']) {
  //     selectedRule = deepmerge(
  //       allowedStyles[astRules.selector],
  //       allowedStyles['*']
  //     );
  //   } else {
  //     selectedRule = allowedStyles[astRules.selector] || allowedStyles['*'];
  //   }
  //   if (selectedRule) {
  //     abstractSyntaxTree.nodes[0].nodes = astRules.nodes.reduce(filterDeclarations(selectedRule), []);
  //   }
  //   return abstractSyntaxTree;
  // }

  /**
   * Extracts the style attributes from an AbstractSyntaxTree and formats those
   * values in the inline style attribute format.
   * Not called while style attribute handling is disabled.
   *
   * @param {AbstractSyntaxTree} filteredAST
   * @return {string} - Example: "color:yellow;text-align:center !important;font-family:helvetica"
   */
  function stringifyStyleAttributes(filteredAST) {
    return filteredAST.nodes[0].nodes
      .reduce(function (extractedAttributes, attrObject) {
        extractedAttributes.push(
          `${attrObject.prop}:${attrObject.value}${attrObject.important ? ' !important' : ''}`
        );
        return extractedAttributes;
      }, [])
      .join(';');
  }

  /**
   * Filters the existing attributes for the given property. Discards any attributes
   * which don't match the allowlist.
   * Not called while style attribute handling is disabled.
   *
   * @param {object} selectedRule - Example: { color: red, font-family: helvetica }
   * @param {array} allowedDeclarationsList - List of declarations which pass the allowlist.
   * @param {object} attributeObject - Object representing the current css property.
   * @property {string} attributeObject.type - Typically 'declaration'.
   * @property {string} attributeObject.prop - The CSS property, i.e 'color'.
   * @property {string} attributeObject.value - The corresponding value to the css property, i.e 'red'.
   * @return {function} - When used in Array.reduce, will return an array of Declaration objects
   */
  function filterDeclarations(selectedRule) {
    return function (allowedDeclarationsList, attributeObject) {
      // If this property is allowlisted...
      if (has(selectedRule, attributeObject.prop)) {
        const matchesRegex = selectedRule[attributeObject.prop].some(function (regularExpression) {
          return regularExpression.test(attributeObject.value);
        });

        if (matchesRegex) {
          allowedDeclarationsList.push(attributeObject);
        }
      }
      return allowedDeclarationsList;
    };
  }

  /**
   * Keep only the class names that are allowed.
   *
   * @param {string} classes - Raw `class` attribute value.
   * @param {string[]} [allowed] - Literal class names that are allowed. When
   *   falsy, the value is returned unfiltered.
   * @param {RegExp[]} allowedGlobs - Patterns that also allow a class name.
   * @returns {string} The surviving class names joined by single spaces.
   */
  function filterClasses(classes, allowed, allowedGlobs) {
    if (!allowed) {
      // The class attribute is allowed without filtering on this tag
      return classes;
    }
    classes = classes.split(/\s+/);
    return classes.filter(function (clss) {
      return allowed.indexOf(clss) !== -1 || allowedGlobs.some(function (glob) {
        return glob.test(clss);
      });
    }).join(' ');
  }
}
// Defaults are accessible to you so that you can use them as a starting point
// programmatically if you wish
// Default options handed to the htmlparser2 Parser. sanitizeHtml merges any
// caller-supplied `options.parser` on top of this object, and its escapeHtml
// helper reads the resulting `decodeEntities` flag.
const htmlParserDefaults = {
decodeEntities : true
} ;
// Default sanitizer options. sanitizeHtml shallow-merges the caller's options
// on top of this object, so any key supplied by the caller replaces the
// default wholesale (lists are not combined).
sanitizeHtml . defaults = {
// Tag names that are kept in the output.
allowedTags : [
// Sections derived from MDN element categories and limited to the more
// benign categories.
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element
// Content sectioning
'address' , 'article' , 'aside' , 'footer' , 'header' ,
'h1' , 'h2' , 'h3' , 'h4' , 'h5' , 'h6' , 'hgroup' ,
'main' , 'nav' , 'section' ,
// Text content
// NOTE(review): 'main' is already listed under content sectioning above;
// the duplicate has no effect on the indexOf lookups that use this list.
'blockquote' , 'dd' , 'div' , 'dl' , 'dt' , 'figcaption' , 'figure' ,
'hr' , 'li' , 'main' , 'ol' , 'p' , 'pre' , 'ul' ,
// Inline text semantics
'a' , 'abbr' , 'b' , 'bdi' , 'bdo' , 'br' , 'cite' , 'code' , 'data' , 'dfn' ,
'em' , 'i' , 'kbd' , 'mark' , 'q' ,
'rb' , 'rp' , 'rt' , 'rtc' , 'ruby' ,
's' , 'samp' , 'small' , 'span' , 'strong' , 'sub' , 'sup' , 'time' , 'u' , 'var' , 'wbr' ,
// Table content
'caption' , 'col' , 'colgroup' , 'table' , 'tbody' , 'td' , 'tfoot' , 'th' ,
'thead' , 'tr'
] ,
// 'discard' drops a disallowed tag's markup; other modes escape it as text.
disallowedTagsMode : 'discard' ,
// Attribute names kept per tag; a '*' key would apply to every tag.
allowedAttributes : {
a : [ 'href' , 'name' , 'target' ] ,
// We don't currently allow img itself by default, but
// these attributes would make sense if we did.
img : [ 'src' , 'srcset' , 'alt' , 'title' , 'width' , 'height' , 'loading' ]
} ,
// Lots of these won't come up by default because we don't allow them
selfClosing : [ 'img' , 'br' , 'hr' , 'area' , 'base' , 'basefont' , 'input' , 'link' , 'meta' ] ,
// URL schemes we permit
allowedSchemes : [ 'http' , 'https' , 'ftp' , 'mailto' , 'tel' ] ,
// Per-tag scheme lists; when a tag has an entry it is used instead of allowedSchemes.
allowedSchemesByTag : { } ,
// Attributes whose values are checked against the allowed schemes.
allowedSchemesAppliedToAttributes : [ 'href' , 'src' , 'cite' ] ,
// Whether URLs beginning with two slashes/backslashes are accepted.
allowProtocolRelative : true ,
// When true, parser state is reset at an opening <html> tag and text after
// the closing </html> tag is skipped.
enforceHtmlBoundary : false
} ;
/**
 * Build a tag transform that renames a tag and optionally sets attributes.
 *
 * @param {string} newTagName - Tag name the transform reports.
 * @param {object} [newAttribs] - Attributes to apply. Defaults to none.
 * @param {boolean} [merge=true] - When truthy, `newAttribs` are copied onto
 *   the tag's existing attributes. When falsy, the existing attributes are
 *   replaced by a fresh copy of `newAttribs`.
 * @returns {function(string, object): {tagName: string, attribs: object}}
 *   Transform suitable for `options.transformTags`.
 */
sanitizeHtml.simpleTransform = function (newTagName, newAttribs, merge) {
  const shouldMerge = (merge === undefined) ? true : merge;
  const replacementAttribs = newAttribs || {};

  return function (tagName, attribs) {
    let resultAttribs;
    if (shouldMerge) {
      // Merging intentionally writes onto the attribute object of the tag
      // being transformed.
      resultAttribs = Object.assign(attribs, replacementAttribs);
    } else {
      // Hand out a copy: the sanitizer deletes disallowed attributes from the
      // object it receives, which must not alter the caller's `newAttribs`
      // or leak between tags.
      resultAttribs = Object.assign({}, replacementAttribs);
    }

    return {
      tagName: newTagName,
      attribs: resultAttribs
    };
  };
};