{-# LANGUAGE OverloadedStrings #-}
module Text.HTML.SanitizeXSS
(
sanitize
, sanitizeBalance
, sanitizeXSS
, filterTags
, safeTags
, safeTagsCustom
, balanceTags
, safeTagName
, sanitizeAttribute
, sanitaryURI
) where
import Text.HTML.SanitizeXSS.Css
import Text.HTML.TagSoup
import Data.Set (Set(), member, notMember, (\\), fromList, fromAscList)
import Data.Char ( toLower )
import Data.Text (Text)
import qualified Data.Text as T
import Network.URI ( parseURIReference, URI (..),
isAllowedInURI, escapeURIString, uriScheme )
import Codec.Binary.UTF8.String ( encodeString )
import Data.Maybe (mapMaybe)
sanitize :: Text -> Text
sanitize :: Text -> Text
sanitize = Text -> Text
sanitizeXSS
sanitizeXSS :: Text -> Text
sanitizeXSS :: Text -> Text
sanitizeXSS = ([Tag Text] -> [Tag Text]) -> Text -> Text
filterTags [Tag Text] -> [Tag Text]
safeTags
sanitizeBalance :: Text -> Text
sanitizeBalance :: Text -> Text
sanitizeBalance = ([Tag Text] -> [Tag Text]) -> Text -> Text
filterTags ([Tag Text] -> [Tag Text]
balanceTags ([Tag Text] -> [Tag Text])
-> ([Tag Text] -> [Tag Text]) -> [Tag Text] -> [Tag Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Tag Text] -> [Tag Text]
safeTags)
balanceTags :: [Tag Text] -> [Tag Text]
balanceTags :: [Tag Text] -> [Tag Text]
balanceTags = [Text] -> [Tag Text] -> [Tag Text]
balance []
filterTags :: ([Tag Text] -> [Tag Text]) -> Text -> Text
filterTags :: ([Tag Text] -> [Tag Text]) -> Text -> Text
filterTags f :: [Tag Text] -> [Tag Text]
f = RenderOptions Text -> [Tag Text] -> Text
forall str. StringLike str => RenderOptions str -> [Tag str] -> str
renderTagsOptions RenderOptions Text
forall str. StringLike str => RenderOptions str
renderOptions {
optMinimize :: Text -> Bool
optMinimize = \x :: Text
x -> Text
x Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`member` Set Text
voidElems
} ([Tag Text] -> Text) -> (Text -> [Tag Text]) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Tag Text] -> [Tag Text]
f ([Tag Text] -> [Tag Text])
-> (Text -> [Tag Text]) -> Text -> [Tag Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Tag Text] -> [Tag Text]
forall str. StringLike str => [Tag str] -> [Tag str]
canonicalizeTags ([Tag Text] -> [Tag Text])
-> (Text -> [Tag Text]) -> Text -> [Tag Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Tag Text]
forall str. StringLike str => str -> [Tag str]
parseTags
voidElems :: Set T.Text
voidElems :: Set Text
voidElems = [Text] -> Set Text
forall a. Eq a => [a] -> Set a
fromAscList ([Text] -> Set Text) -> [Text] -> Set Text
forall a b. (a -> b) -> a -> b
$ Text -> [Text]
T.words (Text -> [Text]) -> Text -> [Text]
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack "area base br col command embed hr img input keygen link meta param source track wbr"
balance :: [Text]
-> [Tag Text] -> [Tag Text]
balance :: [Text] -> [Tag Text] -> [Tag Text]
balance unclosed :: [Text]
unclosed [] = (Text -> Tag Text) -> [Text] -> [Tag Text]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Tag Text
forall str. str -> Tag str
TagClose ([Text] -> [Tag Text]) -> [Text] -> [Tag Text]
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`notMember` Set Text
voidElems) [Text]
unclosed
balance (x :: Text
x:xs :: [Text]
xs) tags' :: [Tag Text]
tags'@(TagClose name :: Text
name:tags :: [Tag Text]
tags)
| Text
x Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
name = Text -> Tag Text
forall str. str -> Tag str
TagClose Text
name Tag Text -> [Tag Text] -> [Tag Text]
forall a. a -> [a] -> [a]
: [Text] -> [Tag Text] -> [Tag Text]
balance [Text]
xs [Tag Text]
tags
| Text
x Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`member` Set Text
voidElems = [Text] -> [Tag Text] -> [Tag Text]
balance [Text]
xs [Tag Text]
tags'
| Bool
otherwise = Text -> [Attribute Text] -> Tag Text
forall str. str -> [Attribute str] -> Tag str
TagOpen Text
name [] Tag Text -> [Tag Text] -> [Tag Text]
forall a. a -> [a] -> [a]
: Text -> Tag Text
forall str. str -> Tag str
TagClose Text
name Tag Text -> [Tag Text] -> [Tag Text]
forall a. a -> [a] -> [a]
: [Text] -> [Tag Text] -> [Tag Text]
balance (Text
xText -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
xs) [Tag Text]
tags
balance unclosed :: [Text]
unclosed (TagOpen name :: Text
name as :: [Attribute Text]
as : tags :: [Tag Text]
tags) =
Text -> [Attribute Text] -> Tag Text
forall str. str -> [Attribute str] -> Tag str
TagOpen Text
name [Attribute Text]
as Tag Text -> [Tag Text] -> [Tag Text]
forall a. a -> [a] -> [a]
: [Text] -> [Tag Text] -> [Tag Text]
balance (Text
name Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
unclosed) [Tag Text]
tags
balance unclosed :: [Text]
unclosed (t :: Tag Text
t:ts :: [Tag Text]
ts) = Tag Text
t Tag Text -> [Tag Text] -> [Tag Text]
forall a. a -> [a] -> [a]
: [Text] -> [Tag Text] -> [Tag Text]
balance [Text]
unclosed [Tag Text]
ts
safeTags :: [Tag Text] -> [Tag Text]
safeTags :: [Tag Text] -> [Tag Text]
safeTags = (Text -> Bool)
-> (Attribute Text -> Maybe (Attribute Text))
-> [Tag Text]
-> [Tag Text]
safeTagsCustom Text -> Bool
safeTagName Attribute Text -> Maybe (Attribute Text)
sanitizeAttribute
safeTagsCustom ::
(Text -> Bool)
-> ((Text, Text) -> Maybe (Text, Text))
-> [Tag Text] -> [Tag Text]
safeTagsCustom :: (Text -> Bool)
-> (Attribute Text -> Maybe (Attribute Text))
-> [Tag Text]
-> [Tag Text]
safeTagsCustom _ _ [] = []
safeTagsCustom safeName :: Text -> Bool
safeName sanitizeAttr :: Attribute Text -> Maybe (Attribute Text)
sanitizeAttr (t :: Tag Text
t@(TagClose name :: Text
name):tags :: [Tag Text]
tags)
| Text -> Bool
safeName Text
name = Tag Text
t Tag Text -> [Tag Text] -> [Tag Text]
forall a. a -> [a] -> [a]
: (Text -> Bool)
-> (Attribute Text -> Maybe (Attribute Text))
-> [Tag Text]
-> [Tag Text]
safeTagsCustom Text -> Bool
safeName Attribute Text -> Maybe (Attribute Text)
sanitizeAttr [Tag Text]
tags
| Bool
otherwise = (Text -> Bool)
-> (Attribute Text -> Maybe (Attribute Text))
-> [Tag Text]
-> [Tag Text]
safeTagsCustom Text -> Bool
safeName Attribute Text -> Maybe (Attribute Text)
sanitizeAttr [Tag Text]
tags
safeTagsCustom safeName :: Text -> Bool
safeName sanitizeAttr :: Attribute Text -> Maybe (Attribute Text)
sanitizeAttr (TagOpen name :: Text
name attributes :: [Attribute Text]
attributes:tags :: [Tag Text]
tags)
| Text -> Bool
safeName Text
name = Text -> [Attribute Text] -> Tag Text
forall str. str -> [Attribute str] -> Tag str
TagOpen Text
name ((Attribute Text -> Maybe (Attribute Text))
-> [Attribute Text] -> [Attribute Text]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Attribute Text -> Maybe (Attribute Text)
sanitizeAttr [Attribute Text]
attributes) Tag Text -> [Tag Text] -> [Tag Text]
forall a. a -> [a] -> [a]
:
(Text -> Bool)
-> (Attribute Text -> Maybe (Attribute Text))
-> [Tag Text]
-> [Tag Text]
safeTagsCustom Text -> Bool
safeName Attribute Text -> Maybe (Attribute Text)
sanitizeAttr [Tag Text]
tags
| Bool
otherwise = (Text -> Bool)
-> (Attribute Text -> Maybe (Attribute Text))
-> [Tag Text]
-> [Tag Text]
safeTagsCustom Text -> Bool
safeName Attribute Text -> Maybe (Attribute Text)
sanitizeAttr [Tag Text]
tags
safeTagsCustom n :: Text -> Bool
n a :: Attribute Text -> Maybe (Attribute Text)
a (t :: Tag Text
t:tags :: [Tag Text]
tags) = Tag Text
t Tag Text -> [Tag Text] -> [Tag Text]
forall a. a -> [a] -> [a]
: (Text -> Bool)
-> (Attribute Text -> Maybe (Attribute Text))
-> [Tag Text]
-> [Tag Text]
safeTagsCustom Text -> Bool
n Attribute Text -> Maybe (Attribute Text)
a [Tag Text]
tags
safeTagName :: Text -> Bool
safeTagName :: Text -> Bool
safeTagName tagname :: Text
tagname = Text
tagname Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`member` Set Text
sanitaryTags
safeAttribute :: (Text, Text) -> Bool
safeAttribute :: Attribute Text -> Bool
safeAttribute (name :: Text
name, value :: Text
value) = Text
name Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`member` Set Text
sanitaryAttributes Bool -> Bool -> Bool
&&
(Text
name Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
`notMember` Set Text
uri_attributes Bool -> Bool -> Bool
|| Text -> Bool
sanitaryURI Text
value)
sanitizeAttribute :: (Text, Text) -> Maybe (Text, Text)
sanitizeAttribute :: Attribute Text -> Maybe (Attribute Text)
sanitizeAttribute ("style", value :: Text
value) =
let css :: Text
css = Text -> Text
sanitizeCSS Text
value
in if Text -> Bool
T.null Text
css then Maybe (Attribute Text)
forall a. Maybe a
Nothing else Attribute Text -> Maybe (Attribute Text)
forall a. a -> Maybe a
Just ("style", Text
css)
sanitizeAttribute attr :: Attribute Text
attr | Attribute Text -> Bool
safeAttribute Attribute Text
attr = Attribute Text -> Maybe (Attribute Text)
forall a. a -> Maybe a
Just Attribute Text
attr
| Bool
otherwise = Maybe (Attribute Text)
forall a. Maybe a
Nothing
sanitaryURI :: Text -> Bool
sanitaryURI :: Text -> Bool
sanitaryURI u :: Text
u =
case String -> Maybe URI
parseURIReference (String -> String
escapeURI (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ Text -> String
T.unpack Text
u) of
Just p :: URI
p -> (String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (URI -> String
uriScheme URI
p)) Bool -> Bool -> Bool
||
(((Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String -> String
forall a. [a] -> [a]
init (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ URI -> String
uriScheme URI
p) String -> Set String -> Bool
forall a. Ord a => a -> Set a -> Bool
`member` Set String
safeURISchemes)
Nothing -> Bool
False
escapeURI :: String -> String
escapeURI :: String -> String
escapeURI = (Char -> Bool) -> String -> String
escapeURIString Char -> Bool
isAllowedInURI (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
encodeString
safeURISchemes :: Set String
safeURISchemes :: Set String
safeURISchemes = [String] -> Set String
forall a. Ord a => [a] -> Set a
fromList [String]
acceptable_protocols
sanitaryTags :: Set Text
sanitaryTags :: Set Text
sanitaryTags = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
fromList ([Text]
acceptable_elements [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ [Text]
mathml_elements [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ [Text]
svg_elements)
Set Text -> Set Text -> Set Text
forall a. Ord a => Set a -> Set a -> Set a
\\ ([Text] -> Set Text
forall a. Ord a => [a] -> Set a
fromList [Text]
svg_allow_local_href)
sanitaryAttributes :: Set Text
sanitaryAttributes :: Set Text
sanitaryAttributes = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
fromList ([Text]
allowed_html_uri_attributes [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ [Text]
acceptable_attributes [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ [Text]
mathml_attributes [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ [Text]
svg_attributes)
Set Text -> Set Text -> Set Text
forall a. Ord a => Set a -> Set a -> Set a
\\ ([Text] -> Set Text
forall a. Ord a => [a] -> Set a
fromList [Text]
svg_attr_val_allows_ref)
allowed_html_uri_attributes :: [Text]
allowed_html_uri_attributes :: [Text]
allowed_html_uri_attributes = ["href", "src", "cite", "action", "longdesc"]
uri_attributes :: Set Text
uri_attributes :: Set Text
uri_attributes = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
fromList ([Text] -> Set Text) -> [Text] -> Set Text
forall a b. (a -> b) -> a -> b
$ [Text]
allowed_html_uri_attributes [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ ["xlink:href", "xml:base"]
acceptable_elements :: [Text]
acceptable_elements :: [Text]
acceptable_elements = ["a", "abbr", "acronym", "address", "area",
"article", "aside", "audio", "b", "big", "blockquote", "br", "button",
"canvas", "caption", "center", "cite", "code", "col", "colgroup",
"command", "datagrid", "datalist", "dd", "del", "details", "dfn",
"dialog", "dir", "div", "dl", "dt", "em", "event-source", "fieldset",
"figcaption", "figure", "footer", "font", "form", "header", "h1", "h2",
"h3", "h4", "h5", "h6", "hr", "i", "img", "input", "ins", "keygen",
"kbd", "label", "legend", "li", "m", "main", "map", "menu", "meter", "multicol",
"nav", "nextid", "ol", "output", "optgroup", "option", "p", "pre",
"progress", "q", "s", "samp", "section", "select", "small", "sound",
"source", "spacer", "span", "strike", "strong", "sub", "sup", "table",
"tbody", "td", "textarea", "time", "tfoot", "th", "thead", "tr", "tt",
"u", "ul", "var", "video"]
mathml_elements :: [Text]
mathml_elements :: [Text]
mathml_elements = ["maction", "math", "merror", "mfrac", "mi",
"mmultiscripts", "mn", "mo", "mover", "mpadded", "mphantom",
"mprescripts", "mroot", "mrow", "mspace", "msqrt", "mstyle", "msub",
"msubsup", "msup", "mtable", "mtd", "mtext", "mtr", "munder",
"munderover", "none"]
svg_elements :: [Text]
svg_elements :: [Text]
svg_elements = ["a", "animate", "animateColor", "animateMotion",
"animateTransform", "clipPath", "circle", "defs", "desc", "ellipse",
"font-face", "font-face-name", "font-face-src", "g", "glyph", "hkern",
"linearGradient", "line", "marker", "metadata", "missing-glyph",
"mpath", "path", "polygon", "polyline", "radialGradient", "rect",
"set", "stop", "svg", "switch", "text", "title", "tspan", "use"]
acceptable_attributes :: [Text]
acceptable_attributes :: [Text]
acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey",
"align", "alt", "autocomplete", "autofocus", "axis",
"background", "balance", "bgcolor", "bgproperties", "border",
"bordercolor", "bordercolordark", "bordercolorlight", "bottompadding",
"cellpadding", "cellspacing", "ch", "challenge", "char", "charoff",
"choff", "charset", "checked", "class", "clear", "color",
"cols", "colspan", "compact", "contenteditable", "controls", "coords",
"datafld", "datapagesize", "datasrc", "datetime", "default",
"delay", "dir", "disabled", "draggable", "dynsrc", "enctype", "end",
"face", "for", "form", "frame", "galleryimg", "gutter", "headers",
"height", "hidefocus", "hidden", "high", "hreflang", "hspace",
"icon", "id", "inputmode", "ismap", "keytype", "label", "leftspacing",
"lang", "list", "loop", "loopcount", "loopend",
"loopstart", "low", "lowsrc", "max", "maxlength", "media", "method",
"min", "multiple", "name", "nohref", "noshade", "nowrap", "open",
"optimum", "pattern", "ping", "point-size", "prompt", "pqg",
"radiogroup", "readonly", "rel", "repeat-max", "repeat-min",
"replace", "required", "rev", "rightspacing", "rows", "rowspan",
"rules", "scope", "selected", "shape", "size", "span", "start",
"step",
"style",
"summary", "suppress", "tabindex", "target",
"template", "title", "toppadding", "type", "unselectable", "usemap",
"urn", "valign", "value", "variable", "volume", "vspace", "vrml",
"width", "wrap", "xml:lang"]
acceptable_protocols :: [String]
acceptable_protocols :: [String]
acceptable_protocols = [ "ed2k", "ftp", "http", "https", "irc",
"mailto", "news", "gopher", "nntp", "telnet", "webcal",
"xmpp", "callto", "feed", "urn", "aim", "rsync", "tag",
"ssh", "sftp", "rtsp", "afs" ]
mathml_attributes :: [Text]
mathml_attributes :: [Text]
mathml_attributes = ["actiontype", "align", "columnalign", "columnalign",
"columnalign", "columnlines", "columnspacing", "columnspan", "depth",
"display", "displaystyle", "equalcolumns", "equalrows", "fence",
"fontstyle", "fontweight", "frame", "height", "linethickness", "lspace",
"mathbackground", "mathcolor", "mathvariant", "mathvariant", "maxsize",
"minsize", "other", "rowalign", "rowalign", "rowalign", "rowlines",
"rowspacing", "rowspan", "rspace", "scriptlevel", "selection",
"separator", "stretchy", "width", "width", "xlink:href", "xlink:show",
"xlink:type", "xmlns", "xmlns:xlink"]
svg_attributes :: [Text]
svg_attributes :: [Text]
svg_attributes = ["accent-height", "accumulate", "additive", "alphabetic",
"arabic-form", "ascent", "attributeName", "attributeType",
"baseProfile", "bbox", "begin", "by", "calcMode", "cap-height",
"class", "clip-path", "color", "color-rendering", "content", "cx",
"cy", "d", "dx", "dy", "descent", "display", "dur", "end", "fill",
"fill-opacity", "fill-rule", "font-family", "font-size",
"font-stretch", "font-style", "font-variant", "font-weight", "from",
"fx", "fy", "g1", "g2", "glyph-name", "gradientUnits", "hanging",
"height", "horiz-adv-x", "horiz-origin-x", "id", "ideographic", "k",
"keyPoints", "keySplines", "keyTimes", "lang", "marker-end",
"marker-mid", "marker-start", "markerHeight", "markerUnits",
"markerWidth", "mathematical", "max", "min", "name", "offset",
"opacity", "orient", "origin", "overline-position",
"overline-thickness", "panose-1", "path", "pathLength", "points",
"preserveAspectRatio", "r", "refX", "refY", "repeatCount",
"repeatDur", "requiredExtensions", "requiredFeatures", "restart",
"rotate", "rx", "ry", "slope", "stemh", "stemv", "stop-color",
"stop-opacity", "strikethrough-position", "strikethrough-thickness",
"stroke", "stroke-dasharray", "stroke-dashoffset", "stroke-linecap",
"stroke-linejoin", "stroke-miterlimit", "stroke-opacity",
"stroke-width", "systemLanguage", "target", "text-anchor", "to",
"transform", "type", "u1", "u2", "underline-position",
"underline-thickness", "unicode", "unicode-range", "units-per-em",
"values", "version", "viewBox", "visibility", "width", "widths", "x",
"x-height", "x1", "x2", "xlink:actuate", "xlink:arcrole",
"xlink:href", "xlink:role", "xlink:show", "xlink:title", "xlink:type",
"xml:base", "xml:lang", "xml:space", "xmlns", "xmlns:xlink", "y",
"y1", "y2", "zoomAndPan"]
svg_attr_val_allows_ref :: [Text]
svg_attr_val_allows_ref :: [Text]
svg_attr_val_allows_ref = ["clip-path", "color-profile", "cursor", "fill",
"filter", "marker", "marker-start", "marker-mid", "marker-end",
"mask", "stroke"]
svg_allow_local_href :: [Text]
svg_allow_local_href :: [Text]
svg_allow_local_href = ["altGlyph", "animate", "animateColor",
"animateMotion", "animateTransform", "cursor", "feImage", "filter",
"linearGradient", "pattern", "radialGradient", "textpath", "tref",
"set", "use"]