Browse code
Initial version
fiddlerwoaroof authored on 14/08/2017 06:39:16
Showing 4 changed files
Showing 4 changed files
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,16 @@ |
1 |
+;;; -*- Mode:Lisp; Syntax:ANSI-Common-Lisp; Package: ASDF-USER -*- |
|
2 |
+(in-package :asdf-user) |
|
3 |
+ |
|
4 |
+(defsystem :html-sanitizer |
|
5 |
+ :description "" |
|
6 |
+ :author "Ed L <edward@elangley.org>" |
|
7 |
+ :license "MIT" |
|
8 |
+ :depends-on (:alexandria |
|
9 |
+ :uiop |
|
10 |
+ :serapeum |
|
11 |
+ :plump |
|
12 |
+ :lquery |
|
13 |
+ :fiveam) |
|
14 |
+ :serial t |
|
15 |
+ :components ((:file "package") |
|
16 |
+ (:file "html-sanitizer"))) |
2 | 19 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,109 @@ |
1 |
+(in-package :html-sanitizer) |
|
2 |
+ |
|
3 |
+(defparameter *serialization-mode* :xml) |
|
4 |
+(defparameter +plump-dont-self-close-tags+ '("span" "div" "iframe" "script")) ; insert more tags if needed |
|
5 |
+ |
|
6 |
+(defmethod plump:serialize-object :around ((node plump:element)) |
|
7 |
+ (let ((tag-name (plump:tag-name node))) |
|
8 |
+ (if (and (eq *serialization-mode* :html) |
|
9 |
+ (= 0 (length (plump:children node))) |
|
10 |
+ (member tag-name +plump-dont-self-close-tags+ :test #'string-equal)) |
|
11 |
+ (progn |
|
12 |
+ (format plump:*stream* "<~A" tag-name) |
|
13 |
+ (plump:serialize (plump:attributes node) plump:*stream*) |
|
14 |
+ (format plump:*stream* "></~A>" tag-name)) |
|
15 |
+ (call-next-method node)))) |
|
16 |
+ |
|
17 |
+(defparameter +safe-tags+ |
|
18 |
+ (list "a" "abbr" "acronym" "address" "area" "article" "aside" |
|
19 |
+ "audio" "b" "bdi" "bdo" "big" "blink" "blockquote" "body" "br" |
|
20 |
+ "caption" "center" "cite" "code" "col" "colgroup" "content" |
|
21 |
+ "data" "datalist" "dd" "decorator" "del" "details" "dfn" "dir" |
|
22 |
+ "div" "dl" "dt" "element" "em" "fieldset" "figcaption" |
|
23 |
+ "figure" "font" "footer" "form" "h1" "h2" "h3" "h4" "h5" "h6" |
|
24 |
+ "head" "header" "hgroup" "hr" "html" "i" "img" "ins" "kbd" |
|
25 |
+ "label" "legend" "li" "main" "map" "mark" "marquee" "menu" |
|
26 |
+ "menuitem" "meter" "nav" "nobr" "ol" "optgroup" "option" |
|
27 |
+ "output" "p" "pre" "progress" "q" "rp" "rt" "ruby" "s" "samp" |
|
28 |
+ "section" "select" "shadow" "small" "source" "spacer" "span" |
|
29 |
+ "strike" "strong" "sub" "summary" "sup" "table" "tbody" "td" |
|
30 |
+ "template" "textarea" "tfoot" "th" "thead" "time" "tr" "track" |
|
31 |
+ "tt" "u" "ul" "var" "video" "wbr")) |
|
32 |
+ |
|
33 |
+ |
|
34 |
+(defparameter +safe-attrs+ |
|
35 |
+ (list "accept" "action" "align" "alt" "autocomplete" "background" |
|
36 |
+ "bgcolor" "border" "cellpadding" "cellspacing" "checked" "cite" |
|
37 |
+ "class" "clear" "color" "cols" "colspan" "coords" "datetime" |
|
38 |
+ "default" "dir" "disabled" "download" "enctype" "face" "for" |
|
39 |
+ "headers" "height" "hidden" "high" "href" "hreflang" "id" |
|
40 |
+ "ismap" "label" "lang" "list" "loop" "low" "max" "maxlength" |
|
41 |
+ "media" "method" "min" "multiple" "name" "noshade" "novalidate" |
|
42 |
+ "nowrap" "open" "optimum" "pattern" "placeholder" "poster" |
|
43 |
+ "preload" "pubdate" "radiogroup" "readonly" "rel" "required" |
|
44 |
+ "rev" "reversed" "role" "rows" "rowspan" "spellcheck" "scope" |
|
45 |
+ "selected" "shape" "size" "span" "srclang" "start" "src" "step" |
|
46 |
+ "summary" "tabindex" "title" "type" "usemap" "valign" "value" |
|
47 |
+ "width" "xmlns")) |
|
48 |
+ |
|
49 |
+(defparameter *comment-mode* :strip |
|
50 |
+ ;;TODO: strip-conditional to only strip conditional |
|
51 |
+ ;; comments (e.g. <!--[if ...]>...<![endif]-->) |
|
52 |
+ ) |
|
53 |
+ |
|
54 |
+(defgeneric sanitize-node (node) |
|
55 |
+ (:documentation "") |
|
56 |
+ (:method (node) |
|
57 |
+ node) |
|
58 |
+ |
|
59 |
+ (:method ((node plump:comment)) |
|
60 |
+ (plump:remove-child node)) |
|
61 |
+ |
|
62 |
+ (:method :after ((node plump:element)) |
|
63 |
+ (map nil #'sanitize-node |
|
64 |
+ (plump:children node))) |
|
65 |
+ |
|
66 |
+ (:method ((node plump:element)) |
|
67 |
+ (if (member (plump:tag-name node) |
|
68 |
+ +safe-tags+ |
|
69 |
+ :test 'equalp) |
|
70 |
+ (progn |
|
71 |
+ (loop for attr being the hash-keys in (plump:attributes node) |
|
72 |
+ unless (member attr +safe-attrs+ :test 'equalp) do |
|
73 |
+ (plump:remove-attribute node attr)) |
|
74 |
+ node) |
|
75 |
+ (plump:remove-child node)))) |
|
76 |
+ |
|
77 |
+(defun sanitize (html) |
|
78 |
+ (let ((result (plump:parse html)) |
|
79 |
+ (*serialization-mode* :html)) |
|
80 |
+ (map nil |
|
81 |
+ #'sanitize-node |
|
82 |
+ (plump:children result)) |
|
83 |
+ (plump:serialize result nil))) |
|
84 |
+ |
|
85 |
+(defpackage :html-sanitizer.test |
|
86 |
+ (:use :cl :fiveam)) |
|
87 |
+(in-package :html-sanitizer.test) |
|
88 |
+ |
|
89 |
+(eval-when (:compile-toplevel :load-toplevel :execute) |
|
90 |
+ (import 'html-sanitizer::sanitize)) |
|
91 |
+ |
|
92 |
+(def-suite :html-sanitizer) |
|
93 |
+(in-suite :html-sanitizer) |
|
94 |
+ |
|
95 |
+(test removes-script-tags |
|
96 |
+ (is (equal "<div></div>" |
|
97 |
+ (sanitize "<div><script></script></div>")))) |
|
98 |
+ |
|
99 |
+(test removes-style-tags |
|
100 |
+ (is (equal "<div></div>" |
|
101 |
+ (sanitize "<div><style></style></div>")))) |
|
102 |
+ |
|
103 |
+(test removes-style-attrs |
|
104 |
+ (is (equal "<div></div>" |
|
105 |
+ (sanitize "<div style=\"a: 1\"></div>")))) |
|
106 |
+ |
|
107 |
+(test removes-comments |
|
108 |
+ (is (equal "" |
|
109 |
+ (sanitize "<!-- -->")))) |