1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
import markdown
from bleach.sanitizer import Cleaner
from bleach.html5lib_shim import Filter
# Tags and attributes generated by markdown (anything that's not
# generated by markdown is clearly manually added html)
# This list is from the bleach-allowlist module, but adding a dependency
# on it just to get two arrays seems silly.
# Allowlist of HTML tag names passed to the sanitizer, grouped by purpose.
_markdown_tags = [
    # Headings
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    # Inline emphasis
    "b",
    "i",
    "strong",
    "em",
    "tt",
    # Paragraphs and line breaks
    "p",
    "br",
    # Containers, quotes, code and rules
    "span",
    "div",
    "blockquote",
    "code",
    "pre",
    "hr",
    # Lists
    "ul",
    "ol",
    "li",
    "dd",
    "dt",
    # "img" is deliberately absent here: images are optional in our
    # markdown validation and are enabled separately when requested.
    # Links
    "a",
    # Subscript / superscript
    "sub",
    "sup",
]
# Allowlist of attributes per tag name; the "*" entry applies to every tag.
_markdown_attrs = {
    "*": [
        "id",
    ],
    "img": [
        "src",
        "alt",
        "title",
    ],
    "a": [
        "href",
        "alt",
        "title",
    ],
}
# Prevent relative links by simply removing any href attribute whose value
# does not contain a ':' (the tag itself is kept).
class RelativeLinkFilter(Filter):
    """Token filter that drops ``href`` attributes whose value has no ':'.

    A value without a colon is treated as a relative link. Only the
    attribute is removed; the token it belongs to is still yielded.
    """

    def __iter__(self):
        # Attribute keys are (namespace, name) pairs; None means the
        # attribute has no namespace.
        href_key = (None, 'href')
        tag_types = ['StartTag', 'EmptyTag']

        for token in Filter.__iter__(self):
            # Only opening/empty tags carry an attribute mapping in 'data'.
            attrs = token['data'] if token['type'] in tag_types else None
            if attrs and href_key in attrs and ':' not in attrs[href_key]:
                # Relative link: strip the attribute in place.
                del attrs[href_key]
            yield token
def pgmarkdown(value, allow_images=False, allow_relative_links=False):
    """Render markdown text to HTML and sanitize the result.

    The markdown in *value* is converted with ``markdown.markdown`` and the
    resulting HTML is passed through a bleach ``Cleaner`` configured with
    the module's tag and attribute allowlists.

    Args:
        value: Markdown source text.
        allow_images: When true, ``img`` is added to the allowed tags for
            this call only.
        allow_relative_links: When false (the default), ``href`` attributes
            without a ':' are removed via ``RelativeLinkFilter``.

    Returns:
        The cleaned HTML produced by ``Cleaner.clean``.
    """
    # Work on a copy: appending to the module-level list itself would leave
    # 'img' allowed for every later call (and grow the list on each call).
    tags = list(_markdown_tags)
    if allow_images:
        tags.append('img')

    filters = []
    if not allow_relative_links:
        filters.append(RelativeLinkFilter)

    cleaner = Cleaner(tags=tags, attributes=_markdown_attrs, filters=filters)
    return cleaner.clean(markdown.markdown(value))
|