From 7e5e76b08e69db81ae703352f3e2070ff8966f44 Mon Sep 17 00:00:00 2001
From: pknowles <pknowles@users.noreply.github.com>
Date: Mon, 15 May 2023 00:04:55 -0700
Subject: [PATCH 1/5] extras: adds special_attribute

---
 lib/markdown2.py | 59 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 5 deletions(-)
diff --git a/lib/markdown2.py b/lib/markdown2.py
index 79ffbc46..4b1b3a23 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -1418,6 +1418,17 @@ def _sanitize_html(self, s):
             (?P<id>.*?)
           \]
         ''', re.X | re.S)
+    _special_attribute = re.compile(r'''
+        [ \t]*
+        (
+          (?<=[ \t{]) # must-be-separated look-behind
+          \#[A-Za-z][-A-Za-z0-9_:.]* # html id
+          |
+          \.-?[_a-zA-Z]+[_a-zA-Z0-9-]* # css class
+          |
+          ([A-Za-z]+)=([A-Za-z0-9%.]+) # simple attribute
+        )
+        ''', re.X | re.S)  # one "#id", ".class" or "name=value" token; NOTE(review): the look-behind guards only the "#id" alternative
 
     _whitespace = re.compile(r'\s*')
 
@@ -1465,6 +1476,32 @@ def _extract_url_and_title(self, text, start):
             url = self._strip_anglebrackets.sub(r'\1', url)
         return url, title, end_idx
 
+    def _extract_special_attributes(self, text, start, allowlist=None):
+        """Parse a `{#id .class name=value}` block expected at text[start].
+
+        Returns (attributes, end_idx), or ({}, start) if no complete block is there."""
+        if not text.startswith("{", start):
+            return {}, start  # no attribute block here: consume nothing
+        idx = self._find_non_whitespace(text, start+1)
+        end_idx = self._find_balanced(text, idx, "{", "}")
+        if text[end_idx-1] != "}":
+            return {}, start  # unterminated block: leave the text untouched
+        result, classes = {}, []
+        for match in self._special_attribute.finditer(text, idx, end_idx):
+            token, attribute, value = match.group(1, 2, 3)
+            # The allowlist holds "#id"/".class" tokens and bare attribute names.
+            if allowlist is not None and (attribute or token) not in allowlist:
+                continue
+            if attribute:
+                result[attribute] = _xml_escape_attr(value)
+            elif token[0] == "#":
+                result["id"] = token[1:]  # strip only the leading "#"
+            else:
+                classes.append(token[1:])  # strip only the leading "."
+        if classes:
+            result["class"] = " ".join(classes)
+        return result, end_idx
+
     def _protect_url(self, url):
         '''
         Function that passes a URL through `_html_escape_url` to remove any nasty characters,
@@ -1568,6 +1605,10 @@ def _do_links(self, text):
                     is_img = start_idx > 0 and text[start_idx-1] == "!"
                     if is_img:
                         start_idx -= 1
+                        attributes = {}
+                        if 'special-attributes' in self.extras:  # the extra's value may be None (enabled without options) rather than a dict
+                            allowlist = (self.extras['special-attributes'] or {}).get('img') if isinstance(self.extras, dict) else None
+                            attributes, url_end_idx = self._extract_special_attributes(text, url_end_idx, allowlist)
 
                     # We've got to encode these to avoid conflicting
                     # with italics/bold.
@@ -1582,11 +1623,12 @@ def _do_links(self, text):
                         title_str = ''
                     if is_img:
                         img_class_str = self._html_class_str_from_tag("img")
-                        result = '<img src="%s" alt="%s"%s%s%s' \
+                        result = '<img src="%s" alt="%s"%s%s%s%s' \
                             % (self._protect_url(url),
                                _xml_escape_attr(link_text),
                                title_str,
                                img_class_str,
+                               "".join(' %s="%s"' % item for item in attributes.items()),
                                self.empty_element_suffix)
                         if "smarty-pants" in self.extras:
                             result = result.replace('"', self._escape_table['"'])
@@ -1616,10 +1658,16 @@ def _do_links(self, text):
             else:
                 match = self._tail_of_reference_link_re.match(text, p)
                 if match:
+                    consume_end = match.end()
+
                     # Handle a reference-style anchor or img.
                     is_img = start_idx > 0 and text[start_idx-1] == "!"
                     if is_img:
                         start_idx -= 1
+                        attributes = {}
+                        if 'special-attributes' in self.extras:  # the extra's value may be None (enabled without options) rather than a dict
+                            allowlist = (self.extras['special-attributes'] or {}).get('img') if isinstance(self.extras, dict) else None
+                            attributes, consume_end = self._extract_special_attributes(text, consume_end, allowlist)
                     link_id = match.group("id").lower()
                     if not link_id:
                         link_id = link_text.lower()  # for links like [this][]
@@ -1639,16 +1687,17 @@ def _do_links(self, text):
                             title_str = ''
                         if is_img:
                             img_class_str = self._html_class_str_from_tag("img")
-                            result = '<img src="%s" alt="%s"%s%s%s' \
+                            result = '<img src="%s" alt="%s"%s%s%s%s' \
                                 % (self._protect_url(url),
                                    _xml_escape_attr(link_text),
                                    title_str,
+                                   "".join(' %s="%s"' % item for item in attributes.items()),
                                    img_class_str,
                                    self.empty_element_suffix)
                             if "smarty-pants" in self.extras:
                                 result = result.replace('"', self._escape_table['"'])
                             curr_pos = start_idx + len(result)
-                            text = text[:start_idx] + result + text[match.end():]
+                            text = text[:start_idx] + result + text[consume_end:]
                         elif start_idx >= anchor_allowed_pos:
                             if self.safe_mode and not self._safe_protocols.match(url):
                                 result_head = '<a href="#"%s>' % (title_str)
@@ -1661,13 +1710,13 @@ def _do_links(self, text):
                             # anchor_allowed_pos on.
                             curr_pos = start_idx + len(result_head)
                             anchor_allowed_pos = start_idx + len(result)
-                            text = text[:start_idx] + result + text[match.end():]
+                            text = text[:start_idx] + result + text[consume_end:]
                         else:
                             # Anchor not allowed here.
                             curr_pos = start_idx + 1
                     else:
                         # This id isn't defined, leave the markup alone.
-                        curr_pos = match.end()
+                        curr_pos = consume_end
                     continue
 
             # Otherwise, it isn't markup.

From 1039949e654afd73fcc9e7359aa4751d6368b4d6 Mon Sep 17 00:00:00 2001
From: pknowles <pknowles@users.noreply.github.com>
Date: Mon, 22 May 2023 12:28:53 -0700
Subject: [PATCH 2/5] python2 and unicode

- isinstance() checks for both str and unicode objects
- yield from to for loop
- use .decode instead of str constructor
---
 lib/markdown2.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/markdown2.py b/lib/markdown2.py
index 4b1b3a23..a9ea4654 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -305,7 +305,7 @@ def convert(self, text):
 
         if not isinstance(text, str):
             # TODO: perhaps shouldn't presume UTF-8 for string input?
-            text = str(text, 'utf-8')
+            text = text.decode('utf-8')
 
         if self.use_file_vars:
             # Look for emacs-style file variable hints.
@@ -741,7 +741,7 @@ def _detab(self, text):
     _html_markdown_attr_re = re.compile(
         r'''\s+markdown=("1"|'1')''')
     def _hash_html_block_sub(self, match, raw=False):
-        if isinstance(match, str):
+        if isinstance(match, (str, unicode)):
             html = match
         else:
             html = match.group(1)
@@ -1740,7 +1740,7 @@ def header_id_from_text(self, text, prefix, n):
             the TOC (if the "toc" extra is specified).
         """
         header_id = _slugify(text)
-        if prefix and isinstance(prefix, str):
+        if prefix and isinstance(prefix, (str, unicode)):
             header_id = prefix + '-' + header_id
 
         self._count_from_header_id[header_id] += 1
@@ -2001,7 +2001,8 @@ def _wrap_code(self, inner):
             def _add_newline(self, inner):
                 # Add newlines around the inner contents so that _strict_tag_block_re matches the outer div.
                 yield 0, "\n"
-                yield from inner
+                for i in inner:
+                    yield i
                 yield 0, "\n"
 
             def wrap(self, source, outfile=None):

From 76c663dbb2b93edc9b343d6ff4e429d327aaf269 Mon Sep 17 00:00:00 2001
From: pknowles <pknowles@users.noreply.github.com>
Date: Mon, 22 May 2023 14:03:08 -0700
Subject: [PATCH 3/5] add a more general url parser/sanitizer

copies a url regex from pagedown (used by stackoverflow)
---
 lib/markdown2.py | 42 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/lib/markdown2.py b/lib/markdown2.py
index a9ea4654..4d60e64a 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -1513,7 +1513,43 @@ def _protect_url(self, url):
         self._escape_table[url] = key
         return key
 
-    _safe_protocols = re.compile(r'(https?|ftp):', re.I)
+    # _safe_href is copied from pagedown's Markdown.Sanitizer.js
+    # Inlining the entire license as I don't have the time to add it properly for upstreaming
+    # From: https://github.com/StackExchange/pagedown/blob/master/LICENSE.txt
+    #
+    # A javascript port of Markdown, as used on Stack Overflow
+    # and the rest of Stack Exchange network.
+    #
+    # Largely based on showdown.js by John Fraser (Attacklab).
+    #
+    # Original Markdown Copyright (c) 2004-2005 John Gruber
+    #   <http://daringfireball.net/projects/markdown/>
+    #
+    #
+    # Original Showdown code copyright (c) 2007 John Fraser
+    #
+    # Modifications and bugfixes (c) 2009 Dana Robinson
+    # Modifications and bugfixes (c) 2009-2014 Stack Exchange Inc.
+    #
+    # Permission is hereby granted, free of charge, to any person obtaining a copy
+    # of this software and associated documentation files (the "Software"), to deal
+    # in the Software without restriction, including without limitation the rights
+    # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    # copies of the Software, and to permit persons to whom the Software is
+    # furnished to do so, subject to the following conditions:
+    #
+    # The above copyright notice and this permission notice shall be included in
+    # all copies or substantial portions of the Software.
+    #
+    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+    # THE SOFTWARE.
+    _safe_href = re.compile(r'^((https?|ftp):\/\/|\/|\.|#)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)*[\]$]+$', re.I)
+
     def _do_links(self, text):
         """Turn Markdown link shortcuts into XHTML <a> and <img> tags.
 
@@ -1636,7 +1672,7 @@ def _do_links(self, text):
                         anchor_allowed_pos = start_idx + len(result)
                         text = text[:start_idx] + result + text[url_end_idx:]
                     elif start_idx >= anchor_allowed_pos:
-                        safe_link = self._safe_protocols.match(url) or url.startswith('#')
+                        safe_link = self._safe_href.match(url)
                         if self.safe_mode and not safe_link:
                             result_head = '<a href="#"%s>' % (title_str)
                         else:
@@ -1699,7 +1735,7 @@ def _do_links(self, text):
                             curr_pos = start_idx + len(result)
                             text = text[:start_idx] + result + text[consume_end:]
                         elif start_idx >= anchor_allowed_pos:
-                            if self.safe_mode and not self._safe_protocols.match(url):
+                            if self.safe_mode and not self._safe_href.match(url):
                                 result_head = '<a href="#"%s>' % (title_str)
                             else:
                                 result_head = '<a href="%s"%s>' % (self._protect_url(url), title_str)

From ad4ee4e54ceb03a8223213b31990c7104823e1f1 Mon Sep 17 00:00:00 2001
From: pknowles <pknowles@users.noreply.github.com>
Date: Mon, 22 May 2023 15:44:16 -0700
Subject: [PATCH 4/5] Reverts "Removed Python2 support."

This reverts commit 77d5275adc832b8952600d780ff8a5ad6d028c99.
---
 CONTRIBUTORS.txt               |  1 -
 TODO.txt                       |  1 +
 lib/markdown2.py               | 36 +++++++++++++++++++++-----
 perf/gen_perf_cases.py         | 22 ++++++++--------
 perf/perf.py                   | 16 ++++++------
 perf/util.py                   |  4 +--
 sandbox/wiki.py                |  2 +-
 test/markdown.py               | 32 +++++++++++------------
 test/test_markdown2.py         | 47 ++++++++++++++++++++++++++++------
 test/testall.py                |  2 +-
 tools/tables-align-columns.py  | 12 ++++-----
 tools/which.py                 | 10 ++++----
 tools/wiki-tables-to-tables.py |  2 +-
 13 files changed, 121 insertions(+), 66 deletions(-)

diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index 5e0980ab..09c1cb86 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -52,7 +52,6 @@ Maximilian Hils (github.com/mhils)
 BarkeH (github.com/BarkeH)
 cav71 (github.com/cav71)
 Crozzers (github.com/Crozzers)
-Bastian Venthur (https://github.com/venthur), removed Python2 support
 gitbra (github.com/gitbra)
 Łukasz Langa (github.com/ambv)
 Max Omdal (github.com/momja)
diff --git a/TODO.txt b/TODO.txt
index 9f8cbb9d..e6f880b2 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,3 +1,4 @@
+- py3: py2.4 test (broken?)
 - add "smarty-pants" extra to wiki
 - add "html-classes" extra to wiki
 - more on the "code-color" extra wiki page
diff --git a/lib/markdown2.py b/lib/markdown2.py
index 4d60e64a..09838c9d 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -112,6 +112,22 @@
 import codecs
 from collections import defaultdict
 
+
+# ---- Python version compat
+
+# Use `bytes` for byte strings and `unicode` for unicode strings (str in Py3).
+if sys.version_info[0] <= 2:
+    py3 = False
+    try:
+        bytes
+    except NameError:
+        bytes = str
+    base_string_type = basestring
+elif sys.version_info[0] >= 3:
+    py3 = True
+    unicode = str
+    base_string_type = str
+
 # ---- globals
 
 DEBUG = False
@@ -303,9 +319,9 @@ def convert(self, text):
         # articles):
         self.reset()
 
-        if not isinstance(text, str):
+        if not isinstance(text, unicode):
             # TODO: perhaps shouldn't presume UTF-8 for string input?
-            text = text.decode('utf-8')
+            text = unicode(text, 'utf-8')
 
         if self.use_file_vars:
             # Look for emacs-style file variable hints.
@@ -1776,7 +1792,7 @@ def header_id_from_text(self, text, prefix, n):
             the TOC (if the "toc" extra is specified).
         """
         header_id = _slugify(text)
-        if prefix and isinstance(prefix, (str, unicode)):
+        if prefix and isinstance(prefix, base_string_type):
             header_id = prefix + '-' + header_id
 
         self._count_from_header_id[header_id] += 1
@@ -2776,7 +2792,7 @@ def indent():
     return '\n'.join(lines) + '\n'
 
 
-class UnicodeWithAttrs(str):
+class UnicodeWithAttrs(unicode):
     """A subclass of unicode used for the return value of conversion to
     possibly attach some attributes. E.g. the "toc_html" attribute when
     the "toc" extra is used.
@@ -3151,7 +3167,11 @@ def main(argv=None):
             p.stdin.write(text.encode('utf-8'))
             p.stdin.close()
             perl_html = p.stdout.read().decode('utf-8')
-            sys.stdout.write(perl_html)
+            if py3:
+                sys.stdout.write(perl_html)
+            else:
+                sys.stdout.write(perl_html.encode(
+                    sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
             print("==== markdown2.py ====")
         html = markdown(text,
             html4tags=opts.html4tags,
@@ -3159,7 +3179,11 @@ def main(argv=None):
             extras=extras, link_patterns=link_patterns,
             use_file_vars=opts.use_file_vars,
             cli=True)
-        sys.stdout.write(html)
+        if py3:
+            sys.stdout.write(html)
+        else:
+            sys.stdout.write(html.encode(
+                sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
         if extras and "toc" in extras:
             log.debug("toc_html: " +
                 str(html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')))
diff --git a/perf/gen_perf_cases.py b/perf/gen_perf_cases.py
index 28b9e18c..2108f2a5 100755
--- a/perf/gen_perf_cases.py
+++ b/perf/gen_perf_cases.py
@@ -15,7 +15,7 @@
 def gen_aspn_cases(limit=0):
     base_dir = TMP+'aspn-cases'
     if exists(base_dir):
-        print("'%s' exists, skipping" % base_dir)
+        print "'%s' exists, skipping" % base_dir
         return 
     os.makedirs(base_dir)
     sys.stdout.write("generate %s" % base_dir); sys.stdout.flush()
@@ -48,10 +48,10 @@ def gen_aspn_cases(limit=0):
 def gen_test_cases():
     base_dir = TMP+"test-cases"
     if exists(base_dir):
-        print("'%s' exists, skipping" % base_dir)
+        print "'%s' exists, skipping" % base_dir
         return 
     os.makedirs(base_dir)
-    print("generate %s" % base_dir)
+    print "generate %s" % base_dir
     for test_cases_dir in glob(join("..", "test", "*-cases")):
         for text_file in glob(join(test_cases_dir, "*.text")):
             shutil.copy(text_file, join(base_dir, basename(text_file)))
@@ -134,7 +134,7 @@ def _markdown_from_aspn_html(html):
         try:
             idx = markdown.index(marker)
         except ValueError:
-            print("marker: %r" % marker)
+            print "marker: %r" % marker
             raise
         if not markdown[:idx].strip():
             #TODO: Correct this false diagnosis. Problem is not limited
@@ -191,8 +191,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
     """
     DEBUG = False
     if DEBUG: 
-        print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
-              % (tabsize, skip_first_line))
+        print "dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
+              % (tabsize, skip_first_line)
     indents = []
     margin = None
     for i, line in enumerate(lines):
@@ -209,12 +209,12 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
                 break
         else:
             continue # skip all-whitespace lines
-        if DEBUG: print("dedent: indent=%d: %r" % (indent, line))
+        if DEBUG: print "dedent: indent=%d: %r" % (indent, line)
         if margin is None:
             margin = indent
         else:
             margin = min(margin, indent)
-    if DEBUG: print("dedent: margin=%r" % margin)
+    if DEBUG: print "dedent: margin=%r" % margin
 
     if margin is not None and margin > 0:
         for i, line in enumerate(lines):
@@ -226,7 +226,7 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
                 elif ch == '\t':
                     removed += tabsize - (removed % tabsize)
                 elif ch in '\r\n':
-                    if DEBUG: print("dedent: %r: EOL -> strip up to EOL" % line)
+                    if DEBUG: print "dedent: %r: EOL -> strip up to EOL" % line
                     lines[i] = lines[i][j:]
                     break
                 else:
@@ -234,8 +234,8 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
                                      "line %r while removing %d-space margin"
                                      % (ch, line, margin))
                 if DEBUG:
-                    print("dedent: %r: %r -> removed %d/%d"\
-                          % (line, ch, removed, margin))
+                    print "dedent: %r: %r -> removed %d/%d"\
+                          % (line, ch, removed, margin)
                 if removed == margin:
                     lines[i] = lines[i][j+1:]
                     break
diff --git a/perf/perf.py b/perf/perf.py
index ad500d8e..b2b04a0a 100755
--- a/perf/perf.py
+++ b/perf/perf.py
@@ -44,7 +44,7 @@ def time_markdown_py(cases_dir, repeat):
                 pass
         end = clock()
         times.append(end - start)
-    print("  markdown.py: best of %d: %.3fs" % (repeat, min(times)))
+    print "  markdown.py: best of %d: %.3fs" % (repeat, min(times))
 
 @hotshotit
 def hotshot_markdown2_py(cases_dir, repeat):
@@ -65,7 +65,7 @@ def time_markdown2_py(cases_dir, repeat):
             markdowner.convert(content)
         end = clock()
         times.append(end - start)
-    print("  markdown2.py: best of %d: %.3fs" % (repeat, min(times)))
+    print "  markdown2.py: best of %d: %.3fs" % (repeat, min(times))
 
 def time_markdown_pl(cases_dir, repeat):
     times = []
@@ -74,7 +74,7 @@ def time_markdown_pl(cases_dir, repeat):
         os.system('perl time_markdown_pl.pl "%s"' % cases_dir)
         end = clock()
         times.append(end - start)
-    print("  Markdown.pl: best of %d: %.3fs" % (repeat, min(times)))
+    print "  Markdown.pl: best of %d: %.3fs" % (repeat, min(times))
 
 def time_all(cases_dir, repeat):
     time_markdown_pl(cases_dir, repeat=repeat)
@@ -130,10 +130,10 @@ def main(args=sys.argv):
         if timer_name not in d:
             raise ValueError("no '%s' timer function" % timer_name)
         timer = d[timer_name]
-        print("Profile conversion of %s (plat=%s):" \
-              % (os.path.join(cases_dir, "*.text"), sys.platform))
+        print "Profile conversion of %s (plat=%s):" \
+              % (os.path.join(cases_dir, "*.text"), sys.platform)
         timer(cases_dir, repeat=opts.repeat)
-        print()
+        print
         os.system("python show_stats.py %s.prof" % timer_name)
 
     else:
@@ -144,8 +144,8 @@ def main(args=sys.argv):
         if timer_name not in d:
             raise ValueError("no '%s' timer function" % timer_name)
         timer = d[timer_name]
-        print("Time conversion of %s (plat=%s):" \
-              % (os.path.join(cases_dir, "*.text"), sys.platform))
+        print "Time conversion of %s (plat=%s):" \
+              % (os.path.join(cases_dir, "*.text"), sys.platform)
         timer(cases_dir, repeat=opts.repeat)
     
 if __name__ == "__main__":
diff --git a/perf/util.py b/perf/util.py
index e32d0f8b..4b52e6f8 100644
--- a/perf/util.py
+++ b/perf/util.py
@@ -30,14 +30,14 @@ def wrapper(*args, **kw):
             return func(*args, **kw)
         finally:
             total_time = clock() - start_time
-            print("%s took %.3fs" % (func.__name__, total_time))
+            print "%s took %.3fs" % (func.func_name, total_time)
     return wrapper
 
 def hotshotit(func):
     def wrapper(*args, **kw):
         import hotshot
         global hotshotProfilers
-        prof_name = func.__name__+".prof"
+        prof_name = func.func_name+".prof"
         profiler = hotshotProfilers.get(prof_name)
         if profiler is None:
             profiler = hotshot.Profile(prof_name)
diff --git a/sandbox/wiki.py b/sandbox/wiki.py
index f270b636..ac27199a 100644
--- a/sandbox/wiki.py
+++ b/sandbox/wiki.py
@@ -18,4 +18,4 @@
 ]
 processor = markdown2.Markdown(extras=["link-patterns"],
                                link_patterns=link_patterns)
-print(processor.convert(wiki_page))
+print processor.convert(wiki_page)
diff --git a/test/markdown.py b/test/markdown.py
index e18336b1..c76f8d95 100644
--- a/test/markdown.py
+++ b/test/markdown.py
@@ -32,13 +32,13 @@
 import re, sys, codecs
 
 # Set debug level: 3 none, 2 critical, 1 informative, 0 all
-(VERBOSE, INFO, CRITICAL, NONE) = list(range(4))
+(VERBOSE, INFO, CRITICAL, NONE) = range(4)
 
 MESSAGE_THRESHOLD = CRITICAL
 
 def message(level, text) :
     if level >= MESSAGE_THRESHOLD :
-        print(text)
+        print text
 
 
 # --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
@@ -48,9 +48,9 @@ def message(level, text) :
 SMART_EMPHASIS = 1        # this_or_that does not become this<i>or</i>that
 HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
 
-RTL_BIDI_RANGES = ( ('\u0590', '\u07FF'),
+RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
                     # from Hebrew to Nko (includes Arabic, Syriac and Thaana)
-                    ('\u2D30', '\u2D7F'),
+                    (u'\u2D30', u'\u2D7F'),
                     # Tifinagh
                     )
 
@@ -62,9 +62,9 @@ def message(level, text) :
 # 0780-07BF - Thaana
 # 07C0-07FF - Nko
 
-BOMS = { 'utf-8' : (str(codecs.BOM_UTF8, "utf-8"), ),
-         'utf-16' : (str(codecs.BOM_UTF16_LE, "utf-16"),
-                     str(codecs.BOM_UTF16_BE, "utf-16")),
+BOMS = { 'utf-8' : (unicode(codecs.BOM_UTF8, "utf-8"), ),
+         'utf-16' : (unicode(codecs.BOM_UTF16_LE, "utf-16"),
+                     unicode(codecs.BOM_UTF16_BE, "utf-16")),
          #'utf-32' : (unicode(codecs.BOM_UTF32_LE, "utf-32"),
          #            unicode(codecs.BOM_UTF32_BE, "utf-32")),
          }
@@ -128,7 +128,7 @@ def getBidiType(text) :
 
     ch = text[0]
 
-    if not isinstance(ch, str) or not ch.isalpha():
+    if not isinstance(ch, unicode) or not ch.isalpha():
         return None
 
     else :
@@ -312,7 +312,7 @@ def toxml(self):
         if self.nodeName in ['p', 'li', 'ul', 'ol',
                              'h1', 'h2', 'h3', 'h4', 'h5', 'h6'] :
 
-            if "dir" not in self.attribute_values:
+            if not self.attribute_values.has_key("dir"):
                 if self.bidi :
                     bidi = self.bidi
                 else :
@@ -788,7 +788,7 @@ def handleMatch(self, m, doc):
             # we'll use "google" as the id
             id = m.group(2).lower()
 
-        if id not in self.references : # ignore undefined refs
+        if not self.references.has_key(id) : # ignore undefined refs
             return None
         href, title = self.references[id]
         text = m.group(2)
@@ -1127,7 +1127,7 @@ def registerExtensions(self, extensions, configs) :
                         % (ext, extension_module_name) )
             else :
 
-                if ext in configs :
+                if configs.has_key(ext) :
                     configs_for_ext = configs[ext]
                 else :
                     configs_for_ext = []
@@ -1489,7 +1489,7 @@ def _handleInlineWrapper (self, line) :
                 
                 x = parts[i]
 
-                if isinstance(x, str) :
+                if isinstance(x, (str, unicode)) :
                     result = self._applyPattern(x, pattern)
 
                     if result :
@@ -1502,7 +1502,7 @@ def _handleInlineWrapper (self, line) :
 
         for i in range(len(parts)) :
             x = parts[i]
-            if isinstance(x, str) :
+            if isinstance(x, (str, unicode)) :
                 parts[i] = self.doc.createTextNode(x)
 
         return parts
@@ -1577,7 +1577,7 @@ def _applyPattern(self, line, pattern) :
 
                             for item in result:
 
-                                if isinstance(item, str):
+                                if isinstance(item, (str, unicode)):
                                     if len(item) > 0:
                                         node.insertChild(position,
                                              self.doc.createTextNode(item))
@@ -1723,7 +1723,7 @@ def __init__(self, configs = {}) :
         self.config = configs
 
     def getConfig(self, key) :
-        if key in self.config :
+        if self.config.has_key(key) :
             return self.config[key][0]
         else :
             return ""
@@ -1757,7 +1757,7 @@ def parse_options() :
                     'encoding' : None }
 
         else :
-            print(OPTPARSE_WARNING)
+            print OPTPARSE_WARNING
             return None
 
     parser = optparse.OptionParser(usage="%prog INPUTFILE [options]")
diff --git a/test/test_markdown2.py b/test/test_markdown2.py
index cc2a80b2..003f7f4b 100755
--- a/test/test_markdown2.py
+++ b/test/test_markdown2.py
@@ -21,6 +21,26 @@
 finally:
     del sys.path[0]
 
+
+
+#---- Python version compat
+
+# Use `bytes` for byte strings and `unicode` for unicode strings (str in Py3).
+if sys.version_info[0] <= 2:
+    py3 = False
+    try:
+        bytes
+    except NameError:
+        bytes = str
+    base_string_type = basestring
+elif sys.version_info[0] >= 3:
+    py3 = True
+    unicode = str
+    base_string_type = str
+    unichr = chr
+
+
+
 #---- Test cases
 
 class _MarkdownTestCase(unittest.TestCase):
@@ -103,8 +123,12 @@ def _assertMarkdown(self, text, html, text_path=None, html_path=None,
         def charreprreplace(exc):
             if not isinstance(exc, UnicodeEncodeError):
                 raise TypeError("don't know how to handle %r" % exc)
-            obj_repr = repr(exc.object[exc.start:exc.end])[1:-1]
-            return (str(obj_repr), exc.end)
+            if py3:
+                obj_repr = repr(exc.object[exc.start:exc.end])[1:-1]
+            else:
+                # repr -> remove "u'" and "'"
+                obj_repr = repr(exc.object[exc.start:exc.end])[2:-1]
+            return (unicode(obj_repr), exc.end)
         codecs.register_error("charreprreplace", charreprreplace)
 
         self.assertEqual(python_norm_html, norm_html, errmsg)
@@ -258,7 +282,7 @@ def test_pre(self):
             '<p>some starter text</p>\n\n<pre><code>#!/usr/bin/python\nprint "hi"\n</code></pre>\n')
 
     def test_russian(self):
-        ko = '\\u043b\\u0449' # 'ko' on russian keyboard
+        ko = '\u043b\u0449' # 'ko' on russian keyboard
         self._assertMarkdown("## %s" % ko,
             '<h2>%s</h2>\n' % ko)
     test_russian.tags = ["unicode", "issue3"]
@@ -315,6 +339,13 @@ def test_api(self):
         test = doctest.DocFileTest("api.doctests")
         test.runTest()
 
+    # Don't bother on Python 3 because (a) there aren't many inline doctests,
+    # and (b) they are more to be didactic than comprehensive test suites.
+    if not py3:
+        def test_internal(self):
+            doctest.testmod(markdown2)
+
+
 
 #---- internal support stuff
 
@@ -322,9 +353,9 @@ def test_api(self):
 def _xml_escape_sub(match):
     escape = match.group(1)
     if escape[0] == 'x':
-        return chr(int('0'+escape, base=16))
+        return unichr(int('0'+escape, base=16))
     else:
-        return chr(int(escape))
+        return unichr(int(escape))
 
 _markdown_email_link_re = re.compile(r'<a href="(.*?&#.*?)">(.*?)</a>', re.U)
 def _markdown_email_link_sub(match):
@@ -341,7 +372,7 @@ def norm_html_from_html(html):
 
     Also normalize EOLs.
     """
-    if not isinstance(html, str):
+    if not isinstance(html, unicode):
         html = html.decode('utf-8')
     html = _markdown_email_link_re.sub(
         _markdown_email_link_sub, html)
@@ -352,7 +383,7 @@ def norm_html_from_html(html):
 
 def _display(s):
     """Markup the given string for useful display."""
-    if not isinstance(s, str):
+    if not isinstance(s, unicode):
         s = s.decode("utf-8")
     s = _indent(_escaped_text_from_text(s, "whitespace"), 4)
     if not s.endswith('\n'):
@@ -499,7 +530,7 @@ def _escaped_text_from_text(text, escapes="eol"):
     # - Add _escaped_html_from_text() with a similar call sig.
     import re
 
-    if isinstance(escapes, str):
+    if isinstance(escapes, base_string_type):
         if escapes == "eol":
             escapes = {'\r\n': "\\r\\n\r\n", '\n': "\\n\n", '\r': "\\r\r"}
         elif escapes == "whitespace":
diff --git a/test/testall.py b/test/testall.py
index e26856ed..b236cbbc 100644
--- a/test/testall.py
+++ b/test/testall.py
@@ -3,7 +3,7 @@
 # Run the test suite against all the Python versions we can find.
 #
 
-
+from __future__ import print_function
 
 import sys
 import os
diff --git a/tools/tables-align-columns.py b/tools/tables-align-columns.py
index f3535928..64e230e5 100755
--- a/tools/tables-align-columns.py
+++ b/tools/tables-align-columns.py
@@ -8,7 +8,7 @@
 - Can't handle tables where cells have a pipe.
 """
 
-
+from __future__ import print_function
 
 __version__ = "1.0.0"
 
@@ -87,17 +87,17 @@ def _table_sub(match):
             width = width_from_col_idx[col_idx]
             align = align_from_col_idx[col_idx]
             if align == 'center':
-                underline.append(':' + '-'*(width-2) + ':')
+                underline.append(':' + u'-'*(width-2) + ':')
             elif align == 'right':
-                underline.append('-'*(width-1) + ':')
+                underline.append(u'-'*(width-1) + ':')
             elif align == 'left':
-                underline.append(':' + '-'*(width-1))
+                underline.append(':' + u'-'*(width-1))
             else:
-                underline.append('-'*width)
+                underline.append(u'-'*width)
         table[1:1] = [underline]
         #e(pformat(table, width=200))
 
-        table_str = '\n'.join(('| ' + ' | '.join(r) + ' |') for r in table)
+        table_str = u'\n'.join(('| ' + u' | '.join(r) + ' |') for r in table)
         return table_str + '\n'
 
     text = codecs.open(path, 'rb', 'utf8').read()
diff --git a/tools/which.py b/tools/which.py
index eeb6b148..83e431b9 100755
--- a/tools/which.py
+++ b/tools/which.py
@@ -33,7 +33,7 @@
     from HKLM\SOFTWARE\...\perl.exe
 """
 
-
+from __future__ import print_function
 
 _cmdlnUsage = """
     Show the full path of commands.
@@ -93,13 +93,13 @@ def _getRegisteredExecutable(exeName):
     if sys.platform.startswith('win'):
         if os.path.splitext(exeName)[1].lower() != '.exe':
             exeName += '.exe'
-        import winreg
+        import _winreg
         try:
             key = "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\" +\
                   exeName
-            value = winreg.QueryValue(winreg.HKEY_LOCAL_MACHINE, key)
+            value = _winreg.QueryValue(_winreg.HKEY_LOCAL_MACHINE, key)
             registered = (value, "from HKLM\\"+key)
-        except winreg.error:
+        except _winreg.error:
             pass
         if registered and not os.path.exists(registered[0]):
             registered = None
@@ -252,7 +252,7 @@ def which(command, path=None, verbose=0, exts=None):
     If no match is found for the command, a WhichError is raised.
     """
     try:
-        match = next(whichgen(command, path, verbose, exts))
+        match = whichgen(command, path, verbose, exts).next()
     except StopIteration:
         raise WhichError("Could not find '%s' on the path." % command)
     return match
diff --git a/tools/wiki-tables-to-tables.py b/tools/wiki-tables-to-tables.py
index 8c3b3fb0..b70e20c7 100755
--- a/tools/wiki-tables-to-tables.py
+++ b/tools/wiki-tables-to-tables.py
@@ -29,7 +29,7 @@
   mode... and the only supported mode for now.
 """
 
-
+from __future__ import print_function
 
 __version__ = "1.0.0"
 

From f9d6611496f1e23e3d463a4653142059d5768878 Mon Sep 17 00:00:00 2001
From: pknowles <pknowles@users.noreply.github.com>
Date: Mon, 22 May 2023 17:31:32 -0700
Subject: [PATCH 5/5] fix special_attributes consuming all text in a paragraph

---
 lib/markdown2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/markdown2.py b/lib/markdown2.py
index 09838c9d..1a64d5b9 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -1496,7 +1496,7 @@ def _extract_special_attributes(self, text, start, allowlist=None):
         """Extracts the url and (optional) title from the tail of a link"""
         # text[start] equals the opening parenthesis
         idx = self._find_non_whitespace(text, start+1)
-        if idx == len(text):
+        if idx == len(text) or text[start] != "{":
             return {}, start
         end_idx = idx
         end_idx = self._find_balanced(text, end_idx, "{", "}")