Diffstat (limited to 'python/skytools/parsing.py')
-rw-r--r--  python/skytools/parsing.py  42
1 file changed, 32 insertions(+), 10 deletions(-)
diff --git a/python/skytools/parsing.py b/python/skytools/parsing.py
index bdcd1e38..0545116b 100644
--- a/python/skytools/parsing.py
+++ b/python/skytools/parsing.py
@@ -220,9 +220,13 @@ def parse_tabbed_table(txt):
_extstr = r""" ['] (?: [^'\\]+ | \\. | [']['] )* ['] """
_stdstr = r""" ['] (?: [^']+ | [']['] )* ['] """
+_name = r""" (?: [a-z][a-z0-9_$]* | " (?: [^"]+ | "" )* " ) """
+
+_ident = r""" (?P<ident> %s ) """ % _name
+_fqident = r""" (?P<ident> %s (?: \. %s )? ) """ % (_name, _name)
+
_base_sql = r"""
- (?P<ident> [a-z][a-z0-9_$]* | ["] (?: [^"]+ | ["]["] )* ["] )
- | (?P<dolq> (?P<dname> [$] (?: [_a-z][_a-z0-9]*)? [$] )
+ (?P<dolq> (?P<dname> [$] (?: [_a-z][_a-z0-9]*)? [$] )
.*?
(?P=dname) )
| (?P<num> [0-9][0-9.e]* )
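
The new _name/_fqident fragments are self-contained regexes, so they can be tried out in isolation. A minimal sketch, restating the fragments exactly as defined in the hunk above (illustration only, not part of the patch):

    import re

    # Same fragments as the patch: bare name or double-quoted name,
    # optionally qualified with a dot.
    _name = r""" (?: [a-z][a-z0-9_$]* | " (?: [^"]+ | "" )* " ) """
    _fqident = r""" (?P<ident> %s (?: \. %s )? ) """ % (_name, _name)

    rc = re.compile(_fqident, re.X | re.I | re.S)
    for s in ('tbl', 'public.tbl', 'a."b "" c"'):
        print(s, '->', rc.match(s).group('ident'))
    # tbl -> tbl
    # public.tbl -> public.tbl
    # a."b "" c" -> a."b "" c"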
@@ -232,11 +236,18 @@ _base_sql = r"""
     | (?P<ws>    (?: \s+ | [/][*] .*? [*][/] | [-][-][^\n]* )+ )
     | (?P<error> ['"$\\] )
     | (?P<sym>   . )"""
-_std_sql = r"""(?: (?P<str> [E] %s | %s ) | %s )""" % (_extstr, _stdstr, _base_sql)
-_ext_sql = r"""(?: (?P<str> [E]? %s ) | %s )""" % (_extstr, _base_sql)
+
+_base_sql_fq = r"%s | %s" % (_fqident, _base_sql)
+_base_sql = r"%s | %s" % (_ident, _base_sql)
+
+_std_sql = r"""(?: (?P<str> [E] %s | %s ) | %s )""" % (_extstr, _stdstr, _base_sql)
+_std_sql_fq = r"""(?: (?P<str> [E] %s | %s ) | %s )""" % (_extstr, _stdstr, _base_sql_fq)
+_ext_sql = r"""(?: (?P<str> [E]? %s ) | %s )""" % (_extstr, _base_sql)
+_ext_sql_fq = r"""(?: (?P<str> [E]? %s ) | %s )""" % (_extstr, _base_sql_fq)
 _std_sql_rc = _ext_sql_rc = None
+_std_sql_fq_rc = _ext_sql_fq_rc = None
 
-def sql_tokenizer(sql, standard_quoting = False, ignore_whitespace = False):
+def sql_tokenizer(sql, standard_quoting = False, ignore_whitespace = False, fqident = False):
     r"""Parse SQL to tokens.
 
     Iterator, returns (toktype, tokstr) tuples.
@@ -246,16 +257,26 @@ def sql_tokenizer(sql, standard_quoting = False, ignore_whitespace = False):
     [('ident', 'select'), ('sym', '*'), ('ident', 'from'), ('ident', 'a'), ('sym', '.'), ('ident', 'b')]
     >>> [x for x in sql_tokenizer("\"c olumn\",'str''val'")]
     [('ident', '"c olumn"'), ('sym', ','), ('str', "'str''val'")]
+    >>> list(sql_tokenizer('a.b a."b "" c" a.1', fqident=True, ignore_whitespace=True))
+    [('ident', 'a.b'), ('ident', 'a."b "" c"'), ('ident', 'a'), ('sym', '.'), ('num', '1')]
     """
-    global _std_sql_rc, _ext_sql_rc
+    global _std_sql_rc, _ext_sql_rc, _std_sql_fq_rc, _ext_sql_fq_rc
     if not _std_sql_rc:
         _std_sql_rc = re.compile(_std_sql, re.X | re.I | re.S)
         _ext_sql_rc = re.compile(_ext_sql, re.X | re.I | re.S)
+        _std_sql_fq_rc = re.compile(_std_sql_fq, re.X | re.I | re.S)
+        _ext_sql_fq_rc = re.compile(_ext_sql_fq, re.X | re.I | re.S)
     if standard_quoting:
-        rc = _std_sql_rc
+        if fqident:
+            rc = _std_sql_fq_rc
+        else:
+            rc = _std_sql_rc
     else:
-        rc = _ext_sql_rc
+        if fqident:
+            rc = _ext_sql_fq_rc
+        else:
+            rc = _ext_sql_rc
 
     pos = 0
     while 1:
@@ -264,8 +285,9 @@ def sql_tokenizer(sql, standard_quoting = False, ignore_whitespace = False):
             break
         pos = m.end()
         typ = m.lastgroup
-        if not ignore_whitespace or typ != "ws":
-            yield (m.lastgroup, m.group())
+        if ignore_whitespace and typ == "ws":
+            continue
+        yield (typ, m.group())
 
 _copy_from_stdin_re = "copy.*from\s+stdin"
 _copy_from_stdin_rc = None
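
Taken together, the patch lets callers fold schema-qualified names like a.b into single tokens instead of ident/sym/ident triples. A hedged usage sketch against the new signature (assuming the module is importable as skytools.parsing; expected output derived from the doctest above):

    from skytools.parsing import sql_tokenizer

    sql = 'select * from public."my tbl" where id = 1'
    for typ, tok in sql_tokenizer(sql, fqident=True, ignore_whitespace=True):
        print(typ, tok)
    # ident  select
    # sym    *
    # ident  from
    # ident  public."my tbl"
    # ident  where
    # ident  id
    # sym    =
    # num    1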