author     Marko Kreen  2010-11-25 07:24:25 +0000
committer  Marko Kreen  2010-11-25 07:24:25 +0000
commit     2d169f74668a0bd6e3019971235ba7f9e9f325f2 (patch)
tree       1d0f0f5d3ed83a0437f175cf1a235b00d5ca2e8d /python/skytools/parsing.py
parent     c100f7f02c0bbbdea587927d36bf208921605aec (diff)
skytools.sql_tokenizer: support fully-qualified names
It's easier to merge name parts here than in the upper layers.
The merging does not support some of SQL's odder corners, such as
comments between name parts, so it defaults to off.
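
For context, a minimal usage sketch of the new flag, mirroring the doctest this commit adds (the import path is assumed from this repo's layout):

# Usage sketch for the new fqident flag (mirrors the doctest added
# in the diff below; fqident defaults to False/off).
from skytools.parsing import sql_tokenizer

# With fqident=True, 'a.b' arrives as a single 'ident' token; 'a.1' is
# not a valid qualified name, so it still splits into separate tokens.
toks = list(sql_tokenizer('a.b a."b "" c" a.1',
                          fqident=True, ignore_whitespace=True))
print(toks)
# [('ident', 'a.b'), ('ident', 'a."b "" c"'),
#  ('ident', 'a'), ('sym', '.'), ('num', '1')]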
Diffstat (limited to 'python/skytools/parsing.py')
-rw-r--r--  python/skytools/parsing.py | 42
1 file changed, 32 insertions(+), 10 deletions(-)
diff --git a/python/skytools/parsing.py b/python/skytools/parsing.py
index bdcd1e38..0545116b 100644
--- a/python/skytools/parsing.py
+++ b/python/skytools/parsing.py
@@ -220,9 +220,13 @@ def parse_tabbed_table(txt):
 
 _extstr = r""" ['] (?: [^'\\]+ | \\. | [']['] )* ['] """
 _stdstr = r""" ['] (?: [^']+ | [']['] )* ['] """
+_name = r""" (?: [a-z][a-z0-9_$]* | " (?: [^"]+ | "" )* " ) """
+
+_ident = r""" (?P<ident> %s ) """ % _name
+_fqident = r""" (?P<ident> %s (?: \. %s )? ) """ % (_name, _name)
+
 _base_sql = r"""
-      (?P<ident> [a-z][a-z0-9_$]* | ["] (?: [^"]+ | ["]["] )* ["] )
-    | (?P<dolq> (?P<dname> [$] (?: [_a-z][_a-z0-9]*)? [$] )
+      (?P<dolq> (?P<dname> [$] (?: [_a-z][_a-z0-9]*)? [$] )
                .*?
                (?P=dname) )
     | (?P<num> [0-9][0-9.e]* )
@@ -232,11 +236,18 @@ _base_sql = r"""
     | (?P<ws> (?: \s+ | [/][*] .*? [*][/] | [-][-][^\n]* )+ )
     | (?P<error> ['"$\\] )
     | (?P<sym> . )"""
-_std_sql = r"""(?: (?P<str> [E] %s | %s ) | %s )""" % (_extstr, _stdstr, _base_sql)
-_ext_sql = r"""(?: (?P<str> [E]? %s ) | %s )""" % (_extstr, _base_sql)
+
+_base_sql_fq = r"%s | %s" % (_fqident, _base_sql)
+_base_sql = r"%s | %s" % (_ident, _base_sql)
+
+_std_sql = r"""(?: (?P<str> [E] %s | %s ) | %s )""" % (_extstr, _stdstr, _base_sql)
+_std_sql_fq = r"""(?: (?P<str> [E] %s | %s ) | %s )""" % (_extstr, _stdstr, _base_sql_fq)
+_ext_sql = r"""(?: (?P<str> [E]? %s ) | %s )""" % (_extstr, _base_sql)
+_ext_sql_fq = r"""(?: (?P<str> [E]? %s ) | %s )""" % (_extstr, _base_sql_fq)
 _std_sql_rc = _ext_sql_rc = None
+_std_sql_fq_rc = _ext_sql_fq_rc = None
 
-def sql_tokenizer(sql, standard_quoting = False, ignore_whitespace = False):
+def sql_tokenizer(sql, standard_quoting = False, ignore_whitespace = False, fqident = False):
     r"""Parser SQL to tokens.
 
     Iterator, returns (toktype, tokstr) tuples.
@@ -246,16 +257,26 @@ def sql_tokenizer(sql, standard_quoting = False, ignore_whitespace = False):
     [('ident', 'select'), ('sym', '*'), ('ident', 'from'), ('ident', 'a'), ('sym', '.'), ('ident', 'b')]
     >>> [x for x in sql_tokenizer("\"c olumn\",'str''val'")]
     [('ident', '"c olumn"'), ('sym', ','), ('str', "'str''val'")]
+    >>> list(sql_tokenizer('a.b a."b "" c" a.1', fqident=True, ignore_whitespace=True))
+    [('ident', 'a.b'), ('ident', 'a."b "" c"'), ('ident', 'a'), ('sym', '.'), ('num', '1')]
     """
-    global _std_sql_rc, _ext_sql_rc
+    global _std_sql_rc, _ext_sql_rc, _std_sql_fq_rc, _ext_sql_fq_rc
     if not _std_sql_rc:
         _std_sql_rc = re.compile(_std_sql, re.X | re.I | re.S)
         _ext_sql_rc = re.compile(_ext_sql, re.X | re.I | re.S)
+        _std_sql_fq_rc = re.compile(_std_sql_fq, re.X | re.I | re.S)
+        _ext_sql_fq_rc = re.compile(_ext_sql_fq, re.X | re.I | re.S)
 
     if standard_quoting:
-        rc = _std_sql_rc
+        if fqident:
+            rc = _std_sql_fq_rc
+        else:
+            rc = _std_sql_rc
     else:
-        rc = _ext_sql_rc
+        if fqident:
+            rc = _ext_sql_fq_rc
+        else:
+            rc = _ext_sql_rc
 
     pos = 0
     while 1:
@@ -264,8 +285,9 @@
             break
         pos = m.end()
         typ = m.lastgroup
-        if not ignore_whitespace or typ != "ws":
-            yield (m.lastgroup, m.group())
+        if ignore_whitespace and typ == "ws":
+            continue
+        yield (typ, m.group())
 
 _copy_from_stdin_re = "copy.*from\s+stdin"
 _copy_from_stdin_rc = None
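
To make the regex change concrete, here is a self-contained sketch of the technique the patch uses (all names below are hypothetical, not the module's own): the fully-qualified-name alternative is placed before the other token alternatives, so the scanner prefers the longer name.name match, and re's lastgroup reports which named alternative fired.

import re

# Sketch only: a cut-down tokenizer with the fqident alternative first.
# _name accepts a plain identifier or a quoted one with "" escapes, as
# in the patch; num/ws/sym stand in for the rest of _base_sql.
_name = r''' (?: [a-z][a-z0-9_$]* | " (?: [^"]+ | "" )* " ) '''
_fqident = r''' (?P<ident> %s (?: \. %s )? ) ''' % (_name, _name)
_mini_rc = re.compile(r'''(?: %s
    | (?P<num> [0-9][0-9.e]* )
    | (?P<ws>  \s+ )
    | (?P<sym> . ) )''' % _fqident, re.X | re.I | re.S)

def mini_tokens(sql):
    pos = 0
    while pos < len(sql):
        # (?P<sym> . ) with re.S matches any char, so a match always exists
        m = _mini_rc.match(sql, pos)
        pos = m.end()
        if m.lastgroup != 'ws':      # same effect as ignore_whitespace
            yield (m.lastgroup, m.group())

print(list(mini_tokens('a."b "" c" a.1')))
# [('ident', 'a."b "" c"'), ('ident', 'a'), ('sym', '.'), ('num', '1')]

Because the optional (?: \. name )? simply fails to extend over 'a.1', the plain-ident case falls out of the same group, which is why the patch can replace the old ident alternative rather than add a second named group.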