--- /dev/null
+#include "postgres.h"
+
+#include "utils/builtins.h"
+#include "utils/pg_locale.h"
+#include "mb/pg_wchar.h"
+
+#include "deflex.h"
+#include "parser.h"
+#include "ts_locale.h"
+
+
+/*
+ * Allocate a new parser position.
+ *
+ * When prev is given, the new position starts as a byte-for-byte copy of it;
+ * otherwise it is zero-filled.  In both cases the result is linked to prev
+ * and its pushedAtAction marker is reset to NULL.
+ */
+static TParserPosition*
+newTParserPosition(TParserPosition *prev) {
+    TParserPosition *pos = (TParserPosition*)palloc(sizeof(TParserPosition));
+
+    if ( prev == NULL )
+        memset(pos, 0, sizeof(TParserPosition));
+    else
+        memcpy(pos, prev, sizeof(TParserPosition));
+
+    pos->pushedAtAction = NULL;
+    pos->prev = prev;
+
+    return pos;
+}
+
+/*
+ * Create a parser over the len bytes starting at str.
+ *
+ * The string is not copied; the parser keeps the caller's pointer.  The
+ * returned parser has a single zero-filled position in state TPS_Base.
+ * Release it with TParserClose().
+ */
+TParser*
+TParserInit( char *str, int len ) {
+    TParser *prs = (TParser*)palloc0( sizeof(TParser) );
+
+    prs->charmaxlen = pg_database_encoding_max_length();
+    prs->str = str;
+    prs->lenstr = len;
+    prs->usewide = false;
+
+#ifdef TS_USE_WIDE
+    /*
+     * Use wide char code only when max encoding length > 1 and ctype != C.
+     * Some operating systems fail with multi-byte encodings and a C locale.
+     * Also, for a C locale there is no need to process as multibyte.
+     * From backend/utils/adt/oracle_compat.c Teodor
+     */
+    if ( prs->charmaxlen > 1 && !lc_ctype_is_c() ) {
+        prs->usewide = true;
+        /*
+         * A string of lenstr bytes yields at most lenstr wide characters.
+         * Reserve one more element so the conversion cannot overrun the
+         * buffer if char2wchar() appends a terminating wide NUL
+         * (NOTE(review): char2wchar is defined elsewhere - confirm).
+         */
+        prs->wstr = (wchar_t*)palloc( sizeof(wchar_t) * (prs->lenstr + 1) );
+        prs->lenwstr = char2wchar( prs->wstr, prs->str, prs->lenstr );
+    }
+#endif
+
+    prs->state = newTParserPosition(NULL);
+    prs->state->state = TPS_Base;
+
+    return prs;
+}
+
+/*
+ * Release a parser: every position still on its state stack, the
+ * wide-character buffer if one was allocated, and the parser structure
+ * itself.  The source string (prs->str) is not freed.
+ */
+void
+TParserClose( TParser* prs ) {
+    TParserPosition *pos = prs->state;
+
+    while( pos != NULL ) {
+        TParserPosition *older = pos->prev;
+        pfree( pos );
+        pos = older;
+    }
+    prs->state = NULL;
+
+    if ( prs->wstr != NULL )
+        pfree( prs->wstr );
+    pfree( prs );
+}
+
+/*
+ * Support functions equivalent to the is* macros, but
+ * working with any possible encoding and locale.
+ */
+
+#ifdef TS_USE_WIDE
+
+/*
+ * p_iswhat(type) expands to two classifier functions, p_is<type>() and
+ * p_isnot<type>().  p_is<type>() tests the character at the parser's current
+ * position: with isw<type>() on the wide-character copy when prs->usewide is
+ * set, otherwise with is<type>() on the current byte.  p_isnot<type>() is
+ * its logical negation.
+ */
+#define p_iswhat(type) \
+static int \
+p_is##type(TParser *prs) { \
+    Assert( prs->state ); \
+    if ( prs->usewide ) \
+        return isw##type( (wint_t) prs->wstr[ prs->state->poschar ] ); \
+    return is##type( (unsigned char) prs->str[ prs->state->posbyte ] ); \
+} \
+ \
+static int \
+p_isnot##type(TParser *prs) { \
+    return !p_is##type(prs); \
+}
+
+
+
+/*
+ * Return 1 if the current character is the single byte c, else 0.
+ * Should be used only for ASCII symbols: a character longer than one
+ * byte never matches.
+ */
+static int
+p_iseq(TParser *prs, char c) {
+    Assert( prs->state );
+    if ( prs->state->charlen != 1 )
+        return 0;
+    return ( prs->str[ prs->state->posbyte ] == c ) ? 1 : 0;
+}
+
+#else /* TS_USE_WIDE */
+
+/*
+ * p_iswhat(type) expands to two classifier functions, p_is<type>() and
+ * p_isnot<type>().  Without wide-character support, p_is<type>() applies
+ * is<type>() to the byte at the parser's current position; p_isnot<type>()
+ * is its logical negation.
+ */
+#define p_iswhat(type) \
+static int \
+p_is##type(TParser *prs) { \
+    Assert( prs->state ); \
+    return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \
+} \
+ \
+static int \
+p_isnot##type(TParser *prs) { \
+    return !p_is##type(prs); \
+}
+
+
+/*
+ * Return 1 if the byte at the current position equals c, else 0.
+ */
+static int
+p_iseq(TParser *prs, char c) {
+    Assert( prs->state );
+    return ( *( prs->str + prs->state->posbyte ) == c ) ? 1 : 0;
+}
+
+#endif /* TS_USE_WIDE */
+
+/*
+ * Instantiate the p_is<type>() / p_isnot<type>() pair for each character
+ * class; every line below defines two static functions.
+ */
+p_iswhat(alnum)
+p_iswhat(alpha)
+p_iswhat(digit)
+p_iswhat(lower)
+p_iswhat(print)
+p_iswhat(punct)
+p_iswhat(space)
+p_iswhat(upper)
+p_iswhat(xdigit)
+
+/*
+ * Return 1 at end of input: either the byte position has reached the end
+ * of the string or the current character length is zero.
+ */
+static int
+p_isEOF(TParser *prs) {
+    Assert( prs->state );
+    if ( prs->state->posbyte == prs->lenstr )
+        return 1;
+    return ( prs->state->charlen == 0 ) ? 1 : 0;
+}
+
+/*
+ * Return non-zero if the current character equals prs->c, the character
+ * TParserGet() copies from the table row being tested.
+ */
+static int
+p_iseqC(TParser *prs) {
+    char wanted = prs->c;
+
+    return p_iseq(prs, wanted);
+}
+
+/*
+ * Negation of p_iseqC(): 1 if the current character differs from prs->c,
+ * 0 if it is equal.
+ */
+static int
+p_isneC(TParser *prs) {
+    return p_iseq(prs, prs->c) ? 0 : 1;
+}
+
+/*
+ * Return 1 if the current character is one byte long and that byte
+ * satisfies isascii(), else 0.
+ */
+static int
+p_isascii(TParser *prs) {
+    if ( prs->state->charlen != 1 )
+        return 0;
+    return isascii( (unsigned char) prs->str[ prs->state->posbyte ] ) ? 1 : 0;
+}
+
+/*
+ * Return 1 if the current character is alphabetic and ASCII, else 0.
+ */
+static int
+p_islatin(TParser *prs) {
+    if ( !p_isalpha(prs) )
+        return 0;
+    return p_isascii(prs) ? 1 : 0;
+}
+
+/*
+ * Return 1 if the current character is alphabetic but not ASCII, else 0.
+ */
+static int
+p_isnonlatin(TParser *prs) {
+    if ( !p_isalpha(prs) )
+        return 0;
+    return p_isascii(prs) ? 0 : 1;
+}
+
+/*
+ * References every generated classifier once, presumably so the compiler
+ * does not warn about static functions that no state table uses.
+ * Not meant to be called: each callee would dereference the NULL parser.
+ */
+void _make_compiler_happy(void);
+void
+_make_compiler_happy(void) {
+    p_isalnum(NULL);
+    p_isnotalnum(NULL);
+    p_isalpha(NULL);
+    p_isnotalpha(NULL);
+    p_isdigit(NULL);
+    p_isnotdigit(NULL);
+    p_islower(NULL);
+    p_isnotlower(NULL);
+    p_isprint(NULL);
+    p_isnotprint(NULL);
+    p_ispunct(NULL);
+    p_isnotpunct(NULL);
+    p_isspace(NULL);
+    p_isnotspace(NULL);
+    p_isupper(NULL);
+    p_isnotupper(NULL);
+    p_isxdigit(NULL);
+    p_isnotxdigit(NULL);
+    p_isEOF(NULL);
+    p_iseqC(NULL);
+    p_isneC(NULL);
+}
+
+
+/*
+ * Special handler for tags: switch prs->ignore on when the lexeme read so
+ * far is "<script" or "<style", and off when it is "</script" or "</style".
+ * The comparison is case-insensitive and is attempted only when the current
+ * lexeme length in characters equals the length of one of those names.
+ */
+static void
+SpecialTags(TParser *prs) {
+    TParserPosition *pos = prs->state;
+
+    if ( pos->lencharlexeme == 8 ) {
+        /* </script */
+        if ( pg_strncasecmp( prs->lexeme, "</script", 8 ) == 0 )
+            prs->ignore = false;
+    } else if ( pos->lencharlexeme == 7 ) {
+        /* <script || </style */
+        if ( pg_strncasecmp( prs->lexeme, "</style", 7 ) == 0 )
+            prs->ignore = false;
+        else if ( pg_strncasecmp( prs->lexeme, "<script", 7 ) == 0 )
+            prs->ignore = true;
+    } else if ( pos->lencharlexeme == 6 ) {
+        /* <style */
+        if ( pg_strncasecmp( prs->lexeme, "<style", 6 ) == 0 )
+            prs->ignore = true;
+    }
+}
+
+/*
+ * Special handler for a full URL: rewind the current position to the start
+ * of the lexeme and set prs->wanthost, which p_isstophost() later consumes.
+ */
+static void
+SpecialFURL(TParser *prs) {
+    TParserPosition *pos = prs->state;
+
+    pos->poschar -= pos->lencharlexeme;
+    pos->posbyte -= pos->lenbytelexeme;
+    prs->wanthost = true;
+}
+
+/*
+ * Special handler for hyphenated words: rewind the current position to the
+ * start of the lexeme so that its characters are read again.
+ */
+static void
+SpecialHyphen(TParser *prs) {
+    TParserPosition *pos = prs->state;
+
+    pos->poschar -= pos->lencharlexeme;
+    pos->posbyte -= pos->lenbytelexeme;
+}
+
+/*
+ * Return 1 exactly once after prs->wanthost has been set, clearing the
+ * flag as a side effect; return 0 otherwise.
+ */
+static int
+p_isstophost(TParser *prs) {
+    if ( !prs->wanthost )
+        return 0;
+
+    prs->wanthost = false;
+    return 1;
+}
+
+/*
+ * Return 1 while prs->ignore is set, else 0.
+ */
+static int
+p_isignore(TParser *prs) {
+    if ( prs->ignore )
+        return 1;
+    return 0;
+}
+
+/*
+ * Check whether a HOST lexeme starts at the current position.
+ *
+ * A temporary parser is run over the rest of the string.  If its first
+ * lexeme is of type HOST, the current position is advanced past it, the
+ * current lexeme lengths grow by its size, and 1 is returned.  Otherwise
+ * the position is left untouched and 0 is returned.
+ */
+static int
+p_ishost(TParser *prs) {
+    TParser *sub = TParserInit( prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte );
+    int found = 0;
+
+    if ( TParserGet(sub) && sub->type == HOST ) {
+        TParserPosition *pos = prs->state;
+
+        pos->posbyte += sub->lenbytelexeme;
+        pos->poschar += sub->lencharlexeme;
+        pos->lenbytelexeme += sub->lenbytelexeme;
+        pos->lencharlexeme += sub->lencharlexeme;
+        pos->charlen = sub->state->charlen;
+        found = 1;
+    }
+    TParserClose(sub);
+
+    return found;
+}
+
+/*
+ * Check whether a URI or file path starts at the current position.
+ *
+ * A temporary parser is run over the rest of the string, started in
+ * TPS_InFileFirst on a position pushed above its base one.  If its first
+ * lexeme is of type URI or FILEPATH, the current position is advanced past
+ * it, the current lexeme lengths grow by its size, and 1 is returned.
+ * Otherwise the position is left untouched and 0 is returned.
+ */
+static int
+p_isURI(TParser *prs) {
+    TParser *sub = TParserInit( prs->str + prs->state->posbyte, prs->lenstr - prs->state->posbyte );
+    int found = 0;
+
+    sub->state = newTParserPosition( sub->state );
+    sub->state->state = TPS_InFileFirst;
+
+    if ( TParserGet(sub) && (sub->type == URI || sub->type == FILEPATH) ) {
+        TParserPosition *pos = prs->state;
+
+        pos->posbyte += sub->lenbytelexeme;
+        pos->poschar += sub->lencharlexeme;
+        pos->lenbytelexeme += sub->lenbytelexeme;
+        pos->lencharlexeme += sub->lencharlexeme;
+        pos->charlen = sub->state->charlen;
+        found = 1;
+    }
+    TParserClose(sub);
+
+    return found;
+}
+
+/*
+ * State/action tables of the parser.
+ *
+ * Action flags, as interpreted by TParserGet():
+ */
+
+#define A_NEXT 0x0000 /* no flag set: just move on to the next character */
+#define A_BINGO 0x0001 /* a lexeme is complete: report it to the caller */
+#define A_POP 0x0002 /* drop the current position and return to the pushed one */
+#define A_PUSH 0x0004 /* save the current position on the stack, continue on a copy */
+#define A_RERUN 0x0008 /* re-examine the current character without moving forward */
+#define A_CLEAR 0x0010 /* discard the most recently pushed position */
+#define A_MERGE 0x0020 /* pop, but carry the current position and lengths over */
+#define A_CLRALL 0x0040 /* discard every pushed position */
+
+/*
+ * Each table lists, in priority order, the transitions out of one state.
+ * Judging by how TParserGet() reads an item, the initializer fields are
+ * {isclass, c, flags, tostate, type, special}.  A row whose isclass is NULL
+ * ends the table and acts as the default.  Row order matters: the first
+ * matching row wins, and a test repeated further down is what gets tried
+ * once the A_PUSH made by an earlier row has been popped back.
+ */
+
+/* TPS_Base: start of a lexeme; dispatch on its first character. */
+static TParserStateActionItem actionTPS_Base[] = {
+ {p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL},
+ {p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InLatWord, 0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InCyrWord, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
+ {p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
+ {p_iseqC, '&', A_PUSH, TPS_InHTMLEntityFirst, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {NULL, 0, A_NEXT, TPS_InSpace, 0, NULL}
+};
+
+
+/* TPS_InUWord: inside an alphanumeric word; reports UWORD at its end. */
+static TParserStateActionItem actionTPS_InUWord[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, UWORD, NULL},
+ {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL},
+ {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, UWORD, NULL}
+};
+
+/*
+ * TPS_InLatWord: inside a word of ASCII letters.  Reports LATWORD unless a
+ * host, file path, hyphenated word, email or protocol can be recognized.
+ */
+static TParserStateActionItem actionTPS_InLatWord[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, LATWORD, NULL},
+ {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst,0, NULL},
+ {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+ {p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
+ {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, LATWORD, NULL}
+};
+
+/* TPS_InCyrWord: inside a word of non-ASCII letters; reports CYRWORD. */
+static TParserStateActionItem actionTPS_InCyrWord[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, CYRWORD, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_isalnum, 0, A_NEXT, TPS_InUWord, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst,0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, CYRWORD, NULL}
+};
+
+/*
+ * TPS_InUnsignedInt: inside a run of digits.  Reports UNSIGNEDINT unless a
+ * host, decimal, scientific number, word or file path can be recognized.
+ */
+static TParserStateActionItem actionTPS_InUnsignedInt[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL},
+ {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {p_islatin, 0, A_PUSH, TPS_InHost, 0, NULL},
+ {p_isalpha, 0, A_NEXT, TPS_InUWord, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}
+};
+
+/* TPS_InSignedIntFirst: after a leading '-' or '+'; needs a digit, else pops. */
+static TParserStateActionItem actionTPS_InSignedIntFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT|A_CLEAR, TPS_InSignedInt, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InSignedInt: digits of a signed integer; reports SIGNEDINT. */
+static TParserStateActionItem actionTPS_InSignedInt[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL},
+ {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}
+};
+
+/*
+ * TPS_InSpace: consumes non-alphanumeric characters (and anything at all
+ * while prs->ignore is set).  Reports SPACE at end of input, before an
+ * alphanumeric character, and before '<'; unless ignoring, also before
+ * '-', '+', '&' and '/'.
+ */
+static TParserStateActionItem actionTPS_InSpace[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL},
+ {p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL},
+ {p_isignore, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL},
+ {p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL},
+ {p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL},
+ {p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL},
+ {p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, SPACE, NULL}
+};
+
+/* TPS_InUDecimalFirst: after '.' following an unsigned integer; needs a digit. */
+static TParserStateActionItem actionTPS_InUDecimalFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/*
+ * TPS_InUDecimal: fraction digits of an unsigned decimal.  Reports DECIMAL
+ * unless a version number or scientific notation follows.
+ */
+static TParserStateActionItem actionTPS_InUDecimal[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
+ {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL}
+};
+
+/* TPS_InDecimalFirst: after '.' following a signed integer; needs a digit. */
+static TParserStateActionItem actionTPS_InDecimalFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InDecimal: fraction digits of a signed decimal; reports DECIMAL. */
+static TParserStateActionItem actionTPS_InDecimal[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL},
+ {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL}
+};
+
+/* TPS_InVersionFirst: after a further '.' in a dotted number; needs a digit. */
+static TParserStateActionItem actionTPS_InVersionFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InVersion: digits of a dotted version number; reports VERSIONNUMBER. */
+static TParserStateActionItem actionTPS_InVersion[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}
+};
+
+/* TPS_InMantissaFirst: after 'e' or 'E' in a number; needs a digit or a sign. */
+static TParserStateActionItem actionTPS_InMantissaFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
+ {p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InMantissaSign: after the sign that follows 'e'/'E'; needs a digit. */
+static TParserStateActionItem actionTPS_InMantissaSign[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InMantissa: digits after 'e'/'E'; reports SCIENTIFIC. */
+static TParserStateActionItem actionTPS_InMantissa[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}
+};
+
+/*
+ * TPS_InHTMLEntityFirst: after '&'.  '#' starts a numeric entity, an ASCII
+ * letter a named one; anything else pops.
+ */
+static TParserStateActionItem actionTPS_InHTMLEntityFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '#', A_NEXT, TPS_InHTMLEntityNumFirst,0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InHTMLEntity: ASCII letters of a named entity, up to ';'. */
+static TParserStateActionItem actionTPS_InHTMLEntity[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHTMLEntity, 0, NULL},
+ {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InHTMLEntityNumFirst: after '&#'; needs a digit. */
+static TParserStateActionItem actionTPS_InHTMLEntityNumFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InHTMLEntityNum: digits of a numeric entity, up to ';'. */
+static TParserStateActionItem actionTPS_InHTMLEntityNum[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHTMLEntityNum, 0, NULL},
+ {p_iseqC, ';', A_NEXT, TPS_InHTMLEntityEnd, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InHTMLEntityEnd: after the closing ';'; reports HTMLENTITY. */
+static TParserStateActionItem actionTPS_InHTMLEntityEnd[] = {
+ {NULL, 0, A_BINGO|A_CLEAR,TPS_Base, HTMLENTITY, NULL}
+};
+
+/*
+ * TPS_InTagFirst: after '<'.  '/' may start a closing tag, '!' a comment,
+ * an ASCII letter a tag; anything else pops.
+ */
+static TParserStateActionItem actionTPS_InTagFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL},
+ {p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL},
+ {p_islatin, 0, A_PUSH, TPS_InTag, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InTagCloseFirst: after the '/' of "</"; needs an ASCII letter. */
+static TParserStateActionItem actionTPS_InTagCloseFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InTag, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/*
+ * TPS_InTag: inside a tag.  SpecialTags runs when '>' or whitespace is
+ * met; quotes open a quoted string; any unlisted character pops.
+ */
+static TParserStateActionItem actionTPS_InTag[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},
+ {p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL},
+ {p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL},
+ {p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InTagEscapeK: inside a single-quoted string within a tag. */
+static TParserStateActionItem actionTPS_InTagEscapeK[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
+ {p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL},
+ {NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL}
+};
+
+/* TPS_InTagEscapeKK: inside a double-quoted string within a tag. */
+static TParserStateActionItem actionTPS_InTagEscapeKK[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},
+ {p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL},
+ {NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL}
+};
+
+/*
+ * TPS_InTagBackSleshed: the character after a backslash in a quoted string.
+ * A_MERGE returns to the pushed quoted-string state, keeping the position.
+ */
+static TParserStateActionItem actionTPS_InTagBackSleshed[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {NULL, 0, A_MERGE, TPS_Null, 0, NULL}
+};
+
+/* TPS_InTagEnd: after the closing '>'; reports TAG. */
+static TParserStateActionItem actionTPS_InTagEnd[] = {
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, TAG, NULL}
+};
+
+/* TPS_InCommentFirst: after "<!"; needs '-'. */
+static TParserStateActionItem actionTPS_InCommentFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InCommentLast: after "<!-"; needs a second '-'. */
+static TParserStateActionItem actionTPS_InCommentLast[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InComment: inside a comment body; a '-' may begin its terminator. */
+static TParserStateActionItem actionTPS_InComment[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InCloseCommentFirst,0, NULL},
+ {NULL, 0, A_NEXT, TPS_Null, 0, NULL}
+};
+
+/* TPS_InCloseCommentFirst: one '-' seen inside a comment. */
+static TParserStateActionItem actionTPS_InCloseCommentFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InCloseCommentLast, 0, NULL},
+ {NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
+};
+
+/* TPS_InCloseCommentLast: "--" seen inside a comment; '>' ends it. */
+static TParserStateActionItem actionTPS_InCloseCommentLast[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
+ {p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL},
+ {NULL, 0, A_NEXT, TPS_InComment, 0, NULL}
+};
+
+/* TPS_InCommentEnd: after the "-->" terminator; reports the comment as TAG. */
+static TParserStateActionItem actionTPS_InCommentEnd[] = {
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, TAG, NULL}
+};
+
+/* TPS_InHostFirstDomen: after '.' in a candidate host name. */
+static TParserStateActionItem actionTPS_InHostFirstDomen[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHostDomenSecond, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
+ /* disabled: {p_iseqC, '-', A_POP, TPS_InHostFirstAN, 0, NULL}, */
+ /* disabled: {p_iseqC, '.', A_POP, TPS_InHostFirstDomen, 0, NULL}, */
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InHostDomenSecond: one ASCII letter read after a '.'. */
+static TParserStateActionItem actionTPS_InHostDomenSecond[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHostDomen, 0, NULL},
+ {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
+ {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/*
+ * TPS_InHostDomen: at least two ASCII letters read after a '.'; the first
+ * host state that can report HOST.  The second p_isdigit row is reached
+ * only after the TPS_InHost attempt pushed by the first one was popped back.
+ * When p_isstophost fires, HOST is reported and parsing continues in
+ * TPS_InURIStart; otherwise '/' tries to read a full URL.
+ */
+static TParserStateActionItem actionTPS_InHostDomen[] = {
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHostDomen, 0, NULL},
+ {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
+ {p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
+ {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+ {p_isdigit, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isstophost, 0, A_BINGO|A_CLRALL,TPS_InURIStart, HOST, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL}
+};
+
+/* TPS_InPortFirst: after ':' following a host name; needs a digit. */
+static TParserStateActionItem actionTPS_InPortFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InPort: port digits; reports HOST (the port is part of the lexeme). */
+static TParserStateActionItem actionTPS_InPort[] = {
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
+ {p_isstophost, 0, A_BINGO|A_CLRALL,TPS_InURIStart, HOST, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, HOST, NULL}
+};
+
+/* TPS_InHostFirstAN: after '-' in a candidate host name; needs a digit or ASCII letter. */
+static TParserStateActionItem actionTPS_InHostFirstAN[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/*
+ * TPS_InHost: inside a candidate host-name label.  This state never
+ * reports a lexeme itself: it continues, pushes, or pops.
+ */
+static TParserStateActionItem actionTPS_InHost[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHost, 0, NULL},
+ {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomen, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InEmail: after '@'; reports EMAIL if p_ishost() finds a host next, else pops. */
+static TParserStateActionItem actionTPS_InEmail[] = {
+ {p_ishost, 0, A_BINGO|A_CLRALL, TPS_Base, EMAIL, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/*
+ * TPS_InFileFirst: first character of a path component (entered after '/'
+ * or '.', and used as the start state by p_isURI()).
+ */
+static TParserStateActionItem actionTPS_InFileFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
+ {p_iseqC, '.', A_CLEAR, TPS_InFile, 0, NULL},
+ {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL},
+ {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InFile: inside a file path; reports FILEPATH. */
+static TParserStateActionItem actionTPS_InFile[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, FILEPATH, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
+ {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
+ {p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
+ {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
+ {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
+};
+
+/* TPS_InFileNext: after '.' inside a file path; needs an ASCII letter, digit or '_'. */
+static TParserStateActionItem actionTPS_InFileNext[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_CLEAR, TPS_InFile, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
+ {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InURIFirst: after '?'; needs a non-space character that is not a quote. */
+static TParserStateActionItem actionTPS_InURIFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '"', A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '\'', A_POP, TPS_Null, 0, NULL},
+ {p_isnotspace, 0, A_CLEAR, TPS_InURI, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL},
+};
+
+/*
+ * TPS_InURIStart: entered after a HOST reported via p_isstophost;
+ * unconditionally continues in TPS_InURI.
+ */
+static TParserStateActionItem actionTPS_InURIStart[] = {
+ {NULL, 0, A_NEXT, TPS_InURI, 0, NULL}
+};
+
+/* TPS_InURI: runs up to whitespace, a quote or end of input; reports URI. */
+static TParserStateActionItem actionTPS_InURI[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, URI, NULL},
+ {p_iseqC, '"', A_BINGO, TPS_Base, URI, NULL},
+ {p_iseqC, '\'', A_BINGO, TPS_Base, URI, NULL},
+ {p_isnotspace, 0, A_NEXT, TPS_InURI, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_Base, URI, NULL}
+};
+
+/*
+ * TPS_InFURL: at the '/' following a host.  If p_isURI() matches, FURL is
+ * reported and SpecialFURL rewinds to the lexeme start so that its parts
+ * are read again.
+ */
+static TParserStateActionItem actionTPS_InFURL[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isURI, 0, A_BINGO|A_CLRALL,TPS_Base, FURL, SpecialFURL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InProtocolFirst: after ':' following an ASCII word; needs '/'. */
+static TParserStateActionItem actionTPS_InProtocolFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InProtocolSecond: after ":/"; needs a second '/'. */
+static TParserStateActionItem actionTPS_InProtocolSecond[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InProtocolEnd: after "://"; reports PROTOCOL. */
+static TParserStateActionItem actionTPS_InProtocolEnd[] = {
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_Base, PROTOCOL, NULL}
+};
+
+/*
+ * TPS_InHyphenLatWordFirst: after '-' following ASCII letters.
+ * NOTE(review): the second p_isdigit row looks unreachable, because the
+ * first one is A_NEXT rather than A_PUSH - confirm.
+ */
+static TParserStateActionItem actionTPS_InHyphenLatWordFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/*
+ * TPS_InHyphenLatWord: inside a hyphenated word made of ASCII letters.
+ * Reports LATHYPHENWORD; SpecialHyphen then rewinds to the lexeme start and
+ * TPS_InParseHyphen reads the parts again.
+ */
+static TParserStateActionItem actionTPS_InHyphenLatWord[] = {
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenLatWord, 0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenLatWordFirst,0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, LATHYPHENWORD, SpecialHyphen}
+};
+
+/*
+ * TPS_InHyphenCyrWordFirst: after '-' following non-ASCII letters.
+ * NOTE(review): the second p_isdigit row looks unreachable, because the
+ * first one is A_NEXT rather than A_PUSH - confirm.
+ */
+static TParserStateActionItem actionTPS_InHyphenCyrWordFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/*
+ * TPS_InHyphenCyrWord: inside a hyphenated word made of non-ASCII letters.
+ * Reports CYRHYPHENWORD, then rewinds like TPS_InHyphenLatWord.
+ */
+static TParserStateActionItem actionTPS_InHyphenCyrWord[] = {
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWord, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenCyrWordFirst,0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, CYRHYPHENWORD, SpecialHyphen}
+};
+
+/* TPS_InHyphenUWordFirst: after '-' in a mixed hyphenated word; needs an alphanumeric. */
+static TParserStateActionItem actionTPS_InHyphenUWordFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
+ {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/*
+ * TPS_InHyphenUWord: inside a hyphenated word of mixed alphanumerics.
+ * Reports HYPHENWORD, then rewinds via SpecialHyphen.
+ */
+static TParserStateActionItem actionTPS_InHyphenUWord[] = {
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen},
+ {p_isalnum, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst,0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}
+};
+
+/* TPS_InHyphenValueFirst: after '.' inside a numeric part of a hyphenated word; needs a digit. */
+static TParserStateActionItem actionTPS_InHyphenValueFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InHyphenValue: digits forming a part of a hyphenated word; reports HYPHENWORD. */
+static TParserStateActionItem actionTPS_InHyphenValue[] = {
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValue, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst,0, NULL},
+ {p_isalpha, 0, A_NEXT, TPS_InHyphenUWord, 0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}
+};
+
+/* TPS_InHyphenValueExact: digits after '.' in such a numeric part; reports HYPHENWORD. */
+static TParserStateActionItem actionTPS_InHyphenValueExact[] = {
+ {p_isEOF, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenValueExact, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHyphenValueFirst, 0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InHyphenUWordFirst, 0, NULL},
+ {NULL, 0, A_BINGO|A_CLRALL,TPS_InParseHyphen, HYPHENWORD, SpecialHyphen}
+};
+
+/*
+ * TPS_InParseHyphen: entered after a whole hyphenated word was reported and
+ * the position rewound; reads the word again part by part.  Anything that
+ * cannot start a part hands control back to TPS_Base via A_RERUN.
+ */
+static TParserStateActionItem actionTPS_InParseHyphen[] = {
+ {p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart,0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart,0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt,0, NULL},
+ {p_iseqC, '-', A_PUSH, TPS_InParseHyphenHyphen,0, NULL},
+ {NULL, 0, A_RERUN, TPS_Base, 0, NULL}
+};
+
+/*
+ * TPS_InParseHyphenHyphen: after a '-' between parts.  If an alphanumeric
+ * follows, the hyphen itself is reported as SPACE; otherwise pops.
+ */
+static TParserStateActionItem actionTPS_InParseHyphenHyphen[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isalnum, 0, A_BINGO|A_CLEAR,TPS_InParseHyphen, SPACE, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InHyphenCyrWordPart: a part made of non-ASCII letters; reports CYRPARTHYPHENWORD. */
+static TParserStateActionItem actionTPS_InHyphenCyrWordPart[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, CYRPARTHYPHENWORD,NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenCyrWordPart,0, NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, CYRPARTHYPHENWORD,NULL}
+};
+
+/* TPS_InHyphenLatWordPart: a part made of ASCII letters; reports LATPARTHYPHENWORD. */
+static TParserStateActionItem actionTPS_InHyphenLatWordPart[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, LATPARTHYPHENWORD,NULL},
+ {p_islatin, 0, A_NEXT, TPS_InHyphenLatWordPart,0, NULL},
+ {p_isnonlatin, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, LATPARTHYPHENWORD,NULL}
+};
+
+/* TPS_InHyphenUWordPart: a part of mixed alphanumerics; reports PARTHYPHENWORD. */
+static TParserStateActionItem actionTPS_InHyphenUWordPart[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, PARTHYPHENWORD, NULL},
+ {p_isalnum, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHYPHENWORD, NULL}
+};
+
+/* TPS_InHyphenUnsignedInt: a part made of digits; reports UNSIGNEDINT. */
+static TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHyphenUnsignedInt,0, NULL},
+ {p_isalpha, 0, A_NEXT, TPS_InHyphenUWordPart, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHDecimalPartFirst,0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, UNSIGNEDINT, NULL}
+};
+
+/* TPS_InHDecimalPartFirst: after '.' in a numeric part; needs a digit. */
+static TParserStateActionItem actionTPS_InHDecimalPartFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InHDecimalPart, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InHDecimalPart: fraction digits of a numeric part; reports DECIMAL. */
+static TParserStateActionItem actionTPS_InHDecimalPart[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHDecimalPart, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst,0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, DECIMAL, NULL}
+};
+
+/* TPS_InHVersionPartFirst: after a further '.' in a numeric part; needs a digit. */
+static TParserStateActionItem actionTPS_InHVersionPartFirst[] = {
+ {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
+ {p_isdigit, 0, A_CLEAR, TPS_InHVersionPart, 0, NULL},
+ {NULL, 0, A_POP, TPS_Null, 0, NULL}
+};
+
+/* TPS_InHVersionPart: digits of a dotted version part; reports VERSIONNUMBER. */
+static TParserStateActionItem actionTPS_InHVersionPart[] = {
+ {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
+ {p_isdigit, 0, A_NEXT, TPS_InHVersionPart, 0, NULL},
+ {p_iseqC, '.', A_PUSH, TPS_InHVersionPartFirst,0, NULL},
+ {NULL, 0, A_BINGO, TPS_InParseHyphen, VERSIONNUMBER, NULL}
+};
+
+/*
+ * Maps every parser state to its action table.  TParserGet() indexes this
+ * array directly by state value and asserts that entry i describes state i,
+ * so the order must be the same as in typedef enum {} TParserState!
+ * The final TPS_Null entry has no table.
+ */
+
+static const TParserStateAction Actions[] = {
+ { TPS_Base, actionTPS_Base },
+ { TPS_InUWord, actionTPS_InUWord },
+ { TPS_InLatWord, actionTPS_InLatWord },
+ { TPS_InCyrWord, actionTPS_InCyrWord },
+ { TPS_InUnsignedInt, actionTPS_InUnsignedInt },
+ { TPS_InSignedIntFirst, actionTPS_InSignedIntFirst },
+ { TPS_InSignedInt, actionTPS_InSignedInt },
+ { TPS_InSpace, actionTPS_InSpace },
+ { TPS_InUDecimalFirst, actionTPS_InUDecimalFirst },
+ { TPS_InUDecimal, actionTPS_InUDecimal },
+ { TPS_InDecimalFirst, actionTPS_InDecimalFirst },
+ { TPS_InDecimal, actionTPS_InDecimal },
+ { TPS_InVersionFirst, actionTPS_InVersionFirst },
+ { TPS_InVersion, actionTPS_InVersion },
+ { TPS_InMantissaFirst, actionTPS_InMantissaFirst },
+ { TPS_InMantissaSign, actionTPS_InMantissaSign },
+ { TPS_InMantissa, actionTPS_InMantissa },
+ { TPS_InHTMLEntityFirst, actionTPS_InHTMLEntityFirst },
+ { TPS_InHTMLEntity, actionTPS_InHTMLEntity },
+ { TPS_InHTMLEntityNumFirst, actionTPS_InHTMLEntityNumFirst },
+ { TPS_InHTMLEntityNum, actionTPS_InHTMLEntityNum },
+ { TPS_InHTMLEntityEnd, actionTPS_InHTMLEntityEnd },
+ { TPS_InTagFirst, actionTPS_InTagFirst },
+ { TPS_InTagCloseFirst, actionTPS_InTagCloseFirst },
+ { TPS_InTag, actionTPS_InTag },
+ { TPS_InTagEscapeK, actionTPS_InTagEscapeK },
+ { TPS_InTagEscapeKK, actionTPS_InTagEscapeKK },
+ { TPS_InTagBackSleshed, actionTPS_InTagBackSleshed },
+ { TPS_InTagEnd, actionTPS_InTagEnd },
+ { TPS_InCommentFirst, actionTPS_InCommentFirst },
+ { TPS_InCommentLast, actionTPS_InCommentLast },
+ { TPS_InComment, actionTPS_InComment },
+ { TPS_InCloseCommentFirst, actionTPS_InCloseCommentFirst },
+ { TPS_InCloseCommentLast, actionTPS_InCloseCommentLast },
+ { TPS_InCommentEnd, actionTPS_InCommentEnd },
+ { TPS_InHostFirstDomen, actionTPS_InHostFirstDomen },
+ { TPS_InHostDomenSecond, actionTPS_InHostDomenSecond },
+ { TPS_InHostDomen, actionTPS_InHostDomen },
+ { TPS_InPortFirst, actionTPS_InPortFirst },
+ { TPS_InPort, actionTPS_InPort },
+ { TPS_InHostFirstAN, actionTPS_InHostFirstAN },
+ { TPS_InHost, actionTPS_InHost },
+ { TPS_InEmail, actionTPS_InEmail },
+ { TPS_InFileFirst, actionTPS_InFileFirst },
+ { TPS_InFile, actionTPS_InFile },
+ { TPS_InFileNext, actionTPS_InFileNext },
+ { TPS_InURIFirst, actionTPS_InURIFirst },
+ { TPS_InURIStart, actionTPS_InURIStart },
+ { TPS_InURI, actionTPS_InURI },
+ { TPS_InFURL, actionTPS_InFURL },
+ { TPS_InProtocolFirst, actionTPS_InProtocolFirst },
+ { TPS_InProtocolSecond, actionTPS_InProtocolSecond },
+ { TPS_InProtocolEnd, actionTPS_InProtocolEnd },
+ { TPS_InHyphenLatWordFirst, actionTPS_InHyphenLatWordFirst },
+ { TPS_InHyphenLatWord, actionTPS_InHyphenLatWord },
+ { TPS_InHyphenCyrWordFirst, actionTPS_InHyphenCyrWordFirst },
+ { TPS_InHyphenCyrWord, actionTPS_InHyphenCyrWord },
+ { TPS_InHyphenUWordFirst, actionTPS_InHyphenUWordFirst },
+ { TPS_InHyphenUWord, actionTPS_InHyphenUWord },
+ { TPS_InHyphenValueFirst, actionTPS_InHyphenValueFirst },
+ { TPS_InHyphenValue, actionTPS_InHyphenValue },
+ { TPS_InHyphenValueExact, actionTPS_InHyphenValueExact },
+ { TPS_InParseHyphen, actionTPS_InParseHyphen },
+ { TPS_InParseHyphenHyphen, actionTPS_InParseHyphenHyphen },
+ { TPS_InHyphenCyrWordPart, actionTPS_InHyphenCyrWordPart },
+ { TPS_InHyphenLatWordPart, actionTPS_InHyphenLatWordPart },
+ { TPS_InHyphenUWordPart, actionTPS_InHyphenUWordPart },
+ { TPS_InHyphenUnsignedInt, actionTPS_InHyphenUnsignedInt },
+ { TPS_InHDecimalPartFirst, actionTPS_InHDecimalPartFirst },
+ { TPS_InHDecimalPart, actionTPS_InHDecimalPart },
+ { TPS_InHVersionPartFirst, actionTPS_InHVersionPartFirst },
+ { TPS_InHVersionPart, actionTPS_InHVersionPart },
+ { TPS_Null, NULL }
+};
+
+
+/*
+ * Extract the next lexeme from the parser's string.
+ *
+ * Returns true when a lexeme was found; its start, byte length, character
+ * length and type are then available in prs->lexeme, prs->lenbytelexeme,
+ * prs->lencharlexeme and prs->type.  Returns false when the current position
+ * is already at or past the end of the string, or when the loop ends without
+ * any table row reporting a lexeme.
+ */
+bool
+TParserGet( TParser *prs ) {
+    TParserStateActionItem *item = NULL;
+
+    /* check the state stack before its first dereference */
+    Assert( prs->state );
+
+    if ( prs->state->posbyte >= prs->lenstr )
+        return false;
+
+    prs->lexeme = prs->str + prs->state->posbyte;
+    prs->state->pushedAtAction = NULL;
+
+    /* look at string */
+    while ( prs->state->posbyte <= prs->lenstr ) {
+        /* a zero character length is what p_isEOF() sees at end of input */
+        if ( prs->state->posbyte == prs->lenstr )
+            prs->state->charlen = 0;
+        else
+            prs->state->charlen = ( prs->charmaxlen == 1 ) ? prs->charmaxlen :
+                pg_mblen( prs->str + prs->state->posbyte );
+
+        Assert( prs->state->posbyte + prs->state->charlen <= prs->lenstr );
+        Assert( prs->state->state >= TPS_Base && prs->state->state < TPS_Null );
+        Assert( Actions[ prs->state->state ].state == prs->state->state );
+
+        item = Actions[ prs->state->state ].action;
+        Assert( item != NULL );
+
+        /*
+         * If this position pushed earlier and we have been popped back to
+         * it, resume the scan at the row that pushed.  The explicit NULL
+         * test avoids relationally comparing a table pointer with NULL.
+         */
+        if ( prs->state->pushedAtAction != NULL && item < prs->state->pushedAtAction )
+            item = prs->state->pushedAtAction;
+
+        /* find action by character class */
+        while ( item->isclass ) {
+            prs->c = item->c;
+            /*
+             * The classifier is always called first, as some classifiers
+             * have side effects; only then is the row position checked.
+             */
+            if ( item->isclass(prs) != 0 ) {
+                /* remember: after pushing we went the wrong way, so only rows after the pushing one count */
+                if ( prs->state->pushedAtAction == NULL || item > prs->state->pushedAtAction )
+                    break;
+            }
+            item++;
+        }
+
+        prs->state->pushedAtAction = NULL;
+
+        /* call special handler if exists */
+        if ( item->special )
+            item->special(prs);
+
+        /* BINGO, lexeme is found */
+        if ( item->flags & A_BINGO ) {
+            Assert( item->type > 0 );
+            prs->lenbytelexeme = prs->state->lenbytelexeme;
+            prs->lencharlexeme = prs->state->lencharlexeme;
+            prs->state->lenbytelexeme = prs->state->lencharlexeme = 0;
+            prs->type = item->type;
+        }
+
+        /* do various actions by flags */
+        if ( item->flags & A_POP ) { /* pop stored state in stack */
+            TParserPosition *ptr = prs->state->prev;
+            pfree( prs->state );
+            prs->state = ptr;
+            Assert( prs->state );
+        } else if ( item->flags & A_PUSH ) { /* push (store) state in stack */
+            prs->state->pushedAtAction = item; /* remember where we push */
+            prs->state = newTParserPosition( prs->state );
+        } else if ( item->flags & A_CLEAR ) { /* clear previous pushed state */
+            TParserPosition *ptr;
+            Assert( prs->state->prev );
+            ptr = prs->state->prev->prev;
+            pfree( prs->state->prev );
+            prs->state->prev = ptr;
+        } else if ( item->flags & A_CLRALL ) { /* clear all previous pushed state */
+            TParserPosition *ptr;
+            while ( prs->state->prev ) {
+                ptr = prs->state->prev->prev;
+                pfree( prs->state->prev );
+                prs->state->prev = ptr;
+            }
+        } else if ( item->flags & A_MERGE ) { /* merge posinfo with current and pushed state */
+            TParserPosition *ptr = prs->state;
+            Assert( prs->state->prev );
+            prs->state = prs->state->prev;
+
+            prs->state->posbyte = ptr->posbyte;
+            prs->state->poschar = ptr->poschar;
+            prs->state->charlen = ptr->charlen;
+            prs->state->lenbytelexeme = ptr->lenbytelexeme;
+            prs->state->lencharlexeme = ptr->lencharlexeme;
+            pfree(ptr);
+        }
+
+        /* set new state if pointed */
+        if ( item->tostate != TPS_Null )
+            prs->state->state = item->tostate;
+
+        /* check for go away */
+        if ( (item->flags & A_BINGO) || (prs->state->posbyte >= prs->lenstr && (item->flags & A_RERUN) == 0 ) )
+            break;
+
+        /* go to beginning of loop if we should rerun or we just restore state */
+        if ( item->flags & ( A_RERUN | A_POP ) )
+            continue;
+
+        /* move forward */
+        if ( prs->state->charlen ) {
+            prs->state->posbyte += prs->state->charlen;
+            prs->state->lenbytelexeme += prs->state->charlen;
+            prs->state->poschar ++;
+            prs->state->lencharlexeme ++;
+        }
+    }
+
+    return (item && (item->flags & A_BINGO)) ? true : false;
+}
+
+