LCOV - differential code coverage report
Current view: top level - src/backend/tsearch - wparser_def.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 89.3 % 624 557 5 10 52 3 144 66 344 12 170 38
Current Date: 2023-04-08 15:15:32 Functions: 71.2 % 52 37 15 4 3 30 6 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * wparser_def.c
       4                 :  *      Default text search parser
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  *
       8                 :  *
       9                 :  * IDENTIFICATION
      10                 :  *    src/backend/tsearch/wparser_def.c
      11                 :  *
      12                 :  *-------------------------------------------------------------------------
      13                 :  */
      14                 : 
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include <limits.h>
      18                 : 
      19                 : #include "catalog/pg_collation.h"
      20                 : #include "commands/defrem.h"
      21                 : #include "tsearch/ts_locale.h"
      22                 : #include "tsearch/ts_public.h"
      23                 : #include "tsearch/ts_type.h"
      24                 : #include "tsearch/ts_utils.h"
      25                 : #include "utils/builtins.h"
      26                 : 
      27                 : 
      28                 : /* Define me to enable tracing of parser behavior */
      29                 : /* #define WPARSER_TRACE */
      30                 : 
      31                 : 
       32                 : /* Output token categories; value N corresponds to entry N of tok_alias[] and lex_descr[] */
       33                 : 
       34                 : #define ASCIIWORD       1
       35                 : #define WORD_T          2
       36                 : #define NUMWORD         3
       37                 : #define EMAIL           4
       38                 : #define URL_T           5
       39                 : #define HOST            6
       40                 : #define SCIENTIFIC      7
       41                 : #define VERSIONNUMBER   8
       42                 : #define NUMPARTHWORD    9
       43                 : #define PARTHWORD       10
       44                 : #define ASCIIPARTHWORD  11
       45                 : #define SPACE           12
       46                 : #define TAG_T           13
       47                 : #define PROTOCOL        14
       48                 : #define NUMHWORD        15
       49                 : #define ASCIIHWORD      16
       50                 : #define HWORD           17
       51                 : #define URLPATH         18
       52                 : #define FILEPATH        19
       53                 : #define DECIMAL_T       20
       54                 : #define SIGNEDINT       21
       55                 : #define UNSIGNEDINT     22
       56                 : #define XMLENTITY       23
       57                 : 
       58                 : #define LASTNUM         23  /* highest category value (same as XMLENTITY) */
      59                 : 
       60                 : static const char *const tok_alias[] = {    /* short name per token category */
       61                 :     "",                         /* slot 0 is a placeholder: categories start at 1 */
       62                 :     "asciiword",
       63                 :     "word",
       64                 :     "numword",
       65                 :     "email",
       66                 :     "url",
       67                 :     "host",
       68                 :     "sfloat",
       69                 :     "version",
       70                 :     "hword_numpart",
       71                 :     "hword_part",
       72                 :     "hword_asciipart",
       73                 :     "blank",
       74                 :     "tag",
       75                 :     "protocol",
       76                 :     "numhword",
       77                 :     "asciihword",
       78                 :     "hword",
       79                 :     "url_path",
       80                 :     "file",
       81                 :     "float",
       82                 :     "int",
       83                 :     "uint",
       84                 :     "entity"
       85                 : };
      86                 : 
       87                 : static const char *const lex_descr[] = {    /* description per token category */
       88                 :     "",                         /* slot 0 is a placeholder: categories start at 1 */
       89                 :     "Word, all ASCII",
       90                 :     "Word, all letters",
       91                 :     "Word, letters and digits",
       92                 :     "Email address",
       93                 :     "URL",
       94                 :     "Host",
       95                 :     "Scientific notation",
       96                 :     "Version number",
       97                 :     "Hyphenated word part, letters and digits",
       98                 :     "Hyphenated word part, all letters",
       99                 :     "Hyphenated word part, all ASCII",
      100                 :     "Space symbols",
      101                 :     "XML tag",
      102                 :     "Protocol head",
      103                 :     "Hyphenated word, letters and digits",
      104                 :     "Hyphenated word, all ASCII",
      105                 :     "Hyphenated word, all letters",
      106                 :     "URL path",
      107                 :     "File or path name",
      108                 :     "Decimal notation",
      109                 :     "Signed integer",
      110                 :     "Unsigned integer",
      111                 :     "XML entity"
      112                 : };
     113                 : 
     114                 : 
      115                 : /* Parser states; TPS_Base (0) is the state every newly initialized parser starts in */
      116                 : 
      117                 : typedef enum
      118                 : {
      119                 :     TPS_Base = 0,
      120                 :     TPS_InNumWord,
      121                 :     TPS_InAsciiWord,
      122                 :     TPS_InWord,
      123                 :     TPS_InUnsignedInt,
      124                 :     TPS_InSignedIntFirst,
      125                 :     TPS_InSignedInt,
      126                 :     TPS_InSpace,
      127                 :     TPS_InUDecimalFirst,
      128                 :     TPS_InUDecimal,
      129                 :     TPS_InDecimalFirst,
      130                 :     TPS_InDecimal,
      131                 :     TPS_InVerVersion,
      132                 :     TPS_InSVerVersion,
      133                 :     TPS_InVersionFirst,
      134                 :     TPS_InVersion,
      135                 :     TPS_InMantissaFirst,
      136                 :     TPS_InMantissaSign,
      137                 :     TPS_InMantissa,
      138                 :     TPS_InXMLEntityFirst,
      139                 :     TPS_InXMLEntity,
      140                 :     TPS_InXMLEntityNumFirst,
      141                 :     TPS_InXMLEntityNum,
      142                 :     TPS_InXMLEntityHexNumFirst,
      143                 :     TPS_InXMLEntityHexNum,
      144                 :     TPS_InXMLEntityEnd,
      145                 :     TPS_InTagFirst,
      146                 :     TPS_InXMLBegin,
      147                 :     TPS_InTagCloseFirst,
      148                 :     TPS_InTagName,
      149                 :     TPS_InTagBeginEnd,
      150                 :     TPS_InTag,
      151                 :     TPS_InTagEscapeK,
      152                 :     TPS_InTagEscapeKK,
      153                 :     TPS_InTagBackSleshed,
      154                 :     TPS_InTagEnd,
      155                 :     TPS_InCommentFirst,
      156                 :     TPS_InCommentLast,
      157                 :     TPS_InComment,
      158                 :     TPS_InCloseCommentFirst,
      159                 :     TPS_InCloseCommentLast,
      160                 :     TPS_InCommentEnd,
      161                 :     TPS_InHostFirstDomain,
      162                 :     TPS_InHostDomainSecond,
      163                 :     TPS_InHostDomain,
      164                 :     TPS_InPortFirst,
      165                 :     TPS_InPort,
      166                 :     TPS_InHostFirstAN,
      167                 :     TPS_InHost,
      168                 :     TPS_InEmail,
      169                 :     TPS_InFileFirst,
      170                 :     TPS_InFileTwiddle,
      171                 :     TPS_InPathFirst,
      172                 :     TPS_InPathFirstFirst,
      173                 :     TPS_InPathSecond,
      174                 :     TPS_InFile,
      175                 :     TPS_InFileNext,
      176                 :     TPS_InURLPathFirst,
      177                 :     TPS_InURLPathStart,
      178                 :     TPS_InURLPath,
      179                 :     TPS_InFURL,
      180                 :     TPS_InProtocolFirst,
      181                 :     TPS_InProtocolSecond,
      182                 :     TPS_InProtocolEnd,
      183                 :     TPS_InHyphenAsciiWordFirst,
      184                 :     TPS_InHyphenAsciiWord,
      185                 :     TPS_InHyphenWordFirst,
      186                 :     TPS_InHyphenWord,
      187                 :     TPS_InHyphenNumWordFirst,
      188                 :     TPS_InHyphenNumWord,
      189                 :     TPS_InHyphenDigitLookahead,
      190                 :     TPS_InParseHyphen,
      191                 :     TPS_InParseHyphenHyphen,
      192                 :     TPS_InHyphenWordPart,
      193                 :     TPS_InHyphenAsciiWordPart,
      194                 :     TPS_InHyphenNumWordPart,
      195                 :     TPS_InHyphenUnsignedInt,
      196                 :     TPS_Null                    /* last state (fake value) */
      197                 : } TParserState;
     198                 : 
      199                 : /* forward declaration, so the callback typedefs below can name the type */
      200                 : struct TParser;
      201                 : 
      202                 : typedef int (*TParserCharTest) (struct TParser *);  /* any p_is* function except
      203                 :                                                      * p_iseq (it takes an extra char) */
      204                 : typedef void (*TParserSpecial) (struct TParser *);  /* handler for special cases; the
      205                 :                                                      * Special* functions have this signature */
     206                 : 
      207                 : typedef struct
      208                 : {
      209                 :     TParserCharTest isclass;    /* character-test callback */
      210                 :     char        c;              /* NOTE(review): presumably the char p_iseqC/p_isneC compare against (via TParser.c) -- confirm */
      211                 :     uint16      flags;          /* OR of the A_* flag bits */
      212                 :     TParserState tostate;       /* NOTE(review): presumably the state to move to -- confirm against TParserGet */
      213                 :     int         type;           /* NOTE(review): presumably an output token category -- confirm */
      214                 :     TParserSpecial special;     /* special-case handler callback */
      215                 : } TParserStateActionItem;
     216                 : 
      217                 : /* Flag bits in TParserStateActionItem.flags; distinct bits, so they can be OR-ed together */
      218                 : #define A_NEXT      0x0000      /* no bit set */
      219                 : #define A_BINGO     0x0001
      220                 : #define A_POP       0x0002
      221                 : #define A_PUSH      0x0004
      222                 : #define A_RERUN     0x0008
      223                 : #define A_CLEAR     0x0010
      224                 : #define A_MERGE     0x0020
      225                 : #define A_CLRALL    0x0040
     226                 : 
      227                 : typedef struct TParserPosition
      228                 : {
      229                 :     int         posbyte;        /* position of parser in bytes */
      230                 :     int         poschar;        /* position of parser in characters */
      231                 :     int         charlen;        /* length of current char */
      232                 :     int         lenbytetoken;   /* length of token-so-far in bytes */
      233                 :     int         lenchartoken;   /* and in chars */
      234                 :     TParserState state;         /* parser state at this position */
      235                 :     struct TParserPosition *prev;   /* older position; NULL ends the chain */
      236                 :     const TParserStateActionItem *pushedAtAction;   /* NULL right after newTParserPosition() */
      237                 : } TParserPosition;
     238                 : 
      239                 : typedef struct TParser
      240                 : {
      241                 :     /* string and position information */
      242                 :     char       *str;            /* multibyte string */
      243                 :     int         lenstr;         /* length of mbstring */
      244                 :     wchar_t    *wstr;           /* wide character string */
      245                 :     pg_wchar   *pgwstr;         /* wide character string for C-locale */
      246                 :     bool        usewide;        /* true when charmaxlen > 1 */
      247                 : 
      248                 :     /* State of parse */
      249                 :     int         charmaxlen;     /* from pg_database_encoding_max_length() */
      250                 :     TParserPosition *state;     /* current position; older ones hang off ->prev */
      251                 :     bool        ignore;         /* toggled by SpecialTags, read by p_isignore */
      252                 :     bool        wanthost;       /* one-shot flag, consumed by p_isstophost */
      253                 : 
      254                 :     /* silly char: the one p_iseqC and p_isneC compare against */
      255                 :     char        c;
      256                 : 
      257                 :     /* out */
      258                 :     char       *token;
      259                 :     int         lenbytetoken;
      260                 :     int         lenchartoken;
      261                 :     int         type;           /* token category, e.g. HOST */
      262                 : } TParser;
     263                 : 
     264                 : 
      265                 : /* forward declaration: p_ishost() below calls TParserGet */
      266                 : static bool TParserGet(TParser *prs);
     267                 : 
     268                 : 
      269                 : static TParserPosition *
      270 CBC        5104 : newTParserPosition(TParserPosition *prev)
      271                 : {                               /* returns a palloc'd position chained to prev (prev may be NULL) */
      272            5104 :     TParserPosition *res = (TParserPosition *) palloc(sizeof(TParserPosition));
      273                 : 
      274            5104 :     if (prev)
      275            2619 :         memcpy(res, prev, sizeof(TParserPosition)); /* start out as a copy of prev */
      276                 :     else
      277            2485 :         memset(res, 0, sizeof(TParserPosition));    /* no predecessor: all fields zero */
      278                 : 
      279            5104 :     res->prev = prev;
      280                 : 
      281            5104 :     res->pushedAtAction = NULL; /* never inherited, even when copied from prev */
      282                 : 
      283            5104 :     return res;
      284                 : }
     285                 : 
      286                 : static TParser *
      287            2365 : TParserInit(char *str, int len)
      288                 : {                               /* new parser over str/len; str is referenced, not copied */
      289            2365 :     TParser    *prs = (TParser *) palloc0(sizeof(TParser));
      290                 : 
      291            2365 :     prs->charmaxlen = pg_database_encoding_max_length();
      292            2365 :     prs->str = str;
      293            2365 :     prs->lenstr = len;
      294                 : 
      295                 :     /*
      296                 :      * Use wide char code only when max encoding length > 1.
      297                 :      */
      298            2365 :     if (prs->charmaxlen > 1)
      299                 :     {
      300            2365 :         pg_locale_t mylocale = 0;   /* TODO */
      301                 : 
      302            2365 :         prs->usewide = true;    /* exactly one of pgwstr / wstr is allocated below */
      303            2365 :         if (database_ctype_is_c)
      304                 :         {
      305                 :             /*
      306                 :              * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
      307                 :              * be different from sizeof(wchar_t)
      308                 :              */
      309             787 :             prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
      310             787 :             pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
      311                 :         }
      312                 :         else
      313                 :         {
      314            1578 :             prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
      315            1578 :             char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr,
      316                 :                        mylocale);
      317                 :         }
      318                 :     }
      319                 :     else
      320 UBC           0 :         prs->usewide = false;   /* p_is* tests then read the bytes of prs->str directly */
      321                 : 
      322 CBC        2365 :     prs->state = newTParserPosition(NULL);  /* zeroed initial position */
      323            2365 :     prs->state->state = TPS_Base;
      324                 : 
      325                 : #ifdef WPARSER_TRACE
      326                 :     fprintf(stderr, "parsing \"%.*s\"\n", len, str);
      327                 : #endif
      328                 : 
      329            2365 :     return prs;
      330                 : }
     331                 : 
      332                 : /*
      333                 :  * As an alternative to a full TParserInit one can create a
      334                 :  * TParserCopy which basically is a regular TParser without a private
      335                 :  * copy of the string - instead it uses the one from another TParser.
      336                 :  * This is useful because in some places TParsers are created
      337                 :  * recursively and the repeated copying around of the strings can
      338                 :  * cause major inefficiency if the source string is long.
      339                 :  * The new parser starts parsing at the original's current position.
      340                 :  *
      341                 :  * Obviously one must not close the original TParser before the copy.
      342                 :  */
      343                 : static TParser *
      344             120 : TParserCopyInit(const TParser *orig)
      345                 : {
      346             120 :     TParser    *prs = (TParser *) palloc0(sizeof(TParser));
      347                 : 
      348             120 :     prs->charmaxlen = orig->charmaxlen;
      349             120 :     prs->str = orig->str + orig->state->posbyte;    /* alias the original buffer from its current byte */
      350             120 :     prs->lenstr = orig->lenstr - orig->state->posbyte;  /* only the unparsed remainder */
      351             120 :     prs->usewide = orig->usewide;
      352                 : 
      353             120 :     if (orig->pgwstr)           /* wide arrays are aliased too, offset by characters */
      354              40 :         prs->pgwstr = orig->pgwstr + orig->state->poschar;
      355             120 :     if (orig->wstr)
      356              80 :         prs->wstr = orig->wstr + orig->state->poschar;
      357                 : 
      358             120 :     prs->state = newTParserPosition(NULL);  /* fresh position: offsets restart at zero */
      359             120 :     prs->state->state = TPS_Base;
      360                 : 
      361                 : #ifdef WPARSER_TRACE
      362                 :     fprintf(stderr, "parsing copy of \"%.*s\"\n", prs->lenstr, prs->str);
      363                 : #endif
      364                 : 
      365             120 :     return prs;
      366                 : }
     367                 : 
     368                 : 
      369                 : static void
      370            2365 : TParserClose(TParser *prs)
      371                 : {                               /* frees positions, wide buffers and prs itself; prs->str is left alone */
      372            4730 :     while (prs->state)          /* walk the prev chain until it ends in NULL */
      373                 :     {
      374            2365 :         TParserPosition *ptr = prs->state->prev;
      375                 : 
      376            2365 :         pfree(prs->state);
      377            2365 :         prs->state = ptr;
      378                 :     }
      379                 : 
      380            2365 :     if (prs->wstr)
      381            1578 :         pfree(prs->wstr);
      382            2365 :     if (prs->pgwstr)
      383             787 :         pfree(prs->pgwstr);
      384                 : 
      385                 : #ifdef WPARSER_TRACE
      386                 :     fprintf(stderr, "closing parser\n");
      387                 : #endif
      388            2365 :     pfree(prs);
      389            2365 : }
     390                 : 
      391                 : /*
      392                 :  * Close a parser created with TParserCopyInit: frees only the positions and the parser struct
      393                 :  */
      394                 : static void
      395             120 : TParserCopyClose(TParser *prs)
      396                 : {
      397             306 :     while (prs->state)          /* walk the prev chain until it ends in NULL */
      398                 :     {
      399             186 :         TParserPosition *ptr = prs->state->prev;
      400                 : 
      401             186 :         pfree(prs->state);
      402             186 :         prs->state = ptr;
      403                 :     }
      404                 : 
      405                 : #ifdef WPARSER_TRACE
      406                 :     fprintf(stderr, "closing parser copy\n");
      407                 : #endif
      408             120 :     pfree(prs);                 /* str / wstr / pgwstr point into the original's buffers: not freed here */
      409             120 : }
     410                 : 
     411                 : 
      412                 : /*
      413                 :  * Character-type support functions, equivalent to is* macros, but
      414                 :  * working with any possible encodings and locales. Notes:
      415                 :  *  - with multibyte encoding and C-locale, isw* functions may fail or give wrong results.
      416                 :  *  - multibyte encoding and C-locale are often used for Asian languages.
      417                 :  *  - if locale is C then we use pgwstr instead of wstr; in that case any
      418                 :  *    character above 0x7f simply yields the macro's 'nonascii' argument.
      419                 :  *  - p_iswhat(type, nonascii) defines both p_is<type> and its negation p_isnot<type>.
      420                 :  */
      421                 : 
      422                 : #define p_iswhat(type, nonascii)                                            \
      423                 :                                                                             \
      424                 : static int                                                                  \
      425                 : p_is##type(TParser *prs)                                                    \
      426                 : {                                                                           \
      427                 :     Assert(prs->state);                                                      \
      428                 :     if (prs->usewide)                                                        \
      429                 :     {                                                                       \
      430                 :         if (prs->pgwstr)                                                 \
      431                 :         {                                                                   \
      432                 :             unsigned int c = *(prs->pgwstr + prs->state->poschar);         \
      433                 :             if (c > 0x7f)                                                    \
      434                 :                 return nonascii;                                            \
      435                 :             return is##type(c);                                             \
      436                 :         }                                                                   \
      437                 :         return isw##type(*(prs->wstr + prs->state->poschar));              \
      438                 :     }                                                                       \
      439                 :     return is##type(*(unsigned char *) (prs->str + prs->state->posbyte));  \
      440                 : }                                                                           \
      441                 :                                                                             \
      442                 : static int                                                                  \
      443                 : p_isnot##type(TParser *prs)                                                 \
      444                 : {                                                                           \
      445                 :     return !p_is##type(prs);                                                \
      446                 : }
     447                 : 
      448                 : /*
      449                 :  * In C locale with a multibyte encoding, any non-ASCII symbol is considered
      450                 :  * an alpha (and therefore alnum) character, but not a member of other char classes.
      451                 :  */
      452           12561 : p_iswhat(alnum, 1)
      453           46886 : p_iswhat(alpha, 1)
      454           18566 : p_iswhat(digit, 0)
      455 UBC           0 : p_iswhat(lower, 0)
      456               0 : p_iswhat(print, 0)
      457               0 : p_iswhat(punct, 0)
      458 CBC         339 : p_iswhat(space, 0)
      459 UBC           0 : p_iswhat(upper, 0)
      460 CBC           9 : p_iswhat(xdigit, 0)
     461                 : 
      462                 : /* p_iseq should be used only for ascii symbols: it matches only a one-byte current char equal to c */
      463                 : 
      464                 : static int
      465          115684 : p_iseq(TParser *prs, char c)
      466                 : {
      467          115684 :     Assert(prs->state);
      468          115684 :     return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0;
      469                 : }
     470                 : 
      471                 : static int
      472           50025 : p_isEOF(TParser *prs)
      473                 : {                               /* 1 when posbyte has reached lenstr or the current char has zero length */
      474           50025 :     Assert(prs->state);
      475           50025 :     return (prs->state->posbyte == prs->lenstr || prs->state->charlen == 0) ? 1 : 0;
      476                 : }
     477                 : 
      478                 : static int
      479          115684 : p_iseqC(TParser *prs)
      480                 : {                               /* one-argument form of p_iseq: compares against prs->c */
      481          115684 :     return p_iseq(prs, prs->c);
      482                 : }
     483                 : 
      484                 : static int
      485 UBC           0 : p_isneC(TParser *prs)
      486                 : {                               /* negation of p_iseqC: current char is not prs->c */
      487               0 :     return !p_iseq(prs, prs->c);
      488                 : }
     489                 : 
      490                 : static int
      491 CBC       36730 : p_isascii(TParser *prs)
      492                 : {                               /* 1 when the current char is a single byte for which isascii() holds */
      493           36730 :     return (prs->state->charlen == 1 && isascii((unsigned char) *(prs->str + prs->state->posbyte))) ? 1 : 0;
      494                 : }
     495                 : 
      496                 : static int
      497           36730 : p_isasclet(TParser *prs)
      498                 : {                               /* ASCII letter: both p_isascii and p_isalpha must hold */
      499           36730 :     return (p_isascii(prs) && p_isalpha(prs)) ? 1 : 0;
      500                 : }
     501                 : 
      502                 : static int
      503            1329 : p_isurlchar(TParser *prs)
      504                 : {                               /* 1 for a one-byte char in 0x21..0x7E that is not in the reject list below */
      505                 :     char        ch;
      506                 : 
      507                 :     /* no non-ASCII need apply */
      508            1329 :     if (prs->state->charlen != 1)
      509 UBC           0 :         return 0;
      510 CBC        1329 :     ch = *(prs->str + prs->state->posbyte);
      511                 :     /* no spaces or control characters (bytes >= 0x80 fail here too, whether char is signed or not) */
      512            1329 :     if (ch <= 0x20 || ch >= 0x7F)
      513             117 :         return 0;
      514                 :     /* reject characters disallowed by RFC 3986 */
      515            1212 :     switch (ch)
      516                 :     {
      517              12 :         case '"':
      518                 :         case '<':
      519                 :         case '>':
      520                 :         case '\\':
      521                 :         case '^':
      522                 :         case '`':
      523                 :         case '{':
      524                 :         case '|':
      525                 :         case '}':
      526              12 :             return 0;
      527                 :     }
      528            1200 :     return 1;
      529                 : }
     530                 : 
     531                 : 
      532                 : /* deliberately suppress unused-function complaints for the above by referencing each one; not meant to run (all arguments are NULL) */
      533                 : void        _make_compiler_happy(void);
      534                 : void
      535 UBC           0 : _make_compiler_happy(void)
      536                 : {
      537               0 :     p_isalnum(NULL);
      538               0 :     p_isnotalnum(NULL);
      539               0 :     p_isalpha(NULL);
      540               0 :     p_isnotalpha(NULL);
      541               0 :     p_isdigit(NULL);
      542               0 :     p_isnotdigit(NULL);
      543               0 :     p_islower(NULL);
      544               0 :     p_isnotlower(NULL);
      545               0 :     p_isprint(NULL);
      546               0 :     p_isnotprint(NULL);
      547               0 :     p_ispunct(NULL);
      548               0 :     p_isnotpunct(NULL);
      549               0 :     p_isspace(NULL);
      550               0 :     p_isnotspace(NULL);
      551               0 :     p_isupper(NULL);
      552               0 :     p_isnotupper(NULL);
      553               0 :     p_isxdigit(NULL);
      554               0 :     p_isnotxdigit(NULL);
      555               0 :     p_isEOF(NULL);
      556               0 :     p_iseqC(NULL);
      557               0 :     p_isneC(NULL);
      558               0 : }
     559                 : 
     560                 : 
      561                 : static void
      562 CBC         126 : SpecialTags(TParser *prs)
      563                 : {                               /* sets prs->ignore on a script/style opening tag, clears it on the closing tag */
      564             126 :     switch (prs->state->lenchartoken)   /* token length picks the candidate tag names */
      565                 :     {
      566               3 :         case 8:                 /* </script */
      567               3 :             if (pg_strncasecmp(prs->token, "</script", 8) == 0)
      568               3 :                 prs->ignore = false;
      569               3 :             break;
      570              12 :         case 7:                 /* <script || </style */
      571              12 :             if (pg_strncasecmp(prs->token, "</style", 7) == 0)
      572 UBC           0 :                 prs->ignore = false;
      573 CBC          12 :             else if (pg_strncasecmp(prs->token, "<script", 7) == 0)
      574               3 :                 prs->ignore = true;
      575              12 :             break;
      576               9 :         case 6:                 /* <style */
      577               9 :             if (pg_strncasecmp(prs->token, "<style", 6) == 0)
      578 UBC           0 :                 prs->ignore = true;
      579 CBC           9 :             break;
      580             102 :         default:                /* any other length: leave prs->ignore unchanged */
      581             102 :             break;
      582                 :     }
      583             126 : }
     584                 : 
      585                 : static void
      586              66 : SpecialFURL(TParser *prs)
      587                 : {                               /* raise wanthost and move the position back by the token-so-far length */
      588              66 :     prs->wanthost = true;
      589              66 :     prs->state->posbyte -= prs->state->lenbytetoken;
      590              66 :     prs->state->poschar -= prs->state->lenchartoken;
      591              66 : }
     592                 : 
      593                 : static void
      594              18 : SpecialHyphen(TParser *prs)
      595                 : {                               /* move the position back by the token-so-far length (token lengths are kept) */
      596              18 :     prs->state->posbyte -= prs->state->lenbytetoken;
      597              18 :     prs->state->poschar -= prs->state->lenchartoken;
      598              18 : }
     599                 : 
      600                 : static void
      601 UBC           0 : SpecialVerVersion(TParser *prs)
      602                 : {                               /* move the position back by the token-so-far length, then reset that length to zero */
      603               0 :     prs->state->posbyte -= prs->state->lenbytetoken;
      604               0 :     prs->state->poschar -= prs->state->lenchartoken;
      605               0 :     prs->state->lenbytetoken = 0;
      606               0 :     prs->state->lenchartoken = 0;
      607               0 : }
     608                 : 
     609                 : static int
     610 CBC         240 : p_isstophost(TParser *prs)
     611                 : {
                               /*
                                * One-shot test on prs->wanthost: if the flag is set, clear it and
                                * return 1; otherwise return 0 and leave the parser untouched.
                                */
     612             240 :     if (prs->wanthost)
     613                 :     {
     614             102 :         prs->wanthost = false;
     615             102 :         return 1;
     616                 :     }
     617             138 :     return 0;
     618                 : }
     619                 : 
     620                 : static int
     621           18031 : p_isignore(TParser *prs)
     622                 : {
                               /* Report the prs->ignore flag as an int: 1 if set, 0 if not. */
     623           18031 :     return (prs->ignore) ? 1 : 0;
     624                 : }
     625                 : 
     626                 : static int
     627              45 : p_ishost(TParser *prs)
     628                 : {
                               /*
                                * Look ahead for a HOST token using a temporary parser obtained from
                                * TParserCopyInit(prs), with wanthost set on that temporary parser.
                                * Returns 1 if the temporary parser produced a HOST token, else 0.
                                * The temporary parser is closed on both paths.
                                */
     629              45 :     TParser    *tmpprs = TParserCopyInit(prs);
     630              45 :     int         res = 0;
     631                 : 
     632              45 :     tmpprs->wanthost = true;
     633                 : 
     634              45 :     if (TParserGet(tmpprs) && tmpprs->type == HOST)
     635                 :     {
                                   /*
                                    * Advance the caller's position and extend its token length by
                                    * the length of the token found, and take over charlen.
                                    */
     636              36 :         prs->state->posbyte += tmpprs->lenbytetoken;
     637              36 :         prs->state->poschar += tmpprs->lenchartoken;
     638              36 :         prs->state->lenbytetoken += tmpprs->lenbytetoken;
     639              36 :         prs->state->lenchartoken += tmpprs->lenchartoken;
     640              36 :         prs->state->charlen = tmpprs->state->charlen;
     641              36 :         res = 1;
     642                 :     }
     643              45 :     TParserCopyClose(tmpprs);
     644                 : 
     645              45 :     return res;
     646                 : }
     647                 : 
     648                 : static int
     649              75 : p_isURLPath(TParser *prs)
     650                 : {
                               /*
                                * Look ahead for a URLPATH token using a temporary parser obtained
                                * from TParserCopyInit(prs).  The temporary parser gets a new position
                                * from newTParserPosition() whose state is forced to
                                * TPS_InURLPathFirst before TParserGet() is called.  Returns 1 if a
                                * URLPATH token was produced, else 0; the temporary parser is closed
                                * on both paths.
                                */
     651              75 :     TParser    *tmpprs = TParserCopyInit(prs);
     652              75 :     int         res = 0;
     653                 : 
     654              75 :     tmpprs->state = newTParserPosition(tmpprs->state);
     655              75 :     tmpprs->state->state = TPS_InURLPathFirst;
     656                 : 
     657              75 :     if (TParserGet(tmpprs) && tmpprs->type == URLPATH)
     658                 :     {
                                   /*
                                    * Advance the caller's position and extend its token length by
                                    * the length of the token found, and take over charlen.
                                    */
     659              66 :         prs->state->posbyte += tmpprs->lenbytetoken;
     660              66 :         prs->state->poschar += tmpprs->lenchartoken;
     661              66 :         prs->state->lenbytetoken += tmpprs->lenbytetoken;
     662              66 :         prs->state->lenchartoken += tmpprs->lenchartoken;
     663              66 :         prs->state->charlen = tmpprs->state->charlen;
     664              66 :         res = 1;
     665                 :     }
     666              75 :     TParserCopyClose(tmpprs);
     667                 : 
     668              75 :     return res;
     669                 : }
     670                 : 
     671                 : /*
     672                 :  * Returns 1 if the current character has zero display length or is one
     673                 :  * of the special signs used in several languages. Such characters do
     674                 :  * not break a word even though they are not alphabetic (isalpha).
     675                 :  * At the beginning of a word they are not part of it.
     676                 :  */
     677                 : static int
     678            4362 : p_isspecial(TParser *prs)
     679                 : {
     680                 :     /*
     681                 :      * pg_dsplen may return -1 (error or control character); only 0 matches
     682                 :      */
     683            4362 :     if (pg_dsplen(prs->str + prs->state->posbyte) == 0)
     684 UBC           0 :         return 1;
     685                 : 
     686                 :     /*
     687                 :      * Unicode characters in the 'Mark, Spacing Combining' category. These
     688                 :      * characters are not alphabetic, yet they do not break words either.
     689                 :      * Check them only in UTF-8, because other encodings of these characters
     690                 :      * are not supported by postgres or do not even exist.
     691                 :      */
     692 CBC        4362 :     if (GetDatabaseEncoding() == PG_UTF8 && prs->usewide)
     693                 :     {
     694                 :         static const pg_wchar strange_letter[] = {
     695                 :             /*
     696                 :              * searched by binary search, so keep elements in ascending order
     697                 :              */
     698                 :             0x0903,             /* DEVANAGARI SIGN VISARGA */
     699                 :             0x093E,             /* DEVANAGARI VOWEL SIGN AA */
     700                 :             0x093F,             /* DEVANAGARI VOWEL SIGN I */
     701                 :             0x0940,             /* DEVANAGARI VOWEL SIGN II */
     702                 :             0x0949,             /* DEVANAGARI VOWEL SIGN CANDRA O */
     703                 :             0x094A,             /* DEVANAGARI VOWEL SIGN SHORT O */
     704                 :             0x094B,             /* DEVANAGARI VOWEL SIGN O */
     705                 :             0x094C,             /* DEVANAGARI VOWEL SIGN AU */
     706                 :             0x0982,             /* BENGALI SIGN ANUSVARA */
     707                 :             0x0983,             /* BENGALI SIGN VISARGA */
     708                 :             0x09BE,             /* BENGALI VOWEL SIGN AA */
     709                 :             0x09BF,             /* BENGALI VOWEL SIGN I */
     710                 :             0x09C0,             /* BENGALI VOWEL SIGN II */
     711                 :             0x09C7,             /* BENGALI VOWEL SIGN E */
     712                 :             0x09C8,             /* BENGALI VOWEL SIGN AI */
     713                 :             0x09CB,             /* BENGALI VOWEL SIGN O */
     714                 :             0x09CC,             /* BENGALI VOWEL SIGN AU */
     715                 :             0x09D7,             /* BENGALI AU LENGTH MARK */
     716                 :             0x0A03,             /* GURMUKHI SIGN VISARGA */
     717                 :             0x0A3E,             /* GURMUKHI VOWEL SIGN AA */
     718                 :             0x0A3F,             /* GURMUKHI VOWEL SIGN I */
     719                 :             0x0A40,             /* GURMUKHI VOWEL SIGN II */
     720                 :             0x0A83,             /* GUJARATI SIGN VISARGA */
     721                 :             0x0ABE,             /* GUJARATI VOWEL SIGN AA */
     722                 :             0x0ABF,             /* GUJARATI VOWEL SIGN I */
     723                 :             0x0AC0,             /* GUJARATI VOWEL SIGN II */
     724                 :             0x0AC9,             /* GUJARATI VOWEL SIGN CANDRA O */
     725                 :             0x0ACB,             /* GUJARATI VOWEL SIGN O */
     726                 :             0x0ACC,             /* GUJARATI VOWEL SIGN AU */
     727                 :             0x0B02,             /* ORIYA SIGN ANUSVARA */
     728                 :             0x0B03,             /* ORIYA SIGN VISARGA */
     729                 :             0x0B3E,             /* ORIYA VOWEL SIGN AA */
     730                 :             0x0B40,             /* ORIYA VOWEL SIGN II */
     731                 :             0x0B47,             /* ORIYA VOWEL SIGN E */
     732                 :             0x0B48,             /* ORIYA VOWEL SIGN AI */
     733                 :             0x0B4B,             /* ORIYA VOWEL SIGN O */
     734                 :             0x0B4C,             /* ORIYA VOWEL SIGN AU */
     735                 :             0x0B57,             /* ORIYA AU LENGTH MARK */
     736                 :             0x0BBE,             /* TAMIL VOWEL SIGN AA */
     737                 :             0x0BBF,             /* TAMIL VOWEL SIGN I */
     738                 :             0x0BC1,             /* TAMIL VOWEL SIGN U */
     739                 :             0x0BC2,             /* TAMIL VOWEL SIGN UU */
     740                 :             0x0BC6,             /* TAMIL VOWEL SIGN E */
     741                 :             0x0BC7,             /* TAMIL VOWEL SIGN EE */
     742                 :             0x0BC8,             /* TAMIL VOWEL SIGN AI */
     743                 :             0x0BCA,             /* TAMIL VOWEL SIGN O */
     744                 :             0x0BCB,             /* TAMIL VOWEL SIGN OO */
     745                 :             0x0BCC,             /* TAMIL VOWEL SIGN AU */
     746                 :             0x0BD7,             /* TAMIL AU LENGTH MARK */
     747                 :             0x0C01,             /* TELUGU SIGN CANDRABINDU */
     748                 :             0x0C02,             /* TELUGU SIGN ANUSVARA */
     749                 :             0x0C03,             /* TELUGU SIGN VISARGA */
     750                 :             0x0C41,             /* TELUGU VOWEL SIGN U */
     751                 :             0x0C42,             /* TELUGU VOWEL SIGN UU */
     752                 :             0x0C43,             /* TELUGU VOWEL SIGN VOCALIC R */
     753                 :             0x0C44,             /* TELUGU VOWEL SIGN VOCALIC RR */
     754                 :             0x0C82,             /* KANNADA SIGN ANUSVARA */
     755                 :             0x0C83,             /* KANNADA SIGN VISARGA */
     756                 :             0x0CBE,             /* KANNADA VOWEL SIGN AA */
     757                 :             0x0CC0,             /* KANNADA VOWEL SIGN II */
     758                 :             0x0CC1,             /* KANNADA VOWEL SIGN U */
     759                 :             0x0CC2,             /* KANNADA VOWEL SIGN UU */
     760                 :             0x0CC3,             /* KANNADA VOWEL SIGN VOCALIC R */
     761                 :             0x0CC4,             /* KANNADA VOWEL SIGN VOCALIC RR */
     762                 :             0x0CC7,             /* KANNADA VOWEL SIGN EE */
     763                 :             0x0CC8,             /* KANNADA VOWEL SIGN AI */
     764                 :             0x0CCA,             /* KANNADA VOWEL SIGN O */
     765                 :             0x0CCB,             /* KANNADA VOWEL SIGN OO */
     766                 :             0x0CD5,             /* KANNADA LENGTH MARK */
     767                 :             0x0CD6,             /* KANNADA AI LENGTH MARK */
     768                 :             0x0D02,             /* MALAYALAM SIGN ANUSVARA */
     769                 :             0x0D03,             /* MALAYALAM SIGN VISARGA */
     770                 :             0x0D3E,             /* MALAYALAM VOWEL SIGN AA */
     771                 :             0x0D3F,             /* MALAYALAM VOWEL SIGN I */
     772                 :             0x0D40,             /* MALAYALAM VOWEL SIGN II */
     773                 :             0x0D46,             /* MALAYALAM VOWEL SIGN E */
     774                 :             0x0D47,             /* MALAYALAM VOWEL SIGN EE */
     775                 :             0x0D48,             /* MALAYALAM VOWEL SIGN AI */
     776                 :             0x0D4A,             /* MALAYALAM VOWEL SIGN O */
     777                 :             0x0D4B,             /* MALAYALAM VOWEL SIGN OO */
     778                 :             0x0D4C,             /* MALAYALAM VOWEL SIGN AU */
     779                 :             0x0D57,             /* MALAYALAM AU LENGTH MARK */
     780                 :             0x0D82,             /* SINHALA SIGN ANUSVARAYA */
     781                 :             0x0D83,             /* SINHALA SIGN VISARGAYA */
     782                 :             0x0DCF,             /* SINHALA VOWEL SIGN AELA-PILLA */
     783                 :             0x0DD0,             /* SINHALA VOWEL SIGN KETTI AEDA-PILLA */
     784                 :             0x0DD1,             /* SINHALA VOWEL SIGN DIGA AEDA-PILLA */
     785                 :             0x0DD8,             /* SINHALA VOWEL SIGN GAETTA-PILLA */
     786                 :             0x0DD9,             /* SINHALA VOWEL SIGN KOMBUVA */
     787                 :             0x0DDA,             /* SINHALA VOWEL SIGN DIGA KOMBUVA */
     788                 :             0x0DDB,             /* SINHALA VOWEL SIGN KOMBU DEKA */
     789                 :             0x0DDC,             /* SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA */
     790                 :             0x0DDD,             /* SINHALA VOWEL SIGN KOMBUVA HAA DIGA
     791                 :                                  * AELA-PILLA */
     792                 :             0x0DDE,             /* SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA */
     793                 :             0x0DDF,             /* SINHALA VOWEL SIGN GAYANUKITTA */
     794                 :             0x0DF2,             /* SINHALA VOWEL SIGN DIGA GAETTA-PILLA */
     795                 :             0x0DF3,             /* SINHALA VOWEL SIGN DIGA GAYANUKITTA */
     796                 :             0x0F3E,             /* TIBETAN SIGN YAR TSHES */
     797                 :             0x0F3F,             /* TIBETAN SIGN MAR TSHES */
     798                 :             0x0F7F,             /* TIBETAN SIGN RNAM BCAD */
     799                 :             0x102B,             /* MYANMAR VOWEL SIGN TALL AA */
     800                 :             0x102C,             /* MYANMAR VOWEL SIGN AA */
     801                 :             0x1031,             /* MYANMAR VOWEL SIGN E */
     802                 :             0x1038,             /* MYANMAR SIGN VISARGA */
     803                 :             0x103B,             /* MYANMAR CONSONANT SIGN MEDIAL YA */
     804                 :             0x103C,             /* MYANMAR CONSONANT SIGN MEDIAL RA */
     805                 :             0x1056,             /* MYANMAR VOWEL SIGN VOCALIC R */
     806                 :             0x1057,             /* MYANMAR VOWEL SIGN VOCALIC RR */
     807                 :             0x1062,             /* MYANMAR VOWEL SIGN SGAW KAREN EU */
     808                 :             0x1063,             /* MYANMAR TONE MARK SGAW KAREN HATHI */
     809                 :             0x1064,             /* MYANMAR TONE MARK SGAW KAREN KE PHO */
     810                 :             0x1067,             /* MYANMAR VOWEL SIGN WESTERN PWO KAREN EU */
     811                 :             0x1068,             /* MYANMAR VOWEL SIGN WESTERN PWO KAREN UE */
     812                 :             0x1069,             /* MYANMAR SIGN WESTERN PWO KAREN TONE-1 */
     813                 :             0x106A,             /* MYANMAR SIGN WESTERN PWO KAREN TONE-2 */
     814                 :             0x106B,             /* MYANMAR SIGN WESTERN PWO KAREN TONE-3 */
     815                 :             0x106C,             /* MYANMAR SIGN WESTERN PWO KAREN TONE-4 */
     816                 :             0x106D,             /* MYANMAR SIGN WESTERN PWO KAREN TONE-5 */
     817                 :             0x1083,             /* MYANMAR VOWEL SIGN SHAN AA */
     818                 :             0x1084,             /* MYANMAR VOWEL SIGN SHAN E */
     819                 :             0x1087,             /* MYANMAR SIGN SHAN TONE-2 */
     820                 :             0x1088,             /* MYANMAR SIGN SHAN TONE-3 */
     821                 :             0x1089,             /* MYANMAR SIGN SHAN TONE-5 */
     822                 :             0x108A,             /* MYANMAR SIGN SHAN TONE-6 */
     823                 :             0x108B,             /* MYANMAR SIGN SHAN COUNCIL TONE-2 */
     824                 :             0x108C,             /* MYANMAR SIGN SHAN COUNCIL TONE-3 */
     825                 :             0x108F,             /* MYANMAR SIGN RUMAI PALAUNG TONE-5 */
     826                 :             0x17B6,             /* KHMER VOWEL SIGN AA */
     827                 :             0x17BE,             /* KHMER VOWEL SIGN OE */
     828                 :             0x17BF,             /* KHMER VOWEL SIGN YA */
     829                 :             0x17C0,             /* KHMER VOWEL SIGN IE */
     830                 :             0x17C1,             /* KHMER VOWEL SIGN E */
     831                 :             0x17C2,             /* KHMER VOWEL SIGN AE */
     832                 :             0x17C3,             /* KHMER VOWEL SIGN AI */
     833                 :             0x17C4,             /* KHMER VOWEL SIGN OO */
     834                 :             0x17C5,             /* KHMER VOWEL SIGN AU */
     835                 :             0x17C7,             /* KHMER SIGN REAHMUK */
     836                 :             0x17C8,             /* KHMER SIGN YUUKALEAPINTU */
     837                 :             0x1923,             /* LIMBU VOWEL SIGN EE */
     838                 :             0x1924,             /* LIMBU VOWEL SIGN AI */
     839                 :             0x1925,             /* LIMBU VOWEL SIGN OO */
     840                 :             0x1926,             /* LIMBU VOWEL SIGN AU */
     841                 :             0x1929,             /* LIMBU SUBJOINED LETTER YA */
     842                 :             0x192A,             /* LIMBU SUBJOINED LETTER RA */
     843                 :             0x192B,             /* LIMBU SUBJOINED LETTER WA */
     844                 :             0x1930,             /* LIMBU SMALL LETTER KA */
     845                 :             0x1931,             /* LIMBU SMALL LETTER NGA */
     846                 :             0x1933,             /* LIMBU SMALL LETTER TA */
     847                 :             0x1934,             /* LIMBU SMALL LETTER NA */
     848                 :             0x1935,             /* LIMBU SMALL LETTER PA */
     849                 :             0x1936,             /* LIMBU SMALL LETTER MA */
     850                 :             0x1937,             /* LIMBU SMALL LETTER RA */
     851                 :             0x1938,             /* LIMBU SMALL LETTER LA */
     852                 :             0x19B0,             /* NEW TAI LUE VOWEL SIGN VOWEL SHORTENER */
     853                 :             0x19B1,             /* NEW TAI LUE VOWEL SIGN AA */
     854                 :             0x19B2,             /* NEW TAI LUE VOWEL SIGN II */
     855                 :             0x19B3,             /* NEW TAI LUE VOWEL SIGN U */
     856                 :             0x19B4,             /* NEW TAI LUE VOWEL SIGN UU */
     857                 :             0x19B5,             /* NEW TAI LUE VOWEL SIGN E */
     858                 :             0x19B6,             /* NEW TAI LUE VOWEL SIGN AE */
     859                 :             0x19B7,             /* NEW TAI LUE VOWEL SIGN O */
     860                 :             0x19B8,             /* NEW TAI LUE VOWEL SIGN OA */
     861                 :             0x19B9,             /* NEW TAI LUE VOWEL SIGN UE */
     862                 :             0x19BA,             /* NEW TAI LUE VOWEL SIGN AY */
     863                 :             0x19BB,             /* NEW TAI LUE VOWEL SIGN AAY */
     864                 :             0x19BC,             /* NEW TAI LUE VOWEL SIGN UY */
     865                 :             0x19BD,             /* NEW TAI LUE VOWEL SIGN OY */
     866                 :             0x19BE,             /* NEW TAI LUE VOWEL SIGN OAY */
     867                 :             0x19BF,             /* NEW TAI LUE VOWEL SIGN UEY */
     868                 :             0x19C0,             /* NEW TAI LUE VOWEL SIGN IY */
     869                 :             0x19C8,             /* NEW TAI LUE TONE MARK-1 */
     870                 :             0x19C9,             /* NEW TAI LUE TONE MARK-2 */
     871                 :             0x1A19,             /* BUGINESE VOWEL SIGN E */
     872                 :             0x1A1A,             /* BUGINESE VOWEL SIGN O */
     873                 :             0x1A1B,             /* BUGINESE VOWEL SIGN AE */
     874                 :             0x1B04,             /* BALINESE SIGN BISAH */
     875                 :             0x1B35,             /* BALINESE VOWEL SIGN TEDUNG */
     876                 :             0x1B3B,             /* BALINESE VOWEL SIGN RA REPA TEDUNG */
     877                 :             0x1B3D,             /* BALINESE VOWEL SIGN LA LENGA TEDUNG */
     878                 :             0x1B3E,             /* BALINESE VOWEL SIGN TALING */
     879                 :             0x1B3F,             /* BALINESE VOWEL SIGN TALING REPA */
     880                 :             0x1B40,             /* BALINESE VOWEL SIGN TALING TEDUNG */
     881                 :             0x1B41,             /* BALINESE VOWEL SIGN TALING REPA TEDUNG */
     882                 :             0x1B43,             /* BALINESE VOWEL SIGN PEPET TEDUNG */
     883                 :             0x1B44,             /* BALINESE ADEG ADEG */
     884                 :             0x1B82,             /* SUNDANESE SIGN PANGWISAD */
     885                 :             0x1BA1,             /* SUNDANESE CONSONANT SIGN PAMINGKAL */
     886                 :             0x1BA6,             /* SUNDANESE VOWEL SIGN PANAELAENG */
     887                 :             0x1BA7,             /* SUNDANESE VOWEL SIGN PANOLONG */
     888                 :             0x1BAA,             /* SUNDANESE SIGN PAMAAEH */
     889                 :             0x1C24,             /* LEPCHA SUBJOINED LETTER YA */
     890                 :             0x1C25,             /* LEPCHA SUBJOINED LETTER RA */
     891                 :             0x1C26,             /* LEPCHA VOWEL SIGN AA */
     892                 :             0x1C27,             /* LEPCHA VOWEL SIGN I */
     893                 :             0x1C28,             /* LEPCHA VOWEL SIGN O */
     894                 :             0x1C29,             /* LEPCHA VOWEL SIGN OO */
     895                 :             0x1C2A,             /* LEPCHA VOWEL SIGN U */
     896                 :             0x1C2B,             /* LEPCHA VOWEL SIGN UU */
     897                 :             0x1C34,             /* LEPCHA CONSONANT SIGN NYIN-DO */
     898                 :             0x1C35,             /* LEPCHA CONSONANT SIGN KANG */
     899                 :             0xA823,             /* SYLOTI NAGRI VOWEL SIGN A */
     900                 :             0xA824,             /* SYLOTI NAGRI VOWEL SIGN I */
     901                 :             0xA827,             /* SYLOTI NAGRI VOWEL SIGN OO */
     902                 :             0xA880,             /* SAURASHTRA SIGN ANUSVARA */
     903                 :             0xA881,             /* SAURASHTRA SIGN VISARGA */
     904                 :             0xA8B4,             /* SAURASHTRA CONSONANT SIGN HAARU */
     905                 :             0xA8B5,             /* SAURASHTRA VOWEL SIGN AA */
     906                 :             0xA8B6,             /* SAURASHTRA VOWEL SIGN I */
     907                 :             0xA8B7,             /* SAURASHTRA VOWEL SIGN II */
     908                 :             0xA8B8,             /* SAURASHTRA VOWEL SIGN U */
     909                 :             0xA8B9,             /* SAURASHTRA VOWEL SIGN UU */
     910                 :             0xA8BA,             /* SAURASHTRA VOWEL SIGN VOCALIC R */
     911                 :             0xA8BB,             /* SAURASHTRA VOWEL SIGN VOCALIC RR */
     912                 :             0xA8BC,             /* SAURASHTRA VOWEL SIGN VOCALIC L */
     913                 :             0xA8BD,             /* SAURASHTRA VOWEL SIGN VOCALIC LL */
     914                 :             0xA8BE,             /* SAURASHTRA VOWEL SIGN E */
     915                 :             0xA8BF,             /* SAURASHTRA VOWEL SIGN EE */
     916                 :             0xA8C0,             /* SAURASHTRA VOWEL SIGN AI */
     917                 :             0xA8C1,             /* SAURASHTRA VOWEL SIGN O */
     918                 :             0xA8C2,             /* SAURASHTRA VOWEL SIGN OO */
     919                 :             0xA8C3,             /* SAURASHTRA VOWEL SIGN AU */
     920                 :             0xA952,             /* REJANG CONSONANT SIGN H */
     921                 :             0xA953,             /* REJANG VIRAMA */
     922                 :             0xAA2F,             /* CHAM VOWEL SIGN O */
     923                 :             0xAA30,             /* CHAM VOWEL SIGN AI */
     924                 :             0xAA33,             /* CHAM CONSONANT SIGN YA */
     925                 :             0xAA34,             /* CHAM CONSONANT SIGN RA */
     926                 :             0xAA4D              /* CHAM CONSONANT SIGN FINAL H */
     927                 :         };
     928            4362 :         const pg_wchar *StopLow = strange_letter,
     929            4362 :                    *StopHigh = strange_letter + lengthof(strange_letter),
     930                 :                    *StopMiddle;
     931                 :         pg_wchar    c;
     932                 : 
                                   /* Read the current character from pgwstr if it is set, else wstr. */
     933            4362 :         if (prs->pgwstr)
     934            1454 :             c = *(prs->pgwstr + prs->state->poschar);
     935                 :         else
     936            2908 :             c = (pg_wchar) *(prs->wstr + prs->state->poschar);
     937                 : 
                                   /* Binary search for c in the half-open range [StopLow, StopHigh). */
     938           39258 :         while (StopLow < StopHigh)
     939                 :         {
     940           34896 :             StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
     941           34896 :             if (*StopMiddle == c)
     942 UBC           0 :                 return 1;
     943 CBC       34896 :             else if (*StopMiddle < c)
     944 UBC           0 :                 StopLow = StopMiddle + 1;
     945                 :             else
     946 CBC       34896 :                 StopHigh = StopMiddle;
     947                 :         }
     948                 :     }
     949                 : 
     950            4362 :     return 0;
     951                 : }
     952                 : 
     953                 : /*
     954                 :  * State/action tables of the parser
     955                 :  */
     956                 : 
     957                 : static const TParserStateActionItem actionTPS_Base[] = {
                               /*
                                * Rows for TPS_Base.  The '<' row comes before the p_isignore row,
                                * which in turn comes before all letter, digit and punctuation rows.
                                * Letter and digit rows use A_NEXT into a word or integer state; the
                                * punctuation rows use A_PUSH.  The last row has a NULL test and
                                * names TPS_InSpace.
                                * NOTE(review): row field meanings and the handling of a NULL test
                                * are defined with TParserStateActionItem outside this chunk --
                                * presumably rows are tried in order and NULL matches anything;
                                * confirm.
                                */
     958                 :     {p_isEOF, 0, A_NEXT, TPS_Null, 0, NULL},
     959                 :     {p_iseqC, '<', A_PUSH, TPS_InTagFirst, 0, NULL},
     960                 :     {p_isignore, 0, A_NEXT, TPS_InSpace, 0, NULL},
     961                 :     {p_isasclet, 0, A_NEXT, TPS_InAsciiWord, 0, NULL},
     962                 :     {p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
     963                 :     {p_isdigit, 0, A_NEXT, TPS_InUnsignedInt, 0, NULL},
     964                 :     {p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
     965                 :     {p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL},
     966                 :     {p_iseqC, '&', A_PUSH, TPS_InXMLEntityFirst, 0, NULL},
     967                 :     {p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},
     968                 :     {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
     969                 :     {p_iseqC, '.', A_PUSH, TPS_InPathFirstFirst, 0, NULL},
     970                 :     {NULL, 0, A_NEXT, TPS_InSpace, 0, NULL}
     971                 : };
     972                 : 
     973                 : 
     974                 : static const TParserStateActionItem actionTPS_InNumWord[] = {
                               /*
                                * Rows for TPS_InNumWord.  The p_isalnum and p_isspecial rows name
                                * this same state with A_NEXT; the '@', '/', '.' and '-' rows use
                                * A_PUSH; the EOF row and the final NULL-test row use A_BINGO with
                                * token type NUMWORD and name TPS_Base.
                                * NOTE(review): the semantics of A_NEXT/A_PUSH/A_BINGO are defined
                                * outside this chunk -- confirm before relying on this summary.
                                */
     975                 :     {p_isEOF, 0, A_BINGO, TPS_Base, NUMWORD, NULL},
     976                 :     {p_isalnum, 0, A_NEXT, TPS_InNumWord, 0, NULL},
     977                 :     {p_isspecial, 0, A_NEXT, TPS_InNumWord, 0, NULL},
     978                 :     {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
     979                 :     {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
     980                 :     {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
     981                 :     {p_iseqC, '-', A_PUSH, TPS_InHyphenNumWordFirst, 0, NULL},
     982                 :     {NULL, 0, A_BINGO, TPS_Base, NUMWORD, NULL}
     983                 : };
     984                 : 
     985                 : static const TParserStateActionItem actionTPS_InAsciiWord[] = {
                               /*
                                * Rows for TPS_InAsciiWord.  '.', '-' and digits each appear in two
                                * consecutive rows: the first is an A_PUSH into a host-related state
                                * (TPS_InHostFirstDomain, TPS_InHostFirstAN, TPS_InHost), the second
                                * names a file, hyphenated-word or TPS_InNumWord state.  The EOF row
                                * and the final NULL-test row use A_BINGO with token type ASCIIWORD.
                                * NOTE(review): presumably the second row of a pair is the fallback
                                * when the pushed state fails, and TPS_Null means "stay in this
                                * state" -- both are defined outside this chunk; confirm.
                                */
     986                 :     {p_isEOF, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL},
     987                 :     {p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},
     988                 :     {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
     989                 :     {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},
     990                 :     {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
     991                 :     {p_iseqC, '-', A_PUSH, TPS_InHyphenAsciiWordFirst, 0, NULL},
     992                 :     {p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
     993                 :     {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
     994                 :     {p_iseqC, ':', A_PUSH, TPS_InProtocolFirst, 0, NULL},
     995                 :     {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
     996                 :     {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
     997                 :     {p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},
     998                 :     {p_isalpha, 0, A_NEXT, TPS_InWord, 0, NULL},
     999                 :     {p_isspecial, 0, A_NEXT, TPS_InWord, 0, NULL},
    1000                 :     {NULL, 0, A_BINGO, TPS_Base, ASCIIWORD, NULL}
    1001                 : };
    1002                 : 
    1003                 : static const TParserStateActionItem actionTPS_InWord[] = {
                               /*
                                * Rows for TPS_InWord.  The p_isalpha and p_isspecial rows use A_NEXT
                                * with TPS_Null; a digit names TPS_InNumWord; '-' uses A_PUSH into
                                * TPS_InHyphenWordFirst; the EOF row and the final NULL-test row use
                                * A_BINGO with token type WORD_T.
                                * NOTE(review): TPS_Null presumably means "stay in this state" --
                                * defined outside this chunk; confirm.
                                */
    1004                 :     {p_isEOF, 0, A_BINGO, TPS_Base, WORD_T, NULL},
    1005                 :     {p_isalpha, 0, A_NEXT, TPS_Null, 0, NULL},
    1006                 :     {p_isspecial, 0, A_NEXT, TPS_Null, 0, NULL},
    1007                 :     {p_isdigit, 0, A_NEXT, TPS_InNumWord, 0, NULL},
    1008                 :     {p_iseqC, '-', A_PUSH, TPS_InHyphenWordFirst, 0, NULL},
    1009                 :     {NULL, 0, A_BINGO, TPS_Base, WORD_T, NULL}
    1010                 : };
    1011                 : 
    1012                 : static const TParserStateActionItem actionTPS_InUnsignedInt[] = {
                               /*
                                * Rows for TPS_InUnsignedInt.  '.' appears in two consecutive A_PUSH
                                * rows (TPS_InHostFirstDomain, then TPS_InUDecimalFirst); 'e' and 'E'
                                * push TPS_InMantissaFirst; an ASCII letter pushes TPS_InHost before
                                * the p_isalpha and p_isspecial rows name TPS_InNumWord.  The EOF row
                                * and the final NULL-test row use A_BINGO with token type
                                * UNSIGNEDINT.
                                * NOTE(review): presumably later rows act as fallbacks when an
                                * earlier pushed state fails -- the interpreter is outside this
                                * chunk; confirm.
                                */
    1013                 :     {p_isEOF, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL},
    1014                 :     {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
    1015                 :     {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
    1016                 :     {p_iseqC, '.', A_PUSH, TPS_InUDecimalFirst, 0, NULL},
    1017                 :     {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},
    1018                 :     {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
    1019                 :     {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
    1020                 :     {p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
    1021                 :     {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
    1022                 :     {p_isasclet, 0, A_PUSH, TPS_InHost, 0, NULL},
    1023                 :     {p_isalpha, 0, A_NEXT, TPS_InNumWord, 0, NULL},
    1024                 :     {p_isspecial, 0, A_NEXT, TPS_InNumWord, 0, NULL},
    1025                 :     {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},
    1026                 :     {NULL, 0, A_BINGO, TPS_Base, UNSIGNEDINT, NULL}
    1027                 : };
    1028                 : 
    1029                 : static const TParserStateActionItem actionTPS_InSignedIntFirst[] = {  /* entry state for signed ints: only a digit continues */
    1030                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1031                 :     {p_isdigit, 0, A_NEXT | A_CLEAR, TPS_InSignedInt, 0, NULL},  /* digit: on to TPS_InSignedInt, with A_CLEAR */
    1032                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}  /* anything else is A_POP */
    1033                 : };
    1034                 : 
    1035                 : static const TParserStateActionItem actionTPS_InSignedInt[] = {  /* signed integer state; A_BINGO rows carry SIGNEDINT */
    1036                 :     {p_isEOF, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL},
    1037                 :     {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
    1038                 :     {p_iseqC, '.', A_PUSH, TPS_InDecimalFirst, 0, NULL},  /* '.': possible DECIMAL_T form */
    1039                 :     {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},  /* 'e'/'E': possible SCIENTIFIC form */
    1040                 :     {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
    1041                 :     {NULL, 0, A_BINGO, TPS_Base, SIGNEDINT, NULL}
    1042                 : };
    1043                 : 
    1044                 : static const TParserStateActionItem actionTPS_InSpace[] = {  /* separator run; every A_BINGO row carries SPACE */
    1045                 :     {p_isEOF, 0, A_BINGO, TPS_Base, SPACE, NULL},
    1046                 :     {p_iseqC, '<', A_BINGO, TPS_Base, SPACE, NULL},  /* '<' is tested before p_isignore, so it always ends the run */
    1047                 :     {p_isignore, 0, A_NEXT, TPS_Null, 0, NULL},
    1048                 :     {p_iseqC, '-', A_BINGO, TPS_Base, SPACE, NULL},  /* '-', '+', '&', '/' end the run and return to TPS_Base */
    1049                 :     {p_iseqC, '+', A_BINGO, TPS_Base, SPACE, NULL},
    1050                 :     {p_iseqC, '&', A_BINGO, TPS_Base, SPACE, NULL},
    1051                 :     {p_iseqC, '/', A_BINGO, TPS_Base, SPACE, NULL},
    1052                 :     {p_isnotalnum, 0, A_NEXT, TPS_InSpace, 0, NULL},  /* tested only after the explicit characters above */
    1053                 :     {NULL, 0, A_BINGO, TPS_Base, SPACE, NULL}
    1054                 : };
    1055                 : 
    1056                 : static const TParserStateActionItem actionTPS_InUDecimalFirst[] = {  /* pushed by the second '.' row of actionTPS_InUnsignedInt */
    1057                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1058                 :     {p_isdigit, 0, A_CLEAR, TPS_InUDecimal, 0, NULL},  /* digit after '.': on to TPS_InUDecimal, with A_CLEAR (no A_NEXT) */
    1059                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1060                 : };
    1061                 : 
    1062                 : static const TParserStateActionItem actionTPS_InUDecimal[] = {  /* unsigned decimal; A_BINGO rows carry DECIMAL_T */
    1063                 :     {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL},
    1064                 :     {p_isdigit, 0, A_NEXT, TPS_InUDecimal, 0, NULL},
    1065                 :     {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},  /* a second '.' may lead to VERSIONNUMBER */
    1066                 :     {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},  /* 'e'/'E': possible SCIENTIFIC form */
    1067                 :     {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
    1068                 :     {NULL, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL}
    1069                 : };
    1070                 : 
    1071                 : static const TParserStateActionItem actionTPS_InDecimalFirst[] = {  /* pushed by the '.' row of actionTPS_InSignedInt */
    1072                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1073                 :     {p_isdigit, 0, A_CLEAR, TPS_InDecimal, 0, NULL},  /* digit after '.': on to TPS_InDecimal, with A_CLEAR (no A_NEXT) */
    1074                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1075                 : };
    1076                 : 
    1077                 : static const TParserStateActionItem actionTPS_InDecimal[] = {  /* signed decimal; A_BINGO rows carry DECIMAL_T */
    1078                 :     {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL},
    1079                 :     {p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL},
    1080                 :     {p_iseqC, '.', A_PUSH, TPS_InVerVersion, 0, NULL},  /* second '.': TPS_InVerVersion here, not TPS_InVersionFirst as in actionTPS_InUDecimal */
    1081                 :     {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL},  /* 'e'/'E': possible SCIENTIFIC form */
    1082                 :     {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL},
    1083                 :     {NULL, 0, A_BINGO, TPS_Base, DECIMAL_T, NULL}
    1084                 : };
    1085                 : 
    1086                 : static const TParserStateActionItem actionTPS_InVerVersion[] = {  /* pushed by the second '.' of a signed decimal (actionTPS_InDecimal) */
    1087                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1088                 :     {p_isdigit, 0, A_RERUN, TPS_InSVerVersion, 0, SpecialVerVersion},  /* NOTE(review): A_RERUN and SpecialVerVersion are defined outside this view - confirm what is re-scanned */
    1089                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1090                 : };
    1091                 : 
    1092                 : static const TParserStateActionItem actionTPS_InSVerVersion[] = {  /* target of the A_RERUN row in actionTPS_InVerVersion */
    1093                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1094                 :     {p_isdigit, 0, A_BINGO | A_CLRALL, TPS_InUnsignedInt, SPACE, NULL},  /* digit: SPACE-typed A_BINGO, next state TPS_InUnsignedInt */
    1095                 :     {NULL, 0, A_NEXT, TPS_Null, 0, NULL}  /* any other character is A_NEXT, not A_POP */
    1096                 : };
    1097                 : 
    1098                 : 
    1099                 : static const TParserStateActionItem actionTPS_InVersionFirst[] = {  /* pushed by the '.' rows of actionTPS_InUDecimal and actionTPS_InVersion */
    1100                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1101                 :     {p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL},  /* digit after '.': on to TPS_InVersion, with A_CLEAR (no A_NEXT) */
    1102                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1103                 : };
    1104                 : 
    1105                 : static const TParserStateActionItem actionTPS_InVersion[] = {  /* dotted number; A_BINGO rows carry VERSIONNUMBER */
    1106                 :     {p_isEOF, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL},
    1107                 :     {p_isdigit, 0, A_NEXT, TPS_InVersion, 0, NULL},
    1108                 :     {p_iseqC, '.', A_PUSH, TPS_InVersionFirst, 0, NULL},  /* each further '.' goes through TPS_InVersionFirst again */
    1109                 :     {NULL, 0, A_BINGO, TPS_Base, VERSIONNUMBER, NULL}
    1110                 : };
    1111                 : 
    1112                 : static const TParserStateActionItem actionTPS_InMantissaFirst[] = {  /* pushed by the 'e'/'E' rows of the numeric tables */
    1113                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1114                 :     {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},  /* digit directly after 'e'/'E' */
    1115                 :     {p_iseqC, '+', A_NEXT, TPS_InMantissaSign, 0, NULL},  /* or a sign first */
    1116                 :     {p_iseqC, '-', A_NEXT, TPS_InMantissaSign, 0, NULL},
    1117                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1118                 : };
    1119                 : 
    1120                 : static const TParserStateActionItem actionTPS_InMantissaSign[] = {  /* after the '+'/'-' accepted in actionTPS_InMantissaFirst */
    1121                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1122                 :     {p_isdigit, 0, A_CLEAR, TPS_InMantissa, 0, NULL},  /* only a digit continues; everything else is A_POP */
    1123                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1124                 : };
    1125                 : 
    1126                 : static const TParserStateActionItem actionTPS_InMantissa[] = {  /* digits following 'e'/'E'; A_BINGO rows carry SCIENTIFIC */
    1127                 :     {p_isEOF, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL},
    1128                 :     {p_isdigit, 0, A_NEXT, TPS_InMantissa, 0, NULL},
    1129                 :     {NULL, 0, A_BINGO, TPS_Base, SCIENTIFIC, NULL}
    1130                 : };
    1131                 : 
    1132                 : static const TParserStateActionItem actionTPS_InXMLEntityFirst[] = {  /* first character of an entity body */
    1133                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1134                 :     {p_iseqC, '#', A_NEXT, TPS_InXMLEntityNumFirst, 0, NULL},  /* '#': numeric entity */
    1135                 :     {p_isasclet, 0, A_NEXT, TPS_InXMLEntity, 0, NULL},  /* p_isasclet letter, ':' or '_': named entity */
    1136                 :     {p_iseqC, ':', A_NEXT, TPS_InXMLEntity, 0, NULL},
    1137                 :     {p_iseqC, '_', A_NEXT, TPS_InXMLEntity, 0, NULL},
    1138                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1139                 : };
    1140                 : 
    1141                 : static const TParserStateActionItem actionTPS_InXMLEntity[] = {  /* named-entity characters: alnum, ':', '_', '.', '-' */
    1142                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1143                 :     {p_isalnum, 0, A_NEXT, TPS_InXMLEntity, 0, NULL},
    1144                 :     {p_iseqC, ':', A_NEXT, TPS_InXMLEntity, 0, NULL},
    1145                 :     {p_iseqC, '_', A_NEXT, TPS_InXMLEntity, 0, NULL},
    1146                 :     {p_iseqC, '.', A_NEXT, TPS_InXMLEntity, 0, NULL},
    1147                 :     {p_iseqC, '-', A_NEXT, TPS_InXMLEntity, 0, NULL},
    1148                 :     {p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},  /* ';' is the only way to TPS_InXMLEntityEnd */
    1149                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1150                 : };
    1151                 : 
    1152                 : static const TParserStateActionItem actionTPS_InXMLEntityNumFirst[] = {  /* after the '#' accepted in actionTPS_InXMLEntityFirst */
    1153                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1154                 :     {p_iseqC, 'x', A_NEXT, TPS_InXMLEntityHexNumFirst, 0, NULL},  /* 'x'/'X': hexadecimal form */
    1155                 :     {p_iseqC, 'X', A_NEXT, TPS_InXMLEntityHexNumFirst, 0, NULL},
    1156                 :     {p_isdigit, 0, A_NEXT, TPS_InXMLEntityNum, 0, NULL},  /* digit: decimal form */
    1157                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1158                 : };
    1159                 : 
    1160                 : static const TParserStateActionItem actionTPS_InXMLEntityHexNumFirst[] = {  /* after "#x"/"#X": a p_isxdigit character must follow */
    1161                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1162                 :     {p_isxdigit, 0, A_NEXT, TPS_InXMLEntityHexNum, 0, NULL},
    1163                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1164                 : };
    1165                 : 
    1166                 : static const TParserStateActionItem actionTPS_InXMLEntityNum[] = {  /* decimal entity digits */
    1167                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1168                 :     {p_isdigit, 0, A_NEXT, TPS_InXMLEntityNum, 0, NULL},
    1169                 :     {p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},  /* ';' leads to TPS_InXMLEntityEnd */
    1170                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1171                 : };
    1172                 : 
    1173                 : static const TParserStateActionItem actionTPS_InXMLEntityHexNum[] = {  /* hexadecimal entity digits */
    1174                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1175                 :     {p_isxdigit, 0, A_NEXT, TPS_InXMLEntityHexNum, 0, NULL},
    1176                 :     {p_iseqC, ';', A_NEXT, TPS_InXMLEntityEnd, 0, NULL},  /* ';' leads to TPS_InXMLEntityEnd */
    1177                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1178                 : };
    1179                 : 
    1180                 : static const TParserStateActionItem actionTPS_InXMLEntityEnd[] = {  /* reached only through the ';' rows of the entity tables */
    1181                 :     {NULL, 0, A_BINGO | A_CLEAR, TPS_Base, XMLENTITY, NULL}  /* single unconditional row, typed XMLENTITY */
    1182                 : };
    1183                 : 
    1184                 : static const TParserStateActionItem actionTPS_InTagFirst[] = {  /* first character of a tag candidate */
    1185                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1186                 :     {p_iseqC, '/', A_PUSH, TPS_InTagCloseFirst, 0, NULL},  /* '/': closing tag */
    1187                 :     {p_iseqC, '!', A_PUSH, TPS_InCommentFirst, 0, NULL},  /* '!': comment or <!D...> (see actionTPS_InCommentFirst) */
    1188                 :     {p_iseqC, '?', A_PUSH, TPS_InXMLBegin, 0, NULL},  /* '?': <?x... (see actionTPS_InXMLBegin) */
    1189                 :     {p_isasclet, 0, A_PUSH, TPS_InTagName, 0, NULL},  /* p_isasclet letter, ':' or '_': tag name */
    1190                 :     {p_iseqC, ':', A_PUSH, TPS_InTagName, 0, NULL},
    1191                 :     {p_iseqC, '_', A_PUSH, TPS_InTagName, 0, NULL},
    1192                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1193                 : };
    1194                 : 
    1195                 : static const TParserStateActionItem actionTPS_InXMLBegin[] = {  /* pushed by the '?' row of actionTPS_InTagFirst */
    1196                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1197                 :     /* <?xml ... */
    1198                 :     /* XXX do we want states for the m and l?  Right now this accepts <?xZ */
    1199                 :     {p_iseqC, 'x', A_NEXT, TPS_InTag, 0, NULL},  /* only the 'x' is checked before generic tag scanning */
    1200                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1201                 : };
    1202                 : 
    1203                 : static const TParserStateActionItem actionTPS_InTagCloseFirst[] = {  /* pushed by the '/' row of actionTPS_InTagFirst */
    1204                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1205                 :     {p_isasclet, 0, A_NEXT, TPS_InTagName, 0, NULL},  /* only a p_isasclet letter starts the name (no ':' or '_' here) */
    1206                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1207                 : };
    1208                 : 
    1209                 : static const TParserStateActionItem actionTPS_InTagName[] = {  /* tag-name characters: alnum, ':', '_', '.', '-' */
    1210                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1211                 :     /* <br/> case */
    1212                 :     {p_iseqC, '/', A_NEXT, TPS_InTagBeginEnd, 0, NULL},
    1213                 :     {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},  /* name ends at '>' or whitespace; both rows name SpecialTags */
    1214                 :     {p_isspace, 0, A_NEXT, TPS_InTag, 0, SpecialTags},  /* NOTE(review): SpecialTags is defined outside this view */
    1215                 :     {p_isalnum, 0, A_NEXT, TPS_Null, 0, NULL},
    1216                 :     {p_iseqC, ':', A_NEXT, TPS_Null, 0, NULL},
    1217                 :     {p_iseqC, '_', A_NEXT, TPS_Null, 0, NULL},
    1218                 :     {p_iseqC, '.', A_NEXT, TPS_Null, 0, NULL},
    1219                 :     {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
    1220                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1221                 : };
    1222                 : 
    1223                 : static const TParserStateActionItem actionTPS_InTagBeginEnd[] = {  /* after the '/' of the <br/> case in actionTPS_InTagName */
    1224                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1225                 :     {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, NULL},  /* only '>' is accepted */
    1226                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1227                 : };
    1228                 : 
    1229                 : static const TParserStateActionItem actionTPS_InTag[] = {  /* tag body: listed characters continue, anything unlisted is A_POP */
    1230                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1231                 :     {p_iseqC, '>', A_NEXT, TPS_InTagEnd, 0, SpecialTags},  /* '>' ends the tag */
    1232                 :     {p_iseqC, '\'', A_NEXT, TPS_InTagEscapeK, 0, NULL},  /* single-quoted section */
    1233                 :     {p_iseqC, '"', A_NEXT, TPS_InTagEscapeKK, 0, NULL},  /* double-quoted section */
    1234                 :     {p_isasclet, 0, A_NEXT, TPS_Null, 0, NULL},
    1235                 :     {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},
    1236                 :     {p_iseqC, '=', A_NEXT, TPS_Null, 0, NULL},
    1237                 :     {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},
    1238                 :     {p_iseqC, '_', A_NEXT, TPS_Null, 0, NULL},
    1239                 :     {p_iseqC, '#', A_NEXT, TPS_Null, 0, NULL},
    1240                 :     {p_iseqC, '/', A_NEXT, TPS_Null, 0, NULL},
    1241                 :     {p_iseqC, ':', A_NEXT, TPS_Null, 0, NULL},
    1242                 :     {p_iseqC, '.', A_NEXT, TPS_Null, 0, NULL},
    1243                 :     {p_iseqC, '&', A_NEXT, TPS_Null, 0, NULL},
    1244                 :     {p_iseqC, '?', A_NEXT, TPS_Null, 0, NULL},
    1245                 :     {p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL},
    1246                 :     {p_iseqC, '~', A_NEXT, TPS_Null, 0, NULL},
    1247                 :     {p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags},  /* whitespace continues and names SpecialTags */
    1248                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1249                 : };
    1250                 : 
    1251                 : static const TParserStateActionItem actionTPS_InTagEscapeK[] = {  /* inside a single-quoted section of a tag */
    1252                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1253                 :     {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},  /* backslash: handled by actionTPS_InTagBackSleshed */
    1254                 :     {p_iseqC, '\'', A_NEXT, TPS_InTag, 0, NULL},  /* closing quote returns to TPS_InTag */
    1255                 :     {NULL, 0, A_NEXT, TPS_InTagEscapeK, 0, NULL}  /* every other character continues the section */
    1256                 : };
    1257                 : 
    1258                 : static const TParserStateActionItem actionTPS_InTagEscapeKK[] = {  /* inside a double-quoted section of a tag */
    1259                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1260                 :     {p_iseqC, '\\', A_PUSH, TPS_InTagBackSleshed, 0, NULL},  /* backslash: handled by actionTPS_InTagBackSleshed */
    1261                 :     {p_iseqC, '"', A_NEXT, TPS_InTag, 0, NULL},  /* closing quote returns to TPS_InTag */
    1262                 :     {NULL, 0, A_NEXT, TPS_InTagEscapeKK, 0, NULL}  /* every other character continues the section */
    1263                 : };
    1264                 : 
    1265                 : static const TParserStateActionItem actionTPS_InTagBackSleshed[] = {  /* pushed by the backslash rows of both quoted-section tables */
    1266                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1267                 :     {NULL, 0, A_MERGE, TPS_Null, 0, NULL}  /* anything but EOF is A_MERGE; NOTE(review): A_MERGE is defined outside this view */
    1268                 : };
    1269                 : 
    1270                 : static const TParserStateActionItem actionTPS_InTagEnd[] = {  /* reached through the '>' rows of the tag tables */
    1271                 :     {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG_T, NULL}  /* single unconditional row, typed TAG_T */
    1272                 : };
    1273                 : 
    1274                 : static const TParserStateActionItem actionTPS_InCommentFirst[] = {  /* pushed by the '!' row of actionTPS_InTagFirst */
    1275                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1276                 :     {p_iseqC, '-', A_NEXT, TPS_InCommentLast, 0, NULL},  /* first '-' of a comment opener */
    1277                 :     /* <!DOCTYPE ...> */
    1278                 :     {p_iseqC, 'D', A_NEXT, TPS_InTag, 0, NULL},  /* only the first letter is checked, in either case */
    1279                 :     {p_iseqC, 'd', A_NEXT, TPS_InTag, 0, NULL},
    1280                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1281                 : };
    1282                 : 
    1283                 : static const TParserStateActionItem actionTPS_InCommentLast[] = {  /* after the first '-' accepted in actionTPS_InCommentFirst */
    1284                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1285                 :     {p_iseqC, '-', A_NEXT, TPS_InComment, 0, NULL},  /* a second '-' enters the comment body */
    1286                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1287                 : };
    1288                 : 
    1289                 : static const TParserStateActionItem actionTPS_InComment[] = {  /* comment body */
    1290                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},  /* EOF inside the body is A_POP */
    1291                 :     {p_iseqC, '-', A_NEXT, TPS_InCloseCommentFirst, 0, NULL},  /* '-' may begin the closing sequence */
    1292                 :     {NULL, 0, A_NEXT, TPS_Null, 0, NULL}  /* any other character continues */
    1293                 : };
    1294                 : 
    1295                 : static const TParserStateActionItem actionTPS_InCloseCommentFirst[] = {  /* one '-' seen inside a comment body */
    1296                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1297                 :     {p_iseqC, '-', A_NEXT, TPS_InCloseCommentLast, 0, NULL},  /* second '-' */
    1298                 :     {NULL, 0, A_NEXT, TPS_InComment, 0, NULL}  /* anything else goes back to the body */
    1299                 : };
    1300                 : 
    1301                 : static const TParserStateActionItem actionTPS_InCloseCommentLast[] = {  /* two '-' seen inside a comment body */
    1302                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1303                 :     {p_iseqC, '-', A_NEXT, TPS_Null, 0, NULL},  /* further '-': TPS_Null target, presumably stays in this state - confirm */
    1304                 :     {p_iseqC, '>', A_NEXT, TPS_InCommentEnd, 0, NULL},  /* '>' closes the comment */
    1305                 :     {NULL, 0, A_NEXT, TPS_InComment, 0, NULL}  /* anything else goes back to the body */
    1306                 : };
    1307                 : 
    1308                 : static const TParserStateActionItem actionTPS_InCommentEnd[] = {  /* reached through the '>' row of actionTPS_InCloseCommentLast */
    1309                 :     {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, TAG_T, NULL}  /* a finished comment is typed TAG_T, like a tag */
    1310                 : };
    1311                 : 
    1312                 : static const TParserStateActionItem actionTPS_InHostFirstDomain[] = {  /* pushed by the '.' rows of the word, number and host tables */
    1313                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1314                 :     {p_isasclet, 0, A_NEXT, TPS_InHostDomainSecond, 0, NULL},  /* letter after '.': heads towards TPS_InHostDomain */
    1315                 :     {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},  /* digit after '.': plain TPS_InHost */
    1316                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1317                 : };
    1318                 : 
    1319                 : static const TParserStateActionItem actionTPS_InHostDomainSecond[] = {  /* one letter seen after '.'; unlike actionTPS_InHostDomain, no A_BINGO rows */
    1320                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1321                 :     {p_isasclet, 0, A_NEXT, TPS_InHostDomain, 0, NULL},  /* a second letter reaches TPS_InHostDomain */
    1322                 :     {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
    1323                 :     {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
    1324                 :     {p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
    1325                 :     {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
    1326                 :     {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
    1327                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1328                 : };
    1329                 : 
    1330                 : static const TParserStateActionItem actionTPS_InHostDomain[] = {  /* host domain state; A_BINGO rows carry HOST */
    1331                 :     {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
    1332                 :     {p_isasclet, 0, A_NEXT, TPS_InHostDomain, 0, NULL},
    1333                 :     {p_isdigit, 0, A_PUSH, TPS_InHost, 0, NULL},
    1334                 :     {p_iseqC, ':', A_PUSH, TPS_InPortFirst, 0, NULL},  /* ':' may introduce a port */
    1335                 :     {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
    1336                 :     {p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
    1337                 :     {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},
    1338                 :     {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
    1339                 :     {p_isdigit, 0, A_POP, TPS_Null, 0, NULL},  /* second p_isdigit row: presumably reached only when the A_PUSH row above falls back - confirm */
    1340                 :     {p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},  /* HOST-typed, then continue in TPS_InURLPathStart */
    1341                 :     {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},  /* '/' after the host: try a full URL */
    1342                 :     {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
    1343                 : };
    1344                 : 
    1345                 : static const TParserStateActionItem actionTPS_InPortFirst[] = {  /* pushed by the ':' row of actionTPS_InHostDomain */
    1346                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1347                 :     {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},  /* a digit must follow the ':' */
    1348                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1349                 : };
    1350                 : 
    1351                 : static const TParserStateActionItem actionTPS_InPort[] = {  /* port digits; A_BINGO rows carry HOST, as in actionTPS_InHostDomain */
    1352                 :     {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL},
    1353                 :     {p_isdigit, 0, A_NEXT, TPS_InPort, 0, NULL},
    1354                 :     {p_isstophost, 0, A_BINGO | A_CLRALL, TPS_InURLPathStart, HOST, NULL},  /* HOST-typed, then continue in TPS_InURLPathStart */
    1355                 :     {p_iseqC, '/', A_PUSH, TPS_InFURL, 0, NULL},  /* '/' after the port: try a full URL */
    1356                 :     {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, HOST, NULL}
    1357                 : };
    1358                 : 
    1359                 : static const TParserStateActionItem actionTPS_InHostFirstAN[] = {  /* pushed by the '-' and '_' rows of the host-related tables */
    1360                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1361                 :     {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},  /* a digit or p_isasclet letter must follow */
    1362                 :     {p_isasclet, 0, A_NEXT, TPS_InHost, 0, NULL},
    1363                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1364                 : };
    1365                 : 
    1366                 : static const TParserStateActionItem actionTPS_InHost[] = {  /* host-name characters; this table has no A_BINGO rows */
    1367                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1368                 :     {p_isdigit, 0, A_NEXT, TPS_InHost, 0, NULL},
    1369                 :     {p_isasclet, 0, A_NEXT, TPS_InHost, 0, NULL},
    1370                 :     {p_iseqC, '@', A_PUSH, TPS_InEmail, 0, NULL},
    1371                 :     {p_iseqC, '.', A_PUSH, TPS_InHostFirstDomain, 0, NULL},  /* a '.' is needed to get on towards TPS_InHostDomain */
    1372                 :     {p_iseqC, '-', A_PUSH, TPS_InHostFirstAN, 0, NULL},
    1373                 :     {p_iseqC, '_', A_PUSH, TPS_InHostFirstAN, 0, NULL},
    1374                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1375                 : };
    1376                 : 
    1377                 : static const TParserStateActionItem actionTPS_InEmail[] = {  /* pushed by the '@' rows; note there is no p_isEOF row */
    1378                 :     {p_isstophost, 0, A_POP, TPS_Null, 0, NULL},  /* NOTE(review): p_isstophost and p_ishost are defined outside this view */
    1379                 :     {p_ishost, 0, A_BINGO | A_CLRALL, TPS_Base, EMAIL, NULL},  /* a p_ishost match after '@' is typed EMAIL */
    1380                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1381                 : };
    1382                 : 
    1383                 : static const TParserStateActionItem actionTPS_InFileFirst[] = {  /* first character after a '/' in a file path */
    1384                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1385                 :     {p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
    1386                 :     {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
    1387                 :     {p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL},  /* component starting with '.' */
    1388                 :     {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
    1389                 :     {p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL},  /* component starting with '~' */
    1390                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1391                 : };
    1392                 : 
    1393                 : static const TParserStateActionItem actionTPS_InFileTwiddle[] = {  /* pushed by the '~' row of actionTPS_InFileFirst */
    1394                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1395                 :     {p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},  /* "~name" */
    1396                 :     {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
    1397                 :     {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
    1398                 :     {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},  /* "~/" starts the next component */
    1399                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1400                 : };
    1401                 : 
    1402                 : static const TParserStateActionItem actionTPS_InPathFirst[] = {  /* after the '.' accepted in actionTPS_InFileFirst */
    1403                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1404                 :     {p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},  /* ".name" */
    1405                 :     {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
    1406                 :     {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
    1407                 :     {p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},  /* ".." */
    1408                 :     {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},  /* "./" */
    1409                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1410                 : };
    1411                 : 
    1412                 : static const TParserStateActionItem actionTPS_InPathFirstFirst[] = {  /* same '.' and '/' rows as actionTPS_InPathFirst, without the file-name rows */
    1413                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1414                 :     {p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL},
    1415                 :     {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL},
    1416                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1417                 : };
    1418                 : 
    1419                 : static const TParserStateActionItem actionTPS_InPathSecond[] = {  /* after ".."; A_BINGO rows carry FILEPATH */
    1420                 :     {p_isEOF, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
    1421                 :     {p_iseqC, '/', A_NEXT | A_PUSH, TPS_InFileFirst, 0, NULL},  /* "../": try to continue the path */
    1422                 :     {p_iseqC, '/', A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},  /* second '/' row: presumably the fallback when the row above pops back - confirm */
    1423                 :     {p_isspace, 0, A_BINGO | A_CLEAR, TPS_Base, FILEPATH, NULL},
    1424                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1425                 : };
    1426                 : 
    1427                 : static const TParserStateActionItem actionTPS_InFile[] = {  /* file-path characters; A_BINGO rows carry FILEPATH */
    1428                 :     {p_isEOF, 0, A_BINGO, TPS_Base, FILEPATH, NULL},
    1429                 :     {p_isasclet, 0, A_NEXT, TPS_InFile, 0, NULL},
    1430                 :     {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL},
    1431                 :     {p_iseqC, '.', A_PUSH, TPS_InFileNext, 0, NULL},  /* '.' is checked by actionTPS_InFileNext before it is kept */
    1432                 :     {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL},
    1433                 :     {p_iseqC, '-', A_NEXT, TPS_InFile, 0, NULL},
    1434                 :     {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL},  /* '/' starts the next component */
    1435                 :     {NULL, 0, A_BINGO, TPS_Base, FILEPATH, NULL}
    1436                 : };
    1437                 : 
    1438                 : static const TParserStateActionItem actionTPS_InFileNext[] = {  /* pushed by a '.' row (e.g. in actionTPS_InFile) */
    1439                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1440                 :     {p_isasclet, 0, A_CLEAR, TPS_InFile, 0, NULL},  /* letter, digit or '_' after '.': back to TPS_InFile with A_CLEAR */
    1441                 :     {p_isdigit, 0, A_CLEAR, TPS_InFile, 0, NULL},
    1442                 :     {p_iseqC, '_', A_CLEAR, TPS_InFile, 0, NULL},
    1443                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1444                 : };
    1445                 : 
    1446                 : static const TParserStateActionItem actionTPS_InURLPathFirst[] = {  /* first URL-path character must satisfy p_isurlchar */
    1447                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1448                 :     {p_isurlchar, 0, A_NEXT, TPS_InURLPath, 0, NULL},
    1449                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL},  /* last row; the trailing comma is legal but unlike the other tables */
    1450                 : };
    1451                 : 
    1452                 : static const TParserStateActionItem actionTPS_InURLPathStart[] = {  /* next state of the p_isstophost rows in the host and port tables */
    1453                 :     {NULL, 0, A_NEXT, TPS_InURLPath, 0, NULL}  /* single unconditional row: A_NEXT into TPS_InURLPath */
    1454                 : };
    1455                 : 
    1456                 : static const TParserStateActionItem actionTPS_InURLPath[] = {  /* URL path characters; A_BINGO rows carry URLPATH */
    1457                 :     {p_isEOF, 0, A_BINGO, TPS_Base, URLPATH, NULL},
    1458                 :     {p_isurlchar, 0, A_NEXT, TPS_InURLPath, 0, NULL},
    1459                 :     {NULL, 0, A_BINGO, TPS_Base, URLPATH, NULL}  /* first non-p_isurlchar character ends the path */
    1460                 : };
    1461                 : 
    1462                 : static const TParserStateActionItem actionTPS_InFURL[] = {  /* pushed by the '/' rows of the host and port tables */
    1463                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1464                 :     {p_isURLPath, 0, A_BINGO | A_CLRALL, TPS_Base, URL_T, SpecialFURL},  /* NOTE(review): p_isURLPath and SpecialFURL are defined outside this view */
    1465                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1466                 : };
    1467                 : 
    1468                 : static const TParserStateActionItem actionTPS_InProtocolFirst[] = {  /* pushed by a ':' row: expects the first '/' of "://" */
    1469                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1470                 :     {p_iseqC, '/', A_NEXT, TPS_InProtocolSecond, 0, NULL},
    1471                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1472                 : };
    1473                 : 
    1474                 : static const TParserStateActionItem actionTPS_InProtocolSecond[] = {  /* after ":/": expects the second '/' */
    1475                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1476                 :     {p_iseqC, '/', A_NEXT, TPS_InProtocolEnd, 0, NULL},
    1477                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1478                 : };
    1479                 : 
    1480                 : static const TParserStateActionItem actionTPS_InProtocolEnd[] = {  /* reached once "://" has been accepted */
    1481                 :     {NULL, 0, A_BINGO | A_CLRALL, TPS_Base, PROTOCOL, NULL}  /* single unconditional row, typed PROTOCOL */
    1482                 : };
    1483                 : 
    1484                 : static const TParserStateActionItem actionTPS_InHyphenAsciiWordFirst[] = {  /* character after '-' while the hyphenated word is still all p_isasclet */
    1485                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1486                 :     {p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWord, 0, NULL},  /* p_isasclet is tested before p_isalpha, so order matters here */
    1487                 :     {p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
    1488                 :     {p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
    1489                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1490                 : };
    1491                 : 
    1492                 : static const TParserStateActionItem actionTPS_InHyphenAsciiWord[] = {  /* hyphenated word, all p_isasclet so far; A_BINGO rows carry ASCIIHWORD */
    1493                 :     {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, ASCIIHWORD, SpecialHyphen},  /* NOTE(review): SpecialHyphen is defined outside this view */
    1494                 :     {p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWord, 0, NULL},
    1495                 :     {p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},  /* other letters/specials switch to TPS_InHyphenWord */
    1496                 :     {p_isspecial, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
    1497                 :     {p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},  /* a digit switches to TPS_InHyphenNumWord */
    1498                 :     {p_iseqC, '-', A_PUSH, TPS_InHyphenAsciiWordFirst, 0, NULL},
    1499                 :     {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, ASCIIHWORD, SpecialHyphen}  /* next state is TPS_InParseHyphen, not TPS_Base */
    1500                 : };
    1501                 : 
    1502                 : static const TParserStateActionItem actionTPS_InHyphenWordFirst[] = {    /* pushed from TPS_InHyphenWord on '-'; pops unless p_isalpha or p_isdigit accepts the next character */
    1503                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1504                 :     {p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
    1505                 :     {p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
    1506                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1507                 : };
    1508                 : 
    1509                 : static const TParserStateActionItem actionTPS_InHyphenWord[] = {    /* EOF or an unmatched character ends the token: call SpecialHyphen, report HWORD, continue in TPS_InParseHyphen */
    1510                 :     {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HWORD, SpecialHyphen},
    1511                 :     {p_isalpha, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
    1512                 :     {p_isspecial, 0, A_NEXT, TPS_InHyphenWord, 0, NULL},
    1513                 :     {p_isdigit, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
    1514                 :     {p_iseqC, '-', A_PUSH, TPS_InHyphenWordFirst, 0, NULL},    /* look ahead past the '-'; a POP resumes at the default rule below */
    1515                 :     {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, HWORD, SpecialHyphen}
    1516                 : };
    1517                 : 
    1518                 : static const TParserStateActionItem actionTPS_InHyphenNumWordFirst[] = {    /* pushed from TPS_InHyphenNumWord on '-'; pops unless p_isalpha or p_isdigit accepts the next character */
    1519                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1520                 :     {p_isalpha, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
    1521                 :     {p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
    1522                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1523                 : };
    1524                 : 
    1525                 : static const TParserStateActionItem actionTPS_InHyphenNumWord[] = {    /* EOF or an unmatched character ends the token: call SpecialHyphen, report NUMHWORD, continue in TPS_InParseHyphen */
    1526                 :     {p_isEOF, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen},
    1527                 :     {p_isalnum, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
    1528                 :     {p_isspecial, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
    1529                 :     {p_iseqC, '-', A_PUSH, TPS_InHyphenNumWordFirst, 0, NULL},    /* look ahead past the '-'; a POP resumes at the default rule below */
    1530                 :     {NULL, 0, A_BINGO | A_CLRALL, TPS_InParseHyphen, NUMHWORD, SpecialHyphen}
    1531                 : };
    1532                 : 
    1533                 : static const TParserStateActionItem actionTPS_InHyphenDigitLookahead[] = {    /* stays here while p_isdigit matches; p_isalpha/p_isspecial moves to TPS_InHyphenNumWord; EOF or anything else pops */
    1534                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1535                 :     {p_isdigit, 0, A_NEXT, TPS_InHyphenDigitLookahead, 0, NULL},
    1536                 :     {p_isalpha, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
    1537                 :     {p_isspecial, 0, A_NEXT, TPS_InHyphenNumWord, 0, NULL},
    1538                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1539                 : };
    1540                 : 
    1541                 : static const TParserStateActionItem actionTPS_InParseHyphen[] = {    /* dispatch on the next character to one of the *Part states; EOF or no match reruns the same position in TPS_Base */
    1542                 :     {p_isEOF, 0, A_RERUN, TPS_Base, 0, NULL},
    1543                 :     {p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWordPart, 0, NULL},
    1544                 :     {p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
    1545                 :     {p_isdigit, 0, A_PUSH, TPS_InHyphenUnsignedInt, 0, NULL},
    1546                 :     {p_iseqC, '-', A_PUSH, TPS_InParseHyphenHyphen, 0, NULL},
    1547                 :     {NULL, 0, A_RERUN, TPS_Base, 0, NULL}
    1548                 : };
    1549                 : 
    1550                 : static const TParserStateActionItem actionTPS_InParseHyphenHyphen[] = {    /* pushed from TPS_InParseHyphen on '-'; if p_isalnum/p_isspecial accepts the next character, report SPACE, discard the pushed state and return to TPS_InParseHyphen; otherwise pop */
    1551                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1552                 :     {p_isalnum, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL},
    1553                 :     {p_isspecial, 0, A_BINGO | A_CLEAR, TPS_InParseHyphen, SPACE, NULL},
    1554                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1555                 : };
    1556                 : 
    1557                 : static const TParserStateActionItem actionTPS_InHyphenWordPart[] = {    /* reports PARTHWORD when the part ends: at EOF go to TPS_Base, on an unmatched character go back to TPS_InParseHyphen */
    1558                 :     {p_isEOF, 0, A_BINGO, TPS_Base, PARTHWORD, NULL},
    1559                 :     {p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
    1560                 :     {p_isspecial, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
    1561                 :     {p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
    1562                 :     {NULL, 0, A_BINGO, TPS_InParseHyphen, PARTHWORD, NULL}
    1563                 : };
    1564                 : 
    1565                 : static const TParserStateActionItem actionTPS_InHyphenAsciiWordPart[] = {    /* reports ASCIIPARTHWORD when the part ends: at EOF go to TPS_Base, on an unmatched character go back to TPS_InParseHyphen */
    1566                 :     {p_isEOF, 0, A_BINGO, TPS_Base, ASCIIPARTHWORD, NULL},
    1567                 :     {p_isasclet, 0, A_NEXT, TPS_InHyphenAsciiWordPart, 0, NULL},
    1568                 :     {p_isalpha, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
    1569                 :     {p_isspecial, 0, A_NEXT, TPS_InHyphenWordPart, 0, NULL},
    1570                 :     {p_isdigit, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
    1571                 :     {NULL, 0, A_BINGO, TPS_InParseHyphen, ASCIIPARTHWORD, NULL}
    1572                 : };
    1573                 : 
    1574                 : static const TParserStateActionItem actionTPS_InHyphenNumWordPart[] = {    /* reports NUMPARTHWORD when the part ends: at EOF go to TPS_Base, on an unmatched character go back to TPS_InParseHyphen */
    1575                 :     {p_isEOF, 0, A_BINGO, TPS_Base, NUMPARTHWORD, NULL},
    1576                 :     {p_isalnum, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
    1577                 :     {p_isspecial, 0, A_NEXT, TPS_InHyphenNumWordPart, 0, NULL},
    1578                 :     {NULL, 0, A_BINGO, TPS_InParseHyphen, NUMPARTHWORD, NULL}
    1579                 : };
    1580                 : 
    1581                 : static const TParserStateActionItem actionTPS_InHyphenUnsignedInt[] = {    /* pushed from TPS_InParseHyphen on a digit; EOF or an unmatched character pops */
    1582                 :     {p_isEOF, 0, A_POP, TPS_Null, 0, NULL},
    1583                 :     {p_isdigit, 0, A_NEXT, TPS_Null, 0, NULL},    /* tostate TPS_Null: TParserGet leaves the current state unchanged */
    1584                 :     {p_isalpha, 0, A_CLEAR, TPS_InHyphenNumWordPart, 0, NULL},    /* discard the pushed state and carry on as a num-word part */
    1585                 :     {p_isspecial, 0, A_CLEAR, TPS_InHyphenNumWordPart, 0, NULL},
    1586                 :     {NULL, 0, A_POP, TPS_Null, 0, NULL}
    1587                 : };
    1588                 : 
    1589                 : 
    1590                 : /*
    1591                 :  * main table of per-state parser actions
                         :  *
                         :  * Each entry ties one TParserState value to the rule array that
                         :  * TParserGet scans while the parser is in that state.
    1592                 :  */
    1593                 : typedef struct
    1594                 : {
    1595                 :     const TParserStateActionItem *action;   /* the actual state info */
    1596                 :     TParserState state;         /* only for Assert crosscheck */
    1597                 : #ifdef WPARSER_TRACE
    1598                 :     const char *state_name;     /* only for debug printout */
    1599                 : #endif
    1600                 : } TParserStateAction;
    1601                 : 
    1602                 : #ifdef WPARSER_TRACE    /* TPARSERSTATEACTION(state) builds one TParserStateAction initializer: the array named action<state>, the state value, and (tracing builds only) CppAsString(state), presumably the state's name as a string */
    1603                 : #define TPARSERSTATEACTION(state) \
    1604                 :     { CppConcat(action,state), state, CppAsString(state) }
    1605                 : #else
    1606                 : #define TPARSERSTATEACTION(state) \
    1607                 :     { CppConcat(action,state), state }
    1608                 : #endif
    1609                 : 
    1610                 : /*
    1611                 :  * order must be the same as in typedef enum {} TParserState!!
                         :  *
                         :  * TParserGet indexes this array directly with the current state value
                         :  * and Asserts that Actions[state].state == state, so an entry that is
                         :  * out of order is caught only in assert-enabled builds.
    1612                 :  */
    1613                 : 
    1614                 : static const TParserStateAction Actions[] = {
    1615                 :     TPARSERSTATEACTION(TPS_Base),
    1616                 :     TPARSERSTATEACTION(TPS_InNumWord),
    1617                 :     TPARSERSTATEACTION(TPS_InAsciiWord),
    1618                 :     TPARSERSTATEACTION(TPS_InWord),
    1619                 :     TPARSERSTATEACTION(TPS_InUnsignedInt),
    1620                 :     TPARSERSTATEACTION(TPS_InSignedIntFirst),
    1621                 :     TPARSERSTATEACTION(TPS_InSignedInt),
    1622                 :     TPARSERSTATEACTION(TPS_InSpace),
    1623                 :     TPARSERSTATEACTION(TPS_InUDecimalFirst),
    1624                 :     TPARSERSTATEACTION(TPS_InUDecimal),
    1625                 :     TPARSERSTATEACTION(TPS_InDecimalFirst),
    1626                 :     TPARSERSTATEACTION(TPS_InDecimal),
    1627                 :     TPARSERSTATEACTION(TPS_InVerVersion),
    1628                 :     TPARSERSTATEACTION(TPS_InSVerVersion),
    1629                 :     TPARSERSTATEACTION(TPS_InVersionFirst),
    1630                 :     TPARSERSTATEACTION(TPS_InVersion),
    1631                 :     TPARSERSTATEACTION(TPS_InMantissaFirst),
    1632                 :     TPARSERSTATEACTION(TPS_InMantissaSign),
    1633                 :     TPARSERSTATEACTION(TPS_InMantissa),
    1634                 :     TPARSERSTATEACTION(TPS_InXMLEntityFirst),
    1635                 :     TPARSERSTATEACTION(TPS_InXMLEntity),
    1636                 :     TPARSERSTATEACTION(TPS_InXMLEntityNumFirst),
    1637                 :     TPARSERSTATEACTION(TPS_InXMLEntityNum),
    1638                 :     TPARSERSTATEACTION(TPS_InXMLEntityHexNumFirst),
    1639                 :     TPARSERSTATEACTION(TPS_InXMLEntityHexNum),
    1640                 :     TPARSERSTATEACTION(TPS_InXMLEntityEnd),
    1641                 :     TPARSERSTATEACTION(TPS_InTagFirst),
    1642                 :     TPARSERSTATEACTION(TPS_InXMLBegin),
    1643                 :     TPARSERSTATEACTION(TPS_InTagCloseFirst),
    1644                 :     TPARSERSTATEACTION(TPS_InTagName),
    1645                 :     TPARSERSTATEACTION(TPS_InTagBeginEnd),
    1646                 :     TPARSERSTATEACTION(TPS_InTag),
    1647                 :     TPARSERSTATEACTION(TPS_InTagEscapeK),
    1648                 :     TPARSERSTATEACTION(TPS_InTagEscapeKK),
    1649                 :     TPARSERSTATEACTION(TPS_InTagBackSleshed),
    1650                 :     TPARSERSTATEACTION(TPS_InTagEnd),
    1651                 :     TPARSERSTATEACTION(TPS_InCommentFirst),
    1652                 :     TPARSERSTATEACTION(TPS_InCommentLast),
    1653                 :     TPARSERSTATEACTION(TPS_InComment),
    1654                 :     TPARSERSTATEACTION(TPS_InCloseCommentFirst),
    1655                 :     TPARSERSTATEACTION(TPS_InCloseCommentLast),
    1656                 :     TPARSERSTATEACTION(TPS_InCommentEnd),
    1657                 :     TPARSERSTATEACTION(TPS_InHostFirstDomain),
    1658                 :     TPARSERSTATEACTION(TPS_InHostDomainSecond),
    1659                 :     TPARSERSTATEACTION(TPS_InHostDomain),
    1660                 :     TPARSERSTATEACTION(TPS_InPortFirst),
    1661                 :     TPARSERSTATEACTION(TPS_InPort),
    1662                 :     TPARSERSTATEACTION(TPS_InHostFirstAN),
    1663                 :     TPARSERSTATEACTION(TPS_InHost),
    1664                 :     TPARSERSTATEACTION(TPS_InEmail),
    1665                 :     TPARSERSTATEACTION(TPS_InFileFirst),
    1666                 :     TPARSERSTATEACTION(TPS_InFileTwiddle),
    1667                 :     TPARSERSTATEACTION(TPS_InPathFirst),
    1668                 :     TPARSERSTATEACTION(TPS_InPathFirstFirst),
    1669                 :     TPARSERSTATEACTION(TPS_InPathSecond),
    1670                 :     TPARSERSTATEACTION(TPS_InFile),
    1671                 :     TPARSERSTATEACTION(TPS_InFileNext),
    1672                 :     TPARSERSTATEACTION(TPS_InURLPathFirst),
    1673                 :     TPARSERSTATEACTION(TPS_InURLPathStart),
    1674                 :     TPARSERSTATEACTION(TPS_InURLPath),
    1675                 :     TPARSERSTATEACTION(TPS_InFURL),
    1676                 :     TPARSERSTATEACTION(TPS_InProtocolFirst),
    1677                 :     TPARSERSTATEACTION(TPS_InProtocolSecond),
    1678                 :     TPARSERSTATEACTION(TPS_InProtocolEnd),
    1679                 :     TPARSERSTATEACTION(TPS_InHyphenAsciiWordFirst),
    1680                 :     TPARSERSTATEACTION(TPS_InHyphenAsciiWord),
    1681                 :     TPARSERSTATEACTION(TPS_InHyphenWordFirst),
    1682                 :     TPARSERSTATEACTION(TPS_InHyphenWord),
    1683                 :     TPARSERSTATEACTION(TPS_InHyphenNumWordFirst),
    1684                 :     TPARSERSTATEACTION(TPS_InHyphenNumWord),
    1685                 :     TPARSERSTATEACTION(TPS_InHyphenDigitLookahead),
    1686                 :     TPARSERSTATEACTION(TPS_InParseHyphen),
    1687                 :     TPARSERSTATEACTION(TPS_InParseHyphenHyphen),
    1688                 :     TPARSERSTATEACTION(TPS_InHyphenWordPart),
    1689                 :     TPARSERSTATEACTION(TPS_InHyphenAsciiWordPart),
    1690                 :     TPARSERSTATEACTION(TPS_InHyphenNumWordPart),
    1691                 :     TPARSERSTATEACTION(TPS_InHyphenUnsignedInt)
    1692                 : };
    1693                 : 
    1694                 : 
    1695                 : static bool    /*
                         :  * TParserGet: run the state machine from the current position until
                         :  * one token has been recognized.
                         :  *
                         :  * Returns false at once if the position is already at or past the end
                         :  * of the input string.  Otherwise returns true iff the loop stopped on
                         :  * a rule flagged A_BINGO; in that case prs->token points at the token
                         :  * start and prs->lenbytetoken, prs->lenchartoken and prs->type
                         :  * describe it.
                         :  *
                         :  * Per iteration: pick the rule array for the current state (or resume
                         :  * just after the rule that did a PUSH, following a POP), find the
                         :  * first rule whose test accepts the current character (a rule with a
                         :  * NULL test always matches), then apply that rule's special handler,
                         :  * flags and target state.
                         :  */
    1696           14438 : TParserGet(TParser *prs)
    1697                 : {
    1698           14438 :     const TParserStateActionItem *item = NULL;
    1699                 : 
    1700           14438 :     Assert(prs->state);
    1701                 : 
    1702           14438 :     if (prs->state->posbyte >= prs->lenstr)
    1703            2365 :         return false;
    1704                 : 
    1705           12073 :     prs->token = prs->str + prs->state->posbyte;
    1706           12073 :     prs->state->pushedAtAction = NULL;
    1707                 : 
    1708                 :     /* look at string */
    1709           51585 :     while (prs->state->posbyte <= prs->lenstr)    /* "<=" so that one pass runs at EOF with charlen 0 */
    1710                 :     {
    1711           51585 :         if (prs->state->posbyte == prs->lenstr)
    1712            2440 :             prs->state->charlen = 0;
    1713                 :         else
    1714           98290 :             prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen :
    1715           49145 :                 pg_mblen(prs->str + prs->state->posbyte);
    1716                 : 
    1717           51585 :         Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr);
    1718           51585 :         Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);
    1719           51585 :         Assert(Actions[prs->state->state].state == prs->state->state);
    1720                 : 
    1721           51585 :         if (prs->state->pushedAtAction)
    1722                 :         {
    1723                 :             /* After a POP, pick up at the next test */
    1724            1296 :             item = prs->state->pushedAtAction + 1;
    1725            1296 :             prs->state->pushedAtAction = NULL;
    1726                 :         }
    1727                 :         else
    1728                 :         {
    1729           50289 :             item = Actions[prs->state->state].action;
    1730           50289 :             Assert(item != NULL);
    1731                 :         }
    1732                 : 
    1733                 :         /* find action by character class */
    1734          277734 :         while (item->isclass)    /* a NULL isclass marks the array's final, always-matching rule */
    1735                 :         {
    1736          262062 :             prs->c = item->c;    /* argument for tests that compare against a fixed character */
    1737          262062 :             if (item->isclass(prs) != 0)
    1738           35913 :                 break;
    1739          226149 :             item++;
    1740                 :         }
    1741                 : 
    1742                 : #ifdef WPARSER_TRACE
    1743                 :         {
    1744                 :             TParserPosition *ptr;
    1745                 : 
    1746                 :             fprintf(stderr, "state ");
    1747                 :             /* indent according to stack depth */
    1748                 :             for (ptr = prs->state->prev; ptr; ptr = ptr->prev)
    1749                 :                 fprintf(stderr, "  ");
    1750                 :             fprintf(stderr, "%s ", Actions[prs->state->state].state_name);
    1751                 :             if (prs->state->posbyte < prs->lenstr)
    1752                 :                 fprintf(stderr, "at %c", *(prs->str + prs->state->posbyte));
    1753                 :             else
    1754                 :                 fprintf(stderr, "at EOF");
    1755                 :             fprintf(stderr, " matched rule %d flags%s%s%s%s%s%s%s%s%s%s%s\n",
    1756                 :                     (int) (item - Actions[prs->state->state].action),
    1757                 :                     (item->flags & A_BINGO) ? " BINGO" : "",
    1758                 :                     (item->flags & A_POP) ? " POP" : "",
    1759                 :                     (item->flags & A_PUSH) ? " PUSH" : "",
    1760                 :                     (item->flags & A_RERUN) ? " RERUN" : "",
    1761                 :                     (item->flags & A_CLEAR) ? " CLEAR" : "",
    1762                 :                     (item->flags & A_MERGE) ? " MERGE" : "",
    1763                 :                     (item->flags & A_CLRALL) ? " CLRALL" : "",
    1764                 :                     (item->tostate != TPS_Null) ? " tostate " : "",
    1765                 :                     (item->tostate != TPS_Null) ? Actions[item->tostate].state_name : "",
    1766                 :                     (item->type > 0) ? " type " : "",
    1767                 :                     tok_alias[item->type]);
    1768                 :         }
    1769                 : #endif
    1770                 : 
    1771                 :         /* call special handler if exists */
    1772           51585 :         if (item->special)
    1773             210 :             item->special(prs);
    1774                 : 
    1775                 :         /* BINGO, token is found */
    1776           51585 :         if (item->flags & A_BINGO)
    1777                 :         {
    1778           12073 :             Assert(item->type > 0);
    1779           12073 :             prs->lenbytetoken = prs->state->lenbytetoken;
    1780           12073 :             prs->lenchartoken = prs->state->lenchartoken;
    1781           12073 :             prs->state->lenbytetoken = prs->state->lenchartoken = 0;
    1782           12073 :             prs->type = item->type;
    1783                 :         }
    1784                 : 
    1785                 :         /* do various actions by flags */
    1786           51585 :         if (item->flags & A_POP)    /* the stack flags are tested as an else-if chain: at most one of POP/PUSH/CLEAR/CLRALL/MERGE is acted on */
    1787                 :         {                       /* pop stored state in stack */
    1788            1305 :             TParserPosition *ptr = prs->state->prev;
    1789                 : 
    1790            1305 :             pfree(prs->state);
    1791            1305 :             prs->state = ptr;
    1792            1305 :             Assert(prs->state);
    1793                 :         }
    1794           50280 :         else if (item->flags & A_PUSH)
    1795                 :         {                       /* push (store) state in stack */
    1796            2544 :             prs->state->pushedAtAction = item;    /* remember where we push */
    1797            2544 :             prs->state = newTParserPosition(prs->state);
    1798                 :         }
    1799           47736 :         else if (item->flags & A_CLEAR)
    1800                 :         {                       /* clear previous pushed state */
    1801                 :             TParserPosition *ptr;
    1802                 : 
    1803             249 :             Assert(prs->state->prev);
    1804             249 :             ptr = prs->state->prev->prev;
    1805             249 :             pfree(prs->state->prev);
    1806             249 :             prs->state->prev = ptr;
    1807                 :         }
    1808           47487 :         else if (item->flags & A_CLRALL)
    1809                 :         {                       /* clear all previous pushed state */
    1810                 :             TParserPosition *ptr;
    1811                 : 
    1812            1389 :             while (prs->state->prev)
    1813                 :             {
    1814             999 :                 ptr = prs->state->prev->prev;
    1815             999 :                 pfree(prs->state->prev);
    1816             999 :                 prs->state->prev = ptr;
    1817                 :             }
    1818                 :         }
    1819           47097 :         else if (item->flags & A_MERGE)
    1820                 :         {                       /* merge posinfo with current and pushed state */
    1821 UBC           0 :             TParserPosition *ptr = prs->state;
    1822                 : 
    1823               0 :             Assert(prs->state->prev);
    1824               0 :             prs->state = prs->state->prev;
    1825                 : 
    1826               0 :             prs->state->posbyte = ptr->posbyte;
    1827               0 :             prs->state->poschar = ptr->poschar;
    1828               0 :             prs->state->charlen = ptr->charlen;
    1829               0 :             prs->state->lenbytetoken = ptr->lenbytetoken;
    1830               0 :             prs->state->lenchartoken = ptr->lenchartoken;
    1831               0 :             pfree(ptr);
    1832                 :         }
    1833                 : 
    1834                 :         /* set new state if pointed */
    1835 CBC       51585 :         if (item->tostate != TPS_Null)    /* TPS_Null as tostate means "keep the current state" */
    1836           33077 :             prs->state->state = item->tostate;
    1837                 : 
    1838                 :         /* check for go away */
    1839           51585 :         if ((item->flags & A_BINGO) ||
    1840           39512 :             (prs->state->posbyte >= prs->lenstr &&
    1841 UBC           0 :              (item->flags & A_RERUN) == 0))    /* at end of input, only an A_RERUN rule keeps the loop going */
    1842                 :             break;
    1843                 : 
    1844                 :         /* go to beginning of loop if we should rerun or we just restore state */
    1845 CBC       39512 :         if (item->flags & (A_RERUN | A_POP))
    1846            1317 :             continue;
    1847                 : 
    1848                 :         /* move forward */
    1849           38195 :         if (prs->state->charlen)    /* charlen is 0 only at EOF, where there is nothing to consume */
    1850                 :         {
    1851           38195 :             prs->state->posbyte += prs->state->charlen;
    1852           38195 :             prs->state->lenbytetoken += prs->state->charlen;
    1853           38195 :             prs->state->poschar++;
    1854           38195 :             prs->state->lenchartoken++;
    1855                 :         }
    1856                 :     }
    1857                 : 
    1858           12073 :     return (item && (item->flags & A_BINGO));
    1859                 : }
    1860                 : 
    1861                 : Datum    /*
                         :  * prsd_lextype: return a palloc'd array of LASTNUM + 1 LexDescr
                         :  * entries.  Entry i-1 describes token type i (1..LASTNUM), with the
                         :  * alias copied from tok_alias[i] and the description from
                         :  * lex_descr[i].  The last entry has only lexid set, to 0
                         :  * (presumably the end-of-list marker for callers).
                         :  */
    1862           25703 : prsd_lextype(PG_FUNCTION_ARGS)
    1863                 : {
    1864           25703 :     LexDescr   *descr = (LexDescr *) palloc(sizeof(LexDescr) * (LASTNUM + 1));
    1865                 :     int         i;
    1866                 : 
    1867          616872 :     for (i = 1; i <= LASTNUM; i++)
    1868                 :     {
    1869          591169 :         descr[i - 1].lexid = i;
    1870          591169 :         descr[i - 1].alias = pstrdup(tok_alias[i]);
    1871          591169 :         descr[i - 1].descr = pstrdup(lex_descr[i]);
    1872                 :     }
    1873                 : 
    1874           25703 :     descr[LASTNUM].lexid = 0;    /* alias and descr of this extra entry are left unset */
    1875                 : 
    1876           25703 :     PG_RETURN_POINTER(descr);
    1877                 : }
    1878                 : 
    1879                 : Datum    /* prsd_start: build a parser with TParserInit from argument 0 (a char pointer) and argument 1 (an int32, presumably the text length) and return it as a pointer Datum */
    1880            2365 : prsd_start(PG_FUNCTION_ARGS)
    1881                 : {
    1882            2365 :     PG_RETURN_POINTER(TParserInit((char *) PG_GETARG_POINTER(0), PG_GETARG_INT32(1)));
    1883                 : }
    1884                 : 
    1885                 : Datum    /*
                         :  * prsd_nexttoken: fetch the next token from the parser passed as
                         :  * argument 0.  Returns 0 when TParserGet finds no token; otherwise
                         :  * stores the token start in *t (argument 1) and its length in bytes
                         :  * in *tlen (argument 2) and returns the token type.  *t points into
                         :  * the parser's own string; it is not a copy.
                         :  */
    1886           14318 : prsd_nexttoken(PG_FUNCTION_ARGS)
    1887                 : {
    1888           14318 :     TParser    *p = (TParser *) PG_GETARG_POINTER(0);
    1889           14318 :     char      **t = (char **) PG_GETARG_POINTER(1);
    1890           14318 :     int        *tlen = (int *) PG_GETARG_POINTER(2);
    1891                 : 
    1892           14318 :     if (!TParserGet(p))
    1893            2365 :         PG_RETURN_INT32(0);
    1894                 : 
    1895           11953 :     *t = p->token;
    1896           11953 :     *tlen = p->lenbytetoken;
    1897                 : 
    1898           11953 :     PG_RETURN_INT32(p->type);
    1899                 : }
    1900                 : 
    1901                 : Datum    /* prsd_end: hand the parser passed as argument 0 to TParserClose; returns void */
    1902            2365 : prsd_end(PG_FUNCTION_ARGS)
    1903                 : {
    1904            2365 :     TParser    *p = (TParser *) PG_GETARG_POINTER(0);
    1905                 : 
    1906            2365 :     TParserClose(p);
    1907            2365 :     PG_RETURN_VOID();
    1908                 : }
    1909                 : 
    1910                 : 
    1911                 : /*
    1912                 :  * ts_headline support begins here
    1913                 :  */
    1914                 : 
    1915                 : /* token type classification macros; each takes a token type code and yields nonzero if it belongs to the class */
    1916                 : #define TS_IDIGNORE(x)  ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
    1917                 : #define HLIDREPLACE(x)  ( (x)==TAG_T )
    1918                 : #define HLIDSKIP(x)     ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
    1919                 : #define XMLHLIDSKIP(x)  ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )    /* currently expands to the same test as HLIDSKIP */
    1920                 : #define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
    1921                 : #define NOENDTOKEN(x)   ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL_T || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )    /* non-word tokens, the numeric token types, and everything TS_IDIGNORE accepts */
    1922                 : 
    1923                 : /*
    1924                 :  * Macros useful in headline selection.  These rely on availability of
    1925                 :  * "HeadlineParsedText *prs" describing some text, and "int shortword"
    1926                 :  * describing the "short word" length parameter.
    1927                 :  */
    1928                 : 
    1929                 : /* Interesting words are non-repeated search terms
                         :  * (words[j].item is set and words[j].repeated is not) */
    1930                 : #define INTERESTINGWORD(j) \
    1931                 :     (prs->words[j].item && !prs->words[j].repeated)
    1932                 : 
    1933                 : /* Don't want to end at a non-word or a short word, unless interesting
                         :  * ("short" means words[j].len <= shortword) */
    1934                 : #define BADENDPOINT(j) \
    1935                 :     ((NOENDTOKEN(prs->words[j].type) || prs->words[j].len <= shortword) && \
    1936                 :      !INTERESTINGWORD(j))
    1937                 : 
    1938                 : typedef struct
    1939                 : {
    1940                 :     /* one cover (well, really one fragment) for mark_hl_fragments */
    1941                 :     int32       startpos;       /* fragment's starting word index */
    1942                 :     int32       endpos;         /* ending word index (inclusive) */
    1943                 :     int32       poslen;         /* number of interesting words */
    1944                 :     int32       curlen;         /* total number of words */
    1945                 :     bool        chosen;         /* chosen? (NOTE(review): presumably set once the fragment is picked for output; confirm in mark_hl_fragments) */
    1946                 :     bool        excluded;       /* excluded? (NOTE(review): presumably set when the fragment is ruled out; confirm in mark_hl_fragments) */
    1947                 : } CoverPos;
    1948                 : 
    1949                 : typedef struct
    1950                 : {
    1951                 :     /* callback data for checkcondition_HL */
    1952                 :     HeadlineWordEntry *words;    /* array that checkcondition_HL scans for entries whose item is the operand */
    1953                 :     int         len;             /* number of entries in words[] */
    1954                 : } hlCheck;
    1955                 : 
    1956                 : 
    1957                 : /*
    1958                 :  * TS_execute callback for matching a tsquery operand to headline words
    1959                 :  *
    1960                 :  * Note: it's tempting to report words[] indexes as pos values to save
    1961                 :  * searching in hlCover; but that would screw up phrase matching, which
    1962                 :  * expects to measure distances in lexemes not tokens.
                         :  *
                         :  * opaque is an hlCheck holding the words array and its length; val is
                         :  * the operand to look for.  If data is NULL, returns TS_YES at the
                         :  * first word whose item is val.  Otherwise the pos values of matching
                         :  * words are collected into data->pos (allocated here on first match if
                         :  * it was NULL) and TS_YES is returned if data->npos ends up above zero.
                         :  * Returns TS_NO in all other cases.
    1963                 :  */
    1964                 : static TSTernaryValue
    1965 GIC         500 : checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
    1966                 : {
    1967             500 :     hlCheck    *checkval = (hlCheck *) opaque;
    1968                 :     int         i;
    1969 ECB             : 
    1970                 :     /* scan words array for matching items */
    1971 CBC       12725 :     for (i = 0; i < checkval->len; i++)
    1972                 :     {
    1973 GIC       12325 :         if (checkval->words[i].item == val)
    1974                 :         {
    1975 ECB             :             /* if data == NULL, don't need to report positions */
    1976 GIC         437 :             if (!data)
    1977 CBC         100 :                 return TS_YES;
    1978                 : 
    1979 GIC         337 :             if (!data->pos)
    1980 ECB             :             {
    1981 CBC         238 :                 data->pos = palloc(sizeof(WordEntryPos) * checkval->len);    /* room for the case where every word matches */
    1982 GIC         238 :                 data->allocated = true;
    1983 CBC         238 :                 data->npos = 1;
    1984 GIC         238 :                 data->pos[0] = checkval->words[i].pos;
    1985 ECB             :             }
    1986 CBC          99 :             else if (data->pos[data->npos - 1] < checkval->words[i].pos)    /* append only strictly larger positions, so an equal pos is not stored twice */
    1987 ECB             :             {
    1988 CBC          99 :                 data->pos[data->npos++] = checkval->words[i].pos;
    1989                 :             }
    1990 ECB             :         }
    1991                 :     }
    1992                 : 
    1993 GIC         400 :     if (data && data->npos > 0)
    1994             238 :         return TS_YES;
    1995                 : 
    1996             162 :     return TS_NO;
    1997 ECB             : }
    1998                 : 
    1999                 : /*
    2000                 :  * hlCover: try to find a substring of prs' word list that satisfies query
    2001                 :  *
    2002                 :  * locations is the result of TS_execute_locations() for the query.
    2003                 :  * We use this to identify plausible subranges of prs' word list to test against the query.
    2004                 :  *
    2005                 :  * *nextpos is the lexeme position (NOT word index) to start the search
    2006                 :  * at.  Caller should initialize this to zero.  If successful, we'll
    2007                 :  * advance it to the next place to search at.
    2008                 :  *
    2009                 :  * On success, sets *p to first word index and *q to last word index of the
    2010                 :  * cover substring, and returns true.  On failure, returns false and leaves *nextpos, *p and *q unmodified.
    2011                 :  *
    2012                 :  * The result is a minimal cover, in the sense that both *p and *q will be
    2013                 :  * words used in the query.
    2014                 :  */
    2015                 : static bool
    2016 GNC         281 : hlCover(HeadlineParsedText *prs, TSQuery query, List *locations,
    2017                 :         int *nextpos, int *p, int *q)
    2018                 : {
    2019             281 :     int         pos = *nextpos;  /* working copy of the search position; *nextpos itself is written only on success */
    2020 ECB             : 
    2021                 :     /* This loop repeats when our selected word-range fails the query */
    2022                 :     for (;;)
    2023 CBC          30 :     {
    2024                 :         int         posb,  /* lexeme position where the candidate range begins */
    2025                 :                     pose;  /* lexeme position where the candidate range ends */
    2026                 :         ListCell   *lc;
    2027 ECB             : 
    2028                 :         /*
    2029                 :          * For each AND'ed query term or phrase, find its first occurrence at
    2030                 :          * or after pos; set pose to the maximum of those positions.
    2031                 :          *
    2032                 :          * We need not consider ORs or NOTs here; see the comments for
    2033                 :          * TS_execute_locations().  Rechecking the match with TS_execute(),
    2034                 :          * below, will deal with any ensuing imprecision.
    2035                 :          */
    2036 GNC         311 :         pose = -1;  /* stays -1 only if the locations list has no entries */
    2037             483 :         foreach(lc, locations)
    2038                 :         {
    2039             233 :             ExecPhraseData *pdata = (ExecPhraseData *) lfirst(lc);
    2040             233 :             int         first = -1;  /* -1 until a position at or after pos is seen */
    2041                 : 
    2042             396 :             for (int i = 0; i < pdata->npos; i++)
    2043                 :             {
    2044                 :                 /* For phrase matches, use the ending lexeme */
    2045             335 :                 int         endp = pdata->pos[i];
    2046                 : 
    2047             335 :                 if (endp >= pos)
    2048                 :                 {
    2049             172 :                     first = endp;
    2050             172 :                     break;
    2051                 :                 }
    2052                 :             }
    2053             233 :             if (first < 0)
    2054              61 :                 return false;   /* no more matches for this term */
    2055             172 :             if (first > pose)
    2056             163 :                 pose = first;
    2057 ECB             :         }
    2058                 : 
    2059 GNC         250 :         if (pose < 0)
    2060             123 :             return false;       /* we only get here if empty list */
    2061                 : 
    2062                 :         /*
    2063                 :          * Now, for each AND'ed query term or phrase, find its last occurrence
    2064                 :          * at or before pose; set posb to the minimum of those positions.
    2065                 :          *
    2066                 :          * We start posb at INT_MAX - 1 to guarantee no overflow if we compute
    2067                 :          * posb + 1 below.
    2068                 :          */
    2069             127 :         posb = INT_MAX - 1;
    2070             293 :         foreach(lc, locations)
    2071                 :         {
    2072             166 :             ExecPhraseData *pdata = (ExecPhraseData *) lfirst(lc);
    2073             166 :             int         last = -1;  /* -1 until a start position at or before pose is seen */
    2074                 : 
    2075             247 :             for (int i = pdata->npos - 1; i >= 0; i--)
    2076                 :             {
    2077                 :                 /* For phrase matches, use the starting lexeme */
    2078             247 :                 int         startp = pdata->pos[i] - pdata->width;
    2079                 : 
    2080             247 :                 if (startp <= pose)
    2081                 :                 {
    2082             166 :                     last = startp;
    2083             166 :                     break;
    2084                 :                 }
    2085                 :             }
    2086             166 :             if (last < posb)
    2087             136 :                 posb = last;
    2088                 :         }
    2089                 : 
    2090                 :         /*
    2091                 :          * We could end up with posb to the left of pos, in case some phrase
    2092                 :          * match crosses pos.  Try the match starting at pos anyway, since the
    2093                 :          * result of TS_execute_locations is imprecise for phrase matches OR'd
    2094                 :          * with plain matches; that is, if the query is "(A <-> B) | C" then C
    2095                 :          * could match at pos even though the phrase match would have to
    2096                 :          * extend to the left of pos.
    2097                 :          */
    2098             127 :         posb = Max(posb, pos);
    2099                 : 
    2100                 :         /* This test probably always succeeds, but be paranoid */
    2101             127 :         if (posb <= pose)
    2102                 :         {
    2103                 :             /*
    2104                 :              * posb .. pose is now the shortest, earliest-after-pos range of
    2105                 :              * lexeme positions containing all the query terms.  It will
    2106                 :              * contain all phrase matches, too, except in the corner case
    2107                 :              * described just above.
    2108                 :              *
    2109                 :              * Now convert these lexeme positions to indexes in prs->words[].
    2110                 :              */
    2111             127 :             int         idxb = -1;  /* index of first word with an item at position >= posb; -1 if none */
    2112             127 :             int         idxe = -1;  /* index of last word with an item at position <= pose; -1 if none */
    2113                 : 
    2114            5812 :             for (int i = 0; i < prs->curwords; i++)
    2115                 :             {
    2116            5748 :                 if (prs->words[i].item == NULL)
    2117            5306 :                     continue;  /* only words with a non-NULL item can bound the cover */
    2118             442 :                 if (idxb < 0 && prs->words[i].pos >= posb)
    2119             127 :                     idxb = i;
    2120             442 :                 if (prs->words[i].pos <= pose)
    2121             379 :                     idxe = i;
    2122                 :                 else
    2123              63 :                     break;  /* first word with an item beyond pose: stop scanning */
    2124                 :             }
    2125                 : 
    2126                 :             /* This test probably always succeeds, but be paranoid */
    2127             127 :             if (idxb >= 0 && idxe >= idxb)
    2128                 :             {
    2129                 :                 /*
    2130                 :                  * Finally, check that the selected range satisfies the query.
    2131                 :                  * This should succeed in all simple cases; but odd cases
    2132                 :                  * involving non-top-level NOT conditions or phrase matches
    2133                 :                  * OR'd with other things could fail, since the result of
    2134                 :                  * TS_execute_locations doesn't fully represent such things.
    2135                 :                  */
    2136                 :                 hlCheck     ch;
    2137                 : 
    2138             127 :                 ch.words = &(prs->words[idxb]);
    2139             127 :                 ch.len = idxe - idxb + 1;  /* recheck only the candidate slice words[idxb..idxe] */
    2140             127 :                 if (TS_execute(GETQUERY(query), &ch,
    2141                 :                                TS_EXEC_EMPTY, checkcondition_HL))
    2142                 :                 {
    2143                 :                     /* Match!  Advance *nextpos and return the word range. */
    2144              97 :                     *nextpos = posb + 1;  /* next call resumes just past this cover's starting lexeme position */
    2145              97 :                     *p = idxb;
    2146              97 :                     *q = idxe;
    2147              97 :                     return true;
    2148                 :                 }
    2149                 :             }
    2150                 :         }
    2151                 : 
    2152                 :         /*
    2153                 :          * Advance pos and try again.  Any later workable match must start
    2154                 :          * beyond posb.
    2155                 :          */
    2156              30 :         pos = posb + 1;
    2157 ECB             :     }
    2158                 :     /* Can't get here, but stupider compilers complain if we leave it off */
    2159                 :     return false;
    2160                 : }
    2161                 : 
    2162                 : /*
    2163                 :  * Apply suitable highlight marking to words selected by headline selector
    2164                 :  *
    2165                 :  * The words from startpos to endpos inclusive (indexes into prs->words[]) are marked per highlightall; nothing is marked when endpos < startpos.
    2166                 :  */
    2167                 : static void
    2168 GIC         193 : mark_fragment(HeadlineParsedText *prs, bool highlightall,
    2169                 :               int startpos, int endpos)
    2170                 : {
    2171                 :     int         i;
    2172                 : 
    2173            2827 :     for (i = startpos; i <= endpos; i++)
    2174 ECB             :     {
    2175 GIC        2634 :         if (prs->words[i].item)
    2176             250 :             prs->words[i].selected = 1;  /* only words with a non-NULL item are flagged selected */
    2177 CBC        2634 :         if (!highlightall)
    2178                 :         {
    2179 GIC        2511 :             if (HLIDREPLACE(prs->words[i].type))  /* normal mode: token type decides replace vs. skip */
    2180 UIC           0 :                 prs->words[i].replace = 1;
    2181 GIC        2511 :             else if (HLIDSKIP(prs->words[i].type))
    2182 UIC           0 :                 prs->words[i].skip = 1;
    2183                 :         }
    2184                 :         else
    2185                 :         {
    2186 GIC         123 :             if (XMLHLIDSKIP(prs->words[i].type))  /* highlightall mode: only XMLHLIDSKIP token types are skipped */
    2187 CBC           3 :                 prs->words[i].skip = 1;
    2188 ECB             :         }
    2189                 : 
    2190 CBC        2634 :         prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;  /* flag the word as part of a marked fragment, unless it is flagged repeated */
    2191                 :     }
    2192             193 : }
    2193 ECB             : 
    2194                 : /*
    2195                 :  * Split a cover substring into fragments not longer than max_words
    2196                 :  *
    2197                 :  * At entry, *startpos and *endpos are the (remaining) bounds of the cover
    2198                 :  * substring.  They are updated to hold the bounds of the next fragment; both are inclusive indexes into prs->words[].
    2199                 :  *
    2200                 :  * *curlen and *poslen are set to the fragment's length, in words and
    2201                 :  * interesting words respectively.
    2202                 :  */
    2203                 : static void
    2204 GIC          18 : get_next_fragment(HeadlineParsedText *prs, int *startpos, int *endpos,
    2205                 :                   int *curlen, int *poslen, int max_words)
    2206                 : {
    2207                 :     int         i;
    2208                 : 
    2209                 :     /*
    2210                 :      * Objective: select a fragment of words between startpos and endpos such
    2211                 :      * that it has at most max_words and both ends have query words. If the
    2212                 :      * startpos and endpos are the endpoints of the cover and the cover has
    2213                 :      * fewer words than max_words, then this function should just return the
    2214 ECB             :      * cover.
    2215                 :      */
    2216                 :     /* first move startpos to an item */
    2217 GIC         444 :     for (i = *startpos; i <= *endpos; i++)
    2218                 :     {
    2219             444 :         *startpos = i;
    2220 CBC         444 :         if (INTERESTINGWORD(i))
    2221              18 :             break;
    2222 ECB             :     }
    2223                 :     /* cut endpos to have only max_words */
    2224 GIC          18 :     *curlen = 0;
    2225              18 :     *poslen = 0;
    2226             480 :     for (i = *startpos; i <= *endpos && *curlen < max_words; i++)
    2227                 :     {
    2228             462 :         if (!NONWORDTOKEN(prs->words[i].type))
    2229             240 :             *curlen += 1;  /* curlen counts only tokens that are not NONWORDTOKEN */
    2230             462 :         if (INTERESTINGWORD(i))
    2231              27 :             *poslen += 1;
    2232 ECB             :     }
    2233                 :     /* if the cover was cut then move back endpos to a query item */
    2234 GIC          18 :     if (*endpos > i)
    2235                 :     {
    2236               6 :         *endpos = i;  /* NOTE(review): i is the first index the counting loop did not examine, so this word was never added to *curlen or *poslen -- confirm intended */
    2237             420 :         for (i = *endpos; i >= *startpos; i--)
    2238                 :         {
    2239             420 :             *endpos = i;
    2240             420 :             if (INTERESTINGWORD(i))
    2241               6 :                 break;
    2242             414 :             if (!NONWORDTOKEN(prs->words[i].type))
    2243             204 :                 *curlen -= 1;  /* a word dropped from the tail no longer counts toward the fragment length */
    2244 ECB             :         }
    2245                 :     }
    2246 GIC          18 : }
    2247                 : 
    2248                 : /*
    2249 ECB             :  * Headline selector used when MaxFragments > 0: collect covers via hlCover(), split them into fragments of at most max_words, then stretch and mark up to max_fragments of the best ones
    2250                 :  *
    2251                 :  * Note: in this mode, highlightall is disregarded for phrase selection;
    2252                 :  * it only controls presentation details.  If no fragment gets marked, the first min_words words are marked instead.
    2253                 :  */
    2254                 : static void
    2255 GNC          15 : mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, List *locations,
    2256                 :                   bool highlightall,
    2257 EUB             :                   int shortword, int min_words,
    2258                 :                   int max_words, int max_fragments)
    2259                 : {
    2260                 :     int32       poslen,
    2261                 :                 curlen,
    2262                 :                 i,
    2263 ECB             :                 f,
    2264 CBC          15 :                 num_f = 0;  /* number of fragments marked so far */
    2265                 :     int32       stretch,
    2266                 :                 maxstretch,
    2267 ECB             :                 posmarker;
    2268                 : 
    2269 CBC          15 :     int32       startpos = 0,
    2270 GIC          15 :                 endpos = 0,
    2271 GNC          15 :                 nextpos = 0,
    2272 GIC          15 :                 p = 0,
    2273              15 :                 q = 0;
    2274                 : 
    2275              15 :     int32       numcovers = 0,
    2276              15 :                 maxcovers = 32;  /* initial capacity of covers[]; doubled on demand below */
    2277                 : 
    2278                 :     int32       minI,
    2279                 :                 minwords,
    2280                 :                 maxitems;
    2281                 :     CoverPos   *covers;
    2282 ECB             : 
    2283 GIC          15 :     covers = palloc(maxcovers * sizeof(CoverPos));
    2284                 : 
    2285                 :     /* get all covers */
    2286 GNC          27 :     while (hlCover(prs, query, locations, &nextpos, &p, &q))
    2287                 :     {
    2288 GIC          12 :         startpos = p;
    2289              12 :         endpos = q;
    2290                 : 
    2291                 :         /*
    2292                 :          * Break the cover into smaller fragments such that each fragment has
    2293                 :          * at most max_words. Also ensure that each end of each fragment is a
    2294                 :          * query word. This will allow us to stretch the fragment in either
    2295 ECB             :          * direction
    2296                 :          */
    2297                 : 
    2298 CBC          30 :         while (startpos <= endpos)
    2299 ECB             :         {
    2300 GIC          18 :             get_next_fragment(prs, &startpos, &endpos, &curlen, &poslen, max_words);
    2301              18 :             if (numcovers >= maxcovers)  /* covers[] is full: double its capacity */
    2302 ECB             :             {
    2303 LBC           0 :                 maxcovers *= 2;
    2304               0 :                 covers = repalloc(covers, sizeof(CoverPos) * maxcovers);
    2305                 :             }
    2306 CBC          18 :             covers[numcovers].startpos = startpos;
    2307              18 :             covers[numcovers].endpos = endpos;
    2308              18 :             covers[numcovers].curlen = curlen;
    2309              18 :             covers[numcovers].poslen = poslen;
    2310 GIC          18 :             covers[numcovers].chosen = false;
    2311              18 :             covers[numcovers].excluded = false;
    2312 CBC          18 :             numcovers++;
    2313 GIC          18 :             startpos = endpos + 1;  /* continue with whatever remains of this cover ... */
    2314 CBC          18 :             endpos = q;  /* ... up to the cover's original last word */
    2315 ECB             :         }
    2316                 :     }
    2317                 : 
    2318                 :     /* choose best covers */
    2319 GIC          33 :     for (f = 0; f < max_fragments; f++)
    2320                 :     {
    2321 CBC          24 :         maxitems = 0;
    2322 GIC          24 :         minwords = PG_INT32_MAX;
    2323              24 :         minI = -1;  /* index of the best remaining cover; -1 if none qualifies */
    2324                 : 
    2325                 :         /*
    2326                 :          * Choose the cover that contains max items. In case of tie choose the
    2327                 :          * one with smaller number of words.
    2328                 :          */
    2329              57 :         for (i = 0; i < numcovers; i++)
    2330 ECB             :         {
    2331 GIC          33 :             if (!covers[i].chosen && !covers[i].excluded &&
    2332              24 :                 (maxitems < covers[i].poslen ||
    2333               6 :                  (maxitems == covers[i].poslen &&
    2334               6 :                   minwords > covers[i].curlen)))
    2335                 :             {
    2336              18 :                 maxitems = covers[i].poslen;
    2337              18 :                 minwords = covers[i].curlen;
    2338              18 :                 minI = i;
    2339 ECB             :             }
    2340                 :         }
    2341                 :         /* if a cover was found mark it */
    2342 GIC          24 :         if (minI >= 0)
    2343                 :         {
    2344 CBC          18 :             covers[minI].chosen = true;
    2345 ECB             :             /* adjust the size of cover */
    2346 CBC          18 :             startpos = covers[minI].startpos;
    2347              18 :             endpos = covers[minI].endpos;
    2348              18 :             curlen = covers[minI].curlen;
    2349                 :             /* stretch the cover if cover size is lower than max_words */
    2350              18 :             if (curlen < max_words)
    2351 ECB             :             {
    2352                 :                 /* divide the stretch on both sides of cover */
    2353 GIC          18 :                 maxstretch = (max_words - curlen) / 2;
    2354                 : 
    2355                 :                 /*
    2356                 :                  * first stretch the startpos stop stretching if 1. we hit the
    2357                 :                  * beginning of document 2. exceed maxstretch 3. we hit an
    2358 ECB             :                  * already marked fragment
    2359                 :                  */
    2360 GIC          18 :                 stretch = 0;
    2361 CBC          18 :                 posmarker = startpos;  /* leftmost index reached while stretching */
    2362 GIC         300 :                 for (i = startpos - 1; i >= 0 && stretch < maxstretch && !prs->words[i].in; i--)
    2363 ECB             :                 {
    2364 CBC         282 :                     if (!NONWORDTOKEN(prs->words[i].type))
    2365                 :                     {
    2366 GIC         135 :                         curlen++;
    2367             135 :                         stretch++;
    2368                 :                     }
    2369             282 :                     posmarker = i;
    2370                 :                 }
    2371                 :                 /* cut back startpos till we find a good endpoint */
    2372              66 :                 for (i = posmarker; i < startpos && BADENDPOINT(i); i++)
    2373 ECB             :                 {
    2374 GIC          48 :                     if (!NONWORDTOKEN(prs->words[i].type))
    2375 CBC          18 :                         curlen--;
    2376 ECB             :                 }
    2377 GIC          18 :                 startpos = i;
    2378 EUB             :                 /* now stretch the endpos as much as possible */
    2379 GBC          18 :                 posmarker = endpos;  /* rightmost index reached while stretching */
    2380 GIC         483 :                 for (i = endpos + 1; i < prs->curwords && curlen < max_words && !prs->words[i].in; i++)
    2381 ECB             :                 {
    2382 CBC         465 :                     if (!NONWORDTOKEN(prs->words[i].type))
    2383             231 :                         curlen++;
    2384             465 :                     posmarker = i;
    2385 ECB             :                 }
    2386                 :                 /* cut back endpos till we find a good endpoint */
    2387 CBC          45 :                 for (i = posmarker; i > endpos && BADENDPOINT(i); i--)
    2388 ECB             :                 {
    2389 CBC          27 :                     if (!NONWORDTOKEN(prs->words[i].type))
    2390 GIC          12 :                         curlen--;
    2391                 :                 }
    2392              18 :                 endpos = i;
    2393                 :             }
    2394 CBC          18 :             covers[minI].startpos = startpos;  /* store the (possibly stretched) bounds and length back */
    2395 GIC          18 :             covers[minI].endpos = endpos;
    2396 CBC          18 :             covers[minI].curlen = curlen;
    2397 ECB             :             /* Mark the chosen fragments (covers) */
    2398 CBC          18 :             mark_fragment(prs, highlightall, startpos, endpos);
    2399 GIC          18 :             num_f++;
    2400                 :             /* Exclude covers overlapping this one from future consideration */
    2401              48 :             for (i = 0; i < numcovers; i++)
    2402                 :             {
    2403              30 :                 if (i != minI &&
    2404 CBC          12 :                     ((covers[i].startpos >= startpos &&
    2405 GIC           6 :                       covers[i].startpos <= endpos) ||
    2406 CBC          12 :                      (covers[i].endpos >= startpos &&
    2407               6 :                       covers[i].endpos <= endpos) ||
    2408              12 :                      (covers[i].startpos < startpos &&
    2409               6 :                       covers[i].endpos > endpos)))
    2410 UIC           0 :                     covers[i].excluded = true;
    2411 ECB             :             }
    2412                 :         }
    2413                 :         else
    2414 GIC           6 :             break;              /* no selectable covers remain */
    2415                 :     }
    2416                 : 
    2417 ECB             :     /* show the first min_words words if we have not marked anything */
    2418 GIC          15 :     if (num_f <= 0)
    2419 ECB             :     {
    2420 GNC           3 :         startpos = curlen = 0;
    2421               3 :         endpos = -1;  /* stays -1 if the loop below never runs, giving mark_fragment an empty range */
    2422 CBC          93 :         for (i = 0; i < prs->curwords && curlen < min_words; i++)
    2423 ECB             :         {
    2424 CBC          90 :             if (!NONWORDTOKEN(prs->words[i].type))
    2425 GIC          45 :                 curlen++;
    2426 CBC          90 :             endpos = i;
    2427                 :         }
    2428 GIC           3 :         mark_fragment(prs, highlightall, startpos, endpos);
    2429 ECB             :     }
    2430                 : 
    2431 GIC          15 :     pfree(covers);
    2432              15 : }
    2433                 : 
    2434                 : /*
    2435                 :  * Headline selector used when MaxFragments == 0
    2436 ECB             :  */
    2437                 : static void
    2438 GNC         172 : mark_hl_words(HeadlineParsedText *prs, TSQuery query, List *locations,
    2439                 :               bool highlightall,
    2440                 :               int shortword, int min_words, int max_words)
    2441 ECB             : {
    2442 GNC         172 :     int         nextpos = 0,
    2443             172 :                 p = 0,
    2444 CBC         172 :                 q = 0;
    2445             172 :     int         bestb = -1,
    2446 GIC         172 :                 beste = -1;
    2447 CBC         172 :     int         bestlen = -1;
    2448 GIC         172 :     bool        bestcover = false;
    2449                 :     int         pose,
    2450 ECB             :                 posb,
    2451                 :                 poslen,
    2452                 :                 curlen;
    2453                 :     bool        poscover;
    2454                 :     int         i;
    2455                 : 
    2456 GIC         172 :     if (!highlightall)
    2457 ECB             :     {
    2458                 :         /* examine all covers, select a headline using the best one */
    2459 GNC         254 :         while (hlCover(prs, query, locations, &nextpos, &p, &q))
    2460 ECB             :         {
    2461                 :             /*
    2462                 :              * Count words (curlen) and interesting words (poslen) within
    2463                 :              * cover, but stop once we reach max_words.  This step doesn't
    2464                 :              * consider whether that's a good stopping point.  posb and pose
    2465                 :              * are set to the start and end indexes of the possible headline.
    2466                 :              */
    2467 CBC          85 :             curlen = 0;
    2468              85 :             poslen = 0;
    2469 GIC          85 :             posb = pose = p;
    2470 CBC         728 :             for (i = p; i <= q && curlen < max_words; i++)
    2471                 :             {
    2472             643 :                 if (!NONWORDTOKEN(prs->words[i].type))
    2473             364 :                     curlen++;
    2474             643 :                 if (INTERESTINGWORD(i))
    2475 GIC         145 :                     poslen++;
    2476 CBC         643 :                 pose = i;
    2477 ECB             :             }
    2478                 : 
    2479 CBC          85 :             if (curlen < max_words)
    2480                 :             {
    2481 ECB             :                 /*
    2482                 :                  * We have room to lengthen the headline, so search forward
    2483                 :                  * until it's full or we find a good stopping point.  We'll
    2484                 :                  * reconsider the word at "q", then move forward.
    2485                 :                  */
    2486 CBC        1469 :                 for (i = i - 1; i < prs->curwords && curlen < max_words; i++)
    2487 ECB             :                 {
    2488 GBC        1456 :                     if (i > q)
    2489                 :                     {
    2490 GIC        1377 :                         if (!NONWORDTOKEN(prs->words[i].type))
    2491             687 :                             curlen++;
    2492 CBC        1377 :                         if (INTERESTINGWORD(i))
    2493 GIC          60 :                             poslen++;
    2494                 :                     }
    2495            1456 :                     pose = i;
    2496 CBC        1456 :                     if (BADENDPOINT(i))
    2497 GIC         972 :                         continue;
    2498 CBC         484 :                     if (curlen >= min_words)
    2499              66 :                         break;
    2500 ECB             :                 }
    2501 GIC          79 :                 if (curlen < min_words)
    2502 ECB             :                 {
    2503                 :                     /*
    2504                 :                      * Reached end of text and our headline is still shorter
    2505                 :                      * than min_words, so try to extend it to the left.
    2506                 :                      */
    2507 GIC         183 :                     for (i = p - 1; i >= 0; i--)
    2508                 :                     {
    2509 CBC         182 :                         if (!NONWORDTOKEN(prs->words[i].type))
    2510              91 :                             curlen++;
    2511 GIC         182 :                         if (INTERESTINGWORD(i))
    2512               3 :                             poslen++;
    2513             182 :                         if (curlen >= max_words)
    2514 UIC           0 :                             break;
    2515 GIC         182 :                         if (BADENDPOINT(i))
    2516 CBC         118 :                             continue;
    2517 GIC          64 :                         if (curlen >= min_words)
    2518              12 :                             break;
    2519                 :                     }
    2520 CBC          13 :                     posb = (i >= 0) ? i : 0;
    2521 ECB             :                 }
    2522                 :             }
    2523                 :             else
    2524                 :             {
    2525                 :                 /*
    2526                 :                  * Can't make headline longer, so consider making it shorter
    2527                 :                  * if needed to avoid a bad endpoint.
    2528                 :                  */
    2529 GIC           6 :                 if (i > q)
    2530               3 :                     i = q;
    2531              15 :                 for (; curlen > min_words; i--)
    2532                 :                 {
    2533              15 :                     if (!BADENDPOINT(i))
    2534 ECB             :                         break;
    2535 GIC           9 :                     if (!NONWORDTOKEN(prs->words[i].type))
    2536               3 :                         curlen--;
    2537 CBC           9 :                     if (INTERESTINGWORD(i))
    2538 UIC           0 :                         poslen--;
    2539 GIC           9 :                     pose = i - 1;
    2540                 :                 }
    2541                 :             }
    2542                 : 
    2543                 :             /*
    2544                 :              * Check whether the proposed headline includes the original
    2545 ECB             :              * cover; it might not if we trimmed it due to max_words.
    2546                 :              */
    2547 CBC          85 :             poscover = (posb <= p && pose >= q);
    2548 ECB             : 
    2549                 :             /*
    2550                 :              * Adopt this headline if it's better than the last one, giving
    2551                 :              * highest priority to headlines including the cover, then to
    2552                 :              * headlines with more interesting words, then to headlines with
    2553                 :              * good stopping points.  (Since bestlen is initially -1, we will
    2554                 :              * certainly adopt the first headline.)
    2555                 :              */
    2556 GIC          85 :             if (poscover > bestcover ||
    2557 CBC          39 :                 (poscover == bestcover && poslen > bestlen) ||
    2558 GIC          36 :                 (poscover == bestcover && poslen == bestlen &&
    2559               6 :                  !BADENDPOINT(pose) && BADENDPOINT(beste)))
    2560                 :             {
    2561              49 :                 bestb = posb;
    2562              49 :                 beste = pose;
    2563              49 :                 bestlen = poslen;
    2564 CBC          49 :                 bestcover = poscover;
    2565                 :             }
    2566 ECB             :         }
    2567                 : 
    2568                 :         /*
    2569                 :          * If we found nothing acceptable, select min_words words starting at
    2570                 :          * the beginning.
    2571                 :          */
    2572 CBC         169 :         if (bestlen < 0)
    2573 ECB             :         {
    2574 CBC         120 :             curlen = 0;
    2575 GNC         120 :             pose = -1;
    2576 CBC         519 :             for (i = 0; i < prs->curwords && curlen < min_words; i++)
    2577                 :             {
    2578 GIC         399 :                 if (!NONWORDTOKEN(prs->words[i].type))
    2579             258 :                     curlen++;
    2580             399 :                 pose = i;
    2581                 :             }
    2582 CBC         120 :             bestb = 0;
    2583 GIC         120 :             beste = pose;
    2584 ECB             :         }
    2585                 :     }
    2586                 :     else
    2587                 :     {
    2588                 :         /* highlightall mode: headline is whole document */
    2589 GBC           3 :         bestb = 0;
    2590 CBC           3 :         beste = prs->curwords - 1;
    2591 ECB             :     }
    2592                 : 
    2593 CBC         172 :     mark_fragment(prs, highlightall, bestb, beste);
    2594 GIC         172 : }
    2595 ECB             : 
    2596                 : /*
    2597                 :  * Default parser's prsheadline function
                         :  *
                         :  * Arguments (fmgr calling convention):
                         :  *   0: HeadlineParsedText *prs  - parsed document to be marked up
                         :  *   1: List *prsoptions         - list of DefElem headline options
                         :  *   2: TSQuery query            - query whose matches are highlighted
                         :  *
                         :  * Recognized option names (matched with pg_strcasecmp) and the defaults
                         :  * used when an option is not supplied:
                         :  *   MinWords = 15, MaxWords = 35, ShortWord = 3, MaxFragments = 0,
                         :  *   HighlightAll = false, StartSel = "<b>", StopSel = "</b>",
                         :  *   FragmentDelimiter = " ... ".
                         :  * HighlightAll is taken as true only for the values "1", "on", "true",
                         :  * "t", "y" or "yes"; any other value yields false.  An option name not
                         :  * in the list above raises ERRCODE_INVALID_PARAMETER_VALUE.
                         :  *
                         :  * Unless HighlightAll is set, the numeric options are validated:
                         :  * MinWords must be positive and less than MaxWords, and ShortWord and
                         :  * MaxFragments must be >= 0.
                         :  *
                         :  * The headline is selected by mark_hl_words() when MaxFragments is 0 and
                         :  * by mark_hl_fragments() otherwise.  Before returning, prs->startsel,
                         :  * prs->stopsel and prs->fragdelim and their corresponding length fields
                         :  * are filled in.  Returns prs as a pointer Datum.
    2598                 :  */
    2599                 : Datum
    2600 GIC         187 : prsd_headline(PG_FUNCTION_ARGS)
    2601                 : {
    2602             187 :     HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0);
    2603             187 :     List       *prsoptions = (List *) PG_GETARG_POINTER(1);
    2604 CBC         187 :     TSQuery     query = PG_GETARG_TSQUERY(2);
    2605                 :     List       *locations;
    2606 ECB             : 
    2607                 :     /* default option values, used for any option not present in prsoptions: */
    2608 GIC         187 :     int         min_words = 15;
    2609 CBC         187 :     int         max_words = 35;
    2610 GIC         187 :     int         shortword = 3;
    2611 CBC         187 :     int         max_fragments = 0;
    2612             187 :     bool        highlightall = false;
    2613 EUB             :     ListCell   *l;
    2614 ECB             : 
    2615                 :     /* Extract configuration option values; NULL string options get defaults later */
    2616 GIC         187 :     prs->startsel = NULL;
    2617             187 :     prs->stopsel = NULL;
    2618             187 :     prs->fragdelim = NULL;
    2619             364 :     foreach(l, prsoptions)
    2620                 :     {
    2621             177 :         DefElem    *defel = (DefElem *) lfirst(l);
    2622 CBC         177 :         char       *val = defGetString(defel);
    2623                 : 
    2624 GIC         177 :         if (pg_strcasecmp(defel->defname, "MaxWords") == 0)
    2625              18 :             max_words = pg_strtoint32(val);
    2626             159 :         else if (pg_strcasecmp(defel->defname, "MinWords") == 0)
    2627              18 :             min_words = pg_strtoint32(val);
    2628             141 :         else if (pg_strcasecmp(defel->defname, "ShortWord") == 0)
    2629 UIC           0 :             shortword = pg_strtoint32(val);
    2630 GIC         141 :         else if (pg_strcasecmp(defel->defname, "MaxFragments") == 0)
    2631 CBC          15 :             max_fragments = pg_strtoint32(val);
    2632             126 :         else if (pg_strcasecmp(defel->defname, "StartSel") == 0)
    2633              60 :             prs->startsel = pstrdup(val);
    2634              66 :         else if (pg_strcasecmp(defel->defname, "StopSel") == 0)
    2635 GIC          60 :             prs->stopsel = pstrdup(val);
    2636 CBC           6 :         else if (pg_strcasecmp(defel->defname, "FragmentDelimiter") == 0)
    2637               3 :             prs->fragdelim = pstrdup(val);
    2638               3 :         else if (pg_strcasecmp(defel->defname, "HighlightAll") == 0)
    2639               9 :             highlightall = (pg_strcasecmp(val, "1") == 0 ||
    2640 GIC           6 :                             pg_strcasecmp(val, "on") == 0 ||
    2641               3 :                             pg_strcasecmp(val, "true") == 0 ||
    2642 UIC           0 :                             pg_strcasecmp(val, "t") == 0 ||
    2643 GIC           6 :                             pg_strcasecmp(val, "y") == 0 ||
    2644 UIC           0 :                             pg_strcasecmp(val, "yes") == 0);
    2645                 :         else
    2646               0 :             ereport(ERROR,
    2647 ECB             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2648                 :                      errmsg("unrecognized headline parameter: \"%s\"",
    2649                 :                             defel->defname)));
    2650                 :     }
    2651                 : 
    2652                 :     /* in HighlightAll mode these parameters are ignored, so validate them only otherwise */
    2653 GIC         187 :     if (!highlightall)
    2654                 :     {
    2655 CBC         184 :         if (min_words >= max_words)
    2656 LBC           0 :             ereport(ERROR,
    2657                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2658                 :                      errmsg("MinWords should be less than MaxWords")));
    2659 CBC         184 :         if (min_words <= 0)
    2660 LBC           0 :             ereport(ERROR,
    2661                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2662                 :                      errmsg("MinWords should be positive")));
    2663 GIC         184 :         if (shortword < 0)
    2664 UIC           0 :             ereport(ERROR,
    2665                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2666 ECB             :                      errmsg("ShortWord should be >= 0")));
    2667 GIC         184 :         if (max_fragments < 0)
    2668 LBC           0 :             ereport(ERROR,
    2669 ECB             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2670                 :                      errmsg("MaxFragments should be >= 0")));
    2671                 :     }
    2672                 : 
    2673                 :     /* Locate words and phrases matching the query */
    2674 GNC         187 :     if (query->size > 0)
    2675                 :     {
    2676                 :         hlCheck     ch;
    2677                 : 
    2678             181 :         ch.words = prs->words;
    2679             181 :         ch.len = prs->curwords;
    2680             181 :         locations = TS_execute_locations(GETQUERY(query), &ch, TS_EXEC_EMPTY,
    2681                 :                                          checkcondition_HL);
    2682                 :     }
    2683                 :     else
    2684               6 :         locations = NIL;        /* empty query matches nothing */
    2685                 : 
    2686                 :     /* Apply appropriate headline selector: mark_hl_words when MaxFragments is 0 */
    2687 CBC         187 :     if (max_fragments == 0)
    2688 GNC         172 :         mark_hl_words(prs, query, locations, highlightall, shortword,
    2689                 :                       min_words, max_words);
    2690 ECB             :     else
    2691 GNC          15 :         mark_hl_fragments(prs, query, locations, highlightall, shortword,
    2692                 :                           min_words, max_words, max_fragments);
    2693                 : 
    2694                 :     /* Fill in default values for string options that were not supplied */
    2695 CBC         187 :     if (!prs->startsel)
    2696             127 :         prs->startsel = pstrdup("<b>");
    2697             187 :     if (!prs->stopsel)
    2698             127 :         prs->stopsel = pstrdup("</b>");
    2699 GIC         187 :     if (!prs->fragdelim)
    2700 CBC         184 :         prs->fragdelim = pstrdup(" ... ");
    2701 ECB             : 
    2702                 :     /* Caller will need these lengths, too */
    2703 CBC         187 :     prs->startsellen = strlen(prs->startsel);
    2704             187 :     prs->stopsellen = strlen(prs->stopsel);
    2705             187 :     prs->fragdelimlen = strlen(prs->fragdelim);
    2706 ECB             : 
    2707 CBC         187 :     PG_RETURN_POINTER(prs);
    2708 EUB             : }
        

Generated by: LCOV version v1.16-55-g56c0a2a