LCOV - Differential Code Coverage 16@8cea358b128 vs 17@8cea358b128

LCOV - differential code coverage report

Current view:	top level - contrib/pg_trgm - trgm_op.c (source / functions)		Coverage	Total	Hit	UBC	GNC	CBC	DUB	DCB
Current:	Differential Code Coverage 16@8cea358b128 vs 17@8cea358b128	Lines:	91.6 %	489	448	41	1	447	1	4
Current Date:	2024-04-14 14:21:10	Functions:	96.2 %	52	50	2	1	49
Baseline:	16@8cea358b128	Branches:	54.4 %	618	336	282		336
Baseline Date:	2024-04-14 14:21:09	Line coverage date bins:
Legend:	Lines: hit not hit \| Branches: + taken - not taken # not executed	[..60] days:	100.0 %	1	1		1
		(240..) days:	91.6 %	488	447	41		447
		Function coverage date bins:
		(240..) days:	96.2 %	52	50	2	1	49
		Branch coverage date bins:
		(240..) days:	54.4 %	618	336	282		336

 Age         Owner                    Branch data    TLA  Line data    Source code

                                  1                 :                : /*
                                  2                 :                :  * contrib/pg_trgm/trgm_op.c
                                  3                 :                :  */
                                  4                 :                : #include "postgres.h"
                                  5                 :                : 
                                  6                 :                : #include <ctype.h>
                                  7                 :                : 
                                  8                 :                : #include "catalog/pg_type.h"
                                  9                 :                : #include "common/int.h"
                                 10                 :                : #include "lib/qunique.h"
                                 11                 :                : #include "miscadmin.h"
                                 12                 :                : #include "trgm.h"
                                 13                 :                : #include "tsearch/ts_locale.h"
                                 14                 :                : #include "utils/guc.h"
                                 15                 :                : #include "utils/lsyscache.h"
                                 16                 :                : #include "utils/memutils.h"
                                 17                 :                : #include "utils/pg_crc.h"
                                 18                 :                : 
 6529 tgl@sss.pgh.pa.us          19                 :CBC           3 : PG_MODULE_MAGIC;
                                 20                 :                : 
                                 21                 :                : /* GUC variables */
                                 22                 :                : double      similarity_threshold = 0.3f;
                                 23                 :                : double      word_similarity_threshold = 0.6f;
                                 24                 :                : double      strict_word_similarity_threshold = 0.5f;
                                 25                 :                : 
 7258 teodor@sigaev.ru           26                 :              2 : PG_FUNCTION_INFO_V1(set_limit);
 4822 tgl@sss.pgh.pa.us          27                 :              2 : PG_FUNCTION_INFO_V1(show_limit);
                                 28                 :              2 : PG_FUNCTION_INFO_V1(show_trgm);
                                 29                 :              2 : PG_FUNCTION_INFO_V1(similarity);
 2951 teodor@sigaev.ru           30                 :              2 : PG_FUNCTION_INFO_V1(word_similarity);
 2216                            31                 :              2 : PG_FUNCTION_INFO_V1(strict_word_similarity);
 4822 tgl@sss.pgh.pa.us          32                 :              2 : PG_FUNCTION_INFO_V1(similarity_dist);
                                 33                 :              2 : PG_FUNCTION_INFO_V1(similarity_op);
 2951 teodor@sigaev.ru           34                 :              2 : PG_FUNCTION_INFO_V1(word_similarity_op);
                                 35                 :              2 : PG_FUNCTION_INFO_V1(word_similarity_commutator_op);
                                 36                 :              1 : PG_FUNCTION_INFO_V1(word_similarity_dist_op);
                                 37                 :              2 : PG_FUNCTION_INFO_V1(word_similarity_dist_commutator_op);
 2216                            38                 :              2 : PG_FUNCTION_INFO_V1(strict_word_similarity_op);
                                 39                 :              2 : PG_FUNCTION_INFO_V1(strict_word_similarity_commutator_op);
                                 40                 :              1 : PG_FUNCTION_INFO_V1(strict_word_similarity_dist_op);
                                 41                 :              2 : PG_FUNCTION_INFO_V1(strict_word_similarity_dist_commutator_op);
                                 42                 :                : 
                                 43                 :                : /* Trigram with position */
                                 44                 :                : typedef struct
                                 45                 :                : {
                                 46                 :                :     trgm        trg;
                                 47                 :                :     int         index;
                                 48                 :                : } pos_trgm;
                                 49                 :                : 
                                 50                 :                : /* Trigram bound type */
                                 51                 :                : typedef uint8 TrgmBound;
                                 52                 :                : #define TRGM_BOUND_LEFT             0x01    /* trigram is left bound of word */
                                 53                 :                : #define TRGM_BOUND_RIGHT            0x02    /* trigram is right bound of word */
                                 54                 :                : 
                                 55                 :                : /* Word similarity flags */
                                 56                 :                : #define WORD_SIMILARITY_CHECK_ONLY  0x01    /* only check existence of similar
                                 57                 :                :                                              * search pattern in text */
                                 58                 :                : #define WORD_SIMILARITY_STRICT      0x02    /* force bounds of extent to match
                                 59                 :                :                                              * word bounds */
                                 60                 :                : 
                                 61                 :                : /*
                                 62                 :                :  * Module load callback
                                 63                 :                :  */
                                 64                 :                : void
 2951                            65                 :              3 : _PG_init(void)
                                 66                 :                : {
                                 67                 :                :     /* Define custom GUC variables. */
                                 68                 :              3 :     DefineCustomRealVariable("pg_trgm.similarity_threshold",
                                 69                 :                :                              "Sets the threshold used by the % operator.",
                                 70                 :                :                              "Valid range is 0.0 .. 1.0.",
                                 71                 :                :                              &similarity_threshold,
                                 72                 :                :                              0.3f,
                                 73                 :                :                              0.0,
                                 74                 :                :                              1.0,
                                 75                 :                :                              PGC_USERSET,
                                 76                 :                :                              0,
                                 77                 :                :                              NULL,
                                 78                 :                :                              NULL,
                                 79                 :                :                              NULL);
                                 80                 :              3 :     DefineCustomRealVariable("pg_trgm.word_similarity_threshold",
                                 81                 :                :                              "Sets the threshold used by the <% operator.",
                                 82                 :                :                              "Valid range is 0.0 .. 1.0.",
                                 83                 :                :                              &word_similarity_threshold,
                                 84                 :                :                              0.6f,
                                 85                 :                :                              0.0,
                                 86                 :                :                              1.0,
                                 87                 :                :                              PGC_USERSET,
                                 88                 :                :                              0,
                                 89                 :                :                              NULL,
                                 90                 :                :                              NULL,
                                 91                 :                :                              NULL);
 2216                            92                 :              3 :     DefineCustomRealVariable("pg_trgm.strict_word_similarity_threshold",
                                 93                 :                :                              "Sets the threshold used by the <<% operator.",
                                 94                 :                :                              "Valid range is 0.0 .. 1.0.",
                                 95                 :                :                              &strict_word_similarity_threshold,
                                 96                 :                :                              0.5f,
                                 97                 :                :                              0.0,
                                 98                 :                :                              1.0,
                                 99                 :                :                              PGC_USERSET,
                                100                 :                :                              0,
                                101                 :                :                              NULL,
                                102                 :                :                              NULL,
                                103                 :                :                              NULL);
                                104                 :                : 
  783 tgl@sss.pgh.pa.us         105                 :              3 :     MarkGUCPrefixReserved("pg_trgm");
 2951 teodor@sigaev.ru          106                 :              3 : }
                                107                 :                : 
                                108                 :                : /*
                                109                 :                :  * Deprecated function.
                                110                 :                :  * Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
                                111                 :                :  */
                                112                 :                : Datum
 7168 bruce@momjian.us          113                 :              2 : set_limit(PG_FUNCTION_ARGS)
                                114                 :                : {
                                115                 :              2 :     float4      nlimit = PG_GETARG_FLOAT4(0);
                                116                 :                :     char       *nlimit_str;
                                117                 :                :     Oid         func_out_oid;
                                118                 :                :     bool        is_varlena;
                                119                 :                : 
 2949 teodor@sigaev.ru          120                 :              2 :     getTypeOutputInfo(FLOAT4OID, &func_out_oid, &is_varlena);
                                121                 :                : 
                                122                 :              2 :     nlimit_str = OidOutputFunctionCall(func_out_oid, Float4GetDatum(nlimit));
                                123                 :                : 
                                124                 :              2 :     SetConfigOption("pg_trgm.similarity_threshold", nlimit_str,
                                125                 :                :                     PGC_USERSET, PGC_S_SESSION);
                                126                 :                : 
 2951                           127                 :              2 :     PG_RETURN_FLOAT4(similarity_threshold);
                                128                 :                : }
                                129                 :                : 
                                130                 :                : 
                                131                 :                : /*
                                132                 :                :  * Get similarity threshold for given index scan strategy number.
                                133                 :                :  */
                                134                 :                : double
 2216                           135                 :          43500 : index_strategy_get_limit(StrategyNumber strategy)
                                136                 :                : {
                                137   [ +  +  +  - ]:          43500 :     switch (strategy)
                                138                 :                :     {
                                139                 :          32704 :         case SimilarityStrategyNumber:
                                140                 :          32704 :             return similarity_threshold;
                                141                 :           4818 :         case WordSimilarityStrategyNumber:
                                142                 :           4818 :             return word_similarity_threshold;
                                143                 :           5978 :         case StrictWordSimilarityStrategyNumber:
                                144                 :           5978 :             return strict_word_similarity_threshold;
 2216 teodor@sigaev.ru          145                 :UBC           0 :         default:
                                146         [ #  # ]:              0 :             elog(ERROR, "unrecognized strategy number: %d", strategy);
                                147                 :                :             break;
                                148                 :                :     }
                                149                 :                : 
                                150                 :                :     return 0.0;                 /* keep compiler quiet */
                                151                 :                : }
                                152                 :                : 
                                153                 :                : /*
                                154                 :                :  * Deprecated function.
                                155                 :                :  * Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
                                156                 :                :  */
                                157                 :                : Datum
 7168 bruce@momjian.us          158                 :CBC       20000 : show_limit(PG_FUNCTION_ARGS)
                                159                 :                : {
 2951 teodor@sigaev.ru          160                 :          20000 :     PG_RETURN_FLOAT4(similarity_threshold);
                                161                 :                : }
                                162                 :                : 
                                163                 :                : static int
 7168 bruce@momjian.us          164                 :        3132620 : comp_trgm(const void *a, const void *b)
                                165                 :                : {
                                166   [ +  +  +  +  :        3132620 :     return CMPTRGM(a, b);
                                     +  +  +  +  +  
                                           +  +  + ]
                                167                 :                : }
                                168                 :                : 
                                169                 :                : /*
                                170                 :                :  * Finds first word in string, returns pointer to the word,
                                171                 :                :  * endword points to the character after word
                                172                 :                :  */
                                173                 :                : static char *
 5421                           174                 :         239411 : find_word(char *str, int lenstr, char **endword, int *charlen)
                                175                 :                : {
                                176                 :         239411 :     char       *beginword = str;
                                177                 :                : 
 4023 tgl@sss.pgh.pa.us         178   [ +  +  +  + ]:         253075 :     while (beginword - str < lenstr && !ISWORDCHR(beginword))
 5632 teodor@sigaev.ru          179                 :          13664 :         beginword += pg_mblen(beginword);
                                180                 :                : 
                                181         [ +  + ]:         239411 :     if (beginword - str >= lenstr)
                                182                 :         113080 :         return NULL;
                                183                 :                : 
                                184                 :         126331 :     *endword = beginword;
                                185                 :         126331 :     *charlen = 0;
 4023 tgl@sss.pgh.pa.us         186   [ +  +  +  + ]:        1087678 :     while (*endword - str < lenstr && ISWORDCHR(*endword))
                                187                 :                :     {
 5632 teodor@sigaev.ru          188                 :         961347 :         *endword += pg_mblen(*endword);
                                189                 :         961347 :         (*charlen)++;
                                190                 :                :     }
                                191                 :                : 
                                192                 :         126331 :     return beginword;
                                193                 :                : }
                                194                 :                : 
                                195                 :                : /*
                                196                 :                :  * Reduce a trigram (three possibly multi-byte characters) to a trgm,
                                197                 :                :  * which is always exactly three bytes.  If we have three single-byte
                                198                 :                :  * characters, we just use them as-is; otherwise we form a hash value.
                                199                 :                :  */
                                200                 :                : void
 4025 tgl@sss.pgh.pa.us         201                 :           1459 : compact_trigram(trgm *tptr, char *str, int bytelen)
                                202                 :                : {
 5421 bruce@momjian.us          203         [ +  - ]:           1459 :     if (bytelen == 3)
                                204                 :                :     {
                                205                 :           1459 :         CPTRGM(tptr, str);
                                206                 :                :     }
                                207                 :                :     else
                                208                 :                :     {
                                209                 :                :         pg_crc32    crc;
                                210                 :                : 
 3449 heikki.linnakangas@i      211                 :UBC           0 :         INIT_LEGACY_CRC32(crc);
                                212         [ #  # ]:              0 :         COMP_LEGACY_CRC32(crc, str, bytelen);
                                213                 :              0 :         FIN_LEGACY_CRC32(crc);
                                214                 :                : 
                                215                 :                :         /*
                                216                 :                :          * use only 3 upper bytes from crc, hope, it's good enough hashing
                                217                 :                :          */
 5632 teodor@sigaev.ru          218                 :              0 :         CPTRGM(tptr, &crc);
                                219                 :                :     }
 5632 teodor@sigaev.ru          220                 :CBC        1459 : }
                                221                 :                : 
                                222                 :                : /*
                                223                 :                :  * Adds trigrams from words (already padded).
                                224                 :                :  */
                                225                 :                : static trgm *
 5421 bruce@momjian.us          226                 :         126395 : make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
                                227                 :                : {
                                228                 :         126395 :     char       *ptr = str;
                                229                 :                : 
                                230         [ +  + ]:         126395 :     if (charlen < 3)
 5632 teodor@sigaev.ru          231                 :             27 :         return tptr;
                                232                 :                : 
 4025 tgl@sss.pgh.pa.us         233         [ -  + ]:         126368 :     if (bytelen > charlen)
                                234                 :                :     {
                                235                 :                :         /* Find multibyte character boundaries and apply compact_trigram */
 5421 bruce@momjian.us          236                 :UBC           0 :         int         lenfirst = pg_mblen(str),
                                237                 :              0 :                     lenmiddle = pg_mblen(str + lenfirst),
                                238                 :              0 :                     lenlast = pg_mblen(str + lenfirst + lenmiddle);
                                239                 :                : 
                                240         [ #  # ]:              0 :         while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
                                241                 :                :         {
 4025 tgl@sss.pgh.pa.us         242                 :              0 :             compact_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);
                                243                 :                : 
 5632 teodor@sigaev.ru          244                 :              0 :             ptr += lenfirst;
                                245                 :              0 :             tptr++;
                                246                 :                : 
 5421 bruce@momjian.us          247                 :              0 :             lenfirst = lenmiddle;
                                248                 :              0 :             lenmiddle = lenlast;
                                249                 :              0 :             lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
                                250                 :                :         }
                                251                 :                :     }
                                252                 :                :     else
                                253                 :                :     {
                                254                 :                :         /* Fast path when there are no multibyte characters */
 5421 bruce@momjian.us          255         [ -  + ]:CBC      126368 :         Assert(bytelen == charlen);
                                256                 :                : 
 5632 teodor@sigaev.ru          257         [ +  + ]:        1214137 :         while (ptr - str < bytelen - 2 /* number of trigrams = strlen - 2 */ )
                                258                 :                :         {
                                259                 :        1087769 :             CPTRGM(tptr, ptr);
                                260                 :        1087769 :             ptr++;
                                261                 :        1087769 :             tptr++;
                                262                 :                :         }
                                263                 :                :     }
                                264                 :                : 
                                265                 :         126368 :     return tptr;
                                266                 :                : }
                                267                 :                : 
                                268                 :                : /*
                                269                 :                :  * Make array of trigrams without sorting and removing duplicate items.
                                270                 :                :  *
                                271                 :                :  * trg: where to return the array of trigrams.
                                272                 :                :  * str: source string, of length slen bytes.
                                273                 :                :  * bounds: where to return bounds of trigrams (if needed).
                                274                 :                :  *
                                275                 :                :  * Returns length of the generated array.
                                276                 :                :  */
                                277                 :                : static int
 2216                           278                 :         113081 : generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
                                279                 :                : {
                                280                 :                :     trgm       *tptr;
                                281                 :                :     char       *buf;
                                282                 :                :     int         charlen,
                                283                 :                :                 bytelen;
                                284                 :                :     char       *bword,
                                285                 :                :                *eword;
                                286                 :                : 
 7168 bruce@momjian.us          287   [ +  -  +  + ]:         113081 :     if (slen + LPADDING + RPADDING < 3 || slen == 0)
 2951 teodor@sigaev.ru          288                 :              1 :         return 0;
                                289                 :                : 
                                290                 :         113080 :     tptr = trg;
                                291                 :                : 
                                292                 :                :     /* Allocate a buffer for case-folded, blank-padded words */
 3744 tgl@sss.pgh.pa.us         293                 :         113080 :     buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4);
                                294                 :                : 
                                295                 :                :     if (LPADDING > 0)
                                296                 :                :     {
 7258 teodor@sigaev.ru          297                 :         113080 :         *buf = ' ';
                                298                 :                :         if (LPADDING > 1)
 7168 bruce@momjian.us          299                 :         113080 :             *(buf + 1) = ' ';
                                300                 :                :     }
                                301                 :                : 
 5632 teodor@sigaev.ru          302                 :         113080 :     eword = str;
 5421 bruce@momjian.us          303         [ +  + ]:         239411 :     while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
                                304                 :                :     {
                                305                 :                : #ifdef IGNORECASE
 5632 teodor@sigaev.ru          306                 :         126331 :         bword = lowerstr_with_len(bword, eword - bword);
                                307                 :         126331 :         bytelen = strlen(bword);
                                308                 :                : #else
                                309                 :                :         bytelen = eword - bword;
                                310                 :                : #endif
                                311                 :                : 
                                312                 :         126331 :         memcpy(buf + LPADDING, bword, bytelen);
                                313                 :                : 
                                314                 :                : #ifdef IGNORECASE
                                315                 :         126331 :         pfree(bword);
                                316                 :                : #endif
                                317                 :                : 
 5421 bruce@momjian.us          318                 :         126331 :         buf[LPADDING + bytelen] = ' ';
                                319                 :         126331 :         buf[LPADDING + bytelen + 1] = ' ';
                                320                 :                : 
                                321                 :                :         /* Calculate trigrams marking their bounds if needed */
 2216 teodor@sigaev.ru          322         [ +  + ]:         126331 :         if (bounds)
                                323                 :          12398 :             bounds[tptr - trg] |= TRGM_BOUND_LEFT;
 5421 bruce@momjian.us          324                 :         126331 :         tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING,
                                325                 :                :                              charlen + LPADDING + RPADDING);
 2216 teodor@sigaev.ru          326         [ +  + ]:         126331 :         if (bounds)
                                327                 :          12398 :             bounds[tptr - trg - 1] |= TRGM_BOUND_RIGHT;
                                328                 :                :     }
                                329                 :                : 
 7258                           330                 :         113080 :     pfree(buf);
                                331                 :                : 
 2951                           332                 :         113080 :     return tptr - trg;
                                333                 :                : }
                                334                 :                : 
                                335                 :                : /*
                                336                 :                :  * Guard against possible overflow in the palloc requests below.  (We
                                337                 :                :  * don't worry about the additive constants, since palloc can detect
                                338                 :                :  * requests that are a little above MaxAllocSize --- we just need to
                                339                 :                :  * prevent integer overflow in the multiplications.)
                                340                 :                :  */
                                341                 :                : static void
                                342                 :         101010 : protect_out_of_mem(int slen)
                                343                 :                : {
                                344         [ +  - ]:         101010 :     if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) ||
                                345         [ -  + ]:         101010 :         (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length()))
 2951 teodor@sigaev.ru          346         [ #  # ]:UBC           0 :         ereport(ERROR,
                                347                 :                :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                348                 :                :                  errmsg("out of memory")));
 2951 teodor@sigaev.ru          349                 :CBC      101010 : }
                                350                 :                : 
                                351                 :                : /*
                                352                 :                :  * Make array of trigrams with sorting and removing duplicate items.
                                353                 :                :  *
                                354                 :                :  * str: source string, of length slen bytes.
                                355                 :                :  *
                                356                 :                :  * Returns the sorted array of unique trigrams.
                                357                 :                :  */
                                358                 :                : TRGM *
                                359                 :          88829 : generate_trgm(char *str, int slen)
                                360                 :                : {
                                361                 :                :     TRGM       *trg;
                                362                 :                :     int         len;
                                363                 :                : 
                                364                 :          88829 :     protect_out_of_mem(slen);
                                365                 :                : 
 2489 tgl@sss.pgh.pa.us         366                 :          88829 :     trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
 2951 teodor@sigaev.ru          367                 :          88829 :     trg->flag = ARRKEY;
                                368                 :                : 
 2216                           369                 :          88829 :     len = generate_trgm_only(GETARR(trg), str, slen, NULL);
 2951                           370                 :          88829 :     SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
                                371                 :                : 
                                372         [ +  + ]:          88829 :     if (len == 0)
 7258                           373                 :              4 :         return trg;
                                374                 :                : 
                                375                 :                :     /*
                                376                 :                :      * Make trigrams unique.
                                377                 :                :      */
 3744 tgl@sss.pgh.pa.us         378         [ +  - ]:          88825 :     if (len > 1)
                                379                 :                :     {
  432 peter@eisentraut.org      380                 :          88825 :         qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
 1620 tmunro@postgresql.or      381                 :          88825 :         len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
                                382                 :                :     }
                                383                 :                : 
 6255 tgl@sss.pgh.pa.us         384                 :          88825 :     SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
                                385                 :                : 
 7258 teodor@sigaev.ru          386                 :          88825 :     return trg;
                                387                 :                : }
                                388                 :                : 
                                389                 :                : /*
                                390                 :                :  * Make array of positional trigrams from two trigram arrays trg1 and trg2.
                                391                 :                :  *
                                 392                 :                :  * trg1: trigram array of search pattern, of length len1. trg1 is the required
                                 393                 :                :  *       word, whose positions don't matter and are replaced with -1.
                                 394                 :                :  * trg2: trigram array of text, of length len2. trg2 is the haystack in which
                                 395                 :                :  *       we search, so its positions have to be stored.
                                396                 :                :  *
                                397                 :                :  * Returns concatenated trigram array.
                                398                 :                :  */
                                399                 :                : static pos_trgm *
 2951                           400                 :          12126 : make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
                                401                 :                : {
                                402                 :                :     pos_trgm   *result;
                                403                 :                :     int         i,
 2866 rhaas@postgresql.org      404                 :          12126 :                 len = len1 + len2;
                                405                 :                : 
 2951 teodor@sigaev.ru          406                 :          12126 :     result = (pos_trgm *) palloc(sizeof(pos_trgm) * len);
                                407                 :                : 
                                408         [ +  + ]:         120864 :     for (i = 0; i < len1; i++)
                                409                 :                :     {
                                410                 :         108738 :         memcpy(&result[i].trg, &trg1[i], sizeof(trgm));
                                411                 :         108738 :         result[i].index = -1;
                                412                 :                :     }
                                413                 :                : 
                                414         [ +  + ]:         192216 :     for (i = 0; i < len2; i++)
                                415                 :                :     {
                                416                 :         180090 :         memcpy(&result[i + len1].trg, &trg2[i], sizeof(trgm));
                                417                 :         180090 :         result[i + len1].index = i;
                                418                 :                :     }
                                419                 :                : 
                                420                 :          12126 :     return result;
                                421                 :                : }
                                422                 :                : 
                                423                 :                : /*
                                424                 :                :  * Compare position trigrams: compare trigrams first and position second.
                                425                 :                :  */
                                426                 :                : static int
                                427                 :        1307720 : comp_ptrgm(const void *v1, const void *v2)
                                428                 :                : {
 2866 rhaas@postgresql.org      429                 :        1307720 :     const pos_trgm *p1 = (const pos_trgm *) v1;
                                430                 :        1307720 :     const pos_trgm *p2 = (const pos_trgm *) v2;
                                431                 :                :     int         cmp;
                                432                 :                : 
 2951 teodor@sigaev.ru          433   [ +  +  +  +  :        1307720 :     cmp = CMPTRGM(p1->trg, p2->trg);
                                     +  +  +  +  +  
                                           +  +  + ]
                                434         [ +  + ]:        1307720 :     if (cmp != 0)
                                435                 :        1268016 :         return cmp;
                                436                 :                : 
   58 nathan@postgresql.or      437                 :GNC       39704 :     return pg_cmp_s32(p1->index, p2->index);
                                438                 :                : }
                                439                 :                : 
                                440                 :                : /*
                                441                 :                :  * Iterative search function which calculates maximum similarity with word in
                                 442                 :                :  * the string. Maximum similarity is calculated only if the flag
                                443                 :                :  * WORD_SIMILARITY_CHECK_ONLY isn't set.
                                444                 :                :  *
                                445                 :                :  * trg2indexes: array which stores indexes of the array "found".
                                 446                 :                :  * found: array which stores true or false values.
                                447                 :                :  * ulen1: count of unique trigrams of array "trg1".
                                448                 :                :  * len2: length of array "trg2" and array "trg2indexes".
                                449                 :                :  * len: length of the array "found".
                                450                 :                :  * flags: set of boolean flags parameterizing similarity calculation.
                                451                 :                :  * bounds: whether each trigram is left/right bound of word.
                                452                 :                :  *
                                453                 :                :  * Returns word similarity.
                                454                 :                :  */
                                455                 :                : static float4
 2951 teodor@sigaev.ru          456                 :CBC       12126 : iterate_word_similarity(int *trg2indexes,
                                457                 :                :                         bool *found,
                                458                 :                :                         int ulen1,
                                459                 :                :                         int len2,
                                460                 :                :                         int len,
                                461                 :                :                         uint8 flags,
                                462                 :                :                         TrgmBound *bounds)
                                463                 :                : {
                                464                 :                :     int        *lastpos,
                                465                 :                :                 i,
                                466                 :          12126 :                 ulen2 = 0,
                                467                 :          12126 :                 count = 0,
                                468                 :          12126 :                 upper = -1,
                                469                 :                :                 lower;
                                470                 :                :     float4      smlr_cur,
                                471                 :          12126 :                 smlr_max = 0.0f;
                                472                 :                :     double      threshold;
                                473                 :                : 
 2216                           474   [ +  +  -  + ]:          12126 :     Assert(bounds || !(flags & WORD_SIMILARITY_STRICT));
                                475                 :                : 
                                476                 :                :     /* Select appropriate threshold */
                                477                 :          24252 :     threshold = (flags & WORD_SIMILARITY_STRICT) ?
 2180 tgl@sss.pgh.pa.us         478         [ +  + ]:          12126 :         strict_word_similarity_threshold :
                                479                 :                :         word_similarity_threshold;
                                480                 :                : 
                                481                 :                :     /*
                                482                 :                :      * Consider first trigram as initial lower bound for strict word
                                483                 :                :      * similarity, or initialize it later with first trigram present for plain
                                484                 :                :      * word similarity.
                                485                 :                :      */
 2216 teodor@sigaev.ru          486         [ +  + ]:          12126 :     lower = (flags & WORD_SIMILARITY_STRICT) ? 0 : -1;
                                487                 :                : 
                                488                 :                :     /* Memorise last position of each trigram */
 2951                           489                 :          12126 :     lastpos = (int *) palloc(sizeof(int) * len);
                                490                 :          12126 :     memset(lastpos, -1, sizeof(int) * len);
                                491                 :                : 
                                492         [ +  + ]:         183646 :     for (i = 0; i < len2; i++)
                                493                 :                :     {
                                494                 :                :         int         trgindex;
                                495                 :                : 
  587 dgustafsson@postgres      496         [ -  + ]:         173304 :         CHECK_FOR_INTERRUPTS();
                                497                 :                : 
                                498                 :                :         /* Get index of next trigram */
                                499                 :         173304 :         trgindex = trg2indexes[i];
                                500                 :                : 
                                501                 :                :         /* Update last position of this trigram */
 2951 teodor@sigaev.ru          502   [ +  +  +  + ]:         173304 :         if (lower >= 0 || found[trgindex])
                                503                 :                :         {
                                504         [ +  + ]:         135794 :             if (lastpos[trgindex] < 0)
                                505                 :                :             {
                                506                 :         133942 :                 ulen2++;
                                507         [ +  + ]:         133942 :                 if (found[trgindex])
                                508                 :          30756 :                     count++;
                                509                 :                :             }
                                510                 :         135794 :             lastpos[trgindex] = i;
                                511                 :                :         }
                                512                 :                : 
                                513                 :                :         /*
                                514                 :                :          * Adjust upper bound if trigram is upper bound of word for strict
                                515                 :                :          * word similarity, or if trigram is present in required substring for
                                516                 :                :          * plain word similarity
                                517                 :                :          */
 2216                           518   [ +  +  +  + ]:         250347 :         if ((flags & WORD_SIMILARITY_STRICT) ? (bounds[i] & TRGM_BOUND_RIGHT)
 2180 tgl@sss.pgh.pa.us         519                 :          77043 :             : found[trgindex])
                                520                 :                :         {
                                521                 :                :             int         prev_lower,
                                522                 :                :                         tmp_ulen2,
                                523                 :                :                         tmp_lower,
                                524                 :                :                         tmp_count;
                                525                 :                : 
 2951 teodor@sigaev.ru          526                 :          25635 :             upper = i;
                                527         [ +  + ]:          25635 :             if (lower == -1)
                                528                 :                :             {
                                529                 :           4695 :                 lower = i;
                                530                 :           4695 :                 ulen2 = 1;
                                531                 :                :             }
                                532                 :                : 
                                533                 :          25635 :             smlr_cur = CALCSML(count, ulen1, ulen2);
                                534                 :                : 
                                535                 :                :             /* Also try to adjust lower bound for greater similarity */
                                536                 :          25635 :             tmp_count = count;
                                537                 :          25635 :             tmp_ulen2 = ulen2;
                                538                 :          25635 :             prev_lower = lower;
                                539         [ +  + ]:         208579 :             for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++)
                                540                 :                :             {
                                541                 :                :                 float       smlr_tmp;
                                542                 :                :                 int         tmp_trgindex;
                                543                 :                : 
                                544                 :                :                 /*
                                545                 :                :                  * Adjust lower bound only if trigram is lower bound of word
                                546                 :                :                  * for strict word similarity, or consider every trigram as
                                547                 :                :                  * lower bound for plain word similarity.
                                548                 :                :                  */
 2216                           549         [ +  + ]:         184728 :                 if (!(flags & WORD_SIMILARITY_STRICT)
                                550         [ +  + ]:         145180 :                     || (bounds[tmp_lower] & TRGM_BOUND_LEFT))
                                551                 :                :                 {
                                552                 :          59680 :                     smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
                                553         [ +  + ]:          59680 :                     if (smlr_tmp > smlr_cur)
                                554                 :                :                     {
                                555                 :           3511 :                         smlr_cur = smlr_tmp;
                                556                 :           3511 :                         ulen2 = tmp_ulen2;
                                557                 :           3511 :                         lower = tmp_lower;
                                558                 :           3511 :                         count = tmp_count;
                                559                 :                :                     }
                                560                 :                : 
                                561                 :                :                     /*
                                562                 :                :                      * If we only check that word similarity is greater than
                                563                 :                :                      * threshold we do not need to calculate a maximum
                                564                 :                :                      * similarity.
                                565                 :                :                      */
                                566         [ +  + ]:          59680 :                     if ((flags & WORD_SIMILARITY_CHECK_ONLY)
                                567         [ +  + ]:          37114 :                         && smlr_cur >= threshold)
                                568                 :           1784 :                         break;
                                569                 :                :                 }
                                570                 :                : 
 2951                           571                 :         182944 :                 tmp_trgindex = trg2indexes[tmp_lower];
                                572         [ +  + ]:         182944 :                 if (lastpos[tmp_trgindex] == tmp_lower)
                                573                 :                :                 {
                                574                 :         180685 :                     tmp_ulen2--;
                                575         [ +  + ]:         180685 :                     if (found[tmp_trgindex])
                                576                 :          46579 :                         tmp_count--;
                                577                 :                :                 }
                                578                 :                :             }
                                579                 :                : 
                                580         [ +  + ]:          25635 :             smlr_max = Max(smlr_max, smlr_cur);
                                581                 :                : 
                                582                 :                :             /*
                                583                 :                :              * if we only check that word similarity is greater than threshold
                                584                 :                :              * we do not need to calculate a maximum similarity.
                                585                 :                :              */
 2216                           586   [ +  +  +  + ]:          25635 :             if ((flags & WORD_SIMILARITY_CHECK_ONLY) && smlr_max >= threshold)
 2951                           587                 :           1784 :                 break;
                                588                 :                : 
                                589         [ +  + ]:          40599 :             for (tmp_lower = prev_lower; tmp_lower < lower; tmp_lower++)
                                590                 :                :             {
                                591                 :                :                 int         tmp_trgindex;
                                592                 :                : 
                                593                 :          16748 :                 tmp_trgindex = trg2indexes[tmp_lower];
                                594         [ +  + ]:          16748 :                 if (lastpos[tmp_trgindex] == tmp_lower)
                                595                 :          16000 :                     lastpos[tmp_trgindex] = -1;
                                596                 :                :             }
                                597                 :                :         }
                                598                 :                :     }
                                599                 :                : 
                                600                 :          12126 :     pfree(lastpos);
                                601                 :                : 
                                602                 :          12126 :     return smlr_max;
                                603                 :                : }
                                604                 :                : 
                                605                 :                : /*
                                606                 :                :  * Calculate word similarity.
                                 607                 :                :  * This function prepares two arrays: "trg2indexes" and "found". Then these
                                 608                 :                :  * arrays are used to calculate word similarity using iterate_word_similarity().
                                609                 :                :  *
                                610                 :                :  * "trg2indexes" is array which stores indexes of the array "found".
                                611                 :                :  * In other words:
                                612                 :                :  * trg2indexes[j] = i;
                                613                 :                :  * found[i] = true (or false);
                                 614                 :                :  * If found[i] == true then trigram trg2[j] is present in array "trg1".
                                 615                 :                :  * If found[i] == false then trigram trg2[j] is not present in array "trg1".
                                616                 :                :  *
                                617                 :                :  * str1: search pattern string, of length slen1 bytes.
                                618                 :                :  * str2: text in which we are looking for a word, of length slen2 bytes.
                                619                 :                :  * flags: set of boolean flags parameterizing similarity calculation.
                                620                 :                :  *
                                621                 :                :  * Returns word similarity.
                                622                 :                :  */
                                623                 :                : static float4
                                624                 :          12126 : calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
                                625                 :                :                      uint8 flags)
                                626                 :                : {
                                627                 :                :     bool       *found;          /* per distinct trigram: set by a negative-index entry */
                                628                 :                :     pos_trgm   *ptrg;           /* merged positional trigrams, sorted as len entries */
                                629                 :                :     trgm       *trg1;
                                630                 :                :     trgm       *trg2;
                                631                 :                :     int         len1,           /* value returned by generate_trgm_only() for str1 */
                                632                 :                :                 len2,           /* value returned by generate_trgm_only() for str2 */
                                633                 :                :                 len,            /* len1 + len2 */
                                634                 :                :                 i,
                                635                 :                :                 j,              /* number of the current distinct trigram */
                                636                 :                :                 ulen1;          /* distinct trigrams whose found[] flag is set */
                                637                 :                :     int        *trg2indexes;    /* non-negative entry index -> distinct trigram number */
                                638                 :                :     float4      result;
                                639                 :                :     TrgmBound  *bounds;         /* only allocated when WORD_SIMILARITY_STRICT is set */
                                640                 :                : 
                                641                 :          12126 :     protect_out_of_mem(slen1 + slen2);  /* size guard; the helper is defined outside this view */
                                642                 :                : 
                                643                 :                :     /* Make positional trigrams */
 2489 tgl@sss.pgh.pa.us         644                 :          12126 :     trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3);  /* room for (slen1 / 2 + 1) * 3 trigrams */
                                645                 :          12126 :     trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3);  /* room for (slen2 / 2 + 1) * 3 trigrams */
 2216 teodor@sigaev.ru          646         [ +  + ]:          12126 :     if (flags & WORD_SIMILARITY_STRICT)
                                647                 :           6662 :         bounds = (TrgmBound *) palloc0(sizeof(TrgmBound) * (slen2 / 2 + 1) * 3);  /* zeroed, same slot count as trg2 */
                                648                 :                :     else
                                649                 :           5464 :         bounds = NULL;
                                650                 :                : 
                                651                 :          12126 :     len1 = generate_trgm_only(trg1, str1, slen1, NULL);  /* NULL: no bounds array for the pattern */
                                652                 :          12126 :     len2 = generate_trgm_only(trg2, str2, slen2, bounds);
                                653                 :                : 
 2951                           654                 :          12126 :     ptrg = make_positional_trgm(trg1, len1, trg2, len2);
                                655                 :          12126 :     len = len1 + len2;
                                656                 :          12126 :     qsort(ptrg, len, sizeof(pos_trgm), comp_ptrgm);  /* the loop below relies on equal trigrams being adjacent */
                                657                 :                : 
                                658                 :          12126 :     pfree(trg1);  /* the raw arrays are not used past this point */
                                659                 :          12126 :     pfree(trg2);
                                660                 :                : 
                                661                 :                :     /*
                                662                 :                :      * Merge positional trigrams array: enumerate each trigram and find its
                                663                 :                :      * presence in required word.
                                664                 :                :      */
                                665                 :          12126 :     trg2indexes = (int *) palloc(sizeof(int) * len2);  /* one slot per non-negative entry index */
                                666                 :          12126 :     found = (bool *) palloc0(sizeof(bool) * len);  /* zeroed; there are at most len distinct trigrams */
                                667                 :                :     /* NOTE(review): if len == 0, the found[j] test after the loop reads found[0] of a zero-length chunk -- confirm this is safe */
                                668                 :          12126 :     ulen1 = 0;
                                669                 :          12126 :     j = 0;
                                670         [ +  + ]:         300954 :     for (i = 0; i < len; i++)
                                671                 :                :     {
                                672         [ +  + ]:         288828 :         if (i > 0)
                                673                 :                :         {
 2866 rhaas@postgresql.org      674   [ +  +  +  -  :         276702 :             int         cmp = CMPTRGM(ptrg[i - 1].trg, ptrg[i].trg);
                                     +  +  +  -  +  
                                           +  +  - ]
                                675                 :                : 
 2951 teodor@sigaev.ru          676         [ +  + ]:         276702 :             if (cmp != 0)  /* entry i starts a new distinct trigram */
                                677                 :                :             {
                                678         [ +  + ]:         242502 :                 if (found[j])  /* tally the distinct trigram just finished */
                                679                 :         101139 :                     ulen1++;
                                680                 :         242502 :                 j++;
                                681                 :                :             }
                                682                 :                :         }
                                683                 :                : 
                                684         [ +  + ]:         288828 :         if (ptrg[i].index >= 0)
                                685                 :                :         {
                                686                 :         180090 :             trg2indexes[ptrg[i].index] = j;  /* remember which distinct trigram this position holds */
                                687                 :                :         }
                                688                 :                :         else
                                689                 :                :         {
                                690                 :         108738 :             found[j] = true;  /* negative index: presumably an entry built from trg1 -- confirm in make_positional_trgm */
                                691                 :                :         }
                                692                 :                :     }
                                693         [ +  + ]:          12126 :     if (found[j])  /* the last distinct trigram is never tallied inside the loop */
                                694                 :           7599 :         ulen1++;
                                695                 :                : 
                                696                 :                :     /* Run iterative procedure to find maximum similarity with word */
                                697                 :          12126 :     result = iterate_word_similarity(trg2indexes, found, ulen1, len2, len,
                                698                 :                :                                      flags, bounds);
                                699                 :                : 
                                700                 :          12126 :     pfree(trg2indexes);
                                701                 :          12126 :     pfree(found);
                                702                 :          12126 :     pfree(ptrg);
                                703                 :                : 
                                704                 :          12126 :     return result;
                                705                 :                : }
                                706                 :                : 
                                707                 :                : 
                                708                 :                : /*
                                709                 :                :  * Extract the next non-wildcard part of a search string, i.e. a word bounded
                                710                 :                :  * by '_' or '%' meta-characters, non-word characters or string end.
                                711                 :                :  *
                                712                 :                :  * str: source string, of length lenstr bytes (need not be null-terminated)
                                713                 :                :  * buf: where to return the substring (must be long enough; no terminator is written)
                                714                 :                :  * *bytelen: receives byte length of the found substring, padding included
                                715                 :                :  * *charlen: receives character length of the found substring, padding included
                                716                 :                :  *
                                717                 :                :  * Returns pointer to end+1 of the found substring in the source string.
                                718                 :                :  * Returns NULL if no word found (in which case buf, bytelen, charlen not set)
                                719                 :                :  *
                                720                 :                :  * If the found word is bounded by non-word characters or string boundaries
                                721                 :                :  * then this function will include corresponding padding spaces into buf.
                                722                 :                :  */
                                723                 :                : static const char *
 4822 tgl@sss.pgh.pa.us         724                 :            119 : get_wildcard_part(const char *str, int lenstr,
                                725                 :                :                   char *buf, int *bytelen, int *charlen)
                                726                 :                : {
                                727                 :            119 :     const char *beginword = str;
                                728                 :                :     const char *endword;
                                729                 :            119 :     char       *s = buf;  /* write cursor into buf */
 4255                           730                 :            119 :     bool        in_leading_wildcard_meta = false;
                                731                 :            119 :     bool        in_trailing_wildcard_meta = false;
 4753 bruce@momjian.us          732                 :            119 :     bool        in_escape = false;
                                733                 :                :     int         clen;  /* pg_mblen() result for the character at endword */
                                734                 :                : 
                                735                 :                :     /*
                                736                 :                :      * Find the first word character, remembering whether preceding character
                                737                 :                :      * was wildcard meta-character.  Note that the in_escape state persists
                                738                 :                :      * from this loop to the next one, since we may exit at a word character
                                739                 :                :      * that is in_escape.
                                740                 :                :      */
 4822 tgl@sss.pgh.pa.us         741         [ +  + ]:            241 :     while (beginword - str < lenstr)
                                742                 :                :     {
                                743         [ +  + ]:            186 :         if (in_escape)
                                744                 :                :         {
 4023                           745         [ +  - ]:              3 :             if (ISWORDCHR(beginword))
 4822                           746                 :              3 :                 break;
 4255 tgl@sss.pgh.pa.us         747                 :UBC           0 :             in_escape = false;  /* escaped non-word character: the escape ends here */
                                748                 :              0 :             in_leading_wildcard_meta = false;
                                749                 :                :         }
                                750                 :                :         else
                                751                 :                :         {
 4822 tgl@sss.pgh.pa.us         752         [ +  + ]:CBC         183 :             if (ISESCAPECHAR(beginword))
                                753                 :              3 :                 in_escape = true;  /* the next character goes through the in_escape branch */
                                754   [ +  -  +  + ]:            180 :             else if (ISWILDCARDCHAR(beginword))
 4255                           755                 :            104 :                 in_leading_wildcard_meta = true;
 4023                           756         [ +  + ]:             76 :             else if (ISWORDCHR(beginword))
 4822                           757                 :             61 :                 break;
                                758                 :                :             else
 4255                           759                 :             15 :                 in_leading_wildcard_meta = false;
                                760                 :                :         }
 4822                           761                 :            122 :         beginword += pg_mblen(beginword);  /* step over one character */
                                762                 :                :     }
                                763                 :                : 
                                764                 :                :     /*
                                765                 :                :      * Handle string end.
                                766                 :                :      */
                                767         [ +  + ]:            119 :     if (beginword - str >= lenstr)  /* the scan ran off the end without reaching a word character */
                                768                 :             55 :         return NULL;
                                769                 :                : 
                                770                 :                :     /*
                                771                 :                :      * Add left padding spaces if preceding character wasn't wildcard
                                772                 :                :      * meta-character.
                                773                 :                :      */
                                774                 :             64 :     *charlen = 0;
 4255                           775         [ +  + ]:             64 :     if (!in_leading_wildcard_meta)
                                776                 :                :     {
                                777                 :                :         if (LPADDING > 0)  /* LPADDING is defined outside this view; it selects 0, 1 or 2 pad spaces */
                                778                 :                :         {
 4822                           779                 :             15 :             *s++ = ' ';
                                780                 :             15 :             (*charlen)++;
                                781                 :                :             if (LPADDING > 1)
                                782                 :                :             {
                                783                 :             15 :                 *s++ = ' ';
                                784                 :             15 :                 (*charlen)++;
                                785                 :                :             }
                                786                 :                :         }
                                787                 :                :     }
                                788                 :                : 
                                789                 :                :     /*
                                790                 :                :      * Copy data into buf until wildcard meta-character, non-word character or
                                791                 :                :      * string boundary.  Strip escapes during copy.
                                792                 :                :      */
                                793                 :             64 :     endword = beginword;
                                794         [ +  - ]:            244 :     while (endword - str < lenstr)
                                795                 :                :     {
                                796                 :            244 :         clen = pg_mblen(endword);  /* used below as the byte count to copy and to advance */
                                797         [ +  + ]:            244 :         if (in_escape)
                                798                 :                :         {
 4023                           799         [ +  - ]:              3 :             if (ISWORDCHR(endword))
                                800                 :                :             {
 4822                           801                 :              3 :                 memcpy(s, endword, clen);
                                802                 :              3 :                 (*charlen)++;
                                803                 :              3 :                 s += clen;
                                804                 :                :             }
                                805                 :                :             else
                                806                 :                :             {
                                807                 :                :                 /*
                                808                 :                :                  * Back up endword to the escape character when stopping at an
                                809                 :                :                  * escaped char, so that subsequent get_wildcard_part will
                                810                 :                :                  * restart from the escape character.  We assume here that
                                811                 :                :                  * escape chars are single-byte.
                                812                 :                :                  */
 4255 tgl@sss.pgh.pa.us         813                 :UBC           0 :                 endword--;
 4822                           814                 :              0 :                 break;
                                815                 :                :             }
 4255 tgl@sss.pgh.pa.us         816                 :CBC           3 :             in_escape = false;
                                817                 :                :         }
                                818                 :                :         else
                                819                 :                :         {
 4822                           820         [ -  + ]:            241 :             if (ISESCAPECHAR(endword))
 4822 tgl@sss.pgh.pa.us         821                 :UBC           0 :                 in_escape = true;  /* the escape character itself is skipped, not copied */
 4822 tgl@sss.pgh.pa.us         822   [ +  -  +  + ]:CBC         241 :             else if (ISWILDCARDCHAR(endword))
                                823                 :                :             {
 4255                           824                 :             55 :                 in_trailing_wildcard_meta = true;  /* suppresses the right padding below */
 4822                           825                 :             55 :                 break;
                                826                 :                :             }
 4023                           827         [ +  + ]:            186 :             else if (ISWORDCHR(endword))
                                828                 :                :             {
 4822                           829                 :            177 :                 memcpy(s, endword, clen);
                                830                 :            177 :                 (*charlen)++;
                                831                 :            177 :                 s += clen;
                                832                 :                :             }
                                833                 :                :             else
                                834                 :              9 :                 break;  /* non-word character ends the word */
                                835                 :                :         }
                                836                 :            180 :         endword += clen;  /* consume the character just handled */
                                837                 :                :     }
                                838                 :                : 
                                839                 :                :     /*
                                840                 :                :      * Add right padding spaces if next character isn't wildcard
                                841                 :                :      * meta-character.
                                842                 :                :      */
 4255                           843         [ +  + ]:             64 :     if (!in_trailing_wildcard_meta)
                                844                 :                :     {
                                845                 :                :         if (RPADDING > 0)
                                846                 :                :         {
 4822                           847                 :              9 :             *s++ = ' ';
                                848                 :              9 :             (*charlen)++;
                                849                 :                :             if (RPADDING > 1)  /* this listing has no line data for the body: presumably RPADDING <= 1 in this build */
                                850                 :                :             {
                                851                 :                :                 *s++ = ' ';
                                852                 :                :                 (*charlen)++;
                                853                 :                :             }
                                854                 :                :         }
                                855                 :                :     }
                                856                 :                : 
                                857                 :             64 :     *bytelen = s - buf;  /* bytes written to buf, padding included */
                                858                 :             64 :     return endword;  /* the caller passes this back as the next starting point */
                                859                 :                : }
                                860                 :                : 
                                861                 :                : /*
                                862                 :                :  * Generates trigrams for wildcard search string.
                                863                 :                :  *
                                864                 :                :  * Returns array of trigrams that must occur in any string that matches the
                                865                 :                :  * wildcard string.  For example, given pattern "a%bcd%" the trigrams
                                866                 :                :  * "  a" (two left-padding spaces, assuming LPADDING is 2) and "bcd" would be extracted.
                                867                 :                :  */
                                868                 :                : TRGM *
                                869                 :             55 : generate_wildcard_trgm(const char *str, int slen)
                                870                 :                : {
                                871                 :                :     TRGM       *trg;
                                872                 :                :     char       *buf,
                                873                 :                :                *buf2;
                                874                 :                :     trgm       *tptr;  /* write cursor into trg's trigram array */
                                875                 :                :     int         len,
                                876                 :                :                 charlen,
                                877                 :                :                 bytelen;
                                878                 :                :     const char *eword;  /* scan position inside str */
                                879                 :                : 
 2951 teodor@sigaev.ru          880                 :             55 :     protect_out_of_mem(slen);  /* size guard; the helper is defined outside this view */
                                881                 :                : 
 2489 tgl@sss.pgh.pa.us         882                 :             55 :     trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);  /* header plus room for (slen / 2 + 1) * 3 trigrams */
 4822                           883                 :             55 :     trg->flag = ARRKEY;
                                884                 :             55 :     SET_VARSIZE(trg, TRGMHDRSIZE);  /* size covers only the header so far */
                                885                 :                : 
                                886   [ +  -  -  + ]:             55 :     if (slen + LPADDING + RPADDING < 3 || slen == 0)
 4822 tgl@sss.pgh.pa.us         887                 :UBC           0 :         return trg;  /* hand back the header-only array */
                                888                 :                : 
 4822 tgl@sss.pgh.pa.us         889                 :CBC          55 :     tptr = GETARR(trg);
                                890                 :                : 
                                891                 :                :     /* Allocate a buffer for blank-padded, but not yet case-folded, words */
                                892                 :             55 :     buf = palloc(sizeof(char) * (slen + 4));  /* presumably the 4 spare bytes cover LPADDING + RPADDING pad spaces -- confirm */
                                893                 :                : 
                                894                 :                :     /*
                                895                 :                :      * Extract trigrams from each substring extracted by get_wildcard_part.
                                896                 :                :      */
                                897                 :             55 :     eword = str;
                                898                 :            119 :     while ((eword = get_wildcard_part(eword, slen - (eword - str),
                                899         [ +  + ]:            119 :                                       buf, &bytelen, &charlen)) != NULL)
                                900                 :                :     {
                                901                 :                : #ifdef IGNORECASE
                                902                 :             64 :         buf2 = lowerstr_with_len(buf, bytelen);
                                903                 :             64 :         bytelen = strlen(buf2);  /* re-measure bytes after lowercasing; charlen is not recomputed */
                                904                 :                : #else
                                905                 :                :         buf2 = buf;  /* no case folding: use buf directly */
                                906                 :                : #endif
                                907                 :                : 
                                908                 :                :         /*
                                909                 :                :          * append this word's trigrams; make_trigrams returns the new tptr
                                910                 :                :          */
                                911                 :             64 :         tptr = make_trigrams(tptr, buf2, bytelen, charlen);
                                912                 :                : 
                                913                 :                : #ifdef IGNORECASE
                                914                 :             64 :         pfree(buf2);  /* freed once per word in the IGNORECASE build */
                                915                 :                : #endif
                                916                 :                :     }
                                917                 :                : 
                                918                 :             55 :     pfree(buf);
                                919                 :                : 
                                920         [ +  + ]:             55 :     if ((len = tptr - GETARR(trg)) == 0)  /* len = number of trigrams written */
                                921                 :             24 :         return trg;  /* nothing extracted: size is still header-only */
                                922                 :                : 
                                923                 :                :     /*
                                924                 :                :      * Make trigrams unique.
                                925                 :                :      */
 3744                           926         [ +  + ]:             31 :     if (len > 1)
                                927                 :                :     {
  432 peter@eisentraut.org      928                 :             17 :         qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
 1620 tmunro@postgresql.or      929                 :             17 :         len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);  /* len becomes qunique's return value */
                                930                 :                :     }
                                931                 :                : 
 4822 tgl@sss.pgh.pa.us         932                 :             31 :     SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));  /* record the final size for len trigrams */
                                933                 :                : 
                                934                 :             31 :     return trg;
                                935                 :                : }
                                936                 :                : 
                                937                 :                : uint32
 5632 teodor@sigaev.ru          938                 :          34773 : trgm2int(trgm *ptr)  /* packs the three bytes at ptr into the low 24 bits of the result */
                                939                 :                : {
 5421 bruce@momjian.us          940                 :          34773 :     uint32      val = 0;
                                941                 :                : 
                                942                 :          34773 :     val |= *(((unsigned char *) ptr));  /* byte 0, read as unsigned char so it is never negative */
 5632 teodor@sigaev.ru          943                 :          34773 :     val <<= 8;
 5421 bruce@momjian.us          944                 :          34773 :     val |= *(((unsigned char *) ptr) + 1);  /* byte 1 */
 5632 teodor@sigaev.ru          945                 :          34773 :     val <<= 8;
 5421 bruce@momjian.us          946                 :          34773 :     val |= *(((unsigned char *) ptr) + 2);  /* byte 2 ends up in bits 0..7, byte 0 in bits 16..23 */
                                947                 :                : 
 5632 teodor@sigaev.ru          948                 :          34773 :     return val;
                                949                 :                : }
                                950                 :                : 
                                951                 :                : Datum
 7168 bruce@momjian.us          952                 :              7 : show_trgm(PG_FUNCTION_ARGS)
                                953                 :                : {
 2590 noah@leadboat.com         954                 :              7 :     text       *in = PG_GETARG_TEXT_PP(0);
                                955                 :                :     TRGM       *trg;
                                956                 :                :     Datum      *d;
                                957                 :                :     ArrayType  *a;
                                958                 :                :     trgm       *ptr;
                                959                 :                :     int         i;
                                960                 :                : 
                                961   [ -  +  -  -  :              7 :     trg = generate_trgm(VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
                                     -  -  -  -  -  
                                           +  -  + ]
 7168 bruce@momjian.us          962                 :              7 :     d = (Datum *) palloc(sizeof(Datum) * (1 + ARRNELEM(trg)));
                                963                 :                : 
 6121 tgl@sss.pgh.pa.us         964         [ +  + ]:             44 :     for (i = 0, ptr = GETARR(trg); i < ARRNELEM(trg); i++, ptr++)
                                965                 :                :     {
 5421 bruce@momjian.us          966         [ +  - ]:             37 :         text       *item = (text *) palloc(VARHDRSZ + Max(12, pg_database_encoding_max_length() * 3));
                                967                 :                : 
                                968   [ +  -  +  -  :             37 :         if (pg_database_encoding_max_length() > 1 && !ISPRINTABLETRGM(ptr))
                                     +  +  +  -  +  
                                     -  +  +  +  -  
                                     +  -  +  +  -  
                                                 + ]
                                969                 :                :         {
 5632 teodor@sigaev.ru          970                 :UBC           0 :             snprintf(VARDATA(item), 12, "0x%06x", trgm2int(ptr));
                                971                 :              0 :             SET_VARSIZE(item, VARHDRSZ + strlen(VARDATA(item)));
                                972                 :                :         }
                                973                 :                :         else
                                974                 :                :         {
 5632 teodor@sigaev.ru          975                 :CBC          37 :             SET_VARSIZE(item, VARHDRSZ + 3);
                                976                 :             37 :             CPTRGM(VARDATA(item), ptr);
                                977                 :                :         }
 6121 tgl@sss.pgh.pa.us         978                 :             37 :         d[i] = PointerGetDatum(item);
                                979                 :                :     }
                                980                 :                : 
  653 peter@eisentraut.org      981                 :              7 :     a = construct_array_builtin(d, ARRNELEM(trg), TEXTOID);
                                982                 :                : 
 6121 tgl@sss.pgh.pa.us         983         [ +  + ]:             44 :     for (i = 0; i < ARRNELEM(trg); i++)
                                984                 :             37 :         pfree(DatumGetPointer(d[i]));
                                985                 :                : 
 7258 teodor@sigaev.ru          986                 :              7 :     pfree(d);
                                987                 :              7 :     pfree(trg);
 7168 bruce@momjian.us          988         [ -  + ]:              7 :     PG_FREE_IF_COPY(in, 0);
                                989                 :                : 
 7258 teodor@sigaev.ru          990                 :              7 :     PG_RETURN_POINTER(a);
                                991                 :                : }
                                992                 :                : 
                                993                 :                : float4
 2951                           994                 :          68994 : cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
                                995                 :                : {
                                996                 :                :     trgm       *ptr1,
                                997                 :                :                *ptr2;
 7168 bruce@momjian.us          998                 :          68994 :     int         count = 0;
                                999                 :                :     int         len1,
                               1000                 :                :                 len2;
                               1001                 :                : 
 7258 teodor@sigaev.ru         1002                 :          68994 :     ptr1 = GETARR(trg1);
                               1003                 :          68994 :     ptr2 = GETARR(trg2);
                               1004                 :                : 
                               1005                 :          68994 :     len1 = ARRNELEM(trg1);
                               1006                 :          68994 :     len2 = ARRNELEM(trg2);
                               1007                 :                : 
                               1008                 :                :     /* explicit test is needed to avoid 0/0 division when both lengths are 0 */
 4078 tgl@sss.pgh.pa.us        1009   [ +  +  -  + ]:          68994 :     if (len1 <= 0 || len2 <= 0)
                               1010                 :              1 :         return (float4) 0.0;
                               1011                 :                : 
 7168 bruce@momjian.us         1012   [ +  +  +  + ]:         878157 :     while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
                               1013                 :                :     {
                               1014   [ +  +  +  +  :         809164 :         int         res = CMPTRGM(ptr1, ptr2);
                                     +  +  +  +  +  
                                           +  +  + ]
                               1015                 :                : 
                               1016         [ +  + ]:         809164 :         if (res < 0)
 7258 teodor@sigaev.ru         1017                 :         183669 :             ptr1++;
 7168 bruce@momjian.us         1018         [ +  + ]:         625495 :         else if (res > 0)
 7258 teodor@sigaev.ru         1019                 :         214677 :             ptr2++;
                               1020                 :                :         else
                               1021                 :                :         {
                               1022                 :         410818 :             ptr1++;
                               1023                 :         410818 :             ptr2++;
                               1024                 :         410818 :             count++;
                               1025                 :                :         }
                               1026                 :                :     }
                               1027                 :                : 
                               1028                 :                :     /*
                                1029                 :                :      * In inexact mode, count is passed in place of len2: the actual length
                                1030                 :                :      * of the second string is unknown in an inexact search, and count is
                                1031                 :                :      * a lower bound of len2.
                               1032                 :                :      */
 2951                          1033         [ +  + ]:          68993 :     return CALCSML(count, len1, inexact ? count : len2);
                               1034                 :                : }
                               1035                 :                : 
                               1036                 :                : 
                               1037                 :                : /*
                               1038                 :                :  * Returns whether trg2 contains all trigrams in trg1.
                               1039                 :                :  * This relies on the trigram arrays being sorted.
                               1040                 :                :  */
                               1041                 :                : bool
 4822 tgl@sss.pgh.pa.us        1042                 :            190 : trgm_contained_by(TRGM *trg1, TRGM *trg2)
                               1043                 :                : {
                               1044                 :                :     trgm       *ptr1,
                               1045                 :                :                *ptr2;
                               1046                 :                :     int         len1,
                               1047                 :                :                 len2;
                               1048                 :                : 
                               1049                 :            190 :     ptr1 = GETARR(trg1);
                               1050                 :            190 :     ptr2 = GETARR(trg2);
                               1051                 :                : 
                               1052                 :            190 :     len1 = ARRNELEM(trg1);
                               1053                 :            190 :     len2 = ARRNELEM(trg2);
                               1054                 :                : 
                               1055   [ +  +  +  + ]:            622 :     while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
                               1056                 :                :     {
                               1057   [ +  +  +  +  :            599 :         int         res = CMPTRGM(ptr1, ptr2);
                                     +  +  +  +  +  
                                           +  +  + ]
                               1058                 :                : 
                               1059         [ +  + ]:            599 :         if (res < 0)
                               1060                 :            167 :             return false;
                               1061         [ +  + ]:            432 :         else if (res > 0)
                               1062                 :            320 :             ptr2++;
                               1063                 :                :         else
                               1064                 :                :         {
                               1065                 :            112 :             ptr1++;
                               1066                 :            112 :             ptr2++;
                               1067                 :                :         }
                               1068                 :                :     }
                               1069         [ +  + ]:             23 :     if (ptr1 - GETARR(trg1) < len1)
                               1070                 :              4 :         return false;
                               1071                 :                :     else
                               1072                 :             19 :         return true;
                               1073                 :                : }
                               1074                 :                : 
                               1075                 :                : /*
                               1076                 :                :  * Return a palloc'd boolean array showing, for each trigram in "query",
                               1077                 :                :  * whether it is present in the trigram array "key".
                               1078                 :                :  * This relies on the "key" array being sorted, but "query" need not be.
                               1079                 :                :  */
                               1080                 :                : bool *
 4022                          1081                 :           2150 : trgm_presence_map(TRGM *query, TRGM *key)
                               1082                 :                : {
                               1083                 :                :     bool       *result;
                               1084                 :           2150 :     trgm       *ptrq = GETARR(query),
                               1085                 :           2150 :                *ptrk = GETARR(key);
                               1086                 :           2150 :     int         lenq = ARRNELEM(query),
                               1087                 :           2150 :                 lenk = ARRNELEM(key),
                               1088                 :                :                 i;
                               1089                 :                : 
                               1090                 :           2150 :     result = (bool *) palloc0(lenq * sizeof(bool));
                               1091                 :                : 
                               1092                 :                :     /* for each query trigram, do a binary search in the key array */
                               1093         [ +  + ]:         507560 :     for (i = 0; i < lenq; i++)
                               1094                 :                :     {
                               1095                 :         505410 :         int         lo = 0;
                               1096                 :         505410 :         int         hi = lenk;
                               1097                 :                : 
                               1098         [ +  + ]:        2373653 :         while (lo < hi)
                               1099                 :                :         {
                               1100                 :        1876282 :             int         mid = (lo + hi) / 2;
                               1101   [ +  +  +  +  :        1876282 :             int         res = CMPTRGM(ptrq, ptrk + mid);
                                     +  +  +  +  +  
                                           +  +  + ]
                               1102                 :                : 
                               1103         [ +  + ]:        1876282 :             if (res < 0)
                               1104                 :         784082 :                 hi = mid;
                               1105         [ +  + ]:        1092200 :             else if (res > 0)
                               1106                 :        1084161 :                 lo = mid + 1;
                               1107                 :                :             else
                               1108                 :                :             {
                               1109                 :           8039 :                 result[i] = true;
                               1110                 :           8039 :                 break;
                               1111                 :                :             }
                               1112                 :                :         }
                               1113                 :         505410 :         ptrq++;
                               1114                 :                :     }
                               1115                 :                : 
                               1116                 :           2150 :     return result;
                               1117                 :                : }
                               1118                 :                : 
                               1119                 :                : Datum
 7168 bruce@momjian.us         1120                 :          31452 : similarity(PG_FUNCTION_ARGS)
                               1121                 :                : {
 2590 noah@leadboat.com        1122                 :          31452 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1123                 :          31452 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1124                 :                :     TRGM       *trg1,
                               1125                 :                :                *trg2;
                               1126                 :                :     float4      res;
                               1127                 :                : 
                               1128   [ -  +  -  -  :          31452 :     trg1 = generate_trgm(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1));
                                     -  -  -  -  +  
                                           +  +  + ]
                               1129   [ -  +  -  -  :          31452 :     trg2 = generate_trgm(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2));
                                     -  -  -  -  -  
                                           +  -  + ]
                               1130                 :                : 
 2951 teodor@sigaev.ru         1131                 :          31452 :     res = cnt_sml(trg1, trg2, false);
                               1132                 :                : 
 7258                          1133                 :          31452 :     pfree(trg1);
                               1134                 :          31452 :     pfree(trg2);
 7168 bruce@momjian.us         1135         [ -  + ]:          31452 :     PG_FREE_IF_COPY(in1, 0);
                               1136         [ -  + ]:          31452 :     PG_FREE_IF_COPY(in2, 1);
                               1137                 :                : 
 7258 teodor@sigaev.ru         1138                 :          31452 :     PG_RETURN_FLOAT4(res);
                               1139                 :                : }
                               1140                 :                : 
                               1141                 :                : Datum
 2951                          1142                 :            902 : word_similarity(PG_FUNCTION_ARGS)
                               1143                 :                : {
                               1144                 :            902 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1145                 :            902 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1146                 :                :     float4      res;
                               1147                 :                : 
                               1148   [ -  +  -  -  :           1804 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                     -  -  -  -  -  
                                           +  -  + ]
 2866 rhaas@postgresql.org     1149   [ -  +  -  -  :           1804 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                     -  -  -  -  +  
                                           -  +  - ]
                               1150                 :                :                                0);
                               1151                 :                : 
 2216 teodor@sigaev.ru         1152         [ -  + ]:            902 :     PG_FREE_IF_COPY(in1, 0);
                               1153         [ -  + ]:            902 :     PG_FREE_IF_COPY(in2, 1);
                               1154                 :            902 :     PG_RETURN_FLOAT4(res);
                               1155                 :                : }
                               1156                 :                : 
                               1157                 :                : Datum
                               1158                 :            882 : strict_word_similarity(PG_FUNCTION_ARGS)
                               1159                 :                : {
                               1160                 :            882 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1161                 :            882 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1162                 :                :     float4      res;
                               1163                 :                : 
                               1164   [ -  +  -  -  :           1764 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                     -  -  -  -  -  
                                           +  -  + ]
                               1165   [ -  +  -  -  :           1764 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                     -  -  -  -  +  
                                           -  +  - ]
                               1166                 :                :                                WORD_SIMILARITY_STRICT);
                               1167                 :                : 
 2951                          1168         [ -  + ]:            882 :     PG_FREE_IF_COPY(in1, 0);
                               1169         [ -  + ]:            882 :     PG_FREE_IF_COPY(in2, 1);
                               1170                 :            882 :     PG_RETURN_FLOAT4(res);
                               1171                 :                : }
                               1172                 :                : 
                               1173                 :                : Datum
 4880 tgl@sss.pgh.pa.us        1174                 :           1004 : similarity_dist(PG_FUNCTION_ARGS)
                               1175                 :                : {
                               1176                 :           1004 :     float4      res = DatumGetFloat4(DirectFunctionCall2(similarity,
                               1177                 :                :                                                          PG_GETARG_DATUM(0),
                               1178                 :                :                                                          PG_GETARG_DATUM(1)));
                               1179                 :                : 
                               1180                 :           1004 :     PG_RETURN_FLOAT4(1.0 - res);
                               1181                 :                : }
                               1182                 :                : 
                               1183                 :                : Datum
 7168 bruce@momjian.us         1184                 :           6000 : similarity_op(PG_FUNCTION_ARGS)
                               1185                 :                : {
 4880 tgl@sss.pgh.pa.us        1186                 :           6000 :     float4      res = DatumGetFloat4(DirectFunctionCall2(similarity,
                               1187                 :                :                                                          PG_GETARG_DATUM(0),
                               1188                 :                :                                                          PG_GETARG_DATUM(1)));
                               1189                 :                : 
 2951 teodor@sigaev.ru         1190                 :           6000 :     PG_RETURN_BOOL(res >= similarity_threshold);
                               1191                 :                : }
                               1192                 :                : 
                               1193                 :                : Datum
                               1194                 :           1924 : word_similarity_op(PG_FUNCTION_ARGS)
                               1195                 :                : {
                               1196                 :           1924 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1197                 :           1924 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1198                 :                :     float4      res;
                               1199                 :                : 
                               1200   [ -  +  -  -  :           3848 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                     -  -  -  -  -  
                                           +  -  + ]
 2866 rhaas@postgresql.org     1201   [ -  +  -  -  :           3848 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                     -  -  -  -  +  
                                           -  +  - ]
                               1202                 :                :                                WORD_SIMILARITY_CHECK_ONLY);
                               1203                 :                : 
 2951 teodor@sigaev.ru         1204         [ -  + ]:           1924 :     PG_FREE_IF_COPY(in1, 0);
                               1205         [ -  + ]:           1924 :     PG_FREE_IF_COPY(in2, 1);
                               1206                 :           1924 :     PG_RETURN_BOOL(res >= word_similarity_threshold);
                               1207                 :                : }
                               1208                 :                : 
                               1209                 :                : Datum
                               1210                 :           1924 : word_similarity_commutator_op(PG_FUNCTION_ARGS)
                               1211                 :                : {
                               1212                 :           1924 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1213                 :           1924 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1214                 :                :     float4      res;
                               1215                 :                : 
                               1216   [ -  +  -  -  :           3848 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                     -  -  -  -  -  
                                           +  -  + ]
 2866 rhaas@postgresql.org     1217   [ -  +  -  -  :           3848 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                     -  -  -  -  +  
                                           -  +  - ]
                               1218                 :                :                                WORD_SIMILARITY_CHECK_ONLY);
                               1219                 :                : 
 2951 teodor@sigaev.ru         1220         [ -  + ]:           1924 :     PG_FREE_IF_COPY(in1, 0);
                               1221         [ -  + ]:           1924 :     PG_FREE_IF_COPY(in2, 1);
                               1222                 :           1924 :     PG_RETURN_BOOL(res >= word_similarity_threshold);
                               1223                 :                : }
                               1224                 :                : 
                               1225                 :                : Datum
 2951 teodor@sigaev.ru         1226                 :UBC           0 : word_similarity_dist_op(PG_FUNCTION_ARGS)
                               1227                 :                : {
                               1228                 :              0 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1229                 :              0 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1230                 :                :     float4      res;
                               1231                 :                : 
                               1232   [ #  #  #  #  :              0 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                     #  #  #  #  #  
                                           #  #  # ]
 2866 rhaas@postgresql.org     1233   [ #  #  #  #  :              0 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                     #  #  #  #  #  
                                           #  #  # ]
                               1234                 :                :                                0);
                               1235                 :                : 
 2951 teodor@sigaev.ru         1236         [ #  # ]:              0 :     PG_FREE_IF_COPY(in1, 0);
                               1237         [ #  # ]:              0 :     PG_FREE_IF_COPY(in2, 1);
                               1238                 :              0 :     PG_RETURN_FLOAT4(1.0 - res);
                               1239                 :                : }
                               1240                 :                : 
                               1241                 :                : Datum
 2951 teodor@sigaev.ru         1242                 :CBC         714 : word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
                               1243                 :                : {
                               1244                 :            714 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1245                 :            714 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1246                 :                :     float4      res;
                               1247                 :                : 
                               1248   [ -  +  -  -  :           1428 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                     -  -  -  -  -  
                                           +  -  + ]
 2866 rhaas@postgresql.org     1249   [ -  +  -  -  :           1428 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                     -  -  -  -  +  
                                           -  +  - ]
                               1250                 :                :                                0);
                               1251                 :                : 
 2216 teodor@sigaev.ru         1252         [ -  + ]:            714 :     PG_FREE_IF_COPY(in1, 0);
                               1253         [ -  + ]:            714 :     PG_FREE_IF_COPY(in2, 1);
                               1254                 :            714 :     PG_RETURN_FLOAT4(1.0 - res);
                               1255                 :                : }
                               1256                 :                : 
                               1257                 :                : Datum
                               1258                 :           2530 : strict_word_similarity_op(PG_FUNCTION_ARGS)
                               1259                 :                : {
                               1260                 :           2530 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1261                 :           2530 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1262                 :                :     float4      res;
                               1263                 :                : 
                               1264   [ -  +  -  -  :           5060 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                     -  -  -  -  -  
                                           +  -  + ]
                               1265   [ -  +  -  -  :           5060 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                     -  -  -  -  +  
                                           -  +  - ]
                               1266                 :                :                                WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT);
                               1267                 :                : 
                               1268         [ -  + ]:           2530 :     PG_FREE_IF_COPY(in1, 0);
                               1269         [ -  + ]:           2530 :     PG_FREE_IF_COPY(in2, 1);
                               1270                 :           2530 :     PG_RETURN_BOOL(res >= strict_word_similarity_threshold);
                               1271                 :                : }
                               1272                 :                : 
                               1273                 :                : Datum
                               1274                 :           2530 : strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
                               1275                 :                : {
                               1276                 :           2530 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1277                 :           2530 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1278                 :                :     float4      res;
                               1279                 :                : 
                               1280   [ -  +  -  -  :           5060 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                     -  -  -  -  -  
                                           +  -  + ]
                               1281   [ -  +  -  -  :           5060 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                     -  -  -  -  +  
                                           -  +  - ]
                               1282                 :                :                                WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT);
                               1283                 :                : 
                               1284         [ -  + ]:           2530 :     PG_FREE_IF_COPY(in1, 0);
                               1285         [ -  + ]:           2530 :     PG_FREE_IF_COPY(in2, 1);
                               1286                 :           2530 :     PG_RETURN_BOOL(res >= strict_word_similarity_threshold);
                               1287                 :                : }
                               1288                 :                : 
                               1289                 :                : Datum
 2216 teodor@sigaev.ru         1290                 :UBC           0 : strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
                               1291                 :                : {
                               1292                 :              0 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1293                 :              0 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1294                 :                :     float4      res;
                               1295                 :                : 
                               1296   [ #  #  #  #  :              0 :     res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                     #  #  #  #  #  
                                           #  #  # ]
                               1297   [ #  #  #  #  :              0 :                                VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                     #  #  #  #  #  
                                           #  #  # ]
                               1298                 :                :                                WORD_SIMILARITY_STRICT);
                               1299                 :                : 
                               1300         [ #  # ]:              0 :     PG_FREE_IF_COPY(in1, 0);
                               1301         [ #  # ]:              0 :     PG_FREE_IF_COPY(in2, 1);
                               1302                 :              0 :     PG_RETURN_FLOAT4(1.0 - res);
                               1303                 :                : }
                               1304                 :                : 
                               1305                 :                : Datum
 2216 teodor@sigaev.ru         1306                 :CBC         720 : strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
                               1307                 :                : {
                               1308                 :            720 :     text       *in1 = PG_GETARG_TEXT_PP(0);
                               1309                 :            720 :     text       *in2 = PG_GETARG_TEXT_PP(1);
                               1310                 :                :     float4      res;
                               1311                 :                : 
                               1312   [ -  +  -  -  :           1440 :     res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
                                     -  -  -  -  -  
                                           +  -  + ]
                               1313   [ -  +  -  -  :           1440 :                                VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
                                     -  -  -  -  +  
                                           -  +  - ]
                               1314                 :                :                                WORD_SIMILARITY_STRICT);
                               1315                 :                : 
 2951                          1316         [ -  + ]:            720 :     PG_FREE_IF_COPY(in1, 0);
                               1317         [ -  + ]:            720 :     PG_FREE_IF_COPY(in2, 1);
                               1318                 :            720 :     PG_RETURN_FLOAT4(1.0 - res);
                               1319                 :                : }

Generated by: LCOV version 2.1-beta2-3-g6141622