LCOV - differential code coverage report
Current view: top level - src/include/port - pg_lfind.h (source / functions) Coverage Total Hit GNC
Current: Differential Code Coverage HEAD vs 15 Lines: 100.0 % 52 52 52
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 3 3 3
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * pg_lfind.h
       4                 :  *    Optimized linear search routines using SIMD intrinsics where
       5                 :  *    available.
       6                 :  *
       7                 :  * Copyright (c) 2022-2023, PostgreSQL Global Development Group
       8                 :  *
       9                 :  * IDENTIFICATION
      10                 :  *    src/include/port/pg_lfind.h
      11                 :  *
      12                 :  *-------------------------------------------------------------------------
      13                 :  */
      14                 : #ifndef PG_LFIND_H
      15                 : #define PG_LFIND_H
      16                 : 
      17                 : #include "port/simd.h"
      18                 : 
      19                 : /*
      20                 :  * pg_lfind8
      21                 :  *
      22                 :  * Return true if there is an element in 'base' that equals 'key', otherwise
      23                 :  * return false.
      24                 :  */
      25                 : static inline bool
      26 GNC     1621808 : pg_lfind8(uint8 key, uint8 *base, uint32 nelem)
      27                 : {
      28                 :     uint32      i;
      29                 : 
      30                 :     /* round down to multiple of vector length */
      31         1621808 :     uint32      tail_idx = nelem & ~(sizeof(Vector8) - 1);
      32                 :     Vector8     chunk;
      33                 : 
      34         2593476 :     for (i = 0; i < tail_idx; i += sizeof(Vector8))
      35                 :     {
      36         1621848 :         vector8_load(&chunk, &base[i]);
      37         1621848 :         if (vector8_has(chunk, key))
      38          650180 :             return true;
      39                 :     }
      40                 : 
      41                 :     /* Process the remaining elements one at a time. */
      42          971681 :     for (; i < nelem; i++)
      43                 :     {
      44              60 :         if (key == base[i])
      45               7 :             return true;
      46                 :     }
      47                 : 
      48          971621 :     return false;
      49                 : }
      50                 : 
      51                 : /*
      52                 :  * pg_lfind8_le
      53                 :  *
      54                 :  * Return true if there is an element in 'base' that is less than or equal to
      55                 :  * 'key', otherwise return false.
      56                 :  */
      57                 : static inline bool
      58          160874 : pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
      59                 : {
      60                 :     uint32      i;
      61                 : 
      62                 :     /* round down to multiple of vector length */
      63          160874 :     uint32      tail_idx = nelem & ~(sizeof(Vector8) - 1);
      64                 :     Vector8     chunk;
      65                 : 
      66          321775 :     for (i = 0; i < tail_idx; i += sizeof(Vector8))
      67                 :     {
      68          160914 :         vector8_load(&chunk, &base[i]);
      69          160914 :         if (vector8_has_le(chunk, key))
      70              13 :             return true;
      71                 :     }
      72                 : 
      73                 :     /* Process the remaining elements one at a time. */
      74          160908 :     for (; i < nelem; i++)
      75                 :     {
      76              60 :         if (base[i] <= key)
      77              13 :             return true;
      78                 :     }
      79                 : 
      80          160848 :     return false;
      81                 : }
      82                 : 
      83                 : /*
      84                 :  * pg_lfind32
      85                 :  *
      86                 :  * Return true if there is an element in 'base' that equals 'key', otherwise
      87                 :  * return false.
      88                 :  */
      89                 : static inline bool
      90         6372203 : pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
      91                 : {
      92         6372203 :     uint32      i = 0;
      93                 : 
      94                 : #ifndef USE_NO_SIMD
      95                 : 
      96                 :     /*
      97                 :      * For better instruction-level parallelism, each loop iteration operates
      98                 :      * on a block of four registers.  Testing for SSE2 has showed this is ~40%
      99                 :      * faster than using a block of two registers.
     100                 :      */
     101         6372203 :     const Vector32 keys = vector32_broadcast(key);  /* load copies of key */
     102         6372203 :     const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
     103         6372203 :     const uint32 nelem_per_iteration = 4 * nelem_per_vector;
     104                 : 
     105                 :     /* round down to multiple of elements per iteration */
     106         6372203 :     const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
     107                 : 
     108                 : #if defined(USE_ASSERT_CHECKING)
     109         6372203 :     bool        assert_result = false;
     110                 : 
     111                 :     /* pre-compute the result for assert checking */
     112        14008084 :     for (i = 0; i < nelem; i++)
     113                 :     {
     114         7641820 :         if (key == base[i])
     115                 :         {
     116            5939 :             assert_result = true;
     117            5939 :             break;
     118                 :         }
     119                 :     }
     120                 : #endif
     121                 : 
     122         6372231 :     for (i = 0; i < tail_idx; i += nelem_per_iteration)
     123                 :     {
     124                 :         Vector32    vals1,
     125                 :                     vals2,
     126                 :                     vals3,
     127                 :                     vals4,
     128                 :                     result1,
     129                 :                     result2,
     130                 :                     result3,
     131                 :                     result4,
     132                 :                     tmp1,
     133                 :                     tmp2,
     134                 :                     result;
     135                 : 
     136                 :         /* load the next block into 4 registers */
     137              30 :         vector32_load(&vals1, &base[i]);
     138              30 :         vector32_load(&vals2, &base[i + nelem_per_vector]);
     139              30 :         vector32_load(&vals3, &base[i + nelem_per_vector * 2]);
     140              30 :         vector32_load(&vals4, &base[i + nelem_per_vector * 3]);
     141                 : 
     142                 :         /* compare each value to the key */
     143              30 :         result1 = vector32_eq(keys, vals1);
     144              30 :         result2 = vector32_eq(keys, vals2);
     145              30 :         result3 = vector32_eq(keys, vals3);
     146              30 :         result4 = vector32_eq(keys, vals4);
     147                 : 
     148                 :         /* combine the results into a single variable */
     149              30 :         tmp1 = vector32_or(result1, result2);
     150              30 :         tmp2 = vector32_or(result3, result4);
     151              30 :         result = vector32_or(tmp1, tmp2);
     152                 : 
     153                 :         /* see if there was a match */
     154              30 :         if (vector32_is_highbit_set(result))
     155                 :         {
     156               2 :             Assert(assert_result == true);
     157               2 :             return true;
     158                 :         }
     159                 :     }
     160                 : #endif                          /* ! USE_NO_SIMD */
     161                 : 
     162                 :     /* Process the remaining elements one at a time. */
     163        14007626 :     for (; i < nelem; i++)
     164                 :     {
     165         7641362 :         if (key == base[i])
     166                 :         {
     167                 : #ifndef USE_NO_SIMD
     168            5937 :             Assert(assert_result == true);
     169                 : #endif
     170            5937 :             return true;
     171                 :         }
     172                 :     }
     173                 : 
     174                 : #ifndef USE_NO_SIMD
     175         6366264 :     Assert(assert_result == false);
     176                 : #endif
     177         6366264 :     return false;
     178                 : }
     179                 : 
     180                 : #endif                          /* PG_LFIND_H */
        

Generated by: LCOV version v1.16-55-g56c0a2a