LCOV - differential code coverage report
Current view: top level - src/include/port - pg_lfind.h (source / functions) Coverage Total Hit GNC
Current: Differential Code Coverage HEAD vs 15 Lines: 100.0 % 52 52 52
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 3 3 3
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (180,240] days: 100.0 % 37 37 37
Legend: Lines: hit not hit (240..) days: 100.0 % 15 15 15
Function coverage date bins:
(180,240] days: 100.0 % 2 2 2
(240..) days: 100.0 % 1 1 1

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * pg_lfind.h
                                  4                 :  *    Optimized linear search routines using SIMD intrinsics where
                                  5                 :  *    available.
                                  6                 :  *
                                  7                 :  * Copyright (c) 2022-2023, PostgreSQL Global Development Group
                                  8                 :  *
                                  9                 :  * IDENTIFICATION
                                 10                 :  *    src/include/port/pg_lfind.h
                                 11                 :  *
                                 12                 :  *-------------------------------------------------------------------------
                                 13                 :  */
                                 14                 : #ifndef PG_LFIND_H
                                 15                 : #define PG_LFIND_H
                                 16                 : 
                                 17                 : #include "port/simd.h"
                                 18                 : 
                                 19                 : /*
                                 20                 :  * pg_lfind8
                                 21                 :  *
                                 22                 :  * Return true if there is an element in 'base' that equals 'key', otherwise
                                 23                 :  * return false.
                                 24                 :  */
                                 25                 : static inline bool
  232 john.naylor                26 GNC     1621808 : pg_lfind8(uint8 key, uint8 *base, uint32 nelem)
                                 27                 : {
                                 28                 :     uint32      i;
                                 29                 : 
                                 30                 :     /* round down to multiple of vector length */
                                 31         1621808 :     uint32      tail_idx = nelem & ~(sizeof(Vector8) - 1);
                                 32                 :     Vector8     chunk;
                                 33                 : 
                                 34         2593476 :     for (i = 0; i < tail_idx; i += sizeof(Vector8))
                                 35                 :     {
                                 36         1621848 :         vector8_load(&chunk, &base[i]);
                                 37         1621848 :         if (vector8_has(chunk, key))
                                 38          650180 :             return true;
                                 39                 :     }
                                 40                 : 
                                 41                 :     /* Process the remaining elements one at a time. */
                                 42          971681 :     for (; i < nelem; i++)
                                 43                 :     {
                                 44              60 :         if (key == base[i])
                                 45               7 :             return true;
                                 46                 :     }
                                 47                 : 
                                 48          971621 :     return false;
                                 49                 : }
                                 50                 : 
                                 51                 : /*
                                 52                 :  * pg_lfind8_le
                                 53                 :  *
                                 54                 :  * Return true if there is an element in 'base' that is less than or equal to
                                 55                 :  * 'key', otherwise return false.
                                 56                 :  */
                                 57                 : static inline bool
                                 58          160874 : pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
                                 59                 : {
                                 60                 :     uint32      i;
                                 61                 : 
                                 62                 :     /* round down to multiple of vector length */
                                 63          160874 :     uint32      tail_idx = nelem & ~(sizeof(Vector8) - 1);
                                 64                 :     Vector8     chunk;
                                 65                 : 
                                 66          321775 :     for (i = 0; i < tail_idx; i += sizeof(Vector8))
                                 67                 :     {
                                 68          160914 :         vector8_load(&chunk, &base[i]);
                                 69          160914 :         if (vector8_has_le(chunk, key))
                                 70              13 :             return true;
                                 71                 :     }
                                 72                 : 
                                 73                 :     /* Process the remaining elements one at a time. */
                                 74          160908 :     for (; i < nelem; i++)
                                 75                 :     {
                                 76              60 :         if (base[i] <= key)
                                 77              13 :             return true;
                                 78                 :     }
                                 79                 : 
                                 80          160848 :     return false;
                                 81                 : }
                                 82                 : 
                                 83                 : /*
                                 84                 :  * pg_lfind32
                                 85                 :  *
                                 86                 :  * Return true if there is an element in 'base' that equals 'key', otherwise
                                 87                 :  * return false.
                                 88                 :  */
                                 89                 : static inline bool
  249                            90         6372203 : pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
                                 91                 : {
                                 92         6372203 :     uint32      i = 0;
                                 93                 : 
                                 94                 : #ifndef USE_NO_SIMD
                                 95                 : 
                                 96                 :     /*
                                 97                 :      * For better instruction-level parallelism, each loop iteration operates
                                 98                 :      * on a block of four registers.  Testing for SSE2 has showed this is ~40%
                                 99                 :      * faster than using a block of two registers.
                                100                 :      */
  223                           101         6372203 :     const Vector32 keys = vector32_broadcast(key);  /* load copies of key */
                                102         6372203 :     const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
                                103         6372203 :     const uint32 nelem_per_iteration = 4 * nelem_per_vector;
                                104                 : 
                                105                 :     /* round down to multiple of elements per iteration */
                                106         6372203 :     const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
                                107                 : 
                                108                 : #if defined(USE_ASSERT_CHECKING)
  249                           109         6372203 :     bool        assert_result = false;
                                110                 : 
                                111                 :     /* pre-compute the result for assert checking */
                                112        14008084 :     for (i = 0; i < nelem; i++)
                                113                 :     {
                                114         7641820 :         if (key == base[i])
                                115                 :         {
                                116            5939 :             assert_result = true;
                                117            5939 :             break;
                                118                 :         }
                                119                 :     }
                                120                 : #endif
                                121                 : 
  223                           122         6372231 :     for (i = 0; i < tail_idx; i += nelem_per_iteration)
                                123                 :     {
                                124                 :         Vector32    vals1,
                                125                 :                     vals2,
                                126                 :                     vals3,
                                127                 :                     vals4,
                                128                 :                     result1,
                                129                 :                     result2,
                                130                 :                     result3,
                                131                 :                     result4,
                                132                 :                     tmp1,
                                133                 :                     tmp2,
                                134                 :                     result;
                                135                 : 
                                136                 :         /* load the next block into 4 registers */
                                137              30 :         vector32_load(&vals1, &base[i]);
                                138              30 :         vector32_load(&vals2, &base[i + nelem_per_vector]);
                                139              30 :         vector32_load(&vals3, &base[i + nelem_per_vector * 2]);
                                140              30 :         vector32_load(&vals4, &base[i + nelem_per_vector * 3]);
                                141                 : 
                                142                 :         /* compare each value to the key */
                                143              30 :         result1 = vector32_eq(keys, vals1);
                                144              30 :         result2 = vector32_eq(keys, vals2);
                                145              30 :         result3 = vector32_eq(keys, vals3);
                                146              30 :         result4 = vector32_eq(keys, vals4);
                                147                 : 
                                148                 :         /* combine the results into a single variable */
                                149              30 :         tmp1 = vector32_or(result1, result2);
                                150              30 :         tmp2 = vector32_or(result3, result4);
                                151              30 :         result = vector32_or(tmp1, tmp2);
                                152                 : 
                                153                 :         /* see if there was a match */
                                154              30 :         if (vector32_is_highbit_set(result))
                                155                 :         {
  249                           156               2 :             Assert(assert_result == true);
                                157               2 :             return true;
                                158                 :         }
                                159                 :     }
                                160                 : #endif                          /* ! USE_NO_SIMD */
                                161                 : 
                                162                 :     /* Process the remaining elements one at a time. */
                                163        14007626 :     for (; i < nelem; i++)
                                164                 :     {
                                165         7641362 :         if (key == base[i])
                                166                 :         {
                                167                 : #ifndef USE_NO_SIMD
                                168            5937 :             Assert(assert_result == true);
                                169                 : #endif
                                170            5937 :             return true;
                                171                 :         }
                                172                 :     }
                                173                 : 
                                174                 : #ifndef USE_NO_SIMD
                                175         6366264 :     Assert(assert_result == false);
                                176                 : #endif
                                177         6366264 :     return false;
                                178                 : }
                                179                 : 
                                180                 : #endif                          /* PG_LFIND_H */
        

Generated by: LCOV version v1.16-55-g56c0a2a