LCOV - differential code coverage report
Current view:   top level - src/include/port/atomics - arch-x86.h (source / functions)
Current:        Differential Code Coverage HEAD vs 15
Current Date:   2023-04-08 15:15:32
Baseline:       15
Baseline Date:  2023-04-08 15:09:40

                Coverage    Total    Hit    GNC    CBC    DCB
Lines:           100.0 %       22     22      1     21      1
Functions:       100.0 %        6      6      1      5

Legend: Lines: hit | not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * arch-x86.h
       4                 :  *    Atomic operations considerations specific to intel x86
       5                 :  *
       6                 :  * Note that we actually require a 486 upwards because the 386 doesn't have
       7                 :  * support for xadd and cmpxchg. Given that the 386 isn't supported anywhere
        8                 :  * anymore, that's not much of a restriction, luckily.
       9                 :  *
      10                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      11                 :  * Portions Copyright (c) 1994, Regents of the University of California
      12                 :  *
      13                 :  * NOTES:
      14                 :  *
      15                 :  * src/include/port/atomics/arch-x86.h
      16                 :  *
      17                 :  *-------------------------------------------------------------------------
      18                 :  */
      19                 : 
      20                 : /*
       21                 :  * Both 32 and 64 bit x86 do not allow loads to be reordered with other loads,
       22                 :  * or stores to be reordered with other stores, but a load can be reordered
       23                 :  * ahead of an earlier store to a different location.
      24                 :  *
      25                 :  * Technically, some x86-ish chips support uncached memory access and/or
      26                 :  * special instructions that are weakly ordered.  In those cases we'd need
      27                 :  * the read and write barriers to be lfence and sfence.  But since we don't
      28                 :  * do those things, a compiler barrier should be enough.
      29                 :  *
      30                 :  * "lock; addl" has worked for longer than "mfence". It's also rumored to be
      31                 :  * faster in many scenarios.
      32                 :  */
      33                 : 
      34                 : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
      35                 : #if defined(__i386__) || defined(__i386)
      36                 : #define pg_memory_barrier_impl()        \
      37                 :     __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
      38                 : #elif defined(__x86_64__)
      39                 : #define pg_memory_barrier_impl()        \
      40                 :     __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
      41                 : #endif
      42                 : #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
      43                 : 
      44                 : #define pg_read_barrier_impl()      pg_compiler_barrier_impl()
      45                 : #define pg_write_barrier_impl()     pg_compiler_barrier_impl()
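
For illustration, a minimal sketch of the publication pattern these barriers exist for, written against the un-suffixed wrappers (pg_write_barrier()/pg_read_barrier()) that atomics.h layers over the _impl macros above; the shared variables and function names are hypothetical:

    #include "postgres.h"
    #include "port/atomics.h"

    static int  shared_payload = 0;         /* hypothetical shared state */
    static volatile int shared_ready = 0;

    static void
    publish(void)
    {
        shared_payload = 42;
        pg_write_barrier();         /* on x86 this is only a compiler barrier */
        shared_ready = 1;
    }

    static void
    consume(void)
    {
        if (shared_ready)
        {
            pg_read_barrier();      /* likewise just a compiler barrier here */
            Assert(shared_payload == 42);
        }
    }
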
      46                 : 
      47                 : /*
      48                 :  * Provide implementation for atomics using inline assembly on x86 gcc. It's
      49                 :  * nice to support older gcc's and the compare/exchange implementation here is
       50                 :  * actually more efficient than the __sync variant.
      51                 :  */
      52                 : #if defined(HAVE_ATOMICS)
      53                 : 
      54                 : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
      55                 : 
      56                 : #define PG_HAVE_ATOMIC_FLAG_SUPPORT
      57                 : typedef struct pg_atomic_flag
      58                 : {
      59                 :     volatile char value;
      60                 : } pg_atomic_flag;
      61                 : 
      62                 : #define PG_HAVE_ATOMIC_U32_SUPPORT
      63                 : typedef struct pg_atomic_uint32
      64                 : {
      65                 :     volatile uint32 value;
      66                 : } pg_atomic_uint32;
      67                 : 
      68                 : /*
      69                 :  * It's too complicated to write inline asm for 64bit types on 32bit and the
      70                 :  * 486 can't do it anyway.
      71                 :  */
      72                 : #ifdef __x86_64__
      73                 : #define PG_HAVE_ATOMIC_U64_SUPPORT
      74                 : typedef struct pg_atomic_uint64
      75                 : {
      76                 :     /* alignment guaranteed due to being on a 64bit platform */
      77                 :     volatile uint64 value;
      78                 : } pg_atomic_uint64;
      79                 : #endif  /* __x86_64__ */
      80                 : 
      81                 : #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
      82                 : 
      83                 : #endif /* defined(HAVE_ATOMICS) */
      84                 : 
      85                 : #if !defined(PG_HAVE_SPIN_DELAY)
      86                 : /*
      87                 :  * This sequence is equivalent to the PAUSE instruction ("rep" is
      88                 :  * ignored by old IA32 processors if the following instruction is
      89                 :  * not a string operation); the IA-32 Architecture Software
      90                 :  * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
      91                 :  * PAUSE in the inner loop of a spin lock is necessary for good
      92                 :  * performance:
      93                 :  *
      94                 :  *     The PAUSE instruction improves the performance of IA-32
      95                 :  *     processors supporting Hyper-Threading Technology when
      96                 :  *     executing spin-wait loops and other routines where one
      97                 :  *     thread is accessing a shared lock or semaphore in a tight
      98                 :  *     polling loop. When executing a spin-wait loop, the
      99                 :  *     processor can suffer a severe performance penalty when
     100                 :  *     exiting the loop because it detects a possible memory order
     101                 :  *     violation and flushes the core processor's pipeline. The
     102                 :  *     PAUSE instruction provides a hint to the processor that the
     103                 :  *     code sequence is a spin-wait loop. The processor uses this
     104                 :  *     hint to avoid the memory order violation and prevent the
     105                 :  *     pipeline flush. In addition, the PAUSE instruction
     106                 :  *     de-pipelines the spin-wait loop to prevent it from
     107                 :  *     consuming execution resources excessively.
     108                 :  */
     109                 : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
     110                 : #define PG_HAVE_SPIN_DELAY
     111                 : static __inline__ void
     112                 : pg_spin_delay_impl(void)
     113                 : {
     114                 :     __asm__ __volatile__(" rep; nop            \n");
     115                 : }
     116                 : #elif defined(_MSC_VER) && defined(__x86_64__)
     117                 : #define PG_HAVE_SPIN_DELAY
     118                 : static __forceinline void
     119                 : pg_spin_delay_impl(void)
     120                 : {
     121                 :     _mm_pause();
     122                 : }
     123                 : #elif defined(_MSC_VER)
     124                 : #define PG_HAVE_SPIN_DELAY
     125                 : static __forceinline void
     126                 : pg_spin_delay_impl(void)
     127                 : {
     128                 :     /* See comment for gcc code. Same code, MASM syntax */
     129                 :     __asm rep nop;
     130                 : }
     131                 : #endif
     132                 : #endif /* !defined(PG_HAVE_SPIN_DELAY) */
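
As a hedged illustration of the kind of spin-wait loop the PAUSE hint is aimed at (real PostgreSQL code spins through the higher-level spinlock support in s_lock.h rather than calling the _impl function directly; the helper name below is made up):

    #include "postgres.h"
    #include "port/atomics.h"

    /* Busy-wait until *v becomes nonzero, pausing between probes. */
    static void
    wait_for_nonzero(volatile pg_atomic_uint32 *v)
    {
        while (pg_atomic_read_u32(v) == 0)
            pg_spin_delay_impl();   /* "rep; nop" / _mm_pause() per the code above */
    }
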
     133                 : 
     134                 : 
     135                 : #if defined(HAVE_ATOMICS)
     136                 : 
     137                 : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
     138                 : 
     139                 : #define PG_HAVE_ATOMIC_TEST_SET_FLAG
     140                 : static inline bool
     141 CBC         325 : pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
     142                 : {
     143 GNC         325 :     char        _res = 1;
     144                 : 
     145 CBC         325 :     __asm__ __volatile__(
     146                 :         "  lock            \n"
     147                 :         "  xchgb   %0,%1   \n"
     148                 : :       "+q"(_res), "+m"(ptr->value)
     149                 : :
     150                 : :       "memory");
     151             325 :     return _res == 0;
     152                 : }
     153                 : 
     154                 : #define PG_HAVE_ATOMIC_CLEAR_FLAG
     155                 : static inline void
     156            5503 : pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
     157                 : {
     158                 :     /*
     159                 :      * On a TSO architecture like x86 it's sufficient to use a compiler
     160                 :      * barrier to achieve release semantics.
     161                 :      */
     162            5503 :     __asm__ __volatile__("" ::: "memory");
     163            5503 :     ptr->value = 0;
     164            5503 : }
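
A sketch of the acquire/release pairing these two primitives provide, using the un-suffixed atomics.h wrappers; PostgreSQL's actual spinlocks live in s_lock.h/s_lock.c, so this is purely illustrative and the names are hypothetical:

    #include "postgres.h"
    #include "port/atomics.h"

    static pg_atomic_flag lock_flag;    /* initialize once with pg_atomic_init_flag() */
    static int  protected_counter = 0;

    static void
    bump_protected_counter(void)
    {
        /* xchgb is a full barrier, so a successful test-and-set acquires */
        while (!pg_atomic_test_set_flag(&lock_flag))
            pg_spin_delay_impl();

        protected_counter++;

        /* compiler barrier + plain store is a sufficient release on TSO */
        pg_atomic_clear_flag(&lock_flag);
    }
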
     165                 : 
     166                 : #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
     167                 : static inline bool
     168       457759923 : pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
     169                 :                                     uint32 *expected, uint32 newval)
     170                 : {
     171                 :     char    ret;
     172                 : 
     173                 :     /*
     174                 :      * Perform cmpxchg and use the zero flag which it implicitly sets when
     175                 :      * equal to measure the success.
     176                 :      */
     177       457759923 :     __asm__ __volatile__(
     178                 :         "  lock                \n"
     179                 :         "  cmpxchgl    %4,%5   \n"
     180                 :         "   setz       %2      \n"
     181                 : :       "=a" (*expected), "=m"(ptr->value), "=q" (ret)
     182       457759923 : :       "a" (*expected), "r" (newval), "m"(ptr->value)
     183                 : :       "memory", "cc");
     184       457759923 :     return (bool) ret;
     185                 : }
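
This primitive is normally used in a retry loop; a small sketch computing an atomic maximum via the un-suffixed wrapper (helper name hypothetical). Note that on failure cmpxchg stores the currently observed value back into *expected, which is what lets the loop retry without an extra read:

    #include "postgres.h"
    #include "port/atomics.h"

    static void
    atomic_store_max_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
    {
        uint32      old = pg_atomic_read_u32(ptr);

        while (old < newval &&
               !pg_atomic_compare_exchange_u32(ptr, &old, newval))
        {
            /* 'old' now holds the freshly observed value; retry */
        }
    }
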
     186                 : 
     187                 : #define PG_HAVE_ATOMIC_FETCH_ADD_U32
     188                 : static inline uint32
     189         5316421 : pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
     190                 : {
     191                 :     uint32 res;
     192         5316421 :     __asm__ __volatile__(
     193                 :         "  lock                \n"
     194                 :         "  xaddl   %0,%1       \n"
     195                 : :       "=q"(res), "=m"(ptr->value)
     196                 : :       "0" (add_), "m"(ptr->value)
     197                 : :       "memory", "cc");
     198         5316421 :     return res;
     199                 : }
     200                 : 
     201                 : #ifdef __x86_64__
     202                 : 
     203                 : #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
     204                 : static inline bool
     205         1481957 : pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
     206                 :                                     uint64 *expected, uint64 newval)
     207                 : {
     208                 :     char    ret;
     209                 : 
     210                 :     /*
     211                 :      * Perform cmpxchg and use the zero flag which it implicitly sets when
     212                 :      * equal to measure the success.
     213                 :      */
     214         1481957 :     __asm__ __volatile__(
     215                 :         "  lock                \n"
     216                 :         "  cmpxchgq    %4,%5   \n"
     217                 :         "   setz       %2      \n"
     218                 : :       "=a" (*expected), "=m"(ptr->value), "=q" (ret)
     219         1481957 : :       "a" (*expected), "r" (newval), "m"(ptr->value)
     220                 : :       "memory", "cc");
     221         1481957 :     return (bool) ret;
     222                 : }
     223                 : 
     224                 : #define PG_HAVE_ATOMIC_FETCH_ADD_U64
     225                 : static inline uint64
     226           93115 : pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
     227                 : {
     228                 :     uint64 res;
     229           93115 :     __asm__ __volatile__(
     230                 :         "  lock                \n"
     231                 :         "  xaddq   %0,%1       \n"
     232                 : :       "=q"(res), "=m"(ptr->value)
     233                 : :       "0" (add_), "m"(ptr->value)
     234                 : :       "memory", "cc");
     235           93115 :     return res;
     236                 : }
     237                 : 
     238                 : #endif /* __x86_64__ */
     239                 : 
     240                 : #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
     241                 : 
     242                 : /*
     243                 :  * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms
      244                 :  * since at least the 586, as well as on all x86-64 CPUs.
     245                 :  */
      246                 : #if defined(__i586__) || defined(__i686__) || /* gcc i586+ */  \
     247                 :     (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
     248                 :     defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
     249                 : #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
     250                 : #endif /* 8 byte single-copy atomicity */
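
For illustration, this define is what permits reading an aligned 8-byte value with a single plain load instead of the compare-exchange style read the generic fallback otherwise resorts to; the helper below is hypothetical:

    #include "postgres.h"
    #include "port/atomics.h"

    static uint64
    read_u64_plain(volatile pg_atomic_uint64 *ptr)
    {
    #ifdef PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
        AssertPointerAlignment(ptr, 8);
        return ptr->value;              /* single load, can never be torn */
    #else
        return pg_atomic_read_u64(ptr); /* let the generic layer handle it */
    #endif
    }
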
     251                 : 
     252                 : #endif /* HAVE_ATOMICS */
        

Generated by: LCOV version v1.16-55-g56c0a2a