LCOV - differential code coverage report
Current view:  top level - src/include/storage - s_lock.h (source / functions)
Current:       Differential Code Coverage HEAD vs 15
Current Date:  2023-04-08 15:15:32
Baseline:      15
Baseline Date: 2023-04-08 15:09:40

                Coverage   Total   Hit   GIC   GNC   CBC   DCB
Lines:           100.0 %      15    15     8     1     6     9
Functions:       100.0 %       3     3     1     1     1     1

Legend: Lines: hit / not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * s_lock.h
       4                 :  *     Hardware-dependent implementation of spinlocks.
       5                 :  *
       6                 :  *  NOTE: none of the macros in this file are intended to be called directly.
       7                 :  *  Call them through the hardware-independent macros in spin.h.
       8                 :  *
       9                 :  *  The following hardware-dependent macros must be provided for each
      10                 :  *  supported platform:
      11                 :  *
      12                 :  *  void S_INIT_LOCK(slock_t *lock)
      13                 :  *      Initialize a spinlock (to the unlocked state).
      14                 :  *
      15                 :  *  int S_LOCK(slock_t *lock)
      16                 :  *      Acquire a spinlock, waiting if necessary.
      17                 :  *      Time out and abort() if unable to acquire the lock in a
      18                 :  *      "reasonable" amount of time --- typically ~ 1 minute.
      19                 :  *      Should return number of "delays"; see s_lock.c
      20                 :  *
      21                 :  *  void S_UNLOCK(slock_t *lock)
      22                 :  *      Unlock a previously acquired lock.
      23                 :  *
      24                 :  *  bool S_LOCK_FREE(slock_t *lock)
      25                 :  *      Tests if the lock is free. Returns true if free, false if locked.
      26                 :  *      This does *not* change the state of the lock.
      27                 :  *
      28                 :  *  void SPIN_DELAY(void)
      29                 :  *      Delay operation to occur inside spinlock wait loop.
      30                 :  *
      31                 :  *  Note to implementors: there are default implementations for all these
      32                 :  *  macros at the bottom of the file.  Check if your platform can use
      33                 :  *  these or needs to override them.
      34                 :  *
      35                 :  *  Usually, S_LOCK() is implemented in terms of even lower-level macros
      36                 :  *  TAS() and TAS_SPIN():
      37                 :  *
      38                 :  *  int TAS(slock_t *lock)
      39                 :  *      Atomic test-and-set instruction.  Attempt to acquire the lock,
      40                 :  *      but do *not* wait.  Returns 0 if successful, nonzero if unable
      41                 :  *      to acquire the lock.
      42                 :  *
      43                 :  *  int TAS_SPIN(slock_t *lock)
      44                 :  *      Like TAS(), but this version is used when waiting for a lock
      45                 :  *      previously found to be contended.  By default, this is the
      46                 :  *      same as TAS(), but on some architectures it's better to poll a
      47                 :  *      contended lock using an unlocked instruction and retry the
      48                 :  *      atomic test-and-set only when it appears free.
      49                 :  *
      50                 :  *  TAS() and TAS_SPIN() are NOT part of the API, and should never be called
      51                 :  *  directly.
      52                 :  *
      53                 :  *  CAUTION: on some platforms TAS() and/or TAS_SPIN() may sometimes report
      54                 :  *  failure to acquire a lock even when the lock is not locked.  For example,
      55                 :  *  on Alpha TAS() will "fail" if interrupted.  Therefore a retry loop must
      56                 :  *  always be used, even if you are certain the lock is free.
      57                 :  *
      58                 :  *  It is the responsibility of these macros to make sure that the compiler
      59                 :  *  does not re-order accesses to shared memory to precede the actual lock
      60                 :  *  acquisition, or follow the lock release.  Prior to PostgreSQL 9.5, this
      61                 :  *  was the caller's responsibility, which meant that callers had to use
      62                 :  *  volatile-qualified pointers to refer to both the spinlock itself and the
      63                 :  *  shared data being accessed within the spinlocked critical section.  This
      64                 :  *  was notationally awkward, easy to forget (and thus error-prone), and
      65                 :  *  prevented some useful compiler optimizations.  For these reasons, we
      66                 :  *  now require that the macros themselves prevent compiler re-ordering,
      67                 :  *  so that the caller doesn't need to take special precautions.
      68                 :  *
      69                 :  *  On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
      70                 :  *  S_UNLOCK() macros must further include hardware-level memory fence
      71                 :  *  instructions to prevent similar re-ordering at the hardware level.
      72                 :  *  TAS() and TAS_SPIN() must guarantee that loads and stores issued after
      73                 :  *  the macro are not executed until the lock has been obtained.  Conversely,
      74                 :  *  S_UNLOCK() must guarantee that loads and stores issued before the macro
      75                 :  *  have been executed before the lock is released.
      76                 :  *
      77                 :  *  On most supported platforms, TAS() uses a tas() function written
      78                 :  *  in assembly language to execute a hardware atomic-test-and-set
      79                 :  *  instruction.  Equivalent OS-supplied mutex routines could be used too.
      80                 :  *
      81                 :  *  If no system-specific TAS() is available (ie, HAVE_SPINLOCKS is not
      82                 :  *  defined), then we fall back on an emulation that uses SysV semaphores
      83                 :  *  (see spin.c).  This emulation will be MUCH MUCH slower than a proper TAS()
      84                 :  *  implementation, because of the cost of a kernel call per lock or unlock.
      85                 :  *  An old report is that Postgres spends around 40% of its time in semop(2)
      86                 :  *  when using the SysV semaphore code.
      87                 :  *
      88                 :  *
      89                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      90                 :  * Portions Copyright (c) 1994, Regents of the University of California
      91                 :  *
      92                 :  *    src/include/storage/s_lock.h
      93                 :  *
      94                 :  *-------------------------------------------------------------------------
      95                 :  */
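/*
 * Illustrative sketch (editor's example, not part of s_lock.h): backend code
 * is expected to reach these primitives only through the hardware-independent
 * wrappers in spin.h, i.e. SpinLockInit(), SpinLockAcquire() and
 * SpinLockRelease(), which map onto S_INIT_LOCK(), S_LOCK() and S_UNLOCK().
 * The struct and function names below are invented for the example.
 */
#if 0							/* example only, never compiled */
typedef struct SharedCounter
{
	slock_t		mutex;			/* protects count */
	uint64		count;
} SharedCounter;

static void
increment_shared_counter(SharedCounter *sc)
{
	SpinLockAcquire(&sc->mutex);	/* S_LOCK(): spins, and sleeps, until acquired */
	sc->count++;					/* no volatile qualification needed (see above) */
	SpinLockRelease(&sc->mutex);	/* S_UNLOCK(): release with the required barrier */
}
#endif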
      96                 : #ifndef S_LOCK_H
      97                 : #define S_LOCK_H
      98                 : 
      99                 : #ifdef FRONTEND
     100                 : #error "s_lock.h may not be included from frontend code"
     101                 : #endif
     102                 : 
     103                 : #ifdef HAVE_SPINLOCKS   /* skip spinlocks if requested */
     104                 : 
     105                 : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
     106                 : /*************************************************************************
     107                 :  * All the gcc inlines
     108                 :  * Gcc consistently defines the CPU as __cpu__.
     109                 :  * Other compilers use __cpu or __cpu__ so we test for both in those cases.
     110                 :  */
     111                 : 
     112                 : /*----------
     113                 :  * Standard gcc asm format (assuming "volatile slock_t *lock"):
     114                 : 
     115                 :     __asm__ __volatile__(
     116                 :         "  instruction \n"
     117                 :         "  instruction \n"
     118                 :         "  instruction \n"
     119                 : :       "=r"(_res), "+m"(*lock)     // return register, in/out lock value
     120                 : :       "r"(lock)                 // lock pointer, in input register
     121                 : :       "memory", "cc");            // show clobbered registers here
     122                 : 
     123                 :  * The output-operands list (after first colon) should always include
     124                 :  * "+m"(*lock), whether or not the asm code actually refers to this
     125                 :  * operand directly.  This ensures that gcc believes the value in the
     126                 :  * lock variable is used and set by the asm code.  Also, the clobbers
     127                 :  * list (after third colon) should always include "memory"; this prevents
     128                 :  * gcc from thinking it can cache the values of shared-memory fields
     129                 :  * across the asm code.  Add "cc" if your asm code changes the condition
     130                 :  * code register, and also list any temp registers the code uses.
     131                 :  *----------
     132                 :  */
     133                 : 
     134                 : 
     135                 : #ifdef __i386__     /* 32-bit i386 */
     136                 : #define HAS_TEST_AND_SET
     137                 : 
     138                 : typedef unsigned char slock_t;
     139                 : 
     140                 : #define TAS(lock) tas(lock)
     141                 : 
     142                 : static __inline__ int
     143                 : tas(volatile slock_t *lock)
     144                 : {
     145                 :     slock_t     _res = 1;
     146                 : 
     147                 :     /*
     148                 :      * Use a non-locking test before asserting the bus lock.  Note that the
     149                 :      * extra test appears to be a small loss on some x86 platforms and a small
     150                 :      * win on others; it's by no means clear that we should keep it.
     151                 :      *
     152                 :      * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
     153                 :      * macros.  Nowadays it probably would be better to do a non-locking test
     154                 :      * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
     155                 :      * testing to verify that.  Without some empirical evidence, better to
     156                 :      * leave it alone.
     157                 :      */
     158                 :     __asm__ __volatile__(
     159                 :         "  cmpb    $0,%1   \n"
     160                 :         "  jne     1f      \n"
     161                 :         "  lock            \n"
     162                 :         "  xchgb   %0,%1   \n"
     163                 :         "1: \n"
     164                 : :       "+q"(_res), "+m"(*lock)
     165                 : :       /* no inputs */
     166                 : :       "memory", "cc");
     167                 :     return (int) _res;
     168                 : }
     169                 : 
     170                 : #define SPIN_DELAY() spin_delay()
     171                 : 
     172                 : static __inline__ void
     173                 : spin_delay(void)
     174                 : {
     175                 :     /*
     176                 :      * This sequence is equivalent to the PAUSE instruction ("rep" is
     177                 :      * ignored by old IA32 processors if the following instruction is
     178                 :      * not a string operation); the IA-32 Architecture Software
     179                 :      * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
     180                 :      * PAUSE in the inner loop of a spin lock is necessary for good
     181                 :      * performance:
     182                 :      *
     183                 :      *     The PAUSE instruction improves the performance of IA-32
     184                 :      *     processors supporting Hyper-Threading Technology when
     185                 :      *     executing spin-wait loops and other routines where one
     186                 :      *     thread is accessing a shared lock or semaphore in a tight
     187                 :      *     polling loop. When executing a spin-wait loop, the
     188                 :      *     processor can suffer a severe performance penalty when
     189                 :      *     exiting the loop because it detects a possible memory order
     190                 :      *     violation and flushes the core processor's pipeline. The
     191                 :      *     PAUSE instruction provides a hint to the processor that the
     192                 :      *     code sequence is a spin-wait loop. The processor uses this
     193                 :      *     hint to avoid the memory order violation and prevent the
     194                 :      *     pipeline flush. In addition, the PAUSE instruction
     195                 :      *     de-pipelines the spin-wait loop to prevent it from
     196                 :      *     consuming execution resources excessively.
     197                 :      */
     198                 :     __asm__ __volatile__(
     199                 :         " rep; nop         \n");
     200                 : }
     201                 : 
     202                 : #endif   /* __i386__ */
     203                 : 
     204                 : 
     205                 : #ifdef __x86_64__       /* AMD Opteron, Intel EM64T */
     206                 : #define HAS_TEST_AND_SET
     207                 : 
     208                 : typedef unsigned char slock_t;
     209                 : 
     210                 : #define TAS(lock) tas(lock)
     211                 : 
     212                 : /*
     213                 :  * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
     214                 :  * but only when spinning.
     215                 :  *
     216                 :  * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
     217                 :  * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
     218                 :  * available at:
     219                 :  * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
     220                 :  */
     221                 : #define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
     222                 : 
     223                 : static __inline__ int
     224 CBC    82645295 : tas(volatile slock_t *lock)
     225                 : {
     226 GNC    82645295 :     slock_t     _res = 1;
     227                 : 
     228 CBC    82645295 :     __asm__ __volatile__(
     229                 :         "  lock            \n"
     230                 :         "  xchgb   %0,%1   \n"
     231                 : :       "+q"(_res), "+m"(*lock)
     232                 : :       /* no inputs */
     233                 : :       "memory", "cc");
     234        82645295 :     return (int) _res;
     235                 : }
     236                 : 
     237                 : #define SPIN_DELAY() spin_delay()
     238                 : 
     239                 : static __inline__ void
     240          183999 : spin_delay(void)
     241                 : {
     242                 :     /*
     243                 :      * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
     244                 :      * Opteron, but it may be of some use on EM64T, so we keep it.
     245                 :      */
     246          183999 :     __asm__ __volatile__(
     247                 :         " rep; nop         \n");
     248          183999 : }
     249                 : 
     250                 : #endif   /* __x86_64__ */
     251                 : 
     252                 : 
     253                 : /*
     254                 :  * On ARM and ARM64, we use __sync_lock_test_and_set(int *, int) if available.
     255                 :  *
     256                 :  * We use the int-width variant of the builtin because it works on more chips
     257                 :  * than other widths.
     258                 :  */
     259                 : #if defined(__arm__) || defined(__arm) || defined(__aarch64__)
     260                 : #ifdef HAVE_GCC__SYNC_INT32_TAS
     261                 : #define HAS_TEST_AND_SET
     262                 : 
     263                 : #define TAS(lock) tas(lock)
     264                 : 
     265                 : typedef int slock_t;
     266                 : 
     267                 : static __inline__ int
     268                 : tas(volatile slock_t *lock)
     269                 : {
     270                 :     return __sync_lock_test_and_set(lock, 1);
     271                 : }
     272                 : 
     273                 : #define S_UNLOCK(lock) __sync_lock_release(lock)
     274                 : 
     275                 : /*
     276                 :  * Using an ISB instruction to delay in spinlock loops appears beneficial on
     277                 :  * high-core-count ARM64 processors.  It seems mostly a wash for smaller gear,
     278                 :  * and ISB doesn't exist at all on pre-v7 ARM chips.
     279                 :  */
     280                 : #if defined(__aarch64__)
     281                 : 
     282                 : #define SPIN_DELAY() spin_delay()
     283                 : 
     284                 : static __inline__ void
     285                 : spin_delay(void)
     286                 : {
     287                 :     __asm__ __volatile__(
     288                 :         " isb;             \n");
     289                 : }
     290                 : 
     291                 : #endif   /* __aarch64__ */
     292                 : #endif   /* HAVE_GCC__SYNC_INT32_TAS */
     293                 : #endif   /* __arm__ || __arm || __aarch64__ */
     294                 : 
     295                 : 
     296                 : /* S/390 and S/390x Linux (32- and 64-bit zSeries) */
     297                 : #if defined(__s390__) || defined(__s390x__)
     298                 : #define HAS_TEST_AND_SET
     299                 : 
     300                 : typedef unsigned int slock_t;
     301                 : 
     302                 : #define TAS(lock)      tas(lock)
     303                 : 
     304                 : static __inline__ int
     305                 : tas(volatile slock_t *lock)
     306                 : {
     307                 :     int         _res = 0;
     308                 : 
     309                 :     __asm__ __volatile__(
     310                 :         "  cs  %0,%3,0(%2)     \n"
     311                 : :       "+d"(_res), "+m"(*lock)
     312                 : :       "a"(lock), "d"(1)
     313                 : :       "memory", "cc");
     314                 :     return _res;
     315                 : }
     316                 : 
     317                 : #endif   /* __s390__ || __s390x__ */
     318                 : 
     319                 : 
     320                 : #if defined(__sparc__)      /* Sparc */
     321                 : /*
     322                 :  * Solaris has always run sparc processors in TSO (total store) mode, but
     323                 :  * linux didn't use to and the *BSDs still don't. So, be careful about
     324                 :  * acquire/release semantics. The CPU will treat superfluous membars as
     325                 :  * NOPs, so it's just code space.
     326                 :  */
     327                 : #define HAS_TEST_AND_SET
     328                 : 
     329                 : typedef unsigned char slock_t;
     330                 : 
     331                 : #define TAS(lock) tas(lock)
     332                 : 
     333                 : static __inline__ int
     334                 : tas(volatile slock_t *lock)
     335                 : {
     336                 :     slock_t     _res;
     337                 : 
     338                 :     /*
     339                 :      *  See comment in src/backend/port/tas/sunstudio_sparc.s for why this
     340                 :      *  uses "ldstub", and that file uses "cas".  gcc currently generates
     341                 :      *  sparcv7-targeted binaries, so "cas" use isn't possible.
     342                 :      */
     343                 :     __asm__ __volatile__(
     344                 :         "  ldstub  [%2], %0    \n"
     345                 : :       "=r"(_res), "+m"(*lock)
     346                 : :       "r"(lock)
     347                 : :       "memory");
     348                 : #if defined(__sparcv7) || defined(__sparc_v7__)
     349                 :     /*
     350                 :      * No stbar or membar available, luckily no actually produced hardware
     351                 :      * requires a barrier.
     352                 :      */
     353                 : #elif defined(__sparcv8) || defined(__sparc_v8__)
     354                 :     /* stbar is available (and required for both PSO, RMO), membar isn't */
     355                 :     __asm__ __volatile__ ("stbar    \n":::"memory");
     356                 : #else
     357                 :     /*
     358                 :      * #LoadStore (RMO) | #LoadLoad (RMO) together are the appropriate acquire
     359                 :      * barrier for sparcv8+ upwards.
     360                 :      */
     361                 :     __asm__ __volatile__ ("membar #LoadStore | #LoadLoad \n":::"memory");
     362                 : #endif
     363                 :     return (int) _res;
     364                 : }
     365                 : 
     366                 : #if defined(__sparcv7) || defined(__sparc_v7__)
     367                 : /*
     368                 :  * No stbar or membar available, luckily no actually produced hardware
     369                 :  * requires a barrier.  We fall through to the default gcc definition of
     370                 :  * S_UNLOCK in this case.
     371                 :  */
     372                 : #elif defined(__sparcv8) || defined(__sparc_v8__)
     373                 : /* stbar is available (and required for both PSO, RMO), membar isn't */
     374                 : #define S_UNLOCK(lock)  \
     375                 : do \
     376                 : { \
     377                 :     __asm__ __volatile__ ("stbar    \n":::"memory"); \
     378                 :     *((volatile slock_t *) (lock)) = 0; \
     379                 : } while (0)
     380                 : #else
     381                 : /*
     382                 :  * #LoadStore (RMO) | #StoreStore (RMO, PSO) together are the appropriate
     383                 :  * release barrier for sparcv8+ upwards.
     384                 :  */
     385                 : #define S_UNLOCK(lock)  \
     386                 : do \
     387                 : { \
     388                 :     __asm__ __volatile__ ("membar #LoadStore | #StoreStore \n":::"memory"); \
     389                 :     *((volatile slock_t *) (lock)) = 0; \
     390                 : } while (0)
     391                 : #endif
     392                 : 
     393                 : #endif   /* __sparc__ */
     394                 : 
     395                 : 
     396                 : /* PowerPC */
     397                 : #if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
     398                 : #define HAS_TEST_AND_SET
     399                 : 
     400                 : typedef unsigned int slock_t;
     401                 : 
     402                 : #define TAS(lock) tas(lock)
     403                 : 
     404                 : /* On PPC, it's a win to use a non-locking test before the lwarx */
     405                 : #define TAS_SPIN(lock)  (*(lock) ? 1 : TAS(lock))
     406                 : 
     407                 : /*
     408                 :  * The second operand of addi can hold a constant zero or a register number,
     409                 :  * hence constraint "=&b" to avoid allocating r0.  "b" stands for "address
     410                 :  * base register"; most operands having this register-or-zero property are
     411                 :  * address bases, e.g. the second operand of lwax.
     412                 :  *
     413                 :  * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
     414                 :  * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
     415                 :  * But if the spinlock is in ordinary memory, we can use lwsync instead for
     416                 :  * better performance.
     417                 :  *
     418                 :  * Ordinarily, we'd code the branches here using GNU-style local symbols, that
     419                 :  * is "1f" referencing "1:" and so on.  But some people run gcc on AIX with
     420                 :  * IBM's assembler as backend, and IBM's assembler doesn't do local symbols.
     421                 :  * So hand-code the branch offsets; fortunately, all PPC instructions are
     422                 :  * exactly 4 bytes each, so it's not too hard to count.
     423                 :  */
     424                 : static __inline__ int
     425                 : tas(volatile slock_t *lock)
     426                 : {
     427                 :     slock_t _t;
     428                 :     int _res;
     429                 : 
     430                 :     __asm__ __volatile__(
     431                 : "  lwarx   %0,0,%3,1   \n"
     432                 : "  cmpwi   %0,0        \n"
     433                 : "  bne     $+16        \n"        /* branch to li %1,1 */
     434                 : "  addi    %0,%0,1     \n"
     435                 : "  stwcx.  %0,0,%3     \n"
     436                 : "  beq     $+12        \n"        /* branch to lwsync */
     437                 : "  li      %1,1        \n"
     438                 : "  b       $+12        \n"        /* branch to end of asm sequence */
     439                 : "  lwsync              \n"
     440                 : "  li      %1,0        \n"
     441                 : 
     442                 : :   "=&b"(_t), "=r"(_res), "+m"(*lock)
     443                 : :   "r"(lock)
     444                 : :   "memory", "cc");
     445                 :     return _res;
     446                 : }
     447                 : 
     448                 : /*
     449                 :  * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
     450                 :  * But we can use lwsync instead for better performance.
     451                 :  */
     452                 : #define S_UNLOCK(lock)  \
     453                 : do \
     454                 : { \
     455                 :     __asm__ __volatile__ ("    lwsync \n" ::: "memory"); \
     456                 :     *((volatile slock_t *) (lock)) = 0; \
     457                 : } while (0)
     458                 : 
     459                 : #endif /* powerpc */
     460                 : 
     461                 : 
     462                 : #if defined(__mips__) && !defined(__sgi)    /* non-SGI MIPS */
     463                 : #define HAS_TEST_AND_SET
     464                 : 
     465                 : typedef unsigned int slock_t;
     466                 : 
     467                 : #define TAS(lock) tas(lock)
     468                 : 
     469                 : /*
     470                 :  * Original MIPS-I processors lacked the LL/SC instructions, but if we are
     471                 :  * so unfortunate as to be running on one of those, we expect that the kernel
     472                 :  * will handle the illegal-instruction traps and emulate them for us.  On
     473                 :  * anything newer (and really, MIPS-I is extinct) LL/SC is the only sane
     474                 :  * choice because any other synchronization method must involve a kernel
     475                 :  * call.  Unfortunately, many toolchains still default to MIPS-I as the
     476                 :  * codegen target; if the symbol __mips shows that that's the case, we
     477                 :  * have to force the assembler to accept LL/SC.
     478                 :  *
     479                 :  * R10000 and up processors require a separate SYNC, which has the same
     480                 :  * issues as LL/SC.
     481                 :  */
     482                 : #if __mips < 2
     483                 : #define MIPS_SET_MIPS2  "       .set mips2          \n"
     484                 : #else
     485                 : #define MIPS_SET_MIPS2
     486                 : #endif
     487                 : 
     488                 : static __inline__ int
     489                 : tas(volatile slock_t *lock)
     490                 : {
     491                 :     volatile slock_t *_l = lock;
     492                 :     int         _res;
     493                 :     int         _tmp;
     494                 : 
     495                 :     __asm__ __volatile__(
     496                 :         "       .set push           \n"
     497                 :         MIPS_SET_MIPS2
     498                 :         "       .set noreorder      \n"
     499                 :         "       .set nomacro        \n"
     500                 :         "       ll      %0, %2      \n"
     501                 :         "       or      %1, %0, 1   \n"
     502                 :         "       sc      %1, %2      \n"
     503                 :         "       xori    %1, 1       \n"
     504                 :         "       or      %0, %0, %1  \n"
     505                 :         "       sync                \n"
     506                 :         "       .set pop              "
     507                 : :       "=&r" (_res), "=&r" (_tmp), "+R" (*_l)
     508                 : :       /* no inputs */
     509                 : :       "memory");
     510                 :     return _res;
     511                 : }
     512                 : 
     513                 : /* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
     514                 : #define S_UNLOCK(lock)  \
     515                 : do \
     516                 : { \
     517                 :     __asm__ __volatile__( \
     518                 :         "       .set push           \n" \
     519                 :         MIPS_SET_MIPS2 \
     520                 :         "       .set noreorder      \n" \
     521                 :         "       .set nomacro        \n" \
     522                 :         "       sync                \n" \
     523                 :         "       .set pop              " \
     524                 : :       /* no outputs */ \
     525                 : :       /* no inputs */ \
     526                 : :       "memory"); \
     527                 :     *((volatile slock_t *) (lock)) = 0; \
     528                 : } while (0)
     529                 : 
     530                 : #endif /* __mips__ && !__sgi */
     531                 : 
     532                 : 
     533                 : #if defined(__hppa) || defined(__hppa__)    /* HP PA-RISC */
     534                 : /*
     535                 :  * HP's PA-RISC
     536                 :  *
     537                 :  * Because LDCWX requires a 16-byte-aligned address, we declare slock_t as a
     538                 :  * 16-byte struct.  The active word in the struct is whichever has the aligned
     539                 :  * address; the other three words just sit at -1.
     540                 :  */
     541                 : #define HAS_TEST_AND_SET
     542                 : 
     543                 : typedef struct
     544                 : {
     545                 :     int         sema[4];
     546                 : } slock_t;
     547                 : 
     548                 : #define TAS_ACTIVE_WORD(lock)   ((volatile int *) (((uintptr_t) (lock) + 15) & ~15))
     549                 : 
     550                 : static __inline__ int
     551                 : tas(volatile slock_t *lock)
     552                 : {
     553                 :     volatile int *lockword = TAS_ACTIVE_WORD(lock);
     554                 :     int         lockval;
     555                 : 
     556                 :     /*
     557                 :      * The LDCWX instruction atomically clears the target word and
     558                 :      * returns the previous value.  Hence, if the instruction returns
     559                 :      * 0, someone else has already acquired the lock before we tested
     560                 :      * it (i.e., we have failed).
     561                 :      *
     562                 :      * Notice that this means that we actually clear the word to set
     563                 :      * the lock and set the word to clear the lock.  This is the
     564                 :      * opposite behavior from the SPARC LDSTUB instruction.  For some
     565                 :      * reason everything that H-P does is rather baroque...
     566                 :      *
     567                 :      * For details about the LDCWX instruction, see the "Precision
     568                 :      * Architecture and Instruction Reference Manual" (09740-90014 of June
     569                 :      * 1987), p. 5-38.
     570                 :      */
     571                 :     __asm__ __volatile__(
     572                 :         "  ldcwx   0(0,%2),%0  \n"
     573                 : :       "=r"(lockval), "+m"(*lockword)
     574                 : :       "r"(lockword)
     575                 : :       "memory");
     576                 :     return (lockval == 0);
     577                 : }
     578                 : 
     579                 : #define S_UNLOCK(lock)  \
     580                 :     do { \
     581                 :         __asm__ __volatile__("" : : : "memory"); \
     582                 :         *TAS_ACTIVE_WORD(lock) = -1; \
     583                 :     } while (0)
     584                 : 
     585                 : #define S_INIT_LOCK(lock) \
     586                 :     do { \
     587                 :         volatile slock_t *lock_ = (lock); \
     588                 :         lock_->sema[0] = -1; \
     589                 :         lock_->sema[1] = -1; \
     590                 :         lock_->sema[2] = -1; \
     591                 :         lock_->sema[3] = -1; \
     592                 :     } while (0)
     593                 : 
     594                 : #define S_LOCK_FREE(lock)   (*TAS_ACTIVE_WORD(lock) != 0)
     595                 : 
     596                 : #endif   /* __hppa || __hppa__ */
     597                 : 
     598                 : 
     599                 : /*
     600                 :  * If we have no platform-specific knowledge, but we found that the compiler
     601                 :  * provides __sync_lock_test_and_set(), use that.  Prefer the int-width
     602                 :  * version over the char-width version if we have both, on the rather dubious
     603                 :  * grounds that that's known to be more likely to work in the ARM ecosystem.
     604                 :  * (But we dealt with ARM above.)
     605                 :  */
     606                 : #if !defined(HAS_TEST_AND_SET)
     607                 : 
     608                 : #if defined(HAVE_GCC__SYNC_INT32_TAS)
     609                 : #define HAS_TEST_AND_SET
     610                 : 
     611                 : #define TAS(lock) tas(lock)
     612                 : 
     613                 : typedef int slock_t;
     614                 : 
     615                 : static __inline__ int
     616                 : tas(volatile slock_t *lock)
     617                 : {
     618                 :     return __sync_lock_test_and_set(lock, 1);
     619                 : }
     620                 : 
     621                 : #define S_UNLOCK(lock) __sync_lock_release(lock)
     622                 : 
     623                 : #elif defined(HAVE_GCC__SYNC_CHAR_TAS)
     624                 : #define HAS_TEST_AND_SET
     625                 : 
     626                 : #define TAS(lock) tas(lock)
     627                 : 
     628                 : typedef char slock_t;
     629                 : 
     630                 : static __inline__ int
     631                 : tas(volatile slock_t *lock)
     632                 : {
     633                 :     return __sync_lock_test_and_set(lock, 1);
     634                 : }
     635                 : 
     636                 : #define S_UNLOCK(lock) __sync_lock_release(lock)
     637                 : 
     638                 : #endif   /* HAVE_GCC__SYNC_INT32_TAS */
     639                 : 
     640                 : #endif  /* !defined(HAS_TEST_AND_SET) */
     641                 : 
     642                 : 
     643                 : /*
     644                 :  * Default implementation of S_UNLOCK() for gcc/icc.
     645                 :  *
     646                 :  * Note that this implementation is unsafe for any platform that can reorder
     647                 :  * a memory access (either load or store) after a following store.  That
     648                 :  * happens not to be possible on x86 and most legacy architectures (some are
     649                 :  * single-processor!), but many modern systems have weaker memory ordering.
     650                 :  * Those that do must define their own version of S_UNLOCK() rather than
     651                 :  * relying on this one.
     652                 :  */
     653                 : #if !defined(S_UNLOCK)
     654                 : #define S_UNLOCK(lock)  \
     655                 :     do { __asm__ __volatile__("" : : : "memory");  *(lock) = 0; } while (0)
     656                 : #endif
     657                 : 
     658                 : #endif  /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
     659                 : 
     660                 : 
     661                 : /*
     662                 :  * ---------------------------------------------------------------------
     663                 :  * Platforms that use non-gcc inline assembly:
     664                 :  * ---------------------------------------------------------------------
     665                 :  */
     666                 : 
     667                 : #if !defined(HAS_TEST_AND_SET)  /* We didn't trigger above, let's try here */
     668                 : 
     669                 : #if defined(_AIX)   /* AIX */
     670                 : /*
     671                 :  * AIX (POWER)
     672                 :  */
     673                 : #define HAS_TEST_AND_SET
     674                 : 
     675                 : #include <sys/atomic_op.h>
     676                 : 
     677                 : typedef int slock_t;
     678                 : 
     679                 : #define TAS(lock)           _check_lock((slock_t *) (lock), 0, 1)
     680                 : #define S_UNLOCK(lock)      _clear_lock((slock_t *) (lock), 0)
     681                 : #endif   /* _AIX */
     682                 : 
     683                 : 
     684                 : /* These are in sunstudio_(sparc|x86).s */
     685                 : 
     686                 : #if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
     687                 : #define HAS_TEST_AND_SET
     688                 : 
     689                 : #if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus)
     690                 : typedef unsigned int slock_t;
     691                 : #else
     692                 : typedef unsigned char slock_t;
     693                 : #endif
     694                 : 
     695                 : extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
     696                 :                                       slock_t cmp);
     697                 : 
     698                 : #define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
     699                 : #endif
     700                 : 
     701                 : 
     702                 : #ifdef _MSC_VER
     703                 : typedef LONG slock_t;
     704                 : 
     705                 : #define HAS_TEST_AND_SET
     706                 : #define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
     707                 : 
     708                 : #define SPIN_DELAY() spin_delay()
     709                 : 
     710                 : /* If using Visual C++ on Win64, inline assembly is unavailable.
     711                 :  * Use a _mm_pause intrinsic instead of rep nop.
     712                 :  */
     713                 : #if defined(_WIN64)
     714                 : static __forceinline void
     715                 : spin_delay(void)
     716                 : {
     717                 :     _mm_pause();
     718                 : }
     719                 : #else
     720                 : static __forceinline void
     721                 : spin_delay(void)
     722                 : {
     723                 :     /* See comment for gcc code. Same code, MASM syntax */
     724                 :     __asm rep nop;
     725                 : }
     726                 : #endif
     727                 : 
     728                 : #include <intrin.h>
     729                 : #pragma intrinsic(_ReadWriteBarrier)
     730                 : 
     731                 : #define S_UNLOCK(lock)  \
     732                 :     do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
     733                 : 
     734                 : #endif
     735                 : 
     736                 : 
     737                 : #endif  /* !defined(HAS_TEST_AND_SET) */
     738                 : 
     739                 : 
     740                 : /* Blow up if we didn't have any way to do spinlocks */
     741                 : #ifndef HAS_TEST_AND_SET
     742                 : #error PostgreSQL does not have native spinlock support on this platform.  To continue the compilation, rerun configure using --disable-spinlocks.  However, performance will be poor.  Please report this to pgsql-bugs@lists.postgresql.org.
     743                 : #endif
     744                 : 
     745                 : 
     746                 : #else   /* !HAVE_SPINLOCKS */
     747                 : 
     748                 : 
     749                 : /*
     750                 :  * Fake spinlock implementation using semaphores --- slow and prone
     751                 :  * to fall foul of kernel limits on number of semaphores, so don't use this
     752                 :  * unless you must!  The subroutines appear in spin.c.
     753                 :  */
     754                 : typedef int slock_t;
     755                 : 
     756                 : extern bool s_lock_free_sema(volatile slock_t *lock);
     757                 : extern void s_unlock_sema(volatile slock_t *lock);
     758                 : extern void s_init_lock_sema(volatile slock_t *lock, bool nested);
     759                 : extern int  tas_sema(volatile slock_t *lock);
     760                 : 
     761                 : #define S_LOCK_FREE(lock)   s_lock_free_sema(lock)
     762                 : #define S_UNLOCK(lock)   s_unlock_sema(lock)
     763                 : #define S_INIT_LOCK(lock)   s_init_lock_sema(lock, false)
     764                 : #define TAS(lock)   tas_sema(lock)
     765                 : 
     766                 : 
     767                 : #endif  /* HAVE_SPINLOCKS */
     768                 : 
     769                 : 
     770                 : /*
     771                 :  * Default Definitions - override these above as needed.
     772                 :  */
     773                 : 
     774                 : #if !defined(S_LOCK)
     775                 : #define S_LOCK(lock) \
     776                 :     (TAS(lock) ? s_lock((lock), __FILE__, __LINE__, __func__) : 0)
     777                 : #endif   /* S_LOCK */
     778                 : 
     779                 : #if !defined(S_LOCK_FREE)
     780                 : #define S_LOCK_FREE(lock)   (*(lock) == 0)
     781                 : #endif   /* S_LOCK_FREE */
     782                 : 
     783                 : #if !defined(S_UNLOCK)
     784                 : /*
     785                 :  * Our default implementation of S_UNLOCK is essentially *(lock) = 0.  This
     786                 :  * is unsafe if the platform can reorder a memory access (either load or
     787                 :  * store) after a following store; platforms where this is possible must
     788                 :  * define their own S_UNLOCK.  But CPU reordering is not the only concern:
     789                 :  * if we simply defined S_UNLOCK() as an inline macro, the compiler might
     790                 :  * reorder instructions from inside the critical section to occur after the
     791                 :  * lock release.  Since the compiler probably can't know what the external
     792                 :  * function s_unlock is doing, putting the same logic there should be adequate.
     793                 :  * A sufficiently-smart globally optimizing compiler could break that
     794                 :  * assumption, though, and the cost of a function call for every spinlock
     795                 :  * release may hurt performance significantly, so we use this implementation
     796                 :  * only for platforms where we don't know of a suitable intrinsic.  For the
     797                 :  * most part, those are relatively obscure platform/compiler combinations to
     798                 :  * which the PostgreSQL project does not have access.
     799                 :  */
     800                 : #define USE_DEFAULT_S_UNLOCK
     801                 : extern void s_unlock(volatile slock_t *lock);
     802                 : #define S_UNLOCK(lock)      s_unlock(lock)
     803                 : #endif   /* S_UNLOCK */
     804                 : 
     805                 : #if !defined(S_INIT_LOCK)
     806                 : #define S_INIT_LOCK(lock)   S_UNLOCK(lock)
     807                 : #endif   /* S_INIT_LOCK */
     808                 : 
     809                 : #if !defined(SPIN_DELAY)
     810                 : #define SPIN_DELAY()    ((void) 0)
     811                 : #endif   /* SPIN_DELAY */
     812                 : 
     813                 : #if !defined(TAS)
     814                 : extern int  tas(volatile slock_t *lock);        /* in port/.../tas.s, or
     815                 :                                                  * s_lock.c */
     816                 : 
     817                 : #define TAS(lock)       tas(lock)
     818                 : #endif   /* TAS */
     819                 : 
     820                 : #if !defined(TAS_SPIN)
     821                 : #define TAS_SPIN(lock)  TAS(lock)
     822                 : #endif   /* TAS_SPIN */
     823                 : 
     824                 : extern PGDLLIMPORT slock_t dummy_spinlock;
     825                 : 
     826                 : /*
     827                 :  * Platform-independent out-of-line support routines
     828                 :  */
     829                 : extern int s_lock(volatile slock_t *lock, const char *file, int line, const char *func);
     830                 : 
     831                 : /* Support for dynamic adjustment of spins_per_delay */
     832                 : #define DEFAULT_SPINS_PER_DELAY  100
     833                 : 
     834                 : extern void set_spins_per_delay(int shared_spins_per_delay);
     835                 : extern int  update_spins_per_delay(int shared_spins_per_delay);
     836                 : 
     837                 : /*
     838                 :  * Support for spin delay which is useful in various places where
     839                 :  * spinlock-like procedures take place.
     840                 :  */
     841                 : typedef struct
     842                 : {
     843                 :     int         spins;
     844                 :     int         delays;
     845                 :     int         cur_delay;
     846                 :     const char *file;
     847                 :     int         line;
     848                 :     const char *func;
     849                 : } SpinDelayStatus;
     850                 : 
     851                 : static inline void
     852 GIC    51788804 : init_spin_delay(SpinDelayStatus *status,
     853                 :                 const char *file, int line, const char *func)
     854                 : {
     855        51788804 :     status->spins = 0;
     856        51788804 :     status->delays = 0;
     857        51788804 :     status->cur_delay = 0;
     858        51788804 :     status->file = file;
     859        51788804 :     status->line = line;
     860        51788804 :     status->func = func;
     861        51788804 : }
     862                 : 
     863                 : #define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
     864                 : extern void perform_spin_delay(SpinDelayStatus *status);
     865                 : extern void finish_spin_delay(SpinDelayStatus *status);
     866                 : 
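/*
 * Illustrative sketch (editor's example, not part of s_lock.h): a typical
 * spinlock-like wait loop built on the spin-delay support declared above.
 * The function name and the polled flag are invented for the example.
 */
#if 0							/* example only, never compiled */
static void
wait_for_flag(volatile int *flag)
{
	SpinDelayStatus delayStatus;

	init_local_spin_delay(&delayStatus);
	while (*flag == 0)
		perform_spin_delay(&delayStatus);	/* spin, then sleep with growing delays */
	finish_spin_delay(&delayStatus);		/* feed the result back into spins_per_delay */
}
#endif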
     867                 : #endif   /* S_LOCK_H */
        

Generated by: LCOV version v1.16-55-g56c0a2a