536 lines
16 KiB
536 lines
16 KiB
#ifndef Py_ATOMIC_H
|
|
#define Py_ATOMIC_H
|
|
#ifdef Py_BUILD_CORE
|
|
|
|
#include "dynamic_annotations.h"
|
|
|
|
#include "pyconfig.h"
|
|
|
|
#if defined(HAVE_STD_ATOMIC)
|
|
#include <stdatomic.h>
|
|
#endif
|
|
|
|
|
|
#if defined(_MSC_VER)
|
|
#include <intrin.h>
|
|
#include <immintrin.h>
|
|
#endif
|
|
|
|
/* This is modeled after the atomics interface from C1x, according to
|
|
* the draft at
|
|
* http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
|
|
* Operations and types are named the same except with a _Py_ prefix
|
|
* and have the same semantics.
|
|
*
|
|
* Beware, the implementations here are deep magic.
|
|
*/
|
|
|
|
#if defined(HAVE_STD_ATOMIC)
|
|
|
|
typedef enum _Py_memory_order {
|
|
_Py_memory_order_relaxed = memory_order_relaxed,
|
|
_Py_memory_order_acquire = memory_order_acquire,
|
|
_Py_memory_order_release = memory_order_release,
|
|
_Py_memory_order_acq_rel = memory_order_acq_rel,
|
|
_Py_memory_order_seq_cst = memory_order_seq_cst
|
|
} _Py_memory_order;
|
|
|
|
typedef struct _Py_atomic_address {
|
|
atomic_uintptr_t _value;
|
|
} _Py_atomic_address;
|
|
|
|
typedef struct _Py_atomic_int {
|
|
atomic_int _value;
|
|
} _Py_atomic_int;
|
|
|
|
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
|
|
atomic_signal_fence(ORDER)
|
|
|
|
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
|
|
atomic_thread_fence(ORDER)
|
|
|
|
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
|
|
atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)
|
|
|
|
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
|
|
atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER)
|
|
|
|
/* Use builtin atomic operations in GCC >= 4.7 */
|
|
#elif defined(HAVE_BUILTIN_ATOMIC)
|
|
|
|
typedef enum _Py_memory_order {
|
|
_Py_memory_order_relaxed = __ATOMIC_RELAXED,
|
|
_Py_memory_order_acquire = __ATOMIC_ACQUIRE,
|
|
_Py_memory_order_release = __ATOMIC_RELEASE,
|
|
_Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
|
|
_Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
|
|
} _Py_memory_order;
|
|
|
|
typedef struct _Py_atomic_address {
|
|
uintptr_t _value;
|
|
} _Py_atomic_address;
|
|
|
|
typedef struct _Py_atomic_int {
|
|
int _value;
|
|
} _Py_atomic_int;
|
|
|
|
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
|
|
__atomic_signal_fence(ORDER)
|
|
|
|
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
|
|
__atomic_thread_fence(ORDER)
|
|
|
|
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
|
|
(assert((ORDER) == __ATOMIC_RELAXED \
|
|
|| (ORDER) == __ATOMIC_SEQ_CST \
|
|
|| (ORDER) == __ATOMIC_RELEASE), \
|
|
__atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER))
|
|
|
|
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
|
|
(assert((ORDER) == __ATOMIC_RELAXED \
|
|
|| (ORDER) == __ATOMIC_SEQ_CST \
|
|
|| (ORDER) == __ATOMIC_ACQUIRE \
|
|
|| (ORDER) == __ATOMIC_CONSUME), \
|
|
__atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))
|
|
|
|
/* Only support GCC (for expression statements) and x86 (for simple
|
|
* atomic semantics) and MSVC x86/x64/ARM */
|
|
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
|
|
typedef enum _Py_memory_order {
|
|
_Py_memory_order_relaxed,
|
|
_Py_memory_order_acquire,
|
|
_Py_memory_order_release,
|
|
_Py_memory_order_acq_rel,
|
|
_Py_memory_order_seq_cst
|
|
} _Py_memory_order;
|
|
|
|
typedef struct _Py_atomic_address {
|
|
uintptr_t _value;
|
|
} _Py_atomic_address;
|
|
|
|
typedef struct _Py_atomic_int {
|
|
int _value;
|
|
} _Py_atomic_int;
|
|
|
|
|
|
static __inline__ void
|
|
_Py_atomic_signal_fence(_Py_memory_order order)
|
|
{
|
|
if (order != _Py_memory_order_relaxed)
|
|
__asm__ volatile("":::"memory");
|
|
}
|
|
|
|
static __inline__ void
|
|
_Py_atomic_thread_fence(_Py_memory_order order)
|
|
{
|
|
if (order != _Py_memory_order_relaxed)
|
|
__asm__ volatile("mfence":::"memory");
|
|
}
|
|
|
|
/* Tell the race checker about this operation's effects. */
|
|
static __inline__ void
|
|
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
|
|
{
|
|
(void)address; /* shut up -Wunused-parameter */
|
|
switch(order) {
|
|
case _Py_memory_order_release:
|
|
case _Py_memory_order_acq_rel:
|
|
case _Py_memory_order_seq_cst:
|
|
_Py_ANNOTATE_HAPPENS_BEFORE(address);
|
|
break;
|
|
case _Py_memory_order_relaxed:
|
|
case _Py_memory_order_acquire:
|
|
break;
|
|
}
|
|
switch(order) {
|
|
case _Py_memory_order_acquire:
|
|
case _Py_memory_order_acq_rel:
|
|
case _Py_memory_order_seq_cst:
|
|
_Py_ANNOTATE_HAPPENS_AFTER(address);
|
|
break;
|
|
case _Py_memory_order_relaxed:
|
|
case _Py_memory_order_release:
|
|
break;
|
|
}
|
|
}
|
|
|
|
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
|
|
__extension__ ({ \
|
|
__typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
|
|
__typeof__(atomic_val->_value) new_val = NEW_VAL;\
|
|
volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
|
|
_Py_memory_order order = ORDER; \
|
|
_Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
|
|
\
|
|
/* Perform the operation. */ \
|
|
_Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
|
|
switch(order) { \
|
|
case _Py_memory_order_release: \
|
|
_Py_atomic_signal_fence(_Py_memory_order_release); \
|
|
/* fallthrough */ \
|
|
case _Py_memory_order_relaxed: \
|
|
*volatile_data = new_val; \
|
|
break; \
|
|
\
|
|
case _Py_memory_order_acquire: \
|
|
case _Py_memory_order_acq_rel: \
|
|
case _Py_memory_order_seq_cst: \
|
|
__asm__ volatile("xchg %0, %1" \
|
|
: "+r"(new_val) \
|
|
: "m"(atomic_val->_value) \
|
|
: "memory"); \
|
|
break; \
|
|
} \
|
|
_Py_ANNOTATE_IGNORE_WRITES_END(); \
|
|
})
|
|
|
|
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
|
|
__extension__ ({ \
|
|
__typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
|
|
__typeof__(atomic_val->_value) result; \
|
|
volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
|
|
_Py_memory_order order = ORDER; \
|
|
_Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
|
|
\
|
|
/* Perform the operation. */ \
|
|
_Py_ANNOTATE_IGNORE_READS_BEGIN(); \
|
|
switch(order) { \
|
|
case _Py_memory_order_release: \
|
|
case _Py_memory_order_acq_rel: \
|
|
case _Py_memory_order_seq_cst: \
|
|
/* Loads on x86 are not releases by default, so need a */ \
|
|
/* thread fence. */ \
|
|
_Py_atomic_thread_fence(_Py_memory_order_release); \
|
|
break; \
|
|
default: \
|
|
/* No fence */ \
|
|
break; \
|
|
} \
|
|
result = *volatile_data; \
|
|
switch(order) { \
|
|
case _Py_memory_order_acquire: \
|
|
case _Py_memory_order_acq_rel: \
|
|
case _Py_memory_order_seq_cst: \
|
|
/* Loads on x86 are automatically acquire operations so */ \
|
|
/* can get by with just a compiler fence. */ \
|
|
_Py_atomic_signal_fence(_Py_memory_order_acquire); \
|
|
break; \
|
|
default: \
|
|
/* No fence */ \
|
|
break; \
|
|
} \
|
|
_Py_ANNOTATE_IGNORE_READS_END(); \
|
|
result; \
|
|
})
|
|
|
|
#elif defined(_MSC_VER)
|
|
/* _Interlocked* functions provide a full memory barrier and are therefore
|
|
enough for acq_rel and seq_cst. If the HLE variants aren't available
|
|
in hardware they will fall back to a full memory barrier as well.
|
|
|
|
This might affect performance but likely only in some very specific and
|
|
hard to meassure scenario.
|
|
*/
|
|
#if defined(_M_IX86) || defined(_M_X64)
|
|
typedef enum _Py_memory_order {
|
|
_Py_memory_order_relaxed,
|
|
_Py_memory_order_acquire,
|
|
_Py_memory_order_release,
|
|
_Py_memory_order_acq_rel,
|
|
_Py_memory_order_seq_cst
|
|
} _Py_memory_order;
|
|
|
|
typedef struct _Py_atomic_address {
|
|
volatile uintptr_t _value;
|
|
} _Py_atomic_address;
|
|
|
|
typedef struct _Py_atomic_int {
|
|
volatile int _value;
|
|
} _Py_atomic_int;
|
|
|
|
|
|
#if defined(_M_X64)
|
|
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
|
|
switch (ORDER) { \
|
|
case _Py_memory_order_acquire: \
|
|
_InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
|
|
break; \
|
|
case _Py_memory_order_release: \
|
|
_InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
|
|
break; \
|
|
default: \
|
|
_InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
|
|
break; \
|
|
}
|
|
#else
|
|
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
|
|
#endif
|
|
|
|
#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
|
|
switch (ORDER) { \
|
|
case _Py_memory_order_acquire: \
|
|
_InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
|
|
break; \
|
|
case _Py_memory_order_release: \
|
|
_InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
|
|
break; \
|
|
default: \
|
|
_InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
|
|
break; \
|
|
}
|
|
|
|
#if defined(_M_X64)
|
|
/* This has to be an intptr_t for now.
|
|
gil_created() uses -1 as a sentinel value, if this returns
|
|
a uintptr_t it will do an unsigned compare and crash
|
|
*/
|
|
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
|
|
__int64 old;
|
|
switch (order) {
|
|
case _Py_memory_order_acquire:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
|
|
break;
|
|
}
|
|
case _Py_memory_order_release:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
|
|
break;
|
|
}
|
|
case _Py_memory_order_relaxed:
|
|
old = *value;
|
|
break;
|
|
default:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
|
|
break;
|
|
}
|
|
}
|
|
return old;
|
|
}
|
|
|
|
#else
|
|
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
|
|
#endif
|
|
|
|
inline int _Py_atomic_load_32bit(volatile int* value, int order) {
|
|
long old;
|
|
switch (order) {
|
|
case _Py_memory_order_acquire:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
|
|
break;
|
|
}
|
|
case _Py_memory_order_release:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
|
|
break;
|
|
}
|
|
case _Py_memory_order_relaxed:
|
|
old = *value;
|
|
break;
|
|
default:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
|
|
break;
|
|
}
|
|
}
|
|
return old;
|
|
}
|
|
|
|
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
|
|
if (sizeof(*ATOMIC_VAL._value) == 8) { \
|
|
_Py_atomic_store_64bit((volatile long long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
|
|
_Py_atomic_store_32bit((volatile long*)ATOMIC_VAL._value, NEW_VAL, ORDER) }
|
|
|
|
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
|
|
( \
|
|
sizeof(*(ATOMIC_VAL._value)) == 8 ? \
|
|
_Py_atomic_load_64bit((volatile long long*)ATOMIC_VAL._value, ORDER) : \
|
|
_Py_atomic_load_32bit((volatile long*)ATOMIC_VAL._value, ORDER) \
|
|
)
|
|
#elif defined(_M_ARM) || defined(_M_ARM64)
|
|
typedef enum _Py_memory_order {
|
|
_Py_memory_order_relaxed,
|
|
_Py_memory_order_acquire,
|
|
_Py_memory_order_release,
|
|
_Py_memory_order_acq_rel,
|
|
_Py_memory_order_seq_cst
|
|
} _Py_memory_order;
|
|
|
|
typedef struct _Py_atomic_address {
|
|
volatile uintptr_t _value;
|
|
} _Py_atomic_address;
|
|
|
|
typedef struct _Py_atomic_int {
|
|
volatile int _value;
|
|
} _Py_atomic_int;
|
|
|
|
|
|
#if defined(_M_ARM64)
|
|
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
|
|
switch (ORDER) { \
|
|
case _Py_memory_order_acquire: \
|
|
_InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
|
|
break; \
|
|
case _Py_memory_order_release: \
|
|
_InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
|
|
break; \
|
|
default: \
|
|
_InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
|
|
break; \
|
|
}
|
|
#else
|
|
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
|
|
#endif
|
|
|
|
#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
|
|
switch (ORDER) { \
|
|
case _Py_memory_order_acquire: \
|
|
_InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
|
|
break; \
|
|
case _Py_memory_order_release: \
|
|
_InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
|
|
break; \
|
|
default: \
|
|
_InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
|
|
break; \
|
|
}
|
|
|
|
#if defined(_M_ARM64)
|
|
/* This has to be an intptr_t for now.
|
|
gil_created() uses -1 as a sentinel value, if this returns
|
|
a uintptr_t it will do an unsigned compare and crash
|
|
*/
|
|
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
|
|
uintptr_t old;
|
|
switch (order) {
|
|
case _Py_memory_order_acquire:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange64_acq(value, old, old) != old);
|
|
break;
|
|
}
|
|
case _Py_memory_order_release:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange64_rel(value, old, old) != old);
|
|
break;
|
|
}
|
|
case _Py_memory_order_relaxed:
|
|
old = *value;
|
|
break;
|
|
default:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange64(value, old, old) != old);
|
|
break;
|
|
}
|
|
}
|
|
return old;
|
|
}
|
|
|
|
#else
|
|
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
|
|
#endif
|
|
|
|
inline int _Py_atomic_load_32bit(volatile int* value, int order) {
|
|
int old;
|
|
switch (order) {
|
|
case _Py_memory_order_acquire:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange_acq(value, old, old) != old);
|
|
break;
|
|
}
|
|
case _Py_memory_order_release:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange_rel(value, old, old) != old);
|
|
break;
|
|
}
|
|
case _Py_memory_order_relaxed:
|
|
old = *value;
|
|
break;
|
|
default:
|
|
{
|
|
do {
|
|
old = *value;
|
|
} while(_InterlockedCompareExchange(value, old, old) != old);
|
|
break;
|
|
}
|
|
}
|
|
return old;
|
|
}
|
|
|
|
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
|
|
if (sizeof(*ATOMIC_VAL._value) == 8) { \
|
|
_Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
|
|
_Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }
|
|
|
|
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
|
|
( \
|
|
sizeof(*(ATOMIC_VAL._value)) == 8 ? \
|
|
_Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
|
|
_Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
|
|
)
|
|
#endif
|
|
#else /* !gcc x86 !_msc_ver */
|
|
typedef enum _Py_memory_order {
|
|
_Py_memory_order_relaxed,
|
|
_Py_memory_order_acquire,
|
|
_Py_memory_order_release,
|
|
_Py_memory_order_acq_rel,
|
|
_Py_memory_order_seq_cst
|
|
} _Py_memory_order;
|
|
|
|
typedef struct _Py_atomic_address {
|
|
uintptr_t _value;
|
|
} _Py_atomic_address;
|
|
|
|
typedef struct _Py_atomic_int {
|
|
int _value;
|
|
} _Py_atomic_int;
|
|
/* Fall back to other compilers and processors by assuming that simple
|
|
volatile accesses are atomic. This is false, so people should port
|
|
this. */
|
|
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
|
|
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
|
|
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
|
|
((ATOMIC_VAL)->_value = NEW_VAL)
|
|
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
|
|
((ATOMIC_VAL)->_value)
|
|
#endif
|
|
|
|
/* Standardized shortcuts. */
|
|
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
|
|
_Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst)
|
|
#define _Py_atomic_load(ATOMIC_VAL) \
|
|
_Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst)
|
|
|
|
/* Python-local extensions */
|
|
|
|
#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
|
|
_Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
|
|
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
|
|
_Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
|
|
#endif /* Py_BUILD_CORE */
|
|
#endif /* Py_ATOMIC_H */
|