Skip to content

Commit a849487

Browse files
author
Mikael Ronstrom
committed
WL#5138 merged to mysql-next-mr
2 parents ecb6228 + 018b63c commit a849487

File tree

19 files changed

+701
-366
lines changed

19 files changed

+701
-366
lines changed

include/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ noinst_HEADERS = config-win.h config-netware.h my_bit.h \
3939
thr_lock.h t_ctype.h violite.h my_md5.h base64.h \
4040
my_handler.h my_time.h service_versions.h \
4141
my_vle.h my_user.h my_atomic.h atomic/nolock.h \
42-
atomic/rwlock.h atomic/x86-gcc.h atomic/x86-msvc.h \
42+
atomic/rwlock.h atomic/x86-gcc.h atomic/generic-msvc.h \
4343
atomic/solaris.h \
4444
atomic/gcc_builtins.h my_libwrap.h my_stacktrace.h
4545

include/atomic/gcc_builtins.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
#define make_atomic_add_body(S) \
2020
v= __sync_fetch_and_add(a, v);
21-
#define make_atomic_swap_body(S) \
21+
#define make_atomic_fas_body(S) \
2222
v= __sync_lock_test_and_set(a, v);
2323
#define make_atomic_cas_body(S) \
2424
int ## S sav; \
@@ -28,7 +28,10 @@
2828
#ifdef MY_ATOMIC_MODE_DUMMY
2929
#define make_atomic_load_body(S) ret= *a
3030
#define make_atomic_store_body(S) *a= v
31+
#define MY_ATOMIC_MODE "gcc-builtins-up"
32+
3133
#else
34+
#define MY_ATOMIC_MODE "gcc-builtins-smp"
3235
#define make_atomic_load_body(S) \
3336
ret= __sync_fetch_and_or(a, 0);
3437
#define make_atomic_store_body(S) \

include/atomic/generic-msvc.h

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.
2+
3+
This program is free software; you can redistribute it and/or modify
4+
it under the terms of the GNU General Public License as published by
5+
the Free Software Foundation; version 2 of the License.
6+
7+
This program is distributed in the hope that it will be useful,
8+
but WITHOUT ANY WARRANTY; without even the implied warranty of
9+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10+
GNU General Public License for more details.
11+
12+
You should have received a copy of the GNU General Public License
13+
along with this program; if not, write to the Free Software
14+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
15+
16+
#ifndef _atomic_h_cleanup_
17+
#define _atomic_h_cleanup_ "atomic/generic-msvc.h"
18+
19+
/*
20+
We don't implement anything specific for MY_ATOMIC_MODE_DUMMY, always use
21+
intrinsics.
22+
8 and 16-bit atomics are not implemented, but it can be done if necessary.
23+
*/
24+
#undef MY_ATOMIC_HAS_8_16
25+
26+
/*
27+
x86 compilers (both VS2003 and VS2005) never use intrinsics, but generate
28+
function calls to kernel32 instead, even in the optimized build.
29+
We force intrinsics as described in MSDN documentation for
30+
_InterlockedCompareExchange.
31+
*/
32+
#ifdef _M_IX86
33+
34+
#if (_MSC_VER >= 1500)
35+
#include <intrin.h>
36+
#else
37+
C_MODE_START
38+
/*Visual Studio 2003 and earlier do not have prototypes for atomic intrinsics*/
39+
LONG _InterlockedExchange (LONG volatile *Target,LONG Value);
40+
LONG _InterlockedCompareExchange (LONG volatile *Target, LONG Value, LONG Comp);
41+
LONG _InterlockedExchangeAdd (LONG volatile *Addend, LONG Value);
42+
C_MODE_END
43+
44+
#pragma intrinsic(_InterlockedExchangeAdd)
45+
#pragma intrinsic(_InterlockedCompareExchange)
46+
#pragma intrinsic(_InterlockedExchange)
47+
#endif
48+
49+
#define InterlockedExchange _InterlockedExchange
50+
#define InterlockedExchangeAdd _InterlockedExchangeAdd
51+
#define InterlockedCompareExchange _InterlockedCompareExchange
52+
/*
53+
No need to do something special for InterlockedCompareExchangePointer
54+
as it is a #define to InterlockedCompareExchange. The same applies to
55+
InterlockedExchangePointer.
56+
*/
57+
#endif /*_M_IX86*/
58+
59+
#define MY_ATOMIC_MODE "msvc-intrinsics"
60+
#define IL_EXCHG_ADD32(X,Y) InterlockedExchangeAdd((volatile LONG *)(X),(Y))
61+
#define IL_COMP_EXCHG32(X,Y,Z) InterlockedCompareExchange((volatile LONG *)(X),(Y),(Z))
62+
#define IL_COMP_EXCHGptr InterlockedCompareExchangePointer
63+
#define IL_EXCHG32(X,Y) InterlockedExchange((volatile LONG *)(X),(Y))
64+
#define IL_EXCHGptr InterlockedExchangePointer
65+
#define make_atomic_add_body(S) \
66+
v= IL_EXCHG_ADD ## S (a, v)
67+
#define make_atomic_cas_body(S) \
68+
int ## S initial_cmp= *cmp; \
69+
int ## S initial_a= IL_COMP_EXCHG ## S (a, set, initial_cmp); \
70+
if (!(ret= (initial_a == initial_cmp))) *cmp= initial_a;
71+
#define make_atomic_swap_body(S) \
72+
v= IL_EXCHG ## S (a, v)
73+
#define make_atomic_load_body(S) \
74+
ret= 0; /* avoid compiler warning */ \
75+
ret= IL_COMP_EXCHG ## S (a, ret, ret);
76+
77+
/*
78+
my_yield_processor (equivalent of x86 PAUSE instruction) should be used
79+
to improve performance on hyperthreaded CPUs. Intel recommends to use it in
80+
spin loops also on non-HT machines to reduce power consumption (see e.g
81+
http://softwarecommunity.intel.com/articles/eng/2004.htm)
82+
83+
Running benchmarks for spinlocks implemented with InterlockedCompareExchange
84+
and YieldProcessor shows that much better performance is achieved by calling
85+
YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
86+
loop count in the range 200-300 brought best results.
87+
*/
88+
#ifndef YIELD_LOOPS
89+
#define YIELD_LOOPS 200
90+
#endif
91+
92+
static __inline int my_yield_processor()
93+
{
94+
int i;
95+
for(i=0; i<YIELD_LOOPS; i++)
96+
{
97+
#if (_MSC_VER <= 1310)
98+
/* On older compilers YieldProcessor is not available, use inline assembly*/
99+
__asm { rep nop }
100+
#else
101+
YieldProcessor();
102+
#endif
103+
}
104+
return 1;
105+
}
106+
107+
#define LF_BACKOFF my_yield_processor()
108+
#else /* cleanup */
109+
110+
#undef IL_EXCHG_ADD32
111+
#undef IL_COMP_EXCHG32
112+
#undef IL_COMP_EXCHGptr
113+
#undef IL_EXCHG32
114+
#undef IL_EXCHGptr
115+
116+
#endif

include/atomic/nolock.h

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,43 +16,36 @@
1616
along with this program; if not, write to the Free Software
1717
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
1818

19-
#if defined(__i386__) || defined(_M_IX86) || defined(HAVE_GCC_ATOMIC_BUILTINS)
20-
21-
#ifdef MY_ATOMIC_MODE_DUMMY
22-
# define LOCK ""
23-
#else
24-
# define LOCK "lock"
25-
#endif
26-
27-
#ifdef HAVE_GCC_ATOMIC_BUILTINS
28-
#include "gcc_builtins.h"
29-
#elif __GNUC__
30-
#include "x86-gcc.h"
31-
#elif defined(_MSC_VER)
32-
#include "x86-msvc.h"
33-
#endif
34-
19+
#if defined(__i386__) || defined(_MSC_VER) || defined(__x86_64__) \
20+
|| defined(HAVE_GCC_ATOMIC_BUILTINS)
21+
22+
# ifdef MY_ATOMIC_MODE_DUMMY
23+
# define LOCK_prefix ""
24+
# else
25+
# define LOCK_prefix "lock"
26+
# endif
27+
28+
# ifdef HAVE_GCC_ATOMIC_BUILTINS
29+
# include "gcc_builtins.h"
30+
# elif __GNUC__
31+
# include "x86-gcc.h"
32+
# elif defined(_MSC_VER)
33+
# include "generic-msvc.h"
34+
# endif
3535
#elif defined(HAVE_SOLARIS_ATOMIC)
36-
3736
#include "solaris.h"
38-
39-
#endif /* __i386__ || _M_IX86 || HAVE_GCC_ATOMIC_BUILTINS */
37+
#endif
4038

4139
#if defined(make_atomic_cas_body) || defined(MY_ATOMICS_MADE)
4240
/*
4341
* We have atomics that require no locking
4442
*/
4543
#define MY_ATOMIC_NOLOCK
46-
47-
#ifdef __SUNPRO_C
4844
/*
49-
* Sun Studio 12 (and likely earlier) does not accept a typedef struct {}
50-
*/
51-
typedef char my_atomic_rwlock_t;
52-
#else
53-
typedef struct { } my_atomic_rwlock_t;
54-
#endif
55-
45+
Type not used so minimal size (empty struct has different size between C
46+
and C++, zero-length array is gcc-specific).
47+
*/
48+
typedef char my_atomic_rwlock_t __attribute__ ((unused));
5649
#define my_atomic_rwlock_destroy(name)
5750
#define my_atomic_rwlock_init(name)
5851
#define my_atomic_rwlock_rdlock(name)

include/atomic/rwlock.h

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
along with this program; if not, write to the Free Software
1717
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
1818

19-
typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t;
19+
typedef struct {pthread_mutex_t rw;} my_atomic_rwlock_t;
20+
#define MY_ATOMIC_MODE_RWLOCKS 1
2021

2122
#ifdef MY_ATOMIC_MODE_DUMMY
2223
/*
@@ -34,17 +35,25 @@ typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t;
3435
#define my_atomic_rwlock_wrunlock(name)
3536
#define MY_ATOMIC_MODE "dummy (non-atomic)"
3637
#else
37-
#define my_atomic_rwlock_destroy(name) pthread_rwlock_destroy(& (name)->rw)
38-
#define my_atomic_rwlock_init(name) pthread_rwlock_init(& (name)->rw, 0)
39-
#define my_atomic_rwlock_rdlock(name) pthread_rwlock_rdlock(& (name)->rw)
40-
#define my_atomic_rwlock_wrlock(name) pthread_rwlock_wrlock(& (name)->rw)
41-
#define my_atomic_rwlock_rdunlock(name) pthread_rwlock_unlock(& (name)->rw)
42-
#define my_atomic_rwlock_wrunlock(name) pthread_rwlock_unlock(& (name)->rw)
43-
#define MY_ATOMIC_MODE "rwlocks"
38+
/*
39+
we're using read-write lock macros but map them to mutex locks, and they're
40+
faster. Still, having semantically rich API we can change the
41+
underlying implementation, if necessary.
42+
*/
43+
#define my_atomic_rwlock_destroy(name) pthread_mutex_destroy(& (name)->rw)
44+
#define my_atomic_rwlock_init(name) pthread_mutex_init(& (name)->rw, 0)
45+
#define my_atomic_rwlock_rdlock(name) pthread_mutex_lock(& (name)->rw)
46+
#define my_atomic_rwlock_wrlock(name) pthread_mutex_lock(& (name)->rw)
47+
#define my_atomic_rwlock_rdunlock(name) pthread_mutex_unlock(& (name)->rw)
48+
#define my_atomic_rwlock_wrunlock(name) pthread_mutex_unlock(& (name)->rw)
49+
#define MY_ATOMIC_MODE "mutex"
50+
#ifndef MY_ATOMIC_MODE_RWLOCKS
51+
#define MY_ATOMIC_MODE_RWLOCKS 1
52+
#endif
4453
#endif
4554

4655
#define make_atomic_add_body(S) int ## S sav; sav= *a; *a+= v; v=sav;
47-
#define make_atomic_swap_body(S) int ## S sav; sav= *a; *a= v; v=sav;
56+
#define make_atomic_fas_body(S) int ## S sav; sav= *a; *a= v; v=sav;
4857
#define make_atomic_cas_body(S) if ((ret= (*a == *cmp))) *a= set; else *cmp=*a;
4958
#define make_atomic_load_body(S) ret= *a;
5059
#define make_atomic_store_body(S) *a= v;

include/atomic/solaris.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -186,25 +186,25 @@ my_atomic_storeptr(void * volatile *a, void *v)
186186
/* ------------------------------------------------------------------------ */
187187

188188
STATIC_INLINE int8
189-
my_atomic_swap8(int8 volatile *a, int8 v)
189+
my_atomic_fas8(int8 volatile *a, int8 v)
190190
{
191191
return ((int8) atomic_swap_8((volatile uint8_t *)a, (uint8_t)v));
192192
}
193193

194194
STATIC_INLINE int16
195-
my_atomic_swap16(int16 volatile *a, int16 v)
195+
my_atomic_fas16(int16 volatile *a, int16 v)
196196
{
197197
return ((int16) atomic_swap_16((volatile uint16_t *)a, (uint16_t)v));
198198
}
199199

200200
STATIC_INLINE int32
201-
my_atomic_swap32(int32 volatile *a, int32 v)
201+
my_atomic_fas32(int32 volatile *a, int32 v)
202202
{
203203
return ((int32) atomic_swap_32((volatile uint32_t *)a, (uint32_t)v));
204204
}
205205

206206
STATIC_INLINE void *
207-
my_atomic_swapptr(void * volatile *a, void *v)
207+
my_atomic_fasptr(void * volatile *a, void *v)
208208
{
209209
return (atomic_swap_ptr(a, v));
210210
}

include/atomic/x86-gcc.h

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,18 @@
2222
architectures support double-word (128-bit) cas.
2323
*/
2424

25-
#ifdef MY_ATOMIC_NO_XADD
26-
#define MY_ATOMIC_MODE "gcc-x86" LOCK "-no-xadd"
25+
#ifdef __x86_64__
26+
# ifdef MY_ATOMIC_NO_XADD
27+
# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix "-no-xadd"
28+
# else
29+
# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix
30+
# endif
2731
#else
28-
#define MY_ATOMIC_MODE "gcc-x86" LOCK
32+
# ifdef MY_ATOMIC_NO_XADD
33+
# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix "-no-xadd"
34+
# else
35+
# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix
36+
# endif
2937
#endif
3038

3139
/* fix -ansi errors while maintaining readability */
@@ -34,29 +42,53 @@
3442
#endif
3543

3644
#ifndef MY_ATOMIC_NO_XADD
37-
#define make_atomic_add_body(S) \
38-
asm volatile (LOCK "; xadd %0, %1;" : "+r" (v) , "+m" (*a))
45+
#define make_atomic_add_body(S) make_atomic_add_body ## S
46+
#define make_atomic_cas_body(S) make_atomic_cas_body ## S
3947
#endif
40-
#define make_atomic_swap_body(S) \
41-
asm volatile ("; xchg %0, %1;" : "+q" (v) , "+m" (*a))
42-
#define make_atomic_cas_body(S) \
43-
asm volatile (LOCK "; cmpxchg %3, %0; setz %2;" \
48+
49+
#define make_atomic_add_body32 \
50+
asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a))
51+
52+
#define make_atomic_cas_body32 \
53+
asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \
4454
: "+m" (*a), "+a" (*cmp), "=q" (ret): "r" (set))
4555

56+
#define make_atomic_cas_bodyptr make_atomic_cas_body32
57+
58+
#ifndef __x86_64__
59+
#define make_atomic_add_body64 make_atomic_add_body32
60+
#define make_atomic_cas_body64 make_atomic_cas_body32
61+
#else
62+
#define make_atomic_add_body64 \
63+
int64 tmp=*a; \
64+
while (!my_atomic_cas64(a, &tmp, tmp+v)); \
65+
v=tmp;
66+
#define make_atomic_cas_body64 \
67+
int32 ebx=(set & 0xFFFFFFFF), ecx=(set >> 32); \
68+
asm volatile (LOCK_prefix "; cmpxchg8b %0; setz %2;" \
69+
: "+m" (*a), "+A" (*cmp), "=q" (ret) \
70+
:"b" (ebx), "c" (ecx))
71+
#endif
72+
73+
#define make_atomic_fas_body(S) \
74+
asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a))
75+
4676
#ifdef MY_ATOMIC_MODE_DUMMY
4777
#define make_atomic_load_body(S) ret=*a
4878
#define make_atomic_store_body(S) *a=v
4979
#else
5080
/*
5181
Actually 32-bit reads/writes are always atomic on x86
52-
But we add LOCK here anyway to force memory barriers
82+
But we add LOCK_prefix here anyway to force memory barriers
5383
*/
5484
#define make_atomic_load_body(S) \
5585
ret=0; \
56-
asm volatile (LOCK "; cmpxchg %2, %0" \
86+
asm volatile (LOCK_prefix "; cmpxchg %2, %0" \
5787
: "+m" (*a), "+a" (ret): "r" (ret))
5888
#define make_atomic_store_body(S) \
59-
asm volatile ("; xchg %0, %1;" : "+m" (*a) : "r" (v))
89+
asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v))
6090
#endif
6191

92+
/* TODO test on intel whether the below helps. on AMD it makes no difference */
93+
//#define LF_BACKOFF ({asm volatile ("rep; nop"); 1; })
6294
#endif /* ATOMIC_X86_GCC_INCLUDED */

0 commit comments

Comments
 (0)