目录
环境
文档用途
详细信息
环境
系统平台:Linux x86-64 Red Hat Enterprise Linux 7
版本:14
文档用途
了解spinlock的实现。spinlock是pg最基础的锁,在它之上可以实现LWLock。
详细信息
1.依靠硬件实现的spinlock
有些机器内部有TAS指令(test-and-set),原子操作,根据这条指令可以实现spinlock,代码在s_lock.h和s_lock.c中。
1.1 内联汇编
详细可查看:https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html
一般形式:
asm asm-qualifiers (
AssemblerTemplate // 汇编指令模板
: OutputOperands // 输出操作数
: InputOperands // 输入操作数
: Clobbers // 破坏描述部分
: GotoLabels);
e.g.:
__asm__ __volatile__(
" lock \n"
" xchgb %0,%1 \n" // %0, %1代表占位符,由后边的操作数依次进行替换
:"+q"(_res), "+m"(*lock)
// "+"表示操作数是读写类型;"q"表示将变量放入eax,ebx,ecx,edx中的一个;"m"表示内存操作数;"_res"和"*lock"是C语言中的变量(表达式)。
: /* no inputs */:
"memory", "cc"); // 有些指令会改变条件码,因此要设置cc提示编译器,memory在后文中有解释。
1.2 spinlock相关定义
/*
 * Public spinlock API. Each macro forwards to the platform-level S_* macro
 * defined below.
 */
#define SpinLockInit(lock) S_INIT_LOCK(lock) // initialize the spinlock to the unlocked state; pg stores 0 for that
#define SpinLockAcquire(lock) S_LOCK(lock) // acquire the lock
#define SpinLockRelease(lock) S_UNLOCK(lock) // release the lock
#define SpinLockFree(lock) S_LOCK_FREE(lock) // test the lock state: *(lock) == 0
/* Initializing is the same operation as unlocking: *(lock) is set to 0. */
#define S_INIT_LOCK(lock) S_UNLOCK(lock)
/*
 * Try TAS once. A nonzero result sends the caller into s_lock() together
 * with the call site's file, line and function name; otherwise the
 * expression evaluates to 0.
 */
#define S_LOCK(lock) (TAS(lock) ? s_lock((lock), __FILE__, __LINE__, PG_FUNCNAME_MACRO) : 0)
/* Compiler memory barrier first, then store 0 into *(lock). */
#define S_UNLOCK(lock) do { __asm__ __volatile__("" : : : "memory"); *(lock) = 0; } while (0)
/* Nonzero when *(lock) is 0, i.e. the lock is not held. */
#define S_LOCK_FREE(lock) (*(lock) == 0)
lock为:
/*
 * Storage type of a spinlock: a single byte.
 * 0 means unlocked (S_LOCK_FREE tests *(lock) == 0); tas() stores 1 to take it.
 */
typedef unsigned char slock_t;
1.2.1 S_INIT_LOCK(lock) 和 S_UNLOCK(lock)
初始化和解锁都将lock的值置为0。
/*
 * __asm__ __volatile__("" : : : "memory") is a compiler memory barrier:
 * 1. At the compiler level it prevents the instruction reordering that
 *    optimization could otherwise introduce: every memory access written
 *    before the barrier is emitted before anything that follows it.
 * 2. It invalidates values the compiler keeps in registers and forces them
 *    to be written back, so the variables in memory are up to date.
 * The asm template is empty, so the statement itself emits no instruction.
 *
 * S_UNLOCK issues that barrier and then stores 0 into *(lock).
 */
#define S_UNLOCK(lock) do { __asm__ __volatile__("" : : : "memory"); *(lock) = 0; } while (0)
1.2.2 S_LOCK(lock)
获取锁时,TAS将锁的旧值返回,并将锁的值置为1,根据返回结果判断是否需要自旋。
/*
 * Instructions:
 *   xchgb exchanges the contents of its two operands
 *   (reference: 64-ia-32-architectures-software-developer-vol-2c-manual):
 *       temp  = *lock
 *       *lock = res
 *       res   = temp
 *   The lock prefix makes the instruction that follows it atomic
 *   (reference: 64-ia-32-architectures-software-developer-vol-2a-manual).
 *
 * Qualifiers:
 *   __inline__   : a hint, not an obligation, for the compiler to substitute
 *                  the function body at each call site; typically used on
 *                  short, frequently called utility functions.
 *   volatile     : stops the compiler from optimizing accesses to the
 *                  qualified object; every read is a real load instead of
 *                  reusing a value held in a register. It is a constraint on
 *                  the compiler only and does not bypass the CPU cache.
 *   register     : a hint to keep the variable in a register.
 *   __volatile__ : stops the compiler from optimizing the asm statement;
 *                  without it an optimization could not only reorder the
 *                  instructions but even drop the assembly code altogether.
 */
/*
 * Try TAS once. A nonzero result (the lock was already held) sends the caller
 * into s_lock() with its file, line and function name; otherwise the
 * expression evaluates to 0.
 */
#define S_LOCK(lock) (TAS(lock) ? s_lock((lock), __FILE__, __LINE__, PG_FUNCNAME_MACRO) : 0)
/* Platform hook: on this platform TAS is the tas() inline function. */
#define TAS(lock) tas(lock)
/*
 * Variant used while spinning: read *(lock) first and run the atomic TAS only
 * when that read sees 0; a nonzero read yields 1 without touching the lock.
 */
#define TAS_SPIN(lock) (*(lock) ? 1 : TAS(lock))
/*
 * tas - atomic test-and-set on a one-byte spinlock.
 *
 * Atomically exchanges the value 1 with *lock and returns the value *lock
 * held before the exchange. A return of 0 therefore means the lock was free
 * and is now held by the caller; a nonzero return means it was already taken.
 */
static __inline__ int tas(volatile slock_t *lock)
{
/* Value to store into the lock; receives the lock's old value afterwards. */
register slock_t _res = 1;
__asm__ __volatile__(
" lock \n"
" xchgb %0,%1 \n"
/* %0 = _res in a byte-addressable register, %1 = *lock in memory; both read-write. */
:"+q"(_res), "+m"(*lock)
: /* no inputs */:
/* "memory": acts as a compiler barrier; "cc": condition codes may change. */
"memory", "cc");
return (int) _res;
}
/*
 * src/backend/storage/lmgr/s_lock.c
 *
 * s_lock - platform-independent portion of waiting for a spinlock.
 *
 * lock             the spinlock to acquire
 * file, line, func call site, recorded for the stuck-spinlock report
 *
 * Returns the number of delays (sleeps) that were needed before the lock
 * was obtained.
 */
int s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
{
    SpinDelayStatus waitState;

    init_spin_delay(&waitState, file, line, func);

    for (;;)
    {
        /*
         * TAS_SPIN looks at the lock state first. If *(lock) is 1 the lock
         * is held by someone else and we keep waiting, delaying through
         * perform_spin_delay. If *(lock) is 0 the lock can be taken: the
         * atomic TAS sets *(lock) to 1 and yields 0, which ends the loop.
         */
        if (!TAS_SPIN(lock))
            break;

        perform_spin_delay(&waitState);
    }

    finish_spin_delay(&waitState);

    return waitState.delays;
}
/*
 * src/include/storage/s_lock.h
 *
 * SpinDelayStatus - bookkeeping carried through one wait on a spinlock.
 */
typedef struct
{
    int spins;          /* attempts since the last sleep; reset to 0 after sleeping */
    int delays;         /* number of sleeps so far; only ever incremented */
    int cur_delay;      /* current sleep length; 0 until the first sleep */
    const char *file;   /* call site: source file */
    int line;           /* call site: line number */
    const char *func;   /* call site: function name */
} SpinDelayStatus;

/*
 * src/include/storage/s_lock.h
 *
 * init_spin_delay - reset all counters of *status to 0 and remember the
 * call site (file, line, func) for later reporting.
 */
static inline void init_spin_delay(SpinDelayStatus *status, const char *file, int line, const char *func)
{
    *status = (SpinDelayStatus) {
        .spins = 0,
        .delays = 0,
        .cur_delay = 0,
        .file = file,
        .line = line,
        .func = func,
    };
}
1.2.3 延时等待函数
首先调用CPU体系结构相关的指令,这部分可参考常见的CPU手册,手册建议是在循环中添加这条指令,提高性能。
如果status->spins即循环次数达到了全局变量spins_per_delay(预设为100),先判断status->delays(这个变量是一直累加的,可以用于统计;status->spins则会在睡眠过后清0)是不是超过了NUM_DELAYS,如果是则直接报错。否则该进程主动进入睡眠,睡眠时长由cur_delay指定:初次进入睡眠时该值为MIN_DELAY_USEC,之后每次按1到2倍之间的随机倍数增大,超过MAX_DELAY_USEC后重新回到MIN_DELAY_USEC。
/*
 * Wait while spinning on a contended spinlock.
 *
 * Called once for every failed attempt to take the lock. Most calls only
 * execute the CPU-specific delay; every spins_per_delay-th call additionally
 * puts the process to sleep, with a sleep length that grows randomly and
 * wraps back to the minimum once it exceeds the maximum.
 */
void perform_spin_delay(SpinDelayStatus *status)
{
/* Number of sleeps after which the spinlock is reported as stuck. */
#define NUM_DELAYS 1000
/* Bounds of the sleep length, in microseconds. */
#define MIN_DELAY_USEC 1000L
#define MAX_DELAY_USEC 1000000L

    /* CPU-specific delay each time through the loop */
    SPIN_DELAY();

    /*
     * Block the process every spins_per_delay tries:
     *     static int spins_per_delay = DEFAULT_SPINS_PER_DELAY;
     * Support for dynamic adjustment of spins_per_delay:
     *     #define DEFAULT_SPINS_PER_DELAY 100
     */
    status->spins += 1;
    if (status->spins < spins_per_delay)
        return;

    status->delays += 1;
    if (status->delays > NUM_DELAYS)
        s_lock_stuck(status->file, status->line, status->func);

    /* first time to delay? */
    if (status->cur_delay == 0)
        status->cur_delay = MIN_DELAY_USEC;

    /*
     * Once we start sleeping, the overhead of reporting a wait event is
     * justified. Actively spinning easily stands out in profilers, but
     * sleeping with an exponential backoff is harder to spot...
     *
     * We might want to report something more granular at some point, but
     * this is better than nothing.
     *
     * Paired with pgstat_report_wait_end(); the wait event is what the
     * pg_stat_activity view shows as the state of this process.
     */
    pgstat_report_wait_start(WAIT_EVENT_SPIN_DELAY);

    /*
     * status->cur_delay is the sleep length of the process in microseconds.
     * The sleep used to be implemented with select() and, for compatibility
     * reasons, is now implemented with nanosleep(); see
     * https://github.com/postgres/postgres/commit/a948e49e2ef11815be0b211723bfc5b53b7f75a8
     */
    pg_usleep(status->cur_delay);
    pgstat_report_wait_end();

    /* Only used for debugging; not relevant otherwise. */
#if defined(S_LOCK_TEST)
    fprintf(stdout, "*");
    fflush(stdout);
#endif

    /*
     * increase delay by a random fraction between 1X and 2X
     *
     * pg_prng_double(): select a random double uniformly from the range
     * [0.0, 1.0)
     */
    status->cur_delay += (int) (status->cur_delay * pg_prng_double(&pg_global_prng_state) + 0.5);

    /* wrap back to minimum delay when max is exceeded */
    if (status->cur_delay > MAX_DELAY_USEC)
        status->cur_delay = MIN_DELAY_USEC;

    /* Start counting spins again for the next round. */
    status->spins = 0;
}
PAUSE指令的说明参考:64-ia-32-architectures-software-developer-vol-2b-manual:
/*
"Improves the performance of spin-wait loops. When executing a “spin-wait loop,” processors will suffer a severe performance penalty when exiting the loop because it detects a possible memory order violation. The PAUSE instruction provides a hint to the processor that the code sequence is a spin-wait loop. The processor uses this hint to avoid the memory order violation in most situations, which greatly improves processor performance. For this reason, it is recommended that a PAUSE instruction be placed in all spin-wait loops."
*/
/* Per-iteration CPU delay hook used by perform_spin_delay(). */
#define SPIN_DELAY() spin_delay()
/*
 * spin_delay - execute one spin-wait hint instruction.
 *
 * Takes no arguments and returns nothing; the asm statement is marked
 * __volatile__ so the compiler keeps it.
 */
static __inline__ void spin_delay(void)
{
/*
* This sequence is equivalent to the PAUSE instruction ("rep" is
* ignored by old IA32 processors if the following instruction is
* not a string operation);
*/
__asm__ __volatile__(
" rep; nop \n");
}
/*
 * s_lock_stuck() - complain about a stuck spinlock
 *
 * file, line, func identify the place that tried to take the lock; a NULL
 * func is reported as "(unknown)". In an S_LOCK_TEST build the message goes
 * to stderr and the program exits with status 1; otherwise it is raised
 * through elog at PANIC level.
 */
static void s_lock_stuck(const char *file, int line, const char *func)
{
    const char *where = (func != NULL) ? func : "(unknown)";

#if defined(S_LOCK_TEST)
    fprintf(stderr, "\nStuck spinlock detected at %s, %s:%d.\n", where, file, line);
    exit(1);
#else
    elog(PANIC, "stuck spinlock detected at %s, %s:%d", where, file, line);
#endif
}