阻塞就是睡眠,可以通过等待队列方式实现,唤醒跟睡眠是反向关系。
1. 阻塞概念:
阻塞操作:指执行设备操作时若不能获得资源,则挂起(即休眠),直到满足可操作的条件后再进行操作。
可以提高CPU 的工作效率。
Applications typically expect to block when a request cannot be satisfied immediately.
Some examples include:
--- Calling read() when there is currently no data available.
--- Calling write() when a device cannot yet accept data.
--- Calling open() on a device which takes a little time to initialize.
In these cases, drivers should be able to block the application.
--- The process will sleep until the operation can yield results.
2. 等待队列---wait queue
linux驱动程序中,使用等待队列来实现阻塞进程, A wait_queue is a common way to implement blocking behavior.
A wait queue is a kernel construct which enables a process to sleep until it is woken up by another thread.
A wait queue enables a process to continue sleeping until a particular condition is met.
Wait queues provide the following interface for blocking process.
2.1 Statically declare and initialize a wait queue.
DECLARE_WAIT_QUEUE_HEAD(name); //初始化一个名为 name 的等待队列头,这个等待队列头指向一个等待队列任务链表 task_list 。
/*
 * Define a wait_queue_head_t variable named `name` and initialize it at its
 * point of definition with __WAIT_QUEUE_HEAD_INITIALIZER(name).
 */
#define DECLARE_WAIT_QUEUE_HEAD(name) \
wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
/*
 * Static initializer for a wait_queue_head_t named `name`: the spinlock is
 * initialized with __SPIN_LOCK_UNLOCKED() and both pointers of task_list
 * refer back to task_list itself, i.e. a list head linked only to itself.
 */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.task_list = { &(name).task_list, &(name).task_list } } /* task_list: list of waiting entries */
DECLARE_WAITQUEUE(name, tsk); //tsk 应该是对应被阻塞进程的 task 结构
/*
 * Define a wait_queue_t entry named `name`, initialized with
 * __WAITQUEUE_INITIALIZER(name, tsk).
 */
#define DECLARE_WAITQUEUE(name, tsk) \
wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
/*
 * Static initializer for a wait_queue_t entry: .private is set to `tsk`,
 * the wake callback is default_wake_function, and the list linkage starts
 * out as { NULL, NULL }.
 */
#define __WAITQUEUE_INITIALIZER(name, tsk) { \
.private = tsk, \
.func = default_wake_function, \
.task_list = { NULL, NULL } }
2.2 Initialize a dynamically allocated wait queue:
wait_queue_head_t my_queue; //定义 等待队列头,
注意这是一个链表,因为镶嵌了 list_head 结构。struct __wait_queue_head {
spinlock_t lock; //内容有一把自旋锁
struct list_head task_list; //任务链表
};
typedef struct __wait_queue_head wait_queue_head_t; /* wait_queue_head_t: the wait-queue head type (struct __wait_queue_head) */
init_waitqueue_head(&my_queue); //初始化名为 my_queue 的等待队列头,这个等待队列头指向一个等待队列任务链表
/*
 * Initialize the wait-queue head pointed to by q at run time.
 * Each expansion declares its own static struct lock_class_key and passes
 * its address to __init_waitqueue_head() together with q.
 */
#define init_waitqueue_head(q) \
do { \
static struct lock_class_key __key; \
\
__init_waitqueue_head((q), &__key); \
} while (0)
/*
 * __init_waitqueue_head - run-time initialization of a wait-queue head
 * @q:   head to initialize
 * @key: lock_class_key handed to lockdep_set_class() for q->lock
 */
void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
{
spin_lock_init(&q->lock); /* initialize the spinlock (this does not acquire it) */
lockdep_set_class(&q->lock, key); /* associate q->lock with the lock class identified by key */
INIT_LIST_HEAD(&q->task_list); /* initialize the list of waiting entries */
}
2.3 添加和删除等待队列:
void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); //将等待队列 wait 添加到等待队列头q指向的等待队列链表中。
/*
 * add_wait_queue - queue a wait entry on a wait-queue head
 * @q:    wait-queue head
 * @wait: entry to add
 *
 * Clears WQ_FLAG_EXCLUSIVE on @wait, then links it into q->task_list while
 * holding q->lock with the interrupt state saved and restored.
 */
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, wait); /* link wait into the task_list that q heads */
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);
/*
 * Link @new into @head's task_list with list_add().
 * No locking is done here; the callers shown in these notes take head->lock
 * before calling it.
 */
static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
{
list_add(&new->task_list, &head->task_list);
}
void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); //将等待队列 wait 从等待队列头q指向的等待队列链表中移除。
/*
 * remove_wait_queue - take a wait entry off a wait-queue head
 * @q:    wait-queue head
 * @wait: entry to remove
 *
 * The unlink is done while holding q->lock with the interrupt state saved
 * and restored.
 */
void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
spin_lock_irqsave(&q->lock, flags);
__remove_wait_queue(q, wait); /* unlink the wait entry from the list that q heads */
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);
/*
 * Unlink @old from the list it is on. @head is not used by the body.
 * No locking is done here; the callers shown in these notes take the
 * head's lock before calling it.
 */
static inline void __remove_wait_queue(wait_queue_head_t *head,
wait_queue_t *old)
{
list_del(&old->task_list); /* unlink this one entry (not the whole list) */
}
3. 等待事件:
3.1 wait_event(queue, condition);
等待一个参数为queue的等待队列头被唤醒,第二个参数condition必须满足,否则阻塞。
Wait until a condition is true. Once asleep, the process cannot be interrupted by signals; you should consider using an interruptible (or at least killable) version instead.
/**
 * wait_event - sleep until a condition gets true
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 *
 * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
 * @condition evaluates to true. The @condition is checked each time
 * the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * @condition is tested once up front; __wait_event() is entered only when
 * that first test is false, so an already-true condition never sleeps.
 */
#define wait_event(wq, condition) \
do { \
if (condition) \
break; \
__wait_event(wq, condition); \
} while (0)
/*
 * Slow path of wait_event(): queue the current task on @wq and sleep in
 * TASK_UNINTERRUPTIBLE until @condition is true.
 *
 * Each pass calls prepare_to_wait() (which queues the entry and sets the
 * task state) BEFORE re-testing @condition, and only then calls schedule().
 * The only difference from __wait_event_interruptible() in the
 * prepare_to_wait() call is the state: that one uses TASK_INTERRUPTIBLE.
 *
 * Do not put comments after a trailing backslash: the backslash must be the
 * last character on the line or the macro body ends there.
 */
#define __wait_event(wq, condition) \
do { \
	DEFINE_WAIT(__wait); \
	\
	for (;;) { \
		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
		if (condition) \
			break; \
		schedule(); \
	} \
	finish_wait(&wq, &__wait); \
} while (0)
DEFINE_WAIT()定义:
/* Define a wait entry called `name` whose wake callback is autoremove_wake_function. */
#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
/*
 * Define a wait_queue_t called `name` for the current task:
 * .private is `current`, .func is the given wake callback, and task_list is
 * initialized with LIST_HEAD_INIT() on itself (so it is not on any queue yet).
 */
#define DEFINE_WAIT_FUNC(name, function) \
wait_queue_t name = { \
.private = current, \
.func = function, \
.task_list = LIST_HEAD_INIT((name).task_list), \
}
prepare_to_wait()定义:
/*
 * prepare_to_wait - queue @wait on @q if it is not queued yet, then set the
 *                   current task's state
 * @q:     wait-queue head
 * @wait:  wait entry
 * @state: value passed to set_current_state()
 *
 * WQ_FLAG_EXCLUSIVE is cleared on @wait. The queueing and the state change
 * both happen while q->lock is held.
 *
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
{
unsigned long flags;
wait->flags &= ~WQ_FLAG_EXCLUSIVE;
spin_lock_irqsave(&q->lock, flags);
/* an entry whose task_list is empty is not on a queue yet; add it only once */
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait);
set_current_state(state);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);
finish_wait()定义:
/**
 * finish_wait - clean up after waiting in a queue
 * @q: waitqueue waited on
 * @wait: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
__set_current_state(TASK_RUNNING);
/*
 * We can check for list emptiness outside the lock
 * IFF:
 *  - we use the "careful" check that verifies both
 *    the next and prev pointers, so that there cannot
 *    be any half-pending updates in progress on other
 *    CPU's that we haven't seen yet (and that might
 *    still change the stack area).
 * and
 *  - all other users take the lock (ie we can only
 *    have _one_ other CPU that looks at or modifies
 *    the list).
 */
if (!list_empty_careful(&wait->task_list)) {
spin_lock_irqsave(&q->lock, flags);
/*
 * list_del_init() rather than list_del(): presumably so the entry
 * tests as empty again if it is reused with prepare_to_wait()
 * (TODO confirm against list.h).
 */
list_del_init(&wait->task_list);
spin_unlock_irqrestore(&q->lock, flags);
}
}
EXPORT_SYMBOL(finish_wait);
3.2 wait_event_interruptible(queue, condition);
Friendly version of wait_event() which can be interrupted by any signal.
/**
 * wait_event_interruptible - sleep until a condition gets true
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 *
 * The process is put to sleep (TASK_INTERRUPTIBLE) until the
 * @condition evaluates to true or a signal is received.
 * The @condition is checked each time the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * The function will return -ERESTARTSYS if it was interrupted by a
 * signal and 0 if @condition evaluated to true.
 *
 * This is a statement expression: __ret starts at 0 and is changed only by
 * __wait_event_interruptible(), which is entered only when the first test
 * of @condition is false.
 */
#define wait_event_interruptible(wq, condition) \
({ \
int __ret = 0; \
if (!(condition)) \
__wait_event_interruptible(wq, condition, __ret); \
__ret; \
})
/*
 * Slow path of wait_event_interruptible(): queue the current task on @wq and
 * sleep in TASK_INTERRUPTIBLE until @condition is true or a signal is pending.
 *
 * @ret is an lvalue supplied by the caller; it is set to -ERESTARTSYS when
 * the wait is abandoned because of a pending signal and is left untouched
 * when @condition becomes true.
 *
 * The main difference from __wait_event() is the task state passed to
 * prepare_to_wait(): TASK_INTERRUPTIBLE instead of TASK_UNINTERRUPTIBLE.
 *
 * Do not put comments after a trailing backslash: the backslash must be the
 * last character on the line or the macro body ends there.
 */
#define __wait_event_interruptible(wq, condition, ret) \
do { \
	DEFINE_WAIT(__wait); \
	\
	for (;;) { \
		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
		if (condition) \
			break; \
		if (!signal_pending(current)) { \
			schedule(); \
			continue; \
		} \
		ret = -ERESTARTSYS; \
		break; \
	} \
	finish_wait(&wq, &__wait); \
} while (0)
3.3 wait_event_timeout(queue, condition, timeout);
like wait_event(), but will eventually time out regardless of whether or not the condition is satisfied in the meantime.
/**
 * wait_event_timeout - sleep until a condition gets true or a timeout elapses
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 * @timeout: timeout, in jiffies
 *
 * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
 * @condition evaluates to true. The @condition is checked each time
 * the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * The function returns 0 if the @timeout elapsed, and the remaining
 * jiffies if the condition evaluated to true before the timeout elapsed.
 *
 * __ret is seeded with @timeout, so when @condition is already true on
 * entry the value of @timeout itself is returned without sleeping.
 */
#define wait_event_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
if (!(condition)) \
__wait_event_timeout(wq, condition, __ret); \
__ret; \
})
/*
 * Slow path of wait_event_timeout(). @ret carries the remaining timeout in
 * and out: each pass sleeps in TASK_UNINTERRUPTIBLE via schedule_timeout(ret)
 * and stores the result back in @ret. The loop ends when @condition is true
 * or when @ret reaches 0.
 */
#define __wait_event_timeout(wq, condition, ret) \
do { \
DEFINE_WAIT(__wait); \
\
for (;;) { \
prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
if (condition) \
break; \
ret = schedule_timeout(ret); \
if (!ret) \
break; \
} \
finish_wait(&wq, &__wait); \
} while (0)
3.4 wait_event_interruptible_timeout(queue, condition, timeout);
等待queue作为等待队列头的等待队列被唤醒。
timeout指阻塞等待的超时时间,以jiffy为单位,timeout到达时,无论condition是否满足,均返回。
Like wait_event_interruptible(), but it will eventually time out regardless of whether or not the condition is satisfied in the meantime.
/**
 * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 * @timeout: timeout, in jiffies
 *
 * The process is put to sleep (TASK_INTERRUPTIBLE) until the
 * @condition evaluates to true or a signal is received.
 * The @condition is checked each time the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
 * was interrupted by a signal, and the remaining jiffies otherwise
 * if the condition evaluated to true before the timeout elapsed.
 *
 * __ret is seeded with @timeout, so when @condition is already true on
 * entry the value of @timeout itself is returned without sleeping.
 */
#define wait_event_interruptible_timeout(wq, condition, timeout) \
({ \
long __ret = timeout; \
if (!(condition)) \
__wait_event_interruptible_timeout(wq, condition, __ret); \
__ret; \
})
/*
 * Slow path of wait_event_interruptible_timeout(). @ret carries the
 * remaining timeout in and out. Each pass sleeps in TASK_INTERRUPTIBLE via
 * schedule_timeout(ret) unless a signal is pending. The loop ends when
 * @condition is true, when @ret reaches 0, or with @ret = -ERESTARTSYS
 * when a signal is pending.
 */
#define __wait_event_interruptible_timeout(wq, condition, ret) \
do { \
DEFINE_WAIT(__wait); \
\
for (;;) { \
prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE); \
if (condition) \
break; \
if (!signal_pending(current)) { \
ret = schedule_timeout(ret); \
if (!ret) \
break; \
continue; \
} \
ret = -ERESTARTSYS; \
break; \
} \
finish_wait(&wq, &__wait); \
} while (0)
3.5 several examples for wait_event implement:
example1:
Putting a process to sleep is only half of the solution:
without someone to wake that process up, the read() would block indefinitely, which is not desirable behavior.
Wait queues therefore have a complementary set of functions for waking up sleepers.
--- This needs to be done from another context, obviously, such as a separate thread, an interrupt handler, or even another process.
--- That is, any place which might modify the condition which the process is sleeping on.
example2 :
//假定q是我们希望休眠的等待队列
DEFINE_WAIT(wait);
add_wait_queue(q, &wait); //把wait添加进 等待队列q
while(!condition){ //condition是我们等待的事件
prepare_to_wait(&q, wait, TASK_INTERRUPTIBLE);
if(signal_pending(current))
//信号处理
schedule();
}
finish_wait(&q, &wait); //把wait 移出等待队列q
进程通过以下几个步骤将自己加入到一个等待队列中:
a. 调用DEFINE_WAIT()宏,创建一项等待事件,
b. 调用add_wait_queue(), 将等待事件项wait加入到等待队列q中。 当condition事件发生时,对等待队列执行wake_up()操作。
c. 调用 prepare_to_wait() 将进程状态改为 TASK_INTERRUPTIBLE/TASK_UNINTERRUPTIBLE.
d. 当进程被唤醒时,他会再次检查条件condition,如果是,就退出循环。
e. 当条件condition满足后,进程将自己设置为TASK_RUNNING, 并调用finish_wait()方法,把自己移出等待队列。
example3: inotify_read() 是等待队列的一个典型用法,只不过是在while里检查了状态,而不是条件循环。
/*
 * inotify_read - read() handler that copies queued events to user space,
 *                sleeping on group->notification_waitq when none is queued
 * @file:  open file; file->private_data holds the fsnotify_group
 * @buf:   user buffer to fill
 * @count: space left in @buf
 * @pos:   not used by this function
 *
 * Returns the number of bytes copied when at least one byte was copied and
 * the last result was not -EFAULT. Otherwise returns the last error code:
 * -EAGAIN for an O_NONBLOCK file with nothing queued, -EINTR when a signal
 * is pending, or the error from get_one_event() / copy_event_to_user().
 */
static ssize_t inotify_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
struct fsnotify_group *group;
struct fsnotify_event *kevent;
char __user *start;
int ret;
DEFINE_WAIT(wait);
start = buf;
group = file->private_data;
while (1) {
/* queue ourselves and go TASK_INTERRUPTIBLE before looking for an event */
prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
/* fetch the next event while holding notification_mutex */
mutex_lock(&group->notification_mutex);
kevent = get_one_event(group, count);
mutex_unlock(&group->notification_mutex);
pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent);
/* non-NULL: either a real event or an ERR_PTR-encoded error */
if (kevent) {
ret = PTR_ERR(kevent);
if (IS_ERR(kevent))
break;
ret = copy_event_to_user(group, kevent, buf);
fsnotify_put_event(kevent);
if (ret < 0)
break;
/* advance past what was copied and try for another event without sleeping */
buf += ret;
count -= ret;
continue;
}
/* nothing queued: decide whether to bail out or sleep */
ret = -EAGAIN;
if (file->f_flags & O_NONBLOCK)
break;
ret = -EINTR;
if (signal_pending(current))
break;
/* something was already copied: return it rather than sleep */
if (start != buf)
break;
schedule();
}
finish_wait(&group->notification_waitq, &wait);
/* report bytes copied in preference to the error code, except for -EFAULT */
if (start != buf && ret != -EFAULT)
ret = buf - start;
return ret;
}
4. 唤醒队列 --- wake_up():
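Wait queues provide the following functions for waking up processes waiting on a wait queue.
4.1 void wake_up(wait_queue_head_t *queue); //唤醒以queue作为等待队列头的等待队列链表中的等待进程:所有非独占等待者,以及至多一个独占(WQ_FLAG_EXCLUSIVE)等待者。
Wake up the processes waiting on a wait queue: every non-exclusive waiter plus at most one exclusive waiter (nr_exclusive is 1 in the macro below).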
The appropriate time to call this is after changing any variable that could change the result of wait condition.
/* wake_up(x): call __wake_up() on x with mode TASK_NORMAL, nr_exclusive = 1 and a NULL key. */
#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @q: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * It may be assumed that this function implies a write memory barrier before
 * changing the task state if and only if any tasks are woken up.
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
{
unsigned long flags;
/* the queue is walked under q->lock with the interrupt state saved and restored */
spin_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr_exclusive, 0, key); /* wake_flags is 0 here */
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(__wake_up);
/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake all the non-exclusive tasks and one exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 *
 * No lock is taken here; __wake_up() calls this with q->lock held.
 */
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int wake_flags, void *key)
{
wait_queue_t *curr, *next;
/* the _safe iterator keeps `next`, so the walk survives curr being unlinked */
list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
/* flags are read before func runs; presumably func may unlink curr (TODO confirm) */
unsigned flags = curr->flags;
/*
 * Stop only when all three hold: the callback reported a wake-up, the
 * entry is exclusive, and this was the last of the nr_exclusive
 * exclusive wake-ups allowed. nr_exclusive is decremented only when
 * the first two are true.
 */
if (curr->func(curr, mode, wake_flags, key) &&
(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}
4.2 void wake_up_interruptible(wait_queue_head_t *queue);
It is considered good practice to use wake_up_interruptible() if your sleeping threads use wait_event_interruptible().
/* wake_up_interruptible(x): like wake_up(x) but with mode TASK_INTERRUPTIBLE instead of TASK_NORMAL. */
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
4.3 An example to implement wake_up().

5. 在等待队列上睡眠:
5.1 sleep_on(wait_queue_head_t *q);
作用:将目前进程的状态置为TASK_UNINTERRUPTIBLE,并定义一个等待队列,之后把它附属到等待队列头q,直到资源可获得,q引导的等待队列被唤醒。
/*
 * sleep_on - sleep on @q in TASK_UNINTERRUPTIBLE
 * @q: wait-queue head to sleep on
 *
 * Thin wrapper around sleep_on_common() with timeout MAX_SCHEDULE_TIMEOUT;
 * the value returned by sleep_on_common() is discarded.
 */
void __sched sleep_on(wait_queue_head_t *q)
{
sleep_on_common(q, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
EXPORT_SYMBOL(sleep_on);
/*
 * sleep_on_common - put the current task on @q and sleep
 * @q:       wait-queue head to sleep on
 * @state:   task state to sleep in (the callers shown here pass
 *           TASK_UNINTERRUPTIBLE or TASK_INTERRUPTIBLE)
 * @timeout: value handed to schedule_timeout()
 *
 * Returns whatever schedule_timeout() returned.
 *
 * Unlike the wait_event*() macros, no condition is tested between setting
 * the task state and calling schedule_timeout().
 */
static long __sched
sleep_on_common(wait_queue_head_t *q, int state, long timeout)
{
unsigned long flags;
wait_queue_t wait;
init_waitqueue_entry(&wait, current);
__set_current_state(state); /* change the current task's state: TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE */
spin_lock_irqsave(&q->lock, flags);
__add_wait_queue(q, &wait);
spin_unlock(&q->lock); /* plain unlock: the saved flags are restored only by the final spin_unlock_irqrestore() */
timeout = schedule_timeout(timeout); /* give up the CPU so that other tasks can be scheduled */
spin_lock_irq(&q->lock);
__remove_wait_queue(q, &wait);
spin_unlock_irqrestore(&q->lock, flags);
return timeout;
}
/*
 * set_current_state() includes a barrier so that the write of current->state
 * is correctly serialised wrt the caller's subsequent test of whether to
 * actually sleep:
 *
 *	set_current_state(TASK_UNINTERRUPTIBLE);
 *	if (do_i_need_to_sleep())
 *		schedule();
 *
 * If the caller does not need such serialisation then use __set_current_state()
 *
 * __set_current_state() below is just a plain assignment to current->state.
 */
#define __set_current_state(state_value) \
do { current->state = (state_value); } while (0)
5.2 interruptible_sleep_on(wait_queue_head_t *q);
作用: 与sleep_on()类似,将目前进程状态置为TASK_INTERRUPTIBLE...
/*
 * interruptible_sleep_on - sleep on @q in TASK_INTERRUPTIBLE
 * @q: wait-queue head to sleep on
 *
 * Thin wrapper around sleep_on_common() with timeout MAX_SCHEDULE_TIMEOUT;
 * the value returned by sleep_on_common() is discarded.
 */
void __sched interruptible_sleep_on(wait_queue_head_t *q)
{
sleep_on_common(q, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
EXPORT_SYMBOL(interruptible_sleep_on);
sleep_on() 函数应该与 wake_up() 成对使用,
interruptible_sleep_on() 应该与 wake_up_interruptible() 成对使用。
就这么几个函数,关于等待队列的,函数很简单,但是里面的参数有点绕。