Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c | 407
1 file changed, 257 insertions(+), 150 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1745ca788ede..45320e27a16c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -222,7 +222,9 @@ struct worker_pool {
struct workqueue_attrs *attrs; /* I: worker attributes */
struct hlist_node hash_node; /* PL: unbound_pool_hash node */
int refcnt; /* PL: refcnt for unbound pools */
-
+#ifdef CONFIG_PREEMPT_RT
+ spinlock_t cb_lock; /* BH worker cancel lock */
+#endif
/*
* Destruction of pool is RCU protected to allow dereferences
* from get_work_pool().
@@ -364,7 +366,8 @@ struct workqueue_struct {
#ifdef CONFIG_LOCKDEP
char *lock_name;
struct lock_class_key key;
- struct lockdep_map lockdep_map;
+ struct lockdep_map __lockdep_map;
+ struct lockdep_map *lockdep_map;
#endif
char name[WQ_NAME_LEN]; /* I: workqueue name */
@@ -377,7 +380,7 @@ struct workqueue_struct {
/* hot fields used during command issue, aligned to cacheline */
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
- struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */
+ struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */
struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
};
@@ -476,16 +479,13 @@ static bool wq_debug_force_rr_cpu = false;
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
/* to raise softirq for the BH worker pools on other CPUs */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_work [NR_STD_WORKER_POOLS],
- bh_pool_irq_works);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_work [NR_STD_WORKER_POOLS], bh_pool_irq_works);
/* the BH worker pools */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
- bh_worker_pools);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], bh_worker_pools);
/* the per-cpu worker pools */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
- cpu_worker_pools);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */
@@ -507,12 +507,16 @@ static struct kthread_worker *pwq_release_worker __ro_after_init;
struct workqueue_struct *system_wq __ro_after_init;
EXPORT_SYMBOL(system_wq);
+struct workqueue_struct *system_percpu_wq __ro_after_init;
+EXPORT_SYMBOL(system_percpu_wq);
struct workqueue_struct *system_highpri_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_unbound_wq);
+struct workqueue_struct *system_dfl_wq __ro_after_init;
+EXPORT_SYMBOL_GPL(system_dfl_wq);
struct workqueue_struct *system_freezable_wq __ro_after_init;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __ro_after_init;
@@ -688,7 +692,7 @@ EXPORT_SYMBOL_GPL(destroy_work_on_stack);
void destroy_delayed_work_on_stack(struct delayed_work *work)
{
- destroy_timer_on_stack(&work->timer);
+ timer_destroy_on_stack(&work->timer);
debug_object_free(&work->work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
@@ -897,7 +901,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits)
{
- return (v >> shift) & ((1 << bits) - 1);
+ return (v >> shift) & ((1U << bits) - 1);
}
static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data)
@@ -1688,17 +1692,14 @@ static void __pwq_activate_work(struct pool_workqueue *pwq,
static bool tryinc_node_nr_active(struct wq_node_nr_active *nna)
{
int max = READ_ONCE(nna->max);
+ int old = atomic_read(&nna->nr);
- while (true) {
- int old, tmp;
-
- old = atomic_read(&nna->nr);
+ do {
if (old >= max)
return false;
- tmp = atomic_cmpxchg_relaxed(&nna->nr, old, old + 1);
- if (tmp == old)
- return true;
- }
+ } while (!atomic_try_cmpxchg_relaxed(&nna->nr, &old, old + 1));
+
+ return true;
}
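The conversion above works because atomic_try_cmpxchg_relaxed() refreshes 'old' with the current counter value when the exchange fails, so the explicit re-read of the old loop is no longer needed. A minimal sketch of the same bounded-increment idiom, with hypothetical names:

	/* Hypothetical sketch of the try_cmpxchg bounded-increment idiom. */
	static bool demo_tryinc_below(atomic_t *counter, int max)
	{
		int old = atomic_read(counter);

		do {
			if (old >= max)
				return false;
			/* on failure, 'old' is updated to the current value */
		} while (!atomic_try_cmpxchg_relaxed(counter, &old, old + 1));

		return true;
	}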
/**
@@ -2059,11 +2060,11 @@ static int try_to_grab_pending(struct work_struct *work, u32 cflags,
struct delayed_work *dwork = to_delayed_work(work);
/*
- * dwork->timer is irqsafe. If del_timer() fails, it's
+ * dwork->timer is irqsafe. If timer_delete() fails, it's
* guaranteed that the timer is not queued anywhere and not
* running on the local CPU.
*/
- if (likely(del_timer(&dwork->timer)))
+ if (likely(timer_delete(&dwork->timer)))
return 1;
}
@@ -2182,7 +2183,7 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
debug_work_activate(work);
/* record the work call stack in order to print it in KASAN reports */
- kasan_record_aux_stack_noalloc(work);
+ kasan_record_aux_stack(work);
/* we own @work, set data and link */
set_work_pwq(work, pwq, extra_flags);
@@ -2223,12 +2224,9 @@ static int wq_select_unbound_cpu(int cpu)
}
new_cpu = __this_cpu_read(wq_rr_cpu_last);
- new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
- if (unlikely(new_cpu >= nr_cpu_ids)) {
- new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
- if (unlikely(new_cpu >= nr_cpu_ids))
- return cpu;
- }
+ new_cpu = cpumask_next_and_wrap(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+ if (unlikely(new_cpu >= nr_cpu_ids))
+ return cpu;
__this_cpu_write(wq_rr_cpu_last, new_cpu);
return new_cpu;
@@ -2256,8 +2254,10 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
* queues a new work item to a wq after destroy_workqueue(wq).
*/
if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) &&
- WARN_ON_ONCE(!is_chained_work(wq))))
+ WARN_ONCE(!is_chained_work(wq), "workqueue: cannot queue %ps on wq %s\n",
+ work->func, wq->name))) {
return;
+ }
rcu_read_lock();
retry:
/* pwq which will be used unless @work is executing elsewhere */
@@ -2481,7 +2481,7 @@ EXPORT_SYMBOL_GPL(queue_work_node);
void delayed_work_timer_fn(struct timer_list *t)
{
- struct delayed_work *dwork = from_timer(dwork, t, timer);
+ struct delayed_work *dwork = timer_container_of(dwork, t, timer);
/* should have been called from irqsafe timer with irq already off */
__queue_work(dwork->cpu, dwork->wq, &dwork->work);
@@ -2510,6 +2510,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
return;
}
+ WARN_ON_ONCE(cpu != WORK_CPU_UNBOUND && !cpu_online(cpu));
dwork->wq = wq;
dwork->cpu = cpu;
timer->expires = jiffies + delay;
@@ -2535,6 +2536,12 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
* @dwork: work to queue
* @delay: number of jiffies to wait before queueing
*
+ * We queue the delayed_work to a specific CPU; for non-zero delays the
+ * caller must ensure it is online and can't go away. Callers that fail
+ * to ensure this may get @dwork->timer queued to an offlined CPU, which
+ * will prevent queueing of @dwork->work unless the offlined CPU becomes
+ * online again.
+ *
* Return: %false if @work was already on a queue, %true otherwise. If
* @delay is zero and @dwork is idle, it will be scheduled for immediate
* execution.
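One hypothetical caller-side pattern for the constraint documented above: take cpus_read_lock() so the target CPU cannot be unplugged while queueing. This only guarantees the CPU is online at queue time; for non-zero delays the caller still needs to keep it online (or requeue from a hotplug callback) until the timer fires.

	/* Hypothetical caller pinning CPU hotplug across the queueing call. */
	static bool demo_queue_dwork_on_cpu(struct workqueue_struct *wq,
					    struct delayed_work *dwork,
					    int cpu, unsigned long delay)
	{
		bool queued = false;

		cpus_read_lock();
		if (cpu_online(cpu))
			queued = queue_delayed_work_on(cpu, wq, dwork, delay);
		cpus_read_unlock();

		return queued;
	}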
@@ -2709,7 +2716,6 @@ static void detach_worker(struct worker *worker)
unbind_worker(worker);
list_del(&worker->node);
- worker->pool = NULL;
}
/**
@@ -2729,6 +2735,7 @@ static void worker_detach_from_pool(struct worker *worker)
mutex_lock(&wq_pool_attach_mutex);
detach_worker(worker);
+ worker->pool = NULL;
mutex_unlock(&wq_pool_attach_mutex);
/* clear leftover flags without pool->lock after it is detached */
@@ -2902,7 +2909,7 @@ static void set_worker_dying(struct worker *worker, struct list_head *list)
*/
static void idle_worker_timeout(struct timer_list *t)
{
- struct worker_pool *pool = from_timer(pool, t, idle_timer);
+ struct worker_pool *pool = timer_container_of(pool, t, idle_timer);
bool do_cull = false;
if (work_pending(&pool->idle_cull_work))
@@ -2925,7 +2932,7 @@ static void idle_worker_timeout(struct timer_list *t)
raw_spin_unlock_irq(&pool->lock);
if (do_cull)
- queue_work(system_unbound_wq, &pool->idle_cull_work);
+ queue_work(system_dfl_wq, &pool->idle_cull_work);
}
/**
@@ -3001,7 +3008,7 @@ static void send_mayday(struct work_struct *work)
static void pool_mayday_timeout(struct timer_list *t)
{
- struct worker_pool *pool = from_timer(pool, t, mayday_timer);
+ struct worker_pool *pool = timer_container_of(pool, t, mayday_timer);
struct work_struct *work;
raw_spin_lock_irq(&pool->lock);
@@ -3062,7 +3069,7 @@ restart:
break;
}
- del_timer_sync(&pool->mayday_timer);
+ timer_delete_sync(&pool->mayday_timer);
raw_spin_lock_irq(&pool->lock);
/*
* This is necessary even after a new worker was just successfully
@@ -3073,6 +3080,31 @@ restart:
goto restart;
}
+#ifdef CONFIG_PREEMPT_RT
+static void worker_lock_callback(struct worker_pool *pool)
+{
+ spin_lock(&pool->cb_lock);
+}
+
+static void worker_unlock_callback(struct worker_pool *pool)
+{
+ spin_unlock(&pool->cb_lock);
+}
+
+static void workqueue_callback_cancel_wait_running(struct worker_pool *pool)
+{
+ spin_lock(&pool->cb_lock);
+ spin_unlock(&pool->cb_lock);
+}
+
+#else
+
+static void worker_lock_callback(struct worker_pool *pool) { }
+static void worker_unlock_callback(struct worker_pool *pool) { }
+static void workqueue_callback_cancel_wait_running(struct worker_pool *pool) { }
+
+#endif
+
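These helpers implement a lock-based wait-for-running-callback handshake: bh_worker() holds cb_lock around callback execution, and the cancel path acquires and releases the same lock to wait for any in-flight callback. A reduced, hypothetical illustration of the pattern outside workqueue code:

	/* Hypothetical executor/canceller handshake via a shared lock. */
	static DEFINE_SPINLOCK(demo_cb_lock);

	static void demo_run_callback(void (*cb)(void *), void *arg)
	{
		spin_lock(&demo_cb_lock);	/* cf. worker_lock_callback() */
		cb(arg);
		spin_unlock(&demo_cb_lock);
	}

	static void demo_cancel_wait_running(void)
	{
		/* Blocks until the current callback section ends; on
		 * PREEMPT_RT this sleeps with priority inheritance instead
		 * of spinning while flipping BH on and off. */
		spin_lock(&demo_cb_lock);
		spin_unlock(&demo_cb_lock);
	}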
/**
* manage_workers - manage worker pool
* @worker: self
@@ -3203,7 +3235,7 @@ __acquires(&pool->lock)
lockdep_start_depth = lockdep_depth(current);
/* see drain_dead_softirq_workfn() */
if (!bh_draining)
- lock_map_acquire(&pwq->wq->lockdep_map);
+ lock_map_acquire(pwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
/*
* Strictly speaking we should mark the invariant state without holding
@@ -3234,10 +3266,10 @@ __acquires(&pool->lock)
* point will only record its address.
*/
trace_workqueue_execute_end(work, worker->current_func);
- pwq->stats[PWQ_STAT_COMPLETED]++;
+
lock_map_release(&lockdep_map);
if (!bh_draining)
- lock_map_release(&pwq->wq->lockdep_map);
+ lock_map_release(pwq->wq->lockdep_map);
if (unlikely((worker->task && in_atomic()) ||
lockdep_depth(current) != lockdep_start_depth ||
@@ -3265,6 +3297,8 @@ __acquires(&pool->lock)
raw_spin_lock_irq(&pool->lock);
+ pwq->stats[PWQ_STAT_COMPLETED]++;
+
/*
* In addition to %WQ_CPU_INTENSIVE, @worker may also have been marked
* CPU intensive by wq_worker_tick() if @work hogged CPU longer than
@@ -3349,9 +3383,12 @@ woke_up:
if (unlikely(worker->flags & WORKER_DIE)) {
raw_spin_unlock_irq(&pool->lock);
set_pf_worker(false);
-
+ /*
+ * The worker is dead and PF_WQ_WORKER is cleared; worker->pool
+ * must not be accessed anymore, so reset it to NULL just in case.
+ */
+ worker->pool = NULL;
ida_free(&pool->worker_ida, worker->id);
- WARN_ON_ONCE(!list_empty(&worker->entry));
return 0;
}
@@ -3509,12 +3546,6 @@ repeat:
}
/*
- * Put the reference grabbed by send_mayday(). @pool won't
- * go away while we're still attached to it.
- */
- put_pwq(pwq);
-
- /*
* Leave this pool. Notify regular workers; otherwise, we end up
* with 0 concurrency and stalling the execution.
*/
@@ -3524,6 +3555,12 @@ repeat:
worker_detach_from_pool(rescuer);
+ /*
+ * Put the reference grabbed by send_mayday(). @pool might go
+ * away at any time afterwards.
+ */
+ put_pwq_unlocked(pwq);
+
raw_spin_lock_irq(&wq_mayday_lock);
}
@@ -3547,6 +3584,7 @@ static void bh_worker(struct worker *worker)
int nr_restarts = BH_WORKER_RESTARTS;
unsigned long end = jiffies + BH_WORKER_JIFFIES;
+ worker_lock_callback(pool);
raw_spin_lock_irq(&pool->lock);
worker_leave_idle(worker);
@@ -3575,6 +3613,7 @@ done:
worker_enter_idle(worker);
kick_pool(pool);
raw_spin_unlock_irq(&pool->lock);
+ worker_unlock_callback(pool);
}
/*
@@ -3679,23 +3718,27 @@ void workqueue_softirq_dead(unsigned int cpu)
* check_flush_dependency - check for flush dependency sanity
* @target_wq: workqueue being flushed
* @target_work: work item being flushed (NULL for workqueue flushes)
+ * @from_cancel: are we called from the work cancel path
*
* %current is trying to flush the whole @target_wq or @target_work on it.
- * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
- * reclaiming memory or running on a workqueue which doesn't have
- * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
- * a deadlock.
+ * If this is not the cancel path (which implies the work being flushed is
+ * either already running or will not run at all), check whether @target_wq
+ * has %WQ_MEM_RECLAIM; if it does not, verify that %current is not reclaiming
+ * memory or running on a workqueue without %WQ_MEM_RECLAIM, as that can break
+ * the forward-progress guarantee and lead to a deadlock.
*/
static void check_flush_dependency(struct workqueue_struct *target_wq,
- struct work_struct *target_work)
+ struct work_struct *target_work,
+ bool from_cancel)
{
- work_func_t target_func = target_work ? target_work->func : NULL;
+ work_func_t target_func;
struct worker *worker;
- if (target_wq->flags & WQ_MEM_RECLAIM)
+ if (from_cancel || target_wq->flags & WQ_MEM_RECLAIM)
return;
worker = current_wq_worker();
+ target_func = target_work ? target_work->func : NULL;
WARN_ONCE(current->flags & PF_MEMALLOC,
"workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
@@ -3832,16 +3875,28 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
{
bool wait = false;
struct pool_workqueue *pwq;
+ struct worker_pool *current_pool = NULL;
if (flush_color >= 0) {
WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
atomic_set(&wq->nr_pwqs_to_flush, 1);
}
+ /*
+ * For an unbound workqueue, the pwqs map to only a few pools, and
+ * most of the time pwqs belonging to the same pool are linked
+ * sequentially into wq->pwqs by CPU index. So for the majority of
+ * pwq iterations the pool stays the same and we only lock/unlock
+ * when the pool changes, which greatly reduces the number of
+ * expensive lock operations.
+ */
for_each_pwq(pwq, wq) {
- struct worker_pool *pool = pwq->pool;
-
- raw_spin_lock_irq(&pool->lock);
+ if (current_pool != pwq->pool) {
+ if (likely(current_pool))
+ raw_spin_unlock_irq(&current_pool->lock);
+ current_pool = pwq->pool;
+ raw_spin_lock_irq(&current_pool->lock);
+ }
if (flush_color >= 0) {
WARN_ON_ONCE(pwq->flush_color != -1);
@@ -3858,9 +3913,11 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
pwq->work_color = work_color;
}
- raw_spin_unlock_irq(&pool->lock);
}
+ if (current_pool)
+ raw_spin_unlock_irq(&current_pool->lock);
+
if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
complete(&wq->first_flusher->done);
@@ -3870,11 +3927,14 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
static void touch_wq_lockdep_map(struct workqueue_struct *wq)
{
#ifdef CONFIG_LOCKDEP
+ if (unlikely(!wq->lockdep_map))
+ return;
+
if (wq->flags & WQ_BH)
local_bh_disable();
- lock_map_acquire(&wq->lockdep_map);
- lock_map_release(&wq->lockdep_map);
+ lock_map_acquire(wq->lockdep_map);
+ lock_map_release(wq->lockdep_map);
if (wq->flags & WQ_BH)
local_bh_enable();
@@ -3908,7 +3968,7 @@ void __flush_workqueue(struct workqueue_struct *wq)
struct wq_flusher this_flusher = {
.list = LIST_HEAD_INIT(this_flusher.list),
.flush_color = -1,
- .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
+ .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, (*wq->lockdep_map)),
};
int next_color;
@@ -3962,7 +4022,7 @@ void __flush_workqueue(struct workqueue_struct *wq)
list_add_tail(&this_flusher.list, &wq->flusher_overflow);
}
- check_flush_dependency(wq, NULL);
+ check_flush_dependency(wq, NULL, false);
mutex_unlock(&wq->mutex);
@@ -4137,7 +4197,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
}
wq = pwq->wq;
- check_flush_dependency(wq, work);
+ check_flush_dependency(wq, work, from_cancel);
insert_wq_barrier(pwq, barr, work, worker);
raw_spin_unlock_irq(&pool->lock);
@@ -4167,7 +4227,6 @@ already_gone:
static bool __flush_work(struct work_struct *work, bool from_cancel)
{
struct wq_barrier barr;
- unsigned long data;
if (WARN_ON(!wq_online))
return false;
@@ -4185,29 +4244,35 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
* was queued on a BH workqueue, we also know that it was running in the
* BH context and thus can be busy-waited.
*/
- data = *work_data_bits(work);
- if (from_cancel &&
- !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) {
- /*
- * On RT, prevent a live lock when %current preempted soft
- * interrupt processing or prevents ksoftirqd from running by
- * keeping flipping BH. If the BH work item runs on a different
- * CPU then this has no effect other than doing the BH
- * disable/enable dance for nothing. This is copied from
- * kernel/softirq.c::tasklet_unlock_spin_wait().
- */
- while (!try_wait_for_completion(&barr.done)) {
- if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
- local_bh_disable();
- local_bh_enable();
- } else {
- cpu_relax();
+ if (from_cancel) {
+ unsigned long data = *work_data_bits(work);
+
+ if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) &&
+ (data & WORK_OFFQ_BH)) {
+ /*
+ * On RT, prevent a live lock when %current has preempted
+ * soft interrupt processing by blocking on the lock that
+ * is owned by the thread invoking the callback.
+ */
+ while (!try_wait_for_completion(&barr.done)) {
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ struct worker_pool *pool;
+
+ guard(rcu)();
+ pool = get_work_pool(work);
+ if (pool)
+ workqueue_callback_cancel_wait_running(pool);
+ } else {
+ cpu_relax();
+ }
}
+ goto out_destroy;
}
- } else {
- wait_for_completion(&barr.done);
}
+ wait_for_completion(&barr.done);
+
+out_destroy:
destroy_work_on_stack(&barr.work);
return true;
}
@@ -4245,7 +4310,7 @@ EXPORT_SYMBOL_GPL(flush_work);
bool flush_delayed_work(struct delayed_work *dwork)
{
local_irq_disable();
- if (del_timer_sync(&dwork->timer))
+ if (timer_delete_sync(&dwork->timer))
__queue_work(dwork->cpu, dwork->wq, &dwork->work);
local_irq_enable();
return flush_work(&dwork->work);
@@ -4591,7 +4656,7 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs)
*
* Return: The allocated new workqueue_attr on success. %NULL on failure.
*/
-struct workqueue_attrs *alloc_workqueue_attrs(void)
+struct workqueue_attrs *alloc_workqueue_attrs_noprof(void)
{
struct workqueue_attrs *attrs;
@@ -4746,6 +4811,9 @@ static int init_worker_pool(struct worker_pool *pool)
ida_init(&pool->worker_ida);
INIT_HLIST_NODE(&pool->hash_node);
pool->refcnt = 1;
+#ifdef CONFIG_PREEMPT_RT
+ spin_lock_init(&pool->cb_lock);
+#endif
/* shouldn't fail above this point */
pool->attrs = alloc_workqueue_attrs();
@@ -4768,16 +4836,23 @@ static void wq_init_lockdep(struct workqueue_struct *wq)
lock_name = wq->name;
wq->lock_name = lock_name;
- lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
+ wq->lockdep_map = &wq->__lockdep_map;
+ lockdep_init_map(wq->lockdep_map, lock_name, &wq->key, 0);
}
static void wq_unregister_lockdep(struct workqueue_struct *wq)
{
+ if (wq->lockdep_map != &wq->__lockdep_map)
+ return;
+
lockdep_unregister_key(&wq->key);
}
static void wq_free_lockdep(struct workqueue_struct *wq)
{
+ if (wq->lockdep_map != &wq->__lockdep_map)
+ return;
+
if (wq->lock_name != wq->name)
kfree(wq->lock_name);
}
@@ -4941,9 +5016,9 @@ static void put_unbound_pool(struct worker_pool *pool)
reap_dying_workers(&cull_list);
/* shut down the timers */
- del_timer_sync(&pool->idle_timer);
+ timer_delete_sync(&pool->idle_timer);
cancel_work_sync(&pool->idle_cull_work);
- del_timer_sync(&pool->mayday_timer);
+ timer_delete_sync(&pool->mayday_timer);
/* RCU protected to allow dereferences from get_work_pool() */
call_rcu(&pool->rcu, rcu_free_pool);
@@ -5611,12 +5686,11 @@ static void wq_adjust_max_active(struct workqueue_struct *wq)
} while (activated);
}
-__printf(1, 4)
-struct workqueue_struct *alloc_workqueue(const char *fmt,
- unsigned int flags,
- int max_active, ...)
+__printf(1, 0)
+static struct workqueue_struct *__alloc_workqueue(const char *fmt,
+ unsigned int flags,
+ int max_active, va_list args)
{
- va_list args;
struct workqueue_struct *wq;
size_t wq_size;
int name_len;
@@ -5638,19 +5712,17 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
else
wq_size = sizeof(*wq);
- wq = kzalloc(wq_size, GFP_KERNEL);
+ wq = kzalloc_noprof(wq_size, GFP_KERNEL);
if (!wq)
return NULL;
if (flags & WQ_UNBOUND) {
- wq->unbound_attrs = alloc_workqueue_attrs();
+ wq->unbound_attrs = alloc_workqueue_attrs_noprof();
if (!wq->unbound_attrs)
goto err_free_wq;
}
- va_start(args, max_active);
name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
- va_end(args);
if (name_len >= WQ_NAME_LEN)
pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n",
@@ -5680,12 +5752,11 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
INIT_LIST_HEAD(&wq->flusher_overflow);
INIT_LIST_HEAD(&wq->maydays);
- wq_init_lockdep(wq);
INIT_LIST_HEAD(&wq->list);
if (flags & WQ_UNBOUND) {
if (alloc_node_nr_active(wq->node_nr_active) < 0)
- goto err_unreg_lockdep;
+ goto err_free_wq;
}
/*
@@ -5724,9 +5795,6 @@ err_unlock_free_node_nr_active:
kthread_flush_worker(pwq_release_worker);
free_node_nr_active(wq->node_nr_active);
}
-err_unreg_lockdep:
- wq_unregister_lockdep(wq);
- wq_free_lockdep(wq);
err_free_wq:
free_workqueue_attrs(wq->unbound_attrs);
kfree(wq);
@@ -5737,7 +5805,48 @@ err_destroy:
destroy_workqueue(wq);
return NULL;
}
-EXPORT_SYMBOL_GPL(alloc_workqueue);
+
+__printf(1, 4)
+struct workqueue_struct *alloc_workqueue_noprof(const char *fmt,
+ unsigned int flags,
+ int max_active, ...)
+{
+ struct workqueue_struct *wq;
+ va_list args;
+
+ va_start(args, max_active);
+ wq = __alloc_workqueue(fmt, flags, max_active, args);
+ va_end(args);
+ if (!wq)
+ return NULL;
+
+ wq_init_lockdep(wq);
+
+ return wq;
+}
+EXPORT_SYMBOL_GPL(alloc_workqueue_noprof);
+
+#ifdef CONFIG_LOCKDEP
+__printf(1, 5)
+struct workqueue_struct *
+alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags,
+ int max_active, struct lockdep_map *lockdep_map, ...)
+{
+ struct workqueue_struct *wq;
+ va_list args;
+
+ va_start(args, lockdep_map);
+ wq = __alloc_workqueue(fmt, flags, max_active, args);
+ va_end(args);
+ if (!wq)
+ return NULL;
+
+ wq->lockdep_map = lockdep_map;
+
+ return wq;
+}
+EXPORT_SYMBOL_GPL(alloc_workqueue_lockdep_map);
+#endif
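A hypothetical caller of the new interface, supplying a statically allocated lockdep map; because wq->lockdep_map then no longer points at wq->__lockdep_map, the wq_unregister_lockdep()/wq_free_lockdep() guards added above become no-ops for such workqueues:

	/* Hypothetical user passing its own lockdep map to the workqueue. */
	static struct lock_class_key demo_wq_key;
	static struct lockdep_map demo_wq_lockdep_map =
		STATIC_LOCKDEP_MAP_INIT("demo_wq", &demo_wq_key);
	static struct workqueue_struct *demo_wq;

	static int __init demo_init(void)
	{
		demo_wq = alloc_workqueue_lockdep_map("demo_wq", WQ_UNBOUND, 0,
						      &demo_wq_lockdep_map);
		return demo_wq ? 0 : -ENOMEM;
	}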
static bool pwq_busy(struct pool_workqueue *pwq)
{
@@ -5760,6 +5869,17 @@ static bool pwq_busy(struct pool_workqueue *pwq)
* @wq: target workqueue
*
* Safely destroy a workqueue. All work currently pending will be done first.
+ *
+ * This function does NOT guarantee that non-pending work that has been
+ * submitted with queue_delayed_work() and similar functions will be done
+ * before destroying the workqueue. The fundamental problem is that, currently,
+ * the workqueue has no way of accessing non-pending delayed_work. delayed_work
+ * is only linked on the timer-side. All delayed_work must, therefore, be
+ * canceled before calling this function.
+ *
+ * TODO: It would be better if the problem described above didn't exist and
+ * destroy_workqueue() cleanly cancelled all pending and non-pending
+ * delayed_work.
*/
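Caller-side sketch of the rule above, with hypothetical names: any delayed_work whose timer may still be pending must be cancelled (or flushed) before the workqueue is torn down.

	/* Hypothetical teardown ordering for a wq that had delayed work. */
	static struct workqueue_struct *demo_wq;
	static struct delayed_work demo_dwork;

	static void demo_exit(void)
	{
		/* also waits for an already-running callback to finish */
		cancel_delayed_work_sync(&demo_dwork);
		destroy_workqueue(demo_wq);
	}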
void destroy_workqueue(struct workqueue_struct *wq)
{
@@ -5958,7 +6078,6 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
struct pool_workqueue *pwq;
bool ret;
- rcu_read_lock();
preempt_disable();
if (cpu == WORK_CPU_UNBOUND)
@@ -5968,7 +6087,6 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
ret = !list_empty(&pwq->inactive_works);
preempt_enable();
- rcu_read_unlock();
return ret;
}
@@ -6680,31 +6798,6 @@ long work_on_cpu_key(int cpu, long (*fn)(void *),
return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu_key);
-
-/**
- * work_on_cpu_safe_key - run a function in thread context on a particular cpu
- * @cpu: the cpu to run on
- * @fn: the function to run
- * @arg: the function argument
- * @key: The lock class key for lock debugging purposes
- *
- * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold
- * any locks which would prevent @fn from completing.
- *
- * Return: The value @fn returns.
- */
-long work_on_cpu_safe_key(int cpu, long (*fn)(void *),
- void *arg, struct lock_class_key *key)
-{
- long ret = -ENODEV;
-
- cpus_read_lock();
- if (cpu_online(cpu))
- ret = work_on_cpu_key(cpu, fn, arg, key);
- cpus_read_unlock();
- return ret;
-}
-EXPORT_SYMBOL_GPL(work_on_cpu_safe_key);
#endif /* CONFIG_SMP */
#ifdef CONFIG_FREEZER
@@ -7398,6 +7491,9 @@ static struct timer_list wq_watchdog_timer;
static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
+static unsigned int wq_panic_on_stall;
+module_param_named(panic_on_stall, wq_panic_on_stall, uint, 0644);
+
/*
* Show workers that might prevent the processing of pending work items.
* The only candidates are CPU-bound workers in the running state.
@@ -7449,6 +7545,16 @@ static void show_cpu_pools_hogs(void)
rcu_read_unlock();
}
+static void panic_on_wq_watchdog(void)
+{
+ static unsigned int wq_stall;
+
+ if (wq_panic_on_stall) {
+ wq_stall++;
+ BUG_ON(wq_stall >= wq_panic_on_stall);
+ }
+}
+
static void wq_watchdog_reset_touched(void)
{
int cpu;
@@ -7470,8 +7576,6 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
if (!thresh)
return;
- rcu_read_lock();
-
for_each_pool(pool, pi) {
unsigned long pool_ts, touched, ts;
@@ -7513,14 +7617,15 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
}
- rcu_read_unlock();
-
if (lockup_detected)
show_all_workqueues();
if (cpu_pool_stall)
show_cpu_pools_hogs();
+ if (lockup_detected)
+ panic_on_wq_watchdog();
+
wq_watchdog_reset_touched();
mod_timer(&wq_watchdog_timer, jiffies + thresh);
}
@@ -7544,7 +7649,7 @@ notrace void wq_watchdog_touch(int cpu)
static void wq_watchdog_set_thresh(unsigned long thresh)
{
wq_watchdog_thresh = 0;
- del_timer_sync(&wq_watchdog_timer);
+ timer_delete_sync(&wq_watchdog_timer);
if (thresh) {
wq_watchdog_thresh = thresh;
@@ -7563,7 +7668,7 @@ static int wq_watchdog_param_set_thresh(const char *val,
if (ret)
return ret;
- if (system_wq)
+ if (system_percpu_wq)
wq_watchdog_set_thresh(thresh);
else
wq_watchdog_thresh = thresh;
@@ -7661,7 +7766,8 @@ void __init workqueue_init_early(void)
restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask);
cpumask_copy(wq_requested_unbound_cpumask, wq_unbound_cpumask);
-
+ cpumask_andnot(wq_isolated_cpumask, cpu_possible_mask,
+ housekeeping_cpumask(HK_TYPE_DOMAIN));
pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
unbound_wq_update_pwq_attrs_buf = alloc_workqueue_attrs();
@@ -7722,23 +7828,24 @@ void __init workqueue_init_early(void)
ordered_wq_attrs[i] = attrs;
}
- system_wq = alloc_workqueue("events", 0, 0);
- system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
- system_long_wq = alloc_workqueue("events_long", 0, 0);
- system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
- WQ_MAX_ACTIVE);
+ system_wq = alloc_workqueue("events", WQ_PERCPU, 0);
+ system_percpu_wq = alloc_workqueue("events", WQ_PERCPU, 0);
+ system_highpri_wq = alloc_workqueue("events_highpri",
+ WQ_HIGHPRI | WQ_PERCPU, 0);
+ system_long_wq = alloc_workqueue("events_long", WQ_PERCPU, 0);
+ system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE);
+ system_dfl_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_MAX_ACTIVE);
system_freezable_wq = alloc_workqueue("events_freezable",
- WQ_FREEZABLE, 0);
+ WQ_FREEZABLE | WQ_PERCPU, 0);
system_power_efficient_wq = alloc_workqueue("events_power_efficient",
- WQ_POWER_EFFICIENT, 0);
+ WQ_POWER_EFFICIENT | WQ_PERCPU, 0);
system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_pwr_efficient",
- WQ_FREEZABLE | WQ_POWER_EFFICIENT,
- 0);
- system_bh_wq = alloc_workqueue("events_bh", WQ_BH, 0);
+ WQ_FREEZABLE | WQ_POWER_EFFICIENT | WQ_PERCPU, 0);
+ system_bh_wq = alloc_workqueue("events_bh", WQ_BH | WQ_PERCPU, 0);
system_bh_highpri_wq = alloc_workqueue("events_bh_highpri",
- WQ_BH | WQ_HIGHPRI, 0);
- BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
- !system_unbound_wq || !system_freezable_wq ||
+ WQ_BH | WQ_HIGHPRI | WQ_PERCPU, 0);
+ BUG_ON(!system_wq || !system_percpu_wq || !system_highpri_wq || !system_long_wq ||
+ !system_unbound_wq || !system_freezable_wq || !system_dfl_wq ||
!system_power_efficient_wq ||
!system_freezable_power_efficient_wq ||
!system_bh_wq || !system_bh_highpri_wq);
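A hedged sketch of how call sites are expected to pick between the new aliases (hypothetical work items; system_percpu_wq mirrors system_wq, system_dfl_wq mirrors system_unbound_wq):

	/* Hypothetical call sites choosing a system workqueue alias. */
	static void demo_queue_local(struct work_struct *work)
	{
		/* short, CPU-local work: use the explicit per-CPU alias */
		queue_work(system_percpu_wq, work);
	}

	static void demo_queue_anywhere(struct work_struct *work)
	{
		/* placement does not matter: use the default unbound alias */
		queue_work(system_dfl_wq, work);
	}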
@@ -7749,7 +7856,7 @@ static void __init wq_cpu_intensive_thresh_init(void)
unsigned long thresh;
unsigned long bogo;
- pwq_release_worker = kthread_create_worker(0, "pool_workqueue_release");
+ pwq_release_worker = kthread_run_worker(0, "pool_workqueue_release");
BUG_ON(IS_ERR(pwq_release_worker));
/* if the user set it to a specific value, keep it */