forked from Minki/linux
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block layer fixes from Jens Axboe: "A final set of fixes for 4.3. It is (again) bigger than I would have liked, but it's all been through the testing mill and has been carefully reviewed by multiple parties. Each fix is either a regression fix for this cycle, or is marked stable. You can scold me at KS. The pull request contains: - Three simple fixes for NVMe, fixing regressions since 4.3. From Arnd, Christoph, and Keith. - A single xen-blkfront fix from Cathy, fixing a NULL dereference if an error is returned through the state change callback. - Fixup for some bad/sloppy code in nbd that got introduced earlier in this cycle. From Markus Pargmann. - A blk-mq tagset use-after-free fix from Junichi. - A backing device lifetime fix from Tejun, fixing a crash. - And finally, a set of regression/stable fixes for cgroup writeback from Tejun" * 'for-linus' of git://git.kernel.dk/linux-block: writeback: remove broken rbtree_postorder_for_each_entry_safe() usage in cgwb_bdi_destroy() NVMe: Fix memory leak on retried commands block: don't release bdi while request_queue has live references nvme: use an integer value to Linux errno values blk-mq: fix use-after-free in blk_mq_free_tag_set() nvme: fix 32-bit build warning writeback: fix incorrect calculation of available memory for memcg domains writeback: memcg dirty_throttle_control should be initialized with wb->memcg_completions writeback: bdi_writeback iteration must not skip dying ones writeback: fix bdi_writeback iteration in wakeup_dirtytime_writeback() writeback: laptop_mode_timer_fn() needs rcu_read_lock() around bdi_writeback iteration nbd: Add locking for tasks xen-blkfront: check for null drvdata in blkback_changed (XenbusStateClosing)
This commit is contained in:
commit
ea1ee5ff1b
|
@ -576,7 +576,7 @@ void blk_cleanup_queue(struct request_queue *q)
|
||||||
q->queue_lock = &q->__queue_lock;
|
q->queue_lock = &q->__queue_lock;
|
||||||
spin_unlock_irq(lock);
|
spin_unlock_irq(lock);
|
||||||
|
|
||||||
bdi_destroy(&q->backing_dev_info);
|
bdi_unregister(&q->backing_dev_info);
|
||||||
|
|
||||||
/* @q is and will stay empty, shutdown and put */
|
/* @q is and will stay empty, shutdown and put */
|
||||||
blk_put_queue(q);
|
blk_put_queue(q);
|
||||||
|
|
|
@ -641,6 +641,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
|
||||||
{
|
{
|
||||||
bt_free(&tags->bitmap_tags);
|
bt_free(&tags->bitmap_tags);
|
||||||
bt_free(&tags->breserved_tags);
|
bt_free(&tags->breserved_tags);
|
||||||
|
free_cpumask_var(tags->cpumask);
|
||||||
kfree(tags);
|
kfree(tags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2296,10 +2296,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < set->nr_hw_queues; i++) {
|
for (i = 0; i < set->nr_hw_queues; i++) {
|
||||||
if (set->tags[i]) {
|
if (set->tags[i])
|
||||||
blk_mq_free_rq_map(set, set->tags[i], i);
|
blk_mq_free_rq_map(set, set->tags[i], i);
|
||||||
free_cpumask_var(set->tags[i]->cpumask);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
kfree(set->tags);
|
kfree(set->tags);
|
||||||
|
|
|
@ -540,6 +540,7 @@ static void blk_release_queue(struct kobject *kobj)
|
||||||
struct request_queue *q =
|
struct request_queue *q =
|
||||||
container_of(kobj, struct request_queue, kobj);
|
container_of(kobj, struct request_queue, kobj);
|
||||||
|
|
||||||
|
bdi_exit(&q->backing_dev_info);
|
||||||
blkcg_exit_queue(q);
|
blkcg_exit_queue(q);
|
||||||
|
|
||||||
if (q->elevator) {
|
if (q->elevator) {
|
||||||
|
|
|
@ -60,6 +60,7 @@ struct nbd_device {
|
||||||
bool disconnect; /* a disconnect has been requested by user */
|
bool disconnect; /* a disconnect has been requested by user */
|
||||||
|
|
||||||
struct timer_list timeout_timer;
|
struct timer_list timeout_timer;
|
||||||
|
spinlock_t tasks_lock;
|
||||||
struct task_struct *task_recv;
|
struct task_struct *task_recv;
|
||||||
struct task_struct *task_send;
|
struct task_struct *task_send;
|
||||||
|
|
||||||
|
@ -140,21 +141,23 @@ static void sock_shutdown(struct nbd_device *nbd)
|
||||||
static void nbd_xmit_timeout(unsigned long arg)
|
static void nbd_xmit_timeout(unsigned long arg)
|
||||||
{
|
{
|
||||||
struct nbd_device *nbd = (struct nbd_device *)arg;
|
struct nbd_device *nbd = (struct nbd_device *)arg;
|
||||||
struct task_struct *task;
|
unsigned long flags;
|
||||||
|
|
||||||
if (list_empty(&nbd->queue_head))
|
if (list_empty(&nbd->queue_head))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
nbd->disconnect = true;
|
nbd->disconnect = true;
|
||||||
|
|
||||||
task = READ_ONCE(nbd->task_recv);
|
spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||||
if (task)
|
|
||||||
force_sig(SIGKILL, task);
|
|
||||||
|
|
||||||
task = READ_ONCE(nbd->task_send);
|
if (nbd->task_recv)
|
||||||
if (task)
|
force_sig(SIGKILL, nbd->task_recv);
|
||||||
|
|
||||||
|
if (nbd->task_send)
|
||||||
force_sig(SIGKILL, nbd->task_send);
|
force_sig(SIGKILL, nbd->task_send);
|
||||||
|
|
||||||
|
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||||
|
|
||||||
dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
|
dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -403,17 +406,24 @@ static int nbd_thread_recv(struct nbd_device *nbd)
|
||||||
{
|
{
|
||||||
struct request *req;
|
struct request *req;
|
||||||
int ret;
|
int ret;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
BUG_ON(nbd->magic != NBD_MAGIC);
|
BUG_ON(nbd->magic != NBD_MAGIC);
|
||||||
|
|
||||||
sk_set_memalloc(nbd->sock->sk);
|
sk_set_memalloc(nbd->sock->sk);
|
||||||
|
|
||||||
|
spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||||
nbd->task_recv = current;
|
nbd->task_recv = current;
|
||||||
|
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||||
|
|
||||||
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
|
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
|
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
|
||||||
|
|
||||||
|
spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||||
nbd->task_recv = NULL;
|
nbd->task_recv = NULL;
|
||||||
|
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -429,7 +439,9 @@ static int nbd_thread_recv(struct nbd_device *nbd)
|
||||||
|
|
||||||
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
|
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
|
||||||
|
|
||||||
|
spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||||
nbd->task_recv = NULL;
|
nbd->task_recv = NULL;
|
||||||
|
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||||
|
|
||||||
if (signal_pending(current)) {
|
if (signal_pending(current)) {
|
||||||
siginfo_t info;
|
siginfo_t info;
|
||||||
|
@ -534,8 +546,11 @@ static int nbd_thread_send(void *data)
|
||||||
{
|
{
|
||||||
struct nbd_device *nbd = data;
|
struct nbd_device *nbd = data;
|
||||||
struct request *req;
|
struct request *req;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||||
nbd->task_send = current;
|
nbd->task_send = current;
|
||||||
|
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||||
|
|
||||||
set_user_nice(current, MIN_NICE);
|
set_user_nice(current, MIN_NICE);
|
||||||
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
|
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
|
||||||
|
@ -572,7 +587,15 @@ static int nbd_thread_send(void *data)
|
||||||
nbd_handle_req(nbd, req);
|
nbd_handle_req(nbd, req);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||||
nbd->task_send = NULL;
|
nbd->task_send = NULL;
|
||||||
|
spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||||
|
|
||||||
|
/* Clear maybe pending signals */
|
||||||
|
if (signal_pending(current)) {
|
||||||
|
siginfo_t info;
|
||||||
|
dequeue_signal_lock(current, &current->blocked, &info);
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1052,6 +1075,7 @@ static int __init nbd_init(void)
|
||||||
nbd_dev[i].magic = NBD_MAGIC;
|
nbd_dev[i].magic = NBD_MAGIC;
|
||||||
INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
|
INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
|
||||||
spin_lock_init(&nbd_dev[i].queue_lock);
|
spin_lock_init(&nbd_dev[i].queue_lock);
|
||||||
|
spin_lock_init(&nbd_dev[i].tasks_lock);
|
||||||
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
|
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
|
||||||
mutex_init(&nbd_dev[i].tx_lock);
|
mutex_init(&nbd_dev[i].tx_lock);
|
||||||
init_timer(&nbd_dev[i].timeout_timer);
|
init_timer(&nbd_dev[i].timeout_timer);
|
||||||
|
|
|
@ -603,27 +603,31 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
|
||||||
struct nvme_iod *iod = ctx;
|
struct nvme_iod *iod = ctx;
|
||||||
struct request *req = iod_get_private(iod);
|
struct request *req = iod_get_private(iod);
|
||||||
struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
|
struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
|
||||||
|
|
||||||
u16 status = le16_to_cpup(&cqe->status) >> 1;
|
u16 status = le16_to_cpup(&cqe->status) >> 1;
|
||||||
|
bool requeue = false;
|
||||||
|
int error = 0;
|
||||||
|
|
||||||
if (unlikely(status)) {
|
if (unlikely(status)) {
|
||||||
if (!(status & NVME_SC_DNR || blk_noretry_request(req))
|
if (!(status & NVME_SC_DNR || blk_noretry_request(req))
|
||||||
&& (jiffies - req->start_time) < req->timeout) {
|
&& (jiffies - req->start_time) < req->timeout) {
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
|
requeue = true;
|
||||||
blk_mq_requeue_request(req);
|
blk_mq_requeue_request(req);
|
||||||
spin_lock_irqsave(req->q->queue_lock, flags);
|
spin_lock_irqsave(req->q->queue_lock, flags);
|
||||||
if (!blk_queue_stopped(req->q))
|
if (!blk_queue_stopped(req->q))
|
||||||
blk_mq_kick_requeue_list(req->q);
|
blk_mq_kick_requeue_list(req->q);
|
||||||
spin_unlock_irqrestore(req->q->queue_lock, flags);
|
spin_unlock_irqrestore(req->q->queue_lock, flags);
|
||||||
return;
|
goto release_iod;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
|
if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
|
||||||
if (cmd_rq->ctx == CMD_CTX_CANCELLED)
|
if (cmd_rq->ctx == CMD_CTX_CANCELLED)
|
||||||
status = -EINTR;
|
error = -EINTR;
|
||||||
|
else
|
||||||
|
error = status;
|
||||||
} else {
|
} else {
|
||||||
status = nvme_error_status(status);
|
error = nvme_error_status(status);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -635,8 +639,9 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
|
||||||
if (cmd_rq->aborted)
|
if (cmd_rq->aborted)
|
||||||
dev_warn(nvmeq->dev->dev,
|
dev_warn(nvmeq->dev->dev,
|
||||||
"completing aborted command with status:%04x\n",
|
"completing aborted command with status:%04x\n",
|
||||||
status);
|
error);
|
||||||
|
|
||||||
|
release_iod:
|
||||||
if (iod->nents) {
|
if (iod->nents) {
|
||||||
dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
|
dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
|
||||||
rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
|
rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
|
||||||
|
@ -649,7 +654,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
|
||||||
}
|
}
|
||||||
nvme_free_iod(nvmeq->dev, iod);
|
nvme_free_iod(nvmeq->dev, iod);
|
||||||
|
|
||||||
blk_mq_complete_request(req, status);
|
if (likely(!requeue))
|
||||||
|
blk_mq_complete_request(req, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* length is in bytes. gfp flags indicates whether we may sleep. */
|
/* length is in bytes. gfp flags indicates whether we may sleep. */
|
||||||
|
@ -1804,7 +1810,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
|
||||||
|
|
||||||
length = (io.nblocks + 1) << ns->lba_shift;
|
length = (io.nblocks + 1) << ns->lba_shift;
|
||||||
meta_len = (io.nblocks + 1) * ns->ms;
|
meta_len = (io.nblocks + 1) * ns->ms;
|
||||||
metadata = (void __user *)(unsigned long)io.metadata;
|
metadata = (void __user *)(uintptr_t)io.metadata;
|
||||||
write = io.opcode & 1;
|
write = io.opcode & 1;
|
||||||
|
|
||||||
if (ns->ext) {
|
if (ns->ext) {
|
||||||
|
@ -1844,7 +1850,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
|
||||||
c.rw.metadata = cpu_to_le64(meta_dma);
|
c.rw.metadata = cpu_to_le64(meta_dma);
|
||||||
|
|
||||||
status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
|
status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
|
||||||
(void __user *)io.addr, length, NULL, 0);
|
(void __user *)(uintptr_t)io.addr, length, NULL, 0);
|
||||||
unmap:
|
unmap:
|
||||||
if (meta) {
|
if (meta) {
|
||||||
if (status == NVME_SC_SUCCESS && !write) {
|
if (status == NVME_SC_SUCCESS && !write) {
|
||||||
|
@ -1886,7 +1892,7 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
|
||||||
timeout = msecs_to_jiffies(cmd.timeout_ms);
|
timeout = msecs_to_jiffies(cmd.timeout_ms);
|
||||||
|
|
||||||
status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
|
status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
|
||||||
NULL, (void __user *)cmd.addr, cmd.data_len,
|
NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
|
||||||
&cmd.result, timeout);
|
&cmd.result, timeout);
|
||||||
if (status >= 0) {
|
if (status >= 0) {
|
||||||
if (put_user(cmd.result, &ucmd->result))
|
if (put_user(cmd.result, &ucmd->result))
|
||||||
|
|
|
@ -1956,7 +1956,8 @@ static void blkback_changed(struct xenbus_device *dev,
|
||||||
break;
|
break;
|
||||||
/* Missed the backend's Closing state -- fallthrough */
|
/* Missed the backend's Closing state -- fallthrough */
|
||||||
case XenbusStateClosing:
|
case XenbusStateClosing:
|
||||||
blkfront_closing(info);
|
if (info)
|
||||||
|
blkfront_closing(info);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -778,19 +778,24 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
|
||||||
struct wb_writeback_work *base_work,
|
struct wb_writeback_work *base_work,
|
||||||
bool skip_if_busy)
|
bool skip_if_busy)
|
||||||
{
|
{
|
||||||
int next_memcg_id = 0;
|
struct bdi_writeback *last_wb = NULL;
|
||||||
struct bdi_writeback *wb;
|
struct bdi_writeback *wb = list_entry_rcu(&bdi->wb_list,
|
||||||
struct wb_iter iter;
|
struct bdi_writeback, bdi_node);
|
||||||
|
|
||||||
might_sleep();
|
might_sleep();
|
||||||
restart:
|
restart:
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
bdi_for_each_wb(wb, bdi, &iter, next_memcg_id) {
|
list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
|
||||||
DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
|
DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
|
||||||
struct wb_writeback_work fallback_work;
|
struct wb_writeback_work fallback_work;
|
||||||
struct wb_writeback_work *work;
|
struct wb_writeback_work *work;
|
||||||
long nr_pages;
|
long nr_pages;
|
||||||
|
|
||||||
|
if (last_wb) {
|
||||||
|
wb_put(last_wb);
|
||||||
|
last_wb = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* SYNC_ALL writes out I_DIRTY_TIME too */
|
/* SYNC_ALL writes out I_DIRTY_TIME too */
|
||||||
if (!wb_has_dirty_io(wb) &&
|
if (!wb_has_dirty_io(wb) &&
|
||||||
(base_work->sync_mode == WB_SYNC_NONE ||
|
(base_work->sync_mode == WB_SYNC_NONE ||
|
||||||
|
@ -819,12 +824,22 @@ restart:
|
||||||
|
|
||||||
wb_queue_work(wb, work);
|
wb_queue_work(wb, work);
|
||||||
|
|
||||||
next_memcg_id = wb->memcg_css->id + 1;
|
/*
|
||||||
|
* Pin @wb so that it stays on @bdi->wb_list. This allows
|
||||||
|
* continuing iteration from @wb after dropping and
|
||||||
|
* regrabbing rcu read lock.
|
||||||
|
*/
|
||||||
|
wb_get(wb);
|
||||||
|
last_wb = wb;
|
||||||
|
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
wb_wait_for_completion(bdi, &fallback_work_done);
|
wb_wait_for_completion(bdi, &fallback_work_done);
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
if (last_wb)
|
||||||
|
wb_put(last_wb);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else /* CONFIG_CGROUP_WRITEBACK */
|
#else /* CONFIG_CGROUP_WRITEBACK */
|
||||||
|
@ -1857,12 +1872,11 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
|
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
|
||||||
struct bdi_writeback *wb;
|
struct bdi_writeback *wb;
|
||||||
struct wb_iter iter;
|
|
||||||
|
|
||||||
if (!bdi_has_dirty_io(bdi))
|
if (!bdi_has_dirty_io(bdi))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
bdi_for_each_wb(wb, bdi, &iter, 0)
|
list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
|
||||||
wb_start_writeback(wb, wb_split_bdi_pages(wb, nr_pages),
|
wb_start_writeback(wb, wb_split_bdi_pages(wb, nr_pages),
|
||||||
false, reason);
|
false, reason);
|
||||||
}
|
}
|
||||||
|
@ -1894,11 +1908,10 @@ static void wakeup_dirtytime_writeback(struct work_struct *w)
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
|
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
|
||||||
struct bdi_writeback *wb;
|
struct bdi_writeback *wb;
|
||||||
struct wb_iter iter;
|
|
||||||
|
|
||||||
bdi_for_each_wb(wb, bdi, &iter, 0)
|
list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
|
||||||
if (!list_empty(&bdi->wb.b_dirty_time))
|
if (!list_empty(&wb->b_dirty_time))
|
||||||
wb_wakeup(&bdi->wb);
|
wb_wakeup(wb);
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
|
schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
|
||||||
|
|
|
@ -116,6 +116,8 @@ struct bdi_writeback {
|
||||||
struct list_head work_list;
|
struct list_head work_list;
|
||||||
struct delayed_work dwork; /* work item used for writeback */
|
struct delayed_work dwork; /* work item used for writeback */
|
||||||
|
|
||||||
|
struct list_head bdi_node; /* anchored at bdi->wb_list */
|
||||||
|
|
||||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||||
struct percpu_ref refcnt; /* used only for !root wb's */
|
struct percpu_ref refcnt; /* used only for !root wb's */
|
||||||
struct fprop_local_percpu memcg_completions;
|
struct fprop_local_percpu memcg_completions;
|
||||||
|
@ -150,6 +152,7 @@ struct backing_dev_info {
|
||||||
atomic_long_t tot_write_bandwidth;
|
atomic_long_t tot_write_bandwidth;
|
||||||
|
|
||||||
struct bdi_writeback wb; /* the root writeback info for this bdi */
|
struct bdi_writeback wb; /* the root writeback info for this bdi */
|
||||||
|
struct list_head wb_list; /* list of all wbs */
|
||||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||||
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
|
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
|
||||||
struct rb_root cgwb_congested_tree; /* their congested states */
|
struct rb_root cgwb_congested_tree; /* their congested states */
|
||||||
|
|
|
@ -19,13 +19,17 @@
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
|
|
||||||
int __must_check bdi_init(struct backing_dev_info *bdi);
|
int __must_check bdi_init(struct backing_dev_info *bdi);
|
||||||
void bdi_destroy(struct backing_dev_info *bdi);
|
void bdi_exit(struct backing_dev_info *bdi);
|
||||||
|
|
||||||
__printf(3, 4)
|
__printf(3, 4)
|
||||||
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
|
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
|
||||||
const char *fmt, ...);
|
const char *fmt, ...);
|
||||||
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
|
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
|
||||||
|
void bdi_unregister(struct backing_dev_info *bdi);
|
||||||
|
|
||||||
int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
|
int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
|
||||||
|
void bdi_destroy(struct backing_dev_info *bdi);
|
||||||
|
|
||||||
void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
|
void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
|
||||||
bool range_cyclic, enum wb_reason reason);
|
bool range_cyclic, enum wb_reason reason);
|
||||||
void wb_start_background_writeback(struct bdi_writeback *wb);
|
void wb_start_background_writeback(struct bdi_writeback *wb);
|
||||||
|
@ -408,61 +412,6 @@ static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
struct wb_iter {
|
|
||||||
int start_memcg_id;
|
|
||||||
struct radix_tree_iter tree_iter;
|
|
||||||
void **slot;
|
|
||||||
};
|
|
||||||
|
|
||||||
static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter,
|
|
||||||
struct backing_dev_info *bdi)
|
|
||||||
{
|
|
||||||
struct radix_tree_iter *titer = &iter->tree_iter;
|
|
||||||
|
|
||||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
||||||
|
|
||||||
if (iter->start_memcg_id >= 0) {
|
|
||||||
iter->slot = radix_tree_iter_init(titer, iter->start_memcg_id);
|
|
||||||
iter->start_memcg_id = -1;
|
|
||||||
} else {
|
|
||||||
iter->slot = radix_tree_next_slot(iter->slot, titer, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!iter->slot)
|
|
||||||
iter->slot = radix_tree_next_chunk(&bdi->cgwb_tree, titer, 0);
|
|
||||||
if (iter->slot)
|
|
||||||
return *iter->slot;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter,
|
|
||||||
struct backing_dev_info *bdi,
|
|
||||||
int start_memcg_id)
|
|
||||||
{
|
|
||||||
iter->start_memcg_id = start_memcg_id;
|
|
||||||
|
|
||||||
if (start_memcg_id)
|
|
||||||
return __wb_iter_next(iter, bdi);
|
|
||||||
else
|
|
||||||
return &bdi->wb;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* bdi_for_each_wb - walk all wb's of a bdi in ascending memcg ID order
|
|
||||||
* @wb_cur: cursor struct bdi_writeback pointer
|
|
||||||
* @bdi: bdi to walk wb's of
|
|
||||||
* @iter: pointer to struct wb_iter to be used as iteration buffer
|
|
||||||
* @start_memcg_id: memcg ID to start iteration from
|
|
||||||
*
|
|
||||||
* Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending
|
|
||||||
* memcg ID order starting from @start_memcg_id. @iter is struct wb_iter
|
|
||||||
* to be used as temp storage during iteration. rcu_read_lock() must be
|
|
||||||
* held throughout iteration.
|
|
||||||
*/
|
|
||||||
#define bdi_for_each_wb(wb_cur, bdi, iter, start_memcg_id) \
|
|
||||||
for ((wb_cur) = __wb_iter_init(iter, bdi, start_memcg_id); \
|
|
||||||
(wb_cur); (wb_cur) = __wb_iter_next(iter, bdi))
|
|
||||||
|
|
||||||
#else /* CONFIG_CGROUP_WRITEBACK */
|
#else /* CONFIG_CGROUP_WRITEBACK */
|
||||||
|
|
||||||
static inline bool inode_cgwb_enabled(struct inode *inode)
|
static inline bool inode_cgwb_enabled(struct inode *inode)
|
||||||
|
@ -522,14 +471,6 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
struct wb_iter {
|
|
||||||
int next_id;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \
|
|
||||||
for ((iter)->next_id = (start_blkcg_id); \
|
|
||||||
({ (wb_cur) = !(iter)->next_id++ ? &(bdi)->wb : NULL; }); )
|
|
||||||
|
|
||||||
static inline int inode_congested(struct inode *inode, int cong_bits)
|
static inline int inode_congested(struct inode *inode, int cong_bits)
|
||||||
{
|
{
|
||||||
return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
|
return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
|
||||||
|
|
|
@ -676,8 +676,9 @@ enum {
|
||||||
|
|
||||||
struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
|
struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
|
||||||
struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
|
struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
|
||||||
void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail,
|
void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
|
||||||
unsigned long *pdirty, unsigned long *pwriteback);
|
unsigned long *pheadroom, unsigned long *pdirty,
|
||||||
|
unsigned long *pwriteback);
|
||||||
|
|
||||||
#else /* CONFIG_CGROUP_WRITEBACK */
|
#else /* CONFIG_CGROUP_WRITEBACK */
|
||||||
|
|
||||||
|
@ -687,7 +688,8 @@ static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
|
static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
|
||||||
unsigned long *pavail,
|
unsigned long *pfilepages,
|
||||||
|
unsigned long *pheadroom,
|
||||||
unsigned long *pdirty,
|
unsigned long *pdirty,
|
||||||
unsigned long *pwriteback)
|
unsigned long *pwriteback)
|
||||||
{
|
{
|
||||||
|
|
|
@ -480,6 +480,10 @@ static void cgwb_release_workfn(struct work_struct *work)
|
||||||
release_work);
|
release_work);
|
||||||
struct backing_dev_info *bdi = wb->bdi;
|
struct backing_dev_info *bdi = wb->bdi;
|
||||||
|
|
||||||
|
spin_lock_irq(&cgwb_lock);
|
||||||
|
list_del_rcu(&wb->bdi_node);
|
||||||
|
spin_unlock_irq(&cgwb_lock);
|
||||||
|
|
||||||
wb_shutdown(wb);
|
wb_shutdown(wb);
|
||||||
|
|
||||||
css_put(wb->memcg_css);
|
css_put(wb->memcg_css);
|
||||||
|
@ -575,6 +579,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
|
||||||
ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
|
ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
atomic_inc(&bdi->usage_cnt);
|
atomic_inc(&bdi->usage_cnt);
|
||||||
|
list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
|
||||||
list_add(&wb->memcg_node, memcg_cgwb_list);
|
list_add(&wb->memcg_node, memcg_cgwb_list);
|
||||||
list_add(&wb->blkcg_node, blkcg_cgwb_list);
|
list_add(&wb->blkcg_node, blkcg_cgwb_list);
|
||||||
css_get(memcg_css);
|
css_get(memcg_css);
|
||||||
|
@ -676,7 +681,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
|
||||||
static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
|
static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
|
||||||
{
|
{
|
||||||
struct radix_tree_iter iter;
|
struct radix_tree_iter iter;
|
||||||
struct bdi_writeback_congested *congested, *congested_n;
|
struct rb_node *rbn;
|
||||||
void **slot;
|
void **slot;
|
||||||
|
|
||||||
WARN_ON(test_bit(WB_registered, &bdi->wb.state));
|
WARN_ON(test_bit(WB_registered, &bdi->wb.state));
|
||||||
|
@ -686,9 +691,11 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
|
||||||
radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
|
radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
|
||||||
cgwb_kill(*slot);
|
cgwb_kill(*slot);
|
||||||
|
|
||||||
rbtree_postorder_for_each_entry_safe(congested, congested_n,
|
while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
|
||||||
&bdi->cgwb_congested_tree, rb_node) {
|
struct bdi_writeback_congested *congested =
|
||||||
rb_erase(&congested->rb_node, &bdi->cgwb_congested_tree);
|
rb_entry(rbn, struct bdi_writeback_congested, rb_node);
|
||||||
|
|
||||||
|
rb_erase(rbn, &bdi->cgwb_congested_tree);
|
||||||
congested->bdi = NULL; /* mark @congested unlinked */
|
congested->bdi = NULL; /* mark @congested unlinked */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -764,15 +771,22 @@ static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
|
||||||
|
|
||||||
int bdi_init(struct backing_dev_info *bdi)
|
int bdi_init(struct backing_dev_info *bdi)
|
||||||
{
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
bdi->dev = NULL;
|
bdi->dev = NULL;
|
||||||
|
|
||||||
bdi->min_ratio = 0;
|
bdi->min_ratio = 0;
|
||||||
bdi->max_ratio = 100;
|
bdi->max_ratio = 100;
|
||||||
bdi->max_prop_frac = FPROP_FRAC_BASE;
|
bdi->max_prop_frac = FPROP_FRAC_BASE;
|
||||||
INIT_LIST_HEAD(&bdi->bdi_list);
|
INIT_LIST_HEAD(&bdi->bdi_list);
|
||||||
|
INIT_LIST_HEAD(&bdi->wb_list);
|
||||||
init_waitqueue_head(&bdi->wb_waitq);
|
init_waitqueue_head(&bdi->wb_waitq);
|
||||||
|
|
||||||
return cgwb_bdi_init(bdi);
|
ret = cgwb_bdi_init(bdi);
|
||||||
|
|
||||||
|
list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(bdi_init);
|
EXPORT_SYMBOL(bdi_init);
|
||||||
|
|
||||||
|
@ -823,7 +837,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
|
||||||
synchronize_rcu_expedited();
|
synchronize_rcu_expedited();
|
||||||
}
|
}
|
||||||
|
|
||||||
void bdi_destroy(struct backing_dev_info *bdi)
|
void bdi_unregister(struct backing_dev_info *bdi)
|
||||||
{
|
{
|
||||||
/* make sure nobody finds us on the bdi_list anymore */
|
/* make sure nobody finds us on the bdi_list anymore */
|
||||||
bdi_remove_from_list(bdi);
|
bdi_remove_from_list(bdi);
|
||||||
|
@ -835,9 +849,19 @@ void bdi_destroy(struct backing_dev_info *bdi)
|
||||||
device_unregister(bdi->dev);
|
device_unregister(bdi->dev);
|
||||||
bdi->dev = NULL;
|
bdi->dev = NULL;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void bdi_exit(struct backing_dev_info *bdi)
|
||||||
|
{
|
||||||
|
WARN_ON_ONCE(bdi->dev);
|
||||||
wb_exit(&bdi->wb);
|
wb_exit(&bdi->wb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void bdi_destroy(struct backing_dev_info *bdi)
|
||||||
|
{
|
||||||
|
bdi_unregister(bdi);
|
||||||
|
bdi_exit(bdi);
|
||||||
|
}
|
||||||
EXPORT_SYMBOL(bdi_destroy);
|
EXPORT_SYMBOL(bdi_destroy);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -3741,44 +3741,43 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
|
||||||
/**
|
/**
|
||||||
* mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
|
* mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
|
||||||
* @wb: bdi_writeback in question
|
* @wb: bdi_writeback in question
|
||||||
* @pavail: out parameter for number of available pages
|
* @pfilepages: out parameter for number of file pages
|
||||||
|
* @pheadroom: out parameter for number of allocatable pages according to memcg
|
||||||
* @pdirty: out parameter for number of dirty pages
|
* @pdirty: out parameter for number of dirty pages
|
||||||
* @pwriteback: out parameter for number of pages under writeback
|
* @pwriteback: out parameter for number of pages under writeback
|
||||||
*
|
*
|
||||||
* Determine the numbers of available, dirty, and writeback pages in @wb's
|
* Determine the numbers of file, headroom, dirty, and writeback pages in
|
||||||
* memcg. Dirty and writeback are self-explanatory. Available is a bit
|
* @wb's memcg. File, dirty and writeback are self-explanatory. Headroom
|
||||||
* more involved.
|
* is a bit more involved.
|
||||||
*
|
*
|
||||||
* A memcg's headroom is "min(max, high) - used". The available memory is
|
* A memcg's headroom is "min(max, high) - used". In the hierarchy, the
|
||||||
* calculated as the lowest headroom of itself and the ancestors plus the
|
* headroom is calculated as the lowest headroom of itself and the
|
||||||
* number of pages already being used for file pages. Note that this
|
* ancestors. Note that this doesn't consider the actual amount of
|
||||||
* doesn't consider the actual amount of available memory in the system.
|
* available memory in the system. The caller should further cap
|
||||||
* The caller should further cap *@pavail accordingly.
|
* *@pheadroom accordingly.
|
||||||
*/
|
*/
|
||||||
void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail,
|
void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
|
||||||
unsigned long *pdirty, unsigned long *pwriteback)
|
unsigned long *pheadroom, unsigned long *pdirty,
|
||||||
|
unsigned long *pwriteback)
|
||||||
{
|
{
|
||||||
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
|
struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
|
||||||
struct mem_cgroup *parent;
|
struct mem_cgroup *parent;
|
||||||
unsigned long head_room = PAGE_COUNTER_MAX;
|
|
||||||
unsigned long file_pages;
|
|
||||||
|
|
||||||
*pdirty = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_DIRTY);
|
*pdirty = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_DIRTY);
|
||||||
|
|
||||||
/* this should eventually include NR_UNSTABLE_NFS */
|
/* this should eventually include NR_UNSTABLE_NFS */
|
||||||
*pwriteback = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
|
*pwriteback = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
|
||||||
|
*pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
|
||||||
|
(1 << LRU_ACTIVE_FILE));
|
||||||
|
*pheadroom = PAGE_COUNTER_MAX;
|
||||||
|
|
||||||
file_pages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
|
|
||||||
(1 << LRU_ACTIVE_FILE));
|
|
||||||
while ((parent = parent_mem_cgroup(memcg))) {
|
while ((parent = parent_mem_cgroup(memcg))) {
|
||||||
unsigned long ceiling = min(memcg->memory.limit, memcg->high);
|
unsigned long ceiling = min(memcg->memory.limit, memcg->high);
|
||||||
unsigned long used = page_counter_read(&memcg->memory);
|
unsigned long used = page_counter_read(&memcg->memory);
|
||||||
|
|
||||||
head_room = min(head_room, ceiling - min(ceiling, used));
|
*pheadroom = min(*pheadroom, ceiling - min(ceiling, used));
|
||||||
memcg = parent;
|
memcg = parent;
|
||||||
}
|
}
|
||||||
|
|
||||||
*pavail = file_pages + head_room;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#else /* CONFIG_CGROUP_WRITEBACK */
|
#else /* CONFIG_CGROUP_WRITEBACK */
|
||||||
|
|
|
@ -145,9 +145,6 @@ struct dirty_throttle_control {
|
||||||
unsigned long pos_ratio;
|
unsigned long pos_ratio;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define DTC_INIT_COMMON(__wb) .wb = (__wb), \
|
|
||||||
.wb_completions = &(__wb)->completions
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Length of period for aging writeout fractions of bdis. This is an
|
* Length of period for aging writeout fractions of bdis. This is an
|
||||||
* arbitrarily chosen number. The longer the period, the slower fractions will
|
* arbitrarily chosen number. The longer the period, the slower fractions will
|
||||||
|
@ -157,12 +154,16 @@ struct dirty_throttle_control {
|
||||||
|
|
||||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||||
|
|
||||||
#define GDTC_INIT(__wb) .dom = &global_wb_domain, \
|
#define GDTC_INIT(__wb) .wb = (__wb), \
|
||||||
DTC_INIT_COMMON(__wb)
|
.dom = &global_wb_domain, \
|
||||||
|
.wb_completions = &(__wb)->completions
|
||||||
|
|
||||||
#define GDTC_INIT_NO_WB .dom = &global_wb_domain
|
#define GDTC_INIT_NO_WB .dom = &global_wb_domain
|
||||||
#define MDTC_INIT(__wb, __gdtc) .dom = mem_cgroup_wb_domain(__wb), \
|
|
||||||
.gdtc = __gdtc, \
|
#define MDTC_INIT(__wb, __gdtc) .wb = (__wb), \
|
||||||
DTC_INIT_COMMON(__wb)
|
.dom = mem_cgroup_wb_domain(__wb), \
|
||||||
|
.wb_completions = &(__wb)->memcg_completions, \
|
||||||
|
.gdtc = __gdtc
|
||||||
|
|
||||||
static bool mdtc_valid(struct dirty_throttle_control *dtc)
|
static bool mdtc_valid(struct dirty_throttle_control *dtc)
|
||||||
{
|
{
|
||||||
|
@ -213,7 +214,8 @@ static void wb_min_max_ratio(struct bdi_writeback *wb,
|
||||||
|
|
||||||
#else /* CONFIG_CGROUP_WRITEBACK */
|
#else /* CONFIG_CGROUP_WRITEBACK */
|
||||||
|
|
||||||
#define GDTC_INIT(__wb) DTC_INIT_COMMON(__wb)
|
#define GDTC_INIT(__wb) .wb = (__wb), \
|
||||||
|
.wb_completions = &(__wb)->completions
|
||||||
#define GDTC_INIT_NO_WB
|
#define GDTC_INIT_NO_WB
|
||||||
#define MDTC_INIT(__wb, __gdtc)
|
#define MDTC_INIT(__wb, __gdtc)
|
||||||
|
|
||||||
|
@ -682,13 +684,19 @@ static unsigned long hard_dirty_limit(struct wb_domain *dom,
|
||||||
return max(thresh, dom->dirty_limit);
|
return max(thresh, dom->dirty_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* memory available to a memcg domain is capped by system-wide clean memory */
|
/*
|
||||||
static void mdtc_cap_avail(struct dirty_throttle_control *mdtc)
|
* Memory which can be further allocated to a memcg domain is capped by
|
||||||
|
* system-wide clean memory excluding the amount being used in the domain.
|
||||||
|
*/
|
||||||
|
static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
|
||||||
|
unsigned long filepages, unsigned long headroom)
|
||||||
{
|
{
|
||||||
struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
|
struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
|
||||||
unsigned long clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
|
unsigned long clean = filepages - min(filepages, mdtc->dirty);
|
||||||
|
unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
|
||||||
|
unsigned long other_clean = global_clean - min(global_clean, clean);
|
||||||
|
|
||||||
mdtc->avail = min(mdtc->avail, clean);
|
mdtc->avail = filepages + min(headroom, other_clean);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1562,16 +1570,16 @@ static void balance_dirty_pages(struct address_space *mapping,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mdtc) {
|
if (mdtc) {
|
||||||
unsigned long writeback;
|
unsigned long filepages, headroom, writeback;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If @wb belongs to !root memcg, repeat the same
|
* If @wb belongs to !root memcg, repeat the same
|
||||||
* basic calculations for the memcg domain.
|
* basic calculations for the memcg domain.
|
||||||
*/
|
*/
|
||||||
mem_cgroup_wb_stats(wb, &mdtc->avail, &mdtc->dirty,
|
mem_cgroup_wb_stats(wb, &filepages, &headroom,
|
||||||
&writeback);
|
&mdtc->dirty, &writeback);
|
||||||
mdtc_cap_avail(mdtc);
|
|
||||||
mdtc->dirty += writeback;
|
mdtc->dirty += writeback;
|
||||||
|
mdtc_calc_avail(mdtc, filepages, headroom);
|
||||||
|
|
||||||
domain_dirty_limits(mdtc);
|
domain_dirty_limits(mdtc);
|
||||||
|
|
||||||
|
@ -1893,10 +1901,11 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (mdtc) {
|
if (mdtc) {
|
||||||
unsigned long writeback;
|
unsigned long filepages, headroom, writeback;
|
||||||
|
|
||||||
mem_cgroup_wb_stats(wb, &mdtc->avail, &mdtc->dirty, &writeback);
|
mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
|
||||||
mdtc_cap_avail(mdtc);
|
&writeback);
|
||||||
|
mdtc_calc_avail(mdtc, filepages, headroom);
|
||||||
domain_dirty_limits(mdtc); /* ditto, ignore writeback */
|
domain_dirty_limits(mdtc); /* ditto, ignore writeback */
|
||||||
|
|
||||||
if (mdtc->dirty > mdtc->bg_thresh)
|
if (mdtc->dirty > mdtc->bg_thresh)
|
||||||
|
@ -1956,7 +1965,6 @@ void laptop_mode_timer_fn(unsigned long data)
|
||||||
int nr_pages = global_page_state(NR_FILE_DIRTY) +
|
int nr_pages = global_page_state(NR_FILE_DIRTY) +
|
||||||
global_page_state(NR_UNSTABLE_NFS);
|
global_page_state(NR_UNSTABLE_NFS);
|
||||||
struct bdi_writeback *wb;
|
struct bdi_writeback *wb;
|
||||||
struct wb_iter iter;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We want to write everything out, not just down to the dirty
|
* We want to write everything out, not just down to the dirty
|
||||||
|
@ -1965,10 +1973,12 @@ void laptop_mode_timer_fn(unsigned long data)
|
||||||
if (!bdi_has_dirty_io(&q->backing_dev_info))
|
if (!bdi_has_dirty_io(&q->backing_dev_info))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
bdi_for_each_wb(wb, &q->backing_dev_info, &iter, 0)
|
rcu_read_lock();
|
||||||
|
list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node)
|
||||||
if (wb_has_dirty_io(wb))
|
if (wb_has_dirty_io(wb))
|
||||||
wb_start_writeback(wb, nr_pages, true,
|
wb_start_writeback(wb, nr_pages, true,
|
||||||
WB_REASON_LAPTOP_TIMER);
|
WB_REASON_LAPTOP_TIMER);
|
||||||
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue
Block a user