diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 71df353e367c..32e94635484f 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1385,6 +1385,15 @@ swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100 causes the kernel to prefer to reclaim dentries and inodes. +dirty_background_bytes +---------------------- + +Contains the amount of dirty memory at which the pdflush background writeback +daemon will start writeback. + +If dirty_background_bytes is written, dirty_background_ratio becomes a function +of its value (dirty_background_bytes / the amount of dirtyable system memory). + dirty_background_ratio ---------------------- @@ -1393,14 +1402,29 @@ pages + file cache, not including locked pages and HugePages), the number of pages at which the pdflush background writeback daemon will start writing out dirty data. +If dirty_background_ratio is written, dirty_background_bytes becomes a function +of its value (dirty_background_ratio * the amount of dirtyable system memory). + +dirty_bytes +----------- + +Contains the amount of dirty memory at which a process generating disk writes +will itself start writeback. + +If dirty_bytes is written, dirty_ratio becomes a function of its value +(dirty_bytes / the amount of dirtyable system memory). + dirty_ratio ------------------ +----------- Contains, as a percentage of the dirtyable system memory (free pages + mapped pages + file cache, not including locked pages and HugePages), the number of pages at which a process which is generating disk writes will itself start writing out dirty data. +If dirty_ratio is written, dirty_bytes becomes a function of its value +(dirty_ratio * the amount of dirtyable system memory). + dirty_writeback_centisecs ------------------------- diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index d79eeda7a699..cd05994a49e6 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -41,7 +41,8 @@ Currently, these files are in /proc/sys/vm: ============================================================== -dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, +dirty_bytes, dirty_ratio, dirty_background_bytes, +dirty_background_ratio, dirty_expire_centisecs, dirty_writeback_centisecs, highmem_is_dirtyable, vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout, drop-caches, hugepages_treat_as_movable: diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 259e9ea58cab..bb28c975c1d7 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -107,7 +107,9 @@ void throttle_vm_writeout(gfp_t gfp_mask); /* These are exported to sysctl. */ extern int dirty_background_ratio; +extern unsigned long dirty_background_bytes; extern int vm_dirty_ratio; +extern unsigned long vm_dirty_bytes; extern int dirty_writeback_interval; extern int dirty_expire_interval; extern int vm_highmem_is_dirtyable; @@ -116,9 +118,18 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); +extern int dirty_background_ratio_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); +extern int dirty_background_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int dirty_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); struct ctl_table; struct file; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ff6d45c7626f..92f6e5bc3c24 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -87,10 +87,6 @@ extern int rcutorture_runnable; #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ /* Constants used for minimum and maximum */ -#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) -static int one = 1; -#endif - #ifdef CONFIG_DETECT_SOFTLOCKUP static int sixty = 60; static int neg_one = -1; @@ -101,6 +97,7 @@ static int two = 2; #endif static int zero; +static int one = 1; static int one_hundred = 100; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ @@ -952,11 +949,21 @@ static struct ctl_table vm_table[] = { .data = &dirty_background_ratio, .maxlen = sizeof(dirty_background_ratio), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, + .proc_handler = &dirty_background_ratio_handler, .strategy = &sysctl_intvec, .extra1 = &zero, .extra2 = &one_hundred, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "dirty_background_bytes", + .data = &dirty_background_bytes, + .maxlen = sizeof(dirty_background_bytes), + .mode = 0644, + .proc_handler = &dirty_background_bytes_handler, + .strategy = &sysctl_intvec, + .extra1 = &one, + }, { .ctl_name = VM_DIRTY_RATIO, .procname = "dirty_ratio", @@ -968,6 +975,16 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "dirty_bytes", + .data = &vm_dirty_bytes, + .maxlen = sizeof(vm_dirty_bytes), + .mode = 0644, + .proc_handler = &dirty_bytes_handler, + .strategy = &sysctl_intvec, + .extra1 = &one, + }, { .procname = "dirty_writeback_centisecs", .data = &dirty_writeback_interval, diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4d4074cff300..b493db7841dc 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -68,6 +68,12 @@ static inline long sync_writeback_pages(void) */ int dirty_background_ratio = 5; +/* + * dirty_background_bytes starts at 0 (disabled) so that it is a function of + * dirty_background_ratio * the amount of dirtyable memory + */ +unsigned long dirty_background_bytes; + /* * free highmem will not be subtracted from the total free memory * for calculating free ratios if vm_highmem_is_dirtyable is true @@ -79,6 +85,12 @@ int vm_highmem_is_dirtyable; */ int vm_dirty_ratio = 10; +/* + * vm_dirty_bytes starts at 0 (disabled) so that it is a function of + * vm_dirty_ratio * the amount of dirtyable memory + */ +unsigned long vm_dirty_bytes; + /* * The interval between `kupdate'-style writebacks, in jiffies */ @@ -135,23 +147,75 @@ static int calc_period_shift(void) { unsigned long dirty_total; - dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100; + if (vm_dirty_bytes) + dirty_total = vm_dirty_bytes / PAGE_SIZE; + else + dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / + 100; return 2 + ilog2(dirty_total - 1); } /* - * update the period when the dirty ratio changes. + * update the period when the dirty threshold changes. */ +static void update_completion_period(void) +{ + int shift = calc_period_shift(); + prop_change_shift(&vm_completions, shift); + prop_change_shift(&vm_dirties, shift); +} + +int dirty_background_ratio_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + if (ret == 0 && write) + dirty_background_bytes = 0; + return ret; +} + +int dirty_background_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + if (ret == 0 && write) + dirty_background_ratio = 0; + return ret; +} + int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret; + + ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_ratio != old_ratio) { - int shift = calc_period_shift(); - prop_change_shift(&vm_completions, shift); - prop_change_shift(&vm_dirties, shift); + update_completion_period(); + vm_dirty_bytes = 0; + } + return ret; +} + + +int dirty_bytes_handler(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int old_bytes = vm_dirty_bytes; + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + if (ret == 0 && write && vm_dirty_bytes != old_bytes) { + update_completion_period(); + vm_dirty_ratio = 0; } return ret; } @@ -365,23 +429,29 @@ void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, unsigned long *pbdi_dirty, struct backing_dev_info *bdi) { - int background_ratio; /* Percentages */ - int dirty_ratio; unsigned long background; unsigned long dirty; unsigned long available_memory = determine_dirtyable_memory(); struct task_struct *tsk; - dirty_ratio = vm_dirty_ratio; - if (dirty_ratio < 5) - dirty_ratio = 5; + if (vm_dirty_bytes) + dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE); + else { + int dirty_ratio; - background_ratio = dirty_background_ratio; - if (background_ratio >= dirty_ratio) - background_ratio = dirty_ratio / 2; + dirty_ratio = vm_dirty_ratio; + if (dirty_ratio < 5) + dirty_ratio = 5; + dirty = (dirty_ratio * available_memory) / 100; + } - background = (background_ratio * available_memory) / 100; - dirty = (dirty_ratio * available_memory) / 100; + if (dirty_background_bytes) + background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE); + else + background = (dirty_background_ratio * available_memory) / 100; + + if (background >= dirty) + background = dirty / 2; tsk = current; if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) { background += background / 4;